docrev 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,426 @@
1
+ /**
2
+ * Cross-reference handling - dynamic figure/table references
3
+ *
4
+ * Enables:
5
+ * - @fig:label syntax in source (auto-numbered)
6
+ * - Conversion to "Figure 1" in Word output
7
+ * - Auto-conversion back during import
8
+ */
9
+
import * as fs from 'fs';
import { createRequire } from 'module';
import * as path from 'path';
12
+
/**
 * Patterns for detecting hardcoded references.
 * Matches: Figure 1, Fig. 1a, fig 1b-c, Figs. 1-3, Table S1, etc.
 * Includes optional letter suffixes for sub-panels (a, b, c, etc.)
 *
 * Capture groups (identical layout in all three patterns):
 *   [1] reference word (Figure, Fig., Tables, Eq, ...)
 *   [2] first number (1, S1)
 *   [3] first letter suffix (a, b) - optional
 *   [4] second number (2, S2) - optional
 *   [5] second letter suffix (b, c) - optional
 *
 * The optional "separator + second part" tail is guarded by a lookahead so a
 * separator is only consumed when a complete second part follows it:
 *   - a number with an optional letter, ending on a word boundary, or
 *   - a lone letter, but only when the first number carried a letter itself
 *     (checked by the lookbehind), as in "1a-c".
 * Without the guard the tail could match just the separator ("Figure 1, and"
 * matched "Figure 1, ") or an unrelated one-letter word ("Figure 1, a nice"
 * matched "Figure 1, a"), and that text was lost when the match was replaced.
 *
 * All patterns carry the `g` flag and therefore keep state in `lastIndex`;
 * reset it before every scan.
 */
const DETECTION_PATTERNS = {
  // Figures: Fig, Fig., fig, figure, Figure, FIGURE, Figs, Figures (plural)
  // With optional letter suffix: 1a, 1b, 2a-c, etc.
  figure: /\b(Figures?|Figs?\.?)\s*(\d+|S\d+)([a-z])?(?:\s*[-–—&,]\s*(?=(?:\d+|S\d+)[a-z]?\b|(?<=\d[a-z]\s*[-–—&,]\s*)[a-z]\b)(\d+|S\d+)?([a-z])?)?\b/gi,

  // Tables: Tab, Tab., tab, table, Table, TABLE, Tabs, Tables (plural)
  // With optional letter suffix for sub-tables
  table: /\b(Tables?|Tabs?\.?)\s*(\d+|S\d+)([a-z])?(?:\s*[-–—&,]\s*(?=(?:\d+|S\d+)[a-z]?\b|(?<=\d[a-z]\s*[-–—&,]\s*)[a-z]\b)(\d+|S\d+)?([a-z])?)?\b/gi,

  // Equations: Eq, Eq., eq, equation, Equation (no supplementary "S" numbers)
  equation: /\b(Equations?|Eqs?\.?)\s*(\d+)([a-z])?(?:\s*[-–—&,]\s*(?=\d+[a-z]?\b|(?<=\d[a-z]\s*[-–—&,]\s*)[a-z]\b)(\d+)?([a-z])?)?\b/gi,
};

/**
 * Pattern for extracting anchors from markdown: {#fig:label}, {#tbl:label}, {#eq:label}
 * Group [1] is the type (matched case-insensitively), group [2] the label
 * (everything up to the closing brace).
 */
const ANCHOR_PATTERN = /\{#(fig|tbl|eq):([^}]+)\}/gi;

/**
 * Pattern for @-style references: @fig:label, @tbl:label, @eq:label
 * Group [1] is the type (matched case-insensitively), group [2] the label
 * (letters, digits, underscore and hyphen only).
 */
const REF_PATTERN = /@(fig|tbl|eq):([a-zA-Z0-9_-]+)/gi;
40
+
/**
 * Map a reference word onto its canonical short type.
 *
 * The input is lower-cased and one trailing dot is dropped; the result is then
 * chosen by prefix. Anything that does not start with a known prefix is
 * returned in its cleaned (lower-case, dot-less) form.
 *
 * @param {string} typeStr - e.g., "Figure", "Fig.", "Figs", "table"
 * @returns {string} - "fig", "tbl", or "eq" (or the cleaned input if unrecognised)
 */
export function normalizeType(typeStr) {
  const cleaned = typeStr.toLowerCase().replace(/\.$/, '');

  // Checked in order; the first matching prefix wins.
  const prefixTable = [
    ['fig', 'fig'],
    ['tab', 'tbl'],
    ['eq', 'eq'],
  ];

  for (const [prefix, canonical] of prefixTable) {
    if (cleaned.startsWith(prefix)) {
      return canonical;
    }
  }

  return cleaned;
}
53
+
/**
 * Parse a reference number, handling supplementary (S1, S2) and letter suffixes (1a, 1b).
 *
 * - A leading "S"/"s" marks the number as supplementary and is stripped.
 * - A letter embedded after the digits ("1a") is used as the suffix unless an
 *   explicit `suffix` argument is given, which takes precedence.
 * - The returned suffix is always lower-case (or null), including on the
 *   empty-input path.
 * - Non-string input (e.g. the number 3) is converted to a string first.
 * - Input with no leading digits yields `num: NaN`.
 *
 * @param {string} numStr - e.g., "1", "S1", "S2", "1a", "S1b"
 * @param {string} [suffix] - optional letter suffix already extracted
 * @returns {{isSupp: boolean, num: number, suffix: string|null}}
 */
export function parseRefNumber(numStr, suffix = null) {
  const lowerOrNull = (value) => (value ? String(value).toLowerCase() : null);

  // Nothing to parse: report number 0, but still normalise the suffix.
  if (!numStr) {
    return { isSupp: false, num: 0, suffix: lowerOrNull(suffix) };
  }

  const raw = String(numStr);
  const isSupp = raw.toUpperCase().startsWith('S');
  const numPart = isSupp ? raw.slice(1) : raw;

  // Digits optionally followed by one embedded suffix letter (e.g., "1a").
  const parts = numPart.match(/^(\d+)([a-z])?$/i);
  const num = Number.parseInt(parts ? parts[1] : numPart, 10);
  const chosenSuffix = suffix || (parts && parts[2]) || null;

  return { isSupp, num, suffix: lowerOrNull(chosenSuffix) };
}
70
+
/**
 * Order the given .md files according to `sections.yaml`, when present.
 *
 * Files listed under `sections` come first, sorted by their `order` value
 * (entries without one sort as 999); files not listed keep their incoming
 * order and are appended. Any failure (js-yaml not installed, unreadable or
 * malformed file) is deliberately non-fatal: the incoming order is returned.
 *
 * @param {string} directory - Directory that may contain sections.yaml
 * @param {string[]} files - Candidate file names
 * @returns {string[]} The files in processing order
 */
function orderFilesBySections(directory, files) {
  const sectionsPath = path.join(directory, 'sections.yaml');
  if (!fs.existsSync(sectionsPath)) return files;

  try {
    // This file is an ES module, where a bare `require` is not defined; the
    // resulting ReferenceError used to be swallowed by the catch below, so the
    // configured order was never applied. createRequire keeps the load lazy
    // and optional.
    const requireFromHere = createRequire(import.meta.url);
    const yaml = requireFromHere('js-yaml');
    const config = yaml.load(fs.readFileSync(sectionsPath, 'utf-8'));
    if (!config?.sections) return files;

    const available = new Set(files);
    const listed = Object.entries(config.sections)
      .sort(([, a], [, b]) => (a?.order ?? 999) - (b?.order ?? 999))
      .map(([file]) => file)
      .filter((file) => available.has(file));

    const alreadyListed = new Set(listed);
    return [...listed, ...files.filter((file) => !alreadyListed.has(file))];
  } catch {
    // Best effort only: fall back to the directory listing order.
    return files;
  }
}

/**
 * Build a registry of figure/table/equation labels from .md files.
 * Scans for {#fig:label}, {#tbl:label} and {#eq:label} anchors and numbers
 * them in the order they are encountered.
 *
 * Files whose name contains "supp" or "appendix" (case-insensitive) are
 * treated as supplementary: their figures and tables are numbered by separate
 * counters and flagged `isSupp`. Equations share a single counter.
 * If a label occurs more than once, the later occurrence replaces the earlier
 * entry (each occurrence still consumes a number).
 *
 * @param {string} directory - Directory containing .md files
 * @param {string[]} [excludeFiles] - File names to exclude (compared case-insensitively)
 * @returns {{
 *   figures: Map<string, {label: string, num: number, isSupp: boolean, file: string}>,
 *   tables: Map<string, {label: string, num: number, isSupp: boolean, file: string}>,
 *   equations: Map<string, {label: string, num: number, file: string}>,
 *   byNumber: {
 *     fig: Map<number, string>, figS: Map<number, string>,
 *     tbl: Map<number, string>, tblS: Map<number, string>,
 *     eq: Map<number, string>
 *   }
 * }}
 * @throws {Error} If the directory or one of the selected files cannot be read
 */
export function buildRegistry(directory, excludeFiles = ['paper.md', 'README.md', 'CLAUDE.md']) {
  const excluded = new Set(excludeFiles.map((name) => name.toLowerCase()));
  const files = fs
    .readdirSync(directory)
    .filter((name) => name.endsWith('.md') && !excluded.has(name.toLowerCase()));

  // Use the document order from sections.yaml if available.
  const orderedFiles = orderFilesBySections(directory, files);

  const figures = new Map();
  const tables = new Map();
  const equations = new Map();

  // Counters for numbering (separate for main and supplementary)
  let figNum = 0;
  let figSuppNum = 0;
  let tblNum = 0;
  let tblSuppNum = 0;
  let eqNum = 0;

  const isSupplementary = (filename) => {
    const lower = filename.toLowerCase();
    return lower.includes('supp') || lower.includes('appendix');
  };

  for (const file of orderedFiles) {
    const content = fs.readFileSync(path.join(directory, file), 'utf-8');
    const isSupp = isSupplementary(file);

    // ANCHOR_PATTERN is a shared /g regex: reset its cursor before each scan.
    ANCHOR_PATTERN.lastIndex = 0;
    let match;
    while ((match = ANCHOR_PATTERN.exec(content)) !== null) {
      const type = match[1].toLowerCase();
      const label = match[2];

      if (type === 'fig') {
        const num = isSupp ? ++figSuppNum : ++figNum;
        figures.set(label, { label, num, isSupp, file });
      } else if (type === 'tbl') {
        const num = isSupp ? ++tblSuppNum : ++tblNum;
        tables.set(label, { label, num, isSupp, file });
      } else if (type === 'eq') {
        eqNum++;
        equations.set(label, { label, num: eqNum, file });
      }
    }
  }

  // Build reverse lookup: number → label
  const byNumber = {
    fig: new Map(),
    figS: new Map(),
    tbl: new Map(),
    tblS: new Map(),
    eq: new Map(),
  };

  for (const [label, info] of figures) {
    byNumber[info.isSupp ? 'figS' : 'fig'].set(info.num, label);
  }
  for (const [label, info] of tables) {
    byNumber[info.isSupp ? 'tblS' : 'tbl'].set(info.num, label);
  }
  for (const [label, info] of equations) {
    byNumber.eq.set(info.num, label);
  }

  return { figures, tables, equations, byNumber };
}
188
+
/**
 * Render a label as its human-readable reference (e.g., "Figure 1", "Table S2").
 *
 * "fig" looks in `registry.figures`, "tbl" in `registry.tables`; every other
 * type value falls through to `registry.equations` and the "Equation" prefix.
 *
 * @param {string} type - "fig", "tbl", "eq"
 * @param {string} label
 * @param {object} registry - Object with `figures`, `tables` and `equations` maps
 * @returns {string|null} The display string, or null when the label is not registered
 */
export function labelToDisplay(type, label, registry) {
  let collection;
  let prefix;

  switch (type) {
    case 'fig':
      collection = registry.figures;
      prefix = 'Figure';
      break;
    case 'tbl':
      collection = registry.tables;
      prefix = 'Table';
      break;
    default:
      collection = registry.equations;
      prefix = 'Equation';
  }

  const info = collection.get(label);
  if (!info) {
    return null;
  }

  // Supplementary items are shown with an "S" in front of the number.
  if (info.isSupp) {
    return `${prefix} S${info.num}`;
  }
  return `${prefix} ${info.num}`;
}
208
+
/**
 * Look up the label registered for a display number (e.g., "heatmap" for Figure 1).
 *
 * Supplementary numbers live in a separate reverse map whose key is the type
 * followed by "S" (e.g., "figS").
 *
 * @param {string} type - "fig", "tbl", "eq"
 * @param {number} num
 * @param {boolean} isSupp
 * @param {object} registry - Object with a `byNumber` table of reverse maps
 * @returns {string|null} The label, or null when no map or no entry exists
 */
export function numberToLabel(type, num, isSupp, registry) {
  const mapKey = isSupp ? `${type}S` : type;
  const reverseMap = registry.byNumber[mapKey];

  if (reverseMap === undefined || reverseMap === null) {
    return null;
  }

  const label = reverseMap.get(num);
  if (!label) {
    return null;
  }
  return label;
}
221
+
/**
 * Detect all hardcoded references in text.
 *
 * Each match of a detection pattern yields one entry whose `numbers` list
 * holds every item the reference stands for:
 *   - "Figs. 1-3"  (dash)             → 1, 2, 3        (range is expanded)
 *   - "Figs. 1, 3" / "1 & 3" (list)   → 1, 3           (no expansion)
 *   - "Fig. 1a-c"  (dash)             → 1a, 1b, 1c
 *   - "Fig. 1a, c" (list)             → 1a, 1c
 * Ranges that cannot be expanded sensibly (reversed, mixed main/supplementary,
 * carrying letter suffixes, or spanning more than 1000 numbers) contribute
 * their two endpoints only. A lone second letter after a number without a
 * letter (e.g. "1-c") is ignored.
 *
 * @param {string} text
 * @returns {Array<{type: string, match: string, numbers: Array<{num: number, isSupp: boolean, suffix: string|null}>, position: number}>}
 *   Entries sorted by position; `type` is "fig", "tbl" or "eq".
 */
export function detectHardcodedRefs(text) {
  // Guard against pathological input such as "Figure 1-999999999", which
  // would otherwise allocate one entry per number in the range.
  const MAX_RANGE_SPAN = 1000;
  // Only dashes denote a range; "&" and "," separate list items.
  const RANGE_SEPARATORS = '-–—';

  const refs = [];

  for (const [kind, pattern] of Object.entries(DETECTION_PATTERNS)) {
    // The patterns are shared /g regexes: reset the cursor before scanning.
    pattern.lastIndex = 0;
    let match;

    while ((match = pattern.exec(text)) !== null) {
      // Groups: [1] reference word, [2] first number, [3] first suffix,
      //         [4] second number, [5] second suffix.
      const [matched, , firstNum, firstSuffix, secondNum, secondSuffix] = match;

      const first = parseRefNumber(firstNum, firstSuffix);
      const numbers = [first];

      if (secondNum || secondSuffix) {
        const second = parseRefNumber(secondNum || firstNum, secondSuffix);

        // The reference word and the numbers never contain a separator
        // character, so the first one found is the separator that was used.
        const separator = matched.match(/[-–—&,]/)?.[0] ?? '';
        const isRange = separator !== '' && RANGE_SEPARATORS.includes(separator);

        if (!secondNum) {
          // Suffix-only second part (e.g., "1a-c" or "1a, c").
          if (first.suffix && second.suffix) {
            // Both suffixes are lower-case here, so "1A-C" expands like "1a-c".
            const startCode = first.suffix.charCodeAt(0);
            const endCode = second.suffix.charCodeAt(0);
            if (isRange && endCode >= startCode) {
              for (let code = startCode + 1; code <= endCode; code++) {
                numbers.push({
                  num: first.num,
                  isSupp: first.isSupp,
                  suffix: String.fromCharCode(code),
                });
              }
            } else {
              numbers.push({ num: first.num, isSupp: first.isSupp, suffix: second.suffix });
            }
          }
        } else if (
          isRange &&
          first.isSupp === second.isSupp &&
          !first.suffix &&
          !second.suffix &&
          second.num >= first.num &&
          second.num - first.num <= MAX_RANGE_SPAN
        ) {
          // Plain numeric range: add everything after the first number.
          for (let n = first.num + 1; n <= second.num; n++) {
            numbers.push({ num: n, isSupp: first.isSupp, suffix: null });
          }
        } else {
          numbers.push(second);
        }
      }

      refs.push({
        type: normalizeType(kind),
        match: matched,
        numbers,
        position: match.index,
      });
    }
  }

  // Sort by position
  refs.sort((a, b) => a.position - b.position);
  return refs;
}
289
+
/**
 * Convert hardcoded references to @-style references.
 *
 * A reference is rewritten only when every number it covers resolves to a
 * label in the registry; otherwise the original text is kept and a warning is
 * recorded for each unresolved number. Numbers that resolve to the same label
 * (e.g. the panels of "Fig. 1a-c") produce that label once. Multiple labels
 * are joined with "; ".
 *
 * References are processed from the end of the text towards the start, so
 * `conversions` and `warnings` are listed in reverse document order.
 *
 * @param {string} text
 * @param {object} registry - Registry providing the `byNumber` reverse maps
 * @returns {{converted: string, conversions: Array<{from: string, to: string}>, warnings: string[]}}
 */
export function convertHardcodedRefs(text, registry) {
  const refs = detectHardcodedRefs(text);
  const conversions = [];
  const warnings = [];

  // Process in reverse order so earlier positions stay valid after a splice.
  let result = text;
  for (let i = refs.length - 1; i >= 0; i--) {
    const ref = refs[i];

    // Sets keep first-seen order and drop repeats from panel suffixes.
    const targets = new Set();
    const unresolved = new Set();

    for (const { num, isSupp } of ref.numbers) {
      const label = numberToLabel(ref.type, num, isSupp, registry);
      if (label) {
        targets.add(`@${ref.type}:${label}`);
      } else {
        unresolved.add(isSupp ? `S${num}` : `${num}`);
      }
    }

    for (const displayNum of unresolved) {
      warnings.push(`Unknown reference: ${ref.type} ${displayNum} (no matching label)`);
    }

    // Keep the original text unless the whole reference could be resolved.
    if (unresolved.size > 0 || targets.size === 0) continue;

    const replacement = [...targets].join('; ');
    result =
      result.slice(0, ref.position) + replacement + result.slice(ref.position + ref.match.length);

    conversions.push({
      from: ref.match,
      to: replacement,
    });
  }

  return { converted: result, conversions, warnings };
}
332
+
/**
 * Detect @-style references in text.
 *
 * The type is matched case-insensitively and reported in lower case, so
 * "@FIG:x" yields type "fig" — the same form the anchor registry uses. The
 * label and the matched text are reported exactly as written.
 *
 * @param {string} text
 * @returns {Array<{type: string, label: string, match: string, position: number}>}
 *   References in order of appearance.
 */
export function detectDynamicRefs(text) {
  // matchAll works on a copy of the regex that starts at the original's
  // current lastIndex; reset it so scanning always begins at the start.
  REF_PATTERN.lastIndex = 0;

  return Array.from(text.matchAll(REF_PATTERN), (found) => ({
    type: found[1].toLowerCase(),
    label: found[2],
    match: found[0],
    position: found.index,
  }));
}
354
+
/**
 * Get reference status for a file/text: the @-style references, the hardcoded
 * references, and how many anchors of each type the text defines.
 *
 * Anchor types are counted case-insensitively ("{#FIG:x}" counts as a figure),
 * matching how the anchor pattern itself matches.
 *
 * @param {string} text
 * @param {object} registry - Currently not consulted; kept for interface compatibility
 * @returns {{
 *   dynamic: Array,
 *   hardcoded: Array,
 *   anchors: {figures: number, tables: number, equations: number}
 * }}
 */
export function getRefStatus(text, registry) {
  const dynamic = detectDynamicRefs(text);
  const hardcoded = detectHardcodedRefs(text);

  // Count anchors in this text
  const anchors = { figures: 0, tables: 0, equations: 0 };
  const counterForType = { fig: 'figures', tbl: 'tables', eq: 'equations' };

  // ANCHOR_PATTERN is a shared /g regex: reset its cursor before scanning.
  ANCHOR_PATTERN.lastIndex = 0;
  let match;
  while ((match = ANCHOR_PATTERN.exec(text)) !== null) {
    const counter = counterForType[match[1].toLowerCase()];
    if (counter) anchors[counter]++;
  }

  return { dynamic, hardcoded, anchors };
}
387
+
/**
 * Render the registry as plain text for display.
 *
 * Produces one section each for figures, tables and equations (in that
 * order), skipping empty ones and separating sections with a blank line.
 * Figure and table numbers get an "S" in front when the entry is
 * supplementary; equation numbers are printed as stored. When nothing is
 * registered, a single "nothing found" line is returned.
 *
 * @param {object} registry - Object with `figures`, `tables` and `equations` maps
 * @returns {string} Lines joined with "\n"
 */
export function formatRegistry(registry) {
  const sections = [
    { heading: 'Figures:', noun: 'Figure', refType: 'fig', entries: registry.figures, marksSupp: true },
    { heading: 'Tables:', noun: 'Table', refType: 'tbl', entries: registry.tables, marksSupp: true },
    { heading: 'Equations:', noun: 'Equation', refType: 'eq', entries: registry.equations, marksSupp: false },
  ];

  const output = [];

  for (const { heading, noun, refType, entries, marksSupp } of sections) {
    if (entries.size === 0) {
      continue;
    }

    // Blank line between sections, but not before the first one.
    if (output.length > 0) {
      output.push('');
    }
    output.push(heading);

    entries.forEach((info, label) => {
      const shown = marksSupp && info.isSupp ? `S${info.num}` : info.num;
      output.push(` ${noun} ${shown}: @${refType}:${label} (${info.file})`);
    });
  }

  if (output.length === 0) {
    output.push('No figure/table anchors found.');
  }

  return output.join('\n');
}