docrev 0.7.7 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -157,10 +157,25 @@ my-report/
157
157
  Write your content in the markdown files. When ready to share:
158
158
 
159
159
  ```bash
160
- rev build docx
160
+ rev build docx pdf
161
161
  ```
162
162
 
163
- This produces `my-report.docx` with citations resolved, equations rendered, and cross-references numbered. Use `rev build pdf` for PDF output instead.
163
+ After building, your project structure looks like:
164
+
165
+ ```
166
+ my-report/
167
+ ├── intro.md
168
+ ├── methods.md
169
+ ├── results.md
170
+ ├── discussion.md
171
+ ├── references.bib
172
+ ├── rev.yaml
173
+ ├── paper.md ← combined sections (auto-generated)
174
+ ├── my-report.docx ← output for collaborators
175
+ └── my-report.pdf ← output for journals
176
+ ```
177
+
178
+ The output filename is derived from your project title in `rev.yaml`. Citations are resolved, equations rendered, and cross-references numbered.
164
179
 
165
180
  ### Starting from an Existing Word Document
166
181
 
package/lib/build.js CHANGED
@@ -13,7 +13,7 @@ import * as path from 'path';
13
13
  import { execSync, spawn } from 'child_process';
14
14
  import YAML from 'yaml';
15
15
  import { stripAnnotations } from './annotations.js';
16
- import { buildRegistry, labelToDisplay, detectDynamicRefs } from './crossref.js';
16
+ import { buildRegistry, labelToDisplay, detectDynamicRefs, resolveForwardRefs } from './crossref.js';
17
17
  import { processVariables, hasVariables } from './variables.js';
18
18
 
19
19
  /**
@@ -160,8 +160,22 @@ export function combineSections(directory, config, options = {}) {
160
160
  // Read all section contents for variable processing
161
161
  const sectionContents = [];
162
162
 
163
+ // Check if we need to auto-inject references before supplementary
164
+ // Pandoc places refs at the end by default, which breaks when supplementary follows
165
+ const hasRefsSection = sections.some(s =>
166
+ s.toLowerCase().includes('reference') || s.toLowerCase().includes('refs')
167
+ );
168
+ const suppIndex = sections.findIndex(s =>
169
+ s.toLowerCase().includes('supp') || s.toLowerCase().includes('appendix')
170
+ );
171
+ const hasBibliography = config.bibliography && fs.existsSync(path.join(directory, config.bibliography));
172
+
173
+ // Track if we find an explicit refs div in any section
174
+ let hasExplicitRefsDiv = false;
175
+
163
176
  // Combine sections
164
- for (const section of sections) {
177
+ for (let i = 0; i < sections.length; i++) {
178
+ const section = sections[i];
165
179
  const filePath = path.join(directory, section);
166
180
  let content = fs.readFileSync(filePath, 'utf-8');
167
181
 
@@ -169,6 +183,21 @@ export function combineSections(directory, config, options = {}) {
169
183
  content = stripFrontmatter(content);
170
184
  sectionContents.push(content);
171
185
 
186
+ // Check if this section has an explicit refs div
187
+ if (content.includes('::: {#refs}') || content.includes('::: {#refs}')) {
188
+ hasExplicitRefsDiv = true;
189
+ }
190
+
191
+ // Auto-inject references before supplementary if needed
192
+ if (i === suppIndex && hasBibliography && !hasRefsSection && !hasExplicitRefsDiv) {
193
+ parts.push('# References\n');
194
+ parts.push('::: {#refs}');
195
+ parts.push(':::');
196
+ parts.push('');
197
+ parts.push('');
198
+ options._refsAutoInjected = true;
199
+ }
200
+
172
201
  parts.push(content.trim());
173
202
  parts.push('');
174
203
  parts.push(''); // Double newline between sections
@@ -181,6 +210,18 @@ export function combineSections(directory, config, options = {}) {
181
210
  paperContent = processVariables(paperContent, config, { sectionContents });
182
211
  }
183
212
 
213
+ // Resolve forward references (refs that appear before their anchor definition)
214
+ // This fixes pandoc-crossref limitation with multi-file documents
215
+ if (hasPandocCrossref()) {
216
+ const registry = buildRegistry(directory, sections);
217
+ const { text, resolved } = resolveForwardRefs(paperContent, registry);
218
+ if (resolved.length > 0) {
219
+ paperContent = text;
220
+ // Store resolved count for optional reporting
221
+ options._forwardRefsResolved = resolved.length;
222
+ }
223
+ }
224
+
184
225
  const paperPath = path.join(directory, 'paper.md');
185
226
 
186
227
  fs.writeFileSync(paperPath, paperContent, 'utf-8');
@@ -239,7 +280,8 @@ export function prepareForFormat(paperPath, format, config, options = {}) {
239
280
  let content = fs.readFileSync(paperPath, 'utf-8');
240
281
 
241
282
  // Build crossref registry for reference conversion
242
- const registry = buildRegistry(directory);
283
+ // Pass sections from config to ensure correct file ordering
284
+ const registry = buildRegistry(directory, config.sections);
243
285
 
244
286
  if (format === 'pdf' || format === 'tex') {
245
287
  // Strip all annotations for clean output
@@ -520,10 +562,11 @@ export async function runPandoc(inputPath, format, config, options = {}) {
520
562
  * @param {string} directory
521
563
  * @param {string[]} formats - ['pdf', 'docx', 'tex'] or ['all']
522
564
  * @param {object} options
523
- * @returns {Promise<{results: object[], paperPath: string, warnings: string[]}>}
565
+ * @returns {Promise<{results: object[], paperPath: string, warnings: string[], forwardRefsResolved: number}>}
524
566
  */
525
567
  export async function build(directory, formats = ['pdf', 'docx'], options = {}) {
526
568
  const warnings = [];
569
+ let forwardRefsResolved = 0;
527
570
 
528
571
  // Check pandoc
529
572
  if (!hasPandoc()) {
@@ -545,7 +588,10 @@ export async function build(directory, formats = ['pdf', 'docx'], options = {})
545
588
  const config = options.config || loadConfig(directory);
546
589
 
547
590
  // Combine sections → paper.md
548
- const paperPath = combineSections(directory, config, options);
591
+ const buildOptions = { ...options };
592
+ const paperPath = combineSections(directory, config, buildOptions);
593
+ forwardRefsResolved = buildOptions._forwardRefsResolved || 0;
594
+ const refsAutoInjected = buildOptions._refsAutoInjected || false;
549
595
 
550
596
  // Expand 'all' to all formats
551
597
  if (formats.includes('all')) {
@@ -570,7 +616,7 @@ export async function build(directory, formats = ['pdf', 'docx'], options = {})
570
616
  }
571
617
  }
572
618
 
573
- return { results, paperPath, warnings };
619
+ return { results, paperPath, warnings, forwardRefsResolved, refsAutoInjected };
574
620
  }
575
621
 
576
622
  /**
@@ -17,7 +17,7 @@ import {
17
17
  getRefStatus,
18
18
  formatRegistry,
19
19
  build,
20
- loadConfig as loadBuildConfig,
20
+ loadBuildConfig,
21
21
  hasPandoc,
22
22
  hasPandocCrossref,
23
23
  formatBuildResults,
@@ -555,7 +555,7 @@ export function register(program, pkg) {
555
555
  const spin = fmt.spinner('Building...').start();
556
556
 
557
557
  try {
558
- const { results, paperPath } = await build(dir, targetFormats, {
558
+ const { results, paperPath, forwardRefsResolved, refsAutoInjected } = await build(dir, targetFormats, {
559
559
  crossref: options.crossref,
560
560
  config,
561
561
  });
@@ -563,7 +563,14 @@ export function register(program, pkg) {
563
563
  spin.stop();
564
564
 
565
565
  console.log(chalk.cyan('Combined sections → paper.md'));
566
- console.log(chalk.dim(` ${paperPath}\n`));
566
+ console.log(chalk.dim(` ${paperPath}`));
567
+ if (forwardRefsResolved > 0) {
568
+ console.log(chalk.dim(` ${forwardRefsResolved} forward reference(s) pre-resolved`));
569
+ }
570
+ if (refsAutoInjected) {
571
+ console.log(chalk.dim(` References section auto-injected before supplementary`));
572
+ }
573
+ console.log('');
567
574
 
568
575
  console.log(chalk.cyan('Output:'));
569
576
  console.log(formatBuildResults(results));
package/lib/crossref.js CHANGED
@@ -11,6 +11,49 @@ import * as fs from 'fs';
11
11
  import * as path from 'path';
12
12
  import YAML from 'yaml';
13
13
 
14
/**
 * Discover the ordered list of section files for a project directory.
 *
 * Sources are tried in this order:
 *   1. `rev.yaml`      - `sections` must be a non-empty array of filenames
 *   2. `sections.yaml` - `sections` is a mapping of filename -> metadata,
 *                        sorted by `order` (entries without an `order`,
 *                        including entries with no metadata at all, sort as 999)
 *
 * Only entries that are strings and exist on disk are returned. Config lookup
 * is best-effort: a missing, unreadable or malformed YAML file is ignored and
 * the next source is tried.
 *
 * @param {string} directory - Project directory containing the config files
 * @returns {string[]} Ordered list of existing section filenames, or an empty
 *   array when no usable config is found (caller must handle this)
 */
function discoverSectionFiles(directory) {
  // Keep only string entries that exist on disk. Non-string entries are skipped
  // so one bad entry cannot discard the whole list.
  const keepExisting = (names) =>
    names.filter(
      (name) => typeof name === 'string' && fs.existsSync(path.join(directory, name))
    );

  // Parse a YAML config file. Returns null when the file is missing, cannot be
  // read or parsed, or does not contain a mapping (e.g. an empty document).
  const readConfig = (fileName) => {
    const configPath = path.join(directory, fileName);
    if (!fs.existsSync(configPath)) return null;
    try {
      const parsed = YAML.parse(fs.readFileSync(configPath, 'utf-8'));
      return parsed !== null && typeof parsed === 'object' ? parsed : null;
    } catch {
      // Deliberately best-effort: ignore yaml errors and try the next option
      return null;
    }
  };

  // Try rev.yaml first
  const revSections = readConfig('rev.yaml')?.sections;
  if (Array.isArray(revSections) && revSections.length > 0) {
    return keepExisting(revSections);
  }

  // Try sections.yaml
  const mappedSections = readConfig('sections.yaml')?.sections;
  if (mappedSections !== null && typeof mappedSections === 'object') {
    // `meta` may be null when a section is listed without any metadata
    const orderOf = (meta) => meta?.order ?? 999;
    const orderedNames = Object.entries(mappedSections)
      .sort(([, metaA], [, metaB]) => orderOf(metaA) - orderOf(metaB))
      .map(([fileName]) => fileName);
    return keepExisting(orderedNames);
  }

  // No config found - return empty array rather than guessing.
  // Caller must handle this (either error or use explicit sections).
  return [];
}
56
+
14
57
  /**
15
58
  * Patterns for detecting hardcoded references
16
59
  * Matches complex patterns including:
@@ -208,8 +251,14 @@ export function parseReferenceList(listStr) {
208
251
  * Build a registry of figure/table labels from .md files
209
252
  * Scans for {#fig:label} and {#tbl:label} anchors
210
253
  *
254
+ * IMPORTANT: This function requires either explicit sections or a rev.yaml/sections.yaml config.
255
+ * It will NOT guess by scanning all .md files, as this leads to incorrect numbering
256
+ * when temporary files (paper_clean.md, etc.) exist in the directory.
257
+ *
211
258
  * @param {string} directory - Directory containing .md files
212
- * @param {string[]} [excludeFiles] - Files to exclude
259
+ * @param {string[]} [sections] - Array of section filenames to scan (recommended).
260
+ * If not provided, reads from rev.yaml or sections.yaml.
261
+ * Returns empty registry if no sections can be determined.
213
262
  * @returns {{
214
263
  * figures: Map<string, {label: string, num: number, isSupp: boolean, file: string}>,
215
264
  * tables: Map<string, {label: string, num: number, isSupp: boolean, file: string}>,
@@ -217,7 +266,7 @@ export function parseReferenceList(listStr) {
217
266
  * byNumber: {fig: Map<string, string>, tbl: Map<string, string>, eq: Map<string, string>}
218
267
  * }}
219
268
  */
220
- export function buildRegistry(directory, excludeFiles = ['paper.md', 'README.md', 'CLAUDE.md']) {
269
+ export function buildRegistry(directory, sections) {
221
270
  const figures = new Map();
222
271
  const tables = new Map();
223
272
  const equations = new Map();
@@ -229,32 +278,16 @@ export function buildRegistry(directory, excludeFiles = ['paper.md', 'README.md'
229
278
  let tblSuppNum = 0;
230
279
  let eqNum = 0;
231
280
 
232
- // Get all .md files
233
- const files = fs.readdirSync(directory).filter((f) => {
234
- if (!f.endsWith('.md')) return false;
235
- if (excludeFiles.some((e) => f.toLowerCase() === e.toLowerCase())) return false;
236
- return true;
237
- });
281
+ let orderedFiles;
238
282
 
239
- // Sort by likely document order (use sections.yaml if available)
240
- let orderedFiles = files;
241
- const sectionsPath = path.join(directory, 'sections.yaml');
242
- if (fs.existsSync(sectionsPath)) {
243
- try {
244
- const config = YAML.parse(fs.readFileSync(sectionsPath, 'utf-8'));
245
- if (config.sections) {
246
- const sectionOrder = Object.entries(config.sections)
247
- .sort((a, b) => (a[1].order ?? 999) - (b[1].order ?? 999))
248
- .map(([file]) => file);
249
- orderedFiles = sectionOrder.filter((f) => files.includes(f));
250
- // Add any remaining files not in sections.yaml
251
- for (const f of files) {
252
- if (!orderedFiles.includes(f)) orderedFiles.push(f);
253
- }
254
- }
255
- } catch {
256
- // Ignore yaml errors, use default order
257
- }
283
+ if (Array.isArray(sections) && sections.length > 0) {
284
+ // Use explicitly provided section files - most reliable
285
+ orderedFiles = sections.filter(f => fs.existsSync(path.join(directory, f)));
286
+ } else {
287
+ // Try to determine sections from config files (rev.yaml or sections.yaml)
288
+ orderedFiles = discoverSectionFiles(directory);
289
+ // If no config found, return empty registry rather than guessing
290
+ // This prevents bugs from scanning wrong files
258
291
  }
259
292
 
260
293
  // Determine if a file is supplementary
@@ -489,6 +522,88 @@ export function getRefStatus(text, registry) {
489
522
  };
490
523
  }
491
524
 
525
/**
 * Find references in combined text that cannot be matched to an earlier anchor.
 *
 * A reference is reported when its `{#type:label}` anchor either appears later
 * in the text than the reference itself, or does not appear in the text at all.
 * When the same anchor occurs more than once, only its first position counts.
 *
 * @param {string} text - Combined document text
 * @returns {{
 *   forwardRefs: Array<{type: string, label: string, match: string, position: number}>,
 *   anchorPositions: Map<string, number>
 * }} The reported references (as returned by detectDynamicRefs) and a map of
 *   "type:label" -> index of the first matching anchor
 */
export function detectForwardRefs(text) {
  const anchorPositions = new Map();

  // ANCHOR_PATTERN is a shared stateful regex: rewind it before scanning
  ANCHOR_PATTERN.lastIndex = 0;
  for (
    let found = ANCHOR_PATTERN.exec(text);
    found !== null;
    found = ANCHOR_PATTERN.exec(text)
  ) {
    const anchorKey = `${found[1]}:${found[2]}`;
    if (anchorPositions.has(anchorKey)) {
      // Duplicate anchor: the first occurrence wins
      continue;
    }
    anchorPositions.set(anchorKey, found.index);
  }

  // A ref is "forward" when no anchor exists or the anchor comes after it
  const isForward = (candidate) => {
    const definedAt = anchorPositions.get(`${candidate.type}:${candidate.label}`);
    if (definedAt === undefined) {
      return true;
    }
    return candidate.position < definedAt;
  };

  const forwardRefs = detectDynamicRefs(text).filter(isForward);

  return { forwardRefs, anchorPositions };
}
561
+
562
/**
 * Replace forward references with their display text.
 *
 * Only references reported by detectForwardRefs() are touched; all other refs
 * are left in place for pandoc-crossref to handle (preserves clickable links).
 * A reported reference is replaced when labelToDisplay() yields a truthy
 * display string for it, and is listed as unresolved otherwise.
 *
 * @param {string} text - Combined document text
 * @param {object} registry - Registry from buildRegistry()
 * @returns {{
 *   text: string,
 *   resolved: Array<{from: string, to: string, position: number}>,
 *   unresolved: Array<{ref: string, position: number}>
 * }} The rewritten text plus the resolved / unresolved references, each listed
 *   from the last position in the document to the first. Positions refer to
 *   offsets in the input text.
 */
export function resolveForwardRefs(text, registry) {
  const { forwardRefs } = detectForwardRefs(text);
  const resolved = [];
  const unresolved = [];

  // Replacements are spliced from the end of the document backwards so that the
  // offsets of earlier references stay valid. Sort a copy explicitly instead of
  // assuming the detector returns references in ascending position order: with
  // any other ordering an earlier splice would shift later offsets and corrupt
  // the text.
  const lastToFirst = [...forwardRefs].sort((a, b) => b.position - a.position);

  let result = text;
  for (const ref of lastToFirst) {
    const display = labelToDisplay(ref.type, ref.label, registry);

    if (!display) {
      unresolved.push({ ref: ref.match, position: ref.position });
      continue;
    }

    const before = result.slice(0, ref.position);
    const after = result.slice(ref.position + ref.match.length);
    result = `${before}${display}${after}`;
    resolved.push({ from: ref.match, to: display, position: ref.position });
  }

  return { text: result, resolved, unresolved };
}
606
+
492
607
  /**
493
608
  * Format registry for display
494
609
  * @param {object} registry
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docrev",
3
- "version": "0.7.7",
3
+ "version": "0.7.9",
4
4
  "description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
5
5
  "type": "module",
6
6
  "types": "types/index.d.ts",