docrev 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +2 -2
- package/README.md +24 -2
- package/bin/rev.js +222 -5
- package/lib/build.js +10 -2
- package/lib/crossref.js +138 -49
- package/lib/equations.js +235 -0
- package/package.json +5 -4
package/CLAUDE.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
# Claude Instructions for
|
|
1
|
+
# Claude Instructions for docrev
|
|
2
2
|
|
|
3
|
-
This is `rev
|
|
3
|
+
This is `docrev` (command: `rev`), a CLI tool for academic paper workflows with Word ↔ Markdown round-trips.
|
|
4
4
|
|
|
5
5
|
## Key Commands for Claude
|
|
6
6
|
|
package/README.md
CHANGED
|
@@ -41,9 +41,12 @@ rev install # Check for missing dependencies
|
|
|
41
41
|
- **Import from Word** - Diff Word docs against your Markdown, generating CriticMarkup annotations
|
|
42
42
|
- **Section-aware import** - Import directly to modular section files (intro.md, methods.md, etc.)
|
|
43
43
|
- **Interactive review** - Accept/reject track changes with a TUI
|
|
44
|
-
- **Comment management** - List and
|
|
44
|
+
- **Comment management** - List, filter, resolve, and reply to reviewer comments
|
|
45
|
+
- **Response letter generation** - Auto-generate point-by-point response from comments
|
|
45
46
|
- **DOI validation** - Check and find DOIs via Crossref/DataCite APIs
|
|
46
|
-
- **Cross-reference conversion** - Auto-convert
|
|
47
|
+
- **Cross-reference conversion** - Auto-convert "Figures 1-3" to `@fig:label` syntax (handles complex patterns)
|
|
48
|
+
- **Equation extraction** - Extract LaTeX equations from Word documents (OMML → LaTeX)
|
|
49
|
+
- **Citation validation** - Check citations against bibliography
|
|
47
50
|
|
|
48
51
|
## Quick Start
|
|
49
52
|
|
|
@@ -114,6 +117,7 @@ rev build docx
|
|
|
114
117
|
| Command | Description |
|
|
115
118
|
|---------|-------------|
|
|
116
119
|
| `rev build [formats...]` | Build PDF/DOCX/TEX from sections |
|
|
120
|
+
| `rev build --toc` | Build with table of contents |
|
|
117
121
|
| `rev new <name>` | Create new project from template |
|
|
118
122
|
| `rev new --list` | List available templates |
|
|
119
123
|
| `rev install` | Check/install dependencies (pandoc-crossref) |
|
|
@@ -134,6 +138,8 @@ rev build docx
|
|
|
134
138
|
| `rev review <file>` | Interactive accept/reject TUI for track changes |
|
|
135
139
|
| `rev status <file>` | Show annotation counts |
|
|
136
140
|
| `rev comments <file>` | List all comments with context |
|
|
141
|
+
| `rev comments <file> --export comments.csv` | Export comments to CSV |
|
|
142
|
+
| `rev resolve <file> -n 1` | Mark comment #1 as resolved |
|
|
137
143
|
| `rev strip <file>` | Output clean Markdown (annotations applied) |
|
|
138
144
|
|
|
139
145
|
### Cross-References
|
|
@@ -160,6 +166,17 @@ rev build docx
|
|
|
160
166
|
| `rev doi fetch <doi>` | Fetch BibTeX entry from DOI |
|
|
161
167
|
| `rev doi add <doi>` | Fetch and add DOI entry to bibliography |
|
|
162
168
|
|
|
169
|
+
### Validation & Analysis
|
|
170
|
+
|
|
171
|
+
| Command | Description |
|
|
172
|
+
|---------|-------------|
|
|
173
|
+
| `rev citations [file.bib]` | Validate citations against bibliography |
|
|
174
|
+
| `rev figures [file]` | List figures/tables with reference counts |
|
|
175
|
+
| `rev equations list` | List all equations in section files |
|
|
176
|
+
| `rev equations from-word <docx>` | Extract equations from Word to LaTeX |
|
|
177
|
+
| `rev response [files]` | Generate response letter from comments |
|
|
178
|
+
| `rev anonymize <file>` | Prepare document for blind review |
|
|
179
|
+
|
|
163
180
|
### Configuration
|
|
164
181
|
|
|
165
182
|
| Command | Description |
|
|
@@ -215,10 +232,12 @@ pdf:
|
|
|
215
232
|
fontsize: 12pt
|
|
216
233
|
geometry: margin=1in
|
|
217
234
|
linestretch: 1.5
|
|
235
|
+
toc: false # Table of contents
|
|
218
236
|
|
|
219
237
|
docx:
|
|
220
238
|
reference: template.docx # Optional reference doc
|
|
221
239
|
keepComments: true
|
|
240
|
+
toc: false # Table of contents
|
|
222
241
|
```
|
|
223
242
|
|
|
224
243
|
## Annotation Syntax (CriticMarkup)
|
|
@@ -266,6 +285,9 @@ When importing from Word, hardcoded refs are auto-converted:
|
|
|
266
285
|
- `Figure 1` → `@fig:heatmap`
|
|
267
286
|
- `Fig. 2a` → `@fig:model`
|
|
268
287
|
- `Figs. 1-3` → `@fig:heatmap; @fig:model; @fig:hierarchy`
|
|
288
|
+
- `Figures 1, 2, and 3` → `@fig:one; @fig:two; @fig:three`
|
|
289
|
+
- `Fig. 1a-c` → `@fig:one` (expands letter suffixes)
|
|
290
|
+
- `Figs. 1a-3b` → all panels from 1a to 3b
|
|
269
291
|
|
|
270
292
|
## Build Outputs
|
|
271
293
|
|
package/bin/rev.js
CHANGED
|
@@ -48,7 +48,7 @@ import * as fmt from '../lib/format.js';
|
|
|
48
48
|
import { inlineDiffPreview } from '../lib/format.js';
|
|
49
49
|
import { parseCommentsWithReplies, collectComments, generateResponseLetter, groupByReviewer } from '../lib/response.js';
|
|
50
50
|
import { validateCitations, getCitationStats } from '../lib/citations.js';
|
|
51
|
-
import { extractEquations, getEquationStats, createEquationsDoc } from '../lib/equations.js';
|
|
51
|
+
import { extractEquations, getEquationStats, createEquationsDoc, extractEquationsFromWord, getWordEquationStats } from '../lib/equations.js';
|
|
52
52
|
import { parseBibEntries, checkBibDois, fetchBibtex, addToBib, isValidDoiFormat, lookupDoi, lookupMissingDois } from '../lib/doi.js';
|
|
53
53
|
|
|
54
54
|
program
|
|
@@ -185,6 +185,7 @@ program
|
|
|
185
185
|
.argument('<file>', 'Markdown file')
|
|
186
186
|
.option('-p, --pending', 'Show only pending (unresolved) comments')
|
|
187
187
|
.option('-r, --resolved', 'Show only resolved comments')
|
|
188
|
+
.option('-e, --export <csvFile>', 'Export comments to CSV file')
|
|
188
189
|
.action((file, options) => {
|
|
189
190
|
if (!fs.existsSync(file)) {
|
|
190
191
|
console.error(chalk.red(`Error: File not found: ${file}`));
|
|
@@ -197,6 +198,34 @@ program
|
|
|
197
198
|
resolvedOnly: options.resolved,
|
|
198
199
|
});
|
|
199
200
|
|
|
201
|
+
// CSV export mode
|
|
202
|
+
if (options.export) {
|
|
203
|
+
const csvEscape = (str) => {
|
|
204
|
+
if (!str) return '';
|
|
205
|
+
str = String(str);
|
|
206
|
+
if (str.includes(',') || str.includes('"') || str.includes('\n')) {
|
|
207
|
+
return '"' + str.replace(/"/g, '""') + '"';
|
|
208
|
+
}
|
|
209
|
+
return str;
|
|
210
|
+
};
|
|
211
|
+
|
|
212
|
+
const header = ['number', 'author', 'comment', 'context', 'status', 'file', 'line'];
|
|
213
|
+
const rows = comments.map((c, i) => [
|
|
214
|
+
i + 1,
|
|
215
|
+
csvEscape(c.author || ''),
|
|
216
|
+
csvEscape(c.content),
|
|
217
|
+
csvEscape(c.before ? c.before.trim() : ''),
|
|
218
|
+
c.resolved ? 'resolved' : 'pending',
|
|
219
|
+
path.basename(file),
|
|
220
|
+
c.line,
|
|
221
|
+
].join(','));
|
|
222
|
+
|
|
223
|
+
const csv = [header.join(','), ...rows].join('\n');
|
|
224
|
+
fs.writeFileSync(options.export, csv, 'utf-8');
|
|
225
|
+
console.log(fmt.status('success', `Exported ${comments.length} comments to ${options.export}`));
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
228
|
+
|
|
200
229
|
if (comments.length === 0) {
|
|
201
230
|
if (options.pending) {
|
|
202
231
|
console.log(fmt.status('success', 'No pending comments'));
|
|
@@ -1351,6 +1380,7 @@ program
|
|
|
1351
1380
|
.argument('[formats...]', 'Output formats: pdf, docx, tex, all', ['pdf', 'docx'])
|
|
1352
1381
|
.option('-d, --dir <directory>', 'Project directory', '.')
|
|
1353
1382
|
.option('--no-crossref', 'Skip pandoc-crossref filter')
|
|
1383
|
+
.option('--toc', 'Include table of contents')
|
|
1354
1384
|
.action(async (formats, options) => {
|
|
1355
1385
|
const dir = path.resolve(options.dir);
|
|
1356
1386
|
|
|
@@ -1380,15 +1410,24 @@ program
|
|
|
1380
1410
|
|
|
1381
1411
|
// Show what we're building
|
|
1382
1412
|
const targetFormats = formats.length > 0 ? formats : ['pdf', 'docx'];
|
|
1413
|
+
const tocEnabled = options.toc || config.pdf?.toc || config.docx?.toc;
|
|
1383
1414
|
console.log(chalk.dim(` Formats: ${targetFormats.join(', ')}`));
|
|
1384
1415
|
console.log(chalk.dim(` Crossref: ${hasPandocCrossref() && options.crossref !== false ? 'enabled' : 'disabled'}`));
|
|
1416
|
+
if (tocEnabled) console.log(chalk.dim(` TOC: enabled`));
|
|
1385
1417
|
console.log('');
|
|
1386
1418
|
|
|
1419
|
+
// Override config with CLI options
|
|
1420
|
+
if (options.toc) {
|
|
1421
|
+
config.pdf.toc = true;
|
|
1422
|
+
config.docx.toc = true;
|
|
1423
|
+
}
|
|
1424
|
+
|
|
1387
1425
|
const spin = fmt.spinner('Building...').start();
|
|
1388
1426
|
|
|
1389
1427
|
try {
|
|
1390
1428
|
const { results, paperPath } = await build(dir, targetFormats, {
|
|
1391
1429
|
crossref: options.crossref,
|
|
1430
|
+
config, // Pass modified config
|
|
1392
1431
|
});
|
|
1393
1432
|
|
|
1394
1433
|
spin.stop();
|
|
@@ -1687,6 +1726,123 @@ program
|
|
|
1687
1726
|
console.log(fmt.status('success', `Created ${outputPath}`));
|
|
1688
1727
|
});
|
|
1689
1728
|
|
|
1729
|
+
// ============================================================================
|
|
1730
|
+
// ANONYMIZE command - Prepare document for blind review
|
|
1731
|
+
// ============================================================================
|
|
1732
|
+
|
|
1733
|
+
program
|
|
1734
|
+
.command('anonymize')
|
|
1735
|
+
.description('Prepare document for blind review')
|
|
1736
|
+
.argument('<input>', 'Input markdown file or directory')
|
|
1737
|
+
.option('-o, --output <file>', 'Output file (default: input-anonymous.md)')
|
|
1738
|
+
.option('--authors <names>', 'Author names to redact (comma-separated)')
|
|
1739
|
+
.option('--dry-run', 'Show what would be changed without writing')
|
|
1740
|
+
.action((input, options) => {
|
|
1741
|
+
const isDir = fs.existsSync(input) && fs.statSync(input).isDirectory();
|
|
1742
|
+
const files = isDir
|
|
1743
|
+
? fs.readdirSync(input)
|
|
1744
|
+
.filter(f => f.endsWith('.md') && !['README.md', 'CLAUDE.md'].includes(f))
|
|
1745
|
+
.map(f => path.join(input, f))
|
|
1746
|
+
: [input];
|
|
1747
|
+
|
|
1748
|
+
if (files.length === 0) {
|
|
1749
|
+
console.error(fmt.status('error', 'No markdown files found'));
|
|
1750
|
+
process.exit(1);
|
|
1751
|
+
}
|
|
1752
|
+
|
|
1753
|
+
// Get author names to redact
|
|
1754
|
+
let authorNames = [];
|
|
1755
|
+
if (options.authors) {
|
|
1756
|
+
authorNames = options.authors.split(',').map(n => n.trim());
|
|
1757
|
+
} else {
|
|
1758
|
+
// Try to load from rev.yaml
|
|
1759
|
+
const configPath = isDir ? path.join(input, 'rev.yaml') : 'rev.yaml';
|
|
1760
|
+
if (fs.existsSync(configPath)) {
|
|
1761
|
+
try {
|
|
1762
|
+
const config = yaml.load(fs.readFileSync(configPath, 'utf-8'));
|
|
1763
|
+
if (config.authors) {
|
|
1764
|
+
authorNames = config.authors.map(a => typeof a === 'string' ? a : a.name).filter(Boolean);
|
|
1765
|
+
}
|
|
1766
|
+
} catch { /* ignore */ }
|
|
1767
|
+
}
|
|
1768
|
+
}
|
|
1769
|
+
|
|
1770
|
+
console.log(fmt.header('Anonymizing Document'));
|
|
1771
|
+
console.log();
|
|
1772
|
+
|
|
1773
|
+
let totalChanges = 0;
|
|
1774
|
+
|
|
1775
|
+
for (const file of files) {
|
|
1776
|
+
if (!fs.existsSync(file)) {
|
|
1777
|
+
console.error(chalk.yellow(` Skipping: ${file} (not found)`));
|
|
1778
|
+
continue;
|
|
1779
|
+
}
|
|
1780
|
+
|
|
1781
|
+
let text = fs.readFileSync(file, 'utf-8');
|
|
1782
|
+
let changes = 0;
|
|
1783
|
+
|
|
1784
|
+
// Remove YAML frontmatter author block
|
|
1785
|
+
text = text.replace(/^---\n([\s\S]*?)\n---/, (match, fm) => {
|
|
1786
|
+
let modified = fm;
|
|
1787
|
+
// Remove author/authors field
|
|
1788
|
+
modified = modified.replace(/^author:.*(?:\n(?: |\t).*)*$/m, '');
|
|
1789
|
+
modified = modified.replace(/^authors:.*(?:\n(?: |\t|-\s+).*)*$/m, '');
|
|
1790
|
+
// Remove affiliation/email
|
|
1791
|
+
modified = modified.replace(/^affiliation:.*$/m, '');
|
|
1792
|
+
modified = modified.replace(/^email:.*$/m, '');
|
|
1793
|
+
if (modified !== fm) changes++;
|
|
1794
|
+
return '---\n' + modified.replace(/\n{3,}/g, '\n\n').trim() + '\n---';
|
|
1795
|
+
});
|
|
1796
|
+
|
|
1797
|
+
// Remove acknowledgments section
|
|
1798
|
+
const ackPatterns = [
|
|
1799
|
+
/^#+\s*Acknowledgments?[\s\S]*?(?=^#|\Z)/gmi,
|
|
1800
|
+
/^#+\s*Funding[\s\S]*?(?=^#|\Z)/gmi,
|
|
1801
|
+
];
|
|
1802
|
+
for (const pattern of ackPatterns) {
|
|
1803
|
+
const before = text;
|
|
1804
|
+
text = text.replace(pattern, '');
|
|
1805
|
+
if (text !== before) changes++;
|
|
1806
|
+
}
|
|
1807
|
+
|
|
1808
|
+
// Redact author names
|
|
1809
|
+
for (const name of authorNames) {
|
|
1810
|
+
const namePattern = new RegExp(`\\b${name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'gi');
|
|
1811
|
+
const before = text;
|
|
1812
|
+
text = text.replace(namePattern, '[AUTHOR]');
|
|
1813
|
+
if (text !== before) changes++;
|
|
1814
|
+
}
|
|
1815
|
+
|
|
1816
|
+
// Replace self-citations: @AuthorLastName2024 -> @AUTHOR2024
|
|
1817
|
+
for (const name of authorNames) {
|
|
1818
|
+
const lastName = name.split(/\s+/).pop();
|
|
1819
|
+
if (lastName && lastName.length > 2) {
|
|
1820
|
+
const citePat = new RegExp(`@${lastName}(\\d{4})`, 'gi');
|
|
1821
|
+
const before = text;
|
|
1822
|
+
text = text.replace(citePat, '@AUTHOR$1');
|
|
1823
|
+
if (text !== before) changes++;
|
|
1824
|
+
}
|
|
1825
|
+
}
|
|
1826
|
+
|
|
1827
|
+
totalChanges += changes;
|
|
1828
|
+
|
|
1829
|
+
if (options.dryRun) {
|
|
1830
|
+
console.log(chalk.dim(` ${path.basename(file)}: ${changes} change(s)`));
|
|
1831
|
+
} else {
|
|
1832
|
+
const outPath = options.output || file.replace(/\.md$/, '-anonymous.md');
|
|
1833
|
+
fs.writeFileSync(outPath, text, 'utf-8');
|
|
1834
|
+
console.log(fmt.status('success', `${path.basename(file)} → ${path.basename(outPath)} (${changes} changes)`));
|
|
1835
|
+
}
|
|
1836
|
+
}
|
|
1837
|
+
|
|
1838
|
+
console.log();
|
|
1839
|
+
if (options.dryRun) {
|
|
1840
|
+
console.log(chalk.dim(` Total: ${totalChanges} change(s) would be made`));
|
|
1841
|
+
} else {
|
|
1842
|
+
console.log(fmt.status('success', `Anonymized ${files.length} file(s)`));
|
|
1843
|
+
}
|
|
1844
|
+
});
|
|
1845
|
+
|
|
1690
1846
|
// ============================================================================
|
|
1691
1847
|
// CITATIONS command - Validate citations against .bib file
|
|
1692
1848
|
// ============================================================================
|
|
@@ -1862,11 +2018,72 @@ program
|
|
|
1862
2018
|
.command('equations')
|
|
1863
2019
|
.alias('eq')
|
|
1864
2020
|
.description('Extract equations or convert to Word')
|
|
1865
|
-
.argument('<action>', 'Action: list, extract, convert')
|
|
1866
|
-
.argument('[input]', 'Input file (for extract/convert)')
|
|
2021
|
+
.argument('<action>', 'Action: list, extract, convert, from-word')
|
|
2022
|
+
.argument('[input]', 'Input file (.md for extract/convert, .docx for from-word)')
|
|
1867
2023
|
.option('-o, --output <file>', 'Output file')
|
|
1868
2024
|
.action(async (action, input, options) => {
|
|
1869
|
-
if (action === '
|
|
2025
|
+
if (action === 'from-word') {
|
|
2026
|
+
// Extract equations from Word document
|
|
2027
|
+
if (!input) {
|
|
2028
|
+
console.error(fmt.status('error', 'Word document required'));
|
|
2029
|
+
process.exit(1);
|
|
2030
|
+
}
|
|
2031
|
+
|
|
2032
|
+
if (!input.endsWith('.docx')) {
|
|
2033
|
+
console.error(fmt.status('error', 'Input must be a .docx file'));
|
|
2034
|
+
process.exit(1);
|
|
2035
|
+
}
|
|
2036
|
+
|
|
2037
|
+
const spin = fmt.spinner(`Extracting equations from ${path.basename(input)}...`).start();
|
|
2038
|
+
|
|
2039
|
+
const result = await extractEquationsFromWord(input);
|
|
2040
|
+
|
|
2041
|
+
if (!result.success) {
|
|
2042
|
+
spin.error(result.error);
|
|
2043
|
+
process.exit(1);
|
|
2044
|
+
}
|
|
2045
|
+
|
|
2046
|
+
spin.stop();
|
|
2047
|
+
console.log(fmt.header('Equations from Word'));
|
|
2048
|
+
console.log();
|
|
2049
|
+
|
|
2050
|
+
if (result.equations.length === 0) {
|
|
2051
|
+
console.log(chalk.dim('No equations found in document.'));
|
|
2052
|
+
return;
|
|
2053
|
+
}
|
|
2054
|
+
|
|
2055
|
+
const display = result.equations.filter(e => e.type === 'display');
|
|
2056
|
+
const inline = result.equations.filter(e => e.type === 'inline');
|
|
2057
|
+
|
|
2058
|
+
console.log(chalk.dim(`Found ${result.equations.length} equations (${display.length} display, ${inline.length} inline)`));
|
|
2059
|
+
console.log();
|
|
2060
|
+
|
|
2061
|
+
// Show equations
|
|
2062
|
+
for (let i = 0; i < result.equations.length; i++) {
|
|
2063
|
+
const eq = result.equations[i];
|
|
2064
|
+
const typeLabel = eq.type === 'display' ? chalk.cyan('[display]') : chalk.yellow('[inline]');
|
|
2065
|
+
|
|
2066
|
+
if (eq.latex) {
|
|
2067
|
+
console.log(`${chalk.bold(i + 1)}. ${typeLabel}`);
|
|
2068
|
+
console.log(chalk.dim(' LaTeX:'), eq.latex.length > 80 ? eq.latex.substring(0, 77) + '...' : eq.latex);
|
|
2069
|
+
} else {
|
|
2070
|
+
console.log(`${chalk.bold(i + 1)}. ${typeLabel} ${chalk.red('[conversion failed]')}`);
|
|
2071
|
+
}
|
|
2072
|
+
}
|
|
2073
|
+
|
|
2074
|
+
// Optionally save to file
|
|
2075
|
+
if (options.output) {
|
|
2076
|
+
const latex = result.equations
|
|
2077
|
+
.filter(e => e.latex)
|
|
2078
|
+
.map((e, i) => `%% Equation ${i + 1} (${e.type})\n${e.type === 'display' ? '$$' : '$'}${e.latex}${e.type === 'display' ? '$$' : '$'}`)
|
|
2079
|
+
.join('\n\n');
|
|
2080
|
+
|
|
2081
|
+
fs.writeFileSync(options.output, latex, 'utf-8');
|
|
2082
|
+
console.log();
|
|
2083
|
+
console.log(fmt.status('success', `Saved ${result.equations.filter(e => e.latex).length} equations to ${options.output}`));
|
|
2084
|
+
}
|
|
2085
|
+
|
|
2086
|
+
} else if (action === 'list') {
|
|
1870
2087
|
// List equations in all section files
|
|
1871
2088
|
const mdFiles = fs.readdirSync('.').filter(f =>
|
|
1872
2089
|
f.endsWith('.md') && !['README.md', 'CLAUDE.md'].includes(f)
|
|
@@ -1935,7 +2152,7 @@ program
|
|
|
1935
2152
|
}
|
|
1936
2153
|
} else {
|
|
1937
2154
|
console.error(fmt.status('error', `Unknown action: ${action}`));
|
|
1938
|
-
console.log(chalk.dim('Actions: list, extract, convert'));
|
|
2155
|
+
console.log(chalk.dim('Actions: list, extract, convert, from-word'));
|
|
1939
2156
|
process.exit(1);
|
|
1940
2157
|
}
|
|
1941
2158
|
});
|
package/lib/build.js
CHANGED
|
@@ -39,10 +39,12 @@ export const DEFAULT_CONFIG = {
|
|
|
39
39
|
geometry: 'margin=1in',
|
|
40
40
|
linestretch: 1.5,
|
|
41
41
|
numbersections: false,
|
|
42
|
+
toc: false,
|
|
42
43
|
},
|
|
43
44
|
docx: {
|
|
44
45
|
reference: null,
|
|
45
46
|
keepComments: true,
|
|
47
|
+
toc: false,
|
|
46
48
|
},
|
|
47
49
|
tex: {
|
|
48
50
|
standalone: true,
|
|
@@ -317,10 +319,16 @@ export function buildPandocArgs(format, config, outputPath) {
|
|
|
317
319
|
if (config.pdf.numbersections) {
|
|
318
320
|
args.push('--number-sections');
|
|
319
321
|
}
|
|
322
|
+
if (config.pdf.toc) {
|
|
323
|
+
args.push('--toc');
|
|
324
|
+
}
|
|
320
325
|
} else if (format === 'docx') {
|
|
321
326
|
if (config.docx.reference) {
|
|
322
327
|
args.push('--reference-doc', config.docx.reference);
|
|
323
328
|
}
|
|
329
|
+
if (config.docx.toc) {
|
|
330
|
+
args.push('--toc');
|
|
331
|
+
}
|
|
324
332
|
}
|
|
325
333
|
|
|
326
334
|
return args;
|
|
@@ -434,8 +442,8 @@ export async function build(directory, formats = ['pdf', 'docx'], options = {})
|
|
|
434
442
|
throw new Error('pandoc not found. Run `rev install` to install dependencies.');
|
|
435
443
|
}
|
|
436
444
|
|
|
437
|
-
// Load config
|
|
438
|
-
const config = loadConfig(directory);
|
|
445
|
+
// Load config (use passed config if provided, otherwise load from file)
|
|
446
|
+
const config = options.config || loadConfig(directory);
|
|
439
447
|
|
|
440
448
|
// Combine sections → paper.md
|
|
441
449
|
const paperPath = combineSections(directory, config, options);
|
package/lib/crossref.js
CHANGED
|
@@ -12,20 +12,26 @@ import * as path from 'path';
|
|
|
12
12
|
|
|
13
13
|
/**
|
|
14
14
|
* Patterns for detecting hardcoded references
|
|
15
|
-
* Matches
|
|
16
|
-
*
|
|
15
|
+
* Matches complex patterns including:
|
|
16
|
+
* - Simple: "Figure 1", "Fig. 2a", "Table S1"
|
|
17
|
+
* - Ranges: "Figures 1-3", "Fig. 1a-c", "Figs. 1a-3b"
|
|
18
|
+
* - Lists: "Figures 1, 2, and 3", "Fig. 1a, b, c", "Tables 1 & 2"
|
|
19
|
+
* - Mixed: "Figs. 1, 3-5, and 7"
|
|
20
|
+
*
|
|
21
|
+
* Uses a simpler base pattern and parses the full match for lists
|
|
17
22
|
*/
|
|
18
23
|
const DETECTION_PATTERNS = {
|
|
19
|
-
//
|
|
20
|
-
//
|
|
21
|
-
|
|
24
|
+
// Captures the full reference including lists with "and"
|
|
25
|
+
// Group 1: type prefix (Figure, Fig., etc.)
|
|
26
|
+
// Group 2: reference list (parsed by parseReferenceList())
|
|
27
|
+
// Matches: "1", "1a", "1-3", "1a-c", "1, 2, 3", "1 and 2", "1, 2 and 3", "1, 2, and 3"
|
|
28
|
+
// Separator: comma/dash/ampersand, optionally followed by "and"
|
|
29
|
+
// Standalone letters must be followed by separator, punctuation, or word boundary
|
|
30
|
+
figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
|
|
22
31
|
|
|
23
|
-
|
|
24
|
-
// With optional letter suffix for sub-tables
|
|
25
|
-
table: /\b(Tables?|Tabs?\.?)\s*(\d+|S\d+)([a-z])?(?:\s*[-–—&,]\s*(\d+|S\d+)?([a-z])?)?\b/gi,
|
|
32
|
+
table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
|
|
26
33
|
|
|
27
|
-
|
|
28
|
-
equation: /\b(Equations?|Eqs?\.?)\s*(\d+)([a-z])?(?:\s*[-–—&,]\s*(\d+)?([a-z])?)?\b/gi,
|
|
34
|
+
equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
|
|
29
35
|
};
|
|
30
36
|
|
|
31
37
|
/**
|
|
@@ -68,6 +74,120 @@ export function parseRefNumber(numStr, suffix = null) {
|
|
|
68
74
|
return { isSupp, num, suffix: extractedSuffix ? extractedSuffix.toLowerCase() : null };
|
|
69
75
|
}
|
|
70
76
|
|
|
77
|
+
/**
|
|
78
|
+
* Parse a reference list string like "1, 2, and 3" or "1a-c" or "1a-3b"
|
|
79
|
+
* Returns an array of {num, isSupp, suffix} objects
|
|
80
|
+
*
|
|
81
|
+
* @param {string} listStr - e.g., "1, 2, and 3", "1a-c", "1a-3b", "1a, b, c"
|
|
82
|
+
* @returns {Array<{num: number, isSupp: boolean, suffix: string|null}>}
|
|
83
|
+
*/
|
|
84
|
+
export function parseReferenceList(listStr) {
|
|
85
|
+
const results = [];
|
|
86
|
+
if (!listStr) return results;
|
|
87
|
+
|
|
88
|
+
// Normalize: replace "and" with comma, normalize dashes
|
|
89
|
+
let normalized = listStr
|
|
90
|
+
.replace(/\s+and\s+/gi, ', ')
|
|
91
|
+
.replace(/[–—]/g, '-') // en-dash, em-dash → hyphen
|
|
92
|
+
.replace(/&/g, ', '); // & → comma
|
|
93
|
+
|
|
94
|
+
// Split by comma (but not by dash, which indicates ranges)
|
|
95
|
+
const parts = normalized.split(/\s*,\s*/).filter(p => p.trim());
|
|
96
|
+
|
|
97
|
+
let lastFullRef = null; // Track the last full reference for implicit prefixes
|
|
98
|
+
|
|
99
|
+
for (const part of parts) {
|
|
100
|
+
const trimmed = part.trim();
|
|
101
|
+
if (!trimmed) continue;
|
|
102
|
+
|
|
103
|
+
// Check if this is a range (contains -)
|
|
104
|
+
if (trimmed.includes('-')) {
|
|
105
|
+
const [start, end] = trimmed.split('-').map(s => s.trim());
|
|
106
|
+
|
|
107
|
+
// Check if end is just a letter (e.g., "1a-c" where end is "c")
|
|
108
|
+
const endIsLetterOnly = /^[a-z]$/i.test(end);
|
|
109
|
+
|
|
110
|
+
const startRef = parseRefNumber(start);
|
|
111
|
+
// For letter-only end, don't parse as number
|
|
112
|
+
const endRef = endIsLetterOnly
|
|
113
|
+
? { num: startRef.num, isSupp: startRef.isSupp, suffix: end.toLowerCase() }
|
|
114
|
+
: parseRefNumber(end);
|
|
115
|
+
|
|
116
|
+
// Handle different range types:
|
|
117
|
+
// 1. Suffix-only range on same number: "1a-c" → 1a, 1b, 1c
|
|
118
|
+
// 2. Number range: "1-3" → 1, 2, 3
|
|
119
|
+
// 3. Cross-number suffix range: "1a-3b" → 1a...1z, 2a...2z, 3a, 3b (limited)
|
|
120
|
+
|
|
121
|
+
if (startRef.suffix && endRef.suffix && startRef.num !== endRef.num) {
|
|
122
|
+
// Cross-number suffix range: "1a-3b"
|
|
123
|
+
// For academic papers, limit intermediate figures to same suffix range
|
|
124
|
+
// e.g., "1a-3b" typically means 1a, 1b, 2a, 2b, 3a, 3b
|
|
125
|
+
const maxSuffix = Math.max(
|
|
126
|
+
startRef.suffix.charCodeAt(0),
|
|
127
|
+
endRef.suffix.charCodeAt(0)
|
|
128
|
+
);
|
|
129
|
+
|
|
130
|
+
for (let n = startRef.num; n <= endRef.num; n++) {
|
|
131
|
+
const suffixStart = (n === startRef.num) ? startRef.suffix.charCodeAt(0) : 'a'.charCodeAt(0);
|
|
132
|
+
const suffixEnd = (n === endRef.num) ? endRef.suffix.charCodeAt(0) : maxSuffix;
|
|
133
|
+
|
|
134
|
+
for (let s = suffixStart; s <= suffixEnd; s++) {
|
|
135
|
+
results.push({
|
|
136
|
+
num: n,
|
|
137
|
+
isSupp: startRef.isSupp,
|
|
138
|
+
suffix: String.fromCharCode(s)
|
|
139
|
+
});
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
|
|
143
|
+
} else if (startRef.suffix || endRef.suffix) {
|
|
144
|
+
// Suffix range on same number: "1a-c"
|
|
145
|
+
const num = startRef.num || (lastFullRef ? lastFullRef.num : 1);
|
|
146
|
+
const isSupp = startRef.isSupp || (lastFullRef ? lastFullRef.isSupp : false);
|
|
147
|
+
const startCode = (startRef.suffix || 'a').charCodeAt(0);
|
|
148
|
+
const endCode = (endRef.suffix || 'a').charCodeAt(0);
|
|
149
|
+
|
|
150
|
+
for (let code = startCode; code <= endCode; code++) {
|
|
151
|
+
results.push({
|
|
152
|
+
num,
|
|
153
|
+
isSupp,
|
|
154
|
+
suffix: String.fromCharCode(code)
|
|
155
|
+
});
|
|
156
|
+
}
|
|
157
|
+
lastFullRef = { num, isSupp };
|
|
158
|
+
} else {
|
|
159
|
+
// Pure number range: "1-3"
|
|
160
|
+
for (let n = startRef.num; n <= endRef.num; n++) {
|
|
161
|
+
results.push({
|
|
162
|
+
num: n,
|
|
163
|
+
isSupp: startRef.isSupp,
|
|
164
|
+
suffix: null
|
|
165
|
+
});
|
|
166
|
+
}
|
|
167
|
+
lastFullRef = { num: endRef.num, isSupp: startRef.isSupp };
|
|
168
|
+
}
|
|
169
|
+
} else {
|
|
170
|
+
// Single reference or implicit suffix
|
|
171
|
+
// Check if it's just a letter (implicit prefix from previous number)
|
|
172
|
+
if (/^[a-z]$/i.test(trimmed) && lastFullRef) {
|
|
173
|
+
// Implicit prefix: "b" after "1a" means "1b"
|
|
174
|
+
results.push({
|
|
175
|
+
num: lastFullRef.num,
|
|
176
|
+
isSupp: lastFullRef.isSupp,
|
|
177
|
+
suffix: trimmed.toLowerCase()
|
|
178
|
+
});
|
|
179
|
+
} else {
|
|
180
|
+
// Full reference: "1", "1a", "S1", "S1a"
|
|
181
|
+
const ref = parseRefNumber(trimmed);
|
|
182
|
+
results.push(ref);
|
|
183
|
+
lastFullRef = { num: ref.num, isSupp: ref.isSupp };
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
return results;
|
|
189
|
+
}
|
|
190
|
+
|
|
71
191
|
/**
|
|
72
192
|
* Build a registry of figure/table labels from .md files
|
|
73
193
|
* Scans for {#fig:label} and {#tbl:label} anchors
|
|
@@ -232,46 +352,15 @@ export function detectHardcodedRefs(text) {
|
|
|
232
352
|
let match;
|
|
233
353
|
|
|
234
354
|
while ((match = pattern.exec(text)) !== null) {
|
|
235
|
-
const numbers = [];
|
|
236
|
-
|
|
237
355
|
// Pattern groups:
|
|
238
|
-
// [1] = type (Figure, Fig., etc.)
|
|
239
|
-
// [2] =
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
//
|
|
245
|
-
|
|
246
|
-
numbers.push(first);
|
|
247
|
-
|
|
248
|
-
// Parse second number/suffix if present (range like 1a-c or 1-3)
|
|
249
|
-
if (match[4] || match[5]) {
|
|
250
|
-
const second = parseRefNumber(match[4] || match[2], match[5]);
|
|
251
|
-
|
|
252
|
-
// Handle suffix-only ranges (e.g., "1a-c" means 1a, 1b, 1c)
|
|
253
|
-
if (!match[4] && match[5] && first.suffix) {
|
|
254
|
-
// Expand letter range: a-c → a, b, c
|
|
255
|
-
const startCode = first.suffix.charCodeAt(0);
|
|
256
|
-
const endCode = match[5].charCodeAt(0);
|
|
257
|
-
for (let code = startCode + 1; code <= endCode; code++) {
|
|
258
|
-
numbers.push({
|
|
259
|
-
num: first.num,
|
|
260
|
-
isSupp: first.isSupp,
|
|
261
|
-
suffix: String.fromCharCode(code)
|
|
262
|
-
});
|
|
263
|
-
}
|
|
264
|
-
} else if (match[4]) {
|
|
265
|
-
// Expand number range
|
|
266
|
-
if (first.isSupp === second.isSupp && !first.suffix && !second.suffix) {
|
|
267
|
-
for (let n = first.num + 1; n <= second.num; n++) {
|
|
268
|
-
numbers.push({ num: n, isSupp: first.isSupp, suffix: null });
|
|
269
|
-
}
|
|
270
|
-
} else {
|
|
271
|
-
numbers.push(second);
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
}
|
|
356
|
+
// [1] = type prefix (Figure, Fig., etc.)
|
|
357
|
+
// [2] = reference list string (e.g., "1, 2, and 3" or "1a-3b")
|
|
358
|
+
|
|
359
|
+
const listStr = match[2];
|
|
360
|
+
const numbers = parseReferenceList(listStr);
|
|
361
|
+
|
|
362
|
+
// Skip if no valid numbers were parsed
|
|
363
|
+
if (numbers.length === 0) continue;
|
|
275
364
|
|
|
276
365
|
refs.push({
|
|
277
366
|
type: normalizeType(type),
|
package/lib/equations.js
CHANGED
|
@@ -1,14 +1,36 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Equation extraction and conversion utilities
|
|
3
3
|
* Handle LaTeX math in Markdown ↔ Word workflows
|
|
4
|
+
*
|
|
5
|
+
* Supports:
|
|
6
|
+
* - Extract LaTeX equations from Markdown
|
|
7
|
+
* - Extract equations from Word documents (OMML → LaTeX via Pandoc)
|
|
8
|
+
* - Convert Markdown with equations to Word (LaTeX → MathML)
|
|
4
9
|
*/
|
|
5
10
|
|
|
6
11
|
import * as fs from 'fs';
|
|
7
12
|
import * as path from 'path';
|
|
8
13
|
import { exec } from 'child_process';
|
|
9
14
|
import { promisify } from 'util';
|
|
15
|
+
import AdmZip from 'adm-zip';
|
|
16
|
+
import { parseString } from 'xml2js';
|
|
10
17
|
|
|
11
18
|
const execAsync = promisify(exec);
|
|
19
|
+
const parseXml = promisify(parseString);
|
|
20
|
+
|
|
21
|
+
// Dynamic import for mathml-to-latex (ESM)
|
|
22
|
+
let MathMLToLaTeX = null;
|
|
23
|
+
async function getMathMLConverter() {
|
|
24
|
+
if (!MathMLToLaTeX) {
|
|
25
|
+
try {
|
|
26
|
+
const module = await import('mathml-to-latex');
|
|
27
|
+
MathMLToLaTeX = module.MathMLToLaTeX;
|
|
28
|
+
} catch {
|
|
29
|
+
return null;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
return MathMLToLaTeX;
|
|
33
|
+
}
|
|
12
34
|
|
|
13
35
|
/**
|
|
14
36
|
* Extract all equations from markdown text
|
|
@@ -256,3 +278,216 @@ export function getEquationStats(files) {
|
|
|
256
278
|
byFile,
|
|
257
279
|
};
|
|
258
280
|
}
|
|
281
|
+
|
|
282
|
+
/**
 * Extract equations from a Word document using Pandoc.
 * Converts OMML (Office Math Markup) to LaTeX by round-tripping the document
 * through Pandoc's docx → markdown conversion and scanning the result.
 * Falls back to direct OMML parsing when Pandoc is unavailable or fails.
 *
 * @param {string} docxPath - Path to Word document
 * @returns {Promise<{success: boolean, equations: Array<{type: string, latex: string, position: number}>, error?: string}>}
 */
export async function extractEquationsFromWord(docxPath) {
  if (!fs.existsSync(docxPath)) {
    return { success: false, equations: [], error: `File not found: ${docxPath}` };
  }

  // Method 1: Use Pandoc to convert docx to markdown with LaTeX math.
  try {
    // Escape embedded double quotes so a path containing `"` cannot break
    // out of (or inject into) the quoted shell argument.
    const safePath = docxPath.replace(/"/g, '\\"');
    const { stdout } = await execAsync(
      `pandoc "${safePath}" -t markdown --wrap=none`,
      { maxBuffer: 50 * 1024 * 1024 } // large manuscripts produce lots of markdown
    );

    // Reuse the markdown equation scanner on Pandoc's output.
    const equations = extractEquations(stdout, path.basename(docxPath));

    return {
      success: true,
      equations: equations.map((eq, i) => ({
        type: eq.type,
        latex: eq.content,
        position: i,
        line: eq.line,
      })),
    };
  } catch {
    // Pandoc missing or conversion failed — fall back to raw OMML extraction.
    return extractEquationsFromWordDirect(docxPath);
  }
}
|
|
318
|
+
|
|
319
|
+
/**
 * Direct OMML extraction from Word document (fallback if Pandoc fails)
 * Parses document.xml for <m:oMath> elements and attempts conversion
 *
 * @param {string} docxPath - Path to a .docx file (a ZIP container)
 * @returns {Promise<{success: boolean, equations: Array, error?: string}>}
 *   Equation entries have {type, latex, position, raw}. When conversion
 *   throws, type is 'unknown' and latex is null; when conversion returns a
 *   falsy latex without throwing (e.g. converter unavailable), the equation
 *   is silently omitted from the result.
 */
async function extractEquationsFromWordDirect(docxPath) {
  try {
    // A .docx is a ZIP archive; the body text lives in word/document.xml.
    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');

    if (!documentEntry) {
      return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
    }

    const documentXml = zip.readAsText(documentEntry);

    // Find all OMML equations (<m:oMath> or <m:oMathPara>)
    // NOTE(review): `<m:oMath[^>]*>` also matches an `<m:oMathPara>` opening
    // tag, so a display-equation match can start at the paragraph wrapper and
    // end at the first inner `</m:oMath>` — isDisplayMath relies on that.
    const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
    const matches = documentXml.match(ommlPattern) || [];

    if (matches.length === 0) {
      // Success with an informational message rather than an error:
      // a document without equations is not a failure case.
      return { success: true, equations: [], message: 'No equations found' };
    }

    // Try to convert OMML to LaTeX via MathML intermediate
    const Converter = await getMathMLConverter();
    const equations = [];

    for (let i = 0; i < matches.length; i++) {
      const omml = matches[i];

      // Attempt OMML → MathML → LaTeX conversion
      // Note: This is a simplified approach; full OMML→MathML requires XSLT
      try {
        const latex = await ommlToLatex(omml, Converter);
        if (latex) {
          equations.push({
            type: isDisplayMath(omml) ? 'display' : 'inline',
            latex,
            position: i,
            // Truncated raw OMML kept for debugging/traceability.
            raw: omml.substring(0, 100) + '...',
          });
        }
      } catch {
        // Keep raw OMML reference if conversion fails
        equations.push({
          type: 'unknown',
          latex: null,
          position: i,
          raw: omml.substring(0, 100) + '...',
          error: 'Conversion failed',
        });
      }
    }

    return { success: true, equations };
  } catch (err) {
    // Bad path, corrupt archive, or unreadable XML — report, don't throw.
    return { success: false, equations: [], error: err.message };
  }
}
|
|
381
|
+
|
|
382
|
+
/**
 * Heuristic check: does this OMML fragment represent display math
 * (an equation paragraph on its own line) rather than inline math?
 */
function isDisplayMath(omml) {
  // oMathPara wraps block-level equations; m:jc (justification) only
  // appears in equation-paragraph properties.
  const displayMarkers = ['<m:oMathPara', 'm:jc'];
  return displayMarkers.some((marker) => omml.includes(marker));
}
|
|
388
|
+
|
|
389
|
+
/**
 * Convert OMML to LaTeX (simplified approach).
 * For complex equations, the Pandoc method is more reliable.
 *
 * @param {string} omml - OMML XML string
 * @param {Function} Converter - MathMLToLaTeX converter
 * @returns {Promise<string|null>} LaTeX string, or null when no converter is
 *   available or the conversion fails.
 */
async function ommlToLatex(omml, Converter) {
  // Without the optional converter there is nothing we can do.
  if (!Converter) return null;

  try {
    // Go through an approximate MathML intermediate; not every OMML
    // construct is supported.
    const mathml = ommlToMathML(omml);
    return mathml ? Converter.convert(mathml) : null;
  } catch {
    return null;
  }
}
|
|
414
|
+
|
|
415
|
+
/**
 * Convert OMML to MathML (simplified).
 * Maps common OMML elements to MathML equivalents. Unrecognized leftover
 * tags are stripped so only translated MathML and text content remain.
 *
 * @param {string} omml - OMML XML string (an <m:oMath>…</m:oMath> fragment)
 * @returns {string} Approximate MathML markup
 */
function ommlToMathML(omml) {
  // Remove namespace prefixes for easier parsing
  let xml = omml
    .replace(/<m:/g, '<')
    .replace(/<\/m:/g, '</')
    .replace(/<w:/g, '<w_')
    .replace(/<\/w:/g, '</w_');

  // Map OMML elements to MathML
  const mappings = [
    [/<oMath[^>]*>/gi, '<math xmlns="http://www.w3.org/1998/Math/MathML">'],
    [/<\/oMath>/gi, '</math>'],
    [/<r>/gi, '<mi>'],
    [/<\/r>/gi, '</mi>'],
    [/<t>/gi, ''],
    [/<\/t>/gi, ''],
    [/<f>/gi, '<mfrac>'],
    [/<\/f>/gi, '</mfrac>'],
    [/<num>/gi, '<mrow>'],
    [/<\/num>/gi, '</mrow>'],
    [/<den>/gi, '<mrow>'],
    [/<\/den>/gi, '</mrow>'],
    [/<sup>/gi, '<msup><mrow>'],
    [/<\/sup>/gi, '</mrow></msup>'],
    [/<sub>/gi, '<msub><mrow>'],
    [/<\/sub>/gi, '</mrow></msub>'],
    [/<rad>/gi, '<msqrt>'],
    [/<\/rad>/gi, '</msqrt>'],
    [/<e>/gi, '<mrow>'],
    [/<\/e>/gi, '</mrow>'],
    // Remove elements we don't map
    [/<rPr>[\s\S]*?<\/rPr>/gi, ''],
    [/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
    [/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
    [/<[^>]*\/>/gi, ''], // Self-closing tags
  ];

  for (const [pattern, replacement] of mappings) {
    xml = xml.replace(pattern, replacement);
  }

  // Clean up remaining unrecognized tags WITHOUT destroying the MathML we
  // just generated. (The previous blanket tag removal also deleted <math>,
  // <mfrac>, <msup>, … leaving only bare text to be re-wrapped below.)
  const mathmlTag = /^\/?(?:math|mrow|mfrac|msup|msub|msqrt|mi|mo|mn)\b/;
  xml = xml.replace(/<(\/?[a-zA-Z][^>]*)>/g, (tag, inner) =>
    mathmlTag.test(inner) ? tag : ''
  );

  // Wrap in math if not already
  if (!xml.includes('<math')) {
    xml = `<math xmlns="http://www.w3.org/1998/Math/MathML">${xml}</math>`;
  }

  return xml;
}
|
|
470
|
+
|
|
471
|
+
/**
 * Get equation summary from Word document.
 * @param {string} docxPath
 * @returns {Promise<{count: number, display: number, inline: number, converted: number}>}
 */
export async function getWordEquationStats(docxPath) {
  const result = await extractEquationsFromWord(docxPath);

  if (!result.success) {
    return { count: 0, display: 0, inline: 0, converted: 0, error: result.error };
  }

  // Tally the categories in a single pass over the extracted equations.
  let display = 0;
  let inline = 0;
  let converted = 0;
  for (const eq of result.equations) {
    if (eq.type === 'display') display += 1;
    if (eq.type === 'inline') inline += 1;
    if (eq.latex) converted += 1;
  }

  return { count: result.equations.length, display, inline, converted };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "docrev",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -12,12 +12,12 @@
|
|
|
12
12
|
},
|
|
13
13
|
"repository": {
|
|
14
14
|
"type": "git",
|
|
15
|
-
"url": "git+https://github.com/gcol33/
|
|
15
|
+
"url": "git+https://github.com/gcol33/docrev.git"
|
|
16
16
|
},
|
|
17
17
|
"bugs": {
|
|
18
|
-
"url": "https://github.com/gcol33/
|
|
18
|
+
"url": "https://github.com/gcol33/docrev/issues"
|
|
19
19
|
},
|
|
20
|
-
"homepage": "https://github.com/gcol33/
|
|
20
|
+
"homepage": "https://github.com/gcol33/docrev#readme",
|
|
21
21
|
"keywords": [
|
|
22
22
|
"markdown",
|
|
23
23
|
"word",
|
|
@@ -38,6 +38,7 @@
|
|
|
38
38
|
"diff": "^8.0.2",
|
|
39
39
|
"js-yaml": "^4.1.1",
|
|
40
40
|
"mammoth": "^1.6.0",
|
|
41
|
+
"mathml-to-latex": "^1.5.0",
|
|
41
42
|
"xml2js": "^0.6.2"
|
|
42
43
|
}
|
|
43
44
|
}
|