docrev 0.9.11 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -9
- package/.gitattributes +1 -1
- package/CHANGELOG.md +149 -149
- package/PLAN-tables-and-postprocess.md +850 -850
- package/README.md +391 -391
- package/bin/rev.js +11 -11
- package/bin/rev.ts +145 -145
- package/completions/rev.bash +127 -127
- package/completions/rev.ps1 +210 -210
- package/completions/rev.zsh +207 -207
- package/dev_notes/stress2/build_adversarial.ts +186 -186
- package/dev_notes/stress2/drift_matcher.ts +62 -62
- package/dev_notes/stress2/probe_anchors.ts +35 -35
- package/dev_notes/stress2/project/discussion.before.md +3 -3
- package/dev_notes/stress2/project/discussion.md +3 -3
- package/dev_notes/stress2/project/methods.before.md +20 -20
- package/dev_notes/stress2/project/methods.md +20 -20
- package/dev_notes/stress2/project/rev.yaml +5 -5
- package/dev_notes/stress2/project/sections.yaml +4 -4
- package/dev_notes/stress2/sections.yaml +5 -5
- package/dev_notes/stress2/trace_placement.ts +50 -50
- package/dev_notes/stresstest_boundaries.ts +27 -27
- package/dev_notes/stresstest_drift_apply.ts +43 -43
- package/dev_notes/stresstest_drift_compare.ts +43 -43
- package/dev_notes/stresstest_drift_v2.ts +54 -54
- package/dev_notes/stresstest_inspect.ts +54 -54
- package/dev_notes/stresstest_pstyle.ts +55 -55
- package/dev_notes/stresstest_section_debug.ts +23 -23
- package/dev_notes/stresstest_split.ts +70 -70
- package/dev_notes/stresstest_trace.ts +19 -19
- package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
- package/dist/lib/build.d.ts +50 -1
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +80 -30
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/build.d.ts.map +1 -1
- package/dist/lib/commands/build.js +38 -5
- package/dist/lib/commands/build.js.map +1 -1
- package/dist/lib/commands/utilities.js +164 -164
- package/dist/lib/commands/word-tools.js +8 -8
- package/dist/lib/grammar.js +3 -3
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +146 -24
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/pdf-comments.js +44 -44
- package/dist/lib/plugins.js +57 -57
- package/dist/lib/pptx-themes.js +115 -115
- package/dist/lib/spelling.js +2 -2
- package/dist/lib/templates.js +387 -387
- package/dist/lib/themes.js +51 -51
- package/dist/lib/types.d.ts +20 -0
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/word-extraction.d.ts +6 -0
- package/dist/lib/word-extraction.d.ts.map +1 -1
- package/dist/lib/word-extraction.js +46 -3
- package/dist/lib/word-extraction.js.map +1 -1
- package/dist/lib/wordcomments.d.ts.map +1 -1
- package/dist/lib/wordcomments.js +23 -5
- package/dist/lib/wordcomments.js.map +1 -1
- package/eslint.config.js +27 -27
- package/lib/anchor-match.ts +276 -276
- package/lib/annotations.ts +644 -644
- package/lib/build.ts +1300 -1227
- package/lib/citations.ts +160 -160
- package/lib/commands/build.ts +833 -801
- package/lib/commands/citations.ts +515 -515
- package/lib/commands/comments.ts +1050 -1050
- package/lib/commands/context.ts +174 -174
- package/lib/commands/core.ts +309 -309
- package/lib/commands/doi.ts +435 -435
- package/lib/commands/file-ops.ts +372 -372
- package/lib/commands/history.ts +320 -320
- package/lib/commands/index.ts +87 -87
- package/lib/commands/init.ts +259 -259
- package/lib/commands/merge-resolve.ts +378 -378
- package/lib/commands/preview.ts +178 -178
- package/lib/commands/project-info.ts +244 -244
- package/lib/commands/quality.ts +517 -517
- package/lib/commands/response.ts +454 -454
- package/lib/commands/section-boundaries.ts +82 -82
- package/lib/commands/sections.ts +451 -451
- package/lib/commands/sync.ts +706 -706
- package/lib/commands/text-ops.ts +449 -449
- package/lib/commands/utilities.ts +448 -448
- package/lib/commands/verify-anchors.ts +272 -272
- package/lib/commands/word-tools.ts +340 -340
- package/lib/comment-realign.ts +517 -517
- package/lib/config.ts +84 -84
- package/lib/crossref.ts +781 -781
- package/lib/csl.ts +191 -191
- package/lib/dependencies.ts +98 -98
- package/lib/diff-engine.ts +465 -465
- package/lib/doi-cache.ts +115 -115
- package/lib/doi.ts +897 -897
- package/lib/equations.ts +506 -506
- package/lib/errors.ts +346 -346
- package/lib/format.ts +541 -541
- package/lib/git.ts +326 -326
- package/lib/grammar.ts +303 -303
- package/lib/image-registry.ts +180 -180
- package/lib/import.ts +911 -792
- package/lib/journals.ts +543 -543
- package/lib/merge.ts +633 -633
- package/lib/orcid.ts +144 -144
- package/lib/pdf-comments.ts +263 -263
- package/lib/pdf-import.ts +524 -524
- package/lib/plugins.ts +362 -362
- package/lib/postprocess.ts +188 -188
- package/lib/pptx-color-filter.lua +37 -37
- package/lib/pptx-template.ts +469 -469
- package/lib/pptx-themes.ts +483 -483
- package/lib/protect-restore.ts +520 -520
- package/lib/rate-limiter.ts +94 -94
- package/lib/response.ts +197 -197
- package/lib/restore-references.ts +240 -240
- package/lib/review.ts +327 -327
- package/lib/schema.ts +417 -417
- package/lib/scientific-words.ts +73 -73
- package/lib/sections.ts +335 -335
- package/lib/slides.ts +756 -756
- package/lib/spelling.ts +334 -334
- package/lib/templates.ts +526 -526
- package/lib/themes.ts +742 -742
- package/lib/trackchanges.ts +247 -247
- package/lib/tui.ts +450 -450
- package/lib/types.ts +550 -530
- package/lib/undo.ts +250 -250
- package/lib/utils.ts +69 -69
- package/lib/variables.ts +179 -179
- package/lib/word-extraction.ts +806 -759
- package/lib/word.ts +643 -643
- package/lib/wordcomments.ts +817 -798
- package/package.json +137 -137
- package/scripts/postbuild.js +28 -28
- package/skill/REFERENCE.md +431 -431
- package/skill/SKILL.md +258 -258
- package/tsconfig.json +26 -26
- package/types/index.d.ts +525 -525
package/lib/equations.ts
CHANGED
|
@@ -1,506 +1,506 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Equation extraction and conversion utilities
|
|
3
|
-
* Handle LaTeX math in Markdown ↔ Word workflows
|
|
4
|
-
*
|
|
5
|
-
* Supports:
|
|
6
|
-
* - Extract LaTeX equations from Markdown
|
|
7
|
-
* - Extract equations from Word documents (OMML → LaTeX via Pandoc)
|
|
8
|
-
* - Convert Markdown with equations to Word (LaTeX → MathML)
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import * as fs from 'fs';
|
|
12
|
-
import * as path from 'path';
|
|
13
|
-
import { exec } from 'child_process';
|
|
14
|
-
import { promisify } from 'util';
|
|
15
|
-
import AdmZip from 'adm-zip';
|
|
16
|
-
import { parseString } from 'xml2js';
|
|
17
|
-
import type { Equation, EquationStats, WordEquationResult } from './types.js';
|
|
18
|
-
|
|
19
|
-
const execAsync = promisify(exec);
|
|
20
|
-
const parseXml = promisify(parseString);
|
|
21
|
-
|
|
22
|
-
// Dynamic import for mathml-to-latex (ESM)
|
|
23
|
-
let MathMLToLaTeX: any = null;
|
|
24
|
-
async function getMathMLConverter(): Promise<any> {
|
|
25
|
-
if (!MathMLToLaTeX) {
|
|
26
|
-
try {
|
|
27
|
-
const module = await import('mathml-to-latex');
|
|
28
|
-
MathMLToLaTeX = module.MathMLToLaTeX;
|
|
29
|
-
} catch {
|
|
30
|
-
return null;
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
return MathMLToLaTeX;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
/**
|
|
37
|
-
* Extract all equations from markdown text
|
|
38
|
-
*/
|
|
39
|
-
export function extractEquations(text: string, file: string = ''): Equation[] {
|
|
40
|
-
const equations: Equation[] = [];
|
|
41
|
-
const lines = text.split('\n');
|
|
42
|
-
|
|
43
|
-
let inDisplayMath = false;
|
|
44
|
-
let displayMathStart = 0;
|
|
45
|
-
let displayMathContent = '';
|
|
46
|
-
|
|
47
|
-
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
|
|
48
|
-
const line = lines[lineNum];
|
|
49
|
-
if (!line) continue;
|
|
50
|
-
|
|
51
|
-
// Skip code blocks
|
|
52
|
-
if (line.trim().startsWith('```')) continue;
|
|
53
|
-
|
|
54
|
-
// Handle inline math ($...$) in a segment of text
|
|
55
|
-
// Careful not to match $$ or escaped \$
|
|
56
|
-
const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
|
|
57
|
-
const extractInline = (segment: string): void => {
|
|
58
|
-
let match;
|
|
59
|
-
inlinePattern.lastIndex = 0;
|
|
60
|
-
while ((match = inlinePattern.exec(segment)) !== null) {
|
|
61
|
-
const content = match[1];
|
|
62
|
-
if (content) {
|
|
63
|
-
equations.push({
|
|
64
|
-
type: 'inline',
|
|
65
|
-
content: content.trim(),
|
|
66
|
-
line: lineNum + 1,
|
|
67
|
-
file,
|
|
68
|
-
});
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
};
|
|
72
|
-
|
|
73
|
-
// Handle display math blocks ($$...$$)
|
|
74
|
-
if (line.includes('$$')) {
|
|
75
|
-
const parts = line.split('$$');
|
|
76
|
-
|
|
77
|
-
if (!inDisplayMath && parts.length >= 3) {
|
|
78
|
-
// Single-line display math: $$content$$
|
|
79
|
-
// Also extract inline math from surrounding text
|
|
80
|
-
if (parts[0]) extractInline(parts[0]); // Text before $$
|
|
81
|
-
for (let i = 1; i < parts.length; i += 2) {
|
|
82
|
-
const part = parts[i];
|
|
83
|
-
if (part && part.trim()) {
|
|
84
|
-
equations.push({
|
|
85
|
-
type: 'display',
|
|
86
|
-
content: part.trim(),
|
|
87
|
-
line: lineNum + 1,
|
|
88
|
-
file,
|
|
89
|
-
});
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
// Extract inline from text after the last $$
|
|
93
|
-
const lastPart = parts[parts.length - 1];
|
|
94
|
-
if (parts.length % 2 === 1 && lastPart) {
|
|
95
|
-
extractInline(lastPart);
|
|
96
|
-
}
|
|
97
|
-
} else if (!inDisplayMath) {
|
|
98
|
-
// Start of multi-line display math
|
|
99
|
-
if (parts[0]) extractInline(parts[0]); // Text before $$
|
|
100
|
-
inDisplayMath = true;
|
|
101
|
-
displayMathStart = lineNum + 1;
|
|
102
|
-
displayMathContent = parts[1] || '';
|
|
103
|
-
} else {
|
|
104
|
-
// End of multi-line display math
|
|
105
|
-
inDisplayMath = false;
|
|
106
|
-
displayMathContent += '\n' + (parts[0] || '');
|
|
107
|
-
if (displayMathContent.trim()) {
|
|
108
|
-
equations.push({
|
|
109
|
-
type: 'display',
|
|
110
|
-
content: displayMathContent.trim(),
|
|
111
|
-
line: displayMathStart,
|
|
112
|
-
file,
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
displayMathContent = '';
|
|
116
|
-
// Text after $$ on closing line
|
|
117
|
-
const afterPart = parts[1];
|
|
118
|
-
if (afterPart) {
|
|
119
|
-
extractInline(afterPart);
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
continue;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
if (inDisplayMath) {
|
|
126
|
-
displayMathContent += '\n' + line;
|
|
127
|
-
continue;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
// No display math on this line - extract inline math
|
|
131
|
-
extractInline(line);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
return equations;
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
/**
|
|
138
|
-
* Generate a markdown document with numbered equations
|
|
139
|
-
* Useful for creating an equation reference sheet
|
|
140
|
-
*/
|
|
141
|
-
export function generateEquationSheet(equations: Equation[]): string {
|
|
142
|
-
const lines: string[] = [];
|
|
143
|
-
lines.push('# Equations');
|
|
144
|
-
lines.push('');
|
|
145
|
-
|
|
146
|
-
let displayNum = 0;
|
|
147
|
-
let inlineNum = 0;
|
|
148
|
-
|
|
149
|
-
// Group by file
|
|
150
|
-
const byFile = new Map<string, Equation[]>();
|
|
151
|
-
for (const eq of equations) {
|
|
152
|
-
if (!byFile.has(eq.file)) {
|
|
153
|
-
byFile.set(eq.file, []);
|
|
154
|
-
}
|
|
155
|
-
byFile.get(eq.file)!.push(eq);
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
for (const [file, fileEqs] of byFile) {
|
|
159
|
-
if (file) {
|
|
160
|
-
lines.push(`## ${file}`);
|
|
161
|
-
lines.push('');
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
for (const eq of fileEqs) {
|
|
165
|
-
if (eq.type === 'display') {
|
|
166
|
-
displayNum++;
|
|
167
|
-
lines.push(`### Equation ${displayNum} (line ${eq.line})`);
|
|
168
|
-
lines.push('');
|
|
169
|
-
lines.push('```latex');
|
|
170
|
-
lines.push(eq.content);
|
|
171
|
-
lines.push('```');
|
|
172
|
-
lines.push('');
|
|
173
|
-
lines.push('$$' + eq.content + '$$');
|
|
174
|
-
lines.push('');
|
|
175
|
-
} else {
|
|
176
|
-
inlineNum++;
|
|
177
|
-
lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
lines.push('');
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
lines.push('---');
|
|
184
|
-
lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
|
|
185
|
-
|
|
186
|
-
return lines.join('\n');
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
interface ConvertToWordOptions {
|
|
190
|
-
preserveLatex?: boolean;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
/**
|
|
194
|
-
* Convert markdown with equations to Word using pandoc
|
|
195
|
-
*/
|
|
196
|
-
export async function convertToWord(
|
|
197
|
-
inputPath: string,
|
|
198
|
-
outputPath: string,
|
|
199
|
-
options: ConvertToWordOptions = {}
|
|
200
|
-
): Promise<{ success: boolean; message: string }> {
|
|
201
|
-
const { preserveLatex = false } = options;
|
|
202
|
-
|
|
203
|
-
// Check pandoc is available
|
|
204
|
-
try {
|
|
205
|
-
await execAsync('pandoc --version');
|
|
206
|
-
} catch {
|
|
207
|
-
return { success: false, message: 'Pandoc not found. Install pandoc first.' };
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
// Build pandoc command
|
|
211
|
-
// Use --mathml for better equation rendering in Word
|
|
212
|
-
const args = [
|
|
213
|
-
'pandoc',
|
|
214
|
-
`"${inputPath}"`,
|
|
215
|
-
'-o', `"${outputPath}"`,
|
|
216
|
-
'--mathml', // Better equation support in Word
|
|
217
|
-
];
|
|
218
|
-
|
|
219
|
-
if (preserveLatex) {
|
|
220
|
-
// Keep raw LaTeX (less compatible but preserves source)
|
|
221
|
-
args.push('--wrap=preserve');
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
try {
|
|
225
|
-
await execAsync(args.join(' '));
|
|
226
|
-
return { success: true, message: `Created ${outputPath}` };
|
|
227
|
-
} catch (err: any) {
|
|
228
|
-
return { success: false, message: err.message };
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
/**
|
|
233
|
-
* Create a simple equations-only document
|
|
234
|
-
*/
|
|
235
|
-
export async function createEquationsDoc(
|
|
236
|
-
inputPath: string,
|
|
237
|
-
outputPath: string
|
|
238
|
-
): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
|
|
239
|
-
if (!fs.existsSync(inputPath)) {
|
|
240
|
-
return { success: false, message: `File not found: ${inputPath}`, stats: null };
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
const text = fs.readFileSync(inputPath, 'utf-8');
|
|
244
|
-
const equations = extractEquations(text, path.basename(inputPath));
|
|
245
|
-
|
|
246
|
-
if (equations.length === 0) {
|
|
247
|
-
return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
const sheet = generateEquationSheet(equations);
|
|
251
|
-
const stats = {
|
|
252
|
-
display: equations.filter(e => e.type === 'display').length,
|
|
253
|
-
inline: equations.filter(e => e.type === 'inline').length,
|
|
254
|
-
};
|
|
255
|
-
|
|
256
|
-
const ext = path.extname(outputPath).toLowerCase();
|
|
257
|
-
|
|
258
|
-
if (ext === '.docx') {
|
|
259
|
-
// Write temp md, convert to docx
|
|
260
|
-
const tempMd = outputPath.replace('.docx', '.tmp.md');
|
|
261
|
-
fs.writeFileSync(tempMd, sheet, 'utf-8');
|
|
262
|
-
const result = await convertToWord(tempMd, outputPath);
|
|
263
|
-
fs.unlinkSync(tempMd);
|
|
264
|
-
return { ...result, stats };
|
|
265
|
-
} else {
|
|
266
|
-
// Write as markdown
|
|
267
|
-
fs.writeFileSync(outputPath, sheet, 'utf-8');
|
|
268
|
-
return { success: true, message: `Created ${outputPath}`, stats };
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
/**
|
|
273
|
-
* Get equation statistics for a file or directory
|
|
274
|
-
*/
|
|
275
|
-
export function getEquationStats(files: string[]): EquationStats {
|
|
276
|
-
let totalDisplay = 0;
|
|
277
|
-
let totalInline = 0;
|
|
278
|
-
const byFile: Array<{ file: string; display: number; inline: number }> = [];
|
|
279
|
-
|
|
280
|
-
for (const file of files) {
|
|
281
|
-
if (!fs.existsSync(file)) continue;
|
|
282
|
-
const text = fs.readFileSync(file, 'utf-8');
|
|
283
|
-
const equations = extractEquations(text, path.basename(file));
|
|
284
|
-
|
|
285
|
-
const display = equations.filter(e => e.type === 'display').length;
|
|
286
|
-
const inline = equations.filter(e => e.type === 'inline').length;
|
|
287
|
-
|
|
288
|
-
totalDisplay += display;
|
|
289
|
-
totalInline += inline;
|
|
290
|
-
|
|
291
|
-
if (display > 0 || inline > 0) {
|
|
292
|
-
byFile.push({ file: path.basename(file), display, inline });
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
return {
|
|
297
|
-
total: totalDisplay + totalInline,
|
|
298
|
-
display: totalDisplay,
|
|
299
|
-
inline: totalInline,
|
|
300
|
-
byFile,
|
|
301
|
-
};
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
/**
|
|
305
|
-
* Extract equations from a Word document using Pandoc
|
|
306
|
-
* Converts OMML (Office Math Markup) to LaTeX
|
|
307
|
-
*/
|
|
308
|
-
export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
|
|
309
|
-
if (!fs.existsSync(docxPath)) {
|
|
310
|
-
return { success: false, equations: [], error: `File not found: ${docxPath}` };
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
// Method 1: Use Pandoc to convert docx to markdown with LaTeX math
|
|
314
|
-
try {
|
|
315
|
-
const { stdout } = await execAsync(
|
|
316
|
-
`pandoc "${docxPath}" -t markdown --wrap=none`,
|
|
317
|
-
{ maxBuffer: 50 * 1024 * 1024 }
|
|
318
|
-
);
|
|
319
|
-
|
|
320
|
-
// Extract equations from the markdown output
|
|
321
|
-
const equations = extractEquations(stdout, path.basename(docxPath));
|
|
322
|
-
|
|
323
|
-
return {
|
|
324
|
-
success: true,
|
|
325
|
-
equations: equations.map((eq, i) => ({
|
|
326
|
-
type: eq.type,
|
|
327
|
-
latex: eq.content,
|
|
328
|
-
position: i,
|
|
329
|
-
line: eq.line,
|
|
330
|
-
})),
|
|
331
|
-
};
|
|
332
|
-
} catch (err) {
|
|
333
|
-
// Pandoc failed, try fallback method
|
|
334
|
-
return extractEquationsFromWordDirect(docxPath);
|
|
335
|
-
}
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
/**
|
|
339
|
-
* Direct OMML extraction from Word document (fallback if Pandoc fails)
|
|
340
|
-
* Parses document.xml for <m:oMath> elements and attempts conversion
|
|
341
|
-
*/
|
|
342
|
-
async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
|
|
343
|
-
try {
|
|
344
|
-
const zip = new AdmZip(docxPath);
|
|
345
|
-
const documentEntry = zip.getEntry('word/document.xml');
|
|
346
|
-
|
|
347
|
-
if (!documentEntry) {
|
|
348
|
-
return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
const documentXml = zip.readAsText(documentEntry);
|
|
352
|
-
|
|
353
|
-
// Find all OMML equations (<m:oMath> or <m:oMathPara>)
|
|
354
|
-
const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
|
|
355
|
-
const matches = documentXml.match(ommlPattern) || [];
|
|
356
|
-
|
|
357
|
-
if (matches.length === 0) {
|
|
358
|
-
return { success: true, equations: [] };
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
// Try to convert OMML to LaTeX via MathML intermediate
|
|
362
|
-
const Converter = await getMathMLConverter();
|
|
363
|
-
const equations: WordEquationResult['equations'] = [];
|
|
364
|
-
|
|
365
|
-
for (let i = 0; i < matches.length; i++) {
|
|
366
|
-
const omml = matches[i];
|
|
367
|
-
if (!omml) continue;
|
|
368
|
-
|
|
369
|
-
// Attempt OMML → MathML → LaTeX conversion
|
|
370
|
-
// Note: This is a simplified approach; full OMML→MathML requires XSLT
|
|
371
|
-
try {
|
|
372
|
-
const latex = await ommlToLatex(omml, Converter);
|
|
373
|
-
if (latex) {
|
|
374
|
-
equations.push({
|
|
375
|
-
type: isDisplayMath(omml) ? 'display' : 'inline',
|
|
376
|
-
latex,
|
|
377
|
-
position: i,
|
|
378
|
-
raw: omml.substring(0, 100) + '...',
|
|
379
|
-
});
|
|
380
|
-
}
|
|
381
|
-
} catch {
|
|
382
|
-
// Keep raw OMML reference if conversion fails
|
|
383
|
-
equations.push({
|
|
384
|
-
type: 'unknown',
|
|
385
|
-
latex: null,
|
|
386
|
-
position: i,
|
|
387
|
-
raw: omml.substring(0, 100) + '...',
|
|
388
|
-
error: 'Conversion failed',
|
|
389
|
-
});
|
|
390
|
-
}
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
return { success: true, equations };
|
|
394
|
-
} catch (err: any) {
|
|
395
|
-
return { success: false, equations: [], error: err.message };
|
|
396
|
-
}
|
|
397
|
-
}
|
|
398
|
-
|
|
399
|
-
/**
|
|
400
|
-
* Check if OMML represents display math (equation on its own line)
|
|
401
|
-
*/
|
|
402
|
-
function isDisplayMath(omml: string): boolean {
|
|
403
|
-
return omml.includes('<m:oMathPara') || omml.includes('m:jc');
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
/**
|
|
407
|
-
* Convert OMML to LaTeX (simplified approach)
|
|
408
|
-
* For complex equations, Pandoc method is more reliable
|
|
409
|
-
*/
|
|
410
|
-
async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
|
|
411
|
-
if (!Converter) return null;
|
|
412
|
-
|
|
413
|
-
// Extract key elements from OMML and build approximate MathML
|
|
414
|
-
// This is a simplified conversion - not all OMML features are supported
|
|
415
|
-
try {
|
|
416
|
-
// Build basic MathML from OMML structure
|
|
417
|
-
const mathml = ommlToMathML(omml);
|
|
418
|
-
if (!mathml) return null;
|
|
419
|
-
|
|
420
|
-
// Convert MathML to LaTeX
|
|
421
|
-
const latex = Converter.convert(mathml);
|
|
422
|
-
return latex;
|
|
423
|
-
} catch {
|
|
424
|
-
return null;
|
|
425
|
-
}
|
|
426
|
-
}
|
|
427
|
-
|
|
428
|
-
/**
|
|
429
|
-
* Convert OMML to MathML (simplified)
|
|
430
|
-
* Maps common OMML elements to MathML equivalents
|
|
431
|
-
*/
|
|
432
|
-
function ommlToMathML(omml: string): string | null {
|
|
433
|
-
// Remove namespace prefixes for easier parsing
|
|
434
|
-
let xml = omml
|
|
435
|
-
.replace(/<m:/g, '<')
|
|
436
|
-
.replace(/<\/m:/g, '</')
|
|
437
|
-
.replace(/<w:/g, '<w_')
|
|
438
|
-
.replace(/<\/w:/g, '</w_');
|
|
439
|
-
|
|
440
|
-
// Map OMML elements to MathML
|
|
441
|
-
const mappings: Array<[RegExp, string]> = [
|
|
442
|
-
[/<oMath[^>]*>/gi, '<math xmlns="http://www.w3.org/1998/Math/MathML">'],
|
|
443
|
-
[/<\/oMath>/gi, '</math>'],
|
|
444
|
-
[/<r>/gi, '<mi>'],
|
|
445
|
-
[/<\/r>/gi, '</mi>'],
|
|
446
|
-
[/<t>/gi, ''],
|
|
447
|
-
[/<\/t>/gi, ''],
|
|
448
|
-
[/<f>/gi, '<mfrac>'],
|
|
449
|
-
[/<\/f>/gi, '</mfrac>'],
|
|
450
|
-
[/<num>/gi, '<mrow>'],
|
|
451
|
-
[/<\/num>/gi, '</mrow>'],
|
|
452
|
-
[/<den>/gi, '<mrow>'],
|
|
453
|
-
[/<\/den>/gi, '</mrow>'],
|
|
454
|
-
[/<sup>/gi, '<msup><mrow>'],
|
|
455
|
-
[/<\/sup>/gi, '</mrow></msup>'],
|
|
456
|
-
[/<sub>/gi, '<msub><mrow>'],
|
|
457
|
-
[/<\/sub>/gi, '</mrow></msub>'],
|
|
458
|
-
[/<rad>/gi, '<msqrt>'],
|
|
459
|
-
[/<\/rad>/gi, '</msqrt>'],
|
|
460
|
-
[/<e>/gi, '<mrow>'],
|
|
461
|
-
[/<\/e>/gi, '</mrow>'],
|
|
462
|
-
// Remove elements we don't map
|
|
463
|
-
[/<rPr>[\s\S]*?<\/rPr>/gi, ''],
|
|
464
|
-
[/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
|
|
465
|
-
[/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
|
|
466
|
-
[/<[^>]*\/>/gi, ''], // Self-closing tags
|
|
467
|
-
];
|
|
468
|
-
|
|
469
|
-
for (const [pattern, replacement] of mappings) {
|
|
470
|
-
xml = xml.replace(pattern, replacement);
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
// Clean up any remaining unrecognized tags
|
|
474
|
-
xml = xml.replace(/<[a-zA-Z][^>]*>/g, '').replace(/<\/[a-zA-Z]+>/g, '');
|
|
475
|
-
|
|
476
|
-
// Wrap in math if not already
|
|
477
|
-
if (!xml.includes('<math')) {
|
|
478
|
-
xml = `<math xmlns="http://www.w3.org/1998/Math/MathML">${xml}</math>`;
|
|
479
|
-
}
|
|
480
|
-
|
|
481
|
-
return xml;
|
|
482
|
-
}
|
|
483
|
-
|
|
484
|
-
/**
|
|
485
|
-
* Get equation summary from Word document
|
|
486
|
-
*/
|
|
487
|
-
export async function getWordEquationStats(
|
|
488
|
-
docxPath: string
|
|
489
|
-
): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
|
|
490
|
-
const result = await extractEquationsFromWord(docxPath);
|
|
491
|
-
|
|
492
|
-
if (!result.success) {
|
|
493
|
-
return { count: 0, display: 0, inline: 0, converted: 0, error: result.error };
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
const display = result.equations.filter(e => e.type === 'display').length;
|
|
497
|
-
const inline = result.equations.filter(e => e.type === 'inline').length;
|
|
498
|
-
const converted = result.equations.filter(e => e.latex).length;
|
|
499
|
-
|
|
500
|
-
return {
|
|
501
|
-
count: result.equations.length,
|
|
502
|
-
display,
|
|
503
|
-
inline,
|
|
504
|
-
converted,
|
|
505
|
-
};
|
|
506
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Equation extraction and conversion utilities
|
|
3
|
+
* Handle LaTeX math in Markdown ↔ Word workflows
|
|
4
|
+
*
|
|
5
|
+
* Supports:
|
|
6
|
+
* - Extract LaTeX equations from Markdown
|
|
7
|
+
* - Extract equations from Word documents (OMML → LaTeX via Pandoc)
|
|
8
|
+
* - Convert Markdown with equations to Word (LaTeX → MathML)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import * as fs from 'fs';
|
|
12
|
+
import * as path from 'path';
|
|
13
|
+
import { exec } from 'child_process';
|
|
14
|
+
import { promisify } from 'util';
|
|
15
|
+
import AdmZip from 'adm-zip';
|
|
16
|
+
import { parseString } from 'xml2js';
|
|
17
|
+
import type { Equation, EquationStats, WordEquationResult } from './types.js';
|
|
18
|
+
|
|
19
|
+
const execAsync = promisify(exec);
|
|
20
|
+
const parseXml = promisify(parseString);
|
|
21
|
+
|
|
22
|
+
// Dynamic import for mathml-to-latex (ESM)
|
|
23
|
+
let MathMLToLaTeX: any = null;
|
|
24
|
+
async function getMathMLConverter(): Promise<any> {
|
|
25
|
+
if (!MathMLToLaTeX) {
|
|
26
|
+
try {
|
|
27
|
+
const module = await import('mathml-to-latex');
|
|
28
|
+
MathMLToLaTeX = module.MathMLToLaTeX;
|
|
29
|
+
} catch {
|
|
30
|
+
return null;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
return MathMLToLaTeX;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Extract all equations from markdown text
|
|
38
|
+
*/
|
|
39
|
+
export function extractEquations(text: string, file: string = ''): Equation[] {
|
|
40
|
+
const equations: Equation[] = [];
|
|
41
|
+
const lines = text.split('\n');
|
|
42
|
+
|
|
43
|
+
let inDisplayMath = false;
|
|
44
|
+
let displayMathStart = 0;
|
|
45
|
+
let displayMathContent = '';
|
|
46
|
+
|
|
47
|
+
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
|
|
48
|
+
const line = lines[lineNum];
|
|
49
|
+
if (!line) continue;
|
|
50
|
+
|
|
51
|
+
// Skip code blocks
|
|
52
|
+
if (line.trim().startsWith('```')) continue;
|
|
53
|
+
|
|
54
|
+
// Handle inline math ($...$) in a segment of text
|
|
55
|
+
// Careful not to match $$ or escaped \$
|
|
56
|
+
const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
|
|
57
|
+
const extractInline = (segment: string): void => {
|
|
58
|
+
let match;
|
|
59
|
+
inlinePattern.lastIndex = 0;
|
|
60
|
+
while ((match = inlinePattern.exec(segment)) !== null) {
|
|
61
|
+
const content = match[1];
|
|
62
|
+
if (content) {
|
|
63
|
+
equations.push({
|
|
64
|
+
type: 'inline',
|
|
65
|
+
content: content.trim(),
|
|
66
|
+
line: lineNum + 1,
|
|
67
|
+
file,
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
// Handle display math blocks ($$...$$)
|
|
74
|
+
if (line.includes('$$')) {
|
|
75
|
+
const parts = line.split('$$');
|
|
76
|
+
|
|
77
|
+
if (!inDisplayMath && parts.length >= 3) {
|
|
78
|
+
// Single-line display math: $$content$$
|
|
79
|
+
// Also extract inline math from surrounding text
|
|
80
|
+
if (parts[0]) extractInline(parts[0]); // Text before $$
|
|
81
|
+
for (let i = 1; i < parts.length; i += 2) {
|
|
82
|
+
const part = parts[i];
|
|
83
|
+
if (part && part.trim()) {
|
|
84
|
+
equations.push({
|
|
85
|
+
type: 'display',
|
|
86
|
+
content: part.trim(),
|
|
87
|
+
line: lineNum + 1,
|
|
88
|
+
file,
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
// Extract inline from text after the last $$
|
|
93
|
+
const lastPart = parts[parts.length - 1];
|
|
94
|
+
if (parts.length % 2 === 1 && lastPart) {
|
|
95
|
+
extractInline(lastPart);
|
|
96
|
+
}
|
|
97
|
+
} else if (!inDisplayMath) {
|
|
98
|
+
// Start of multi-line display math
|
|
99
|
+
if (parts[0]) extractInline(parts[0]); // Text before $$
|
|
100
|
+
inDisplayMath = true;
|
|
101
|
+
displayMathStart = lineNum + 1;
|
|
102
|
+
displayMathContent = parts[1] || '';
|
|
103
|
+
} else {
|
|
104
|
+
// End of multi-line display math
|
|
105
|
+
inDisplayMath = false;
|
|
106
|
+
displayMathContent += '\n' + (parts[0] || '');
|
|
107
|
+
if (displayMathContent.trim()) {
|
|
108
|
+
equations.push({
|
|
109
|
+
type: 'display',
|
|
110
|
+
content: displayMathContent.trim(),
|
|
111
|
+
line: displayMathStart,
|
|
112
|
+
file,
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
displayMathContent = '';
|
|
116
|
+
// Text after $$ on closing line
|
|
117
|
+
const afterPart = parts[1];
|
|
118
|
+
if (afterPart) {
|
|
119
|
+
extractInline(afterPart);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
continue;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
if (inDisplayMath) {
|
|
126
|
+
displayMathContent += '\n' + line;
|
|
127
|
+
continue;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// No display math on this line - extract inline math
|
|
131
|
+
extractInline(line);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
return equations;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Generate a markdown document with numbered equations
|
|
139
|
+
* Useful for creating an equation reference sheet
|
|
140
|
+
*/
|
|
141
|
+
export function generateEquationSheet(equations: Equation[]): string {
|
|
142
|
+
const lines: string[] = [];
|
|
143
|
+
lines.push('# Equations');
|
|
144
|
+
lines.push('');
|
|
145
|
+
|
|
146
|
+
let displayNum = 0;
|
|
147
|
+
let inlineNum = 0;
|
|
148
|
+
|
|
149
|
+
// Group by file
|
|
150
|
+
const byFile = new Map<string, Equation[]>();
|
|
151
|
+
for (const eq of equations) {
|
|
152
|
+
if (!byFile.has(eq.file)) {
|
|
153
|
+
byFile.set(eq.file, []);
|
|
154
|
+
}
|
|
155
|
+
byFile.get(eq.file)!.push(eq);
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
for (const [file, fileEqs] of byFile) {
|
|
159
|
+
if (file) {
|
|
160
|
+
lines.push(`## ${file}`);
|
|
161
|
+
lines.push('');
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
for (const eq of fileEqs) {
|
|
165
|
+
if (eq.type === 'display') {
|
|
166
|
+
displayNum++;
|
|
167
|
+
lines.push(`### Equation ${displayNum} (line ${eq.line})`);
|
|
168
|
+
lines.push('');
|
|
169
|
+
lines.push('```latex');
|
|
170
|
+
lines.push(eq.content);
|
|
171
|
+
lines.push('```');
|
|
172
|
+
lines.push('');
|
|
173
|
+
lines.push('$$' + eq.content + '$$');
|
|
174
|
+
lines.push('');
|
|
175
|
+
} else {
|
|
176
|
+
inlineNum++;
|
|
177
|
+
lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
lines.push('');
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
lines.push('---');
|
|
184
|
+
lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
|
|
185
|
+
|
|
186
|
+
return lines.join('\n');
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
interface ConvertToWordOptions {
|
|
190
|
+
preserveLatex?: boolean;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
 * Convert a markdown file containing equations to a Word document by
 * shelling out to pandoc.
 *
 * @param inputPath - Markdown file handed to pandoc as its input.
 * @param outputPath - Destination passed to pandoc's `-o` flag.
 * @param options - `preserveLatex` adds `--wrap=preserve` to the command.
 * @returns `{ success, message }`. A missing pandoc binary or a failed
 *   conversion is reported through `success: false` plus a message rather
 *   than through a rejected promise.
 */
export async function convertToWord(
  inputPath: string,
  outputPath: string,
  options: ConvertToWordOptions = {}
): Promise<{ success: boolean; message: string }> {
  const { preserveLatex = false } = options;

  // The command runs through a shell, so paths must be quoted. POSIX shells
  // treat single-quoted text literally (an embedded ' is written as '\''),
  // which keeps ", $ and backticks in a file name from being interpreted.
  // Windows file names cannot contain ", so plain double quotes are kept there.
  const quoteArg = (value: string): string =>
    process.platform === 'win32'
      ? `"${value}"`
      : `'${value.replace(/'/g, `'\\''`)}'`;

  // Check pandoc is available
  try {
    await execAsync('pandoc --version');
  } catch {
    return { success: false, message: 'Pandoc not found. Install pandoc first.' };
  }

  // Build pandoc command
  const args = [
    'pandoc',
    quoteArg(inputPath),
    '-o',
    quoteArg(outputPath),
    '--mathml', // Original author's note: better equation support in Word
  ];

  if (preserveLatex) {
    // NOTE(review): the original comment says this keeps raw LaTeX, but the
    // flag itself only controls pandoc's text wrapping - confirm the intent.
    args.push('--wrap=preserve');
  }

  try {
    await execAsync(args.join(' '));
    return { success: true, message: `Created ${outputPath}` };
  } catch (err: unknown) {
    // A rejection is not guaranteed to be an Error; always return a string.
    return { success: false, message: err instanceof Error ? err.message : String(err) };
  }
}
|
|
231
|
+
|
|
232
|
+
/**
 * Create an equations-only document from a markdown file.
 *
 * Reads `inputPath`, extracts its equations, renders them with
 * `generateEquationSheet`, and writes the result to `outputPath`. When the
 * output extension is `.docx` (any letter case) the sheet is written to a
 * temporary markdown file next to the output and converted with
 * `convertToWord`; any other extension receives the markdown sheet directly.
 *
 * @param inputPath - Markdown file to scan for equations.
 * @param outputPath - Target file; its extension selects docx vs. markdown.
 * @returns The outcome plus display/inline counts. `stats` is `null` only
 *   when the input file does not exist.
 */
export async function createEquationsDoc(
  inputPath: string,
  outputPath: string
): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
  if (!fs.existsSync(inputPath)) {
    return { success: false, message: `File not found: ${inputPath}`, stats: null };
  }

  const text = fs.readFileSync(inputPath, 'utf-8');
  const equations = extractEquations(text, path.basename(inputPath));

  if (equations.length === 0) {
    return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
  }

  const sheet = generateEquationSheet(equations);
  const stats = {
    display: equations.filter(e => e.type === 'display').length,
    inline: equations.filter(e => e.type === 'inline').length,
  };

  const ext = path.extname(outputPath);

  if (ext.toLowerCase() !== '.docx') {
    // Write as markdown
    fs.writeFileSync(outputPath, sheet, 'utf-8');
    return { success: true, message: `Created ${outputPath}`, stats };
  }

  // Derive the temp name by swapping the trailing extension. A textual
  // replace('.docx', ...) misses upper-case extensions (temp === output, so
  // the cleanup below would delete the generated document) and can hit an
  // earlier ".docx" inside a directory name.
  const tempMd = `${outputPath.slice(0, -ext.length)}.tmp.md`;
  fs.writeFileSync(tempMd, sheet, 'utf-8');
  try {
    const result = await convertToWord(tempMd, outputPath);
    return { ...result, stats };
  } finally {
    // Remove the temp file even if the conversion throws.
    fs.unlinkSync(tempMd);
  }
}
|
|
271
|
+
|
|
272
|
+
/**
 * Tally display and inline equations across a list of files.
 *
 * Paths that do not exist are skipped. Files containing no equations still
 * count toward nothing and are left out of `byFile`; entries in `byFile` are
 * keyed by the file's base name.
 *
 * @param files - Paths of the files to scan.
 * @returns Overall totals plus a per-file breakdown.
 */
export function getEquationStats(files: string[]): EquationStats {
  const perFile: Array<{ file: string; display: number; inline: number }> = [];
  let displaySum = 0;
  let inlineSum = 0;

  for (const filePath of files) {
    if (fs.existsSync(filePath)) {
      const contents = fs.readFileSync(filePath, 'utf-8');
      const name = path.basename(filePath);
      const found = extractEquations(contents, name);

      let display = 0;
      let inline = 0;
      for (const eq of found) {
        if (eq.type === 'display') {
          display += 1;
        } else if (eq.type === 'inline') {
          inline += 1;
        }
      }

      displaySum += display;
      inlineSum += inline;

      if (display + inline > 0) {
        perFile.push({ file: name, display, inline });
      }
    }
  }

  return {
    total: displaySum + inlineSum,
    display: displaySum,
    inline: inlineSum,
    byFile: perFile,
  };
}
|
|
303
|
+
|
|
304
|
+
/**
 * Extract equations from a Word document.
 *
 * Runs pandoc to convert the docx to markdown and then scans that markdown
 * with `extractEquations`. If anything in that path throws (pandoc missing,
 * conversion error, ...), the work is handed to
 * `extractEquationsFromWordDirect` instead.
 *
 * @param docxPath - Path of the .docx file to read.
 * @returns `success: false` with an error when the file does not exist;
 *   otherwise the equations found, each with its type, LaTeX source, ordinal
 *   position and line number in the pandoc output.
 */
export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
  if (!fs.existsSync(docxPath)) {
    return { success: false, equations: [], error: `File not found: ${docxPath}` };
  }

  // The command runs through a shell. On POSIX, single quotes keep ", $ and
  // backticks in the path literal (an embedded ' is written as '\'').
  // Windows file names cannot contain ", so double quotes are kept there.
  const quotedPath =
    process.platform === 'win32'
      ? `"${docxPath}"`
      : `'${docxPath.replace(/'/g, `'\\''`)}'`;

  // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
  try {
    const { stdout } = await execAsync(
      `pandoc ${quotedPath} -t markdown --wrap=none`,
      { maxBuffer: 50 * 1024 * 1024 }
    );

    // Extract equations from the markdown output
    const equations = extractEquations(stdout, path.basename(docxPath));

    return {
      success: true,
      equations: equations.map((eq, i) => ({
        type: eq.type,
        latex: eq.content,
        position: i,
        line: eq.line,
      })),
    };
  } catch {
    // Pandoc path failed; fall back to reading the OMML directly.
    return extractEquationsFromWordDirect(docxPath);
  }
}
|
|
337
|
+
|
|
338
|
+
/**
 * Direct OMML extraction from a Word document (fallback if Pandoc fails).
 *
 * Opens the docx as a zip, reads `word/document.xml`, finds `<m:oMath...>`
 * spans with a regular expression and tries to convert each one to LaTeX via
 * `ommlToLatex`.
 *
 * Every matched equation produces an entry: converted ones carry their LaTeX
 * and a display/inline type; ones that could not be converted are kept with
 * `type: 'unknown'`, `latex: null` and an `error`, so the number of entries
 * reflects the number of equations found in the document.
 *
 * @param docxPath - Path of the .docx file to read.
 * @returns `success: false` with an error message when the archive cannot be
 *   read or has no `word/document.xml`; otherwise the list of equations.
 */
async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
  try {
    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');

    if (!documentEntry) {
      return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
    }

    const documentXml = zip.readAsText(documentEntry);

    // The opening pattern also matches <m:oMathPara...>, so a display
    // equation's match starts at its paragraph wrapper and runs to the first
    // </m:oMath>; isDisplayMath relies on that wrapper being in the match.
    const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
    const matches = documentXml.match(ommlPattern) || [];

    if (matches.length === 0) {
      return { success: true, equations: [] };
    }

    // Try to convert OMML to LaTeX via MathML intermediate
    const Converter = await getMathMLConverter();
    const equations: WordEquationResult['equations'] = [];

    for (let i = 0; i < matches.length; i++) {
      const omml = matches[i];
      if (!omml) continue;

      // Attempt OMML -> MathML -> LaTeX conversion.
      // Note: This is a simplified approach; full OMML->MathML requires XSLT.
      // ommlToLatex signals failure by returning null (it catches its own
      // errors), so a falsy result and a thrown error are treated alike.
      let latex: string | null = null;
      try {
        latex = await ommlToLatex(omml, Converter);
      } catch {
        latex = null;
      }

      const raw = omml.substring(0, 100) + '...';
      if (latex) {
        equations.push({
          type: isDisplayMath(omml) ? 'display' : 'inline',
          latex,
          position: i,
          raw,
        });
      } else {
        // Keep raw OMML reference if conversion fails
        equations.push({
          type: 'unknown',
          latex: null,
          position: i,
          raw,
          error: 'Conversion failed',
        });
      }
    }

    return { success: true, equations };
  } catch (err: unknown) {
    // A thrown value is not guaranteed to be an Error; always report a string.
    return {
      success: false,
      equations: [],
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
398
|
+
|
|
399
|
+
/**
 * Decide whether an OMML fragment should be treated as display math.
 *
 * The check is purely textual: the fragment counts as display math when it
 * contains an `<m:oMathPara` opening or the substring `m:jc` anywhere.
 */
function isDisplayMath(omml: string): boolean {
  const displayMarkers = ['<m:oMathPara', 'm:jc'];
  return displayMarkers.some((marker) => omml.indexOf(marker) !== -1);
}
|
|
405
|
+
|
|
406
|
+
/**
 * Convert an OMML fragment to LaTeX (simplified approach).
 *
 * The fragment is first turned into approximate MathML by `ommlToMathML` and
 * then handed to `Converter.convert`. Not every OMML feature survives this
 * route; the pandoc-based extraction is the more reliable one.
 *
 * @param omml - Raw OMML markup for one equation.
 * @param Converter - Object exposing `convert(mathml)`; a falsy value means
 *   no converter is available.
 * @returns Whatever `Converter.convert` returns, or `null` when there is no
 *   converter, no MathML could be produced, or a step throws synchronously.
 */
async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
  if (Converter) {
    try {
      // Build basic MathML from the OMML structure, then convert to LaTeX.
      const intermediate = ommlToMathML(omml);
      return intermediate ? Converter.convert(intermediate) : null;
    } catch {
      return null;
    }
  }
  return null;
}
|
|
427
|
+
|
|
428
|
+
/**
 * Convert OMML to MathML (simplified).
 *
 * Works by textual substitution rather than XML parsing:
 *  1. strips the `m:` namespace prefix and renames `w:` tags to `w_...`;
 *  2. removes formatting-only markup (self-closing tags, `rPr`, `ctrlPr`,
 *     WordprocessingML runs);
 *  3. maps common OMML elements to MathML equivalents (runs, fractions,
 *     superscripts/subscripts, radicals);
 *  4. drops every remaining tag that is not one of the MathML elements this
 *     function emits, keeping its text content.
 *
 * Limitations: every run becomes `<mi>`, and a radical's degree is not
 * distinguished from its base. Unsupported constructs (n-ary operators,
 * matrices, ...) degrade to their text content.
 *
 * @param omml - Raw OMML markup for one equation.
 * @returns A `<math>` element string. The declared `null` is kept for
 *   callers but is never produced.
 */
function ommlToMathML(omml: string): string | null {
  const mathOpen = '<math xmlns="http://www.w3.org/1998/Math/MathML">';
  // The only tags allowed to survive the final cleanup.
  const mathmlTags = new Set(['math', 'mi', 'mrow', 'mfrac', 'msup', 'msub', 'msubsup', 'msqrt']);

  // Remove namespace prefixes for easier matching
  let xml = omml
    .replace(/<m:/g, '<')
    .replace(/<\/m:/g, '</')
    .replace(/<w:/g, '<w_')
    .replace(/<\/w:/g, '</w_');

  const mappings: Array<[RegExp, string]> = [
    // Formatting-only markup goes first. Self-closing tags are removed before
    // the paired w_ pattern so a lone <w_x/> cannot pair up with an unrelated
    // closing tag further along.
    [/<[^>]*\/>/gi, ''],
    [/<rPr>[\s\S]*?<\/rPr>/gi, ''],
    [/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
    [/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
    // Exactly <oMath ...>; <oMathPara> / <oMathParaPr> must not open a <math>.
    [/<oMath(?:\s[^>]*)?>/gi, mathOpen],
    [/<\/oMath>/gi, '</math>'],
    [/<r>/gi, '<mi>'],
    [/<\/r>/gi, '</mi>'],
    [/<t>/gi, ''],
    [/<\/t>/gi, ''],
    [/<f>/gi, '<mfrac>'],
    [/<\/f>/gi, '</mfrac>'],
    // In OMML the script container (sSup / sSub / sSubSup) wraps both the
    // base <e> and the script <sup> / <sub>, so the container becomes the
    // MathML script element and its children become its <mrow> operands.
    [/<sSup>/gi, '<msup>'],
    [/<\/sSup>/gi, '</msup>'],
    [/<sSub>/gi, '<msub>'],
    [/<\/sSub>/gi, '</msub>'],
    [/<sSubSup>/gi, '<msubsup>'],
    [/<\/sSubSup>/gi, '</msubsup>'],
    [/<rad>/gi, '<msqrt>'],
    [/<\/rad>/gi, '</msqrt>'],
    [/<(?:num|den|e|sup|sub)>/gi, '<mrow>'],
    [/<\/(?:num|den|e|sup|sub)>/gi, '</mrow>'],
  ];

  for (const [pattern, replacement] of mappings) {
    xml = xml.replace(pattern, replacement);
  }

  // Clean up remaining unrecognized tags only. Stripping every tag here would
  // also delete the MathML produced above and flatten the equation to text.
  xml = xml.replace(/<\/?([a-zA-Z][\w.-]*)[^>]*>/g, (tag: string, name: string) =>
    mathmlTags.has(name) ? tag : ''
  );

  // Wrap in math if not already
  if (!xml.includes('<math')) {
    xml = `${mathOpen}${xml}</math>`;
  }

  return xml;
}
|
|
483
|
+
|
|
484
|
+
/**
 * Summarize the equations found in a Word document.
 *
 * Delegates to `extractEquationsFromWord` and counts the result: `count` is
 * the number of entries, `display` / `inline` count entries of that type,
 * and `converted` counts entries with a truthy `latex` value. When the
 * extraction fails, every counter is 0 and `error` carries the reported
 * error.
 *
 * @param docxPath - Path of the .docx file to inspect.
 */
export async function getWordEquationStats(
  docxPath: string
): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
  const { success, equations, error } = await extractEquationsFromWord(docxPath);

  if (!success) {
    return { count: 0, display: 0, inline: 0, converted: 0, error };
  }

  const tally = { display: 0, inline: 0, converted: 0 };
  for (const eq of equations) {
    if (eq.type === 'display') {
      tally.display += 1;
    } else if (eq.type === 'inline') {
      tally.inline += 1;
    }
    if (eq.latex) {
      tally.converted += 1;
    }
  }

  return { count: equations.length, ...tally };
}
|