docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
package/lib/equations.ts CHANGED
@@ -1,506 +1,506 @@
1
- /**
2
- * Equation extraction and conversion utilities
3
- * Handle LaTeX math in Markdown ↔ Word workflows
4
- *
5
- * Supports:
6
- * - Extract LaTeX equations from Markdown
7
- * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
- * - Convert Markdown with equations to Word (LaTeX → MathML)
9
- */
10
-
11
- import * as fs from 'fs';
12
- import * as path from 'path';
13
- import { exec } from 'child_process';
14
- import { promisify } from 'util';
15
- import AdmZip from 'adm-zip';
16
- import { parseString } from 'xml2js';
17
- import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
-
19
- const execAsync = promisify(exec);
20
- const parseXml = promisify(parseString);
21
-
22
- // Dynamic import for mathml-to-latex (ESM)
23
- let MathMLToLaTeX: any = null;
24
- async function getMathMLConverter(): Promise<any> {
25
- if (!MathMLToLaTeX) {
26
- try {
27
- const module = await import('mathml-to-latex');
28
- MathMLToLaTeX = module.MathMLToLaTeX;
29
- } catch {
30
- return null;
31
- }
32
- }
33
- return MathMLToLaTeX;
34
- }
35
-
36
- /**
37
- * Extract all equations from markdown text
38
- */
39
- export function extractEquations(text: string, file: string = ''): Equation[] {
40
- const equations: Equation[] = [];
41
- const lines = text.split('\n');
42
-
43
- let inDisplayMath = false;
44
- let displayMathStart = 0;
45
- let displayMathContent = '';
46
-
47
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
48
- const line = lines[lineNum];
49
- if (!line) continue;
50
-
51
- // Skip code blocks
52
- if (line.trim().startsWith('```')) continue;
53
-
54
- // Handle inline math ($...$) in a segment of text
55
- // Careful not to match $$ or escaped \$
56
- const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
57
- const extractInline = (segment: string): void => {
58
- let match;
59
- inlinePattern.lastIndex = 0;
60
- while ((match = inlinePattern.exec(segment)) !== null) {
61
- const content = match[1];
62
- if (content) {
63
- equations.push({
64
- type: 'inline',
65
- content: content.trim(),
66
- line: lineNum + 1,
67
- file,
68
- });
69
- }
70
- }
71
- };
72
-
73
- // Handle display math blocks ($$...$$)
74
- if (line.includes('$$')) {
75
- const parts = line.split('$$');
76
-
77
- if (!inDisplayMath && parts.length >= 3) {
78
- // Single-line display math: $$content$$
79
- // Also extract inline math from surrounding text
80
- if (parts[0]) extractInline(parts[0]); // Text before $$
81
- for (let i = 1; i < parts.length; i += 2) {
82
- const part = parts[i];
83
- if (part && part.trim()) {
84
- equations.push({
85
- type: 'display',
86
- content: part.trim(),
87
- line: lineNum + 1,
88
- file,
89
- });
90
- }
91
- }
92
- // Extract inline from text after the last $$
93
- const lastPart = parts[parts.length - 1];
94
- if (parts.length % 2 === 1 && lastPart) {
95
- extractInline(lastPart);
96
- }
97
- } else if (!inDisplayMath) {
98
- // Start of multi-line display math
99
- if (parts[0]) extractInline(parts[0]); // Text before $$
100
- inDisplayMath = true;
101
- displayMathStart = lineNum + 1;
102
- displayMathContent = parts[1] || '';
103
- } else {
104
- // End of multi-line display math
105
- inDisplayMath = false;
106
- displayMathContent += '\n' + (parts[0] || '');
107
- if (displayMathContent.trim()) {
108
- equations.push({
109
- type: 'display',
110
- content: displayMathContent.trim(),
111
- line: displayMathStart,
112
- file,
113
- });
114
- }
115
- displayMathContent = '';
116
- // Text after $$ on closing line
117
- const afterPart = parts[1];
118
- if (afterPart) {
119
- extractInline(afterPart);
120
- }
121
- }
122
- continue;
123
- }
124
-
125
- if (inDisplayMath) {
126
- displayMathContent += '\n' + line;
127
- continue;
128
- }
129
-
130
- // No display math on this line - extract inline math
131
- extractInline(line);
132
- }
133
-
134
- return equations;
135
- }
136
-
137
- /**
138
- * Generate a markdown document with numbered equations
139
- * Useful for creating an equation reference sheet
140
- */
141
- export function generateEquationSheet(equations: Equation[]): string {
142
- const lines: string[] = [];
143
- lines.push('# Equations');
144
- lines.push('');
145
-
146
- let displayNum = 0;
147
- let inlineNum = 0;
148
-
149
- // Group by file
150
- const byFile = new Map<string, Equation[]>();
151
- for (const eq of equations) {
152
- if (!byFile.has(eq.file)) {
153
- byFile.set(eq.file, []);
154
- }
155
- byFile.get(eq.file)!.push(eq);
156
- }
157
-
158
- for (const [file, fileEqs] of byFile) {
159
- if (file) {
160
- lines.push(`## ${file}`);
161
- lines.push('');
162
- }
163
-
164
- for (const eq of fileEqs) {
165
- if (eq.type === 'display') {
166
- displayNum++;
167
- lines.push(`### Equation ${displayNum} (line ${eq.line})`);
168
- lines.push('');
169
- lines.push('```latex');
170
- lines.push(eq.content);
171
- lines.push('```');
172
- lines.push('');
173
- lines.push('$$' + eq.content + '$$');
174
- lines.push('');
175
- } else {
176
- inlineNum++;
177
- lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
178
- }
179
- }
180
- lines.push('');
181
- }
182
-
183
- lines.push('---');
184
- lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
185
-
186
- return lines.join('\n');
187
- }
188
-
189
- interface ConvertToWordOptions {
190
- preserveLatex?: boolean;
191
- }
192
-
193
- /**
194
- * Convert markdown with equations to Word using pandoc
195
- */
196
- export async function convertToWord(
197
- inputPath: string,
198
- outputPath: string,
199
- options: ConvertToWordOptions = {}
200
- ): Promise<{ success: boolean; message: string }> {
201
- const { preserveLatex = false } = options;
202
-
203
- // Check pandoc is available
204
- try {
205
- await execAsync('pandoc --version');
206
- } catch {
207
- return { success: false, message: 'Pandoc not found. Install pandoc first.' };
208
- }
209
-
210
- // Build pandoc command
211
- // Use --mathml for better equation rendering in Word
212
- const args = [
213
- 'pandoc',
214
- `"${inputPath}"`,
215
- '-o', `"${outputPath}"`,
216
- '--mathml', // Better equation support in Word
217
- ];
218
-
219
- if (preserveLatex) {
220
- // Keep raw LaTeX (less compatible but preserves source)
221
- args.push('--wrap=preserve');
222
- }
223
-
224
- try {
225
- await execAsync(args.join(' '));
226
- return { success: true, message: `Created ${outputPath}` };
227
- } catch (err: any) {
228
- return { success: false, message: err.message };
229
- }
230
- }
231
-
232
- /**
233
- * Create a simple equations-only document
234
- */
235
- export async function createEquationsDoc(
236
- inputPath: string,
237
- outputPath: string
238
- ): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
239
- if (!fs.existsSync(inputPath)) {
240
- return { success: false, message: `File not found: ${inputPath}`, stats: null };
241
- }
242
-
243
- const text = fs.readFileSync(inputPath, 'utf-8');
244
- const equations = extractEquations(text, path.basename(inputPath));
245
-
246
- if (equations.length === 0) {
247
- return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
248
- }
249
-
250
- const sheet = generateEquationSheet(equations);
251
- const stats = {
252
- display: equations.filter(e => e.type === 'display').length,
253
- inline: equations.filter(e => e.type === 'inline').length,
254
- };
255
-
256
- const ext = path.extname(outputPath).toLowerCase();
257
-
258
- if (ext === '.docx') {
259
- // Write temp md, convert to docx
260
- const tempMd = outputPath.replace('.docx', '.tmp.md');
261
- fs.writeFileSync(tempMd, sheet, 'utf-8');
262
- const result = await convertToWord(tempMd, outputPath);
263
- fs.unlinkSync(tempMd);
264
- return { ...result, stats };
265
- } else {
266
- // Write as markdown
267
- fs.writeFileSync(outputPath, sheet, 'utf-8');
268
- return { success: true, message: `Created ${outputPath}`, stats };
269
- }
270
- }
271
-
272
- /**
273
- * Get equation statistics for a file or directory
274
- */
275
- export function getEquationStats(files: string[]): EquationStats {
276
- let totalDisplay = 0;
277
- let totalInline = 0;
278
- const byFile: Array<{ file: string; display: number; inline: number }> = [];
279
-
280
- for (const file of files) {
281
- if (!fs.existsSync(file)) continue;
282
- const text = fs.readFileSync(file, 'utf-8');
283
- const equations = extractEquations(text, path.basename(file));
284
-
285
- const display = equations.filter(e => e.type === 'display').length;
286
- const inline = equations.filter(e => e.type === 'inline').length;
287
-
288
- totalDisplay += display;
289
- totalInline += inline;
290
-
291
- if (display > 0 || inline > 0) {
292
- byFile.push({ file: path.basename(file), display, inline });
293
- }
294
- }
295
-
296
- return {
297
- total: totalDisplay + totalInline,
298
- display: totalDisplay,
299
- inline: totalInline,
300
- byFile,
301
- };
302
- }
303
-
304
- /**
305
- * Extract equations from a Word document using Pandoc
306
- * Converts OMML (Office Math Markup) to LaTeX
307
- */
308
- export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
309
- if (!fs.existsSync(docxPath)) {
310
- return { success: false, equations: [], error: `File not found: ${docxPath}` };
311
- }
312
-
313
- // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
314
- try {
315
- const { stdout } = await execAsync(
316
- `pandoc "${docxPath}" -t markdown --wrap=none`,
317
- { maxBuffer: 50 * 1024 * 1024 }
318
- );
319
-
320
- // Extract equations from the markdown output
321
- const equations = extractEquations(stdout, path.basename(docxPath));
322
-
323
- return {
324
- success: true,
325
- equations: equations.map((eq, i) => ({
326
- type: eq.type,
327
- latex: eq.content,
328
- position: i,
329
- line: eq.line,
330
- })),
331
- };
332
- } catch (err) {
333
- // Pandoc failed, try fallback method
334
- return extractEquationsFromWordDirect(docxPath);
335
- }
336
- }
337
-
338
- /**
339
- * Direct OMML extraction from Word document (fallback if Pandoc fails)
340
- * Parses document.xml for <m:oMath> elements and attempts conversion
341
- */
342
- async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
343
- try {
344
- const zip = new AdmZip(docxPath);
345
- const documentEntry = zip.getEntry('word/document.xml');
346
-
347
- if (!documentEntry) {
348
- return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
349
- }
350
-
351
- const documentXml = zip.readAsText(documentEntry);
352
-
353
- // Find all OMML equations (<m:oMath> or <m:oMathPara>)
354
- const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
355
- const matches = documentXml.match(ommlPattern) || [];
356
-
357
- if (matches.length === 0) {
358
- return { success: true, equations: [] };
359
- }
360
-
361
- // Try to convert OMML to LaTeX via MathML intermediate
362
- const Converter = await getMathMLConverter();
363
- const equations: WordEquationResult['equations'] = [];
364
-
365
- for (let i = 0; i < matches.length; i++) {
366
- const omml = matches[i];
367
- if (!omml) continue;
368
-
369
- // Attempt OMML → MathML → LaTeX conversion
370
- // Note: This is a simplified approach; full OMML→MathML requires XSLT
371
- try {
372
- const latex = await ommlToLatex(omml, Converter);
373
- if (latex) {
374
- equations.push({
375
- type: isDisplayMath(omml) ? 'display' : 'inline',
376
- latex,
377
- position: i,
378
- raw: omml.substring(0, 100) + '...',
379
- });
380
- }
381
- } catch {
382
- // Keep raw OMML reference if conversion fails
383
- equations.push({
384
- type: 'unknown',
385
- latex: null,
386
- position: i,
387
- raw: omml.substring(0, 100) + '...',
388
- error: 'Conversion failed',
389
- });
390
- }
391
- }
392
-
393
- return { success: true, equations };
394
- } catch (err: any) {
395
- return { success: false, equations: [], error: err.message };
396
- }
397
- }
398
-
399
- /**
400
- * Check if OMML represents display math (equation on its own line)
401
- */
402
- function isDisplayMath(omml: string): boolean {
403
- return omml.includes('<m:oMathPara') || omml.includes('m:jc');
404
- }
405
-
406
- /**
407
- * Convert OMML to LaTeX (simplified approach)
408
- * For complex equations, Pandoc method is more reliable
409
- */
410
- async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
411
- if (!Converter) return null;
412
-
413
- // Extract key elements from OMML and build approximate MathML
414
- // This is a simplified conversion - not all OMML features are supported
415
- try {
416
- // Build basic MathML from OMML structure
417
- const mathml = ommlToMathML(omml);
418
- if (!mathml) return null;
419
-
420
- // Convert MathML to LaTeX
421
- const latex = Converter.convert(mathml);
422
- return latex;
423
- } catch {
424
- return null;
425
- }
426
- }
427
-
428
- /**
429
- * Convert OMML to MathML (simplified)
430
- * Maps common OMML elements to MathML equivalents
431
- */
432
- function ommlToMathML(omml: string): string | null {
433
- // Remove namespace prefixes for easier parsing
434
- let xml = omml
435
- .replace(/<m:/g, '<')
436
- .replace(/<\/m:/g, '</')
437
- .replace(/<w:/g, '<w_')
438
- .replace(/<\/w:/g, '</w_');
439
-
440
- // Map OMML elements to MathML
441
- const mappings: Array<[RegExp, string]> = [
442
- [/<oMath[^>]*>/gi, '<math xmlns="http://www.w3.org/1998/Math/MathML">'],
443
- [/<\/oMath>/gi, '</math>'],
444
- [/<r>/gi, '<mi>'],
445
- [/<\/r>/gi, '</mi>'],
446
- [/<t>/gi, ''],
447
- [/<\/t>/gi, ''],
448
- [/<f>/gi, '<mfrac>'],
449
- [/<\/f>/gi, '</mfrac>'],
450
- [/<num>/gi, '<mrow>'],
451
- [/<\/num>/gi, '</mrow>'],
452
- [/<den>/gi, '<mrow>'],
453
- [/<\/den>/gi, '</mrow>'],
454
- [/<sup>/gi, '<msup><mrow>'],
455
- [/<\/sup>/gi, '</mrow></msup>'],
456
- [/<sub>/gi, '<msub><mrow>'],
457
- [/<\/sub>/gi, '</mrow></msub>'],
458
- [/<rad>/gi, '<msqrt>'],
459
- [/<\/rad>/gi, '</msqrt>'],
460
- [/<e>/gi, '<mrow>'],
461
- [/<\/e>/gi, '</mrow>'],
462
- // Remove elements we don't map
463
- [/<rPr>[\s\S]*?<\/rPr>/gi, ''],
464
- [/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
465
- [/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
466
- [/<[^>]*\/>/gi, ''], // Self-closing tags
467
- ];
468
-
469
- for (const [pattern, replacement] of mappings) {
470
- xml = xml.replace(pattern, replacement);
471
- }
472
-
473
- // Clean up any remaining unrecognized tags
474
- xml = xml.replace(/<[a-zA-Z][^>]*>/g, '').replace(/<\/[a-zA-Z]+>/g, '');
475
-
476
- // Wrap in math if not already
477
- if (!xml.includes('<math')) {
478
- xml = `<math xmlns="http://www.w3.org/1998/Math/MathML">${xml}</math>`;
479
- }
480
-
481
- return xml;
482
- }
483
-
484
- /**
485
- * Get equation summary from Word document
486
- */
487
- export async function getWordEquationStats(
488
- docxPath: string
489
- ): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
490
- const result = await extractEquationsFromWord(docxPath);
491
-
492
- if (!result.success) {
493
- return { count: 0, display: 0, inline: 0, converted: 0, error: result.error };
494
- }
495
-
496
- const display = result.equations.filter(e => e.type === 'display').length;
497
- const inline = result.equations.filter(e => e.type === 'inline').length;
498
- const converted = result.equations.filter(e => e.latex).length;
499
-
500
- return {
501
- count: result.equations.length,
502
- display,
503
- inline,
504
- converted,
505
- };
506
- }
1
+ /**
2
+ * Equation extraction and conversion utilities
3
+ * Handle LaTeX math in Markdown ↔ Word workflows
4
+ *
5
+ * Supports:
6
+ * - Extract LaTeX equations from Markdown
7
+ * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
+ * - Convert Markdown with equations to Word (LaTeX → MathML)
9
+ */
10
+
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import { exec } from 'child_process';
14
+ import { promisify } from 'util';
15
+ import AdmZip from 'adm-zip';
16
+ import { parseString } from 'xml2js';
17
+ import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
+
19
+ const execAsync = promisify(exec);
20
+ const parseXml = promisify(parseString);
21
+
22
+ // Dynamic import for mathml-to-latex (ESM)
23
+ let MathMLToLaTeX: any = null;
24
+ async function getMathMLConverter(): Promise<any> {
25
+ if (!MathMLToLaTeX) {
26
+ try {
27
+ const module = await import('mathml-to-latex');
28
+ MathMLToLaTeX = module.MathMLToLaTeX;
29
+ } catch {
30
+ return null;
31
+ }
32
+ }
33
+ return MathMLToLaTeX;
34
+ }
35
+
36
+ /**
37
+ * Extract all equations from markdown text
38
+ */
39
+ export function extractEquations(text: string, file: string = ''): Equation[] {
40
+ const equations: Equation[] = [];
41
+ const lines = text.split('\n');
42
+
43
+ let inDisplayMath = false;
44
+ let displayMathStart = 0;
45
+ let displayMathContent = '';
46
+
47
+ for (let lineNum = 0; lineNum < lines.length; lineNum++) {
48
+ const line = lines[lineNum];
49
+ if (!line) continue;
50
+
51
+ // Skip code blocks
52
+ if (line.trim().startsWith('```')) continue;
53
+
54
+ // Handle inline math ($...$) in a segment of text
55
+ // Careful not to match $$ or escaped \$
56
+ const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
57
+ const extractInline = (segment: string): void => {
58
+ let match;
59
+ inlinePattern.lastIndex = 0;
60
+ while ((match = inlinePattern.exec(segment)) !== null) {
61
+ const content = match[1];
62
+ if (content) {
63
+ equations.push({
64
+ type: 'inline',
65
+ content: content.trim(),
66
+ line: lineNum + 1,
67
+ file,
68
+ });
69
+ }
70
+ }
71
+ };
72
+
73
+ // Handle display math blocks ($$...$$)
74
+ if (line.includes('$$')) {
75
+ const parts = line.split('$$');
76
+
77
+ if (!inDisplayMath && parts.length >= 3) {
78
+ // Single-line display math: $$content$$
79
+ // Also extract inline math from surrounding text
80
+ if (parts[0]) extractInline(parts[0]); // Text before $$
81
+ for (let i = 1; i < parts.length; i += 2) {
82
+ const part = parts[i];
83
+ if (part && part.trim()) {
84
+ equations.push({
85
+ type: 'display',
86
+ content: part.trim(),
87
+ line: lineNum + 1,
88
+ file,
89
+ });
90
+ }
91
+ }
92
+ // Extract inline from text after the last $$
93
+ const lastPart = parts[parts.length - 1];
94
+ if (parts.length % 2 === 1 && lastPart) {
95
+ extractInline(lastPart);
96
+ }
97
+ } else if (!inDisplayMath) {
98
+ // Start of multi-line display math
99
+ if (parts[0]) extractInline(parts[0]); // Text before $$
100
+ inDisplayMath = true;
101
+ displayMathStart = lineNum + 1;
102
+ displayMathContent = parts[1] || '';
103
+ } else {
104
+ // End of multi-line display math
105
+ inDisplayMath = false;
106
+ displayMathContent += '\n' + (parts[0] || '');
107
+ if (displayMathContent.trim()) {
108
+ equations.push({
109
+ type: 'display',
110
+ content: displayMathContent.trim(),
111
+ line: displayMathStart,
112
+ file,
113
+ });
114
+ }
115
+ displayMathContent = '';
116
+ // Text after $$ on closing line
117
+ const afterPart = parts[1];
118
+ if (afterPart) {
119
+ extractInline(afterPart);
120
+ }
121
+ }
122
+ continue;
123
+ }
124
+
125
+ if (inDisplayMath) {
126
+ displayMathContent += '\n' + line;
127
+ continue;
128
+ }
129
+
130
+ // No display math on this line - extract inline math
131
+ extractInline(line);
132
+ }
133
+
134
+ return equations;
135
+ }
136
+
137
+ /**
138
+ * Generate a markdown document with numbered equations
139
+ * Useful for creating an equation reference sheet
140
+ */
141
+ export function generateEquationSheet(equations: Equation[]): string {
142
+ const lines: string[] = [];
143
+ lines.push('# Equations');
144
+ lines.push('');
145
+
146
+ let displayNum = 0;
147
+ let inlineNum = 0;
148
+
149
+ // Group by file
150
+ const byFile = new Map<string, Equation[]>();
151
+ for (const eq of equations) {
152
+ if (!byFile.has(eq.file)) {
153
+ byFile.set(eq.file, []);
154
+ }
155
+ byFile.get(eq.file)!.push(eq);
156
+ }
157
+
158
+ for (const [file, fileEqs] of byFile) {
159
+ if (file) {
160
+ lines.push(`## ${file}`);
161
+ lines.push('');
162
+ }
163
+
164
+ for (const eq of fileEqs) {
165
+ if (eq.type === 'display') {
166
+ displayNum++;
167
+ lines.push(`### Equation ${displayNum} (line ${eq.line})`);
168
+ lines.push('');
169
+ lines.push('```latex');
170
+ lines.push(eq.content);
171
+ lines.push('```');
172
+ lines.push('');
173
+ lines.push('$$' + eq.content + '$$');
174
+ lines.push('');
175
+ } else {
176
+ inlineNum++;
177
+ lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
178
+ }
179
+ }
180
+ lines.push('');
181
+ }
182
+
183
+ lines.push('---');
184
+ lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
185
+
186
+ return lines.join('\n');
187
+ }
188
+
189
+ interface ConvertToWordOptions {
190
+ preserveLatex?: boolean;
191
+ }
192
+
193
+ /**
194
+ * Convert markdown with equations to Word using pandoc
195
+ */
196
+ export async function convertToWord(
197
+ inputPath: string,
198
+ outputPath: string,
199
+ options: ConvertToWordOptions = {}
200
+ ): Promise<{ success: boolean; message: string }> {
201
+ const { preserveLatex = false } = options;
202
+
203
+ // Check pandoc is available
204
+ try {
205
+ await execAsync('pandoc --version');
206
+ } catch {
207
+ return { success: false, message: 'Pandoc not found. Install pandoc first.' };
208
+ }
209
+
210
+ // Build pandoc command
211
+ // Use --mathml for better equation rendering in Word
212
+ const args = [
213
+ 'pandoc',
214
+ `"${inputPath}"`,
215
+ '-o', `"${outputPath}"`,
216
+ '--mathml', // Better equation support in Word
217
+ ];
218
+
219
+ if (preserveLatex) {
220
+ // Keep raw LaTeX (less compatible but preserves source)
221
+ args.push('--wrap=preserve');
222
+ }
223
+
224
+ try {
225
+ await execAsync(args.join(' '));
226
+ return { success: true, message: `Created ${outputPath}` };
227
+ } catch (err: any) {
228
+ return { success: false, message: err.message };
229
+ }
230
+ }
231
+
232
+ /**
233
+ * Create a simple equations-only document
234
+ */
235
+ export async function createEquationsDoc(
236
+ inputPath: string,
237
+ outputPath: string
238
+ ): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
239
+ if (!fs.existsSync(inputPath)) {
240
+ return { success: false, message: `File not found: ${inputPath}`, stats: null };
241
+ }
242
+
243
+ const text = fs.readFileSync(inputPath, 'utf-8');
244
+ const equations = extractEquations(text, path.basename(inputPath));
245
+
246
+ if (equations.length === 0) {
247
+ return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
248
+ }
249
+
250
+ const sheet = generateEquationSheet(equations);
251
+ const stats = {
252
+ display: equations.filter(e => e.type === 'display').length,
253
+ inline: equations.filter(e => e.type === 'inline').length,
254
+ };
255
+
256
+ const ext = path.extname(outputPath).toLowerCase();
257
+
258
+ if (ext === '.docx') {
259
+ // Write temp md, convert to docx
260
+ const tempMd = outputPath.replace('.docx', '.tmp.md');
261
+ fs.writeFileSync(tempMd, sheet, 'utf-8');
262
+ const result = await convertToWord(tempMd, outputPath);
263
+ fs.unlinkSync(tempMd);
264
+ return { ...result, stats };
265
+ } else {
266
+ // Write as markdown
267
+ fs.writeFileSync(outputPath, sheet, 'utf-8');
268
+ return { success: true, message: `Created ${outputPath}`, stats };
269
+ }
270
+ }
271
+
272
+ /**
273
+ * Get equation statistics for a file or directory
274
+ */
275
+ export function getEquationStats(files: string[]): EquationStats {
276
+ let totalDisplay = 0;
277
+ let totalInline = 0;
278
+ const byFile: Array<{ file: string; display: number; inline: number }> = [];
279
+
280
+ for (const file of files) {
281
+ if (!fs.existsSync(file)) continue;
282
+ const text = fs.readFileSync(file, 'utf-8');
283
+ const equations = extractEquations(text, path.basename(file));
284
+
285
+ const display = equations.filter(e => e.type === 'display').length;
286
+ const inline = equations.filter(e => e.type === 'inline').length;
287
+
288
+ totalDisplay += display;
289
+ totalInline += inline;
290
+
291
+ if (display > 0 || inline > 0) {
292
+ byFile.push({ file: path.basename(file), display, inline });
293
+ }
294
+ }
295
+
296
+ return {
297
+ total: totalDisplay + totalInline,
298
+ display: totalDisplay,
299
+ inline: totalInline,
300
+ byFile,
301
+ };
302
+ }
303
+
304
/**
 * Extract equations from a Word document using Pandoc.
 *
 * Pandoc converts the document (including OMML, Office Math Markup) to
 * markdown with LaTeX math, which is then scanned with `extractEquations`.
 * If Pandoc cannot be run or the scan throws, the direct OMML fallback
 * `extractEquationsFromWordDirect` is used instead.
 *
 * @param docxPath - Path to the .docx file.
 * @returns `success: false` with an error when the file does not exist;
 *          otherwise the equations in document order, or whatever the
 *          fallback extractor returns.
 */
export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
  if (!fs.existsSync(docxPath)) {
    return { success: false, equations: [], error: `File not found: ${docxPath}` };
  }

  // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
  try {
    // Run pandoc without a shell: the path is passed as a single argv entry,
    // so quotes, `$`, backticks or spaces in the file name can neither break
    // the command nor be interpreted by a shell.
    const { execFile } = await import('node:child_process');
    const { promisify } = await import('node:util');
    const execFileAsync = promisify(execFile);

    const { stdout } = await execFileAsync(
      'pandoc',
      // An absolute path can never be mistaken for a command-line option.
      [path.resolve(docxPath), '-t', 'markdown', '--wrap=none'],
      { maxBuffer: 50 * 1024 * 1024 }
    );

    // Extract equations from the markdown output
    const equations = extractEquations(stdout, path.basename(docxPath));

    return {
      success: true,
      equations: equations.map((eq, i) => ({
        type: eq.type,
        latex: eq.content,
        position: i,
        line: eq.line,
      })),
    };
  } catch {
    // Pandoc missing or failed: fall back to parsing the OMML directly.
    return extractEquationsFromWordDirect(docxPath);
  }
}
337
+
338
/**
 * Direct OMML extraction from a Word document (fallback if Pandoc fails).
 *
 * Reads `word/document.xml` from the .docx archive, finds the `<m:oMath…>`
 * elements and attempts an OMML → MathML → LaTeX conversion for each one.
 * An equation that cannot be converted is still reported, with
 * `type: 'unknown'`, `latex: null` and `error: 'Conversion failed'`, so
 * callers can tell "no equations" apart from "equations we could not convert".
 *
 * @param docxPath - Path to the .docx file.
 * @returns `success: false` with an error message when the archive cannot be
 *          read or has no `word/document.xml`; otherwise one entry per OMML
 *          match, in document order.
 */
async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
  try {
    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');

    if (!documentEntry) {
      return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
    }

    const documentXml = zip.readAsText(documentEntry);

    // Find all OMML equations. The opening pattern also matches
    // `<m:oMathPara…>`, so a paragraph-level wrapper is kept at the start of
    // the match and can be detected by isDisplayMath.
    const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
    const matches = documentXml.match(ommlPattern) ?? [];

    if (matches.length === 0) {
      return { success: true, equations: [] };
    }

    // Try to convert OMML to LaTeX via MathML intermediate
    const Converter = await getMathMLConverter();
    const equations: WordEquationResult['equations'] = [];

    for (let i = 0; i < matches.length; i++) {
      const omml = matches[i];
      if (!omml) continue;

      const raw = omml.substring(0, 100) + '...';

      // Attempt OMML → MathML → LaTeX conversion.
      // Note: This is a simplified approach; full OMML→MathML requires XSLT.
      // ommlToLatex reports failure by returning null, so a null/empty result
      // is treated exactly like a thrown error.
      let latex: string | null = null;
      try {
        latex = await ommlToLatex(omml, Converter);
      } catch {
        latex = null;
      }

      if (latex) {
        equations.push({
          type: isDisplayMath(omml) ? 'display' : 'inline',
          latex,
          position: i,
          raw,
        });
      } else {
        // Keep raw OMML reference if conversion fails
        equations.push({
          type: 'unknown',
          latex: null,
          position: i,
          raw,
          error: 'Conversion failed',
        });
      }
    }

    return { success: true, equations };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { success: false, equations: [], error: message };
  }
}
398
+
399
/**
 * Decide whether an OMML fragment is display math (an equation set on its
 * own line) rather than inline math.
 *
 * The fragment counts as display math when it contains either a
 * `<m:oMathPara` wrapper or an `m:jc` (justification) marker.
 *
 * @param omml - Raw OMML markup of one equation.
 * @returns `true` for display math, `false` otherwise.
 */
function isDisplayMath(omml: string): boolean {
  const displayMarkers = ['<m:oMathPara', 'm:jc'];
  return displayMarkers.some(marker => omml.includes(marker));
}
405
+
406
/**
 * Best-effort OMML → LaTeX conversion by way of an approximate MathML form.
 * For complex equations the Pandoc route is more reliable.
 *
 * @param omml - Raw OMML markup of one equation.
 * @param Converter - Object exposing `convert(mathml)`; when falsy no
 *                    conversion is attempted.
 * @returns Whatever `Converter.convert` produces, or `null` when there is no
 *          converter, no MathML could be built, or the conversion threw.
 */
async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
  if (Converter) {
    try {
      // Not every OMML feature survives this simplified mapping.
      const mathml = ommlToMathML(omml);
      return mathml ? Converter.convert(mathml) : null;
    } catch {
      return null;
    }
  }

  return null;
}
427
+
428
/** Opening `<math>` tag carrying the MathML namespace declaration. */
const MATHML_ROOT_OPEN = '<math xmlns="http://www.w3.org/1998/Math/MathML">';

/**
 * OMML element name (lower-cased, `m:` prefix removed) → [opening, closing]
 * MathML tag. Elements that are not listed are dropped, keeping their text.
 */
const OMML_TO_MATHML_TAGS: ReadonlyMap<string, readonly [string, string]> = new Map<
  string,
  readonly [string, string]
>([
  ['omath', [MATHML_ROOT_OPEN, '</math>']],
  ['r', ['<mi>', '</mi>']],
  ['f', ['<mfrac>', '</mfrac>']],
  ['num', ['<mrow>', '</mrow>']],
  ['den', ['<mrow>', '</mrow>']],
  // Script containers hold <e> (base) followed by <sub>/<sup>, which is the
  // child order msub / msup / msubsup expect.
  ['ssup', ['<msup>', '</msup>']],
  ['ssub', ['<msub>', '</msub>']],
  ['ssubsup', ['<msubsup>', '</msubsup>']],
  ['sup', ['<mrow>', '</mrow>']],
  ['sub', ['<mrow>', '</mrow>']],
  ['rad', ['<msqrt>', '</msqrt>']],
  ['e', ['<mrow>', '</mrow>']],
]);

/**
 * Convert OMML to MathML (simplified).
 *
 * Maps common OMML elements (runs, fractions, sub/superscripts, radicals) to
 * their MathML equivalents. Property subtrees (`rPr`, `ctrlPr`),
 * WordprocessingML (`w:`) elements and self-closing tags are removed; any
 * other unrecognized element is dropped while its text content is kept.
 *
 * Limitations: every run becomes `<mi>` (numbers and operators are not
 * distinguished) and a radical's degree is not mapped to `<mroot>`.
 *
 * @param omml - Raw OMML markup, with or without the `<m:oMath>` root.
 * @returns MathML markup that always contains a `<math>` element. The
 *          `null` in the signature is kept for callers; it is never returned.
 */
function ommlToMathML(omml: string): string | null {
  // Remove namespace prefixes for easier parsing
  let xml = omml
    .replace(/<m:/g, '<')
    .replace(/<\/m:/g, '</')
    .replace(/<w:/g, '<w_')
    .replace(/<\/w:/g, '</w_');

  // Drop self-closing tags first so a stray `<w_x/>` cannot pair up with a
  // later `</w_…>` and swallow the content in between.
  xml = xml
    .replace(/<[^<>]*\/>/g, '')
    // Remove elements we don't map (formatting properties, Word markup)
    .replace(/<rPr>[\s\S]*?<\/rPr>/gi, '')
    .replace(/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, '')
    .replace(/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, '');

  // Rewrite every remaining tag in a single pass: known OMML names become
  // MathML, everything else disappears. Doing this in one pass (instead of
  // mapping and then stripping "unknown" tags) guarantees the generated
  // MathML tags are never stripped again, and matching on the exact element
  // name keeps `oMathPara` / `oMathParaPr` from being mistaken for `oMath`.
  xml = xml.replace(
    /<(\/?)([A-Za-z_][\w.-]*)[^>]*>/g,
    (_tag: string, slash: string, name: string) => {
      const pair = OMML_TO_MATHML_TAGS.get(name.toLowerCase());
      if (!pair) return '';
      return slash ? pair[1] : pair[0];
    }
  );

  // Wrap in math if not already
  if (!xml.includes('<math')) {
    xml = `${MATHML_ROOT_OPEN}${xml}</math>`;
  }

  return xml;
}
483
+
484
/**
 * Summarize the equations found in a Word document.
 *
 * @param docxPath - Path to the .docx file.
 * @returns Number of equations found (`count`), how many are display or
 *          inline, and how many carry LaTeX (`converted`). When extraction
 *          fails every number is 0 and `error` holds the extractor's message.
 */
export async function getWordEquationStats(
  docxPath: string
): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
  const result = await extractEquationsFromWord(docxPath);

  if (!result.success) {
    return { count: 0, display: 0, inline: 0, converted: 0, error: result.error };
  }

  // Single pass over the equations instead of one filter per counter.
  let display = 0;
  let inline = 0;
  let converted = 0;
  for (const eq of result.equations) {
    if (eq.type === 'display') {
      display += 1;
    } else if (eq.type === 'inline') {
      inline += 1;
    }
    if (eq.latex) {
      converted += 1;
    }
  }

  return { count: result.equations.length, display, inline, converted };
}