docrev 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -164
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -431
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/anchor-match.d.ts +1 -1
  11. package/dist/lib/anchor-match.d.ts.map +1 -1
  12. package/dist/lib/anchor-match.js +17 -47
  13. package/dist/lib/anchor-match.js.map +1 -1
  14. package/dist/lib/build.js +4 -4
  15. package/dist/lib/commands/context.d.ts +1 -1
  16. package/dist/lib/commands/context.d.ts.map +1 -1
  17. package/dist/lib/commands/context.js +1 -1
  18. package/dist/lib/commands/context.js.map +1 -1
  19. package/dist/lib/commands/sections.js +7 -7
  20. package/dist/lib/commands/sections.js.map +1 -1
  21. package/dist/lib/commands/sync.d.ts.map +1 -1
  22. package/dist/lib/commands/sync.js +15 -14
  23. package/dist/lib/commands/sync.js.map +1 -1
  24. package/dist/lib/commands/utilities.js +164 -164
  25. package/dist/lib/commands/verify-anchors.js +6 -6
  26. package/dist/lib/commands/verify-anchors.js.map +1 -1
  27. package/dist/lib/commands/word-tools.js +8 -8
  28. package/dist/lib/grammar.js +3 -3
  29. package/dist/lib/macro-filter.lua +201 -201
  30. package/dist/lib/pdf-comments.js +44 -44
  31. package/dist/lib/plugins.js +57 -57
  32. package/dist/lib/pptx-color-filter.lua +37 -37
  33. package/dist/lib/pptx-themes.js +115 -115
  34. package/dist/lib/sections.d.ts +35 -0
  35. package/dist/lib/sections.d.ts.map +1 -1
  36. package/dist/lib/sections.js +81 -0
  37. package/dist/lib/sections.js.map +1 -1
  38. package/dist/lib/spelling.js +2 -2
  39. package/dist/lib/templates.js +387 -387
  40. package/dist/lib/themes.js +51 -51
  41. package/docs-src/build.py +113 -113
  42. package/docs-src/extra.css +208 -208
  43. package/docs-src/md-to-html.lua +6 -6
  44. package/docs-src/template.html +116 -116
  45. package/eslint.config.js +27 -27
  46. package/lib/anchor-match.ts +276 -308
  47. package/lib/annotations.ts +644 -644
  48. package/lib/build.ts +1766 -1766
  49. package/lib/citations.ts +160 -160
  50. package/lib/commands/build.ts +855 -855
  51. package/lib/commands/citations.ts +515 -515
  52. package/lib/commands/comments.ts +1050 -1050
  53. package/lib/commands/context.ts +176 -174
  54. package/lib/commands/core.ts +309 -309
  55. package/lib/commands/doi.ts +435 -435
  56. package/lib/commands/file-ops.ts +372 -372
  57. package/lib/commands/history.ts +320 -320
  58. package/lib/commands/index.ts +87 -87
  59. package/lib/commands/init.ts +259 -259
  60. package/lib/commands/merge-resolve.ts +378 -378
  61. package/lib/commands/preview.ts +178 -178
  62. package/lib/commands/project-info.ts +244 -244
  63. package/lib/commands/quality.ts +517 -517
  64. package/lib/commands/response.ts +454 -454
  65. package/lib/commands/section-boundaries.ts +82 -82
  66. package/lib/commands/sections.ts +451 -451
  67. package/lib/commands/sync.ts +709 -706
  68. package/lib/commands/text-ops.ts +449 -449
  69. package/lib/commands/utilities.ts +448 -448
  70. package/lib/commands/verify-anchors.ts +272 -272
  71. package/lib/commands/word-tools.ts +340 -340
  72. package/lib/comment-realign.ts +517 -517
  73. package/lib/config.ts +84 -84
  74. package/lib/crossref.ts +781 -781
  75. package/lib/csl.ts +191 -191
  76. package/lib/dependencies.ts +98 -98
  77. package/lib/diff-engine.ts +465 -465
  78. package/lib/doi-cache.ts +115 -115
  79. package/lib/doi.ts +897 -897
  80. package/lib/equations.ts +506 -506
  81. package/lib/errors.ts +346 -346
  82. package/lib/format.ts +541 -541
  83. package/lib/git.ts +326 -326
  84. package/lib/grammar.ts +303 -303
  85. package/lib/image-registry.ts +180 -180
  86. package/lib/import.ts +911 -911
  87. package/lib/journals.ts +543 -543
  88. package/lib/macro-filter.lua +201 -201
  89. package/lib/macros.ts +273 -273
  90. package/lib/merge.ts +633 -633
  91. package/lib/orcid.ts +144 -144
  92. package/lib/pdf-comments.ts +263 -263
  93. package/lib/pdf-import.ts +524 -524
  94. package/lib/plugins.ts +362 -362
  95. package/lib/postprocess.ts +188 -188
  96. package/lib/pptx-color-filter.lua +37 -37
  97. package/lib/pptx-template.ts +469 -469
  98. package/lib/pptx-themes.ts +483 -483
  99. package/lib/protect-restore.ts +520 -520
  100. package/lib/rate-limiter.ts +94 -94
  101. package/lib/response.ts +197 -197
  102. package/lib/restore-references.ts +240 -240
  103. package/lib/review.ts +327 -327
  104. package/lib/schema.ts +488 -488
  105. package/lib/scientific-words.ts +73 -73
  106. package/lib/sections.ts +425 -335
  107. package/lib/slides.ts +756 -756
  108. package/lib/spelling.ts +334 -334
  109. package/lib/templates.ts +526 -526
  110. package/lib/themes.ts +742 -742
  111. package/lib/trackchanges.ts +247 -247
  112. package/lib/tui.ts +450 -450
  113. package/lib/types.ts +550 -550
  114. package/lib/undo.ts +250 -250
  115. package/lib/utils.ts +69 -69
  116. package/lib/variables.ts +179 -179
  117. package/lib/word-extraction.ts +806 -806
  118. package/lib/word.ts +643 -643
  119. package/lib/wordcomments.ts +840 -840
  120. package/mkdocs.yml +64 -64
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +47 -47
  123. package/skill/REFERENCE.md +539 -539
  124. package/skill/SKILL.md +295 -295
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
package/lib/equations.ts CHANGED
@@ -1,506 +1,506 @@
1
- /**
2
- * Equation extraction and conversion utilities
3
- * Handle LaTeX math in Markdown ↔ Word workflows
4
- *
5
- * Supports:
6
- * - Extract LaTeX equations from Markdown
7
- * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
- * - Convert Markdown with equations to Word (LaTeX → MathML)
9
- */
10
-
11
- import * as fs from 'fs';
12
- import * as path from 'path';
13
- import { exec } from 'child_process';
14
- import { promisify } from 'util';
15
- import AdmZip from 'adm-zip';
16
- import { parseString } from 'xml2js';
17
- import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
-
19
- const execAsync = promisify(exec);
20
- const parseXml = promisify(parseString);
21
-
22
- // Dynamic import for mathml-to-latex (ESM)
23
- let MathMLToLaTeX: any = null;
24
- async function getMathMLConverter(): Promise<any> {
25
- if (!MathMLToLaTeX) {
26
- try {
27
- const module = await import('mathml-to-latex');
28
- MathMLToLaTeX = module.MathMLToLaTeX;
29
- } catch {
30
- return null;
31
- }
32
- }
33
- return MathMLToLaTeX;
34
- }
35
-
36
- /**
37
- * Extract all equations from markdown text
38
- */
39
- export function extractEquations(text: string, file: string = ''): Equation[] {
40
- const equations: Equation[] = [];
41
- const lines = text.split('\n');
42
-
43
- let inDisplayMath = false;
44
- let displayMathStart = 0;
45
- let displayMathContent = '';
46
-
47
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
48
- const line = lines[lineNum];
49
- if (!line) continue;
50
-
51
- // Skip code blocks
52
- if (line.trim().startsWith('```')) continue;
53
-
54
- // Handle inline math ($...$) in a segment of text
55
- // Careful not to match $$ or escaped \$
56
- const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
57
- const extractInline = (segment: string): void => {
58
- let match;
59
- inlinePattern.lastIndex = 0;
60
- while ((match = inlinePattern.exec(segment)) !== null) {
61
- const content = match[1];
62
- if (content) {
63
- equations.push({
64
- type: 'inline',
65
- content: content.trim(),
66
- line: lineNum + 1,
67
- file,
68
- });
69
- }
70
- }
71
- };
72
-
73
- // Handle display math blocks ($$...$$)
74
- if (line.includes('$$')) {
75
- const parts = line.split('$$');
76
-
77
- if (!inDisplayMath && parts.length >= 3) {
78
- // Single-line display math: $$content$$
79
- // Also extract inline math from surrounding text
80
- if (parts[0]) extractInline(parts[0]); // Text before $$
81
- for (let i = 1; i < parts.length; i += 2) {
82
- const part = parts[i];
83
- if (part && part.trim()) {
84
- equations.push({
85
- type: 'display',
86
- content: part.trim(),
87
- line: lineNum + 1,
88
- file,
89
- });
90
- }
91
- }
92
- // Extract inline from text after the last $$
93
- const lastPart = parts[parts.length - 1];
94
- if (parts.length % 2 === 1 && lastPart) {
95
- extractInline(lastPart);
96
- }
97
- } else if (!inDisplayMath) {
98
- // Start of multi-line display math
99
- if (parts[0]) extractInline(parts[0]); // Text before $$
100
- inDisplayMath = true;
101
- displayMathStart = lineNum + 1;
102
- displayMathContent = parts[1] || '';
103
- } else {
104
- // End of multi-line display math
105
- inDisplayMath = false;
106
- displayMathContent += '\n' + (parts[0] || '');
107
- if (displayMathContent.trim()) {
108
- equations.push({
109
- type: 'display',
110
- content: displayMathContent.trim(),
111
- line: displayMathStart,
112
- file,
113
- });
114
- }
115
- displayMathContent = '';
116
- // Text after $$ on closing line
117
- const afterPart = parts[1];
118
- if (afterPart) {
119
- extractInline(afterPart);
120
- }
121
- }
122
- continue;
123
- }
124
-
125
- if (inDisplayMath) {
126
- displayMathContent += '\n' + line;
127
- continue;
128
- }
129
-
130
- // No display math on this line - extract inline math
131
- extractInline(line);
132
- }
133
-
134
- return equations;
135
- }
136
-
137
- /**
138
- * Generate a markdown document with numbered equations
139
- * Useful for creating an equation reference sheet
140
- */
141
- export function generateEquationSheet(equations: Equation[]): string {
142
- const lines: string[] = [];
143
- lines.push('# Equations');
144
- lines.push('');
145
-
146
- let displayNum = 0;
147
- let inlineNum = 0;
148
-
149
- // Group by file
150
- const byFile = new Map<string, Equation[]>();
151
- for (const eq of equations) {
152
- if (!byFile.has(eq.file)) {
153
- byFile.set(eq.file, []);
154
- }
155
- byFile.get(eq.file)!.push(eq);
156
- }
157
-
158
- for (const [file, fileEqs] of byFile) {
159
- if (file) {
160
- lines.push(`## ${file}`);
161
- lines.push('');
162
- }
163
-
164
- for (const eq of fileEqs) {
165
- if (eq.type === 'display') {
166
- displayNum++;
167
- lines.push(`### Equation ${displayNum} (line ${eq.line})`);
168
- lines.push('');
169
- lines.push('```latex');
170
- lines.push(eq.content);
171
- lines.push('```');
172
- lines.push('');
173
- lines.push('$$' + eq.content + '$$');
174
- lines.push('');
175
- } else {
176
- inlineNum++;
177
- lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
178
- }
179
- }
180
- lines.push('');
181
- }
182
-
183
- lines.push('---');
184
- lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
185
-
186
- return lines.join('\n');
187
- }
188
-
189
- interface ConvertToWordOptions {
190
- preserveLatex?: boolean;
191
- }
192
-
193
- /**
194
- * Convert markdown with equations to Word using pandoc
195
- */
196
- export async function convertToWord(
197
- inputPath: string,
198
- outputPath: string,
199
- options: ConvertToWordOptions = {}
200
- ): Promise<{ success: boolean; message: string }> {
201
- const { preserveLatex = false } = options;
202
-
203
- // Check pandoc is available
204
- try {
205
- await execAsync('pandoc --version');
206
- } catch {
207
- return { success: false, message: 'Pandoc not found. Install pandoc first.' };
208
- }
209
-
210
- // Build pandoc command
211
- // Use --mathml for better equation rendering in Word
212
- const args = [
213
- 'pandoc',
214
- `"${inputPath}"`,
215
- '-o', `"${outputPath}"`,
216
- '--mathml', // Better equation support in Word
217
- ];
218
-
219
- if (preserveLatex) {
220
- // Keep raw LaTeX (less compatible but preserves source)
221
- args.push('--wrap=preserve');
222
- }
223
-
224
- try {
225
- await execAsync(args.join(' '));
226
- return { success: true, message: `Created ${outputPath}` };
227
- } catch (err: any) {
228
- return { success: false, message: err.message };
229
- }
230
- }
231
-
232
- /**
233
- * Create a simple equations-only document
234
- */
235
- export async function createEquationsDoc(
236
- inputPath: string,
237
- outputPath: string
238
- ): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
239
- if (!fs.existsSync(inputPath)) {
240
- return { success: false, message: `File not found: ${inputPath}`, stats: null };
241
- }
242
-
243
- const text = fs.readFileSync(inputPath, 'utf-8');
244
- const equations = extractEquations(text, path.basename(inputPath));
245
-
246
- if (equations.length === 0) {
247
- return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
248
- }
249
-
250
- const sheet = generateEquationSheet(equations);
251
- const stats = {
252
- display: equations.filter(e => e.type === 'display').length,
253
- inline: equations.filter(e => e.type === 'inline').length,
254
- };
255
-
256
- const ext = path.extname(outputPath).toLowerCase();
257
-
258
- if (ext === '.docx') {
259
- // Write temp md, convert to docx
260
- const tempMd = outputPath.replace('.docx', '.tmp.md');
261
- fs.writeFileSync(tempMd, sheet, 'utf-8');
262
- const result = await convertToWord(tempMd, outputPath);
263
- fs.unlinkSync(tempMd);
264
- return { ...result, stats };
265
- } else {
266
- // Write as markdown
267
- fs.writeFileSync(outputPath, sheet, 'utf-8');
268
- return { success: true, message: `Created ${outputPath}`, stats };
269
- }
270
- }
271
-
272
- /**
273
- * Get equation statistics for a file or directory
274
- */
275
- export function getEquationStats(files: string[]): EquationStats {
276
- let totalDisplay = 0;
277
- let totalInline = 0;
278
- const byFile: Array<{ file: string; display: number; inline: number }> = [];
279
-
280
- for (const file of files) {
281
- if (!fs.existsSync(file)) continue;
282
- const text = fs.readFileSync(file, 'utf-8');
283
- const equations = extractEquations(text, path.basename(file));
284
-
285
- const display = equations.filter(e => e.type === 'display').length;
286
- const inline = equations.filter(e => e.type === 'inline').length;
287
-
288
- totalDisplay += display;
289
- totalInline += inline;
290
-
291
- if (display > 0 || inline > 0) {
292
- byFile.push({ file: path.basename(file), display, inline });
293
- }
294
- }
295
-
296
- return {
297
- total: totalDisplay + totalInline,
298
- display: totalDisplay,
299
- inline: totalInline,
300
- byFile,
301
- };
302
- }
303
-
304
- /**
305
- * Extract equations from a Word document using Pandoc
306
- * Converts OMML (Office Math Markup) to LaTeX
307
- */
308
- export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
309
- if (!fs.existsSync(docxPath)) {
310
- return { success: false, equations: [], error: `File not found: ${docxPath}` };
311
- }
312
-
313
- // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
314
- try {
315
- const { stdout } = await execAsync(
316
- `pandoc "${docxPath}" -t markdown --wrap=none`,
317
- { maxBuffer: 50 * 1024 * 1024 }
318
- );
319
-
320
- // Extract equations from the markdown output
321
- const equations = extractEquations(stdout, path.basename(docxPath));
322
-
323
- return {
324
- success: true,
325
- equations: equations.map((eq, i) => ({
326
- type: eq.type,
327
- latex: eq.content,
328
- position: i,
329
- line: eq.line,
330
- })),
331
- };
332
- } catch (err) {
333
- // Pandoc failed, try fallback method
334
- return extractEquationsFromWordDirect(docxPath);
335
- }
336
- }
337
-
338
- /**
339
- * Direct OMML extraction from Word document (fallback if Pandoc fails)
340
- * Parses document.xml for <m:oMath> elements and attempts conversion
341
- */
342
- async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
343
- try {
344
- const zip = new AdmZip(docxPath);
345
- const documentEntry = zip.getEntry('word/document.xml');
346
-
347
- if (!documentEntry) {
348
- return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
349
- }
350
-
351
- const documentXml = zip.readAsText(documentEntry);
352
-
353
- // Find all OMML equations (<m:oMath> or <m:oMathPara>)
354
- const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
355
- const matches = documentXml.match(ommlPattern) || [];
356
-
357
- if (matches.length === 0) {
358
- return { success: true, equations: [] };
359
- }
360
-
361
- // Try to convert OMML to LaTeX via MathML intermediate
362
- const Converter = await getMathMLConverter();
363
- const equations: WordEquationResult['equations'] = [];
364
-
365
- for (let i = 0; i < matches.length; i++) {
366
- const omml = matches[i];
367
- if (!omml) continue;
368
-
369
- // Attempt OMML → MathML → LaTeX conversion
370
- // Note: This is a simplified approach; full OMML→MathML requires XSLT
371
- try {
372
- const latex = await ommlToLatex(omml, Converter);
373
- if (latex) {
374
- equations.push({
375
- type: isDisplayMath(omml) ? 'display' : 'inline',
376
- latex,
377
- position: i,
378
- raw: omml.substring(0, 100) + '...',
379
- });
380
- }
381
- } catch {
382
- // Keep raw OMML reference if conversion fails
383
- equations.push({
384
- type: 'unknown',
385
- latex: null,
386
- position: i,
387
- raw: omml.substring(0, 100) + '...',
388
- error: 'Conversion failed',
389
- });
390
- }
391
- }
392
-
393
- return { success: true, equations };
394
- } catch (err: any) {
395
- return { success: false, equations: [], error: err.message };
396
- }
397
- }
398
-
399
- /**
400
- * Check if OMML represents display math (equation on its own line)
401
- */
402
- function isDisplayMath(omml: string): boolean {
403
- return omml.includes('<m:oMathPara') || omml.includes('m:jc');
404
- }
405
-
406
/**
 * Convert one OMML fragment to LaTeX (simplified approach).
 *
 * The fragment is first turned into approximate MathML by `ommlToMathML`,
 * then handed to `Converter.convert`. Not all OMML features are supported.
 *
 * @param omml - Raw OMML markup of one equation.
 * @param Converter - Object exposing `convert(mathml)`; may be null/undefined.
 * @returns The converter's result, or `null` when no converter was given,
 *          no MathML could be built, or either step threw.
 */
async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
  if (Converter) {
    try {
      const intermediate = ommlToMathML(omml);
      // An empty/absent MathML string means there is nothing to convert.
      return intermediate ? Converter.convert(intermediate) : null;
    } catch {
      // Conversion errors are reported as "no result" below.
    }
  }
  return null;
}
427
-
428
/**
 * Convert OMML to MathML (simplified).
 *
 * A regex-based approximation that maps common OMML elements to MathML:
 * runs → `<mi>`, fractions → `<mfrac>`, super/subscript objects →
 * `<msup>`/`<msub>`/`<msubsup>`, radicals → `<msqrt>`, and argument
 * containers (`e`, `num`, `den`, `sup`, `sub`) → `<mrow>`. Property blocks,
 * WordprocessingML (`w:`) content and self-closing tags are dropped. Any
 * other tag is removed while its text content is kept.
 *
 * Only tags that are NOT part of the generated MathML vocabulary are removed
 * in the final clean-up, so the structure built by the mappings survives.
 *
 * @param omml - Raw OMML markup, using `m:` / `w:` namespace prefixes.
 * @returns A MathML string; the result is always wrapped in a `<math>`
 *          element (this implementation never returns `null`).
 */
function ommlToMathML(omml: string): string | null {
  const mathOpen = '<math xmlns="http://www.w3.org/1998/Math/MathML">';

  // Remove namespace prefixes for easier parsing
  let xml = omml
    .replace(/<m:/g, '<')
    .replace(/<\/m:/g, '</')
    .replace(/<w:/g, '<w_')
    .replace(/<\/w:/g, '</w_');

  const mappings: Array<[RegExp, string]> = [
    // Self-closing tags go first so the block removers below cannot start a
    // lazy match on one of them and swallow unrelated content.
    [/<[^>]*\/>/g, ''],
    // Remove elements we don't map, together with their content
    [/<rPr>[\s\S]*?<\/rPr>/gi, ''],
    [/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
    [/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
    // Root element: match <oMath> exactly (optionally with attributes) so
    // that <oMathPara>/<oMathParaPr> are not turned into nested <math> tags.
    [/<oMath(?:\s[^>]*)?>/gi, mathOpen],
    [/<\/oMath>/gi, '</math>'],
    // Runs and their text
    [/<r>/gi, '<mi>'],
    [/<\/r>/gi, '</mi>'],
    [/<t>/gi, ''],
    [/<\/t>/gi, ''],
    // Fractions
    [/<f>/gi, '<mfrac>'],
    [/<\/f>/gi, '</mfrac>'],
    [/<num>/gi, '<mrow>'],
    [/<\/num>/gi, '</mrow>'],
    [/<den>/gi, '<mrow>'],
    [/<\/den>/gi, '</mrow>'],
    // Script objects: the wrapper becomes the MathML script element and its
    // children (base <e>, then <sub>/<sup>) become the <mrow> operands, which
    // is the child order MathML expects.
    [/<sSubSup>/gi, '<msubsup>'],
    [/<\/sSubSup>/gi, '</msubsup>'],
    [/<sSup>/gi, '<msup>'],
    [/<\/sSup>/gi, '</msup>'],
    [/<sSub>/gi, '<msub>'],
    [/<\/sSub>/gi, '</msub>'],
    [/<sup>/gi, '<mrow>'],
    [/<\/sup>/gi, '</mrow>'],
    [/<sub>/gi, '<mrow>'],
    [/<\/sub>/gi, '</mrow>'],
    // Radicals and generic argument containers
    [/<rad>/gi, '<msqrt>'],
    [/<\/rad>/gi, '</msqrt>'],
    [/<e>/gi, '<mrow>'],
    [/<\/e>/gi, '</mrow>'],
  ];

  for (const [pattern, replacement] of mappings) {
    xml = xml.replace(pattern, replacement);
  }

  // Strip remaining unrecognized tags (opening and closing) but keep the
  // MathML elements produced above; the lookahead is the allow-list.
  xml = xml.replace(
    /<\/?(?!(?:math|mi|mfrac|mrow|msup|msub|msubsup|msqrt)\b)[a-zA-Z][^>]*>/g,
    ''
  );

  // Wrap in math if not already
  if (!xml.includes('<math')) {
    xml = `${mathOpen}${xml}</math>`;
  }

  return xml;
}
483
-
484
/**
 * Summarize the equations found in a Word document.
 *
 * Runs `extractEquationsFromWord` and counts the returned entries by type.
 *
 * @param docxPath - Path to the `.docx` file.
 * @returns `count` is the number of entries returned, `display` / `inline`
 *          the entries of each type, and `converted` the entries that carry
 *          a non-empty `latex` value. When extraction fails, all counts are
 *          zero and `error` holds the extraction error.
 */
export async function getWordEquationStats(
  docxPath: string
): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
  const { success, equations, error } = await extractEquationsFromWord(docxPath);

  if (!success) {
    return { count: 0, display: 0, inline: 0, converted: 0, error };
  }

  // One pass over the entries instead of three separate filters.
  let display = 0;
  let inline = 0;
  let converted = 0;
  for (const entry of equations) {
    if (entry.type === 'display') {
      display += 1;
    } else if (entry.type === 'inline') {
      inline += 1;
    }
    if (entry.latex) {
      converted += 1;
    }
  }

  return { count: equations.length, display, inline, converted };
}
1
+ /**
2
+ * Equation extraction and conversion utilities
3
+ * Handle LaTeX math in Markdown ↔ Word workflows
4
+ *
5
+ * Supports:
6
+ * - Extract LaTeX equations from Markdown
7
+ * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
+ * - Convert Markdown with equations to Word (LaTeX → MathML)
9
+ */
10
+
11
import * as fs from 'fs';
import * as path from 'path';
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import AdmZip from 'adm-zip';
import { parseString } from 'xml2js';
import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
+
19
+ const execAsync = promisify(exec);
20
+ const parseXml = promisify(parseString);
21
+
22
// Lazily loaded `MathMLToLaTeX` export of the optional `mathml-to-latex`
// package; stays null until the first successful dynamic import.
let MathMLToLaTeX: any = null;

/**
 * Resolve the MathML → LaTeX converter, importing it on first use.
 *
 * @returns The cached converter, or `null` when the dynamic import fails
 *          (for example because the package is not installed).
 */
async function getMathMLConverter(): Promise<any> {
  // Fast path: a previous call already loaded the converter.
  if (MathMLToLaTeX) return MathMLToLaTeX;

  try {
    const loaded = await import('mathml-to-latex');
    MathMLToLaTeX = loaded.MathMLToLaTeX;
    return MathMLToLaTeX;
  } catch {
    // Import failed; callers treat null as "no converter available".
    return null;
  }
}
35
+
36
/**
 * Extract all equations from markdown text.
 *
 * The text is scanned line by line and two kinds of math are collected:
 * - inline math (`$...$`), reported with the line it appears on;
 * - display math (`$$...$$`), either closed on the same line or spanning
 *   several lines, reported with the line of the opening `$$`.
 *
 * Lines inside fenced code blocks (between ``` fence lines) are ignored, so
 * things like shell variables in code samples are not mistaken for math.
 * Empty lines are skipped, and a display block that is still open at the end
 * of the input is discarded.
 *
 * @param text - Markdown source to scan.
 * @param file - Label copied into the `file` field of every result.
 * @returns Equations in the order they are encountered, with 1-based lines.
 */
export function extractEquations(text: string, file: string = ''): Equation[] {
  const equations: Equation[] = [];
  const lines = text.split('\n');

  // Matches $...$ while rejecting $$ and an escaped \$.
  // Built once; lastIndex is reset before each scan because of the g flag.
  const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;

  // Collect every inline equation found in one segment of a line.
  const extractInline = (segment: string, lineNumber: number): void => {
    inlinePattern.lastIndex = 0;
    let match: RegExpExecArray | null;
    while ((match = inlinePattern.exec(segment)) !== null) {
      const content = match[1];
      if (content) {
        equations.push({
          type: 'inline',
          content: content.trim(),
          line: lineNumber,
          file,
        });
      }
    }
  };

  let inCodeBlock = false;
  let inDisplayMath = false;
  let displayMathStart = 0;
  let displayMathContent = '';

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    if (!line) continue;
    const lineNumber = lineNum + 1;

    // Fence handling: a line starting with ``` is never scanned for math.
    // It toggles the code-block state only when it is a real fence, i.e. no
    // further backtick follows the leading run (so "```x```" inline code at
    // the start of a line does not open a block), and only outside display
    // math so an open $$ block is not interrupted.
    const trimmed = line.trim();
    if (trimmed.startsWith('```')) {
      const isFence = !trimmed.replace(/^`+/, '').includes('`');
      if (isFence && !inDisplayMath) {
        inCodeBlock = !inCodeBlock;
      }
      continue;
    }
    if (inCodeBlock) continue;

    // Handle display math blocks ($$...$$)
    if (line.includes('$$')) {
      const parts = line.split('$$');

      if (!inDisplayMath && parts.length >= 3) {
        // Single-line display math: text $$content$$ text
        if (parts[0]) extractInline(parts[0], lineNumber); // Text before $$
        // Odd-indexed parts sit between a pair of $$ delimiters.
        for (let i = 1; i < parts.length; i += 2) {
          const part = parts[i];
          if (part && part.trim()) {
            equations.push({
              type: 'display',
              content: part.trim(),
              line: lineNumber,
              file,
            });
          }
        }
        // With an odd part count the last part is plain text after the final $$.
        const lastPart = parts[parts.length - 1];
        if (parts.length % 2 === 1 && lastPart) {
          extractInline(lastPart, lineNumber);
        }
      } else if (!inDisplayMath) {
        // Start of multi-line display math
        if (parts[0]) extractInline(parts[0], lineNumber); // Text before $$
        inDisplayMath = true;
        displayMathStart = lineNumber;
        displayMathContent = parts[1] || '';
      } else {
        // End of multi-line display math
        inDisplayMath = false;
        displayMathContent += '\n' + (parts[0] || '');
        if (displayMathContent.trim()) {
          equations.push({
            type: 'display',
            content: displayMathContent.trim(),
            line: displayMathStart,
            file,
          });
        }
        displayMathContent = '';
        // Text after $$ on the closing line may still hold inline math.
        const afterPart = parts[1];
        if (afterPart) {
          extractInline(afterPart, lineNumber);
        }
      }
      continue;
    }

    if (inDisplayMath) {
      // Interior line of a multi-line display block.
      displayMathContent += '\n' + line;
      continue;
    }

    // No display math on this line - extract inline math
    extractInline(line, lineNumber);
  }

  return equations;
}
136
+
137
/**
 * Render a list of equations as a markdown reference sheet.
 *
 * Equations are grouped under a `## <file>` heading per distinct `file`
 * value (in first-seen order; no heading when the value is empty). Display
 * equations get a numbered section showing both the LaTeX source and the
 * rendered `$$…$$` form; inline equations become numbered bullet items.
 * Numbering runs across all files, and a totals line closes the sheet.
 *
 * @param equations - Equations to list.
 * @returns The sheet as one newline-joined markdown string.
 */
export function generateEquationSheet(equations: Equation[]): string {
  // Bucket equations per source file; a Map keeps first-seen file order.
  const grouped = new Map<string, Equation[]>();
  equations.forEach((equation) => {
    const bucket = grouped.get(equation.file);
    if (bucket) {
      bucket.push(equation);
    } else {
      grouped.set(equation.file, [equation]);
    }
  });

  const out: string[] = ['# Equations', ''];
  let displayCount = 0;
  let inlineCount = 0;

  grouped.forEach((entries, fileName) => {
    if (fileName) {
      out.push(`## ${fileName}`, '');
    }

    for (const { type, content, line } of entries) {
      if (type !== 'display') {
        inlineCount += 1;
        out.push(`- **Inline ${inlineCount}** (line ${line}): \`$${content}$\` → $${content}$`);
        continue;
      }

      displayCount += 1;
      out.push(
        `### Equation ${displayCount} (line ${line})`,
        '',
        '```latex',
        content,
        '```',
        '',
        '$$' + content + '$$',
        '',
      );
    }

    // Blank line separating this file's group from what follows.
    out.push('');
  });

  out.push('---', `Total: ${displayCount} display equations, ${inlineCount} inline equations`);

  return out.join('\n');
}
188
+
189
/** Options accepted by {@link convertToWord}. */
interface ConvertToWordOptions {
  /** When true, `--wrap=preserve` is added to the pandoc arguments. */
  preserveLatex?: boolean;
}

/**
 * Convert markdown with equations to Word using pandoc.
 *
 * Pandoc is started directly with an argument array (no shell), so paths
 * containing spaces, quotes, `$` or backticks are passed through verbatim
 * instead of being interpreted by a shell.
 *
 * @param inputPath - Markdown file handed to pandoc as input.
 * @param outputPath - Destination passed to pandoc via `-o`.
 * @param options - See {@link ConvertToWordOptions}.
 * @returns `success: true` with a "Created …" message when pandoc exits
 *          cleanly; otherwise `success: false` with the failure message.
 *          This function does not throw for pandoc failures.
 */
export async function convertToWord(
  inputPath: string,
  outputPath: string,
  options: ConvertToWordOptions = {}
): Promise<{ success: boolean; message: string }> {
  const { preserveLatex = false } = options;
  const execFileAsync = promisify(execFile);

  // Check pandoc is available
  try {
    await execFileAsync('pandoc', ['--version']);
  } catch {
    return { success: false, message: 'Pandoc not found. Install pandoc first.' };
  }

  // Each element is one argv entry, so no quoting or escaping is needed.
  const args = [inputPath, '-o', outputPath, '--mathml'];

  if (preserveLatex) {
    // NOTE(review): pandoc documents --wrap as a text-wrapping option; confirm
    // that it actually achieves what `preserveLatex` is meant to do.
    args.push('--wrap=preserve');
  }

  try {
    await execFileAsync('pandoc', args);
    return { success: true, message: `Created ${outputPath}` };
  } catch (err: unknown) {
    return { success: false, message: err instanceof Error ? err.message : String(err) };
  }
}
231
+
232
/**
 * Create a simple equations-only document from a markdown file.
 *
 * The equations found in `inputPath` are rendered with
 * `generateEquationSheet`. If `outputPath` has a `.docx` extension (checked
 * case-insensitively) the sheet is written to a temporary markdown file next
 * to the output and converted with `convertToWord`; otherwise the sheet is
 * written to `outputPath` as markdown.
 *
 * @param inputPath - Markdown file to read equations from.
 * @param outputPath - File to create (`.docx` triggers pandoc conversion).
 * @returns Result flag and message, plus display/inline counts. `stats` is
 *          `null` only when the input file does not exist.
 */
export async function createEquationsDoc(
  inputPath: string,
  outputPath: string
): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
  if (!fs.existsSync(inputPath)) {
    return { success: false, message: `File not found: ${inputPath}`, stats: null };
  }

  const text = fs.readFileSync(inputPath, 'utf-8');
  const equations = extractEquations(text, path.basename(inputPath));

  if (equations.length === 0) {
    return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
  }

  const sheet = generateEquationSheet(equations);
  const stats = {
    display: equations.filter(e => e.type === 'display').length,
    inline: equations.filter(e => e.type === 'inline').length,
  };

  const ext = path.extname(outputPath);

  if (ext.toLowerCase() !== '.docx') {
    // Write as markdown
    fs.writeFileSync(outputPath, sheet, 'utf-8');
    return { success: true, message: `Created ${outputPath}`, stats };
  }

  // Swap only the trailing extension, whatever its letter case. A plain
  // string replace of '.docx' would miss 'OUT.DOCX' (making the temp path
  // equal to the output path, which is then deleted) and would rewrite a
  // '.docx' that occurs earlier in the path, e.g. in a directory name.
  const tempMd = outputPath.slice(0, outputPath.length - ext.length) + '.tmp.md';
  fs.writeFileSync(tempMd, sheet, 'utf-8');
  try {
    const result = await convertToWord(tempMd, outputPath);
    return { ...result, stats };
  } finally {
    // Always remove the temporary markdown, even if conversion throws.
    if (fs.existsSync(tempMd)) {
      fs.unlinkSync(tempMd);
    }
  }
}
271
+
272
/**
 * Count display and inline equations across a list of markdown files.
 *
 * Paths that do not exist are skipped. Each remaining file is read as UTF-8
 * and scanned with `extractEquations`; a per-file entry (keyed by the file's
 * base name) is recorded only when the file contains at least one equation.
 *
 * @param files - Paths of the files to scan.
 * @returns Overall totals plus the per-file breakdown.
 */
export function getEquationStats(files: string[]): EquationStats {
  const perFile: Array<{ file: string; display: number; inline: number }> = [];
  let displayTotal = 0;
  let inlineTotal = 0;

  for (const filePath of files) {
    if (fs.existsSync(filePath)) {
      const name = path.basename(filePath);
      const found = extractEquations(fs.readFileSync(filePath, 'utf-8'), name);

      // Tally both kinds in a single pass over this file's equations.
      let display = 0;
      let inline = 0;
      for (const equation of found) {
        if (equation.type === 'display') {
          display += 1;
        } else if (equation.type === 'inline') {
          inline += 1;
        }
      }

      displayTotal += display;
      inlineTotal += inline;

      if (display + inline > 0) {
        perFile.push({ file: name, display, inline });
      }
    }
  }

  return {
    total: displayTotal + inlineTotal,
    display: displayTotal,
    inline: inlineTotal,
    byFile: perFile,
  };
}
303
+
304
/**
 * Extract equations from a Word document using Pandoc.
 *
 * The document is converted to markdown by pandoc (started without a shell,
 * so `docxPath` is passed verbatim as a single argument) and the resulting
 * text is scanned with `extractEquations`. If running pandoc fails for any
 * reason, the direct OMML extraction fallback is used instead.
 *
 * @param docxPath - Path to the `.docx` file.
 * @returns `success: false` with an error when the file does not exist;
 *          otherwise the equations found, in order, with `position` set to
 *          their index. Equations from the pandoc path also carry the `line`
 *          in pandoc's markdown output.
 */
export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
  if (!fs.existsSync(docxPath)) {
    return { success: false, equations: [], error: `File not found: ${docxPath}` };
  }

  // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
  try {
    const execFileAsync = promisify(execFile);
    const { stdout } = await execFileAsync(
      'pandoc',
      [docxPath, '-t', 'markdown', '--wrap=none'],
      { maxBuffer: 50 * 1024 * 1024 } // allow up to 50 MiB of converted markdown
    );

    // Extract equations from the markdown output
    const equations = extractEquations(stdout, path.basename(docxPath));

    return {
      success: true,
      equations: equations.map((eq, i) => ({
        type: eq.type,
        latex: eq.content,
        position: i,
        line: eq.line,
      })),
    };
  } catch {
    // Pandoc failed, try fallback method
    return extractEquationsFromWordDirect(docxPath);
  }
}
337
+
338
/**
 * Direct OMML extraction from a Word document (fallback if Pandoc fails).
 *
 * Reads `word/document.xml` from the docx archive, finds `<m:oMath…>` spans
 * with a regular expression and tries to convert each one to LaTeX through
 * `ommlToLatex`.
 *
 * Every matched span yields one entry. Spans that cannot be converted
 * (no converter available, or conversion produced nothing) are kept as
 * `type: 'unknown'` with `latex: null` and an `error`, so callers can tell
 * "document has equations we could not convert" apart from "document has no
 * equations".
 *
 * @param docxPath - Path to the `.docx` file.
 * @returns `success: false` when the archive cannot be read or lacks
 *          `word/document.xml`; otherwise the list of extracted entries.
 */
async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
  try {
    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');

    if (!documentEntry) {
      return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
    }

    const documentXml = zip.readAsText(documentEntry);

    // Lazy match from an opening tag whose name starts with m:oMath (this
    // also covers <m:oMathPara>) up to the first </m:oMath>.
    const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
    const matches = documentXml.match(ommlPattern) || [];

    if (matches.length === 0) {
      return { success: true, equations: [] };
    }

    // Try to convert OMML to LaTeX via MathML intermediate
    const Converter = await getMathMLConverter();
    const equations: WordEquationResult['equations'] = [];

    for (let i = 0; i < matches.length; i++) {
      const omml = matches[i];
      if (!omml) continue;

      // Short excerpt of the source markup kept for diagnostics.
      const raw = omml.substring(0, 100) + '...';

      // Attempt OMML → MathML → LaTeX conversion
      // Note: This is a simplified approach; full OMML→MathML requires XSLT
      let latex: string | null = null;
      try {
        latex = await ommlToLatex(omml, Converter);
      } catch {
        latex = null;
      }

      if (latex) {
        equations.push({
          type: isDisplayMath(omml) ? 'display' : 'inline',
          latex,
          position: i,
          raw,
        });
      } else {
        // ommlToLatex signals failure by returning null rather than throwing,
        // so the failure record has to be produced here.
        equations.push({
          type: 'unknown',
          latex: null,
          position: i,
          raw,
          error: 'Conversion failed',
        });
      }
    }

    return { success: true, equations };
  } catch (err: unknown) {
    return {
      success: false,
      equations: [],
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
398
+
399
/**
 * Tell whether an OMML fragment should be treated as display math.
 *
 * The fragment counts as display math when it contains either of the
 * substrings `<m:oMathPara` or `m:jc`.
 *
 * @param omml - Raw OMML markup for one equation.
 * @returns `true` if either marker is present, otherwise `false`.
 */
function isDisplayMath(omml: string): boolean {
  const displayMarkers = ['<m:oMathPara', 'm:jc'];
  return displayMarkers.some((marker) => omml.indexOf(marker) !== -1);
}
405
+
406
/**
 * Convert one OMML fragment to LaTeX (simplified approach).
 * For complex equations, the Pandoc method is more reliable.
 *
 * The fragment is first rewritten to approximate MathML by `ommlToMathML`,
 * then handed to `Converter.convert`.
 *
 * @param omml - Raw OMML markup for one equation.
 * @param Converter - Object exposing `convert(mathml)`; a falsy value means
 *   no converter is available.
 * @returns Whatever `Converter.convert` returns, or `null` when there is no
 *   converter, no MathML could be built, or a step throws synchronously.
 */
async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
  if (Converter) {
    try {
      // Not every OMML feature survives this intermediate MathML step.
      const intermediate = ommlToMathML(omml);
      return intermediate ? Converter.convert(intermediate) : null;
    } catch {
      // Swallow the failure; it is reported as null below.
    }
  }
  return null;
}
427
+
428
/**
 * Convert OMML to MathML (simplified).
 *
 * Steps:
 *  1. Drop the `m:` element prefix and rename `w:` elements to `w_…` so they
 *     can be removed as a group.
 *  2. Rewrite the supported OMML elements to MathML: `oMath` → `math`,
 *     runs (`r`) → `mi`, fractions (`f`/`num`/`den`), scripts
 *     (`sSup`/`sSub`/`sSubSup` with `e`/`sup`/`sub` operands), radicals
 *     (`rad` → `msqrt`).
 *  3. Remove run/control properties, WordprocessingML markup and self-closing
 *     tags.
 *  4. Strip every remaining tag that is not one of the MathML elements emitted
 *     in step 2, keeping its text content.
 *
 * Limitations: every run becomes `<mi>` regardless of content, and a radical's
 * degree (`deg`) is not mapped, so its content ends up inside the `msqrt`.
 *
 * @param omml - Raw OMML markup for one equation.
 * @returns A MathML string rooted at `<math>`; input without an `oMath`
 *   element is wrapped in one.
 */
function ommlToMathML(omml: string): string | null {
  const mathOpen = '<math xmlns="http://www.w3.org/1998/Math/MathML">';

  // Remove namespace prefixes for easier parsing
  let xml = omml
    .replace(/<m:/g, '<')
    .replace(/<\/m:/g, '</')
    .replace(/<w:/g, '<w_')
    .replace(/<\/w:/g, '</w_');

  // Map OMML elements to MathML
  const mappings: Array<[RegExp, string]> = [
    // Only the oMath element itself: `<oMath[^>]*>` would also match
    // <oMathPara> / <oMathParaPr> and emit extra, unbalanced <math> openers.
    [/<oMath(?:\s[^>]*)?>/gi, mathOpen],
    [/<\/oMath>/gi, '</math>'],
    [/<r>/gi, '<mi>'],
    [/<\/r>/gi, '</mi>'],
    [/<t>/gi, ''],
    [/<\/t>/gi, ''],
    [/<f>/gi, '<mfrac>'],
    [/<\/f>/gi, '</mfrac>'],
    [/<num>/gi, '<mrow>'],
    [/<\/num>/gi, '</mrow>'],
    [/<den>/gi, '<mrow>'],
    [/<\/den>/gi, '</mrow>'],
    // The script *containers* become msup/msub/msubsup; their operands
    // (e, sub, sup) each become one <mrow> child, in OMML order, which is
    // also the child order MathML expects.
    [/<sSubSup>/gi, '<msubsup>'],
    [/<\/sSubSup>/gi, '</msubsup>'],
    [/<sSup>/gi, '<msup>'],
    [/<\/sSup>/gi, '</msup>'],
    [/<sSub>/gi, '<msub>'],
    [/<\/sSub>/gi, '</msub>'],
    [/<sup>/gi, '<mrow>'],
    [/<\/sup>/gi, '</mrow>'],
    [/<sub>/gi, '<mrow>'],
    [/<\/sub>/gi, '</mrow>'],
    [/<rad>/gi, '<msqrt>'],
    [/<\/rad>/gi, '</msqrt>'],
    [/<e>/gi, '<mrow>'],
    [/<\/e>/gi, '</mrow>'],
    // Remove elements we don't map
    [/<rPr>[\s\S]*?<\/rPr>/gi, ''],
    [/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
    [/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
    [/<[^>]*\/>/gi, ''], // Self-closing tags
  ];

  for (const [pattern, replacement] of mappings) {
    xml = xml.replace(pattern, replacement);
  }

  // Clean up any remaining unrecognized tags. The MathML elements produced
  // above are excluded via the lookahead; stripping *all* tags here would
  // throw the whole conversion away and leave only plain text.
  xml = xml.replace(
    /<\/?(?!(?:math|mi|mfrac|mrow|msup|msub|msubsup|msqrt)[\s>])[a-zA-Z][^>]*>/g,
    ''
  );

  // Wrap in math if not already
  if (!xml.includes('<math')) {
    xml = `${mathOpen}${xml}</math>`;
  }

  return xml;
}
483
+
484
/**
 * Summarize the equations found in a Word document.
 *
 * Runs `extractEquationsFromWord` and tallies its entries: `count` is the
 * number of entries, `display` / `inline` count entries of that type, and
 * `converted` counts entries with a truthy `latex` value.
 *
 * @param docxPath - Path to the .docx file.
 * @returns The tallies; when extraction was unsuccessful all tallies are 0
 *   and `error` carries the extraction error.
 */
export async function getWordEquationStats(
  docxPath: string
): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
  const outcome = await extractEquationsFromWord(docxPath);

  if (!outcome.success) {
    return { count: 0, display: 0, inline: 0, converted: 0, error: outcome.error };
  }

  let display = 0;
  let inline = 0;
  let converted = 0;

  for (const equation of outcome.equations) {
    if (equation.type === 'display') {
      display += 1;
    } else if (equation.type === 'inline') {
      inline += 1;
    }
    if (equation.latex) {
      converted += 1;
    }
  }

  return { count: outcome.equations.length, display, inline, converted };
}