docrev 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +411 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +38 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +68 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/pdf-comments.js +44 -44
  43. package/dist/lib/plugins.js +57 -57
  44. package/dist/lib/pptx-themes.js +115 -115
  45. package/dist/lib/spelling.js +2 -2
  46. package/dist/lib/templates.js +387 -387
  47. package/dist/lib/themes.js +51 -51
  48. package/eslint.config.js +27 -27
  49. package/lib/anchor-match.ts +276 -276
  50. package/lib/annotations.ts +644 -644
  51. package/lib/build.ts +1300 -1251
  52. package/lib/citations.ts +160 -160
  53. package/lib/commands/build.ts +833 -801
  54. package/lib/commands/citations.ts +515 -515
  55. package/lib/commands/comments.ts +1050 -1050
  56. package/lib/commands/context.ts +174 -174
  57. package/lib/commands/core.ts +309 -309
  58. package/lib/commands/doi.ts +435 -435
  59. package/lib/commands/file-ops.ts +372 -372
  60. package/lib/commands/history.ts +320 -320
  61. package/lib/commands/index.ts +87 -87
  62. package/lib/commands/init.ts +259 -259
  63. package/lib/commands/merge-resolve.ts +378 -378
  64. package/lib/commands/preview.ts +178 -178
  65. package/lib/commands/project-info.ts +244 -244
  66. package/lib/commands/quality.ts +517 -517
  67. package/lib/commands/response.ts +454 -454
  68. package/lib/commands/section-boundaries.ts +82 -82
  69. package/lib/commands/sections.ts +451 -451
  70. package/lib/commands/sync.ts +706 -706
  71. package/lib/commands/text-ops.ts +449 -449
  72. package/lib/commands/utilities.ts +448 -448
  73. package/lib/commands/verify-anchors.ts +272 -272
  74. package/lib/commands/word-tools.ts +340 -340
  75. package/lib/comment-realign.ts +517 -517
  76. package/lib/config.ts +84 -84
  77. package/lib/crossref.ts +781 -781
  78. package/lib/csl.ts +191 -191
  79. package/lib/dependencies.ts +98 -98
  80. package/lib/diff-engine.ts +465 -465
  81. package/lib/doi-cache.ts +115 -115
  82. package/lib/doi.ts +897 -897
  83. package/lib/equations.ts +506 -506
  84. package/lib/errors.ts +346 -346
  85. package/lib/format.ts +541 -541
  86. package/lib/git.ts +326 -326
  87. package/lib/grammar.ts +303 -303
  88. package/lib/image-registry.ts +180 -180
  89. package/lib/import.ts +911 -911
  90. package/lib/journals.ts +543 -543
  91. package/lib/merge.ts +633 -633
  92. package/lib/orcid.ts +144 -144
  93. package/lib/pdf-comments.ts +263 -263
  94. package/lib/pdf-import.ts +524 -524
  95. package/lib/plugins.ts +362 -362
  96. package/lib/postprocess.ts +188 -188
  97. package/lib/pptx-color-filter.lua +37 -37
  98. package/lib/pptx-template.ts +469 -469
  99. package/lib/pptx-themes.ts +483 -483
  100. package/lib/protect-restore.ts +520 -520
  101. package/lib/rate-limiter.ts +94 -94
  102. package/lib/response.ts +197 -197
  103. package/lib/restore-references.ts +240 -240
  104. package/lib/review.ts +327 -327
  105. package/lib/schema.ts +417 -417
  106. package/lib/scientific-words.ts +73 -73
  107. package/lib/sections.ts +335 -335
  108. package/lib/slides.ts +756 -756
  109. package/lib/spelling.ts +334 -334
  110. package/lib/templates.ts +526 -526
  111. package/lib/themes.ts +742 -742
  112. package/lib/trackchanges.ts +247 -247
  113. package/lib/tui.ts +450 -450
  114. package/lib/types.ts +550 -550
  115. package/lib/undo.ts +250 -250
  116. package/lib/utils.ts +69 -69
  117. package/lib/variables.ts +179 -179
  118. package/lib/word-extraction.ts +806 -806
  119. package/lib/word.ts +643 -643
  120. package/lib/wordcomments.ts +817 -817
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +28 -28
  123. package/skill/REFERENCE.md +473 -431
  124. package/skill/SKILL.md +274 -258
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
package/lib/equations.ts CHANGED
@@ -1,506 +1,506 @@
1
- /**
2
- * Equation extraction and conversion utilities
3
- * Handle LaTeX math in Markdown ↔ Word workflows
4
- *
5
- * Supports:
6
- * - Extract LaTeX equations from Markdown
7
- * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
- * - Convert Markdown with equations to Word (LaTeX → MathML)
9
- */
10
-
11
- import * as fs from 'fs';
12
- import * as path from 'path';
13
- import { exec } from 'child_process';
14
- import { promisify } from 'util';
15
- import AdmZip from 'adm-zip';
16
- import { parseString } from 'xml2js';
17
- import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
-
19
- const execAsync = promisify(exec);
20
- const parseXml = promisify(parseString);
21
-
22
- // Dynamic import for mathml-to-latex (ESM)
23
- let MathMLToLaTeX: any = null;
24
- async function getMathMLConverter(): Promise<any> {
25
- if (!MathMLToLaTeX) {
26
- try {
27
- const module = await import('mathml-to-latex');
28
- MathMLToLaTeX = module.MathMLToLaTeX;
29
- } catch {
30
- return null;
31
- }
32
- }
33
- return MathMLToLaTeX;
34
- }
35
-
36
- /**
37
- * Extract all equations from markdown text
38
- */
39
- export function extractEquations(text: string, file: string = ''): Equation[] {
40
- const equations: Equation[] = [];
41
- const lines = text.split('\n');
42
-
43
- let inDisplayMath = false;
44
- let displayMathStart = 0;
45
- let displayMathContent = '';
46
-
47
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
48
- const line = lines[lineNum];
49
- if (!line) continue;
50
-
51
- // Skip code blocks
52
- if (line.trim().startsWith('```')) continue;
53
-
54
- // Handle inline math ($...$) in a segment of text
55
- // Careful not to match $$ or escaped \$
56
- const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
57
- const extractInline = (segment: string): void => {
58
- let match;
59
- inlinePattern.lastIndex = 0;
60
- while ((match = inlinePattern.exec(segment)) !== null) {
61
- const content = match[1];
62
- if (content) {
63
- equations.push({
64
- type: 'inline',
65
- content: content.trim(),
66
- line: lineNum + 1,
67
- file,
68
- });
69
- }
70
- }
71
- };
72
-
73
- // Handle display math blocks ($$...$$)
74
- if (line.includes('$$')) {
75
- const parts = line.split('$$');
76
-
77
- if (!inDisplayMath && parts.length >= 3) {
78
- // Single-line display math: $$content$$
79
- // Also extract inline math from surrounding text
80
- if (parts[0]) extractInline(parts[0]); // Text before $$
81
- for (let i = 1; i < parts.length; i += 2) {
82
- const part = parts[i];
83
- if (part && part.trim()) {
84
- equations.push({
85
- type: 'display',
86
- content: part.trim(),
87
- line: lineNum + 1,
88
- file,
89
- });
90
- }
91
- }
92
- // Extract inline from text after the last $$
93
- const lastPart = parts[parts.length - 1];
94
- if (parts.length % 2 === 1 && lastPart) {
95
- extractInline(lastPart);
96
- }
97
- } else if (!inDisplayMath) {
98
- // Start of multi-line display math
99
- if (parts[0]) extractInline(parts[0]); // Text before $$
100
- inDisplayMath = true;
101
- displayMathStart = lineNum + 1;
102
- displayMathContent = parts[1] || '';
103
- } else {
104
- // End of multi-line display math
105
- inDisplayMath = false;
106
- displayMathContent += '\n' + (parts[0] || '');
107
- if (displayMathContent.trim()) {
108
- equations.push({
109
- type: 'display',
110
- content: displayMathContent.trim(),
111
- line: displayMathStart,
112
- file,
113
- });
114
- }
115
- displayMathContent = '';
116
- // Text after $$ on closing line
117
- const afterPart = parts[1];
118
- if (afterPart) {
119
- extractInline(afterPart);
120
- }
121
- }
122
- continue;
123
- }
124
-
125
- if (inDisplayMath) {
126
- displayMathContent += '\n' + line;
127
- continue;
128
- }
129
-
130
- // No display math on this line - extract inline math
131
- extractInline(line);
132
- }
133
-
134
- return equations;
135
- }
136
-
137
- /**
138
- * Generate a markdown document with numbered equations
139
- * Useful for creating an equation reference sheet
140
- */
141
- export function generateEquationSheet(equations: Equation[]): string {
142
- const lines: string[] = [];
143
- lines.push('# Equations');
144
- lines.push('');
145
-
146
- let displayNum = 0;
147
- let inlineNum = 0;
148
-
149
- // Group by file
150
- const byFile = new Map<string, Equation[]>();
151
- for (const eq of equations) {
152
- if (!byFile.has(eq.file)) {
153
- byFile.set(eq.file, []);
154
- }
155
- byFile.get(eq.file)!.push(eq);
156
- }
157
-
158
- for (const [file, fileEqs] of byFile) {
159
- if (file) {
160
- lines.push(`## ${file}`);
161
- lines.push('');
162
- }
163
-
164
- for (const eq of fileEqs) {
165
- if (eq.type === 'display') {
166
- displayNum++;
167
- lines.push(`### Equation ${displayNum} (line ${eq.line})`);
168
- lines.push('');
169
- lines.push('```latex');
170
- lines.push(eq.content);
171
- lines.push('```');
172
- lines.push('');
173
- lines.push('$$' + eq.content + '$$');
174
- lines.push('');
175
- } else {
176
- inlineNum++;
177
- lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
178
- }
179
- }
180
- lines.push('');
181
- }
182
-
183
- lines.push('---');
184
- lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
185
-
186
- return lines.join('\n');
187
- }
188
-
189
- interface ConvertToWordOptions {
190
- preserveLatex?: boolean;
191
- }
192
-
193
- /**
194
- * Convert markdown with equations to Word using pandoc
195
- */
196
- export async function convertToWord(
197
- inputPath: string,
198
- outputPath: string,
199
- options: ConvertToWordOptions = {}
200
- ): Promise<{ success: boolean; message: string }> {
201
- const { preserveLatex = false } = options;
202
-
203
- // Check pandoc is available
204
- try {
205
- await execAsync('pandoc --version');
206
- } catch {
207
- return { success: false, message: 'Pandoc not found. Install pandoc first.' };
208
- }
209
-
210
- // Build pandoc command
211
- // Use --mathml for better equation rendering in Word
212
- const args = [
213
- 'pandoc',
214
- `"${inputPath}"`,
215
- '-o', `"${outputPath}"`,
216
- '--mathml', // Better equation support in Word
217
- ];
218
-
219
- if (preserveLatex) {
220
- // Keep raw LaTeX (less compatible but preserves source)
221
- args.push('--wrap=preserve');
222
- }
223
-
224
- try {
225
- await execAsync(args.join(' '));
226
- return { success: true, message: `Created ${outputPath}` };
227
- } catch (err: any) {
228
- return { success: false, message: err.message };
229
- }
230
- }
231
-
232
- /**
233
- * Create a simple equations-only document
234
- */
235
- export async function createEquationsDoc(
236
- inputPath: string,
237
- outputPath: string
238
- ): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
239
- if (!fs.existsSync(inputPath)) {
240
- return { success: false, message: `File not found: ${inputPath}`, stats: null };
241
- }
242
-
243
- const text = fs.readFileSync(inputPath, 'utf-8');
244
- const equations = extractEquations(text, path.basename(inputPath));
245
-
246
- if (equations.length === 0) {
247
- return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
248
- }
249
-
250
- const sheet = generateEquationSheet(equations);
251
- const stats = {
252
- display: equations.filter(e => e.type === 'display').length,
253
- inline: equations.filter(e => e.type === 'inline').length,
254
- };
255
-
256
- const ext = path.extname(outputPath).toLowerCase();
257
-
258
- if (ext === '.docx') {
259
- // Write temp md, convert to docx
260
- const tempMd = outputPath.replace('.docx', '.tmp.md');
261
- fs.writeFileSync(tempMd, sheet, 'utf-8');
262
- const result = await convertToWord(tempMd, outputPath);
263
- fs.unlinkSync(tempMd);
264
- return { ...result, stats };
265
- } else {
266
- // Write as markdown
267
- fs.writeFileSync(outputPath, sheet, 'utf-8');
268
- return { success: true, message: `Created ${outputPath}`, stats };
269
- }
270
- }
271
-
272
- /**
273
- * Get equation statistics for a file or directory
274
- */
275
- export function getEquationStats(files: string[]): EquationStats {
276
- let totalDisplay = 0;
277
- let totalInline = 0;
278
- const byFile: Array<{ file: string; display: number; inline: number }> = [];
279
-
280
- for (const file of files) {
281
- if (!fs.existsSync(file)) continue;
282
- const text = fs.readFileSync(file, 'utf-8');
283
- const equations = extractEquations(text, path.basename(file));
284
-
285
- const display = equations.filter(e => e.type === 'display').length;
286
- const inline = equations.filter(e => e.type === 'inline').length;
287
-
288
- totalDisplay += display;
289
- totalInline += inline;
290
-
291
- if (display > 0 || inline > 0) {
292
- byFile.push({ file: path.basename(file), display, inline });
293
- }
294
- }
295
-
296
- return {
297
- total: totalDisplay + totalInline,
298
- display: totalDisplay,
299
- inline: totalInline,
300
- byFile,
301
- };
302
- }
303
-
304
- /**
305
- * Extract equations from a Word document using Pandoc
306
- * Converts OMML (Office Math Markup) to LaTeX
307
- */
308
- export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
309
- if (!fs.existsSync(docxPath)) {
310
- return { success: false, equations: [], error: `File not found: ${docxPath}` };
311
- }
312
-
313
- // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
314
- try {
315
- const { stdout } = await execAsync(
316
- `pandoc "${docxPath}" -t markdown --wrap=none`,
317
- { maxBuffer: 50 * 1024 * 1024 }
318
- );
319
-
320
- // Extract equations from the markdown output
321
- const equations = extractEquations(stdout, path.basename(docxPath));
322
-
323
- return {
324
- success: true,
325
- equations: equations.map((eq, i) => ({
326
- type: eq.type,
327
- latex: eq.content,
328
- position: i,
329
- line: eq.line,
330
- })),
331
- };
332
- } catch (err) {
333
- // Pandoc failed, try fallback method
334
- return extractEquationsFromWordDirect(docxPath);
335
- }
336
- }
337
-
338
- /**
339
- * Direct OMML extraction from Word document (fallback if Pandoc fails)
340
- * Parses document.xml for <m:oMath> elements and attempts conversion
341
- */
342
- async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
343
- try {
344
- const zip = new AdmZip(docxPath);
345
- const documentEntry = zip.getEntry('word/document.xml');
346
-
347
- if (!documentEntry) {
348
- return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
349
- }
350
-
351
- const documentXml = zip.readAsText(documentEntry);
352
-
353
- // Find all OMML equations (<m:oMath> or <m:oMathPara>)
354
- const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
355
- const matches = documentXml.match(ommlPattern) || [];
356
-
357
- if (matches.length === 0) {
358
- return { success: true, equations: [] };
359
- }
360
-
361
- // Try to convert OMML to LaTeX via MathML intermediate
362
- const Converter = await getMathMLConverter();
363
- const equations: WordEquationResult['equations'] = [];
364
-
365
- for (let i = 0; i < matches.length; i++) {
366
- const omml = matches[i];
367
- if (!omml) continue;
368
-
369
- // Attempt OMML → MathML → LaTeX conversion
370
- // Note: This is a simplified approach; full OMML→MathML requires XSLT
371
- try {
372
- const latex = await ommlToLatex(omml, Converter);
373
- if (latex) {
374
- equations.push({
375
- type: isDisplayMath(omml) ? 'display' : 'inline',
376
- latex,
377
- position: i,
378
- raw: omml.substring(0, 100) + '...',
379
- });
380
- }
381
- } catch {
382
- // Keep raw OMML reference if conversion fails
383
- equations.push({
384
- type: 'unknown',
385
- latex: null,
386
- position: i,
387
- raw: omml.substring(0, 100) + '...',
388
- error: 'Conversion failed',
389
- });
390
- }
391
- }
392
-
393
- return { success: true, equations };
394
- } catch (err: any) {
395
- return { success: false, equations: [], error: err.message };
396
- }
397
- }
398
-
399
- /**
400
- * Check if OMML represents display math (equation on its own line)
401
- */
402
- function isDisplayMath(omml: string): boolean {
403
- return omml.includes('<m:oMathPara') || omml.includes('m:jc');
404
- }
405
-
406
- /**
407
- * Convert OMML to LaTeX (simplified approach)
408
- * For complex equations, Pandoc method is more reliable
409
- */
410
- async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
411
- if (!Converter) return null;
412
-
413
- // Extract key elements from OMML and build approximate MathML
414
- // This is a simplified conversion - not all OMML features are supported
415
- try {
416
- // Build basic MathML from OMML structure
417
- const mathml = ommlToMathML(omml);
418
- if (!mathml) return null;
419
-
420
- // Convert MathML to LaTeX
421
- const latex = Converter.convert(mathml);
422
- return latex;
423
- } catch {
424
- return null;
425
- }
426
- }
427
-
428
- /**
429
- * Convert OMML to MathML (simplified)
430
- * Maps common OMML elements to MathML equivalents
431
- */
432
- function ommlToMathML(omml: string): string | null {
433
- // Remove namespace prefixes for easier parsing
434
- let xml = omml
435
- .replace(/<m:/g, '<')
436
- .replace(/<\/m:/g, '</')
437
- .replace(/<w:/g, '<w_')
438
- .replace(/<\/w:/g, '</w_');
439
-
440
- // Map OMML elements to MathML
441
- const mappings: Array<[RegExp, string]> = [
442
- [/<oMath[^>]*>/gi, '<math xmlns="http://www.w3.org/1998/Math/MathML">'],
443
- [/<\/oMath>/gi, '</math>'],
444
- [/<r>/gi, '<mi>'],
445
- [/<\/r>/gi, '</mi>'],
446
- [/<t>/gi, ''],
447
- [/<\/t>/gi, ''],
448
- [/<f>/gi, '<mfrac>'],
449
- [/<\/f>/gi, '</mfrac>'],
450
- [/<num>/gi, '<mrow>'],
451
- [/<\/num>/gi, '</mrow>'],
452
- [/<den>/gi, '<mrow>'],
453
- [/<\/den>/gi, '</mrow>'],
454
- [/<sup>/gi, '<msup><mrow>'],
455
- [/<\/sup>/gi, '</mrow></msup>'],
456
- [/<sub>/gi, '<msub><mrow>'],
457
- [/<\/sub>/gi, '</mrow></msub>'],
458
- [/<rad>/gi, '<msqrt>'],
459
- [/<\/rad>/gi, '</msqrt>'],
460
- [/<e>/gi, '<mrow>'],
461
- [/<\/e>/gi, '</mrow>'],
462
- // Remove elements we don't map
463
- [/<rPr>[\s\S]*?<\/rPr>/gi, ''],
464
- [/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
465
- [/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
466
- [/<[^>]*\/>/gi, ''], // Self-closing tags
467
- ];
468
-
469
- for (const [pattern, replacement] of mappings) {
470
- xml = xml.replace(pattern, replacement);
471
- }
472
-
473
- // Clean up any remaining unrecognized tags
474
- xml = xml.replace(/<[a-zA-Z][^>]*>/g, '').replace(/<\/[a-zA-Z]+>/g, '');
475
-
476
- // Wrap in math if not already
477
- if (!xml.includes('<math')) {
478
- xml = `<math xmlns="http://www.w3.org/1998/Math/MathML">${xml}</math>`;
479
- }
480
-
481
- return xml;
482
- }
483
-
484
- /**
485
- * Get equation summary from Word document
486
- */
487
- export async function getWordEquationStats(
488
- docxPath: string
489
- ): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
490
- const result = await extractEquationsFromWord(docxPath);
491
-
492
- if (!result.success) {
493
- return { count: 0, display: 0, inline: 0, converted: 0, error: result.error };
494
- }
495
-
496
- const display = result.equations.filter(e => e.type === 'display').length;
497
- const inline = result.equations.filter(e => e.type === 'inline').length;
498
- const converted = result.equations.filter(e => e.latex).length;
499
-
500
- return {
501
- count: result.equations.length,
502
- display,
503
- inline,
504
- converted,
505
- };
506
- }
1
+ /**
2
+ * Equation extraction and conversion utilities
3
+ * Handle LaTeX math in Markdown ↔ Word workflows
4
+ *
5
+ * Supports:
6
+ * - Extract LaTeX equations from Markdown
7
+ * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
+ * - Convert Markdown with equations to Word (LaTeX → MathML)
9
+ */
10
+
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import { exec } from 'child_process';
14
+ import { promisify } from 'util';
15
+ import AdmZip from 'adm-zip';
16
+ import { parseString } from 'xml2js';
17
+ import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
+
19
+ const execAsync = promisify(exec);
20
+ const parseXml = promisify(parseString);
21
+
22
+ // Dynamic import for mathml-to-latex (ESM)
23
+ let MathMLToLaTeX: any = null;
24
+ async function getMathMLConverter(): Promise<any> {
25
+ if (!MathMLToLaTeX) {
26
+ try {
27
+ const module = await import('mathml-to-latex');
28
+ MathMLToLaTeX = module.MathMLToLaTeX;
29
+ } catch {
30
+ return null;
31
+ }
32
+ }
33
+ return MathMLToLaTeX;
34
+ }
35
+
36
+ /**
37
+ * Extract all equations from markdown text
38
+ */
39
+ export function extractEquations(text: string, file: string = ''): Equation[] {
40
+ const equations: Equation[] = [];
41
+ const lines = text.split('\n');
42
+
43
+ let inDisplayMath = false;
44
+ let displayMathStart = 0;
45
+ let displayMathContent = '';
46
+
47
+ for (let lineNum = 0; lineNum < lines.length; lineNum++) {
48
+ const line = lines[lineNum];
49
+ if (!line) continue;
50
+
51
+ // Skip code blocks
52
+ if (line.trim().startsWith('```')) continue;
53
+
54
+ // Handle inline math ($...$) in a segment of text
55
+ // Careful not to match $$ or escaped \$
56
+ const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
57
+ const extractInline = (segment: string): void => {
58
+ let match;
59
+ inlinePattern.lastIndex = 0;
60
+ while ((match = inlinePattern.exec(segment)) !== null) {
61
+ const content = match[1];
62
+ if (content) {
63
+ equations.push({
64
+ type: 'inline',
65
+ content: content.trim(),
66
+ line: lineNum + 1,
67
+ file,
68
+ });
69
+ }
70
+ }
71
+ };
72
+
73
+ // Handle display math blocks ($$...$$)
74
+ if (line.includes('$$')) {
75
+ const parts = line.split('$$');
76
+
77
+ if (!inDisplayMath && parts.length >= 3) {
78
+ // Single-line display math: $$content$$
79
+ // Also extract inline math from surrounding text
80
+ if (parts[0]) extractInline(parts[0]); // Text before $$
81
+ for (let i = 1; i < parts.length; i += 2) {
82
+ const part = parts[i];
83
+ if (part && part.trim()) {
84
+ equations.push({
85
+ type: 'display',
86
+ content: part.trim(),
87
+ line: lineNum + 1,
88
+ file,
89
+ });
90
+ }
91
+ }
92
+ // Extract inline from text after the last $$
93
+ const lastPart = parts[parts.length - 1];
94
+ if (parts.length % 2 === 1 && lastPart) {
95
+ extractInline(lastPart);
96
+ }
97
+ } else if (!inDisplayMath) {
98
+ // Start of multi-line display math
99
+ if (parts[0]) extractInline(parts[0]); // Text before $$
100
+ inDisplayMath = true;
101
+ displayMathStart = lineNum + 1;
102
+ displayMathContent = parts[1] || '';
103
+ } else {
104
+ // End of multi-line display math
105
+ inDisplayMath = false;
106
+ displayMathContent += '\n' + (parts[0] || '');
107
+ if (displayMathContent.trim()) {
108
+ equations.push({
109
+ type: 'display',
110
+ content: displayMathContent.trim(),
111
+ line: displayMathStart,
112
+ file,
113
+ });
114
+ }
115
+ displayMathContent = '';
116
+ // Text after $$ on closing line
117
+ const afterPart = parts[1];
118
+ if (afterPart) {
119
+ extractInline(afterPart);
120
+ }
121
+ }
122
+ continue;
123
+ }
124
+
125
+ if (inDisplayMath) {
126
+ displayMathContent += '\n' + line;
127
+ continue;
128
+ }
129
+
130
+ // No display math on this line - extract inline math
131
+ extractInline(line);
132
+ }
133
+
134
+ return equations;
135
+ }
136
+
137
+ /**
138
+ * Generate a markdown document with numbered equations
139
+ * Useful for creating an equation reference sheet
140
+ */
141
+ export function generateEquationSheet(equations: Equation[]): string {
142
+ const lines: string[] = [];
143
+ lines.push('# Equations');
144
+ lines.push('');
145
+
146
+ let displayNum = 0;
147
+ let inlineNum = 0;
148
+
149
+ // Group by file
150
+ const byFile = new Map<string, Equation[]>();
151
+ for (const eq of equations) {
152
+ if (!byFile.has(eq.file)) {
153
+ byFile.set(eq.file, []);
154
+ }
155
+ byFile.get(eq.file)!.push(eq);
156
+ }
157
+
158
+ for (const [file, fileEqs] of byFile) {
159
+ if (file) {
160
+ lines.push(`## ${file}`);
161
+ lines.push('');
162
+ }
163
+
164
+ for (const eq of fileEqs) {
165
+ if (eq.type === 'display') {
166
+ displayNum++;
167
+ lines.push(`### Equation ${displayNum} (line ${eq.line})`);
168
+ lines.push('');
169
+ lines.push('```latex');
170
+ lines.push(eq.content);
171
+ lines.push('```');
172
+ lines.push('');
173
+ lines.push('$$' + eq.content + '$$');
174
+ lines.push('');
175
+ } else {
176
+ inlineNum++;
177
+ lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
178
+ }
179
+ }
180
+ lines.push('');
181
+ }
182
+
183
+ lines.push('---');
184
+ lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
185
+
186
+ return lines.join('\n');
187
+ }
188
+
189
+ interface ConvertToWordOptions {
190
+ preserveLatex?: boolean;
191
+ }
192
+
193
+ /**
194
+ * Convert markdown with equations to Word using pandoc
195
+ */
196
+ export async function convertToWord(
197
+ inputPath: string,
198
+ outputPath: string,
199
+ options: ConvertToWordOptions = {}
200
+ ): Promise<{ success: boolean; message: string }> {
201
+ const { preserveLatex = false } = options;
202
+
203
+ // Check pandoc is available
204
+ try {
205
+ await execAsync('pandoc --version');
206
+ } catch {
207
+ return { success: false, message: 'Pandoc not found. Install pandoc first.' };
208
+ }
209
+
210
+ // Build pandoc command
211
+ // Use --mathml for better equation rendering in Word
212
+ const args = [
213
+ 'pandoc',
214
+ `"${inputPath}"`,
215
+ '-o', `"${outputPath}"`,
216
+ '--mathml', // Better equation support in Word
217
+ ];
218
+
219
+ if (preserveLatex) {
220
+ // Keep raw LaTeX (less compatible but preserves source)
221
+ args.push('--wrap=preserve');
222
+ }
223
+
224
+ try {
225
+ await execAsync(args.join(' '));
226
+ return { success: true, message: `Created ${outputPath}` };
227
+ } catch (err: any) {
228
+ return { success: false, message: err.message };
229
+ }
230
+ }
231
+
232
+ /**
233
+ * Create a simple equations-only document
234
+ */
235
+ export async function createEquationsDoc(
236
+ inputPath: string,
237
+ outputPath: string
238
+ ): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
239
+ if (!fs.existsSync(inputPath)) {
240
+ return { success: false, message: `File not found: ${inputPath}`, stats: null };
241
+ }
242
+
243
+ const text = fs.readFileSync(inputPath, 'utf-8');
244
+ const equations = extractEquations(text, path.basename(inputPath));
245
+
246
+ if (equations.length === 0) {
247
+ return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
248
+ }
249
+
250
+ const sheet = generateEquationSheet(equations);
251
+ const stats = {
252
+ display: equations.filter(e => e.type === 'display').length,
253
+ inline: equations.filter(e => e.type === 'inline').length,
254
+ };
255
+
256
+ const ext = path.extname(outputPath).toLowerCase();
257
+
258
+ if (ext === '.docx') {
259
+ // Write temp md, convert to docx
260
+ const tempMd = outputPath.replace('.docx', '.tmp.md');
261
+ fs.writeFileSync(tempMd, sheet, 'utf-8');
262
+ const result = await convertToWord(tempMd, outputPath);
263
+ fs.unlinkSync(tempMd);
264
+ return { ...result, stats };
265
+ } else {
266
+ // Write as markdown
267
+ fs.writeFileSync(outputPath, sheet, 'utf-8');
268
+ return { success: true, message: `Created ${outputPath}`, stats };
269
+ }
270
+ }
271
+
272
+ /**
273
+ * Get equation statistics for a file or directory
274
+ */
275
+ export function getEquationStats(files: string[]): EquationStats {
276
+ let totalDisplay = 0;
277
+ let totalInline = 0;
278
+ const byFile: Array<{ file: string; display: number; inline: number }> = [];
279
+
280
+ for (const file of files) {
281
+ if (!fs.existsSync(file)) continue;
282
+ const text = fs.readFileSync(file, 'utf-8');
283
+ const equations = extractEquations(text, path.basename(file));
284
+
285
+ const display = equations.filter(e => e.type === 'display').length;
286
+ const inline = equations.filter(e => e.type === 'inline').length;
287
+
288
+ totalDisplay += display;
289
+ totalInline += inline;
290
+
291
+ if (display > 0 || inline > 0) {
292
+ byFile.push({ file: path.basename(file), display, inline });
293
+ }
294
+ }
295
+
296
+ return {
297
+ total: totalDisplay + totalInline,
298
+ display: totalDisplay,
299
+ inline: totalInline,
300
+ byFile,
301
+ };
302
+ }
303
+
304
+ /**
305
+ * Extract equations from a Word document using Pandoc
306
+ * Converts OMML (Office Math Markup) to LaTeX
307
+ */
308
+ export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
309
+ if (!fs.existsSync(docxPath)) {
310
+ return { success: false, equations: [], error: `File not found: ${docxPath}` };
311
+ }
312
+
313
+ // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
314
+ try {
315
+ const { stdout } = await execAsync(
316
+ `pandoc "${docxPath}" -t markdown --wrap=none`,
317
+ { maxBuffer: 50 * 1024 * 1024 }
318
+ );
319
+
320
+ // Extract equations from the markdown output
321
+ const equations = extractEquations(stdout, path.basename(docxPath));
322
+
323
+ return {
324
+ success: true,
325
+ equations: equations.map((eq, i) => ({
326
+ type: eq.type,
327
+ latex: eq.content,
328
+ position: i,
329
+ line: eq.line,
330
+ })),
331
+ };
332
+ } catch (err) {
333
+ // Pandoc failed, try fallback method
334
+ return extractEquationsFromWordDirect(docxPath);
335
+ }
336
+ }
337
+
338
/**
 * Direct OMML extraction from Word document (fallback if Pandoc fails).
 * Reads word/document.xml from the docx archive, finds the OMML equation
 * markup with a regular expression and attempts to convert each match to
 * LaTeX.
 *
 * Every matched equation produces one entry in the result: a converted
 * entry when LaTeX was obtained, otherwise an entry with `type: 'unknown'`,
 * `latex: null` and `error: 'Conversion failed'`, so callers can tell how
 * many equations exist versus how many were converted.
 *
 * @param docxPath - Path to the .docx file.
 * @returns `success: false` with an error message when the archive cannot
 *   be read or has no word/document.xml; otherwise the list of equations.
 */
async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
  try {
    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');

    if (!documentEntry) {
      return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
    }

    const documentXml = zip.readAsText(documentEntry);

    // Find all OMML equations. The opening part of the pattern also matches
    // `<m:oMathPara ...>`, so a match may begin at the paragraph wrapper and
    // run to the first `</m:oMath>`; isDisplayMath() looks for that wrapper.
    const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
    const matches = documentXml.match(ommlPattern) || [];

    if (matches.length === 0) {
      return { success: true, equations: [] };
    }

    // Try to convert OMML to LaTeX via MathML intermediate
    const Converter = await getMathMLConverter();
    const equations: WordEquationResult['equations'] = [];

    for (let i = 0; i < matches.length; i++) {
      const omml = matches[i];
      if (!omml) continue;

      // Short excerpt of the source markup kept alongside each entry.
      const raw = omml.substring(0, 100) + '...';

      // Attempt OMML → MathML → LaTeX conversion
      // Note: This is a simplified approach; full OMML→MathML requires XSLT
      let latex: string | null = null;
      try {
        latex = await ommlToLatex(omml, Converter);
      } catch {
        latex = null;
      }

      if (latex) {
        equations.push({
          type: isDisplayMath(omml) ? 'display' : 'inline',
          latex,
          position: i,
          raw,
        });
      } else {
        // ommlToLatex reports failure by returning null rather than by
        // throwing, so a missing result is recorded here instead of being
        // dropped: the equation exists even though it was not converted.
        equations.push({
          type: 'unknown',
          latex: null,
          position: i,
          raw,
          error: 'Conversion failed',
        });
      }
    }

    return { success: true, equations };
  } catch (err: unknown) {
    // Thrown values are not guaranteed to be Error instances; make sure the
    // caller always receives a message string.
    const message = err instanceof Error ? err.message : String(err);
    return { success: false, equations: [], error: message };
  }
}
398
+
399
/**
 * Decide whether an OMML fragment is display math.
 * The fragment counts as display math when it contains either of the
 * substrings `<m:oMathPara` or `m:jc`; anything else is treated as inline.
 */
function isDisplayMath(omml: string): boolean {
  const displayMarkers = ['<m:oMathPara', 'm:jc'];
  return displayMarkers.some((marker) => omml.includes(marker));
}
405
+
406
/**
 * Best-effort OMML → LaTeX conversion.
 * Builds an approximate MathML string with ommlToMathML() and hands it to
 * `Converter.convert`. Yields null when no converter is supplied, when no
 * MathML could be built, or when either step throws synchronously.
 * For complex equations, the Pandoc method is more reliable.
 */
async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
  if (Converter) {
    // Not all OMML features are supported; any synchronous failure in the
    // two steps below is reported as "no result" instead of propagating.
    try {
      const approximateMathML = ommlToMathML(omml);
      return approximateMathML ? Converter.convert(approximateMathML) : null;
    } catch {
      return null;
    }
  }
  return null;
}
427
+
428
/**
 * Convert OMML to MathML (simplified).
 * Maps common OMML elements to MathML equivalents and discards everything
 * else (property elements, WordprocessingML formatting, unmapped OMML
 * containers), keeping their text content in place.
 *
 * Supported structure: runs, fractions, superscripts, subscripts,
 * sub-superscripts and radicals. This is an approximation, not a full
 * OMML→MathML transform; e.g. a radical's degree is not turned into
 * `<mroot>`.
 *
 * @param omml - OMML markup, with or without the `m:` namespace prefix.
 * @returns A MathML string whose root is a `<math>` element. If the input
 *   contains no `oMath` element, the converted content is wrapped in one.
 */
function ommlToMathML(omml: string): string | null {
  const mathOpenTag = '<math xmlns="http://www.w3.org/1998/Math/MathML">';

  // OMML element name → MathML element name. The script containers
  // (sSup/sSub/sSubSup) become the MathML script element, and each of their
  // children (e, sub, sup) becomes one <mrow> argument, so the MathML
  // element receives base and script(s) in document order.
  const tagMap = new Map<string, string>([
    ['oMath', 'math'],
    ['r', 'mi'],
    ['f', 'mfrac'],
    ['num', 'mrow'],
    ['den', 'mrow'],
    ['e', 'mrow'],
    ['sSup', 'msup'],
    ['sSub', 'msub'],
    ['sSubSup', 'msubsup'],
    ['sup', 'mrow'],
    ['sub', 'mrow'],
    ['rad', 'msqrt'],
  ]);

  const stripped = omml
    // Remove namespace prefixes for easier parsing
    .replace(/<m:/g, '<')
    .replace(/<\/m:/g, '</')
    .replace(/<w:/g, '<w_')
    .replace(/<\/w:/g, '</w_')
    // Self-closing tags carry no content. They are removed before the
    // paired removals below so that a self-closing <w_.../> can never be
    // taken as the start of a pair and swallow the markup that follows it.
    .replace(/<[^<>]*\/>/g, '')
    // Property elements and WordprocessingML formatting: drop with content
    .replace(/<rPr>[\s\S]*?<\/rPr>/g, '')
    .replace(/<ctrlPr>[\s\S]*?<\/ctrlPr>/g, '')
    .replace(/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/g, '');

  // Single pass over every remaining tag: mapped names are rewritten to
  // MathML, all other tags are dropped (their text content stays). Doing
  // this in one pass means the MathML tags produced here are never seen by
  // a later "remove unrecognized tags" step.
  let xml = stripped.replace(
    /<(\/?)([A-Za-z_][\w:.-]*)(?:\s[^>]*)?>/g,
    (_tag: string, slash: string, name: string): string => {
      const mapped = tagMap.get(name);
      if (mapped === undefined) return '';
      if (slash) return `</${mapped}>`;
      return mapped === 'math' ? mathOpenTag : `<${mapped}>`;
    }
  );

  // Wrap in math if not already
  if (!xml.includes('<math')) {
    xml = `${mathOpenTag}${xml}</math>`;
  }

  return xml;
}
483
+
484
/**
 * Summarize the equations found in a Word document.
 * Runs extractEquationsFromWord() and counts the returned entries: how many
 * there are in total, how many are typed 'display' or 'inline', and how
 * many carry a non-empty LaTeX string. When extraction reports failure, all
 * counts are zero and the extraction error is passed through.
 */
export async function getWordEquationStats(
  docxPath: string
): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
  const result = await extractEquationsFromWord(docxPath);

  if (!result.success) {
    return { count: 0, display: 0, inline: 0, converted: 0, error: result.error };
  }

  // One pass over the equations instead of one filter per statistic.
  const tally = { display: 0, inline: 0, converted: 0 };
  for (const eq of result.equations) {
    if (eq.type === 'display') {
      tally.display += 1;
    } else if (eq.type === 'inline') {
      tally.inline += 1;
    }
    if (eq.latex) {
      tally.converted += 1;
    }
  }

  return {
    count: result.equations.length,
    display: tally.display,
    inline: tally.inline,
    converted: tally.converted,
  };
}