docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
package/lib/equations.ts CHANGED
@@ -1,506 +1,506 @@
1
- /**
2
- * Equation extraction and conversion utilities
3
- * Handle LaTeX math in Markdown ↔ Word workflows
4
- *
5
- * Supports:
6
- * - Extract LaTeX equations from Markdown
7
- * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
- * - Convert Markdown with equations to Word (LaTeX → MathML)
9
- */
10
-
11
- import * as fs from 'fs';
12
- import * as path from 'path';
13
- import { exec } from 'child_process';
14
- import { promisify } from 'util';
15
- import AdmZip from 'adm-zip';
16
- import { parseString } from 'xml2js';
17
- import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
-
19
- const execAsync = promisify(exec);
20
- const parseXml = promisify(parseString);
21
-
22
- // Dynamic import for mathml-to-latex (ESM)
23
- let MathMLToLaTeX: any = null;
24
- async function getMathMLConverter(): Promise<any> {
25
- if (!MathMLToLaTeX) {
26
- try {
27
- const module = await import('mathml-to-latex');
28
- MathMLToLaTeX = module.MathMLToLaTeX;
29
- } catch {
30
- return null;
31
- }
32
- }
33
- return MathMLToLaTeX;
34
- }
35
-
36
- /**
37
- * Extract all equations from markdown text
38
- */
39
- export function extractEquations(text: string, file: string = ''): Equation[] {
40
- const equations: Equation[] = [];
41
- const lines = text.split('\n');
42
-
43
- let inDisplayMath = false;
44
- let displayMathStart = 0;
45
- let displayMathContent = '';
46
-
47
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
48
- const line = lines[lineNum];
49
- if (!line) continue;
50
-
51
- // Skip code blocks
52
- if (line.trim().startsWith('```')) continue;
53
-
54
- // Handle inline math ($...$) in a segment of text
55
- // Careful not to match $$ or escaped \$
56
- const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;
57
- const extractInline = (segment: string): void => {
58
- let match;
59
- inlinePattern.lastIndex = 0;
60
- while ((match = inlinePattern.exec(segment)) !== null) {
61
- const content = match[1];
62
- if (content) {
63
- equations.push({
64
- type: 'inline',
65
- content: content.trim(),
66
- line: lineNum + 1,
67
- file,
68
- });
69
- }
70
- }
71
- };
72
-
73
- // Handle display math blocks ($$...$$)
74
- if (line.includes('$$')) {
75
- const parts = line.split('$$');
76
-
77
- if (!inDisplayMath && parts.length >= 3) {
78
- // Single-line display math: $$content$$
79
- // Also extract inline math from surrounding text
80
- if (parts[0]) extractInline(parts[0]); // Text before $$
81
- for (let i = 1; i < parts.length; i += 2) {
82
- const part = parts[i];
83
- if (part && part.trim()) {
84
- equations.push({
85
- type: 'display',
86
- content: part.trim(),
87
- line: lineNum + 1,
88
- file,
89
- });
90
- }
91
- }
92
- // Extract inline from text after the last $$
93
- const lastPart = parts[parts.length - 1];
94
- if (parts.length % 2 === 1 && lastPart) {
95
- extractInline(lastPart);
96
- }
97
- } else if (!inDisplayMath) {
98
- // Start of multi-line display math
99
- if (parts[0]) extractInline(parts[0]); // Text before $$
100
- inDisplayMath = true;
101
- displayMathStart = lineNum + 1;
102
- displayMathContent = parts[1] || '';
103
- } else {
104
- // End of multi-line display math
105
- inDisplayMath = false;
106
- displayMathContent += '\n' + (parts[0] || '');
107
- if (displayMathContent.trim()) {
108
- equations.push({
109
- type: 'display',
110
- content: displayMathContent.trim(),
111
- line: displayMathStart,
112
- file,
113
- });
114
- }
115
- displayMathContent = '';
116
- // Text after $$ on closing line
117
- const afterPart = parts[1];
118
- if (afterPart) {
119
- extractInline(afterPart);
120
- }
121
- }
122
- continue;
123
- }
124
-
125
- if (inDisplayMath) {
126
- displayMathContent += '\n' + line;
127
- continue;
128
- }
129
-
130
- // No display math on this line - extract inline math
131
- extractInline(line);
132
- }
133
-
134
- return equations;
135
- }
136
-
137
- /**
138
- * Generate a markdown document with numbered equations
139
- * Useful for creating an equation reference sheet
140
- */
141
- export function generateEquationSheet(equations: Equation[]): string {
142
- const lines: string[] = [];
143
- lines.push('# Equations');
144
- lines.push('');
145
-
146
- let displayNum = 0;
147
- let inlineNum = 0;
148
-
149
- // Group by file
150
- const byFile = new Map<string, Equation[]>();
151
- for (const eq of equations) {
152
- if (!byFile.has(eq.file)) {
153
- byFile.set(eq.file, []);
154
- }
155
- byFile.get(eq.file)!.push(eq);
156
- }
157
-
158
- for (const [file, fileEqs] of byFile) {
159
- if (file) {
160
- lines.push(`## ${file}`);
161
- lines.push('');
162
- }
163
-
164
- for (const eq of fileEqs) {
165
- if (eq.type === 'display') {
166
- displayNum++;
167
- lines.push(`### Equation ${displayNum} (line ${eq.line})`);
168
- lines.push('');
169
- lines.push('```latex');
170
- lines.push(eq.content);
171
- lines.push('```');
172
- lines.push('');
173
- lines.push('$$' + eq.content + '$$');
174
- lines.push('');
175
- } else {
176
- inlineNum++;
177
- lines.push(`- **Inline ${inlineNum}** (line ${eq.line}): \`$${eq.content}$\` → $${eq.content}$`);
178
- }
179
- }
180
- lines.push('');
181
- }
182
-
183
- lines.push('---');
184
- lines.push(`Total: ${displayNum} display equations, ${inlineNum} inline equations`);
185
-
186
- return lines.join('\n');
187
- }
188
-
189
- interface ConvertToWordOptions {
190
- preserveLatex?: boolean;
191
- }
192
-
193
- /**
194
- * Convert markdown with equations to Word using pandoc
195
- */
196
- export async function convertToWord(
197
- inputPath: string,
198
- outputPath: string,
199
- options: ConvertToWordOptions = {}
200
- ): Promise<{ success: boolean; message: string }> {
201
- const { preserveLatex = false } = options;
202
-
203
- // Check pandoc is available
204
- try {
205
- await execAsync('pandoc --version');
206
- } catch {
207
- return { success: false, message: 'Pandoc not found. Install pandoc first.' };
208
- }
209
-
210
- // Build pandoc command
211
- // Use --mathml for better equation rendering in Word
212
- const args = [
213
- 'pandoc',
214
- `"${inputPath}"`,
215
- '-o', `"${outputPath}"`,
216
- '--mathml', // Better equation support in Word
217
- ];
218
-
219
- if (preserveLatex) {
220
- // Keep raw LaTeX (less compatible but preserves source)
221
- args.push('--wrap=preserve');
222
- }
223
-
224
- try {
225
- await execAsync(args.join(' '));
226
- return { success: true, message: `Created ${outputPath}` };
227
- } catch (err: any) {
228
- return { success: false, message: err.message };
229
- }
230
- }
231
-
232
- /**
233
- * Create a simple equations-only document
234
- */
235
- export async function createEquationsDoc(
236
- inputPath: string,
237
- outputPath: string
238
- ): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
239
- if (!fs.existsSync(inputPath)) {
240
- return { success: false, message: `File not found: ${inputPath}`, stats: null };
241
- }
242
-
243
- const text = fs.readFileSync(inputPath, 'utf-8');
244
- const equations = extractEquations(text, path.basename(inputPath));
245
-
246
- if (equations.length === 0) {
247
- return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
248
- }
249
-
250
- const sheet = generateEquationSheet(equations);
251
- const stats = {
252
- display: equations.filter(e => e.type === 'display').length,
253
- inline: equations.filter(e => e.type === 'inline').length,
254
- };
255
-
256
- const ext = path.extname(outputPath).toLowerCase();
257
-
258
- if (ext === '.docx') {
259
- // Write temp md, convert to docx
260
- const tempMd = outputPath.replace('.docx', '.tmp.md');
261
- fs.writeFileSync(tempMd, sheet, 'utf-8');
262
- const result = await convertToWord(tempMd, outputPath);
263
- fs.unlinkSync(tempMd);
264
- return { ...result, stats };
265
- } else {
266
- // Write as markdown
267
- fs.writeFileSync(outputPath, sheet, 'utf-8');
268
- return { success: true, message: `Created ${outputPath}`, stats };
269
- }
270
- }
271
-
272
- /**
273
- * Get equation statistics for a file or directory
274
- */
275
- export function getEquationStats(files: string[]): EquationStats {
276
- let totalDisplay = 0;
277
- let totalInline = 0;
278
- const byFile: Array<{ file: string; display: number; inline: number }> = [];
279
-
280
- for (const file of files) {
281
- if (!fs.existsSync(file)) continue;
282
- const text = fs.readFileSync(file, 'utf-8');
283
- const equations = extractEquations(text, path.basename(file));
284
-
285
- const display = equations.filter(e => e.type === 'display').length;
286
- const inline = equations.filter(e => e.type === 'inline').length;
287
-
288
- totalDisplay += display;
289
- totalInline += inline;
290
-
291
- if (display > 0 || inline > 0) {
292
- byFile.push({ file: path.basename(file), display, inline });
293
- }
294
- }
295
-
296
- return {
297
- total: totalDisplay + totalInline,
298
- display: totalDisplay,
299
- inline: totalInline,
300
- byFile,
301
- };
302
- }
303
-
304
- /**
305
- * Extract equations from a Word document using Pandoc
306
- * Converts OMML (Office Math Markup) to LaTeX
307
- */
308
- export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
309
- if (!fs.existsSync(docxPath)) {
310
- return { success: false, equations: [], error: `File not found: ${docxPath}` };
311
- }
312
-
313
- // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
314
- try {
315
- const { stdout } = await execAsync(
316
- `pandoc "${docxPath}" -t markdown --wrap=none`,
317
- { maxBuffer: 50 * 1024 * 1024 }
318
- );
319
-
320
- // Extract equations from the markdown output
321
- const equations = extractEquations(stdout, path.basename(docxPath));
322
-
323
- return {
324
- success: true,
325
- equations: equations.map((eq, i) => ({
326
- type: eq.type,
327
- latex: eq.content,
328
- position: i,
329
- line: eq.line,
330
- })),
331
- };
332
- } catch (err) {
333
- // Pandoc failed, try fallback method
334
- return extractEquationsFromWordDirect(docxPath);
335
- }
336
- }
337
-
338
- /**
339
- * Direct OMML extraction from Word document (fallback if Pandoc fails)
340
- * Parses document.xml for <m:oMath> elements and attempts conversion
341
- */
342
- async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
343
- try {
344
- const zip = new AdmZip(docxPath);
345
- const documentEntry = zip.getEntry('word/document.xml');
346
-
347
- if (!documentEntry) {
348
- return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
349
- }
350
-
351
- const documentXml = zip.readAsText(documentEntry);
352
-
353
- // Find all OMML equations (<m:oMath> or <m:oMathPara>)
354
- const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
355
- const matches = documentXml.match(ommlPattern) || [];
356
-
357
- if (matches.length === 0) {
358
- return { success: true, equations: [] };
359
- }
360
-
361
- // Try to convert OMML to LaTeX via MathML intermediate
362
- const Converter = await getMathMLConverter();
363
- const equations: WordEquationResult['equations'] = [];
364
-
365
- for (let i = 0; i < matches.length; i++) {
366
- const omml = matches[i];
367
- if (!omml) continue;
368
-
369
- // Attempt OMML → MathML → LaTeX conversion
370
- // Note: This is a simplified approach; full OMML→MathML requires XSLT
371
- try {
372
- const latex = await ommlToLatex(omml, Converter);
373
- if (latex) {
374
- equations.push({
375
- type: isDisplayMath(omml) ? 'display' : 'inline',
376
- latex,
377
- position: i,
378
- raw: omml.substring(0, 100) + '...',
379
- });
380
- }
381
- } catch {
382
- // Keep raw OMML reference if conversion fails
383
- equations.push({
384
- type: 'unknown',
385
- latex: null,
386
- position: i,
387
- raw: omml.substring(0, 100) + '...',
388
- error: 'Conversion failed',
389
- });
390
- }
391
- }
392
-
393
- return { success: true, equations };
394
- } catch (err: any) {
395
- return { success: false, equations: [], error: err.message };
396
- }
397
- }
398
-
399
- /**
400
- * Check if OMML represents display math (equation on its own line)
401
- */
402
- function isDisplayMath(omml: string): boolean {
403
- return omml.includes('<m:oMathPara') || omml.includes('m:jc');
404
- }
405
-
406
- /**
407
- * Convert OMML to LaTeX (simplified approach)
408
- * For complex equations, Pandoc method is more reliable
409
- */
410
- async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
411
- if (!Converter) return null;
412
-
413
- // Extract key elements from OMML and build approximate MathML
414
- // This is a simplified conversion - not all OMML features are supported
415
- try {
416
- // Build basic MathML from OMML structure
417
- const mathml = ommlToMathML(omml);
418
- if (!mathml) return null;
419
-
420
- // Convert MathML to LaTeX
421
- const latex = Converter.convert(mathml);
422
- return latex;
423
- } catch {
424
- return null;
425
- }
426
- }
427
-
428
- /**
429
- * Convert OMML to MathML (simplified)
430
- * Maps common OMML elements to MathML equivalents
431
- */
432
- function ommlToMathML(omml: string): string | null {
433
- // Remove namespace prefixes for easier parsing
434
- let xml = omml
435
- .replace(/<m:/g, '<')
436
- .replace(/<\/m:/g, '</')
437
- .replace(/<w:/g, '<w_')
438
- .replace(/<\/w:/g, '</w_');
439
-
440
- // Map OMML elements to MathML
441
- const mappings: Array<[RegExp, string]> = [
442
- [/<oMath[^>]*>/gi, '<math xmlns="http://www.w3.org/1998/Math/MathML">'],
443
- [/<\/oMath>/gi, '</math>'],
444
- [/<r>/gi, '<mi>'],
445
- [/<\/r>/gi, '</mi>'],
446
- [/<t>/gi, ''],
447
- [/<\/t>/gi, ''],
448
- [/<f>/gi, '<mfrac>'],
449
- [/<\/f>/gi, '</mfrac>'],
450
- [/<num>/gi, '<mrow>'],
451
- [/<\/num>/gi, '</mrow>'],
452
- [/<den>/gi, '<mrow>'],
453
- [/<\/den>/gi, '</mrow>'],
454
- [/<sup>/gi, '<msup><mrow>'],
455
- [/<\/sup>/gi, '</mrow></msup>'],
456
- [/<sub>/gi, '<msub><mrow>'],
457
- [/<\/sub>/gi, '</mrow></msub>'],
458
- [/<rad>/gi, '<msqrt>'],
459
- [/<\/rad>/gi, '</msqrt>'],
460
- [/<e>/gi, '<mrow>'],
461
- [/<\/e>/gi, '</mrow>'],
462
- // Remove elements we don't map
463
- [/<rPr>[\s\S]*?<\/rPr>/gi, ''],
464
- [/<ctrlPr>[\s\S]*?<\/ctrlPr>/gi, ''],
465
- [/<w_[^>]*>[\s\S]*?<\/w_[^>]*>/gi, ''],
466
- [/<[^>]*\/>/gi, ''], // Self-closing tags
467
- ];
468
-
469
- for (const [pattern, replacement] of mappings) {
470
- xml = xml.replace(pattern, replacement);
471
- }
472
-
473
- // Clean up any remaining unrecognized tags
474
- xml = xml.replace(/<[a-zA-Z][^>]*>/g, '').replace(/<\/[a-zA-Z]+>/g, '');
475
-
476
- // Wrap in math if not already
477
- if (!xml.includes('<math')) {
478
- xml = `<math xmlns="http://www.w3.org/1998/Math/MathML">${xml}</math>`;
479
- }
480
-
481
- return xml;
482
- }
483
-
484
- /**
485
- * Get equation summary from Word document
486
- */
487
- export async function getWordEquationStats(
488
- docxPath: string
489
- ): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
490
- const result = await extractEquationsFromWord(docxPath);
491
-
492
- if (!result.success) {
493
- return { count: 0, display: 0, inline: 0, converted: 0, error: result.error };
494
- }
495
-
496
- const display = result.equations.filter(e => e.type === 'display').length;
497
- const inline = result.equations.filter(e => e.type === 'inline').length;
498
- const converted = result.equations.filter(e => e.latex).length;
499
-
500
- return {
501
- count: result.equations.length,
502
- display,
503
- inline,
504
- converted,
505
- };
506
- }
1
+ /**
2
+ * Equation extraction and conversion utilities
3
+ * Handle LaTeX math in Markdown ↔ Word workflows
4
+ *
5
+ * Supports:
6
+ * - Extract LaTeX equations from Markdown
7
+ * - Extract equations from Word documents (OMML → LaTeX via Pandoc)
8
+ * - Convert Markdown with equations to Word (LaTeX → MathML)
9
+ */
10
+
import * as fs from 'fs';
import * as path from 'path';
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import AdmZip from 'adm-zip';
import { parseString } from 'xml2js';
import type { Equation, EquationStats, WordEquationResult } from './types.js';
18
+
// Promise-returning wrappers around the callback-style APIs imported above
const execAsync = promisify(exec);
const parseXml = promisify(parseString);

// Cached `MathMLToLaTeX` export of the dynamically imported
// mathml-to-latex package (ESM); stays null until a load succeeds.
let MathMLToLaTeX: any = null;

/**
 * Lazily load the MathML → LaTeX converter.
 *
 * The package is imported on first use and the result is cached in the
 * module-level `MathMLToLaTeX` variable for later calls.
 *
 * @returns The cached converter, or null when the import fails
 */
async function getMathMLConverter(): Promise<any> {
  if (MathMLToLaTeX) {
    return MathMLToLaTeX;
  }

  try {
    const loaded = await import('mathml-to-latex');
    MathMLToLaTeX = loaded.MathMLToLaTeX;
  } catch {
    // Import failed: report "no converter" instead of throwing
    return null;
  }

  return MathMLToLaTeX;
}
35
+
/**
 * Extract all equations from markdown text.
 *
 * Recognizes inline math (`$...$`) and display math (`$$...$$`, either on a
 * single line or spanning several lines). Lines between a pair of ``` fence
 * lines are ignored, so dollar signs inside fenced code are not reported as
 * math. An unterminated fence or an unterminated `$$` block suppresses
 * everything after it.
 *
 * @param text - Markdown source; both LF and CRLF line endings are accepted
 * @param file - Label stored on every returned equation (defaults to '')
 * @returns Equations in document order with 1-based line numbers; a
 *   multi-line display equation reports the line of its opening `$$`
 */
export function extractEquations(text: string, file: string = ''): Equation[] {
  const equations: Equation[] = [];
  // Split on CRLF as well as LF so '\r' never leaks into multi-line content
  const lines = text.split(/\r?\n/);

  // Inline math ($...$). Careful not to match $$ or escaped \$.
  // Built once per call rather than once per line.
  const inlinePattern = /(?<![\$\\])\$(?!\$)([^$\n]+)\$(?!\$)/g;

  // A fence delimiter: three or more backticks followed by an optional info
  // string that contains no backtick. A line such as "```code```" is therefore
  // not treated as opening or closing a block.
  const fencePattern = /^`{3,}[^`]*$/;

  // Collect every inline equation found in one segment of a line
  const extractInline = (segment: string, lineNumber: number): void => {
    inlinePattern.lastIndex = 0;
    let match: RegExpExecArray | null;
    while ((match = inlinePattern.exec(segment)) !== null) {
      const content = match[1];
      if (content) {
        equations.push({
          type: 'inline',
          content: content.trim(),
          line: lineNumber,
          file,
        });
      }
    }
  };

  let inCodeBlock = false;
  let inDisplayMath = false;
  let displayMathStart = 0;
  let displayMathContent = '';

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    if (!line) continue;
    const lineNumber = lineNum + 1;

    // Skip code blocks: the fence lines themselves and everything between them
    const trimmed = line.trim();
    if (trimmed.startsWith('```')) {
      if (!inDisplayMath && fencePattern.test(trimmed)) {
        inCodeBlock = !inCodeBlock;
      }
      continue;
    }
    if (inCodeBlock) continue;

    // Handle display math blocks ($$...$$)
    if (line.includes('$$')) {
      const parts = line.split('$$');

      if (!inDisplayMath && parts.length >= 3) {
        // Single-line display math: $$content$$
        // Odd-indexed parts sit between a pair of $$ delimiters
        if (parts[0]) extractInline(parts[0], lineNumber); // Text before $$
        for (let i = 1; i < parts.length; i += 2) {
          const part = parts[i];
          if (part && part.trim()) {
            equations.push({
              type: 'display',
              content: part.trim(),
              line: lineNumber,
              file,
            });
          }
        }
        // An odd part count means the last part is plain text after the
        // final closing $$, so it may still hold inline math
        const lastPart = parts[parts.length - 1];
        if (parts.length % 2 === 1 && lastPart) {
          extractInline(lastPart, lineNumber);
        }
      } else if (!inDisplayMath) {
        // Start of multi-line display math
        if (parts[0]) extractInline(parts[0], lineNumber); // Text before $$
        inDisplayMath = true;
        displayMathStart = lineNumber;
        displayMathContent = parts[1] || '';
      } else {
        // End of multi-line display math
        inDisplayMath = false;
        displayMathContent += '\n' + (parts[0] || '');
        if (displayMathContent.trim()) {
          equations.push({
            type: 'display',
            content: displayMathContent.trim(),
            line: displayMathStart,
            file,
          });
        }
        displayMathContent = '';
        // Text after $$ on the closing line
        const afterPart = parts[1];
        if (afterPart) {
          extractInline(afterPart, lineNumber);
        }
      }
      continue;
    }

    if (inDisplayMath) {
      displayMathContent += '\n' + line;
      continue;
    }

    // No display math on this line - extract inline math
    extractInline(line, lineNumber);
  }

  return equations;
}
136
+
/**
 * Render a markdown reference sheet listing the given equations.
 *
 * Equations are grouped under a `## <file>` heading per file (files appear in
 * first-seen order; an empty file name gets no heading). Display equations
 * are shown as a LaTeX code block followed by the rendered `$$…$$` form;
 * inline equations become bullet items. Numbering runs across all files and
 * a totals line closes the sheet.
 *
 * @param equations - Equations to list
 * @returns The sheet as a single newline-joined string
 */
export function generateEquationSheet(equations: Equation[]): string {
  // Bucket equations per file, keeping first-seen file order
  const grouped = new Map<string, Equation[]>();
  for (const equation of equations) {
    const bucket = grouped.get(equation.file);
    if (bucket) {
      bucket.push(equation);
    } else {
      grouped.set(equation.file, [equation]);
    }
  }

  const out: string[] = ['# Equations', ''];
  let displayCount = 0;
  let inlineCount = 0;

  grouped.forEach((group, fileName) => {
    if (fileName) {
      out.push(`## ${fileName}`, '');
    }

    for (const { type, content, line } of group) {
      if (type !== 'display') {
        inlineCount += 1;
        out.push(`- **Inline ${inlineCount}** (line ${line}): \`$${content}$\` → $${content}$`);
        continue;
      }

      displayCount += 1;
      out.push(
        `### Equation ${displayCount} (line ${line})`,
        '',
        '```latex',
        content,
        '```',
        '',
        '$$' + content + '$$',
        ''
      );
    }

    // Blank line separating this file's section from what follows
    out.push('');
  });

  out.push('---', `Total: ${displayCount} display equations, ${inlineCount} inline equations`);
  return out.join('\n');
}
188
+
interface ConvertToWordOptions {
  /** When true, `--wrap=preserve` is appended to the pandoc arguments. */
  preserveLatex?: boolean;
}

/**
 * Convert markdown with equations to Word using pandoc.
 *
 * pandoc is started directly with an argument vector instead of through a
 * shell command string, so paths containing spaces, quotes, `$` or backticks
 * are handed to pandoc verbatim and cannot alter the command.
 *
 * @param inputPath - Markdown file to convert
 * @param outputPath - Destination path passed to pandoc's `-o`
 * @param options - Conversion options
 * @returns `success` plus a human-readable message; pandoc failures are
 *   reported through the result rather than by rejecting
 */
export async function convertToWord(
  inputPath: string,
  outputPath: string,
  options: ConvertToWordOptions = {}
): Promise<{ success: boolean; message: string }> {
  const { preserveLatex = false } = options;
  const execFileAsync = promisify(execFile);

  // Check pandoc is available
  try {
    await execFileAsync('pandoc', ['--version']);
  } catch {
    return { success: false, message: 'Pandoc not found. Install pandoc first.' };
  }

  // Build pandoc arguments
  // NOTE(review): the pandoc manual lists --mathml for HTML-family, EPUB,
  // DocBook and JATS output; confirm it has any effect on docx output.
  const args = [inputPath, '-o', outputPath, '--mathml'];

  if (preserveLatex) {
    // NOTE(review): --wrap=preserve controls line wrapping in the output;
    // verify this is what "preserve LaTeX" is meant to do.
    args.push('--wrap=preserve');
  }

  try {
    await execFileAsync('pandoc', args);
    return { success: true, message: `Created ${outputPath}` };
  } catch (err: unknown) {
    return { success: false, message: err instanceof Error ? err.message : String(err) };
  }
}
231
+
/**
 * Create a simple equations-only document.
 *
 * Reads `inputPath`, extracts its equations and writes an equation sheet to
 * `outputPath`. When the output extension is `.docx` (compared without regard
 * to case) the sheet is first written to a sibling `<name>.tmp.md` file,
 * converted with `convertToWord`, and the temporary file is removed again.
 * Any other extension gets the markdown sheet written directly.
 *
 * @param inputPath - Markdown file to scan for equations
 * @param outputPath - Where to write the sheet (`.docx` or markdown)
 * @returns Outcome, message and per-type counts; `stats` is null only when
 *   the input file does not exist
 */
export async function createEquationsDoc(
  inputPath: string,
  outputPath: string
): Promise<{ success: boolean; message: string; stats: { display: number; inline: number } | null }> {
  if (!fs.existsSync(inputPath)) {
    return { success: false, message: `File not found: ${inputPath}`, stats: null };
  }

  const text = fs.readFileSync(inputPath, 'utf-8');
  const equations = extractEquations(text, path.basename(inputPath));

  if (equations.length === 0) {
    return { success: false, message: 'No equations found', stats: { display: 0, inline: 0 } };
  }

  const sheet = generateEquationSheet(equations);
  const stats = {
    display: equations.filter(e => e.type === 'display').length,
    inline: equations.filter(e => e.type === 'inline').length,
  };

  const ext = path.extname(outputPath);

  if (ext.toLowerCase() !== '.docx') {
    // Write as markdown
    fs.writeFileSync(outputPath, sheet, 'utf-8');
    return { success: true, message: `Created ${outputPath}`, stats };
  }

  // Swap only the trailing extension, whatever its letter case. A
  // case-sensitive replace of the first '.docx' would leave "out.DOCX"
  // unchanged (the temp file would then BE the output and get deleted) and
  // would rewrite a '.docx' appearing earlier in a directory name.
  const tempMd = outputPath.slice(0, -ext.length) + '.tmp.md';
  fs.writeFileSync(tempMd, sheet, 'utf-8');
  try {
    const result = await convertToWord(tempMd, outputPath);
    return { ...result, stats };
  } finally {
    // Remove the temporary markdown even if the conversion throws
    fs.unlinkSync(tempMd);
  }
}
271
+
/**
 * Count display and inline equations across a list of files.
 *
 * Paths that do not exist are skipped. Only files containing at least one
 * equation get an entry in `byFile`, keyed by their base name.
 *
 * @param files - Paths of the files to scan
 * @returns Overall totals plus the per-file breakdown
 */
export function getEquationStats(files: string[]): EquationStats {
  const byFile: Array<{ file: string; display: number; inline: number }> = [];
  let displayTotal = 0;
  let inlineTotal = 0;

  for (const filePath of files) {
    if (fs.existsSync(filePath)) {
      const contents = fs.readFileSync(filePath, 'utf-8');
      const name = path.basename(filePath);
      const found = extractEquations(contents, name);

      // Tally both kinds in a single pass over this file's equations
      let display = 0;
      let inline = 0;
      for (const eq of found) {
        if (eq.type === 'display') {
          display += 1;
        } else if (eq.type === 'inline') {
          inline += 1;
        }
      }

      displayTotal += display;
      inlineTotal += inline;

      if (display + inline > 0) {
        byFile.push({ file: name, display, inline });
      }
    }
  }

  return {
    total: displayTotal + inlineTotal,
    display: displayTotal,
    inline: inlineTotal,
    byFile,
  };
}
303
+
/**
 * Extract equations from a Word document using Pandoc.
 * Converts OMML (Office Math Markup) to LaTeX.
 *
 * pandoc is started directly with an argument vector instead of through a
 * shell command string, so a path containing quotes, `$` or backticks cannot
 * alter the command. If the pandoc step fails for any reason, the direct
 * document.xml parser is used as a fallback.
 *
 * @param docxPath - Path of the .docx file to read
 * @returns The equations found, with `position` numbering them in order;
 *   a missing file yields `success: false` and an error message
 */
export async function extractEquationsFromWord(docxPath: string): Promise<WordEquationResult> {
  if (!fs.existsSync(docxPath)) {
    return { success: false, equations: [], error: `File not found: ${docxPath}` };
  }

  // Method 1: Use Pandoc to convert docx to markdown with LaTeX math
  try {
    const execFileAsync = promisify(execFile);
    const { stdout } = await execFileAsync(
      'pandoc',
      [docxPath, '-t', 'markdown', '--wrap=none'],
      { maxBuffer: 50 * 1024 * 1024 }
    );

    // Extract equations from the markdown output
    const equations = extractEquations(stdout, path.basename(docxPath));

    return {
      success: true,
      equations: equations.map((eq, i) => ({
        type: eq.type,
        latex: eq.content,
        position: i,
        line: eq.line,
      })),
    };
  } catch {
    // Pandoc failed, try fallback method
    return extractEquationsFromWordDirect(docxPath);
  }
}
337
+
338
/**
 * Direct OMML extraction from a Word document (fallback if Pandoc fails).
 *
 * Reads `word/document.xml` from the .docx archive, collects the OMML math
 * fragments with a regular expression and tries to convert each one to LaTeX
 * via `ommlToLatex`. Every fragment found is reported: fragments that cannot
 * be converted are kept with `latex: null` and an `error` marker instead of
 * being dropped, so callers can tell "no equations" apart from "equations
 * present but not convertible".
 *
 * @param docxPath - Path to the .docx file.
 * @returns `success: false` when the archive cannot be opened or has no
 *          `word/document.xml`; otherwise one entry per OMML fragment, with
 *          `position` being the fragment's index in document order.
 */
async function extractEquationsFromWordDirect(docxPath: string): Promise<WordEquationResult> {
  try {
    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');

    if (!documentEntry) {
      return { success: false, equations: [], error: 'Invalid docx: no document.xml' };
    }

    const documentXml = zip.readAsText(documentEntry);

    // Find all OMML equations. `[^>]*` after `<m:oMath` also accepts the
    // `<m:oMathPara...>` opener, so a match may begin at the paragraph wrapper
    // and run up to the first `</m:oMath>`; that prefix is what the
    // display/inline check below looks at.
    const ommlPattern = /<m:oMath[^>]*>[\s\S]*?<\/m:oMath>/gi;
    const matches = documentXml.match(ommlPattern) ?? [];

    if (matches.length === 0) {
      return { success: true, equations: [] };
    }

    // Try to convert OMML to LaTeX via MathML intermediate
    const Converter = await getMathMLConverter();
    const equations: WordEquationResult['equations'] = [];

    for (const [position, omml] of matches.entries()) {
      const raw = omml.substring(0, 100) + '...';

      // Note: This is a simplified approach; full OMML→MathML requires XSLT.
      // A null/empty result means "could not convert" just as a throw does.
      let latex: string | null = null;
      try {
        latex = await ommlToLatex(omml, Converter);
      } catch {
        latex = null;
      }

      if (latex) {
        equations.push({
          type: isDisplayMath(omml) ? 'display' : 'inline',
          latex,
          position,
          raw,
        });
      } else {
        // Keep a raw OMML reference so the equation is still counted.
        equations.push({
          type: 'unknown',
          latex: null,
          position,
          raw,
          error: 'Conversion failed',
        });
      }
    }

    return { success: true, equations };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { success: false, equations: [], error: message };
  }
}
398
+
399
/**
 * Decide whether an OMML fragment should be treated as display math.
 *
 * The fragment counts as display math when it contains either the
 * `<m:oMathPara` opener or the substring `m:jc`.
 *
 * @param omml - Raw OMML markup of one equation.
 * @returns `true` when one of the display markers is present.
 */
function isDisplayMath(omml: string): boolean {
  const displayMarkers = ['<m:oMathPara', 'm:jc'];
  return displayMarkers.some(marker => omml.includes(marker));
}
405
+
406
/**
 * Convert one OMML fragment to LaTeX (simplified approach).
 *
 * The fragment is first rewritten to approximate MathML by `ommlToMathML`
 * and then handed to `Converter.convert`. Not every OMML feature survives
 * this route; for complex equations the Pandoc path is more reliable.
 *
 * @param omml - Raw OMML markup of one equation.
 * @param Converter - Object exposing `convert(mathml)`; a falsy value means
 *                    no converter is available.
 * @returns The converter's output, or `null` when there is no converter, no
 *          MathML could be built, or building/converting threw.
 */
async function ommlToLatex(omml: string, Converter: any): Promise<string | null> {
  if (Converter) {
    try {
      // Build basic MathML from the OMML structure, then convert it.
      const mathml = ommlToMathML(omml);
      return mathml ? Converter.convert(mathml) : null;
    } catch {
      return null;
    }
  }

  return null;
}
427
+
428
/**
 * Convert an OMML fragment to MathML (simplified).
 *
 * Only a small subset of OMML is mapped:
 *   oMath → math, r → mi, f/num/den → mfrac/mrow/mrow, e → mrow,
 *   sSup/sSub/sSubSup → msup/msub/msubsup (their sup/sub children → mrow),
 *   rad → msqrt (the root degree is not converted).
 * Self-closing tags and property containers (`*Pr` elements such as `m:rPr`,
 * `m:ctrlPr`, `w:rPr`) are removed together with their content. Every other
 * tag, including `m:t`, `m:oMathPara` and WordprocessingML wrappers, is
 * dropped while its text is kept.
 *
 * Tags are rewritten in a single pass so the MathML that is produced is never
 * re-scanned and stripped again, and only the exact `oMath` element (not
 * `oMathPara` / `oMathParaPr`) becomes `<math>`.
 *
 * @param omml - Raw OMML markup; `m:`-prefixed and unprefixed element names
 *               are both accepted.
 * @returns A MathML string wrapped in exactly one `<math>` element when the
 *          input holds at most one `oMath`. The current implementation never
 *          returns `null`; the return type is kept for callers.
 */
function ommlToMathML(omml: string): string | null {
  const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';

  // OMML element name → MathML element name.
  const tagMap = new Map<string, string>([
    ['oMath', 'math'],
    ['r', 'mi'],
    ['f', 'mfrac'],
    ['num', 'mrow'],
    ['den', 'mrow'],
    ['e', 'mrow'],
    // Script containers carry the structure; their parts are plain groups so
    // that msup/msub get the two children (base, script) MathML requires.
    ['sSup', 'msup'],
    ['sSub', 'msub'],
    ['sSubSup', 'msubsup'],
    ['sup', 'mrow'],
    ['sub', 'mrow'],
    ['rad', 'msqrt'],
  ]);

  const withoutProperties = omml
    // Self-closing tags carry no text. Removing them first also keeps an empty
    // `<m:rPr/>` from being read as the opener of a property element below.
    .replace(/<[^<>]*\/>/g, '')
    // Property containers and everything inside them (formatting only).
    .replace(/<((?:[A-Za-z][\w.-]*:)?\w+Pr)(?:\s[^>]*)?>[\s\S]*?<\/\1\s*>/g, '');

  // Rewrite mapped tags and drop all others in one pass.
  let xml = withoutProperties.replace(
    /<(\/?)(?:([A-Za-z][\w.-]*):)?([A-Za-z][\w.-]*)(?:\s[^>]*)?>/g,
    (_tag: string, slash: string, prefix: string | undefined, name: string): string => {
      const isMathNamespace = prefix === undefined || prefix === 'm';
      const mapped = isMathNamespace ? tagMap.get(name) : undefined;
      if (!mapped) return '';
      if (slash) return `</${mapped}>`;
      return mapped === 'math' ? `<math xmlns="${MATHML_NS}">` : `<${mapped}>`;
    }
  );

  // Wrap in math if not already
  if (!xml.includes('<math')) {
    xml = `<math xmlns="${MATHML_NS}">${xml}</math>`;
  }

  return xml;
}
483
+
484
/**
 * Summarize the equations found in a Word document.
 *
 * Runs `extractEquationsFromWord` and counts the result by kind.
 *
 * @param docxPath - Path to the .docx file.
 * @returns `count` is the number of extracted entries, `display` / `inline`
 *          count entries of that type, and `converted` counts entries with a
 *          non-empty `latex` value. When extraction fails every counter is 0
 *          and `error` carries the extraction error.
 */
export async function getWordEquationStats(
  docxPath: string
): Promise<{ count: number; display: number; inline: number; converted: number; error?: string }> {
  const { success, equations, error } = await extractEquationsFromWord(docxPath);

  if (!success) {
    return { count: 0, display: 0, inline: 0, converted: 0, error };
  }

  // One pass over the entries instead of one filter per counter.
  let display = 0;
  let inline = 0;
  let converted = 0;
  for (const equation of equations) {
    if (equation.type === 'display') {
      display += 1;
    } else if (equation.type === 'inline') {
      inline += 1;
    }
    if (equation.latex) {
      converted += 1;
    }
  }

  return { count: equations.length, display, inline, converted };
}