agent-reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,710 @@
1
+ import { promises as fs } from 'node:fs';
2
+ import os from 'node:os';
3
+ import path from 'node:path';
4
+ import { createRequire } from 'node:module';
5
+ import { fileURLToPath, pathToFileURL } from 'node:url';
6
+ import { execa } from 'execa';
7
+ import MarkdownIt from 'markdown-it';
8
+ import {
9
+ Document,
10
+ HeadingLevel,
11
+ Packer,
12
+ Paragraph,
13
+ ShadingType,
14
+ Table,
15
+ TableCell,
16
+ TableRow,
17
+ TextRun,
18
+ WidthType,
19
+ } from 'docx';
20
+
21
// ESM has no __dirname/require builtins; reconstruct them from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);
// Pandoc styling assets shipped alongside this module.
const REFERENCE_DOC_PATH = path.join(__dirname, 'templates', 'reference.docx');
const LUA_TABLE_FILTER = path.join(__dirname, 'templates', 'docx-table.lua');
// Repo-level script that restyles tables in the generated .docx (best-effort).
const POSTPROCESS_SCRIPT = path.join(__dirname, '..', '..', 'scripts', 'postprocess-docx.py');

// Shared Markdown parser: raw HTML disabled, autolinking + smart quotes enabled.
const markdownParser = new MarkdownIt({ html: false, linkify: true, typographer: true });
28
+
29
/** Wrap a plain string in a single-run docx Paragraph. */
function paragraphFromText(text) {
  const run = new TextRun(text);
  return new Paragraph({ children: [run] });
}
34
+
35
/**
 * Map a numeric heading depth (1-6) to a docx HeadingLevel.
 * Any depth outside 1-5 collapses to HEADING_6.
 */
function headingLevelFromNumber(level) {
  const byDepth = {
    1: HeadingLevel.HEADING_1,
    2: HeadingLevel.HEADING_2,
    3: HeadingLevel.HEADING_3,
    4: HeadingLevel.HEADING_4,
    5: HeadingLevel.HEADING_5,
  };
  return byDepth[level] ?? HeadingLevel.HEADING_6;
}
51
+
52
/**
 * Flatten a markdown-it inline token to plain text.
 * Keeps only textual child tokens (text, inline code, link delimiters, image
 * alt text); formatting markers such as strong/em open/close are dropped.
 * Falls back to the token's own content when it has no children.
 */
function inlineToPlainText(token) {
  const children = token.children;
  if (!children || children.length === 0) {
    return token.content || '';
  }

  const textual = new Set(['text', 'code_inline', 'link_open', 'link_close', 'image']);
  let result = '';
  for (const child of children) {
    if (textual.has(child.type)) {
      result += child.content || '';
    }
  }
  return result;
}
67
+
68
/**
 * Scan a markdown-it token stream and collect TOC entries.
 * Each entry is { level, text }; headings whose rendered text is empty are
 * skipped. The inline token immediately following `heading_open` carries
 * the heading text.
 */
function collectTocEntries(tokens) {
  const entries = [];
  tokens.forEach((token, index) => {
    if (token.type !== 'heading_open') {
      return;
    }
    const text = inlineToPlainText(tokens[index + 1]).trim();
    if (!text) {
      return;
    }
    // token.tag is "h1".."h6"; fall back to level 1 if it doesn't parse.
    const level = Number(token.tag.slice(1)) || 1;
    entries.push({ level, text });
  });
  return entries;
}
89
+
90
/**
 * Build paragraphs for a plain-text table of contents: a "目录" heading
 * followed by one space-indented line per entry and a trailing blank line.
 * Returns an empty array when there are no entries.
 */
function buildDocxTocParagraphs(entries) {
  if (entries.length === 0) {
    return [];
  }

  const heading = new Paragraph({
    text: '目录',
    heading: HeadingLevel.HEADING_1,
  });
  const lines = entries.map((entry) => {
    // Indent by heading depth, clamped to [0, 5].
    const depth = Math.max(0, Math.min(5, entry.level - 1));
    return paragraphFromText(`${' '.repeat(depth)}${entry.text}`);
  });

  return [heading, ...lines, paragraphFromText('')];
}
110
+
111
/**
 * Consume tokens from just after a `table_open` until the matching
 * `table_close`, building a full-width docx Table of plain-text cells.
 *
 * Returns { table, nextIndex }: `nextIndex` is the index of the
 * `table_close` token so the caller can resume after the table. If no
 * `table_close` is found, `table` is null and `nextIndex` is `startIndex`.
 */
function parseTable(tokens, startIndex) {
  const rows = [];
  let row = [];
  let cell = null; // non-null only while inside a th/td

  for (let i = startIndex + 1; i < tokens.length; i += 1) {
    const token = tokens[i];
    switch (token.type) {
      case 'table_close': {
        const docxRows = rows.map((cells) => new TableRow({
          children: cells.map((content) => new TableCell({
            children: [paragraphFromText(content || '')],
          })),
        }));
        return {
          table: new Table({
            width: { size: 100, type: WidthType.PERCENTAGE },
            rows: docxRows,
          }),
          nextIndex: i,
        };
      }
      case 'tr_open':
        row = [];
        break;
      case 'tr_close':
        rows.push(row);
        break;
      case 'th_open':
      case 'td_open':
        cell = '';
        break;
      case 'th_close':
      case 'td_close':
        row.push(cell || '');
        cell = null;
        break;
      case 'inline':
        if (cell !== null) {
          cell += inlineToPlainText(token);
        }
        break;
      default:
        break;
    }
  }

  // Malformed stream: never saw table_close.
  return { table: null, nextIndex: startIndex };
}
169
+
170
/**
 * Fallback Markdown → docx Document converter, used when Pandoc is absent.
 * Inline formatting is flattened to plain text. Supports headings,
 * paragraphs, bullet/ordered lists (rendered as "• "/"1. " text prefixes),
 * fenced/indented code blocks, and tables.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Document} docx Document with a single section.
 */
function markdownToDocx(markdown) {
  const tokens = markdownParser.parse(markdown, {});
  // (Removed a dead `collectTocEntries(tokens)` call whose result was never used.)
  const children = [];
  const listStack = [];
  let pendingBlock = null; // block context for the next inline token

  for (let i = 0; i < tokens.length; i += 1) {
    const token = tokens[i];

    if (token.type === 'heading_open') {
      pendingBlock = { type: 'heading', level: Number(token.tag.slice(1)) || 1 };
      continue;
    }

    if (token.type === 'paragraph_open') {
      pendingBlock = { type: 'paragraph' };
      continue;
    }

    if (token.type === 'bullet_list_open') {
      listStack.push({ type: 'bullet', count: 0 });
      continue;
    }

    if (token.type === 'ordered_list_open') {
      // Honor an explicit start="n" attribute; default to 1.
      listStack.push({ type: 'ordered', count: Number(token.attrGet('start')) || 1 });
      continue;
    }

    if (token.type === 'bullet_list_close' || token.type === 'ordered_list_close') {
      listStack.pop();
      continue;
    }

    if (token.type === 'list_item_close') {
      // Advance the ordinal only for numbered lists.
      const top = listStack[listStack.length - 1];
      if (top?.type === 'ordered') {
        top.count += 1;
      }
      continue;
    }

    if (token.type === 'fence' || token.type === 'code_block') {
      // Monospace paragraph with a light grey shade approximating a code block.
      children.push(
        new Paragraph({
          children: [new TextRun({ text: token.content, font: 'Consolas' })],
          shading: {
            type: ShadingType.CLEAR,
            color: 'auto',
            fill: 'F4F4F5',
          },
        }),
      );
      continue;
    }

    if (token.type === 'table_open') {
      const parsed = parseTable(tokens, i);
      if (parsed.table) {
        children.push(parsed.table);
      }
      i = parsed.nextIndex; // skip past the consumed table tokens
      continue;
    }

    // Everything else is driven by inline tokens.
    if (token.type !== 'inline') {
      continue;
    }

    const text = inlineToPlainText(token).trim();
    if (!text) {
      continue;
    }

    if (pendingBlock?.type === 'heading') {
      children.push(
        new Paragraph({
          text,
          heading: headingLevelFromNumber(pendingBlock.level),
        }),
      );
      pendingBlock = null;
      continue;
    }

    if (listStack.length > 0) {
      // List items become plain paragraphs with a textual "1. "/"• " prefix.
      const top = listStack[listStack.length - 1];
      const prefix = top.type === 'ordered' ? `${top.count}. ` : '• ';
      children.push(paragraphFromText(`${prefix}${text}`));
      continue;
    }

    children.push(paragraphFromText(text));
    pendingBlock = null;
  }

  return new Document({
    sections: [
      {
        children,
      },
    ],
  });
}
275
+
276
/**
 * Convert Markdown to HTML with smart table column widths.
 * Uses markdown-it to render, then injects <colgroup> with proportional
 * widths based on content length. Pandoc respects colgroup widths when
 * converting HTML → DOCX, unlike pipe/grid table width hints.
 *
 * @param {string} markdown - Markdown source.
 * @returns {string} Rendered HTML with a <colgroup> injected into each <table>.
 */
function markdownToHtmlWithTableWidths(markdown) {
  // First render to HTML normally
  let html = markdownParser.render(markdown);

  // Parse original markdown to measure table column content widths
  const tokens = markdownParser.parse(markdown, {});
  const tables = [];
  let tableIdx = -1;

  for (let i = 0; i < tokens.length; i += 1) {
    const token = tokens[i];
    if (token.type === 'table_open') {
      tableIdx += 1;
      tables[tableIdx] = { maxLen: [], ncols: 0 };
      continue;
    }
    if (token.type === 'table_close') {
      // FIX: leave "measuring" mode. Previously _currentCol kept its last
      // value, so inline text of ordinary paragraphs AFTER the table was
      // folded into the last column's max width, skewing the proportions.
      tables[tableIdx]._currentCol = undefined;
      continue;
    }
    if (tableIdx < 0) continue;

    if (token.type === 'inline') {
      const text = inlineToPlainText(token).trim();
      const tbl = tables[tableIdx];
      // Only measure while positioned inside a cell of the current table.
      if (tbl._currentCol !== undefined) {
        // Approximate display width: CJK/fullwidth code points count double.
        let w = 0;
        for (const ch of text) {
          const code = ch.codePointAt(0);
          if (
            (code >= 0x4E00 && code <= 0x9FFF)
            || (code >= 0x3000 && code <= 0x303F)
            || (code >= 0xFF00 && code <= 0xFFEF)
            || (code >= 0x3400 && code <= 0x4DBF)
          ) {
            w += 2;
          } else {
            w += 1;
          }
        }
        const ci = tbl._currentCol;
        tbl.maxLen[ci] = Math.max(tbl.maxLen[ci] || 1, w);
      }
    }

    if (token.type === 'th_open' || token.type === 'td_open') {
      const tbl = tables[tableIdx];
      if (tbl._currentCol === undefined) tbl._currentCol = 0;
      else tbl._currentCol += 1;
      continue;
    }
    if (token.type === 'tr_open') {
      const tbl = tables[tableIdx];
      tbl._currentCol = -1; // first th/td_open advances this to 0
      continue;
    }
    if (token.type === 'tr_close') {
      const tbl = tables[tableIdx];
      tbl.ncols = Math.max(tbl.ncols, (tbl._currentCol || 0) + 1);
      continue;
    }
  }

  // Now inject <colgroup> into each <table> in the HTML
  let tIdx = 0;
  html = html.replace(/<table>/g, () => {
    const tbl = tables[tIdx];
    tIdx += 1;
    if (!tbl || tbl.ncols === 0) return '<table>';

    // Smart proportional widths with first-column protection and max-width caps
    const needed = [];
    for (let c = 0; c < tbl.ncols; c += 1) {
      const rawLen = tbl.maxLen[c] || 1;
      // Penalize extremely long columns (>80 chars) to prevent them from dominating
      // Use sqrt to compress their weight while still giving them more space
      needed[c] = rawLen > 80 ? Math.sqrt(rawLen * 80) : rawLen;
    }
    const totalNeeded = needed.reduce((a, b) => a + b, 0);

    // Raw proportional percentages
    let pcts = needed.map((n) => Math.round((n / totalNeeded) * 100));

    // First column protection: always at least 18% (common label/title column)
    if (tbl.ncols >= 2 && pcts[0] < 18) {
      pcts[0] = 18;
    }

    // Cap maximum: no column should exceed 65% (prevents one column from dominating)
    for (let c = 0; c < tbl.ncols; c += 1) {
      if (pcts[c] > 65) pcts[c] = 65;
    }

    // Apply smart minimums based on content length
    for (let c = 0; c < tbl.ncols; c += 1) {
      const len = tbl.maxLen[c] || 1;
      const minPct = len <= 4 ? 6 : len <= 8 ? 10 : 12;
      if (pcts[c] < minPct) pcts[c] = minPct;
    }

    // Normalize to 100%
    const pctTotal = pcts.reduce((a, b) => a + b, 0);
    if (pctTotal !== 100) {
      if (tbl.ncols === 1) {
        // FIX: a lone column always spans the full width. The remainder
        // loop below only touches columns >= 1, so a single column capped
        // at 65% used to stay un-normalized.
        pcts[0] = 100;
      } else {
        // Distribute the difference across the non-first columns
        const diff = 100 - pctTotal;
        const adjustableCols = tbl.ncols - 1;
        const perCol = Math.floor(diff / adjustableCols);
        for (let c = 1; c < tbl.ncols; c += 1) {
          pcts[c] += perCol;
        }
        // Put any remainder on the last column
        pcts[pcts.length - 1] += diff - (perCol * adjustableCols);
      }
    }

    // Use width attribute (not style) — Pandoc respects this more reliably
    const colgroup = `<colgroup>${pcts.map((p) => `<col width="${p}%" />`).join('')}</colgroup>`;
    return `<table>${colgroup}`;
  });

  return html;
}
401
+
402
/**
 * Probe for a Pandoc binary on PATH.
 * @returns {Promise<{available: boolean, version: string|null}>} version is
 *   the first line of `pandoc --version` output when available.
 */
export async function checkPandoc() {
  try {
    const { stdout } = await execa('pandoc', ['--version']);
    const firstLine = stdout.split('\n')[0] || '';
    return { available: true, version: firstLine };
  } catch {
    // Binary missing or not executable — report unavailable rather than throw.
    return { available: false, version: null };
  }
}
417
+
418
/**
 * Probe for the optional `puppeteer` dependency.
 * @returns {Promise<{available: boolean, version: string|null}>} version is
 *   read from puppeteer's package.json when the module resolves.
 */
export async function checkPuppeteer() {
  try {
    await import('puppeteer');
    const pkgPath = require.resolve('puppeteer/package.json');
    const pkg = JSON.parse(await fs.readFile(pkgPath, 'utf8'));
    return { available: true, version: pkg.version || null };
  } catch {
    // Not installed (it is an optional dependency) — report unavailable.
    return { available: false, version: null };
  }
}
433
+
434
/**
 * Render HTML to a PDF file via Puppeteer.
 *
 * @param {string} html - Inline HTML content (ignored when options.htmlPath is set).
 * @param {object} [options]
 * @param {string} [options.pageSize='A4'] - Page format, e.g. 'A4', 'Letter'.
 * @param {boolean} [options.landscape=false] - Landscape orientation; also
 *   enables local-file access and in-page image downscaling.
 * @param {string} [options.outDir=os.tmpdir()] - Output directory (created if missing).
 * @param {string} [options.fileName='output.pdf'] - Output file name.
 * @param {string} [options.htmlPath] - Load this HTML file instead of the inline string.
 * @returns {Promise<{pdfPath: string, size: number, warnings: string[]}>}
 * @throws {Error} If Puppeteer is not installed.
 */
export async function exportPDF(html, options = {}) {
  const {
    pageSize = 'A4',
    landscape = false,
    outDir = os.tmpdir(),
    fileName = 'output.pdf',
    htmlPath,
  } = options;

  let puppeteer;
  try {
    const mod = await import('puppeteer');
    puppeteer = mod.default || mod;
  } catch {
    throw new Error('Puppeteer is not installed. Install optional dependency: npm install puppeteer');
  }

  const warnings = [];
  // FIX: ensure the output directory exists (exportDOCX already did this;
  // page.pdf fails if the target directory is missing).
  const targetDir = path.resolve(outDir);
  await fs.mkdir(targetDir, { recursive: true });
  const pdfPath = path.join(targetDir, fileName);

  const browser = await puppeteer.launch({
    headless: true,
    // Landscape mode is used for slide-style exports that reference local files.
    args: landscape ? ['--allow-file-access-from-files'] : [],
  });
  try {
    const page = await browser.newPage();

    if (htmlPath) {
      await page.goto(pathToFileURL(path.resolve(htmlPath)).toString(), {
        waitUntil: 'networkidle0',
      });
    } else {
      await page.setContent(html, {
        waitUntil: 'networkidle0',
      });
    }

    await page.addStyleTag({
      content: `
@page {
  size: ${pageSize}${landscape ? ' landscape' : ''};
  margin: ${landscape ? '0' : '2cm 2.5cm'};
}

/* Hide sidebar and toolbar for PDF export */
#sidebar { display: none !important; }
.doc-toolbar { display: none !important; }
body {
  display: block !important;
  overflow: visible !important;
  height: auto !important;
}
#main-content {
  overflow: visible !important;
  flex: none !important;
}
.markdown-body {
  max-width: 100% !important;
  padding: 40px 0 20px !important;
  margin: 0 auto !important;
}

thead { display: table-header-group; }
h1, h2, h3, h4, h5, h6 { break-after: avoid; }
pre { break-inside: auto; white-space: pre-wrap; word-break: break-word; }
table { break-inside: auto; }
tr { break-inside: avoid; }
img { break-inside: avoid; max-width: 100%; }
blockquote { break-inside: avoid; }`,
    });

    if (landscape) {
      // Downscale/recompress large images in-page to keep the PDF small.
      await page.evaluate(() => {
        const MAX_W = 1920;
        const QUALITY = 0.82;
        const imgs = document.querySelectorAll('img');
        const promises = Array.from(imgs).map((img) => new Promise((resolve) => {
          if (!img.naturalWidth) { resolve(); return; }
          const scale = img.naturalWidth > MAX_W ? MAX_W / img.naturalWidth : 1;
          const w = Math.round(img.naturalWidth * scale);
          const h = Math.round(img.naturalHeight * scale);
          const canvas = document.createElement('canvas');
          canvas.width = w;
          canvas.height = h;
          const ctx = canvas.getContext('2d');
          ctx.drawImage(img, 0, 0, w, h);
          // FIX: attach handlers BEFORE swapping src so a fast data-URL load
          // cannot fire load/error before we are listening.
          img.onload = resolve;
          img.onerror = resolve;
          img.src = canvas.toDataURL('image/jpeg', QUALITY);
        }));
        return Promise.all(promises);
      });
    }

    await page.pdf({
      path: pdfPath,
      format: pageSize,
      landscape,
      printBackground: true,
      preferCSSPageSize: true,
      outline: true,
      tagged: true,
    });
  } finally {
    // Always release the browser, even if rendering failed.
    await browser.close();
  }

  const stat = await fs.stat(pdfPath);
  return {
    pdfPath,
    size: stat.size,
    warnings,
  };
}
548
+
549
/**
 * Convert a Markdown string to a DOCX file.
 * Prefers Pandoc (via an intermediate HTML file carrying smart table column
 * widths); falls back to a pure-JS `docx` rendering when Pandoc is missing.
 *
 * @param {string} markdownString - Markdown source.
 * @param {object} [options]
 * @param {string} [options.baseDir] - Pandoc resource path for relative assets.
 * @param {string} [options.outDir=os.tmpdir()] - Output directory (created if missing).
 * @param {string} [options.fileName='output.docx'] - Output file name.
 * @returns {Promise<{docxPath: string, size: number, warnings: string[]}>}
 */
export async function exportDOCX(markdownString, options = {}) {
  const {
    baseDir,
    outDir = os.tmpdir(),
    fileName = 'output.docx',
  } = options;

  const targetDir = path.resolve(outDir);
  await fs.mkdir(targetDir, { recursive: true });
  const docxPath = path.join(targetDir, fileName);
  const warnings = [];

  const pandoc = await checkPandoc();
  if (pandoc.available) {
    // Convert MD → HTML with smart colgroup widths, then HTML → DOCX via Pandoc
    // Pandoc respects <colgroup> width percentages in HTML input
    const htmlContent = markdownToHtmlWithTableWidths(markdownString);
    // FIX: add a random suffix — Date.now() alone collides when two exports
    // run concurrently against the same outDir.
    const tempInput = path.join(targetDir, `.temp-${Date.now()}-${Math.random().toString(36).slice(2)}.html`);
    await fs.writeFile(tempInput, htmlContent, 'utf8');

    const args = [
      tempInput,
      '-f',
      'html',
      '-o',
      docxPath,
      `--reference-doc=${REFERENCE_DOC_PATH}`,
    ];

    if (baseDir) {
      args.push(`--resource-path=${path.resolve(baseDir)}`);
    }

    try {
      await execa('pandoc', args);
      // Post-process: style tables (header bg, bold, centered) — best-effort,
      // deliberately swallowed if python3 or the script is unavailable.
      await execa('python3', [POSTPROCESS_SCRIPT, docxPath]).catch(() => {});
    } finally {
      await fs.rm(tempInput, { force: true }).catch(() => {});
    }
  } else {
    const warning = 'Pandoc not found, using docx fallback. Install Pandoc for better results.';
    warnings.push(warning);

    const document = markdownToDocx(markdownString);
    const buffer = await Packer.toBuffer(document);
    await fs.writeFile(docxPath, buffer);
  }

  const stat = await fs.stat(docxPath);
  return {
    docxPath,
    size: stat.size,
    warnings,
  };
}
605
+
606
/**
 * Convert rendered viewer HTML to a DOCX file.
 * Strips UI chrome (sidebar/toolbar/nav) so only the document body is
 * exported, then converts with Pandoc (reference doc + Lua table filter,
 * plus best-effort python post-processing). Falls back to a text-only
 * docx when Pandoc is unavailable.
 *
 * @param {string} htmlString - HTML content (superseded by options.htmlPath).
 * @param {object} [options]
 * @param {string} [options.htmlPath] - Read HTML from this file instead of htmlString.
 * @param {string} [options.baseDir] - Pandoc resource path for relative assets.
 * @param {string} [options.outDir=os.tmpdir()] - Output directory (created if missing).
 * @param {string} [options.fileName='output.docx'] - Output file name.
 * @returns {Promise<{docxPath: string, size: number, warnings: string[]}>}
 */
export async function exportDOCXFromHTML(htmlString, options = {}) {
  const {
    htmlPath,
    baseDir,
    outDir = os.tmpdir(),
    fileName = 'output.docx',
  } = options;

  const targetDir = path.resolve(outDir);
  await fs.mkdir(targetDir, { recursive: true });

  const docxPath = path.join(targetDir, fileName);
  const warnings = [];

  // Read the full HTML (from file or string)
  let fullHtml = htmlString || '';
  if (htmlPath) {
    fullHtml = await fs.readFile(path.resolve(htmlPath), 'utf8');
  }

  // Strip UI elements (sidebar, toolbar) — only keep .markdown-body content
  // This prevents navigation/TOC UI from leaking into the Word document
  // NOTE(review): this regex assumes .markdown-body's closing </div></div>
  // is immediately followed by a <script> or end-of-file — confirm against
  // the viewer template that produces this HTML.
  const bodyMatch = fullHtml.match(/<div[^>]*class="markdown-body"[^>]*>([\s\S]*?)<\/div>\s*<\/div>\s*(?:<script|$)/i);
  if (bodyMatch) {
    fullHtml = bodyMatch[1];
  } else {
    // Try a simpler match: extract content between markdown-body div
    const startIdx = fullHtml.indexOf('class="markdown-body"');
    if (startIdx !== -1) {
      const tagEnd = fullHtml.indexOf('>', startIdx);
      if (tagEnd !== -1) {
        // Find the content after the opening tag, strip scripts/style at the end
        let content = fullHtml.substring(tagEnd + 1);
        // Remove trailing scripts and closing divs
        // (everything from the first <script> to end-of-string is dropped)
        content = content.replace(/<script[\s\S]*$/i, '');
        // Remove sidebar and toolbar remnants
        // NOTE(review): non-greedy up to two closing </div>s — nested divs
        // inside the sidebar/toolbar may defeat this; verify with real pages.
        content = content.replace(/<div[^>]*id="sidebar"[\s\S]*?<\/div>\s*<\/div>/gi, '');
        content = content.replace(/<div[^>]*class="doc-toolbar"[\s\S]*?<\/div>\s*<\/div>/gi, '');
        fullHtml = content;
      }
    }
  }

  // Also strip any remaining UI elements by tag/class
  fullHtml = fullHtml.replace(/<nav[\s\S]*?<\/nav>/gi, '');
  fullHtml = fullHtml.replace(/<div[^>]*id="sidebar"[\s\S]*?<\/div>/gi, '');
  fullHtml = fullHtml.replace(/<div[^>]*class="doc-toolbar"[\s\S]*?<\/div>/gi, '');

  const pandoc = await checkPandoc();
  if (pandoc.available) {
    // Write the cleaned HTML to a temp file next to the output for Pandoc.
    const tempInput = path.join(targetDir, `.temp-${Date.now()}.html`);
    await fs.writeFile(tempInput, fullHtml, 'utf8');

    const args = [
      tempInput,
      '-f',
      'html',
      '-o',
      docxPath,
      `--reference-doc=${REFERENCE_DOC_PATH}`,
      `--lua-filter=${LUA_TABLE_FILTER}`,
    ];

    if (baseDir) {
      args.push(`--resource-path=${path.resolve(baseDir)}`);
    }

    try {
      await execa('pandoc', args);
      // Best-effort table restyling; failures are deliberately ignored.
      await execa('python3', [POSTPROCESS_SCRIPT, docxPath]).catch(() => {});
    } finally {
      await fs.rm(tempInput, { force: true }).catch(() => {});
    }
  } else {
    warnings.push('Pandoc not found, using text-only DOCX fallback.');

    // Crude tag-stripping fallback: drop style/script, flatten tags to
    // spaces, decode a few common entities, collapse whitespace.
    // NOTE(review): this uses htmlString, not the cleaned fullHtml — so an
    // htmlPath-only call produces an empty document here; confirm intended.
    const plainText = String(htmlString || '')
      .replace(/<style[\s\S]*?<\/style>/gi, ' ')
      .replace(/<script[\s\S]*?<\/script>/gi, ' ')
      .replace(/<[^>]+>/g, ' ')
      .replaceAll('&nbsp;', ' ')
      .replaceAll('&amp;', '&')
      .replaceAll('&lt;', '<')
      .replaceAll('&gt;', '>')
      .replace(/\s+/g, ' ')
      .trim();

    const document = new Document({
      sections: [
        {
          children: [paragraphFromText(plainText || ' ')],
        },
      ],
    });
    const buffer = await Packer.toBuffer(document);
    await fs.writeFile(docxPath, buffer);
  }

  const stat = await fs.stat(docxPath);
  return {
    docxPath,
    size: stat.size,
    warnings,
  };
}