@wonderwhy-er/desktop-commander 0.2.33 → 0.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/dist/remote-device/scripts/blocking-offline-update.js +64 -0
  2. package/dist/tools/docx/builders/html-builder.d.ts +17 -0
  3. package/dist/tools/docx/builders/html-builder.js +92 -0
  4. package/dist/tools/docx/builders/index.d.ts +5 -0
  5. package/dist/tools/docx/builders/index.js +5 -0
  6. package/dist/tools/docx/builders/markdown-builder.d.ts +2 -0
  7. package/dist/tools/docx/builders/markdown-builder.js +260 -0
  8. package/dist/tools/docx/constants.d.ts +36 -0
  9. package/dist/tools/docx/constants.js +57 -0
  10. package/dist/tools/docx/converters/markdown-to-html.d.ts +17 -0
  11. package/dist/tools/docx/converters/markdown-to-html.js +111 -0
  12. package/dist/tools/docx/errors.d.ts +28 -0
  13. package/dist/tools/docx/errors.js +48 -0
  14. package/dist/tools/docx/extractors/images.d.ts +14 -0
  15. package/dist/tools/docx/extractors/images.js +40 -0
  16. package/dist/tools/docx/extractors/metadata.d.ts +14 -0
  17. package/dist/tools/docx/extractors/metadata.js +64 -0
  18. package/dist/tools/docx/extractors/sections.d.ts +14 -0
  19. package/dist/tools/docx/extractors/sections.js +61 -0
  20. package/dist/tools/docx/html.d.ts +17 -0
  21. package/dist/tools/docx/html.js +111 -0
  22. package/dist/tools/docx/index.d.ts +14 -0
  23. package/dist/tools/docx/index.js +16 -0
  24. package/dist/tools/docx/markdown.d.ts +84 -0
  25. package/dist/tools/docx/markdown.js +507 -0
  26. package/dist/tools/docx/operations/handlers/index.d.ts +39 -0
  27. package/dist/tools/docx/operations/handlers/index.js +152 -0
  28. package/dist/tools/docx/operations/html-manipulator.d.ts +24 -0
  29. package/dist/tools/docx/operations/html-manipulator.js +352 -0
  30. package/dist/tools/docx/operations/index.d.ts +14 -0
  31. package/dist/tools/docx/operations/index.js +61 -0
  32. package/dist/tools/docx/operations/operation-handlers.d.ts +3 -0
  33. package/dist/tools/docx/operations/operation-handlers.js +67 -0
  34. package/dist/tools/docx/operations/preprocessor.d.ts +14 -0
  35. package/dist/tools/docx/operations/preprocessor.js +44 -0
  36. package/dist/tools/docx/operations/xml-replacer.d.ts +9 -0
  37. package/dist/tools/docx/operations/xml-replacer.js +35 -0
  38. package/dist/tools/docx/operations.d.ts +13 -0
  39. package/dist/tools/docx/operations.js +13 -0
  40. package/dist/tools/docx/parsers/image-extractor.d.ts +18 -0
  41. package/dist/tools/docx/parsers/image-extractor.js +61 -0
  42. package/dist/tools/docx/parsers/index.d.ts +9 -0
  43. package/dist/tools/docx/parsers/index.js +9 -0
  44. package/dist/tools/docx/parsers/paragraph-parser.d.ts +2 -0
  45. package/dist/tools/docx/parsers/paragraph-parser.js +88 -0
  46. package/dist/tools/docx/parsers/table-parser.d.ts +9 -0
  47. package/dist/tools/docx/parsers/table-parser.js +72 -0
  48. package/dist/tools/docx/parsers/xml-parser.d.ts +25 -0
  49. package/dist/tools/docx/parsers/xml-parser.js +71 -0
  50. package/dist/tools/docx/parsers/zip-reader.d.ts +23 -0
  51. package/dist/tools/docx/parsers/zip-reader.js +52 -0
  52. package/dist/tools/docx/structure.d.ts +25 -0
  53. package/dist/tools/docx/structure.js +102 -0
  54. package/dist/tools/docx/styled-html-parser.d.ts +23 -0
  55. package/dist/tools/docx/styled-html-parser.js +1262 -0
  56. package/dist/tools/docx/types.d.ts +114 -0
  57. package/dist/tools/docx/types.js +8 -0
  58. package/dist/tools/docx/utils/escaping.d.ts +13 -0
  59. package/dist/tools/docx/utils/escaping.js +26 -0
  60. package/dist/tools/docx/utils/images.d.ts +9 -0
  61. package/dist/tools/docx/utils/images.js +26 -0
  62. package/dist/tools/docx/utils/index.d.ts +12 -0
  63. package/dist/tools/docx/utils/index.js +17 -0
  64. package/dist/tools/docx/utils/markdown.d.ts +13 -0
  65. package/dist/tools/docx/utils/markdown.js +32 -0
  66. package/dist/tools/docx/utils/paths.d.ts +15 -0
  67. package/dist/tools/docx/utils/paths.js +27 -0
  68. package/dist/tools/docx/utils/versioning.d.ts +25 -0
  69. package/dist/tools/docx/utils/versioning.js +55 -0
  70. package/dist/tools/docx/utils.d.ts +101 -0
  71. package/dist/tools/docx/utils.js +299 -0
  72. package/dist/tools/docx/validators.d.ts +13 -0
  73. package/dist/tools/docx/validators.js +40 -0
  74. package/dist/utils/capture.js +4 -4
  75. package/dist/utils/files/docx.d.ts +41 -0
  76. package/dist/utils/files/docx.js +245 -0
  77. package/dist/version.d.ts +1 -1
  78. package/dist/version.js +1 -1
  79. package/package.json +2 -2
@@ -0,0 +1,507 @@
1
+ /**
2
+ * DOCX to Markdown Conversion
3
+ * Uses Docxtemplater + XML parsing for reading Word documents
4
+ */
5
+ import fs from 'fs/promises';
6
+ import path from 'path';
7
+ import { createRequire } from 'module';
8
+ const require = createRequire(import.meta.url);
9
+ const PizZip = require('pizzip');
10
+ const Docxtemplater = require('docxtemplater');
11
+ const { DOMParser } = require('@xmldom/xmldom');
12
/**
 * Tell whether a DOCX source string points at a remote HTTP(S) resource
 * rather than a local path. The scheme prefix is compared case-sensitively.
 */
const isUrl = (source) => ['http://', 'https://'].some((scheme) => source.startsWith(scheme));
16
+ /**
17
+ * Load DOCX file as buffer
18
+ */
19
+ async function loadDocxToBuffer(source) {
20
+ if (isUrl(source)) {
21
+ const response = await fetch(source);
22
+ const arrayBuffer = await response.arrayBuffer();
23
+ return Buffer.from(arrayBuffer);
24
+ }
25
+ else {
26
+ return await fs.readFile(source);
27
+ }
28
+ }
29
+ function readZipFileText(zip, filePath) {
30
+ const file = zip.file(filePath);
31
+ if (!file)
32
+ return null;
33
+ if (typeof file.asText === 'function') {
34
+ return file.asText();
35
+ }
36
+ if (typeof file.asBinary === 'function') {
37
+ return Buffer.from(file.asBinary(), 'binary').toString('utf8');
38
+ }
39
+ return null;
40
+ }
41
+ function readZipFileBuffer(zip, filePath) {
42
+ const file = zip.file(filePath);
43
+ if (!file)
44
+ return null;
45
+ if (typeof file.asUint8Array === 'function') {
46
+ return Buffer.from(file.asUint8Array());
47
+ }
48
+ if (typeof file.asNodeBuffer === 'function') {
49
+ return file.asNodeBuffer();
50
+ }
51
+ if (typeof file.asBinary === 'function') {
52
+ return Buffer.from(file.asBinary(), 'binary');
53
+ }
54
+ return null;
55
+ }
56
/**
 * Guess an image MIME type from the file extension of a relationship target.
 *
 * @param target Relationship target path (only its extension is inspected,
 *   case-insensitively).
 * @returns The matching image MIME type, or 'application/octet-stream' for an
 *   unknown or missing extension.
 */
function getMimeTypeForTarget(target) {
    switch (path.extname(target).toLowerCase()) {
        case '.png':
            return 'image/png';
        case '.jpg':
        case '.jpeg':
            return 'image/jpeg';
        case '.gif':
            return 'image/gif';
        case '.bmp':
            return 'image/bmp';
        case '.webp':
            return 'image/webp';
        case '.svg':
            return 'image/svg+xml';
        default:
            return 'application/octet-stream';
    }
}
69
/**
 * Make text safe for a single Markdown table cell: every `|` is
 * backslash-escaped and every line break (LF or CRLF) becomes `<br>`.
 */
function escapeTableCell(text) {
    const pipesEscaped = text.split('|').join('\\|');
    return pipesEscaped.split(/\r?\n/).join('<br>');
}
72
+ function getElementChildren(node) {
73
+ const children = [];
74
+ for (let i = 0; i < node.childNodes.length; i++) {
75
+ const child = node.childNodes[i];
76
+ if (child.nodeType === 1) {
77
+ children.push(child);
78
+ }
79
+ }
80
+ return children;
81
+ }
82
+ function getAttributeValue(node, name) {
83
+ return node.getAttribute(name) || node.getAttribute(`w:${name}`) || null;
84
+ }
85
+ function getHeadingLevelFromParagraph(paragraph) {
86
+ const pPr = paragraph.getElementsByTagName('w:pPr')[0];
87
+ if (!pPr)
88
+ return null;
89
+ const pStyle = pPr.getElementsByTagName('w:pStyle')[0];
90
+ if (!pStyle)
91
+ return null;
92
+ const styleVal = getAttributeValue(pStyle, 'val');
93
+ if (!styleVal)
94
+ return null;
95
+ const match = styleVal.match(/heading\s*([1-6])/i);
96
+ if (!match)
97
+ return null;
98
+ return Number(match[1]);
99
+ }
100
+ function extractRelationshipMap(relsXml) {
101
+ const relMap = new Map();
102
+ if (!relsXml)
103
+ return relMap;
104
+ const relDoc = new DOMParser().parseFromString(relsXml, 'application/xml');
105
+ const rels = relDoc.getElementsByTagName('Relationship');
106
+ for (let i = 0; i < rels.length; i++) {
107
+ const rel = rels[i];
108
+ const id = rel.getAttribute('Id');
109
+ const type = rel.getAttribute('Type') || '';
110
+ const target = rel.getAttribute('Target') || '';
111
+ if (id && target) {
112
+ relMap.set(id, { target, type });
113
+ }
114
+ }
115
+ return relMap;
116
+ }
117
+ function buildImageResolver(zip, relMap, images, includeImages) {
118
+ const cache = new Map();
119
+ return (relId) => {
120
+ if (!includeImages || !relId)
121
+ return '';
122
+ const rel = relMap.get(relId);
123
+ if (!rel || !rel.type.includes('/image'))
124
+ return '';
125
+ if (cache.has(relId)) {
126
+ const cached = cache.get(relId);
127
+ return `![image](data:${cached.mimeType};base64,${cached.data})`;
128
+ }
129
+ const targetPath = rel.target.startsWith('word/')
130
+ ? rel.target
131
+ : `word/${rel.target.replace(/^\/?/, '')}`;
132
+ const buffer = readZipFileBuffer(zip, targetPath);
133
+ if (!buffer)
134
+ return '';
135
+ const mimeType = getMimeTypeForTarget(rel.target);
136
+ const base64 = buffer.toString('base64');
137
+ const image = {
138
+ id: relId,
139
+ data: base64,
140
+ mimeType,
141
+ originalSize: buffer.length,
142
+ };
143
+ images.push(image);
144
+ cache.set(relId, image);
145
+ return `![image](data:${mimeType};base64,${base64})`;
146
+ };
147
+ }
148
+ function extractTextFromRun(run, resolveImage) {
149
+ let text = '';
150
+ const children = getElementChildren(run);
151
+ for (const child of children) {
152
+ const nodeName = child.nodeName;
153
+ if (nodeName === 'w:t') {
154
+ text += child.textContent || '';
155
+ continue;
156
+ }
157
+ if (nodeName === 'w:tab') {
158
+ text += '\t';
159
+ continue;
160
+ }
161
+ if (nodeName === 'w:br') {
162
+ text += '\n';
163
+ continue;
164
+ }
165
+ if (nodeName === 'w:drawing' || nodeName === 'w:pict') {
166
+ const blips = child.getElementsByTagName('a:blip');
167
+ for (let i = 0; i < blips.length; i++) {
168
+ const blip = blips[i];
169
+ const relId = blip.getAttribute('r:embed') || blip.getAttribute('embed');
170
+ const imageMarkdown = resolveImage(relId);
171
+ if (imageMarkdown) {
172
+ text += imageMarkdown;
173
+ }
174
+ }
175
+ }
176
+ }
177
+ return text;
178
+ }
179
+ function extractParagraphText(paragraph, resolveImage) {
180
+ let text = '';
181
+ const children = getElementChildren(paragraph);
182
+ for (const child of children) {
183
+ const nodeName = child.nodeName;
184
+ if (nodeName === 'w:r') {
185
+ text += extractTextFromRun(child, resolveImage);
186
+ continue;
187
+ }
188
+ if (nodeName === 'w:hyperlink') {
189
+ const runs = child.getElementsByTagName('w:r');
190
+ for (let i = 0; i < runs.length; i++) {
191
+ text += extractTextFromRun(runs[i], resolveImage);
192
+ }
193
+ continue;
194
+ }
195
+ }
196
+ return text;
197
+ }
198
+ function convertTableToMarkdown(table, resolveImage) {
199
+ const rows = [];
200
+ const rowNodes = table.getElementsByTagName('w:tr');
201
+ for (let i = 0; i < rowNodes.length; i++) {
202
+ const row = rowNodes[i];
203
+ const cells = row.getElementsByTagName('w:tc');
204
+ const rowCells = [];
205
+ for (let j = 0; j < cells.length; j++) {
206
+ const cell = cells[j];
207
+ const paragraphs = cell.getElementsByTagName('w:p');
208
+ const cellTexts = [];
209
+ for (let k = 0; k < paragraphs.length; k++) {
210
+ const text = extractParagraphText(paragraphs[k], resolveImage).trim();
211
+ if (text) {
212
+ cellTexts.push(text);
213
+ }
214
+ }
215
+ const combined = cellTexts.length > 0 ? cellTexts.join('<br>') : ' ';
216
+ rowCells.push(escapeTableCell(combined));
217
+ }
218
+ if (rowCells.length > 0) {
219
+ rows.push(rowCells);
220
+ }
221
+ }
222
+ if (rows.length === 0)
223
+ return null;
224
+ const maxCols = Math.max(...rows.map(row => row.length));
225
+ for (const row of rows) {
226
+ while (row.length < maxCols) {
227
+ row.push(' ');
228
+ }
229
+ }
230
+ const header = rows[0];
231
+ const bodyRows = rows.slice(1);
232
+ const headerLine = `| ${header.join(' | ')} |`;
233
+ const separatorLine = `| ${header.map(() => '---').join(' | ')} |`;
234
+ const dataLines = bodyRows.map(row => `| ${row.join(' | ')} |`);
235
+ return [headerLine, separatorLine, ...dataLines].join('\n');
236
+ }
237
+ function convertBodyToMarkdown(body, resolveImage) {
238
+ const blocks = [];
239
+ const children = getElementChildren(body);
240
+ for (const child of children) {
241
+ const nodeName = child.nodeName;
242
+ if (nodeName === 'w:p') {
243
+ const text = extractParagraphText(child, resolveImage).trim();
244
+ if (!text)
245
+ continue;
246
+ const headingLevel = getHeadingLevelFromParagraph(child);
247
+ if (headingLevel && headingLevel >= 1 && headingLevel <= 6) {
248
+ blocks.push(`${'#'.repeat(headingLevel)} ${text}`);
249
+ }
250
+ else {
251
+ blocks.push(text);
252
+ }
253
+ continue;
254
+ }
255
+ if (nodeName === 'w:tbl') {
256
+ const tableMarkdown = convertTableToMarkdown(child, resolveImage);
257
+ if (tableMarkdown) {
258
+ blocks.push(tableMarkdown);
259
+ }
260
+ continue;
261
+ }
262
+ }
263
+ return blocks.join('\n\n');
264
+ }
265
/**
 * Convert a DOCX document to Markdown.
 *
 * Opens the archive with PizZip, parses `word/document.xml` with an XML DOM
 * parser, and renders the body's paragraphs, headings and tables. A
 * Docxtemplater instance is constructed only as a sanity check; if that
 * fails, a warning is logged and conversion continues from the raw XML.
 *
 * @param source Path to a DOCX file, or an http(s) URL.
 * @param options Conversion options. Only `includeImages` (default true) is
 *   consulted here; `preserveFormatting` and `styleMap` may be passed but have
 *   no effect in this function.
 * @returns `{ markdown, metadata, images, sections }`.
 * @throws Error whose message starts with "Failed to parse DOCX file:" for
 *   any failure while loading or converting (the original error is logged).
 */
export async function parseDocxToMarkdown(source, options = {}) {
    const { includeImages = true } = options;
    try {
        // Load DOCX file
        const buffer = await loadDocxToBuffer(source);
        // For local files the size is simply the number of bytes just read; a
        // separate fs.stat() would cost another filesystem round-trip and could
        // disagree with the buffer if the file changed in between. URL sources
        // keep reporting no size, as before.
        const fileSize = isUrl(source) ? undefined : buffer.length;
        const zip = new PizZip(buffer);
        try {
            new Docxtemplater(zip, { paragraphLoop: true, linebreaks: true });
        }
        catch (error) {
            console.warn('Docxtemplater validation failed, continuing with raw XML parsing:', error);
        }
        const documentXml = readZipFileText(zip, 'word/document.xml');
        if (!documentXml) {
            throw new Error('Invalid DOCX file: word/document.xml not found');
        }
        const relsXml = readZipFileText(zip, 'word/_rels/document.xml.rels');
        const relMap = extractRelationshipMap(relsXml);
        // `images` is filled as a side effect of resolving image references.
        const images = [];
        const resolveImage = buildImageResolver(zip, relMap, images, includeImages);
        const doc = new DOMParser().parseFromString(documentXml, 'application/xml');
        const body = doc.getElementsByTagName('w:body')[0];
        if (!body) {
            throw new Error('Invalid DOCX file: <w:body> not found');
        }
        let markdown = convertBodyToMarkdown(body, resolveImage);
        // Extract metadata from DOCX
        const metadata = await extractMetadata(source, buffer, fileSize);
        // Post-process markdown for better formatting
        markdown = postProcessMarkdown(markdown);
        // Parse into sections (optional advanced feature)
        const sections = parseIntoSections(markdown, images);
        return {
            markdown,
            metadata,
            images,
            sections
        };
    }
    catch (error) {
        console.error('Error converting DOCX to Markdown:', error);
        throw new Error(`Failed to parse DOCX file: ${error instanceof Error ? error.message : String(error)}`);
    }
}
326
+ /**
327
+ * Extract metadata from DOCX file
328
+ */
329
+ async function extractMetadata(source, buffer, fileSize) {
330
+ try {
331
+ // Core properties aren't exposed by the parser, so we'll use JSZip directly
332
+ // For now, return basic metadata structure
333
+ // TODO: Could enhance with docx-parser or officegen for full metadata
334
+ const metadata = {
335
+ fileSize
336
+ };
337
+ // Try to extract basic metadata if available
338
+ // This is a simplified version - full implementation would use docx package
339
+ try {
340
+ // Attempt to read core properties using JSZip (DOCX is a ZIP file)
341
+ const JSZip = require('jszip');
342
+ const zip = await JSZip.loadAsync(buffer);
343
+ // Read core properties XML
344
+ const corePropsFile = zip.file('docProps/core.xml');
345
+ if (corePropsFile) {
346
+ const corePropsXml = await corePropsFile.async('string');
347
+ // Basic XML parsing (ideally use proper XML parser)
348
+ const extractTag = (xml, tag) => {
349
+ const regex = new RegExp(`<dc:${tag}[^>]*>([^<]*)<\/dc:${tag}>`, 'i');
350
+ const match = xml.match(regex);
351
+ if (match)
352
+ return match[1];
353
+ // Try cp: namespace
354
+ const regex2 = new RegExp(`<cp:${tag}[^>]*>([^<]*)<\/cp:${tag}>`, 'i');
355
+ const match2 = xml.match(regex2);
356
+ return match2 ? match2[1] : undefined;
357
+ };
358
+ const extractDcmiTerms = (xml, tag) => {
359
+ const regex = new RegExp(`<dcterms:${tag}[^>]*>([^<]*)<\/dcterms:${tag}>`, 'i');
360
+ const match = xml.match(regex);
361
+ if (match) {
362
+ try {
363
+ return new Date(match[1]);
364
+ }
365
+ catch {
366
+ return undefined;
367
+ }
368
+ }
369
+ return undefined;
370
+ };
371
+ metadata.title = extractTag(corePropsXml, 'title');
372
+ metadata.author = extractTag(corePropsXml, 'creator');
373
+ metadata.subject = extractTag(corePropsXml, 'subject');
374
+ metadata.description = extractTag(corePropsXml, 'description');
375
+ metadata.lastModifiedBy = extractTag(corePropsXml, 'lastModifiedBy');
376
+ metadata.revision = extractTag(corePropsXml, 'revision');
377
+ metadata.creationDate = extractDcmiTerms(corePropsXml, 'created');
378
+ metadata.modificationDate = extractDcmiTerms(corePropsXml, 'modified');
379
+ }
380
+ }
381
+ catch (metaError) {
382
+ // Metadata extraction is optional, don't fail if it doesn't work
383
+ console.warn('Could not extract detailed metadata:', metaError);
384
+ }
385
+ return metadata;
386
+ }
387
+ catch (error) {
388
+ // Return minimal metadata on error
389
+ return { fileSize };
390
+ }
391
+ }
392
/**
 * Normalize blank-line spacing in generated Markdown.
 *
 * - collapses runs of three or more newlines to a single blank line
 * - puts a blank line before and after heading lines
 * - puts a blank line before and after ``` fences
 * - puts a blank line before the first row and after the last row of a table
 * - trims leading and trailing whitespace
 *
 * @param markdown Markdown text.
 * @returns The normalized text.
 */
function postProcessMarkdown(markdown) {
    // Clean up excessive newlines
    markdown = markdown.replace(/\n{3,}/g, '\n\n');
    // Ensure proper spacing around headings. The "after" rule is anchored to
    // the start of a line so that a '#' in the middle of ordinary text
    // (e.g. "C# is ...") is not treated as a heading.
    markdown = markdown.replace(/([^\n])\n(#+\s)/g, '$1\n\n$2');
    markdown = markdown.replace(/^(#+\s[^\n]+)\n([^\n])/gm, '$1\n\n$2');
    // Ensure proper spacing around code blocks.
    // NOTE(review): these two rules cannot tell an opening fence from a closing
    // one, so they also add blank lines inside a fenced block — kept as-is.
    markdown = markdown.replace(/([^\n])\n```/g, '$1\n\n```');
    markdown = markdown.replace(/```\n([^\n])/g, '```\n\n$1');
    // Ensure proper spacing around tables. A blank line is inserted before a
    // table row only when the preceding line is not itself a row (does not
    // start with '|'); otherwise the header would be separated from its
    // '---' line and the table would no longer parse as a table.
    markdown = markdown.replace(/^(?!\|)([^\n]+)\n(?=\|[^\n]+\|)/gm, '$1\n\n');
    markdown = markdown.replace(/(\|[^\n]+\|)\n([^\n|])/g, '$1\n\n$2');
    // Trim leading/trailing whitespace
    return markdown.trim();
}
413
/**
 * Split Markdown text into a flat list of typed sections.
 *
 * Lines are classified in this order:
 *   1. heading (`#` to `######` followed by text) — starts a 'heading'
 *      section carrying `level`; the non-blank lines that follow directly
 *      are appended to it
 *   2. any line containing `![alt](target)` — emitted on its own as an
 *      'image' section
 *   3. list item (`*`, `-`, `+` or `N.` followed by whitespace) —
 *      consecutive items form one 'list' section
 *   4. any other non-blank line — accumulated into a 'paragraph' section
 * A blank line closes the section being built.
 *
 * @param markdown Markdown text.
 * @param images Extracted images (not consulted by this function).
 * @returns Sections of shape `{ type, content }`, plus `level` on headings.
 */
function parseIntoSections(markdown, images) {
    const HEADING = /^(#{1,6})\s+(.+)$/;
    const IMAGE = /!\[([^\]]*)\]\(([^)]+)\)/;
    const BULLET_ITEM = /^[*\-+]\s/;
    const NUMBERED_ITEM = /^\d+\.\s/;
    const sections = [];
    // Section being accumulated and its lines. Invariant: `open` is non-null
    // exactly when `buffered` is non-empty.
    let open = null;
    let buffered = [];
    const closeOpenSection = () => {
        if (open) {
            open.content = buffered.join('\n').trim();
            sections.push(open);
        }
        open = null;
        buffered = [];
    };
    const startSection = (section, firstLines) => {
        closeOpenSection();
        open = section;
        buffered = firstLines;
    };
    for (const line of markdown.split('\n')) {
        const heading = HEADING.exec(line);
        if (heading) {
            startSection({
                type: 'heading',
                level: heading[1].length,
                content: '' // filled in when the section is closed
            }, [line]);
        }
        else if (IMAGE.test(line)) {
            closeOpenSection();
            sections.push({
                type: 'image',
                content: line
            });
        }
        else if (BULLET_ITEM.test(line) || NUMBERED_ITEM.test(line)) {
            if (!open || open.type !== 'list') {
                startSection({
                    type: 'list',
                    content: ''
                }, []);
            }
            buffered.push(line);
        }
        else if (line.trim()) {
            // Plain text continues a paragraph or the heading it follows.
            const continuesOpen = open && (open.type === 'paragraph' || open.type === 'heading');
            if (!continuesOpen) {
                startSection({
                    type: 'paragraph',
                    content: ''
                }, []);
            }
            buffered.push(line);
        }
        else {
            // Blank line: finish whatever is being built.
            closeOpenSection();
        }
    }
    closeOpenSection();
    return sections;
}
@@ -0,0 +1,39 @@
1
/**
 * DOCX Operation Handlers
 *
 * Pure functions: HTML in → modified HTML out.
 * Each handler corresponds to one DocxOperation type.
 *
 * @module docx/operations/handlers
 */
import type { DocxOperation } from '../../types.js';
/**
 * Replace text in HTML while protecting base64 data URLs and other attribute values.
 *
 * Strategy: Temporarily extract `<img>` tags (which contain huge base64 data URLs)
 * and replace them with placeholders, perform the text replacement,
 * then restore the original `<img>` tags. This prevents the regex from
 * accidentally matching / corrupting base64 image data.
 *
 * @param html HTML document to modify.
 * @param search Text to look for.
 * @param replace Replacement text.
 * @param matchCase Optional; default not visible in this declaration.
 *   NOTE(review): presumably toggles case-sensitive matching — confirm.
 * @param global Optional; default not visible in this declaration.
 *   NOTE(review): presumably replaces all occurrences instead of the first — confirm.
 * @returns The modified HTML.
 */
export declare function handleReplaceText(html: string, search: string, replace: string, matchCase?: boolean, global?: boolean): string;
/**
 * Handler taking the current HTML and a Markdown string; returns modified HTML.
 * NOTE(review): by its name, appends the Markdown (converted to HTML) to the
 * end of the document — confirm against the implementation.
 */
export declare function handleAppendMarkdown(html: string, markdown: string): string;
/**
 * Handler taking the current HTML and an HTML fragment; returns modified HTML.
 * NOTE(review): by its name, appends `content` to the end of the document —
 * confirm against the implementation.
 */
export declare function handleAppendHtml(html: string, content: string): string;
/**
 * Handler that inserts an HTML fragment, optionally relative to a selector.
 *
 * @param selector Optional target. NOTE(review): selector syntax and the
 *   behavior when omitted or unmatched are not visible here — confirm.
 * @param position One of 'before' | 'after' | 'inside'; default not visible here.
 * @returns The modified HTML.
 */
export declare function handleInsertHtml(html: string, content: string, selector?: string, position?: 'before' | 'after' | 'inside'): string;
/**
 * Handler that replaces the element(s) matched by `selector` with `content`.
 *
 * @param replaceAll Optional; default not visible here.
 *   NOTE(review): presumably replaces every match rather than the first — confirm.
 * @returns The modified HTML.
 */
export declare function handleReplaceHtml(html: string, selector: string, content: string, replaceAll?: boolean): string;
/**
 * Handler that updates the element(s) matched by `selector`.
 *
 * @param content Optional new content for the matched element(s).
 * @param attributes Optional attribute name → value pairs.
 *   NOTE(review): whether existing attributes are merged or replaced is not
 *   visible here — confirm.
 * @param updateAll Optional; default not visible here.
 * @returns The modified HTML.
 */
export declare function handleUpdateHtml(html: string, selector: string, content?: string, attributes?: Record<string, string>, updateAll?: boolean): string;
/**
 * Insert a table from markdown or a rows array.
 * If a selector is given, the table is placed relative to that element;
 * otherwise it is appended to the end of the document.
 *
 * @param markdownTable Optional Markdown table source.
 * @param rows Optional table content as an array of rows of cell strings.
 *   NOTE(review): precedence when both `markdownTable` and `rows` are given is
 *   not visible here — confirm.
 * @param selector Optional element to position the table against.
 * @param position One of 'before' | 'after' | 'inside'; default not visible here.
 * @returns The modified HTML.
 */
export declare function handleInsertTable(html: string, markdownTable?: string, rows?: string[][], selector?: string, position?: 'before' | 'after' | 'inside'): string;
/**
 * Insert an image into the document.
 *
 * By the time this handler runs, local file paths should already be converted
 * to base64 data URLs by `preprocessOperations()` in `operations/index.ts`.
 * html-to-docx only supports base64 data URLs and HTTP URLs.
 *
 * @param imagePath Image source (see above for the expected forms).
 * @param altText Optional alternative text.
 * @param width Optional width. NOTE(review): unit not visible here — confirm.
 * @param height Optional height. NOTE(review): unit not visible here — confirm.
 * @param baseDir Optional base directory.
 *   NOTE(review): presumably used to resolve relative `imagePath` values — confirm.
 * @param selector Optional element to position the image against.
 * @param position One of 'before' | 'after' | 'inside'; default not visible here.
 * @returns The modified HTML.
 */
export declare function handleInsertImage(html: string, imagePath: string, altText?: string, width?: number, height?: number, baseDir?: string, selector?: string, position?: 'before' | 'after' | 'inside'): string;
/** Apply a single DocxOperation to HTML content, routing to the correct handler. */
export declare function applyOperation(html: string, operation: DocxOperation, baseDir?: string): string;