@wonderwhy-er/desktop-commander 0.2.34 → 0.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/dist/tools/docx/builders/html-builder.d.ts +17 -0
  2. package/dist/tools/docx/builders/html-builder.js +92 -0
  3. package/dist/tools/docx/builders/index.d.ts +5 -0
  4. package/dist/tools/docx/builders/index.js +5 -0
  5. package/dist/tools/docx/builders/markdown-builder.d.ts +2 -0
  6. package/dist/tools/docx/builders/markdown-builder.js +260 -0
  7. package/dist/tools/docx/constants.d.ts +36 -0
  8. package/dist/tools/docx/constants.js +57 -0
  9. package/dist/tools/docx/converters/markdown-to-html.d.ts +17 -0
  10. package/dist/tools/docx/converters/markdown-to-html.js +111 -0
  11. package/dist/tools/docx/errors.d.ts +28 -0
  12. package/dist/tools/docx/errors.js +48 -0
  13. package/dist/tools/docx/extractors/images.d.ts +14 -0
  14. package/dist/tools/docx/extractors/images.js +40 -0
  15. package/dist/tools/docx/extractors/metadata.d.ts +14 -0
  16. package/dist/tools/docx/extractors/metadata.js +64 -0
  17. package/dist/tools/docx/extractors/sections.d.ts +14 -0
  18. package/dist/tools/docx/extractors/sections.js +61 -0
  19. package/dist/tools/docx/html.d.ts +17 -0
  20. package/dist/tools/docx/html.js +111 -0
  21. package/dist/tools/docx/index.d.ts +14 -0
  22. package/dist/tools/docx/index.js +16 -0
  23. package/dist/tools/docx/markdown.d.ts +84 -0
  24. package/dist/tools/docx/markdown.js +507 -0
  25. package/dist/tools/docx/operations/handlers/index.d.ts +39 -0
  26. package/dist/tools/docx/operations/handlers/index.js +152 -0
  27. package/dist/tools/docx/operations/html-manipulator.d.ts +24 -0
  28. package/dist/tools/docx/operations/html-manipulator.js +352 -0
  29. package/dist/tools/docx/operations/index.d.ts +14 -0
  30. package/dist/tools/docx/operations/index.js +61 -0
  31. package/dist/tools/docx/operations/operation-handlers.d.ts +3 -0
  32. package/dist/tools/docx/operations/operation-handlers.js +67 -0
  33. package/dist/tools/docx/operations/preprocessor.d.ts +14 -0
  34. package/dist/tools/docx/operations/preprocessor.js +44 -0
  35. package/dist/tools/docx/operations/xml-replacer.d.ts +9 -0
  36. package/dist/tools/docx/operations/xml-replacer.js +35 -0
  37. package/dist/tools/docx/operations.d.ts +13 -0
  38. package/dist/tools/docx/operations.js +13 -0
  39. package/dist/tools/docx/parsers/image-extractor.d.ts +18 -0
  40. package/dist/tools/docx/parsers/image-extractor.js +61 -0
  41. package/dist/tools/docx/parsers/index.d.ts +9 -0
  42. package/dist/tools/docx/parsers/index.js +9 -0
  43. package/dist/tools/docx/parsers/paragraph-parser.d.ts +2 -0
  44. package/dist/tools/docx/parsers/paragraph-parser.js +88 -0
  45. package/dist/tools/docx/parsers/table-parser.d.ts +9 -0
  46. package/dist/tools/docx/parsers/table-parser.js +72 -0
  47. package/dist/tools/docx/parsers/xml-parser.d.ts +25 -0
  48. package/dist/tools/docx/parsers/xml-parser.js +71 -0
  49. package/dist/tools/docx/parsers/zip-reader.d.ts +23 -0
  50. package/dist/tools/docx/parsers/zip-reader.js +52 -0
  51. package/dist/tools/docx/structure.d.ts +25 -0
  52. package/dist/tools/docx/structure.js +102 -0
  53. package/dist/tools/docx/styled-html-parser.d.ts +23 -0
  54. package/dist/tools/docx/styled-html-parser.js +1262 -0
  55. package/dist/tools/docx/types.d.ts +114 -0
  56. package/dist/tools/docx/types.js +8 -0
  57. package/dist/tools/docx/utils/escaping.d.ts +13 -0
  58. package/dist/tools/docx/utils/escaping.js +26 -0
  59. package/dist/tools/docx/utils/images.d.ts +9 -0
  60. package/dist/tools/docx/utils/images.js +26 -0
  61. package/dist/tools/docx/utils/index.d.ts +12 -0
  62. package/dist/tools/docx/utils/index.js +17 -0
  63. package/dist/tools/docx/utils/markdown.d.ts +13 -0
  64. package/dist/tools/docx/utils/markdown.js +32 -0
  65. package/dist/tools/docx/utils/paths.d.ts +15 -0
  66. package/dist/tools/docx/utils/paths.js +27 -0
  67. package/dist/tools/docx/utils/versioning.d.ts +25 -0
  68. package/dist/tools/docx/utils/versioning.js +55 -0
  69. package/dist/tools/docx/utils.d.ts +101 -0
  70. package/dist/tools/docx/utils.js +299 -0
  71. package/dist/tools/docx/validators.d.ts +13 -0
  72. package/dist/tools/docx/validators.js +40 -0
  73. package/dist/utils/capture.js +4 -4
  74. package/dist/utils/files/docx.d.ts +41 -0
  75. package/dist/utils/files/docx.js +245 -0
  76. package/dist/version.d.ts +1 -1
  77. package/dist/version.js +1 -1
  78. package/package.json +1 -1
@@ -0,0 +1,14 @@
1
+ /**
2
+ * DOCX Operation Preprocessor
3
+ *
4
+ * Preprocesses operations before execution (e.g., converting local image paths to base64).
5
+ * Follows Single Responsibility Principle — only handles operation preprocessing.
6
+ *
7
+ * @module docx/operations/preprocessor
8
+ */
9
+ import type { DocxOperation } from '../types.js';
10
+ /**
11
+ * Resolve local image paths to base64 data URLs before operations are applied.
12
+ * html-to-docx cannot handle `file://` URLs — only base64 data URLs and HTTP URLs work.
13
+ */
14
+ export declare function preprocessOperations(operations: DocxOperation[], baseDir: string): Promise<DocxOperation[]>;
@@ -0,0 +1,44 @@
1
+ /**
2
+ * DOCX Operation Preprocessor
3
+ *
4
+ * Preprocesses operations before execution (e.g., converting local image paths to base64).
5
+ * Follows Single Responsibility Principle — only handles operation preprocessing.
6
+ *
7
+ * @module docx/operations/preprocessor
8
+ */
9
+ import fs from 'fs/promises';
10
+ import { DocxError, DocxErrorCode } from '../errors.js';
11
+ import { isDataUrl, isUrl, resolveImagePath } from '../utils/paths.js';
12
+ import { getMimeType } from '../utils/images.js';
13
/**
 * Inline local image files as base64 data URLs before operations are applied.
 * html-to-docx cannot handle `file://` URLs — only base64 data URLs and HTTP URLs work.
 *
 * Only `insertImage` operations whose trimmed `imagePath` is non-empty and is neither a
 * data URL nor a URL (per `isDataUrl` / `isUrl`) are rewritten. Every other operation is
 * passed through unchanged and keeps its position in the list.
 *
 * @param operations - Operations to scan, in execution order.
 * @param baseDir - Directory handed to `resolveImagePath` when resolving the image path.
 * @returns A new array; rewritten entries are shallow copies with `imagePath` replaced.
 * @throws {DocxError} With code `INVALID_IMAGE_FILE` when the image cannot be read or encoded.
 */
export async function preprocessOperations(operations, baseDir) {
    const output = [];
    for (const operation of operations) {
        const localPath = operation.type === 'insertImage' ? operation.imagePath?.trim() : undefined;
        const mustInline = Boolean(localPath) && !isDataUrl(localPath) && !isUrl(localPath);
        if (!mustInline) {
            output.push(operation);
            continue;
        }
        const absolutePath = resolveImagePath(localPath, baseDir);
        try {
            const bytes = await fs.readFile(absolutePath);
            // Unknown extensions fall back to PNG.
            const mime = getMimeType(absolutePath) || 'image/png';
            output.push({
                ...operation,
                imagePath: `data:${mime};base64,${bytes.toString('base64')}`,
            });
        }
        catch (err) {
            throw new DocxError(`Failed to read image file: ${localPath} (resolved: ${absolutePath}). ${err instanceof Error ? err.message : String(err)}`, DocxErrorCode.INVALID_IMAGE_FILE, { imagePath: localPath, resolvedPath: absolutePath, baseDir });
        }
    }
    return output;
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * XML Text Replacer
3
+ * Handles direct XML manipulation for text replacements (preserves formatting)
4
+ */
5
+ /**
6
+ * Replace text directly in DOCX XML without converting to markdown
7
+ * This preserves all formatting, tables, styles, etc.
8
+ */
9
+ export declare function replaceTextInDocxXml(docxPath: string, searchText: string, replaceText: string): Promise<Buffer>;
@@ -0,0 +1,35 @@
1
+ /**
2
+ * XML Text Replacer
3
+ * Handles direct XML manipulation for text replacements (preserves formatting)
4
+ */
5
+ import fs from 'fs/promises';
6
+ import JSZip from 'jszip';
7
+ import { DocxError, DocxErrorCode, withErrorContext } from '../errors.js';
8
/**
 * Replace text directly in DOCX XML without converting to markdown.
 * This preserves all formatting, tables, styles, etc.
 *
 * Both strings are XML-escaped and the search is then performed as a plain, literal
 * substring replacement over the raw `word/document.xml` string. Consequences visible
 * from that approach:
 * - every occurrence is replaced;
 * - regex metacharacters in `searchText` and `$`-sequences in `replaceText` have no
 *   special meaning;
 * - text that is split across several XML elements will not match, because the match is
 *   done on the serialized XML rather than on the rendered text.
 *
 * An empty `searchText` leaves the document content unchanged.
 *
 * @param docxPath - Path of the DOCX file to read.
 * @param searchText - Literal text to look for.
 * @param replaceText - Literal text to insert instead.
 * @returns Buffer containing the re-zipped DOCX.
 * @throws {DocxError} `INVALID_DOCX` when `word/document.xml` is missing or empty; other
 *   failures go through `withErrorContext` with `DOCX_XML_REPLACE_FAILED`.
 */
export async function replaceTextInDocxXml(docxPath, searchText, replaceText) {
    return withErrorContext(async () => {
        const docxBuffer = await fs.readFile(docxPath);
        const zip = await JSZip.loadAsync(docxBuffer);
        const documentXml = await zip.file('word/document.xml')?.async('string');
        if (!documentXml) {
            throw new DocxError('Invalid DOCX file: word/document.xml not found', DocxErrorCode.INVALID_DOCX, { path: docxPath });
        }
        // Escape XML special characters
        // NOTE(review): quotes are escaped too; confirm the documents being edited store
        // quotes as &quot;/&apos; in text nodes, otherwise searches containing quotes miss.
        const escapeXml = (str) => str
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&apos;');
        const escapedSearch = escapeXml(searchText);
        const escapedReplace = escapeXml(replaceText);
        // Literal replacement: split/join avoids compiling user text as a regular
        // expression and avoids `$&`, `$1`, ... expansion in the replacement string.
        // An empty needle would otherwise inject the replacement between every character.
        const modifiedXml = escapedSearch === ''
            ? documentXml
            : documentXml.split(escapedSearch).join(escapedReplace);
        zip.file('word/document.xml', modifiedXml);
        const arrayBuffer = await zip.generateAsync({ type: 'uint8array' });
        return Buffer.from(arrayBuffer);
    }, DocxErrorCode.DOCX_XML_REPLACE_FAILED, { searchText, replaceText });
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * DOCX Creation and Editing Operations
3
+ *
4
+ * Main entry point for DOCX operations. This module provides:
5
+ * - Creating DOCX files from markdown
6
+ * - Editing existing DOCX files via operations
7
+ *
8
+ * @module docx/operations
9
+ */
10
+ import { createDocxFromMarkdown } from './builders/markdown-builder.js';
11
+ import { editDocxWithOperations } from './operations/index.js';
12
+ export type { DocxBuildOptions, DocxEditOptions, DocxOperation, DocxReplaceTextOperation, DocxAppendMarkdownOperation, DocxInsertTableOperation, DocxInsertImageOperation, } from './types.js';
13
+ export { createDocxFromMarkdown, editDocxWithOperations };
@@ -0,0 +1,13 @@
1
+ /**
2
+ * DOCX Creation and Editing Operations
3
+ *
4
+ * Main entry point for DOCX operations. This module provides:
5
+ * - Creating DOCX files from markdown
6
+ * - Editing existing DOCX files via operations
7
+ *
8
+ * @module docx/operations
9
+ */
10
+ import { createDocxFromMarkdown } from './builders/markdown-builder.js';
11
+ import { editDocxWithOperations } from './operations/index.js';
12
+ // Re-export main functions
13
+ export { createDocxFromMarkdown, editDocxWithOperations };
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Image Extractor
3
+ * Utilities for extracting and handling images from DOCX files
4
+ */
5
+ import type { ZipArchive } from './zip-reader.js';
6
+ import type { DocxRelationship } from '../types.js';
7
+ /**
8
+ * Get MIME type from file extension or target path
9
+ */
10
+ export declare function getMimeTypeForTarget(target: string): string;
11
+ /**
12
+ * Extract all images from a DOCX ZIP archive
13
+ */
14
+ export declare function extractImagesFromZip(zip: ZipArchive, relMap: Map<string, DocxRelationship>): Map<string, Buffer>;
15
+ /**
16
+ * Resolve image relationship ID from drawing or pict element
17
+ */
18
+ export declare function resolveImageRelId(element: Element): string | null;
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Image Extractor
3
+ * Utilities for extracting and handling images from DOCX files
4
+ */
5
+ import path from 'path';
6
+ import { readZipFileBuffer } from './zip-reader.js';
7
/**
 * Map a relationship target (or any file path) to an image MIME type.
 *
 * The decision is based solely on the lower-cased file extension.
 *
 * @param target - Path or file name to inspect.
 * @returns The matching image MIME type, or `application/octet-stream` when the
 *   extension is missing or not one of the known image extensions.
 */
export function getMimeTypeForTarget(target) {
    switch (path.extname(target).toLowerCase()) {
        case '.png':
            return 'image/png';
        case '.jpg':
        case '.jpeg':
            return 'image/jpeg';
        case '.gif':
            return 'image/gif';
        case '.bmp':
            return 'image/bmp';
        case '.webp':
            return 'image/webp';
        case '.svg':
            return 'image/svg+xml';
        default:
            return 'application/octet-stream';
    }
}
23
/**
 * Extract all images from a DOCX ZIP archive
 *
 * Every relationship whose type contains `/image` is looked up in the archive and, when
 * found, stored under its relationship id.
 *
 * Target resolution (targets are treated as belonging to the `word/` part, as the
 * original lookup already assumed):
 * - a target starting with `/` is taken as a path from the archive root;
 * - a target already starting with `word/` is used unchanged;
 * - any other target is resolved against `word/`, collapsing `.`/`..` segments.
 * The path the previous implementation produced is kept as a fallback, so archives that
 * resolved before still resolve.
 *
 * @param zip - Archive to read from (passed to `readZipFileBuffer`).
 * @param relMap - Relationship id → `{ target, type }`.
 * @returns Map of relationship id → image bytes; relationships whose file cannot be
 *   read are omitted.
 */
export function extractImagesFromZip(zip, relMap) {
    const images = new Map();
    for (const [relId, rel] of relMap.entries()) {
        if (!rel.type.includes('/image'))
            continue;
        const target = rel.target;
        // A Set keeps insertion order and drops duplicates when both rules agree.
        const candidatePaths = new Set();
        if (target.startsWith('/')) {
            // Root-relative target: "/word/media/a.png" -> "word/media/a.png".
            candidatePaths.add(target.replace(/^\/+/, ''));
        }
        else if (!target.startsWith('word/')) {
            // Relative target: "../media/a.png" -> "media/a.png", "media/a.png" -> "word/media/a.png".
            candidatePaths.add(path.posix.normalize(`word/${target}`));
        }
        // Previous behaviour, tried last for backward compatibility.
        candidatePaths.add(target.startsWith('word/')
            ? target
            : `word/${target.replace(/^\/?/, '')}`);
        for (const candidate of candidatePaths) {
            const imgBuffer = readZipFileBuffer(zip, candidate);
            if (imgBuffer) {
                images.set(relId, imgBuffer);
                break;
            }
        }
    }
    return images;
}
41
/**
 * Find the relationship id referenced by an image container element.
 *
 * Descendant `a:blip` elements are checked first (`r:embed`, then `embed`); only if none
 * of them yields a value are descendant `v:imagedata` elements checked (`r:id`, then
 * `id`). The first non-empty attribute value wins.
 *
 * @param element - Element to search (the caller passes drawing / pict elements).
 * @returns The relationship id, or `null` when no candidate attribute is set.
 */
export function resolveImageRelId(element) {
    // Order matters: newer drawing markup before the older pict markup.
    const lookups = [
        { tag: 'a:blip', attributes: ['r:embed', 'embed'] },
        { tag: 'v:imagedata', attributes: ['r:id', 'id'] },
    ];
    for (const { tag, attributes } of lookups) {
        const nodes = element.getElementsByTagName(tag);
        for (let idx = 0; idx < nodes.length; idx++) {
            for (const attribute of attributes) {
                const value = nodes[idx].getAttribute(attribute);
                if (value) {
                    return value;
                }
            }
        }
    }
    return null;
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * DOCX Parsers
3
+ * Centralized exports for all parsing utilities
4
+ */
5
+ export * from './zip-reader.js';
6
+ export * from './xml-parser.js';
7
+ export * from './image-extractor.js';
8
+ export * from './paragraph-parser.js';
9
+ export * from './table-parser.js';
@@ -0,0 +1,9 @@
1
+ /**
2
+ * DOCX Parsers
3
+ * Centralized exports for all parsing utilities
4
+ */
5
+ export * from './zip-reader.js';
6
+ export * from './xml-parser.js';
7
+ export * from './image-extractor.js';
8
+ export * from './paragraph-parser.js';
9
+ export * from './table-parser.js';
@@ -0,0 +1,2 @@
1
+ import type { DocxParagraph } from '../types.js';
2
+ export declare function parseParagraphElement(paragraph: Element, images: Map<string, Buffer>, headingLevel: number | null): DocxParagraph | null;
@@ -0,0 +1,88 @@
1
+ import { createRequire } from 'module';
2
+ const require = createRequire(import.meta.url);
3
+ // @ts-ignore
4
+ import * as docx from 'docx';
5
+ const { Paragraph, TextRun, ImageRun, HeadingLevel } = docx;
6
+ import { getElementChildren } from './xml-parser.js';
7
+ import { resolveImageRelId } from './image-extractor.js';
8
/**
 * Convert a paragraph element into a `docx` Paragraph.
 *
 * @param paragraph - Paragraph element whose runs are extracted.
 * @param images - Relationship id → image bytes, forwarded to run extraction.
 * @param headingLevel - Heading level to apply; any falsy value means "no heading".
 * @returns The built Paragraph, or `null` when the paragraph yields no runs.
 */
export function parseParagraphElement(paragraph, images, headingLevel) {
    const children = extractRunsFromParagraph(paragraph, images);
    if (children.length === 0) {
        return null;
    }
    const options = { children, heading: undefined };
    if (headingLevel) {
        options.heading = getDocxHeadingLevel(headingLevel);
    }
    return new Paragraph(options);
}
18
/**
 * Collect the runs of a paragraph in document order.
 *
 * Direct `w:r` children are converted as-is; for a direct `w:hyperlink` child every
 * descendant `w:r` is converted. All other child elements are ignored.
 *
 * @param paragraph - Paragraph element to walk.
 * @param images - Relationship id → image bytes, forwarded to `extractTextRun`.
 * @returns Flat list of run objects (possibly empty).
 */
function extractRunsFromParagraph(paragraph, images) {
    const collected = [];
    for (const node of getElementChildren(paragraph)) {
        switch (node.nodeName) {
            case 'w:r':
                collected.push(...extractTextRun(node, images));
                break;
            case 'w:hyperlink': {
                const nestedRuns = node.getElementsByTagName('w:r');
                for (let idx = 0; idx < nestedRuns.length; idx++) {
                    collected.push(...extractTextRun(nestedRuns[idx], images));
                }
                break;
            }
            default:
                break;
        }
    }
    return collected;
}
37
/**
 * Convert one `w:r` run element into `docx` run objects.
 *
 * Handled children: `w:t` (text, carrying the run's bold/italic state), `w:tab`,
 * `w:br`, and `w:drawing` / `w:pict` (embedded image, when its relationship id is present
 * in `images`). Other children are ignored.
 *
 * Bold/italic are read from `w:b` / `w:i` inside the run's `w:rPr`. A toggle element
 * without a value counts as on; an explicit `false`, `0` or `off` value counts as off.
 *
 * @param run - The run element.
 * @param images - Relationship id → image bytes.
 * @returns Run objects in child order (possibly empty).
 */
function extractTextRun(run, images) {
    const runs = [];
    const rPr = run.getElementsByTagName('w:rPr')[0];
    // A toggle property is enabled by its mere presence, but can be switched off
    // explicitly, e.g. <w:b w:val="0"/> to cancel bold inherited from a style.
    const isToggleOn = (tagName) => {
        const toggle = rPr?.getElementsByTagName(tagName)[0];
        if (!toggle) {
            return false;
        }
        const val = toggle.getAttribute('w:val') || toggle.getAttribute('val');
        if (!val) {
            return true;
        }
        return !['false', '0', 'off'].includes(val.trim().toLowerCase());
    };
    const isBold = isToggleOn('w:b');
    const isItalic = isToggleOn('w:i');
    const children = getElementChildren(run);
    for (const child of children) {
        const nodeName = child.nodeName;
        if (nodeName === 'w:t') {
            const text = child.textContent || '';
            if (text) {
                runs.push(new TextRun({
                    text,
                    bold: isBold,
                    italics: isItalic,
                }));
            }
        }
        else if (nodeName === 'w:tab') {
            runs.push(new TextRun({ text: '\t' }));
        }
        else if (nodeName === 'w:br') {
            runs.push(new TextRun({ text: '\n', break: 1 }));
        }
        else if (nodeName === 'w:drawing' || nodeName === 'w:pict') {
            const relId = resolveImageRelId(child);
            if (relId && images.has(relId)) {
                try {
                    runs.push(new ImageRun({
                        data: images.get(relId),
                        // Fixed size; the dimensions stored in the source document are not read.
                        transformation: { width: 600, height: 400 },
                    }));
                }
                catch (err) {
                    // Deliberate best-effort: an image the library rejects is dropped
                    // so the surrounding text is still produced.
                }
            }
        }
    }
    return runs;
}
78
/**
 * Translate a numeric heading level into the `docx` HeadingLevel constant.
 *
 * @param level - Heading level; 1 through 6 have a dedicated constant.
 * @returns The matching constant, or `HeadingLevel.HEADING_1` for any other value.
 */
function getDocxHeadingLevel(level) {
    const byLevel = Object.fromEntries([1, 2, 3, 4, 5, 6].map((n) => [n, HeadingLevel[`HEADING_${n}`]]));
    return byLevel[level] ?? HeadingLevel.HEADING_1;
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Table Parser
3
+ * Parses DOCX table elements to DOCX library Table objects
4
+ */
5
+ import type { DocxTable } from '../types.js';
6
+ /**
7
+ * Parse a table element to DOCX Table
8
+ */
9
+ export declare function parseTableElement(table: Element, images: Map<string, Buffer>): DocxTable | null;
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Table Parser
3
+ * Parses DOCX table elements to DOCX library Table objects
4
+ */
5
+ import { createRequire } from 'module';
6
+ const require = createRequire(import.meta.url);
7
+ // @ts-ignore
8
+ import * as docx from 'docx';
9
+ const { Table, TableRow, TableCell, Paragraph, WidthType } = docx;
10
+ import { parseParagraphElement } from './paragraph-parser.js';
11
/**
 * Parse a table element to DOCX Table
 *
 * Only rows that belong to `table` itself are converted. Rows of tables nested inside
 * one of its cells are skipped here, so they are not hoisted into the outer table as
 * extra rows.
 *
 * @param table - The table element.
 * @param images - Relationship id → image bytes, forwarded to row parsing.
 * @returns A full-width (100%) Table, or `null` when no row could be parsed.
 */
export function parseTableElement(table, images) {
    // getElementsByTagName matches every descendant <w:tr>. A row is ours only if no
    // other <w:tbl> sits between it and `table` on the ancestor chain.
    const isOwnRow = (rowNode) => {
        for (let ancestor = rowNode.parentNode; ancestor && ancestor !== table; ancestor = ancestor.parentNode) {
            if (ancestor.nodeName === 'w:tbl') {
                return false;
            }
        }
        return true;
    };
    const rows = [];
    const rowNodes = table.getElementsByTagName('w:tr');
    for (let i = 0; i < rowNodes.length; i++) {
        const rowNode = rowNodes[i];
        if (!isOwnRow(rowNode)) {
            continue;
        }
        const row = parseTableRow(rowNode, images);
        if (row) {
            rows.push(row);
        }
    }
    if (rows.length === 0) {
        return null;
    }
    return new Table({
        width: { size: 100, type: WidthType.PERCENTAGE },
        rows,
    });
}
32
/**
 * Parse a table row element
 *
 * Only cells that belong to `rowNode` itself are converted. Cells of rows nested deeper
 * (a table inside one of this row's cells) are skipped, so they are not appended to this
 * row as additional columns.
 *
 * @param rowNode - The row element.
 * @param images - Relationship id → image bytes, forwarded to cell parsing.
 * @returns A TableRow, or `null` when the row has no parsable cell.
 */
function parseTableRow(rowNode, images) {
    // getElementsByTagName matches every descendant <w:tc>. A cell is ours only if no
    // other <w:tr> sits between it and `rowNode` on the ancestor chain.
    const isOwnCell = (cellNode) => {
        for (let ancestor = cellNode.parentNode; ancestor && ancestor !== rowNode; ancestor = ancestor.parentNode) {
            if (ancestor.nodeName === 'w:tr') {
                return false;
            }
        }
        return true;
    };
    const cells = [];
    const cellNodes = rowNode.getElementsByTagName('w:tc');
    for (let j = 0; j < cellNodes.length; j++) {
        const cellNode = cellNodes[j];
        if (!isOwnCell(cellNode)) {
            continue;
        }
        const cell = parseTableCell(cellNode, images);
        if (cell) {
            cells.push(cell);
        }
    }
    if (cells.length === 0) {
        return null;
    }
    return new TableRow({
        children: cells,
    });
}
52
/**
 * Build a TableCell from a cell element.
 *
 * Every descendant `w:p` is parsed (with no heading level); paragraphs that parse to
 * `null` are dropped. A cell that ends up with no paragraph receives a single empty one.
 *
 * @param cellNode - The cell element.
 * @param images - Relationship id → image bytes, forwarded to paragraph parsing.
 * @returns The TableCell.
 */
function parseTableCell(cellNode, images) {
    const paragraphNodes = cellNode.getElementsByTagName('w:p');
    const content = [];
    let idx = 0;
    while (idx < paragraphNodes.length) {
        const parsed = parseParagraphElement(paragraphNodes[idx], images, null);
        if (parsed) {
            content.push(parsed);
        }
        idx += 1;
    }
    // A cell must never be left without a paragraph.
    if (content.length === 0) {
        content.push(new Paragraph({ text: '' }));
    }
    return new TableCell({ children: content });
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * XML Parser Utilities
3
+ * Helper functions for parsing DOCX XML content
4
+ */
5
+ import type { DocxRelationship } from '../types.js';
6
+ /**
7
+ * Get all element children of a node
8
+ */
9
+ export declare function getElementChildren(node: Node): Element[];
10
+ /**
11
+ * Get attribute value, checking both direct and namespaced attributes
12
+ */
13
+ export declare function getAttributeValue(node: Element, name: string): string | null;
14
+ /**
15
+ * Parse XML string to Document
16
+ */
17
+ export declare function parseXml(xml: string): Document;
18
+ /**
19
+ * Extract relationship map from relationships XML
20
+ */
21
+ export declare function extractRelationshipMap(relsXml: string | null): Map<string, DocxRelationship>;
22
+ /**
23
+ * Get heading level from paragraph element
24
+ */
25
+ export declare function getHeadingLevelFromParagraph(paragraph: Element): number | null;
@@ -0,0 +1,71 @@
1
+ /**
2
+ * XML Parser Utilities
3
+ * Helper functions for parsing DOCX XML content
4
+ */
5
+ import { createRequire } from 'module';
6
+ const require = createRequire(import.meta.url);
7
+ const { DOMParser } = require('@xmldom/xmldom');
8
/**
 * List the direct children of a node that are elements.
 *
 * @param node - Node whose `childNodes` are inspected.
 * @returns The children with `nodeType === 1`, in document order.
 */
export function getElementChildren(node) {
    const ELEMENT_NODE = 1;
    return Array.from(node.childNodes).filter((child) => child.nodeType === ELEMENT_NODE);
}
21
/**
 * Read an attribute, trying the bare name first and the `w:`-prefixed name second.
 *
 * @param node - Element to read from.
 * @param name - Attribute name without prefix.
 * @returns The first non-empty value found, otherwise `null`.
 */
export function getAttributeValue(node, name) {
    for (const candidate of [name, `w:${name}`]) {
        const value = node.getAttribute(candidate);
        if (value) {
            return value;
        }
    }
    return null;
}
27
/**
 * Parse an XML string into a Document using a fresh `DOMParser` and the
 * `application/xml` MIME type.
 *
 * @param xml - XML source text.
 * @returns The parsed Document.
 */
export function parseXml(xml) {
    return new DOMParser().parseFromString(xml, 'application/xml');
}
34
/**
 * Build a lookup of relationship id → `{ target, type }` from a relationships XML part.
 *
 * `Relationship` elements lacking an `Id` or a `Target` are skipped; a missing `Type`
 * is stored as an empty string.
 *
 * @param relsXml - Relationships XML text; `null` or empty yields an empty map.
 * @returns Map keyed by relationship id.
 */
export function extractRelationshipMap(relsXml) {
    const byId = new Map();
    if (relsXml) {
        const nodes = parseXml(relsXml).getElementsByTagName('Relationship');
        for (let idx = 0; idx < nodes.length; idx++) {
            const node = nodes[idx];
            const id = node.getAttribute('Id');
            const target = node.getAttribute('Target') || '';
            if (!id || !target) {
                continue;
            }
            byId.set(id, { target, type: node.getAttribute('Type') || '' });
        }
    }
    return byId;
}
54
/**
 * Derive a heading level from a paragraph's style reference.
 *
 * Reads the `val` attribute of the first `w:pStyle` inside the first `w:pPr` and looks
 * for "heading" (case-insensitive, optional whitespace) followed by a digit 1-6.
 *
 * @param paragraph - Paragraph element to inspect.
 * @returns The level 1-6, or `null` when any part of that chain is missing or the style
 *   value does not match.
 */
export function getHeadingLevelFromParagraph(paragraph) {
    const pStyle = paragraph.getElementsByTagName('w:pPr')[0]?.getElementsByTagName('w:pStyle')[0];
    const styleVal = pStyle ? getAttributeValue(pStyle, 'val') : null;
    const digit = styleVal ? styleVal.match(/heading\s*([1-6])/i)?.[1] : undefined;
    return digit === undefined ? null : Number(digit);
}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * ZIP File Reader
3
+ * Utilities for reading files from DOCX ZIP archives
4
+ */
5
+ declare const PizZip: any;
6
+ export type ZipArchive = InstanceType<typeof PizZip>;
7
+ /**
8
+ * Create a ZIP archive from a buffer
9
+ */
10
+ export declare function createZipFromBuffer(buffer: Buffer): ZipArchive;
11
+ /**
12
+ * Read a text file from a ZIP archive
13
+ */
14
+ export declare function readZipFileText(zip: ZipArchive, filePath: string): string | null;
15
+ /**
16
+ * Read a binary file from a ZIP archive as Buffer
17
+ */
18
+ export declare function readZipFileBuffer(zip: ZipArchive, filePath: string): Buffer | null;
19
+ /**
20
+ * Check if a file exists in the ZIP archive
21
+ */
22
+ export declare function zipFileExists(zip: ZipArchive, filePath: string): boolean;
23
+ export {};
@@ -0,0 +1,52 @@
1
+ /**
2
+ * ZIP File Reader
3
+ * Utilities for reading files from DOCX ZIP archives
4
+ */
5
+ import { createRequire } from 'module';
6
+ const require = createRequire(import.meta.url);
7
+ const PizZip = require('pizzip');
8
/**
 * Open a ZIP archive by constructing a `PizZip` instance over the given buffer.
 *
 * @param buffer - Raw archive bytes.
 * @returns The `PizZip` archive object.
 */
export function createZipFromBuffer(buffer) {
    const archive = new PizZip(buffer);
    return archive;
}
14
/**
 * Read an archive entry as text.
 *
 * Uses the entry's `asText()` when available; otherwise decodes the result of
 * `asBinary()` as UTF-8.
 *
 * @param zip - Archive exposing `file(path)`.
 * @param filePath - Entry path inside the archive.
 * @returns The text, or `null` when the entry is missing or offers neither accessor.
 */
export function readZipFileText(zip, filePath) {
    const entry = zip.file(filePath);
    if (entry && typeof entry.asText === 'function') {
        return entry.asText();
    }
    if (entry && typeof entry.asBinary === 'function') {
        const bytes = Buffer.from(entry.asBinary(), 'binary');
        return bytes.toString('utf8');
    }
    return null;
}
29
/**
 * Read an archive entry as a Buffer.
 *
 * The first accessor the entry provides is used, in this order: `asUint8Array()`
 * (copied into a Buffer), `asNodeBuffer()` (returned as-is), `asBinary()` (decoded as a
 * binary string).
 *
 * @param zip - Archive exposing `file(path)`.
 * @param filePath - Entry path inside the archive.
 * @returns The bytes, or `null` when the entry is missing or offers none of the accessors.
 */
export function readZipFileBuffer(zip, filePath) {
    const entry = zip.file(filePath);
    if (!entry) {
        return null;
    }
    // Accessors in priority order.
    const readers = [
        { method: 'asUint8Array', read: (e) => Buffer.from(e.asUint8Array()) },
        { method: 'asNodeBuffer', read: (e) => e.asNodeBuffer() },
        { method: 'asBinary', read: (e) => Buffer.from(e.asBinary(), 'binary') },
    ];
    for (const { method, read } of readers) {
        if (typeof entry[method] === 'function') {
            return read(entry);
        }
    }
    return null;
}
47
/**
 * Tell whether the archive has an entry at the given path.
 *
 * @param zip - Archive exposing `file(path)`.
 * @param filePath - Entry path inside the archive.
 * @returns `true` unless `zip.file(filePath)` returns exactly `null`.
 */
export function zipFileExists(zip, filePath) {
    const entry = zip.file(filePath);
    return !(entry === null);
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * DOCX Structure Parser and Builder
3
+ *
4
+ * Provides structure-preserving parsing and building of DOCX files.
5
+ * This approach maintains tables, images, and formatting at the DOCX element level,
6
+ * avoiding lossy markdown round-trips.
7
+ */
8
+ import type { DocxStructure, DocxElement } from './types.js';
9
+ export type { DocxElement, DocxStructure };
10
+ /**
11
+ * Parse a DOCX file into structured elements that can be manipulated and rebuilt
12
+ *
13
+ * @param buffer - DOCX file buffer
14
+ * @returns Structured representation of the DOCX document
15
+ * @throws {DocxError} If the DOCX file is invalid or cannot be parsed
16
+ */
17
+ export declare function parseDocxStructure(buffer: Buffer): Promise<DocxStructure>;
18
+ /**
19
+ * Build a DOCX file from structured elements
20
+ *
21
+ * @param structure - Structured DOCX representation
22
+ * @returns Buffer containing the DOCX file
23
+ * @throws {DocxError} If document building fails
24
+ */
25
+ export declare function buildDocxFromStructure(structure: DocxStructure): Promise<Buffer>;