@wonderwhy-er/desktop-commander 0.2.34 → 0.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tools/docx/builders/html-builder.d.ts +17 -0
- package/dist/tools/docx/builders/html-builder.js +92 -0
- package/dist/tools/docx/builders/index.d.ts +5 -0
- package/dist/tools/docx/builders/index.js +5 -0
- package/dist/tools/docx/builders/markdown-builder.d.ts +2 -0
- package/dist/tools/docx/builders/markdown-builder.js +260 -0
- package/dist/tools/docx/constants.d.ts +36 -0
- package/dist/tools/docx/constants.js +57 -0
- package/dist/tools/docx/converters/markdown-to-html.d.ts +17 -0
- package/dist/tools/docx/converters/markdown-to-html.js +111 -0
- package/dist/tools/docx/errors.d.ts +28 -0
- package/dist/tools/docx/errors.js +48 -0
- package/dist/tools/docx/extractors/images.d.ts +14 -0
- package/dist/tools/docx/extractors/images.js +40 -0
- package/dist/tools/docx/extractors/metadata.d.ts +14 -0
- package/dist/tools/docx/extractors/metadata.js +64 -0
- package/dist/tools/docx/extractors/sections.d.ts +14 -0
- package/dist/tools/docx/extractors/sections.js +61 -0
- package/dist/tools/docx/html.d.ts +17 -0
- package/dist/tools/docx/html.js +111 -0
- package/dist/tools/docx/index.d.ts +14 -0
- package/dist/tools/docx/index.js +16 -0
- package/dist/tools/docx/markdown.d.ts +84 -0
- package/dist/tools/docx/markdown.js +507 -0
- package/dist/tools/docx/operations/handlers/index.d.ts +39 -0
- package/dist/tools/docx/operations/handlers/index.js +152 -0
- package/dist/tools/docx/operations/html-manipulator.d.ts +24 -0
- package/dist/tools/docx/operations/html-manipulator.js +352 -0
- package/dist/tools/docx/operations/index.d.ts +14 -0
- package/dist/tools/docx/operations/index.js +61 -0
- package/dist/tools/docx/operations/operation-handlers.d.ts +3 -0
- package/dist/tools/docx/operations/operation-handlers.js +67 -0
- package/dist/tools/docx/operations/preprocessor.d.ts +14 -0
- package/dist/tools/docx/operations/preprocessor.js +44 -0
- package/dist/tools/docx/operations/xml-replacer.d.ts +9 -0
- package/dist/tools/docx/operations/xml-replacer.js +35 -0
- package/dist/tools/docx/operations.d.ts +13 -0
- package/dist/tools/docx/operations.js +13 -0
- package/dist/tools/docx/parsers/image-extractor.d.ts +18 -0
- package/dist/tools/docx/parsers/image-extractor.js +61 -0
- package/dist/tools/docx/parsers/index.d.ts +9 -0
- package/dist/tools/docx/parsers/index.js +9 -0
- package/dist/tools/docx/parsers/paragraph-parser.d.ts +2 -0
- package/dist/tools/docx/parsers/paragraph-parser.js +88 -0
- package/dist/tools/docx/parsers/table-parser.d.ts +9 -0
- package/dist/tools/docx/parsers/table-parser.js +72 -0
- package/dist/tools/docx/parsers/xml-parser.d.ts +25 -0
- package/dist/tools/docx/parsers/xml-parser.js +71 -0
- package/dist/tools/docx/parsers/zip-reader.d.ts +23 -0
- package/dist/tools/docx/parsers/zip-reader.js +52 -0
- package/dist/tools/docx/structure.d.ts +25 -0
- package/dist/tools/docx/structure.js +102 -0
- package/dist/tools/docx/styled-html-parser.d.ts +23 -0
- package/dist/tools/docx/styled-html-parser.js +1262 -0
- package/dist/tools/docx/types.d.ts +114 -0
- package/dist/tools/docx/types.js +8 -0
- package/dist/tools/docx/utils/escaping.d.ts +13 -0
- package/dist/tools/docx/utils/escaping.js +26 -0
- package/dist/tools/docx/utils/images.d.ts +9 -0
- package/dist/tools/docx/utils/images.js +26 -0
- package/dist/tools/docx/utils/index.d.ts +12 -0
- package/dist/tools/docx/utils/index.js +17 -0
- package/dist/tools/docx/utils/markdown.d.ts +13 -0
- package/dist/tools/docx/utils/markdown.js +32 -0
- package/dist/tools/docx/utils/paths.d.ts +15 -0
- package/dist/tools/docx/utils/paths.js +27 -0
- package/dist/tools/docx/utils/versioning.d.ts +25 -0
- package/dist/tools/docx/utils/versioning.js +55 -0
- package/dist/tools/docx/utils.d.ts +101 -0
- package/dist/tools/docx/utils.js +299 -0
- package/dist/tools/docx/validators.d.ts +13 -0
- package/dist/tools/docx/validators.js +40 -0
- package/dist/utils/capture.js +4 -4
- package/dist/utils/files/docx.d.ts +41 -0
- package/dist/utils/files/docx.js +245 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Operation Preprocessor
|
|
3
|
+
*
|
|
4
|
+
* Preprocesses operations before execution (e.g., converting local image paths to base64).
|
|
5
|
+
* Follows Single Responsibility Principle — only handles operation preprocessing.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/operations/preprocessor
|
|
8
|
+
*/
|
|
9
|
+
import type { DocxOperation } from '../types.js';
|
|
10
|
+
/**
|
|
11
|
+
* Resolve local image paths to base64 data URLs before operations are applied.
|
|
12
|
+
* html-to-docx cannot handle `file://` URLs — only base64 data URLs and HTTP URLs work.
|
|
13
|
+
*/
|
|
14
|
+
export declare function preprocessOperations(operations: DocxOperation[], baseDir: string): Promise<DocxOperation[]>;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Operation Preprocessor
|
|
3
|
+
*
|
|
4
|
+
* Preprocesses operations before execution (e.g., converting local image paths to base64).
|
|
5
|
+
* Follows Single Responsibility Principle — only handles operation preprocessing.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/operations/preprocessor
|
|
8
|
+
*/
|
|
9
|
+
import fs from 'fs/promises';
|
|
10
|
+
import { DocxError, DocxErrorCode } from '../errors.js';
|
|
11
|
+
import { isDataUrl, isUrl, resolveImagePath } from '../utils/paths.js';
|
|
12
|
+
import { getMimeType } from '../utils/images.js';
|
|
13
|
+
/**
 * Inline local image files as base64 data URLs before operations are applied.
 * html-to-docx cannot handle `file://` URLs — only base64 data URLs and HTTP URLs work.
 *
 * Operations that are not `insertImage`, or whose `imagePath` is blank, a data
 * URL, or a URL, are passed through untouched (same object reference).
 *
 * @param operations - Operations to inspect, in execution order.
 * @param baseDir - Directory handed to `resolveImagePath` for resolving image paths.
 * @returns A new array with local image paths replaced by data URLs.
 * @throws {DocxError} With code `INVALID_IMAGE_FILE` when an image cannot be read.
 */
export async function preprocessOperations(operations, baseDir) {
    const result = [];
    for (const operation of operations) {
        const candidatePath = operation.type === 'insertImage'
            ? operation.imagePath?.trim()
            : undefined;
        // Only non-empty paths that are neither data URLs nor URLs need inlining.
        const needsInlining = Boolean(candidatePath) && !isDataUrl(candidatePath) && !isUrl(candidatePath);
        if (!needsInlining) {
            result.push(operation);
            continue;
        }
        const absolutePath = resolveImagePath(candidatePath, baseDir);
        try {
            const bytes = await fs.readFile(absolutePath);
            // Fall back to PNG when the MIME type lookup yields nothing.
            const mime = getMimeType(absolutePath) || 'image/png';
            const dataUrl = `data:${mime};base64,${bytes.toString('base64')}`;
            result.push({ ...operation, imagePath: dataUrl });
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            throw new DocxError(`Failed to read image file: ${candidatePath} (resolved: ${absolutePath}). ${reason}`, DocxErrorCode.INVALID_IMAGE_FILE, { imagePath: candidatePath, resolvedPath: absolutePath, baseDir });
        }
    }
    return result;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XML Text Replacer
|
|
3
|
+
* Handles direct XML manipulation for text replacements (preserves formatting)
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Replace text directly in DOCX XML without converting to markdown
|
|
7
|
+
* This preserves all formatting, tables, styles, etc.
|
|
8
|
+
*/
|
|
9
|
+
export declare function replaceTextInDocxXml(docxPath: string, searchText: string, replaceText: string): Promise<Buffer>;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XML Text Replacer
|
|
3
|
+
* Handles direct XML manipulation for text replacements (preserves formatting)
|
|
4
|
+
*/
|
|
5
|
+
import fs from 'fs/promises';
|
|
6
|
+
import JSZip from 'jszip';
|
|
7
|
+
import { DocxError, DocxErrorCode, withErrorContext } from '../errors.js';
|
|
8
|
+
/**
 * Replace text directly in DOCX XML without converting to markdown.
 * This preserves all formatting, tables, styles, etc.
 *
 * Only the character data of `<w:t>` elements in `word/document.xml` is
 * touched; tag names and attributes are never rewritten. The search is a
 * literal (non-regex) match performed on the decoded text of each node, and
 * every occurrence inside a node is replaced. Text that is split across
 * several `<w:t>` elements is not matched. An empty `searchText` leaves the
 * document XML unchanged.
 *
 * @param docxPath - Path of the DOCX file to read.
 * @param searchText - Literal text to look for.
 * @param replaceText - Literal text to insert in place of each match.
 * @returns The re-zipped DOCX as a Buffer.
 * @throws {DocxError} `INVALID_DOCX` when `word/document.xml` is missing;
 *   other failures go through `withErrorContext` with `DOCX_XML_REPLACE_FAILED`.
 */
export async function replaceTextInDocxXml(docxPath, searchText, replaceText) {
    // Decode the predefined XML entities and numeric character references so
    // the search runs against the text the user actually sees.
    const decodeXml = (str) => str.replace(/&(amp|lt|gt|quot|apos|#x[0-9a-fA-F]+|#\d+);/g, (entity, ref) => {
        switch (ref) {
            case 'amp': return '&';
            case 'lt': return '<';
            case 'gt': return '>';
            case 'quot': return '"';
            case 'apos': return "'";
            default: {
                const codePoint = ref[1] === 'x'
                    ? parseInt(ref.slice(2), 16)
                    : parseInt(ref.slice(1), 10);
                // Leave out-of-range references untouched instead of throwing.
                return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
            }
        }
    });
    // Inside element content only `&`, `<` and `>` have to be escaped.
    const escapeXmlText = (str) => str
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
    return withErrorContext(async () => {
        const docxBuffer = await fs.readFile(docxPath);
        const zip = await JSZip.loadAsync(docxBuffer);
        const documentXml = await zip.file('word/document.xml')?.async('string');
        if (!documentXml) {
            throw new DocxError('Invalid DOCX file: word/document.xml not found', DocxErrorCode.INVALID_DOCX, { path: docxPath });
        }
        // `<w:t` must be followed by `>` or whitespace so that `<w:tab/>`,
        // `<w:tbl>`, `<w:tc>` etc. are not mistaken for text elements.
        const textNodePattern = /(<w:t(?:\s[^>]*)?>)([^<]*)(<\/w:t>)/g;
        const modifiedXml = !searchText
            ? documentXml
            : documentXml.replace(textNodePattern, (whole, openTag, text, closeTag) => {
                const decoded = decodeXml(text);
                if (!decoded.includes(searchText)) {
                    return whole;
                }
                // split/join is a literal replace-all: no regex metacharacters,
                // no `$` expansion in the replacement.
                const replaced = decoded.split(searchText).join(replaceText);
                return openTag + escapeXmlText(replaced) + closeTag;
            });
        zip.file('word/document.xml', modifiedXml);
        const arrayBuffer = await zip.generateAsync({ type: 'uint8array' });
        return Buffer.from(arrayBuffer);
    }, DocxErrorCode.DOCX_XML_REPLACE_FAILED, { searchText, replaceText });
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Creation and Editing Operations
|
|
3
|
+
*
|
|
4
|
+
* Main entry point for DOCX operations. This module provides:
|
|
5
|
+
* - Creating DOCX files from markdown
|
|
6
|
+
* - Editing existing DOCX files via operations
|
|
7
|
+
*
|
|
8
|
+
* @module docx/operations
|
|
9
|
+
*/
|
|
10
|
+
import { createDocxFromMarkdown } from './builders/markdown-builder.js';
|
|
11
|
+
import { editDocxWithOperations } from './operations/index.js';
|
|
12
|
+
export type { DocxBuildOptions, DocxEditOptions, DocxOperation, DocxReplaceTextOperation, DocxAppendMarkdownOperation, DocxInsertTableOperation, DocxInsertImageOperation, } from './types.js';
|
|
13
|
+
export { createDocxFromMarkdown, editDocxWithOperations };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Creation and Editing Operations
|
|
3
|
+
*
|
|
4
|
+
* Main entry point for DOCX operations. This module provides:
|
|
5
|
+
* - Creating DOCX files from markdown
|
|
6
|
+
* - Editing existing DOCX files via operations
|
|
7
|
+
*
|
|
8
|
+
* @module docx/operations
|
|
9
|
+
*/
|
|
10
|
+
import { createDocxFromMarkdown } from './builders/markdown-builder.js';
|
|
11
|
+
import { editDocxWithOperations } from './operations/index.js';
|
|
12
|
+
// Re-export main functions
|
|
13
|
+
export { createDocxFromMarkdown, editDocxWithOperations };
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Extractor
|
|
3
|
+
* Utilities for extracting and handling images from DOCX files
|
|
4
|
+
*/
|
|
5
|
+
import type { ZipArchive } from './zip-reader.js';
|
|
6
|
+
import type { DocxRelationship } from '../types.js';
|
|
7
|
+
/**
|
|
8
|
+
* Get MIME type from file extension or target path
|
|
9
|
+
*/
|
|
10
|
+
export declare function getMimeTypeForTarget(target: string): string;
|
|
11
|
+
/**
|
|
12
|
+
* Extract all images from a DOCX ZIP archive
|
|
13
|
+
*/
|
|
14
|
+
export declare function extractImagesFromZip(zip: ZipArchive, relMap: Map<string, DocxRelationship>): Map<string, Buffer>;
|
|
15
|
+
/**
|
|
16
|
+
* Resolve image relationship ID from drawing or pict element
|
|
17
|
+
*/
|
|
18
|
+
export declare function resolveImageRelId(element: Element): string | null;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Extractor
|
|
3
|
+
* Utilities for extracting and handling images from DOCX files
|
|
4
|
+
*/
|
|
5
|
+
import path from 'path';
|
|
6
|
+
import { readZipFileBuffer } from './zip-reader.js';
|
|
7
|
+
/**
 * Map a file path (or relationship target) to an image MIME type.
 *
 * The lookup is based on the lower-cased file extension; anything that is
 * not a recognised image extension yields `application/octet-stream`.
 *
 * @param target - File name or path whose extension is inspected.
 * @returns The MIME type string.
 */
export function getMimeTypeForTarget(target) {
    switch (path.extname(target).toLowerCase()) {
        case '.png':
            return 'image/png';
        case '.jpg':
        case '.jpeg':
            return 'image/jpeg';
        case '.gif':
            return 'image/gif';
        case '.bmp':
            return 'image/bmp';
        case '.webp':
            return 'image/webp';
        case '.svg':
            return 'image/svg+xml';
        default:
            return 'application/octet-stream';
    }
}
|
|
23
|
+
/**
 * Extract all images from a DOCX ZIP archive.
 *
 * Every relationship whose type contains `/image` is resolved to a path
 * inside the archive and read with `readZipFileBuffer`. Relationships whose
 * file cannot be read are skipped.
 *
 * Target resolution:
 * - leading slashes are stripped first, so `/word/media/a.png` and
 *   `word/media/a.png` both resolve to `word/media/a.png`;
 * - any other target is joined onto `word/` and normalised, so
 *   `media/a.png` becomes `word/media/a.png` and `../media/a.png` becomes
 *   `media/a.png`.
 *
 * @param zip - Archive to read from.
 * @param relMap - Relationship id → `{ target, type }` map.
 * @returns Map from relationship id to the image bytes.
 */
export function extractImagesFromZip(zip, relMap) {
    const images = new Map();
    for (const [relId, rel] of relMap.entries()) {
        if (!rel.type.includes('/image'))
            continue;
        // Strip the leading slash BEFORE testing for the `word/` prefix;
        // otherwise an absolute target gets `word/` prepended twice.
        const trimmedTarget = rel.target.replace(/^\/+/, '');
        const targetPath = trimmedTarget.startsWith('word/')
            ? trimmedTarget
            : path.posix.join('word', trimmedTarget);
        const imgBuffer = readZipFileBuffer(zip, targetPath);
        if (imgBuffer) {
            images.set(relId, imgBuffer);
        }
    }
    return images;
}
|
|
41
|
+
/**
|
|
42
|
+
* Resolve image relationship ID from drawing or pict element
|
|
43
|
+
*/
|
|
44
|
+
export function resolveImageRelId(element) {
|
|
45
|
+
// Try drawing element first (newer format)
|
|
46
|
+
const blips = element.getElementsByTagName('a:blip');
|
|
47
|
+
for (let i = 0; i < blips.length; i++) {
|
|
48
|
+
const blip = blips[i];
|
|
49
|
+
const relId = blip.getAttribute('r:embed') || blip.getAttribute('embed');
|
|
50
|
+
if (relId)
|
|
51
|
+
return relId;
|
|
52
|
+
}
|
|
53
|
+
// Try pict element (older format)
|
|
54
|
+
const imagedata = element.getElementsByTagName('v:imagedata');
|
|
55
|
+
for (let i = 0; i < imagedata.length; i++) {
|
|
56
|
+
const relId = imagedata[i].getAttribute('r:id') || imagedata[i].getAttribute('id');
|
|
57
|
+
if (relId)
|
|
58
|
+
return relId;
|
|
59
|
+
}
|
|
60
|
+
return null;
|
|
61
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { createRequire } from 'module';
|
|
2
|
+
const require = createRequire(import.meta.url);
|
|
3
|
+
// @ts-ignore
|
|
4
|
+
import * as docx from 'docx';
|
|
5
|
+
const { Paragraph, TextRun, ImageRun, HeadingLevel } = docx;
|
|
6
|
+
import { getElementChildren } from './xml-parser.js';
|
|
7
|
+
import { resolveImageRelId } from './image-extractor.js';
|
|
8
|
+
/**
 * Build a `Paragraph` from a paragraph element.
 *
 * @param paragraph - Paragraph element to convert.
 * @param images - Relationship id → image data map, forwarded to run extraction.
 * @param headingLevel - Heading level; a falsy value means "no heading".
 * @returns The paragraph, or `null` when no runs could be extracted.
 */
export function parseParagraphElement(paragraph, images, headingLevel) {
    const children = extractRunsFromParagraph(paragraph, images);
    if (children.length === 0) {
        return null;
    }
    const heading = headingLevel ? getDocxHeadingLevel(headingLevel) : undefined;
    return new Paragraph({ children, heading });
}
|
|
18
|
+
/**
 * Collect the runs of a paragraph in document order.
 *
 * Direct `w:r` children are converted as-is; for direct `w:hyperlink`
 * children every `w:r` descendant is converted. Other children are ignored.
 */
function extractRunsFromParagraph(paragraph, images) {
    const collected = [];
    for (const node of getElementChildren(paragraph)) {
        switch (node.nodeName) {
            case 'w:r':
                collected.push(...extractTextRun(node, images));
                break;
            case 'w:hyperlink': {
                const nestedRuns = node.getElementsByTagName('w:r');
                for (let idx = 0; idx < nestedRuns.length; idx += 1) {
                    collected.push(...extractTextRun(nestedRuns[idx], images));
                }
                break;
            }
            default:
                break;
        }
    }
    return collected;
}
|
|
37
|
+
/**
 * Decide whether a toggle run property (such as `w:b` or `w:i`) is switched on.
 *
 * A toggle element without a value means "on"; an explicit value of
 * `false`, `0` or `off` means "off". The mere presence of the element is
 * therefore not enough to treat the run as bold or italic.
 */
function isRunToggleOn(rPr, tagName) {
    const toggle = rPr?.getElementsByTagName(tagName)[0];
    if (!toggle) {
        return false;
    }
    // getAttribute may return null or '' for a missing attribute; both mean "no explicit value".
    const rawVal = toggle.getAttribute('w:val') || toggle.getAttribute('val') || '';
    return !['false', '0', 'off'].includes(rawVal.toLowerCase());
}
/**
 * Convert one `w:r` element into run objects.
 *
 * - `w:t` with non-empty text becomes a `TextRun` carrying the run's bold/italic state;
 * - `w:tab` becomes a tab `TextRun`, `w:br` a break `TextRun`;
 * - `w:drawing` / `w:pict` become an `ImageRun` when the referenced
 *   relationship id is present in `images`.
 *
 * @param run - The `w:r` element.
 * @param images - Relationship id → image data map.
 * @returns The runs produced, possibly empty.
 */
function extractTextRun(run, images) {
    const runs = [];
    const rPr = run.getElementsByTagName('w:rPr')[0];
    const isBold = isRunToggleOn(rPr, 'w:b');
    const isItalic = isRunToggleOn(rPr, 'w:i');
    const children = getElementChildren(run);
    for (const child of children) {
        const nodeName = child.nodeName;
        if (nodeName === 'w:t') {
            const text = child.textContent || '';
            if (text) {
                runs.push(new TextRun({
                    text,
                    bold: isBold,
                    italics: isItalic,
                }));
            }
        }
        else if (nodeName === 'w:tab') {
            runs.push(new TextRun({ text: '\t' }));
        }
        else if (nodeName === 'w:br') {
            runs.push(new TextRun({ text: '\n', break: 1 }));
        }
        else if (nodeName === 'w:drawing' || nodeName === 'w:pict') {
            const relId = resolveImageRelId(child);
            if (relId && images.has(relId)) {
                try {
                    runs.push(new ImageRun({
                        data: images.get(relId),
                        // Fixed display size; the original image dimensions are not read here.
                        transformation: { width: 600, height: 400 },
                    }));
                }
                catch (err) {
                    // Deliberate best-effort: an image that cannot be turned into a run is skipped
                    // so the rest of the paragraph is still produced.
                }
            }
        }
    }
    return runs;
}
|
|
78
|
+
/**
 * Translate a numeric heading level (1–6) into the matching `HeadingLevel`
 * value; any level without an entry falls back to `HeadingLevel.HEADING_1`.
 */
function getDocxHeadingLevel(level) {
    const byLevel = Object.fromEntries([1, 2, 3, 4, 5, 6].map((n) => [n, HeadingLevel[`HEADING_${n}`]]));
    const resolved = byLevel[level];
    if (resolved === undefined || resolved === null) {
        return HeadingLevel.HEADING_1;
    }
    return resolved;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Table Parser
|
|
3
|
+
* Parses DOCX table elements to DOCX library Table objects
|
|
4
|
+
*/
|
|
5
|
+
import type { DocxTable } from '../types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Parse a table element to DOCX Table
|
|
8
|
+
*/
|
|
9
|
+
export declare function parseTableElement(table: Element, images: Map<string, Buffer>): DocxTable | null;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Table Parser
|
|
3
|
+
* Parses DOCX table elements to DOCX library Table objects
|
|
4
|
+
*/
|
|
5
|
+
import { createRequire } from 'module';
|
|
6
|
+
const require = createRequire(import.meta.url);
|
|
7
|
+
// @ts-ignore
|
|
8
|
+
import * as docx from 'docx';
|
|
9
|
+
const { Table, TableRow, TableCell, Paragraph, WidthType } = docx;
|
|
10
|
+
import { parseParagraphElement } from './paragraph-parser.js';
|
|
11
|
+
/**
 * Collect the nearest descendants of `root` whose node name is listed in
 * `tagNames`, in document order.
 *
 * The walk does not descend into a matching element, so elements that belong
 * to a nested structure (for example the rows of a table nested inside a
 * cell) are not returned. Non-matching wrapper elements are walked through.
 */
function collectNearestDescendants(root, tagNames) {
    const found = [];
    const visit = (node) => {
        const kids = node.childNodes;
        if (!kids) {
            return;
        }
        for (let i = 0; i < kids.length; i++) {
            const kid = kids[i];
            if (kid.nodeType !== 1) { // ELEMENT_NODE only
                continue;
            }
            if (tagNames.includes(kid.nodeName)) {
                found.push(kid);
            }
            else {
                visit(kid);
            }
        }
    };
    visit(root);
    return found;
}
/**
 * Parse a table element to DOCX Table.
 *
 * Only the table's own rows are used; rows of tables nested inside its
 * cells are handled when those cells are parsed.
 *
 * @param table - The `w:tbl` element.
 * @param images - Relationship id → image data map, forwarded to paragraph parsing.
 * @returns A full-width `Table`, or `null` when no row could be parsed.
 */
export function parseTableElement(table, images) {
    const rows = [];
    for (const rowNode of collectNearestDescendants(table, ['w:tr'])) {
        const row = parseTableRow(rowNode, images);
        if (row) {
            rows.push(row);
        }
    }
    if (rows.length === 0) {
        return null;
    }
    return new Table({
        width: { size: 100, type: WidthType.PERCENTAGE },
        rows,
    });
}
/**
 * Parse a table row element.
 *
 * @returns A `TableRow` built from the row's own cells, or `null` when it has none.
 */
function parseTableRow(rowNode, images) {
    const cells = [];
    for (const cellNode of collectNearestDescendants(rowNode, ['w:tc'])) {
        const cell = parseTableCell(cellNode, images);
        if (cell) {
            cells.push(cell);
        }
    }
    if (cells.length === 0) {
        return null;
    }
    return new TableRow({
        children: cells,
    });
}
/**
 * Parse a table cell element.
 *
 * The cell's own paragraphs and nested tables are converted in document
 * order; nested tables are parsed recursively instead of being flattened
 * into this cell. An empty paragraph is appended when the cell would
 * otherwise be empty or would end with a nested table.
 */
function parseTableCell(cellNode, images) {
    const cellChildren = [];
    let endsWithParagraph = false;
    for (const node of collectNearestDescendants(cellNode, ['w:p', 'w:tbl'])) {
        const isNestedTable = node.nodeName === 'w:tbl';
        const parsed = isNestedTable
            ? parseTableElement(node, images)
            : parseParagraphElement(node, images, null);
        if (parsed) {
            cellChildren.push(parsed);
            endsWithParagraph = !isNestedTable;
        }
    }
    // Ensure at least one paragraph per cell, and a paragraph as the last child
    if (!endsWithParagraph) {
        cellChildren.push(new Paragraph({ text: '' }));
    }
    return new TableCell({
        children: cellChildren,
    });
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XML Parser Utilities
|
|
3
|
+
* Helper functions for parsing DOCX XML content
|
|
4
|
+
*/
|
|
5
|
+
import type { DocxRelationship } from '../types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Get all element children of a node
|
|
8
|
+
*/
|
|
9
|
+
export declare function getElementChildren(node: Node): Element[];
|
|
10
|
+
/**
|
|
11
|
+
* Get attribute value, checking both direct and namespaced attributes
|
|
12
|
+
*/
|
|
13
|
+
export declare function getAttributeValue(node: Element, name: string): string | null;
|
|
14
|
+
/**
|
|
15
|
+
* Parse XML string to Document
|
|
16
|
+
*/
|
|
17
|
+
export declare function parseXml(xml: string): Document;
|
|
18
|
+
/**
|
|
19
|
+
* Extract relationship map from relationships XML
|
|
20
|
+
*/
|
|
21
|
+
export declare function extractRelationshipMap(relsXml: string | null): Map<string, DocxRelationship>;
|
|
22
|
+
/**
|
|
23
|
+
* Get heading level from paragraph element
|
|
24
|
+
*/
|
|
25
|
+
export declare function getHeadingLevelFromParagraph(paragraph: Element): number | null;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XML Parser Utilities
|
|
3
|
+
* Helper functions for parsing DOCX XML content
|
|
4
|
+
*/
|
|
5
|
+
import { createRequire } from 'module';
|
|
6
|
+
const require = createRequire(import.meta.url);
|
|
7
|
+
const { DOMParser } = require('@xmldom/xmldom');
|
|
8
|
+
/**
 * Return the direct children of `node` that are elements (nodeType 1),
 * preserving their order. Text, comment and other node kinds are skipped.
 */
export function getElementChildren(node) {
    const elements = [];
    const { childNodes } = node;
    let index = 0;
    while (index < childNodes.length) {
        const candidate = childNodes[index];
        if (candidate.nodeType === 1) { // ELEMENT_NODE
            elements.push(candidate);
        }
        index += 1;
    }
    return elements;
}
|
|
21
|
+
/**
|
|
22
|
+
* Get attribute value, checking both direct and namespaced attributes
|
|
23
|
+
*/
|
|
24
|
+
export function getAttributeValue(node, name) {
|
|
25
|
+
return node.getAttribute(name) || node.getAttribute(`w:${name}`) || null;
|
|
26
|
+
}
|
|
27
|
+
/**
 * Parse an XML string into a Document using `DOMParser` with the
 * `application/xml` MIME type.
 */
export function parseXml(xml) {
    return new DOMParser().parseFromString(xml, 'application/xml');
}
|
|
34
|
+
/**
 * Build a relationship id → `{ target, type }` map from relationships XML.
 *
 * A falsy `relsXml` yields an empty map. `Relationship` elements lacking an
 * `Id` or a `Target` are skipped; a missing `Type` is stored as `''`.
 */
export function extractRelationshipMap(relsXml) {
    const byId = new Map();
    if (relsXml) {
        const relationshipNodes = parseXml(relsXml).getElementsByTagName('Relationship');
        for (let idx = 0; idx < relationshipNodes.length; idx += 1) {
            const node = relationshipNodes[idx];
            const id = node.getAttribute('Id');
            const type = node.getAttribute('Type') || '';
            const target = node.getAttribute('Target') || '';
            if (!id || !target) {
                continue;
            }
            byId.set(id, { target, type });
        }
    }
    return byId;
}
|
|
54
|
+
/**
|
|
55
|
+
* Get heading level from paragraph element
|
|
56
|
+
*/
|
|
57
|
+
export function getHeadingLevelFromParagraph(paragraph) {
|
|
58
|
+
const pPr = paragraph.getElementsByTagName('w:pPr')[0];
|
|
59
|
+
if (!pPr)
|
|
60
|
+
return null;
|
|
61
|
+
const pStyle = pPr.getElementsByTagName('w:pStyle')[0];
|
|
62
|
+
if (!pStyle)
|
|
63
|
+
return null;
|
|
64
|
+
const styleVal = getAttributeValue(pStyle, 'val');
|
|
65
|
+
if (!styleVal)
|
|
66
|
+
return null;
|
|
67
|
+
const match = styleVal.match(/heading\s*([1-6])/i);
|
|
68
|
+
if (!match)
|
|
69
|
+
return null;
|
|
70
|
+
return Number(match[1]);
|
|
71
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ZIP File Reader
|
|
3
|
+
* Utilities for reading files from DOCX ZIP archives
|
|
4
|
+
*/
|
|
5
|
+
declare const PizZip: any;
|
|
6
|
+
export type ZipArchive = InstanceType<typeof PizZip>;
|
|
7
|
+
/**
|
|
8
|
+
* Create a ZIP archive from a buffer
|
|
9
|
+
*/
|
|
10
|
+
export declare function createZipFromBuffer(buffer: Buffer): ZipArchive;
|
|
11
|
+
/**
|
|
12
|
+
* Read a text file from a ZIP archive
|
|
13
|
+
*/
|
|
14
|
+
export declare function readZipFileText(zip: ZipArchive, filePath: string): string | null;
|
|
15
|
+
/**
|
|
16
|
+
* Read a binary file from a ZIP archive as Buffer
|
|
17
|
+
*/
|
|
18
|
+
export declare function readZipFileBuffer(zip: ZipArchive, filePath: string): Buffer | null;
|
|
19
|
+
/**
|
|
20
|
+
* Check if a file exists in the ZIP archive
|
|
21
|
+
*/
|
|
22
|
+
export declare function zipFileExists(zip: ZipArchive, filePath: string): boolean;
|
|
23
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ZIP File Reader
|
|
3
|
+
* Utilities for reading files from DOCX ZIP archives
|
|
4
|
+
*/
|
|
5
|
+
import { createRequire } from 'module';
|
|
6
|
+
const require = createRequire(import.meta.url);
|
|
7
|
+
const PizZip = require('pizzip');
|
|
8
|
+
/**
 * Wrap a buffer in a new `PizZip` archive instance.
 */
export function createZipFromBuffer(buffer) {
    const archive = new PizZip(buffer);
    return archive;
}
|
|
14
|
+
/**
|
|
15
|
+
* Read a text file from a ZIP archive
|
|
16
|
+
*/
|
|
17
|
+
export function readZipFileText(zip, filePath) {
|
|
18
|
+
const file = zip.file(filePath);
|
|
19
|
+
if (!file)
|
|
20
|
+
return null;
|
|
21
|
+
if (typeof file.asText === 'function') {
|
|
22
|
+
return file.asText();
|
|
23
|
+
}
|
|
24
|
+
if (typeof file.asBinary === 'function') {
|
|
25
|
+
return Buffer.from(file.asBinary(), 'binary').toString('utf8');
|
|
26
|
+
}
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Read a binary file from a ZIP archive as Buffer
|
|
31
|
+
*/
|
|
32
|
+
export function readZipFileBuffer(zip, filePath) {
|
|
33
|
+
const file = zip.file(filePath);
|
|
34
|
+
if (!file)
|
|
35
|
+
return null;
|
|
36
|
+
if (typeof file.asUint8Array === 'function') {
|
|
37
|
+
return Buffer.from(file.asUint8Array());
|
|
38
|
+
}
|
|
39
|
+
if (typeof file.asNodeBuffer === 'function') {
|
|
40
|
+
return file.asNodeBuffer();
|
|
41
|
+
}
|
|
42
|
+
if (typeof file.asBinary === 'function') {
|
|
43
|
+
return Buffer.from(file.asBinary(), 'binary');
|
|
44
|
+
}
|
|
45
|
+
return null;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Check if a file exists in the ZIP archive
|
|
49
|
+
*/
|
|
50
|
+
export function zipFileExists(zip, filePath) {
|
|
51
|
+
return zip.file(filePath) !== null;
|
|
52
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Structure Parser and Builder
|
|
3
|
+
*
|
|
4
|
+
* Provides structure-preserving parsing and building of DOCX files.
|
|
5
|
+
* This approach maintains tables, images, and formatting at the DOCX element level,
|
|
6
|
+
* avoiding lossy markdown round-trips.
|
|
7
|
+
*/
|
|
8
|
+
import type { DocxStructure, DocxElement } from './types.js';
|
|
9
|
+
export type { DocxElement, DocxStructure };
|
|
10
|
+
/**
|
|
11
|
+
* Parse a DOCX file into structured elements that can be manipulated and rebuilt
|
|
12
|
+
*
|
|
13
|
+
* @param buffer - DOCX file buffer
|
|
14
|
+
* @returns Structured representation of the DOCX document
|
|
15
|
+
* @throws {DocxError} If the DOCX file is invalid or cannot be parsed
|
|
16
|
+
*/
|
|
17
|
+
export declare function parseDocxStructure(buffer: Buffer): Promise<DocxStructure>;
|
|
18
|
+
/**
|
|
19
|
+
* Build a DOCX file from structured elements
|
|
20
|
+
*
|
|
21
|
+
* @param structure - Structured DOCX representation
|
|
22
|
+
* @returns Buffer containing the DOCX file
|
|
23
|
+
* @throws {DocxError} If document building fails
|
|
24
|
+
*/
|
|
25
|
+
export declare function buildDocxFromStructure(structure: DocxStructure): Promise<Buffer>;
|