@wonderwhy-er/desktop-commander 0.2.33 → 0.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/remote-device/scripts/blocking-offline-update.js +64 -0
- package/dist/tools/docx/builders/html-builder.d.ts +17 -0
- package/dist/tools/docx/builders/html-builder.js +92 -0
- package/dist/tools/docx/builders/index.d.ts +5 -0
- package/dist/tools/docx/builders/index.js +5 -0
- package/dist/tools/docx/builders/markdown-builder.d.ts +2 -0
- package/dist/tools/docx/builders/markdown-builder.js +260 -0
- package/dist/tools/docx/constants.d.ts +36 -0
- package/dist/tools/docx/constants.js +57 -0
- package/dist/tools/docx/converters/markdown-to-html.d.ts +17 -0
- package/dist/tools/docx/converters/markdown-to-html.js +111 -0
- package/dist/tools/docx/errors.d.ts +28 -0
- package/dist/tools/docx/errors.js +48 -0
- package/dist/tools/docx/extractors/images.d.ts +14 -0
- package/dist/tools/docx/extractors/images.js +40 -0
- package/dist/tools/docx/extractors/metadata.d.ts +14 -0
- package/dist/tools/docx/extractors/metadata.js +64 -0
- package/dist/tools/docx/extractors/sections.d.ts +14 -0
- package/dist/tools/docx/extractors/sections.js +61 -0
- package/dist/tools/docx/html.d.ts +17 -0
- package/dist/tools/docx/html.js +111 -0
- package/dist/tools/docx/index.d.ts +14 -0
- package/dist/tools/docx/index.js +16 -0
- package/dist/tools/docx/markdown.d.ts +84 -0
- package/dist/tools/docx/markdown.js +507 -0
- package/dist/tools/docx/operations/handlers/index.d.ts +39 -0
- package/dist/tools/docx/operations/handlers/index.js +152 -0
- package/dist/tools/docx/operations/html-manipulator.d.ts +24 -0
- package/dist/tools/docx/operations/html-manipulator.js +352 -0
- package/dist/tools/docx/operations/index.d.ts +14 -0
- package/dist/tools/docx/operations/index.js +61 -0
- package/dist/tools/docx/operations/operation-handlers.d.ts +3 -0
- package/dist/tools/docx/operations/operation-handlers.js +67 -0
- package/dist/tools/docx/operations/preprocessor.d.ts +14 -0
- package/dist/tools/docx/operations/preprocessor.js +44 -0
- package/dist/tools/docx/operations/xml-replacer.d.ts +9 -0
- package/dist/tools/docx/operations/xml-replacer.js +35 -0
- package/dist/tools/docx/operations.d.ts +13 -0
- package/dist/tools/docx/operations.js +13 -0
- package/dist/tools/docx/parsers/image-extractor.d.ts +18 -0
- package/dist/tools/docx/parsers/image-extractor.js +61 -0
- package/dist/tools/docx/parsers/index.d.ts +9 -0
- package/dist/tools/docx/parsers/index.js +9 -0
- package/dist/tools/docx/parsers/paragraph-parser.d.ts +2 -0
- package/dist/tools/docx/parsers/paragraph-parser.js +88 -0
- package/dist/tools/docx/parsers/table-parser.d.ts +9 -0
- package/dist/tools/docx/parsers/table-parser.js +72 -0
- package/dist/tools/docx/parsers/xml-parser.d.ts +25 -0
- package/dist/tools/docx/parsers/xml-parser.js +71 -0
- package/dist/tools/docx/parsers/zip-reader.d.ts +23 -0
- package/dist/tools/docx/parsers/zip-reader.js +52 -0
- package/dist/tools/docx/structure.d.ts +25 -0
- package/dist/tools/docx/structure.js +102 -0
- package/dist/tools/docx/styled-html-parser.d.ts +23 -0
- package/dist/tools/docx/styled-html-parser.js +1262 -0
- package/dist/tools/docx/types.d.ts +114 -0
- package/dist/tools/docx/types.js +8 -0
- package/dist/tools/docx/utils/escaping.d.ts +13 -0
- package/dist/tools/docx/utils/escaping.js +26 -0
- package/dist/tools/docx/utils/images.d.ts +9 -0
- package/dist/tools/docx/utils/images.js +26 -0
- package/dist/tools/docx/utils/index.d.ts +12 -0
- package/dist/tools/docx/utils/index.js +17 -0
- package/dist/tools/docx/utils/markdown.d.ts +13 -0
- package/dist/tools/docx/utils/markdown.js +32 -0
- package/dist/tools/docx/utils/paths.d.ts +15 -0
- package/dist/tools/docx/utils/paths.js +27 -0
- package/dist/tools/docx/utils/versioning.d.ts +25 -0
- package/dist/tools/docx/utils/versioning.js +55 -0
- package/dist/tools/docx/utils.d.ts +101 -0
- package/dist/tools/docx/utils.js +299 -0
- package/dist/tools/docx/validators.d.ts +13 -0
- package/dist/tools/docx/validators.js +40 -0
- package/dist/utils/capture.js +4 -4
- package/dist/utils/files/docx.d.ts +41 -0
- package/dist/utils/files/docx.js +245 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Operation Handlers
|
|
3
|
+
*
|
|
4
|
+
* Pure functions: HTML in → modified HTML out.
|
|
5
|
+
* Each handler corresponds to one DocxOperation type.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/operations/handlers
|
|
8
|
+
*/
|
|
9
|
+
import { DocxError, DocxErrorCode } from '../../errors.js';
|
|
10
|
+
import { markdownToHtml, markdownTableToHtml, buildMarkdownTableFromRows, } from '../../converters/markdown-to-html.js';
|
|
11
|
+
import { appendHtml, insertHtml, replaceHtml, updateHtml } from '../html-manipulator.js';
|
|
12
|
+
import { escapeHtmlAttribute, escapeRegExp } from '../../utils/escaping.js';
|
|
13
|
+
import { isUrl, isDataUrl, resolveImagePath } from '../../utils/paths.js';
|
|
14
|
+
import { validateImageDimensions } from '../../validators.js';
|
|
15
|
+
// ─── Text Operations ─────────────────────────────────────────────────────────
|
|
16
|
+
/**
 * Replace occurrences of a literal search string in HTML while leaving `<img>` tags untouched.
 *
 * The HTML is split on `<img …>` tags and only the segments between them are searched,
 * so neither the search nor the replacement can alter image markup (including large
 * base64 `src` values). No placeholder text is injected, so a search term such as "0"
 * or "img" cannot collide with bookkeeping strings.
 *
 * The replacement is inserted literally: `$&`, `$1`, `$$` and similar sequences carry
 * no special meaning.
 *
 * NOTE(review): the search still runs over the remaining markup, so a term that equals
 * a tag or attribute name can match inside non-image tags.
 *
 * @param {string} html - HTML to search.
 * @param {string} search - Literal text to find; blank or whitespace-only returns `html` unchanged.
 * @param {string} replace - Literal replacement text.
 * @param {boolean} [matchCase=false] - When false, matching is case-insensitive.
 * @param {boolean} [global=true] - When false, only the first occurrence is replaced.
 * @returns {string} The HTML with the replacement(s) applied.
 */
export function handleReplaceText(html, search, replace, matchCase = false, global = true) {
    if (!search?.trim())
        return html;
    const flags = `${global ? 'g' : ''}${matchCase ? '' : 'i'}`;
    const pattern = new RegExp(escapeRegExp(search), flags);
    // A replacer function keeps `$`-sequences in the replacement text literal.
    const literalReplacement = () => replace;
    // split() with a capturing group keeps the <img> tags at the odd indices,
    // leaving the searchable text/markup at the even indices.
    const segments = html.split(/(<img\s[^>]*>)/gi);
    for (let i = 0; i < segments.length; i += 2) {
        if (global) {
            segments[i] = segments[i].replace(pattern, literalReplacement);
        }
        else if (pattern.test(segments[i])) {
            // Non-global: stop after the first segment that contains a match.
            segments[i] = segments[i].replace(pattern, literalReplacement);
            break;
        }
    }
    return segments.join('');
}
|
|
42
|
+
// ─── HTML / Markdown Append & Insert ─────────────────────────────────────────
|
|
43
|
+
/**
 * Convert markdown to HTML and append the result to the document.
 *
 * @param {string} html - Current document HTML.
 * @param {string} markdown - Markdown to append; blank input leaves `html` unchanged.
 * @returns {string} The HTML with the converted markdown appended, or `html` unchanged
 *   when there is nothing to append or the conversion yields an empty result.
 */
export function handleAppendMarkdown(html, markdown) {
    const hasMarkdown = Boolean(markdown?.trim());
    if (!hasMarkdown) {
        return html;
    }
    const fragment = markdownToHtml(markdown);
    if (!fragment) {
        return html;
    }
    return appendHtml(html, fragment);
}
|
|
49
|
+
/**
 * Append raw HTML to the document by delegating to `appendHtml`.
 *
 * @param {string} html - Current document HTML.
 * @param {string} content - HTML fragment to append.
 * @returns {string} Whatever `appendHtml` returns for these arguments.
 */
export function handleAppendHtml(html, content) {
    const updated = appendHtml(html, content);
    return updated;
}
|
|
52
|
+
/**
 * Insert raw HTML relative to a selector target by delegating to `insertHtml`.
 *
 * @param {string} html - Current document HTML.
 * @param {string} content - HTML fragment to insert.
 * @param {string} [selector] - Selector identifying the target element.
 * @param {string} [position='after'] - Placement relative to the target.
 * @returns {string} Whatever `insertHtml` returns for these arguments.
 */
export function handleInsertHtml(html, content, selector, position = 'after') {
    const updated = insertHtml(html, content, selector, position);
    return updated;
}
|
|
55
|
+
/**
 * Replace the element(s) matched by a selector by delegating to `replaceHtml`.
 *
 * @param {string} html - Current document HTML.
 * @param {string} selector - Selector identifying the element(s) to replace.
 * @param {string} content - Replacement HTML fragment.
 * @param {boolean} [replaceAll=false] - Forwarded to `replaceHtml` unchanged.
 * @returns {string} Whatever `replaceHtml` returns for these arguments.
 */
export function handleReplaceHtml(html, selector, content, replaceAll = false) {
    const updated = replaceHtml(html, selector, content, replaceAll);
    return updated;
}
|
|
58
|
+
/**
 * Update the content and/or attributes of matched element(s) by delegating to `updateHtml`.
 *
 * @param {string} html - Current document HTML.
 * @param {string} selector - Selector identifying the element(s) to update.
 * @param {string} [content] - New inner HTML, if any.
 * @param {Object<string, string>} [attributes] - Attributes to set, if any.
 * @param {boolean} [updateAll=false] - Forwarded to `updateHtml` unchanged.
 * @returns {string} Whatever `updateHtml` returns for these arguments.
 */
export function handleUpdateHtml(html, selector, content, attributes, updateAll = false) {
    const updated = updateHtml(html, selector, content, attributes, updateAll);
    return updated;
}
|
|
61
|
+
// ─── Table Insertion ─────────────────────────────────────────────────────────
|
|
62
|
+
/**
 * Build a table and add it to the document.
 *
 * The table source is `markdownTable` when it is non-blank; otherwise a markdown table
 * is built from `rows`. When a non-blank `selector` is supplied the table is inserted
 * relative to that element, otherwise it is appended to the document.
 *
 * @param {string} html - Current document HTML.
 * @param {string} [markdownTable] - Markdown table text (takes precedence over `rows`).
 * @param {Array} [rows] - Row data passed to `buildMarkdownTableFromRows`.
 * @param {string} [selector] - Selector of the element to insert relative to.
 * @param {string} [position='after'] - Placement relative to the selector target.
 * @returns {string} The updated HTML, or `html` unchanged when no table HTML was produced.
 */
export function handleInsertTable(html, markdownTable, rows, selector, position = 'after') {
    let tableMarkdown = '';
    if (markdownTable?.trim()) {
        tableMarkdown = markdownTable;
    }
    else if (rows?.length) {
        tableMarkdown = buildMarkdownTableFromRows(rows);
    }
    const tableHtml = tableMarkdown ? markdownTableToHtml(tableMarkdown) : '';
    if (!tableHtml) {
        return html;
    }
    if (selector?.trim()) {
        return insertHtml(html, tableHtml, selector, position);
    }
    return appendHtml(html, tableHtml);
}
|
|
83
|
+
// ─── Image Insertion ─────────────────────────────────────────────────────────
|
|
84
|
+
/**
 * Insert an `<img>` (wrapped in a `<p>`) into the document.
 *
 * Data URLs and URLs accepted by `isUrl` are used as the `src` verbatim. Any other
 * value is treated as a file path, resolved against `baseDir` and turned into a
 * `file://` URL. That branch is a fallback: local paths are expected to have been
 * converted to base64 data URLs beforehand by `preprocessOperations()` (imported from
 * `operations/preprocessor.js` and run by the operations entry point).
 * NOTE(review): the original comment states html-to-docx only supports base64 data
 * URLs and HTTP URLs — confirm before relying on the `file://` fallback.
 *
 * @param {string} html - Current document HTML.
 * @param {string} imagePath - Data URL, URL, or file path; blank leaves `html` unchanged.
 * @param {string} [altText=''] - Alternative text; omitted from the tag when blank.
 * @param {number} [width] - Width in pixels; emitted as attribute and inline style when > 0.
 * @param {number} [height] - Height in pixels; emitted as attribute and inline style when > 0.
 * @param {string} [baseDir] - Base directory used to resolve a file path.
 * @param {string} [selector] - Selector of the element to insert relative to.
 * @param {string} [position='after'] - Placement relative to the selector target.
 * @returns {string} The updated HTML.
 */
export function handleInsertImage(html, imagePath, altText = '', width, height, baseDir, selector, position = 'after') {
    const source = imagePath?.trim();
    if (!source) {
        return html;
    }
    const hasDimensions = !(width === undefined && height === undefined);
    if (hasDimensions) {
        validateImageDimensions(width, height);
    }
    let imageSrc = source;
    if (!isDataUrl(source) && !isUrl(source)) {
        // Fallback: should not normally occur after preprocessing
        const normalized = resolveImagePath(source, baseDir).replace(/\\/g, '/');
        imageSrc = normalized.startsWith('/') ? `file://${normalized}` : `file:///${normalized}`;
    }
    const attrs = [`src="${escapeHtmlAttribute(imageSrc)}"`];
    const alt = altText?.trim();
    if (alt) {
        attrs.push(`alt="${escapeHtmlAttribute(alt)}"`);
    }
    // Dimensions are written both as attributes and as inline style (for compatibility)
    const styles = [];
    for (const [dimension, value] of [['width', width], ['height', height]]) {
        if (value && value > 0) {
            attrs.push(`${dimension}="${value}"`);
            styles.push(`${dimension}:${value}px`);
        }
    }
    if (styles.length > 0) {
        attrs.push(`style="${styles.join('; ')}"`);
    }
    const imgTag = `<p><img ${attrs.join(' ')} /></p>`;
    if (selector?.trim()) {
        return insertHtml(html, imgTag, selector, position);
    }
    return appendHtml(html, imgTag);
}
|
|
127
|
+
// ─── Operation Router ────────────────────────────────────────────────────────
|
|
128
|
+
/**
 * Apply one operation to the HTML by dispatching on `operation.type`.
 *
 * Supported types: `replaceText`, `appendMarkdown`, `appendHtml`, `insertHtml`,
 * `replaceHtml`, `updateHtml`, `insertTable`, `insertImage`. Optional flags default
 * here (`matchCase`/`replaceAll`/`updateAll` → false, `global` → true,
 * `position` → 'after') before being handed to the handler.
 *
 * @param {string} html - Current document HTML.
 * @param {object} operation - Operation descriptor with a `type` discriminator.
 * @param {string} [baseDir] - Base directory, used only by `insertImage`.
 * @returns {string} The HTML returned by the selected handler.
 * @throws {DocxError} With code `UNKNOWN_OPERATION` when the type is not recognised.
 */
export function applyOperation(html, operation, baseDir) {
    const op = operation;
    // Each entry is a thunk so only the selected handler reads its fields and runs.
    const routes = new Map([
        ['replaceText', () => handleReplaceText(html, op.search, op.replace, op.matchCase ?? false, op.global ?? true)],
        ['appendMarkdown', () => handleAppendMarkdown(html, op.markdown)],
        ['appendHtml', () => handleAppendHtml(html, op.html)],
        ['insertHtml', () => handleInsertHtml(html, op.html, op.selector, op.position ?? 'after')],
        ['replaceHtml', () => handleReplaceHtml(html, op.selector, op.html, op.replaceAll ?? false)],
        ['updateHtml', () => handleUpdateHtml(html, op.selector, op.html, op.attributes, op.updateAll ?? false)],
        ['insertTable', () => handleInsertTable(html, op.markdownTable, op.rows, op.selector, op.position ?? 'after')],
        ['insertImage', () => handleInsertImage(html, op.imagePath, op.altText, op.width, op.height, baseDir, op.selector, op.position ?? 'after')],
    ]);
    const route = routes.get(op.type);
    if (route === undefined) {
        throw new DocxError(`Unknown operation type: ${op.type}`, DocxErrorCode.UNKNOWN_OPERATION, { operation: op });
    }
    return route();
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML DOM Manipulation
|
|
3
|
+
*
|
|
4
|
+
* DOM-based insert / append / replace / update for HTML content.
|
|
5
|
+
* Uses @xmldom/xmldom as the parser (not a browser DOMParser).
|
|
6
|
+
*
|
|
7
|
+
* IMPORTANT: All public functions use `Base64Guard` to protect base64 data URLs
|
|
8
|
+
* from corruption during xmldom parse/serialize cycles. Without this, images
|
|
9
|
+
* (which are embedded as long `data:image/...;base64,...` strings in `src` attributes)
|
|
10
|
+
* can be lost or mangled by the XML serializer.
|
|
11
|
+
*
|
|
12
|
+
* @module docx/operations/html-manipulator
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Append HTML content to the end of the document body.
 *
 * @param html - Document HTML to append to.
 * @param content - HTML fragment to append; when empty or whitespace-only, `html` is returned unchanged.
 * @returns The resulting HTML.
 * @throws DocxError when parsing, manipulation, or serialization fails.
 */
|
|
15
|
+
export declare function appendHtml(html: string, content: string): string;
|
|
16
|
+
/**
 * Insert HTML content at a specific position relative to a selector target.
 * If no selector is given, appends to the root element.
 *
 * When the selector matches several elements, only the first match is used.
 *
 * @param html - Document HTML to modify.
 * @param content - HTML fragment to insert; when empty or whitespace-only, `html` is returned unchanged.
 * @param selector - Selector of the target element (see the implementation for the supported forms).
 * @param position - Placement relative to the target; the implementation defaults to 'after'.
 * @returns The resulting HTML.
 * @throws DocxError when the selector matches nothing or the operation fails.
 */
|
|
20
|
+
export declare function insertHtml(html: string, content: string, selector?: string, position?: 'before' | 'after' | 'inside'): string;
|
|
21
|
+
/**
 * Replace matched elements with new HTML content.
 *
 * @param html - Document HTML to modify.
 * @param selector - Selector of the element(s) to replace; when blank, `html` is returned unchanged.
 * @param content - Replacement HTML fragment.
 * @param replaceAll - When true every match is replaced; otherwise only the first (implementation default: false).
 * @returns The resulting HTML.
 * @throws DocxError when the selector matches nothing or the operation fails.
 */
|
|
22
|
+
export declare function replaceHtml(html: string, selector: string, content: string, replaceAll?: boolean): string;
|
|
23
|
+
/**
 * Update matched elements' content and/or attributes.
 *
 * @param html - Document HTML to modify.
 * @param selector - Selector of the element(s) to update; when blank, `html` is returned unchanged.
 * @param content - When provided, replaces the children of each updated element.
 * @param attributes - When provided, each entry is set as an attribute on each updated element.
 * @param updateAll - When true every match is updated; otherwise only the first (implementation default: false).
 * @returns The resulting HTML.
 * @throws DocxError when the selector matches nothing or the operation fails.
 */
|
|
24
|
+
export declare function updateHtml(html: string, selector: string, content?: string, attributes?: Record<string, string>, updateAll?: boolean): string;
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML DOM Manipulation
|
|
3
|
+
*
|
|
4
|
+
* DOM-based insert / append / replace / update for HTML content.
|
|
5
|
+
* Uses @xmldom/xmldom as the parser (not a browser DOMParser).
|
|
6
|
+
*
|
|
7
|
+
* IMPORTANT: All public functions use `Base64Guard` to protect base64 data URLs
|
|
8
|
+
* from corruption during xmldom parse/serialize cycles. Without this, images
|
|
9
|
+
* (which are embedded as long `data:image/...;base64,...` strings in `src` attributes)
|
|
10
|
+
* can be lost or mangled by the XML serializer.
|
|
11
|
+
*
|
|
12
|
+
* @module docx/operations/html-manipulator
|
|
13
|
+
*/
|
|
14
|
+
import { createRequire } from 'module';
|
|
15
|
+
import { DocxError, DocxErrorCode } from '../errors.js';
|
|
16
|
+
const require = createRequire(import.meta.url);
|
|
17
|
+
const { DOMParser, XMLSerializer } = require('@xmldom/xmldom');
|
|
18
|
+
// ─── Selector Patterns (pre-compiled) ────────────────────────────────────────
|
|
19
|
+
// `tag:contains(text)` or bare `:contains(text)`.
// Group 1: optional tag name. Group 2: everything between the outer parentheses.
const RE_CONTAINS = /^([a-zA-Z][a-zA-Z0-9]*)?:contains\((.+)\)$/i;
|
|
20
|
+
// `tag:nth-of-type(N)`. Group 1: tag name. Group 2: decimal index N.
const RE_NTH_OF_TYPE = /^([a-zA-Z][a-zA-Z0-9]*):nth-of-type\((\d+)\)$/i;
|
|
21
|
+
// `tag:first-of-type`. Group 1: tag name.
const RE_FIRST_OF_TYPE = /^([a-zA-Z][a-zA-Z0-9]*):first-of-type$/i;
|
|
22
|
+
// `tag:last-of-type`. Group 1: tag name.
const RE_LAST_OF_TYPE = /^([a-zA-Z][a-zA-Z0-9]*):last-of-type$/i;
|
|
23
|
+
// ─── Base64 Data URL Protection ──────────────────────────────────────────────
|
|
24
|
+
/**
 * Swaps `data:` URLs in double-quoted `src` attributes for short placeholders and back.
 *
 * Intended use (per the module header): call `protect()` on every HTML string before it
 * is parsed, run the DOM operations, then call `restore()` on the serialized output so
 * the long base64 payloads never pass through the parser/serializer.
 *
 * One guard instance may protect several strings; placeholder indices keep counting up
 * across calls, so a single `restore()` covers all of them.
 */
class Base64Guard {
    constructor() {
        /** Original data URLs, indexed by placeholder number. */
        this.store = [];
    }
    /**
     * Replace every `src="data:…"` with `src="urn:b64:<index>"`.
     *
     * @param {string} html - HTML that may contain data URLs.
     * @returns {string} HTML with data URLs replaced by placeholders.
     */
    protect(html) {
        if (!html.includes('data:'))
            return html; // Fast path: no data URLs
        return html.replace(/\bsrc="(data:[^"]+)"/g, (_, dataUrl) => {
            const index = this.store.length;
            this.store.push(dataUrl);
            return `src="urn:b64:${index}"`;
        });
    }
    /**
     * Put the original data URLs back in place of their placeholders.
     *
     * Each placeholder is matched together with its complete index, so `urn:b64:1`
     * is never mistaken for the leading part of `urn:b64:10`. A replacer function is
     * used so `$` sequences in a stored URL are inserted literally.
     *
     * @param {string} html - HTML containing placeholders produced by `protect()`.
     * @returns {string} HTML with the original data URLs restored.
     */
    restore(html) {
        if (this.store.length === 0)
            return html; // Fast path: nothing to restore
        return html.replace(/urn:b64:(\d+)/g, (placeholder, digits) => {
            const index = Number(digits);
            // Leave anything that is not one of our own placeholders untouched.
            if (String(index) !== digits || index >= this.store.length)
                return placeholder;
            return this.store[index];
        });
    }
}
|
|
61
|
+
// ─── Internal Helpers ────────────────────────────────────────────────────────
|
|
62
|
+
/**
 * Always throws: a `DocxError` is re-thrown as-is, anything else is wrapped in a new
 * `DocxError` with code `OPERATION_FAILED`.
 *
 * @param {unknown} error - The caught value.
 * @param {string} message - Prefix for the wrapped error message.
 * @param {object} [context] - Extra context passed to the `DocxError` constructor.
 * @throws {DocxError}
 */
function rethrowAsDocxError(error, message, context) {
    if (!(error instanceof DocxError)) {
        const detail = error instanceof Error ? error.message : String(error);
        throw new DocxError(`${message}: ${detail}`, DocxErrorCode.OPERATION_FAILED, context);
    }
    throw error;
}
|
|
68
|
+
/**
 * Parse an HTML string into a DOM Document using the xmldom `DOMParser`.
 *
 * Because the parser does not add `<html>`/`<body>` wrappers on its own, the input is
 * wrapped first: in `<html><body>…</body></html>` when it contains no `<body`, or in
 * `<html>…</html>` when it has a `<body` but no `<html`.
 *
 * @param {string} html - HTML to parse.
 * @returns {Document} The parsed document.
 * @throws {DocxError} When the parser reports a `parsererror` element or throws.
 */
function parseHtml(html) {
    try {
        const lowered = html.toLowerCase();
        let wrapped;
        if (!lowered.includes('<body')) {
            wrapped = `<html><body>${html}</body></html>`;
        }
        else if (!lowered.includes('<html')) {
            wrapped = `<html>${html}</html>`;
        }
        else {
            wrapped = html;
        }
        const parsed = new DOMParser().parseFromString(wrapped, 'text/html');
        if (parsed.getElementsByTagName('parsererror').length > 0) {
            throw new DocxError('Failed to parse HTML: invalid structure', DocxErrorCode.OPERATION_FAILED, { htmlSnippet: html.substring(0, 100) });
        }
        return parsed;
    }
    catch (error) {
        // rethrowAsDocxError never returns normally
        rethrowAsDocxError(error, 'Failed to parse HTML', { htmlSnippet: html.substring(0, 100) });
    }
}
|
|
94
|
+
/**
 * Serialize a DOM Document back to an HTML string.
 *
 * When the document has a `<body>`, only its children are serialized (the wrapper tags
 * are left out). Without a `<body>`, the document element is serialized, or an empty
 * string is returned if there is none.
 *
 * @param {Document} doc - Document to serialize.
 * @returns {string} Serialized HTML.
 * @throws {DocxError} With code `OPERATION_FAILED` when serialization throws.
 */
function serializeHtml(doc) {
    try {
        const xml = new XMLSerializer();
        const body = doc.getElementsByTagName('body')[0];
        if (!body) {
            return doc.documentElement ? xml.serializeToString(doc.documentElement) : '';
        }
        let output = '';
        let index = 0;
        while (index < body.childNodes.length) {
            output += xml.serializeToString(body.childNodes[index]);
            index += 1;
        }
        return output;
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        throw new DocxError(`Failed to serialize HTML: ${detail}`, DocxErrorCode.OPERATION_FAILED);
    }
}
|
|
116
|
+
/**
 * Return the element that queries and appends should start from.
 *
 * @param {Document} doc - Parsed document.
 * @returns {Element} The first `<body>` element, or `doc.documentElement` when there is none.
 */
function getRootElement(doc) {
    const firstBody = doc.getElementsByTagName('body')[0];
    return firstBody || doc.documentElement;
}
|
|
120
|
+
/**
 * Deep-import each node of a node list into another document.
 *
 * @param {ArrayLike<Node>} sourceNodes - Nodes to copy (read by index and `length`).
 * @param {Document} targetDoc - Document whose `importNode` creates the copies.
 * @returns {Node[]} The imported copies, in source order.
 */
function cloneNodesToDocument(sourceNodes, targetDoc) {
    return Array.from({ length: sourceNodes.length }, (_, position) => targetDoc.importNode(sourceNodes[position], true));
}
|
|
128
|
+
// ─── CSS-like Selector Engine ────────────────────────────────────────────────
|
|
129
|
+
/**
 * Find elements using a small CSS-like selector language.
 *
 * Supported forms:
 *   `#id`, `.class`, `tag`,
 *   `tag:contains(text)`, `:contains(text)`,
 *   `tag:nth-of-type(N)`, `tag:first-of-type`, `tag:last-of-type`
 *
 * Notes on semantics, as implemented here:
 *  - `.class` is matched by scanning the `class` attribute of every descendant of the
 *    root, so it does not depend on `getElementsByClassName` being available on
 *    Element nodes (NOTE(review): xmldom appears to expose it on Document only — confirm).
 *  - `:contains(...)` is a case-insensitive substring test on `textContent`. The text
 *    may be bare or wrapped in matching single or double quotes; the quotes are dropped.
 *  - The `*-of-type` forms index into all elements with that tag under the root, in
 *    the order `getElementsByTagName` returns them, not per parent.
 *
 * @param {Document} doc - Parsed document to search.
 * @param {string} selector - Selector string; blank yields an empty result.
 * @returns {Element[]} Matching elements (possibly empty).
 * @throws {DocxError} With code `OPERATION_FAILED` when a DOM call throws.
 */
function querySelectorAll(doc, selector) {
    const s = selector?.trim();
    if (!s)
        return [];
    const root = getRootElement(doc);
    const elements = [];
    try {
        // #id
        if (s.startsWith('#')) {
            const el = doc.getElementById(s.substring(1));
            if (el)
                elements.push(el);
            return elements;
        }
        // .class
        if (s.startsWith('.')) {
            const wanted = s.substring(1).split(/\s+/).filter(Boolean);
            if (wanted.length === 0)
                return elements;
            const candidates = root.getElementsByTagName('*');
            for (let i = 0; i < candidates.length; i++) {
                const owned = (candidates[i].getAttribute('class') || '').split(/\s+/);
                if (wanted.every((name) => owned.includes(name)))
                    elements.push(candidates[i]);
            }
            return elements;
        }
        // tag:contains(text)
        const containsMatch = s.match(RE_CONTAINS);
        if (containsMatch) {
            const tag = containsMatch[1] || '*';
            let needle = containsMatch[2].trim();
            // Accept :contains("text") and :contains('text') as well as the bare form.
            const quote = needle[0];
            if (needle.length >= 2 && (quote === '"' || quote === "'") && needle.endsWith(quote))
                needle = needle.slice(1, -1);
            needle = needle.toLowerCase();
            const candidates = root.getElementsByTagName(tag);
            for (let i = 0; i < candidates.length; i++) {
                if ((candidates[i].textContent || '').toLowerCase().includes(needle)) {
                    elements.push(candidates[i]);
                }
            }
            return elements;
        }
        // tag:nth-of-type(N) — N is 1-based
        const nthMatch = s.match(RE_NTH_OF_TYPE);
        if (nthMatch) {
            const n = parseInt(nthMatch[2], 10);
            const found = root.getElementsByTagName(nthMatch[1]);
            if (n >= 1 && n <= found.length)
                elements.push(found[n - 1]);
            return elements;
        }
        // tag:first-of-type
        const firstMatch = s.match(RE_FIRST_OF_TYPE);
        if (firstMatch) {
            const found = root.getElementsByTagName(firstMatch[1]);
            if (found.length > 0)
                elements.push(found[0]);
            return elements;
        }
        // tag:last-of-type
        const lastMatch = s.match(RE_LAST_OF_TYPE);
        if (lastMatch) {
            const found = root.getElementsByTagName(lastMatch[1]);
            if (found.length > 0)
                elements.push(found[found.length - 1]);
            return elements;
        }
        // Plain tag name (fallback)
        const found = root.getElementsByTagName(s);
        for (let i = 0; i < found.length; i++)
            elements.push(found[i]);
    }
    catch (error) {
        throw new DocxError(`Failed to query selector "${selector}": ${error instanceof Error ? error.message : String(error)}`, DocxErrorCode.OPERATION_FAILED);
    }
    return elements;
}
|
|
206
|
+
// ─── DOM Position Helper ─────────────────────────────────────────────────────
|
|
207
|
+
/**
 * Place `node` relative to `target`.
 *
 *  - 'inside': appended as the last child of `target`.
 *  - 'before': inserted into the target's parent, directly before `target`.
 *  - anything else (including 'after'): inserted directly after `target`.
 *
 * For 'before' and 'after' nothing happens when `target` has no parent.
 *
 * @param {Node} node - Node to insert.
 * @param {Node} target - Reference node.
 * @param {string} position - 'before', 'inside', or 'after'.
 */
function insertAtPosition(node, target, position) {
    if (position === 'inside') {
        target.appendChild(node);
        return;
    }
    if (position === 'before') {
        target.parentNode?.insertBefore(node, target);
        return;
    }
    // 'after' and any unrecognised value
    const following = target.nextSibling;
    if (following) {
        target.parentNode?.insertBefore(node, following);
    }
    else {
        target.parentNode?.appendChild(node);
    }
}
|
|
225
|
+
// ─── Public Operations ───────────────────────────────────────────────────────
|
|
226
|
+
// All public functions use Base64Guard to protect image data URLs from
|
|
227
|
+
// corruption during the xmldom parse → manipulate → serialize cycle.
|
|
228
|
+
/**
 * Append an HTML fragment to the end of the document body.
 *
 * Both inputs pass through a shared `Base64Guard` (document first, then fragment)
 * before parsing, and the serialized result is restored afterwards.
 *
 * @param {string} html - Document HTML.
 * @param {string} content - Fragment to append; blank returns `html` unchanged.
 * @returns {string} The serialized body content with the fragment appended. If the
 *   parsed document has no `<body>`, the trimmed inputs joined by a newline.
 * @throws {DocxError} When parsing, manipulation, or serialization fails.
 */
export function appendHtml(html, content) {
    if (!content?.trim()) {
        return html;
    }
    const guard = new Base64Guard();
    // Protect order matters: placeholder indices are assigned in call order.
    const guardedHtml = guard.protect(html);
    const guardedContent = guard.protect(content);
    try {
        const doc = parseHtml(guardedHtml);
        const body = doc.getElementsByTagName('body')[0];
        if (!body) {
            return html.trim() + '\n' + content.trim();
        }
        const fragmentRoot = getRootElement(parseHtml(guardedContent));
        const imported = cloneNodesToDocument(fragmentRoot.childNodes, doc);
        imported.forEach((node) => {
            body.appendChild(node);
        });
        const serialized = serializeHtml(doc);
        return guard.restore(serialized);
    }
    catch (error) {
        rethrowAsDocxError(error, 'Failed to append HTML');
    }
}
|
|
251
|
+
/**
 * Insert an HTML fragment at a position relative to a selector target.
 *
 * Without a selector the fragment is appended to the root element. With a selector,
 * only the FIRST matching element is used as the target. The fragment's top-level
 * nodes always end up in their original order: for 'after' placement each node is
 * inserted after the previously inserted one rather than after the target again
 * (which would reverse them).
 *
 * Both inputs pass through a shared `Base64Guard` before parsing, and the serialized
 * result is restored afterwards.
 *
 * @param {string} html - Document HTML.
 * @param {string} content - Fragment to insert; blank returns `html` unchanged.
 * @param {string} [selector] - Selector of the target element.
 * @param {string} [position='after'] - 'before', 'after', or 'inside'.
 * @returns {string} The serialized document body content after insertion.
 * @throws {DocxError} When the selector matches nothing or the operation fails.
 */
export function insertHtml(html, content, selector, position = 'after') {
    if (!content?.trim())
        return html;
    const guard = new Base64Guard();
    const safeHtml = guard.protect(html);
    const safeContent = guard.protect(content);
    try {
        const doc = parseHtml(safeHtml);
        const root = getRootElement(doc);
        const contentDoc = parseHtml(safeContent);
        const contentRoot = getRootElement(contentDoc);
        // Fresh copies owned by `doc`; safe to attach directly.
        const nodesToInsert = cloneNodesToDocument(contentRoot.childNodes, doc);
        if (!selector) {
            for (const node of nodesToInsert)
                root.appendChild(node);
            return guard.restore(serializeHtml(doc));
        }
        const targets = querySelectorAll(doc, selector);
        if (targets.length === 0) {
            throw new DocxError(`Target element not found for selector: "${selector}"`, DocxErrorCode.OPERATION_FAILED, { selector });
        }
        // Insert at FIRST match only to prevent duplication
        const target = targets[0];
        // insertAtPosition treats every value other than 'before'/'inside' as 'after'.
        const insertsAfter = position !== 'before' && position !== 'inside';
        let anchor = target;
        for (const node of nodesToInsert) {
            insertAtPosition(node, anchor, position);
            // Chain 'after' insertions so the fragment keeps its source order.
            if (insertsAfter)
                anchor = node;
        }
        return guard.restore(serializeHtml(doc));
    }
    catch (error) {
        rethrowAsDocxError(error, 'Failed to insert HTML', { selector, position });
    }
}
|
|
287
|
+
/**
 * Replace the element(s) matched by a selector with an HTML fragment.
 *
 * Each replaced element receives its own deep copy of the fragment's top-level nodes,
 * inserted before it, after which the element itself is removed. Matches without a
 * parent node are skipped.
 *
 * @param {string} html - Document HTML.
 * @param {string} selector - Selector of the element(s) to replace; blank returns `html` unchanged.
 * @param {string} content - Replacement fragment.
 * @param {boolean} [replaceAll=false] - Replace every match instead of only the first.
 * @returns {string} The serialized document body content after replacement.
 * @throws {DocxError} When the selector matches nothing or the operation fails.
 */
export function replaceHtml(html, selector, content, replaceAll = false) {
    if (!selector?.trim()) {
        return html;
    }
    const guard = new Base64Guard();
    // Protect order matters: placeholder indices are assigned in call order.
    const guardedHtml = guard.protect(html);
    const guardedContent = guard.protect(content);
    try {
        const doc = parseHtml(guardedHtml);
        const matches = querySelectorAll(doc, selector);
        if (matches.length === 0) {
            throw new DocxError(`Target element not found for selector: "${selector}"`, DocxErrorCode.OPERATION_FAILED, { selector });
        }
        const fragmentRoot = getRootElement(parseHtml(guardedContent));
        const templates = cloneNodesToDocument(fragmentRoot.childNodes, doc);
        const toReplace = replaceAll ? matches : matches.slice(0, 1);
        toReplace.forEach((oldElement) => {
            const owner = oldElement.parentNode;
            if (!owner) {
                return;
            }
            templates.forEach((template) => {
                owner.insertBefore(template.cloneNode(true), oldElement);
            });
            owner.removeChild(oldElement);
        });
        const serialized = serializeHtml(doc);
        return guard.restore(serialized);
    }
    catch (error) {
        rethrowAsDocxError(error, 'Failed to replace HTML', { selector, replaceAll });
    }
}
|
|
317
|
+
/**
 * Update the children and/or attributes of the element(s) matched by a selector.
 *
 * When `content` is provided (anything other than `undefined`), the element's existing
 * children are removed and the parsed fragment's top-level nodes are appended instead.
 * When `attributes` is provided, each entry is applied with `setAttribute`.
 *
 * @param {string} html - Document HTML.
 * @param {string} selector - Selector of the element(s) to update; blank returns `html` unchanged.
 * @param {string} [content] - New inner HTML.
 * @param {Object<string, string>} [attributes] - Attributes to set.
 * @param {boolean} [updateAll=false] - Update every match instead of only the first.
 * @returns {string} The serialized document body content after the update.
 * @throws {DocxError} When the selector matches nothing or the operation fails.
 */
export function updateHtml(html, selector, content, attributes, updateAll = false) {
    if (!selector?.trim()) {
        return html;
    }
    const guard = new Base64Guard();
    const guardedHtml = guard.protect(html);
    const guardedContent = content !== undefined ? guard.protect(content) : undefined;
    try {
        const doc = parseHtml(guardedHtml);
        const matches = querySelectorAll(doc, selector);
        if (matches.length === 0) {
            throw new DocxError(`Target element not found for selector: "${selector}"`, DocxErrorCode.OPERATION_FAILED, { selector });
        }
        const toUpdate = updateAll ? matches : [matches[0]];
        for (const element of toUpdate) {
            if (guardedContent !== undefined) {
                // xmldom has no innerHTML setter, so children are swapped by hand.
                let stale = element.firstChild;
                while (stale) {
                    element.removeChild(stale);
                    stale = element.firstChild;
                }
                const fragmentRoot = getRootElement(parseHtml(guardedContent));
                cloneNodesToDocument(fragmentRoot.childNodes, doc).forEach((fresh) => {
                    element.appendChild(fresh);
                });
            }
            if (attributes) {
                Object.entries(attributes).forEach(([name, value]) => {
                    element.setAttribute(name, value);
                });
            }
        }
        const serialized = serializeHtml(doc);
        return guard.restore(serialized);
    }
    catch (error) {
        rethrowAsDocxError(error, 'Failed to update HTML', { selector, updateAll });
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Editing Operations
|
|
3
|
+
*
|
|
4
|
+
* Reads DOCX → HTML (via direct XML parser or mammoth fallback),
|
|
5
|
+
* applies a sequence of operations to the HTML DOM,
|
|
6
|
+
* then converts the modified HTML → DOCX (html-to-docx).
|
|
7
|
+
*
|
|
8
|
+
* @module docx/operations
|
|
9
|
+
*/
|
|
10
|
+
import type { DocxEditOptions, DocxOperation } from '../types.js';
|
|
11
|
+
/**
 * Apply a sequence of edit operations to a DOCX file and return the modified DOCX as a Buffer.
 *
 * The implementation validates `docxPath` and `operations`, parses the DOCX to HTML,
 * preprocesses the operations, then applies them one at a time in array order.
 * NOTE(review): per the module header the modified HTML is then converted back to
 * DOCX — confirm against the full implementation.
 *
 * @param docxPath - Path of the DOCX file to edit (surrounding whitespace is trimmed).
 * @param operations - Operations to apply, in order.
 * @param options - Optional settings; `baseDir` defaults to the directory of `docxPath`.
 * @returns A promise resolving to the modified DOCX contents.
 */
|
|
14
|
+
export declare function editDocxWithOperations(docxPath: string, operations: DocxOperation[], options?: DocxEditOptions): Promise<Buffer>;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Editing Operations
|
|
3
|
+
*
|
|
4
|
+
* Reads DOCX → HTML (via direct XML parser or mammoth fallback),
|
|
5
|
+
* applies a sequence of operations to the HTML DOM,
|
|
6
|
+
* then converts the modified HTML → DOCX (html-to-docx).
|
|
7
|
+
*
|
|
8
|
+
* @module docx/operations
|
|
9
|
+
*/
|
|
10
|
+
import path from 'path';
|
|
11
|
+
import { DocxError, DocxErrorCode, withErrorContext } from '../errors.js';
|
|
12
|
+
import { parseDocxToHtml } from '../html.js';
|
|
13
|
+
import { createDocxFromHtml } from '../builders/html-builder.js';
|
|
14
|
+
import { DEFAULT_CONVERSION_OPTIONS } from '../constants.js';
|
|
15
|
+
import { DocxOperationSchema } from '../../schemas.js';
|
|
16
|
+
import { validateDocxPath, validateOperations } from '../validators.js';
|
|
17
|
+
import { applyOperation } from './handlers/index.js';
|
|
18
|
+
import { preprocessOperations } from './preprocessor.js';
|
|
19
|
+
// ─── Public API ──────────────────────────────────────────────────────────────
|
|
20
|
+
/**
 * Apply a sequence of edit operations to a DOCX file and return the modified DOCX as a Buffer.
 *
 * Steps, all executed inside `withErrorContext`:
 *   1. validate `docxPath` and `operations`;
 *   2. parse the DOCX into HTML with `parseDocxToHtml`;
 *   3. run `preprocessOperations` over the operations;
 *   4. validate each preprocessed operation against `DocxOperationSchema`
 *      and apply it to the HTML, in order;
 *   5. rebuild a DOCX from the final HTML with `createDocxFromHtml`, passing
 *      along the `documentDefaults` returned by the parser.
 *
 * @param {string} docxPath - Path of the DOCX to edit; it is trimmed before use.
 * @param {Array<object>} operations - Operations to apply, in order.
 * @param {object} [options] - Optional settings. Fields read here: `baseDir`
 *   (defaults to the directory of `docxPath`), `includeImages`,
 *   `preserveFormatting` (both default to `DEFAULT_CONVERSION_OPTIONS`) and `styleMap`.
 * @returns {Promise<Buffer>} The result of `createDocxFromHtml` for the edited HTML.
 * @throws {DocxError} With code `OPERATION_FAILED` when an operation fails schema
 *   validation or application; a `DocxError` raised by an operation is rethrown unchanged.
 *   Other failures go through `withErrorContext` with code `DOCX_EDIT_FAILED`
 *   (presumably wrapped into a `DocxError` there; confirm in errors.js).
 */
export async function editDocxWithOperations(docxPath, operations, options = {}) {
    // This context object is built before the callback (and therefore before
    // validateOperations) runs. Reading `operations.length` directly would throw a
    // raw TypeError for null/undefined input and skip validation entirely.
    const errorContext = {
        path: docxPath,
        operationCount: Array.isArray(operations) ? operations.length : 0,
    };
    return withErrorContext(async () => {
        validateDocxPath(docxPath);
        validateOperations(operations);
        const normalizedPath = docxPath.trim();
        const baseDir = options.baseDir ?? path.dirname(normalizedPath);
        const parseOptions = {
            includeImages: options.includeImages ?? DEFAULT_CONVERSION_OPTIONS.includeImages,
            preserveFormatting: options.preserveFormatting ?? DEFAULT_CONVERSION_OPTIONS.preserveFormatting,
            ...(options.styleMap && { styleMap: options.styleMap }),
        };
        // Read DOCX → HTML
        const docxResult = await parseDocxToHtml(normalizedPath, parseOptions);
        let html = docxResult.html;
        const { documentDefaults } = docxResult;
        // Preprocess operations (e.g., convert local image paths to base64)
        const preprocessedOps = await preprocessOperations(operations, baseDir);
        // Apply each operation sequentially; each one receives the HTML produced by the previous one
        for (let i = 0; i < preprocessedOps.length; i++) {
            const op = preprocessedOps[i];
            try {
                const validatedOp = DocxOperationSchema.parse(op);
                html = applyOperation(html, validatedOp, baseDir);
            }
            catch (error) {
                // Already a domain error: keep its code and context intact
                if (error instanceof DocxError)
                    throw error;
                // Zod validation errors
                if (error instanceof Error && 'issues' in error) {
                    throw new DocxError(`Invalid operation at index ${i}: ${error.message}`, DocxErrorCode.OPERATION_FAILED, { path: normalizedPath, operationIndex: i, operation: op, validationError: error });
                }
                // Other errors
                throw new DocxError(`Failed to apply operation at index ${i}: ${error instanceof Error ? error.message : String(error)}`, DocxErrorCode.OPERATION_FAILED, { path: normalizedPath, operationIndex: i, operation: op });
            }
        }
        // HTML → DOCX, preserving the original document's default styles
        return await createDocxFromHtml(html, { baseDir, documentDefaults });
    }, DocxErrorCode.DOCX_EDIT_FAILED, errorContext);
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { DocxError, DocxErrorCode } from '../errors.js';
|
|
2
|
+
import { createDocxFromMarkdown } from '../builders/markdown-builder.js';
|
|
3
|
+
import { parseDocxStructure } from '../structure.js';
|
|
4
|
+
import { buildMarkdownTableFromRows, prepareImageForDocx, createImageRun, } from '../utils.js';
|
|
5
|
+
import { createRequire } from 'module';
|
|
6
|
+
const require = createRequire(import.meta.url);
|
|
7
|
+
// @ts-ignore
|
|
8
|
+
import * as docx from 'docx';
|
|
9
|
+
const { Paragraph, AlignmentType } = docx;
|
|
10
|
+
/**
 * Route one edit operation to the handler that applies it to `structure`.
 *
 * @param structure - Document structure passed through to the selected handler.
 * @param op - Operation descriptor; `op.type` selects the handler.
 * @param baseDir - Forwarded to the markdown and image handlers.
 * @throws {DocxError} `UNSUPPORTED_OPERATION` for `'replaceText'`;
 *   `UNKNOWN_OPERATION` for any type not listed below.
 */
export async function applyOperationToStructure(structure, op, baseDir) {
    const kind = op.type;
    if (kind === 'replaceText') {
        // Text replacement is not handled at the structure level.
        throw new DocxError('replaceText should use XML manipulation', DocxErrorCode.UNSUPPORTED_OPERATION);
    }
    if (kind === 'appendMarkdown') {
        await handleAppendMarkdown(structure, op, baseDir);
        return;
    }
    if (kind === 'insertTable') {
        await handleInsertTable(structure, op);
        return;
    }
    if (kind === 'insertImage') {
        await handleInsertImage(structure, op, baseDir);
        return;
    }
    throw new DocxError(`Unknown operation: ${op.type}`, DocxErrorCode.UNKNOWN_OPERATION);
}
|
|
27
|
+
/**
 * Append the content of `op.markdown` to the end of `structure`.
 *
 * The markdown is rendered to a temporary DOCX with `createDocxFromMarkdown`,
 * parsed back with `parseDocxStructure`, and the parsed elements are pushed onto
 * `structure.elements`. Every entry of the parsed `images` and `relationships`
 * collections is then copied into the matching collection on `structure`.
 * Does nothing when `op.markdown` is missing or contains only whitespace.
 *
 * NOTE(review): entries are copied under their original keys. If the appended
 * document reuses a key already present in `structure`, the existing entry is
 * presumably replaced; confirm that ids cannot collide.
 */
async function handleAppendMarkdown(structure, op, baseDir) {
    const markdown = op.markdown;
    if (!markdown?.trim()) {
        return;
    }
    const parsed = await parseDocxStructure(await createDocxFromMarkdown(markdown, { baseDir }));
    structure.elements.push(...parsed.elements);
    // Copy every [key, value] entry of `source` into `target`.
    const copyEntries = (source, target) => {
        for (const [key, value] of source.entries()) {
            target.set(key, value);
        }
    };
    copyEntries(parsed.images, structure.images);
    copyEntries(parsed.relationships, structure.relationships);
}
|
|
40
|
+
/**
 * Append a table to the end of `structure`.
 *
 * The table source is `op.markdownTable` when it is truthy; otherwise, when
 * `op.rows` is non-empty, it is built with `buildMarkdownTableFromRows`. The
 * markdown is rendered to a temporary DOCX and parsed back. The first parsed
 * element of type `'table'` is appended, or all parsed elements when none is found.
 * Does nothing when no non-blank table markdown is available.
 */
async function handleInsertTable(structure, op) {
    const buildFromRows = !op.markdownTable && op.rows?.length;
    const markdown = buildFromRows ? buildMarkdownTableFromRows(op.rows) : op.markdownTable;
    if (!markdown?.trim()) {
        return;
    }
    const rendered = await createDocxFromMarkdown(markdown, {});
    const parsed = await parseDocxStructure(rendered);
    const table = parsed.elements.find((element) => element.type === 'table');
    // Fall back to everything that was parsed if no table element was produced.
    structure.elements.push(...(table ? [table] : parsed.elements));
}
|
|
57
|
+
/**
 * Append a centered paragraph containing a single image to `structure`.
 *
 * The image is prepared with `prepareImageForDocx` (using `op.altText`, or an
 * empty string when it is falsy) and wrapped with `createImageRun`.
 * Does nothing when `op.imagePath` is missing or contains only whitespace.
 */
async function handleInsertImage(structure, op, baseDir) {
    const hasImagePath = Boolean(op.imagePath?.trim());
    if (!hasImagePath) {
        return;
    }
    const prepared = await prepareImageForDocx(op.imagePath, op.altText || '', baseDir);
    const centeredParagraph = new Paragraph({
        children: [createImageRun(prepared)],
        alignment: AlignmentType.CENTER,
    });
    structure.elements.push({ type: 'paragraph', content: centeredParagraph });
}
|