@wonderwhy-er/desktop-commander 0.2.34 → 0.2.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/handlers/filesystem-handlers.js +58 -11
- package/dist/handlers/history-handlers.d.ts +7 -0
- package/dist/handlers/history-handlers.js +33 -1
- package/dist/server.js +30 -4
- package/dist/tools/docx/builders/html-builder.d.ts +17 -0
- package/dist/tools/docx/builders/html-builder.js +92 -0
- package/dist/tools/docx/builders/image.d.ts +14 -0
- package/dist/tools/docx/builders/image.js +84 -0
- package/dist/tools/docx/builders/index.d.ts +11 -0
- package/dist/tools/docx/builders/index.js +11 -0
- package/dist/tools/docx/builders/markdown-builder.d.ts +2 -0
- package/dist/tools/docx/builders/markdown-builder.js +260 -0
- package/dist/tools/docx/builders/paragraph.d.ts +12 -0
- package/dist/tools/docx/builders/paragraph.js +29 -0
- package/dist/tools/docx/builders/table.d.ts +8 -0
- package/dist/tools/docx/builders/table.js +94 -0
- package/dist/tools/docx/builders/utils.d.ts +5 -0
- package/dist/tools/docx/builders/utils.js +18 -0
- package/dist/tools/docx/constants.d.ts +32 -0
- package/dist/tools/docx/constants.js +61 -0
- package/dist/tools/docx/converters/markdown-to-html.d.ts +17 -0
- package/dist/tools/docx/converters/markdown-to-html.js +111 -0
- package/dist/tools/docx/create.d.ts +21 -0
- package/dist/tools/docx/create.js +386 -0
- package/dist/tools/docx/dom.d.ts +66 -0
- package/dist/tools/docx/dom.js +228 -0
- package/dist/tools/docx/errors.d.ts +28 -0
- package/dist/tools/docx/errors.js +48 -0
- package/dist/tools/docx/extractors/images.d.ts +14 -0
- package/dist/tools/docx/extractors/images.js +40 -0
- package/dist/tools/docx/extractors/metadata.d.ts +14 -0
- package/dist/tools/docx/extractors/metadata.js +64 -0
- package/dist/tools/docx/extractors/sections.d.ts +14 -0
- package/dist/tools/docx/extractors/sections.js +61 -0
- package/dist/tools/docx/html.d.ts +17 -0
- package/dist/tools/docx/html.js +111 -0
- package/dist/tools/docx/index.d.ts +10 -0
- package/dist/tools/docx/index.js +10 -0
- package/dist/tools/docx/markdown.d.ts +84 -0
- package/dist/tools/docx/markdown.js +507 -0
- package/dist/tools/docx/modify.d.ts +28 -0
- package/dist/tools/docx/modify.js +271 -0
- package/dist/tools/docx/operations/handlers/index.d.ts +39 -0
- package/dist/tools/docx/operations/handlers/index.js +152 -0
- package/dist/tools/docx/operations/html-manipulator.d.ts +24 -0
- package/dist/tools/docx/operations/html-manipulator.js +352 -0
- package/dist/tools/docx/operations/index.d.ts +14 -0
- package/dist/tools/docx/operations/index.js +61 -0
- package/dist/tools/docx/operations/operation-handlers.d.ts +3 -0
- package/dist/tools/docx/operations/operation-handlers.js +67 -0
- package/dist/tools/docx/operations/preprocessor.d.ts +14 -0
- package/dist/tools/docx/operations/preprocessor.js +44 -0
- package/dist/tools/docx/operations/xml-replacer.d.ts +9 -0
- package/dist/tools/docx/operations/xml-replacer.js +35 -0
- package/dist/tools/docx/operations.d.ts +13 -0
- package/dist/tools/docx/operations.js +13 -0
- package/dist/tools/docx/ops/delete-paragraph-at-body-index.d.ts +11 -0
- package/dist/tools/docx/ops/delete-paragraph-at-body-index.js +23 -0
- package/dist/tools/docx/ops/header-replace-text-exact.d.ts +13 -0
- package/dist/tools/docx/ops/header-replace-text-exact.js +55 -0
- package/dist/tools/docx/ops/index.d.ts +17 -0
- package/dist/tools/docx/ops/index.js +67 -0
- package/dist/tools/docx/ops/insert-image-after-text.d.ts +24 -0
- package/dist/tools/docx/ops/insert-image-after-text.js +128 -0
- package/dist/tools/docx/ops/insert-paragraph-after-text.d.ts +12 -0
- package/dist/tools/docx/ops/insert-paragraph-after-text.js +74 -0
- package/dist/tools/docx/ops/insert-table-after-text.d.ts +19 -0
- package/dist/tools/docx/ops/insert-table-after-text.js +57 -0
- package/dist/tools/docx/ops/replace-hyperlink-url.d.ts +12 -0
- package/dist/tools/docx/ops/replace-hyperlink-url.js +37 -0
- package/dist/tools/docx/ops/replace-paragraph-at-body-index.d.ts +9 -0
- package/dist/tools/docx/ops/replace-paragraph-at-body-index.js +25 -0
- package/dist/tools/docx/ops/replace-paragraph-text-exact.d.ts +9 -0
- package/dist/tools/docx/ops/replace-paragraph-text-exact.js +21 -0
- package/dist/tools/docx/ops/set-color-for-paragraph-exact.d.ts +8 -0
- package/dist/tools/docx/ops/set-color-for-paragraph-exact.js +23 -0
- package/dist/tools/docx/ops/set-color-for-style.d.ts +9 -0
- package/dist/tools/docx/ops/set-color-for-style.js +27 -0
- package/dist/tools/docx/ops/set-paragraph-style-at-body-index.d.ts +8 -0
- package/dist/tools/docx/ops/set-paragraph-style-at-body-index.js +57 -0
- package/dist/tools/docx/ops/table-set-cell-text.d.ts +9 -0
- package/dist/tools/docx/ops/table-set-cell-text.js +72 -0
- package/dist/tools/docx/parsers/image-extractor.d.ts +18 -0
- package/dist/tools/docx/parsers/image-extractor.js +61 -0
- package/dist/tools/docx/parsers/index.d.ts +9 -0
- package/dist/tools/docx/parsers/index.js +9 -0
- package/dist/tools/docx/parsers/paragraph-parser.d.ts +2 -0
- package/dist/tools/docx/parsers/paragraph-parser.js +88 -0
- package/dist/tools/docx/parsers/table-parser.d.ts +9 -0
- package/dist/tools/docx/parsers/table-parser.js +72 -0
- package/dist/tools/docx/parsers/xml-parser.d.ts +25 -0
- package/dist/tools/docx/parsers/xml-parser.js +71 -0
- package/dist/tools/docx/parsers/zip-reader.d.ts +23 -0
- package/dist/tools/docx/parsers/zip-reader.js +52 -0
- package/dist/tools/docx/read.d.ts +27 -0
- package/dist/tools/docx/read.js +188 -0
- package/dist/tools/docx/relationships.d.ts +22 -0
- package/dist/tools/docx/relationships.js +76 -0
- package/dist/tools/docx/structure.d.ts +25 -0
- package/dist/tools/docx/structure.js +102 -0
- package/dist/tools/docx/styled-html-parser.d.ts +23 -0
- package/dist/tools/docx/styled-html-parser.js +1262 -0
- package/dist/tools/docx/types.d.ts +184 -0
- package/dist/tools/docx/types.js +5 -0
- package/dist/tools/docx/utils/escaping.d.ts +13 -0
- package/dist/tools/docx/utils/escaping.js +26 -0
- package/dist/tools/docx/utils/images.d.ts +9 -0
- package/dist/tools/docx/utils/images.js +26 -0
- package/dist/tools/docx/utils/index.d.ts +12 -0
- package/dist/tools/docx/utils/index.js +17 -0
- package/dist/tools/docx/utils/markdown.d.ts +13 -0
- package/dist/tools/docx/utils/markdown.js +32 -0
- package/dist/tools/docx/utils/paths.d.ts +15 -0
- package/dist/tools/docx/utils/paths.js +27 -0
- package/dist/tools/docx/utils/versioning.d.ts +25 -0
- package/dist/tools/docx/utils/versioning.js +55 -0
- package/dist/tools/docx/utils.d.ts +101 -0
- package/dist/tools/docx/utils.js +299 -0
- package/dist/tools/docx/validate.d.ts +33 -0
- package/dist/tools/docx/validate.js +49 -0
- package/dist/tools/docx/validators.d.ts +13 -0
- package/dist/tools/docx/validators.js +40 -0
- package/dist/tools/docx/write.d.ts +17 -0
- package/dist/tools/docx/write.js +88 -0
- package/dist/tools/docx/zip.d.ts +21 -0
- package/dist/tools/docx/zip.js +35 -0
- package/dist/tools/schemas.d.ts +13 -0
- package/dist/tools/schemas.js +5 -0
- package/dist/types.d.ts +10 -0
- package/dist/ui/contracts.d.ts +14 -0
- package/dist/ui/contracts.js +18 -0
- package/dist/ui/file-preview/index.html +16 -0
- package/dist/ui/file-preview/preview-runtime.js +13977 -0
- package/dist/ui/file-preview/shared/preview-file-types.d.ts +5 -0
- package/dist/ui/file-preview/shared/preview-file-types.js +57 -0
- package/dist/ui/file-preview/src/app.d.ts +4 -0
- package/dist/ui/file-preview/src/app.js +800 -0
- package/dist/ui/file-preview/src/components/code-viewer.d.ts +6 -0
- package/dist/ui/file-preview/src/components/code-viewer.js +73 -0
- package/dist/ui/file-preview/src/components/highlighting.d.ts +2 -0
- package/dist/ui/file-preview/src/components/highlighting.js +54 -0
- package/dist/ui/file-preview/src/components/html-renderer.d.ts +9 -0
- package/dist/ui/file-preview/src/components/html-renderer.js +63 -0
- package/dist/ui/file-preview/src/components/markdown-renderer.d.ts +1 -0
- package/dist/ui/file-preview/src/components/markdown-renderer.js +21 -0
- package/dist/ui/file-preview/src/components/toolbar.d.ts +6 -0
- package/dist/ui/file-preview/src/components/toolbar.js +75 -0
- package/dist/ui/file-preview/src/image-preview.d.ts +3 -0
- package/dist/ui/file-preview/src/image-preview.js +21 -0
- package/dist/ui/file-preview/src/main.d.ts +1 -0
- package/dist/ui/file-preview/src/main.js +5 -0
- package/dist/ui/file-preview/src/types.d.ts +1 -0
- package/dist/ui/file-preview/src/types.js +1 -0
- package/dist/ui/file-preview/styles.css +764 -0
- package/dist/ui/resources.d.ts +21 -0
- package/dist/ui/resources.js +72 -0
- package/dist/ui/shared/escape-html.d.ts +4 -0
- package/dist/ui/shared/escape-html.js +11 -0
- package/dist/ui/shared/host-lifecycle.d.ts +16 -0
- package/dist/ui/shared/host-lifecycle.js +35 -0
- package/dist/ui/shared/rpc-client.d.ts +14 -0
- package/dist/ui/shared/rpc-client.js +72 -0
- package/dist/ui/shared/theme-adaptation.d.ts +10 -0
- package/dist/ui/shared/theme-adaptation.js +118 -0
- package/dist/ui/shared/tool-header.d.ts +9 -0
- package/dist/ui/shared/tool-header.js +25 -0
- package/dist/ui/shared/tool-shell.d.ts +16 -0
- package/dist/ui/shared/tool-shell.js +65 -0
- package/dist/ui/shared/widget-state.d.ts +28 -0
- package/dist/ui/shared/widget-state.js +60 -0
- package/dist/utils/capture.d.ts +1 -0
- package/dist/utils/capture.js +10 -4
- package/dist/utils/files/docx.d.ts +34 -0
- package/dist/utils/files/docx.js +145 -0
- package/dist/utils/files/text.js +9 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +5 -2
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOM utilities for DOCX XML manipulation.
|
|
3
|
+
*
|
|
4
|
+
* Single Responsibility: XML parsing, navigation, and minimal element
|
|
5
|
+
* mutation. No file I/O — every function works on in-memory DOM nodes.
|
|
6
|
+
*
|
|
7
|
+
* Uses @xmldom/xmldom for parsing and serialisation so that the
|
|
8
|
+
* document-order of nodes is always preserved.
|
|
9
|
+
*/
|
|
10
|
+
import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
|
|
11
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
12
|
+
// XML parse / serialize
|
|
13
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
14
|
+
/**
 * Parse an XML string into a DOM document.
 *
 * @param {string} xmlStr - XML source text.
 * @returns {Document} The document returned by `DOMParser#parseFromString`
 *   when called with the `application/xml` MIME type.
 */
export function parseXml(xmlStr) {
    const parser = new DOMParser();
    const parsed = parser.parseFromString(xmlStr, 'application/xml');
    return parsed;
}
|
|
17
|
+
/**
 * Serialise a DOM document (or node) back to an XML string.
 *
 * @param {Document} doc - Node handed to `XMLSerializer#serializeToString`.
 * @returns {string} The serialised markup.
 */
export function serializeXml(doc) {
    const serializer = new XMLSerializer();
    const markup = serializer.serializeToString(doc);
    return markup;
}
|
|
20
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
21
|
+
// Generic DOM helpers
|
|
22
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
23
|
+
/**
 * Copy a NodeList-like object (anything exposing `length` and `item(i)`)
 * into a plain array, skipping falsy entries.
 *
 * @param {{ length: number, item(index: number): any }} nl - List to copy.
 * @returns {Array} New array holding the truthy items in list order.
 */
export function nodeListToArray(nl) {
    const nodes = [];
    let index = 0;
    // `length` is re-read on every pass, exactly like a classic index loop.
    while (index < nl.length) {
        const node = nl.item(index);
        if (node) {
            nodes.push(node);
        }
        index += 1;
    }
    return nodes;
}
|
|
35
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
36
|
+
// Body access
|
|
37
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
38
|
+
/**
 * Look up the first `<w:body>` element of a parsed document.xml DOM.
 *
 * @param {Document} doc - Parsed document.
 * @returns {Element} The first element matched by tag name `w:body`.
 * @throws {Error} When the document contains no `<w:body>` element.
 */
export function getBody(doc) {
    const candidates = doc.getElementsByTagName('w:body');
    const first = candidates.item(0);
    if (first) {
        return first;
    }
    throw new Error('Invalid DOCX DOM: missing <w:body>');
}
|
|
45
|
+
/**
 * Collect the direct element children of `w:body`, keeping document order.
 * Every element child is included regardless of its tag (w:p, w:tbl,
 * w:sdt, w:sectPr, ...); non-element nodes are left out.
 *
 * @param {Element} body - The `w:body` element.
 * @returns {Element[]} Direct children whose `nodeType` is 1.
 */
export function getBodyChildren(body) {
    const ELEMENT_NODE = 1;
    return nodeListToArray(body.childNodes).filter((child) => child.nodeType === ELEMENT_NODE);
}
|
|
57
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
58
|
+
// Body signature
|
|
59
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
60
|
+
/**
 * Summarise a list of body children as a comma-separated string of local
 * names, e.g. "p,tbl,p,p,sectPr".
 *
 * Each node contributes its `nodeName` with everything up to and including
 * the first ':' removed (w:p -> p); names without a colon are used as-is.
 *
 * @param {Array<{ nodeName: string }>} children - Body children to describe.
 * @returns {string} The signature; empty string for an empty list.
 */
export function bodySignature(children) {
    const localNames = [];
    for (const child of children) {
        const qualified = child.nodeName;
        const colonAt = qualified.indexOf(':');
        localNames.push(colonAt < 0 ? qualified : qualified.substring(colonAt + 1));
    }
    return localNames.join(',');
}
|
|
75
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
76
|
+
// Paragraph text helpers
|
|
77
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
78
|
+
/**
 * Join the text of every `<w:t>` descendant of a paragraph, in list order.
 *
 * @param {Element} p - Paragraph element.
 * @returns {string} Concatenated text; a missing item or null/undefined
 *   `textContent` contributes an empty string.
 */
export function getParagraphText(p) {
    const textNodes = p.getElementsByTagName('w:t');
    const pieces = [];
    for (let idx = 0; idx < textNodes.length; idx += 1) {
        const node = textNodes.item(idx);
        pieces.push(node?.textContent ?? '');
    }
    return pieces.join('');
}
|
|
87
|
+
/**
 * Read the paragraph style id stored at w:pPr/w:pStyle/@w:val.
 *
 * Only the first direct `w:pPr` child is inspected, and within it the first
 * direct `w:pStyle` child.
 *
 * @param {Element} p - Paragraph element.
 * @returns {string|null} The result of `getAttribute('w:val')` on the style
 *   element, or null when the paragraph has no `w:pPr` or no `w:pStyle`.
 */
export function getParagraphStyle(p) {
    const isElementNamed = (node, name) => node.nodeType === 1 && node.nodeName === name;
    const props = nodeListToArray(p.childNodes).find((node) => isElementNamed(node, 'w:pPr'));
    if (!props) {
        return null;
    }
    const styleEl = nodeListToArray(props.childNodes).find((node) => isElementNamed(node, 'w:pStyle'));
    return styleEl ? styleEl.getAttribute('w:val') : null;
}
|
|
102
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
103
|
+
// Minimal text replacement
|
|
104
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
105
|
+
/**
 * Overwrite a paragraph's text while touching as little of the DOM as
 * possible.
 *
 * The first `<w:t>` receives `text` and is marked xml:space="preserve" so
 * leading/trailing spaces survive; every later `<w:t>` is emptied. Runs and
 * paragraph properties are not recreated or removed. A paragraph without
 * any `<w:t>` is left untouched.
 *
 * @param {Element} p - Paragraph element.
 * @param {string} text - Replacement text.
 * @returns {void}
 */
export function setParagraphTextMinimal(p, text) {
    const textEls = p.getElementsByTagName('w:t');
    if (textEls.length === 0) {
        return;
    }
    const head = textEls.item(0);
    head.textContent = text;
    head.setAttribute('xml:space', 'preserve');
    let position = 1;
    while (position < textEls.length) {
        textEls.item(position).textContent = '';
        position += 1;
    }
}
|
|
122
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
123
|
+
// Run-level formatting helpers
|
|
124
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
125
|
+
/**
 * Make sure a `<w:r>` carries w:rPr/w:color with `w:val` set to `hex`.
 *
 * A missing `w:rPr` is created as the run's first child and a missing
 * `w:color` is appended to it. No other run property is modified. Does
 * nothing when the run has no owner document.
 *
 * @param {Element} run - The `<w:r>` element.
 * @param {string} hex - Value written to the `w:val` attribute.
 * @returns {void}
 */
export function ensureRunColor(run, hex) {
    const ownerDoc = run.ownerDocument;
    if (!ownerDoc) {
        return;
    }
    let props = findDirectChild(run, 'w:rPr');
    if (!props) {
        props = ownerDoc.createElement('w:rPr');
        const head = run.firstChild;
        if (head) {
            run.insertBefore(props, head);
        }
        else {
            run.appendChild(props);
        }
    }
    let colorNode = findDirectChild(props, 'w:color');
    if (!colorNode) {
        colorNode = ownerDoc.createElement('w:color');
        props.appendChild(colorNode);
    }
    colorNode.setAttribute('w:val', hex);
}
|
|
151
|
+
/**
 * Set the run-level colour on every `<w:r>` descendant of a paragraph by
 * calling `ensureRunColor` on each one.
 *
 * @param {Element} p - Paragraph element.
 * @param {string} color - Colour value forwarded to `ensureRunColor`.
 * @returns {void}
 */
export function colorParagraphRuns(p, color) {
    nodeListToArray(p.getElementsByTagName('w:r')).forEach((run) => {
        ensureRunColor(run, color);
    });
}
|
|
160
|
+
/**
 * Apply colour / bold / italic to every `<w:r>` descendant of a paragraph.
 *
 * Existing `w:rPr` children are kept; only the properties present in
 * `style` are modified. A run without `w:rPr` gets one inserted as its
 * first child (even when `style` requests nothing). Does nothing when the
 * paragraph has no owner document.
 *
 * @param {Element} p - Paragraph element.
 * @param {{ color?: string, bold?: boolean, italic?: boolean }} style -
 *   `color` is applied when truthy; `bold` / `italic` are applied whenever
 *   they are not `undefined`.
 * @returns {void}
 */
export function styleParagraphRuns(p, style) {
    const ownerDoc = p.ownerDocument;
    if (!ownerDoc) {
        return;
    }
    // Fetch the run's w:rPr, creating it at the front of the run if absent.
    const runPropsFor = (run) => {
        const found = findDirectChild(run, 'w:rPr');
        if (found) {
            return found;
        }
        const created = ownerDoc.createElement('w:rPr');
        const head = run.firstChild;
        if (head) {
            run.insertBefore(created, head);
        }
        else {
            run.appendChild(created);
        }
        return created;
    };
    // Fetch w:color under the given w:rPr, appending a new one if absent.
    const colorNodeFor = (props) => {
        const found = findDirectChild(props, 'w:color');
        if (found) {
            return found;
        }
        const created = ownerDoc.createElement('w:color');
        props.appendChild(created);
        return created;
    };
    for (const run of nodeListToArray(p.getElementsByTagName('w:r'))) {
        const props = runPropsFor(run);
        if (style.color) {
            colorNodeFor(props).setAttribute('w:val', style.color);
        }
        if (style.bold !== undefined) {
            toggleElement(ownerDoc, props, 'w:b', style.bold);
        }
        if (style.italic !== undefined) {
            toggleElement(ownerDoc, props, 'w:i', style.italic);
        }
    }
}
|
|
196
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
197
|
+
// Counting helpers
|
|
198
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
199
|
+
/**
 * Count the entries of a body-children array whose `nodeName` is `w:tbl`.
 *
 * @param {Array<{ nodeName: string }>} children - Body children to scan.
 * @returns {number} Number of table elements in the array.
 */
export function countTables(children) {
    return children.reduce((total, child) => (child.nodeName === 'w:tbl' ? total + 1 : total), 0);
}
|
|
203
|
+
/**
 * Rough image count: the number of `<w:drawing>` descendants of `body`.
 *
 * @param {Element} body - Element to search.
 * @returns {number} Length of the `w:drawing` tag-name lookup.
 */
export function countImages(body) {
    const drawings = body.getElementsByTagName('w:drawing');
    return drawings.length;
}
|
|
207
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
208
|
+
// Private helpers (DRY: used by multiple public functions)
|
|
209
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
210
|
+
/**
 * Locate the first direct child element of `parent` with a given nodeName.
 *
 * @param {Node} parent - Node whose `childNodes` are scanned.
 * @param {string} nodeName - Qualified name to match exactly (e.g. 'w:rPr').
 * @returns {Element|null} The first matching element child, or null.
 */
function findDirectChild(parent, nodeName) {
    const match = nodeListToArray(parent.childNodes).find(
        (node) => node.nodeType === 1 && node.nodeName === nodeName,
    );
    return match ?? null;
}
|
|
219
|
+
/**
 * Add or remove a simple on/off flag element (e.g. w:b, w:i) inside a parent.
 *
 * - `enabled` falsy: an existing element is removed.
 * - `enabled` truthy, element missing: a bare element is appended.
 * - `enabled` truthy, element present: if it carries an explicit "off"
 *   value (`w:val` of "0", "false" or "off") that attribute is removed so
 *   the flag actually takes effect; otherwise it is left untouched.
 *
 * @param {Document} doc - Document used to create a new element.
 * @param {Element} parent - Container (typically a w:rPr).
 * @param {string} nodeName - Qualified name of the flag element.
 * @param {boolean} enabled - Desired state of the flag.
 * @returns {void}
 */
function toggleElement(doc, parent, nodeName, enabled) {
    const existing = findDirectChild(parent, nodeName);
    if (!enabled) {
        if (existing) {
            parent.removeChild(existing);
        }
        return;
    }
    if (!existing) {
        parent.appendChild(doc.createElement(nodeName));
        return;
    }
    // An element such as <w:b w:val="0"/> switches the property OFF, so the
    // mere presence of the element is not enough to consider it enabled.
    const explicit = existing.getAttribute('w:val');
    if (['0', 'false', 'off'].includes(explicit)) {
        existing.removeAttribute('w:val');
    }
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Error Handling
|
|
3
|
+
*
|
|
4
|
+
* Centralised error class and async error-wrapping utility.
|
|
5
|
+
*
|
|
6
|
+
* @module docx/errors
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Error type used by the DOCX tooling. Extends the built-in `Error` with a
 * string `code` and an optional `context` record.
 */
export declare class DocxError extends Error {
    /** Error code passed to the constructor. */
    readonly code: string;
    /** Optional extra details passed to the constructor. */
    readonly context?: Record<string, unknown> | undefined;
    /**
     * @param message - Human-readable description.
     * @param code - Error code stored on `code`.
     * @param context - Optional details stored on `context`.
     */
    constructor(
        message: string,
        code: string,
        context?: Record<string, unknown> | undefined,
    );
    /** Plain-object representation of the error. */
    toJSON(): Record<string, unknown>;
}
|
|
14
|
+
/**
 * Error codes used with `DocxError`. This is a string enum in which every
 * member's value is identical to its name.
 */
export declare enum DocxErrorCode {
    // Input validation
    INVALID_DOCX = "INVALID_DOCX",
    INVALID_PATH = "INVALID_PATH",

    // Edit operations
    OPERATION_FAILED = "OPERATION_FAILED",
    UNKNOWN_OPERATION = "UNKNOWN_OPERATION",
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION",

    // Top-level create / edit / read
    DOCX_CREATE_FAILED = "DOCX_CREATE_FAILED",
    DOCX_EDIT_FAILED = "DOCX_EDIT_FAILED",
    DOCX_READ_FAILED = "DOCX_READ_FAILED",

    // Images
    INVALID_IMAGE_FILE = "INVALID_IMAGE_FILE",
    INVALID_IMAGE_DATA_URL = "INVALID_IMAGE_DATA_URL",

    // Info lookup
    GET_INFO_FAILED = "GET_INFO_FAILED"
}
|
|
27
|
+
/**
 * Run an async operation and normalise its failures: an existing
 * `DocxError` is re-thrown unchanged, anything else is wrapped.
 *
 * @param operation - Async function to execute.
 * @param errorCode - Code given to the wrapping error.
 * @param context - Optional details attached to the wrapping error.
 * @returns The value `operation` resolves to.
 */
export declare function withErrorContext<T>(
    operation: () => Promise<T>,
    errorCode: DocxErrorCode | string,
    context?: Record<string, unknown>,
): Promise<T>;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Error Handling
|
|
3
|
+
*
|
|
4
|
+
* Centralised error class and async error-wrapping utility.
|
|
5
|
+
*
|
|
6
|
+
* @module docx/errors
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Error type used by the DOCX tooling: a regular `Error` that also carries
 * a `code` and an optional `context` object.
 */
export class DocxError extends Error {
    /**
     * @param {string} message - Human-readable description.
     * @param {string} code - Error code stored on `this.code`.
     * @param {object} [context] - Extra details stored on `this.context`.
     */
    constructor(message, code, context) {
        super(message);
        Object.assign(this, { code, context, name: 'DocxError' });
        // Where the engine supports it, drop the constructor frame from the stack.
        Error.captureStackTrace?.(this, DocxError);
    }
    /**
     * @returns {{ name: string, message: string, code: string, context: (object|undefined) }}
     *   Plain-object form of the error.
     */
    toJSON() {
        const { name, message, code, context } = this;
        return { name, message, code, context };
    }
}
|
|
20
|
+
/**
 * Error codes used with `DocxError`. Each property's value is the same
 * string as its key.
 */
export var DocxErrorCode;
(function (codes) {
    const names = [
        'INVALID_DOCX',
        'INVALID_PATH',
        'OPERATION_FAILED',
        'UNKNOWN_OPERATION',
        'UNSUPPORTED_OPERATION',
        'DOCX_CREATE_FAILED',
        'DOCX_EDIT_FAILED',
        'DOCX_READ_FAILED',
        'INVALID_IMAGE_FILE',
        'INVALID_IMAGE_DATA_URL',
        'GET_INFO_FAILED',
    ];
    for (const name of names) {
        codes[name] = name;
    }
})(DocxErrorCode || (DocxErrorCode = {}));
|
|
34
|
+
/**
 * Await `operation()` and normalise whatever it throws.
 *
 * A `DocxError` is re-thrown untouched. Anything else becomes a new
 * `DocxError` with `errorCode`, whose context is `context` plus an
 * `originalError` entry (the stack for `Error` instances, otherwise the
 * stringified value).
 *
 * @param {() => Promise<any>} operation - Async function to run.
 * @param {string} errorCode - Code for the wrapping error.
 * @param {object} [context] - Extra details merged into the error context.
 * @returns {Promise<any>} The value `operation` resolves to.
 * @throws {DocxError} On any failure of `operation`.
 */
export async function withErrorContext(operation, errorCode, context) {
    try {
        const result = await operation();
        return result;
    }
    catch (error) {
        if (error instanceof DocxError) {
            throw error;
        }
        const isError = error instanceof Error;
        const message = isError ? error.message : String(error);
        const originalError = isError ? error.stack : String(error);
        throw new DocxError(message, errorCode, { ...context, originalError });
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Image Extractor (Mammoth Fallback)
|
|
3
|
+
*
|
|
4
|
+
* Extracts images from HTML generated by mammoth.js fallback.
|
|
5
|
+
* Only used when styled parser is not available.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/extractors/images
|
|
8
|
+
*/
|
|
9
|
+
import type { DocxImage } from '../types.js';
|
|
10
|
+
/**
 * Collect the base64-encoded images embedded in an HTML string
 * (used for the mammoth.js fallback output only).
 *
 * @param html - HTML to scan.
 * @returns The extracted images; an empty array if parsing fails.
 */
export declare function extractImagesFromHtml(html: string): Array<DocxImage>;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Image Extractor (Mammoth Fallback)
|
|
3
|
+
*
|
|
4
|
+
* Extracts images from HTML generated by mammoth.js fallback.
|
|
5
|
+
* Only used when styled parser is not available.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/extractors/images
|
|
8
|
+
*/
|
|
9
|
+
import { createRequire } from 'module';
|
|
10
|
+
const require = createRequire(import.meta.url);
|
|
11
|
+
const { DOMParser } = require('@xmldom/xmldom');
|
|
12
|
+
/**
 * Collect base64 data-URL images from HTML (mammoth.js fallback only).
 *
 * Every `<img>` whose `src` matches `data:<mime>;base64,<payload>` yields
 * one entry; other images are skipped. The numeric part of `id` is the
 * position of the `<img>` among all images, so ids may have gaps.
 *
 * @param {string} html - HTML to scan.
 * @returns {Array<{ id: string, data: string, mimeType: string,
 *   altText: (string|undefined), originalSize: number }>} Images found.
 *   Errors are swallowed on purpose: whatever was collected before the
 *   failure (possibly nothing) is returned.
 */
export function extractImagesFromHtml(html) {
    const dataUrlPattern = /^data:([^;]+);base64,(.+)$/;
    const found = [];
    try {
        const parsed = new DOMParser().parseFromString(html, 'text/html');
        const imgs = parsed.getElementsByTagName('img');
        for (let idx = 0; idx < imgs.length; idx += 1) {
            const img = imgs[idx];
            const source = img.getAttribute('src') || '';
            const altAttr = img.getAttribute('alt') || '';
            const parts = dataUrlPattern.exec(source);
            if (!parts) {
                continue;
            }
            const [, mimeType, data] = parts;
            found.push({
                id: `img_${idx}`,
                data,
                mimeType,
                altText: altAttr || undefined,
                // Decoded byte length of the base64 payload.
                originalSize: Buffer.from(data, 'base64').length,
            });
        }
    }
    catch {
        // Non-critical
    }
    return found;
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Metadata Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts document metadata (title, author, dates, etc.) from DOCX core properties.
|
|
5
|
+
* Follows Single Responsibility Principle — only handles metadata extraction.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/extractors/metadata
|
|
8
|
+
*/
|
|
9
|
+
import type { DocxMetadata } from '../types.js';
|
|
10
|
+
/**
 * Read document metadata from a DOCX buffer.
 *
 * Extraction is treated as non-critical: when it fails, only minimal
 * metadata is returned instead of an error.
 *
 * @param buffer - Raw DOCX file contents.
 * @param fileSize - Optional file size to include in the result.
 * @returns The extracted metadata.
 */
export declare function extractDocxMetadata(
    buffer: Buffer,
    fileSize?: number,
): Promise<DocxMetadata>;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Metadata Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts document metadata (title, author, dates, etc.) from DOCX core properties.
|
|
5
|
+
* Follows Single Responsibility Principle — only handles metadata extraction.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/extractors/metadata
|
|
8
|
+
*/
|
|
9
|
+
import { createRequire } from 'module';
|
|
10
|
+
import { CORE_PROPERTIES_PATH, DOCX_NAMESPACES } from '../constants.js';
|
|
11
|
+
const require = createRequire(import.meta.url);
|
|
12
|
+
const { DOMParser } = require('@xmldom/xmldom');
|
|
13
|
+
/**
|
|
14
|
+
* Extract metadata from a DOCX buffer.
|
|
15
|
+
* Returns minimal metadata if extraction fails (non-critical operation).
|
|
16
|
+
*/
|
|
17
|
+
export async function extractDocxMetadata(buffer, fileSize) {
|
|
18
|
+
const metadata = { fileSize };
|
|
19
|
+
try {
|
|
20
|
+
const JSZip = require('jszip');
|
|
21
|
+
const zip = await JSZip.loadAsync(buffer);
|
|
22
|
+
const corePropsFile = zip.file(CORE_PROPERTIES_PATH);
|
|
23
|
+
if (!corePropsFile)
|
|
24
|
+
return metadata;
|
|
25
|
+
const corePropsXml = await corePropsFile.async('string');
|
|
26
|
+
const doc = new DOMParser().parseFromString(corePropsXml, 'application/xml');
|
|
27
|
+
/** Extract text content from a namespaced tag. */
|
|
28
|
+
const getText = (tag, nsList = [DOCX_NAMESPACES.DUBLIN_CORE, DOCX_NAMESPACES.CUSTOM_PROPERTIES]) => {
|
|
29
|
+
for (const ns of nsList) {
|
|
30
|
+
const els = doc.getElementsByTagName(`${ns}:${tag}`);
|
|
31
|
+
if (els.length > 0 && els[0].textContent) {
|
|
32
|
+
const text = els[0].textContent.trim();
|
|
33
|
+
return text || undefined;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
return undefined;
|
|
37
|
+
};
|
|
38
|
+
/** Extract date from a DCTERMS namespaced tag. */
|
|
39
|
+
const getDate = (tag) => {
|
|
40
|
+
const els = doc.getElementsByTagName(`${DOCX_NAMESPACES.DCTERMS}:${tag}`);
|
|
41
|
+
if (els.length > 0 && els[0].textContent) {
|
|
42
|
+
const dateStr = els[0].textContent.trim();
|
|
43
|
+
if (dateStr) {
|
|
44
|
+
const d = new Date(dateStr);
|
|
45
|
+
if (!isNaN(d.getTime()))
|
|
46
|
+
return d;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
return undefined;
|
|
50
|
+
};
|
|
51
|
+
metadata.title = getText('title');
|
|
52
|
+
metadata.author = getText('creator');
|
|
53
|
+
metadata.subject = getText('subject');
|
|
54
|
+
metadata.description = getText('description');
|
|
55
|
+
metadata.lastModifiedBy = getText('lastModifiedBy', [DOCX_NAMESPACES.CUSTOM_PROPERTIES]);
|
|
56
|
+
metadata.revision = getText('revision', [DOCX_NAMESPACES.CUSTOM_PROPERTIES]);
|
|
57
|
+
metadata.creationDate = getDate('created');
|
|
58
|
+
metadata.modificationDate = getDate('modified');
|
|
59
|
+
}
|
|
60
|
+
catch {
|
|
61
|
+
// Non-critical — return metadata with fileSize only
|
|
62
|
+
}
|
|
63
|
+
return metadata;
|
|
64
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Section Parser
|
|
3
|
+
*
|
|
4
|
+
* Parses HTML into structured sections (headings, paragraphs, tables, lists, images).
|
|
5
|
+
* Follows Single Responsibility Principle — only handles section parsing.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/extractors/sections
|
|
8
|
+
*/
|
|
9
|
+
import type { DocxSection } from '../types.js';
|
|
10
|
+
/**
 * Split an HTML string into structured sections.
 *
 * @param html - HTML to split.
 * @returns The sections found; a single paragraph section if parsing fails.
 */
export declare function parseHtmlIntoSections(html: string): Array<DocxSection>;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Section Parser
|
|
3
|
+
*
|
|
4
|
+
* Parses HTML into structured sections (headings, paragraphs, tables, lists, images).
|
|
5
|
+
* Follows Single Responsibility Principle — only handles section parsing.
|
|
6
|
+
*
|
|
7
|
+
* @module docx/extractors/sections
|
|
8
|
+
*/
|
|
9
|
+
import { createRequire } from 'module';
|
|
10
|
+
const require = createRequire(import.meta.url);
|
|
11
|
+
const { DOMParser } = require('@xmldom/xmldom');
|
|
12
|
+
/**
|
|
13
|
+
* Parse HTML into structured sections.
|
|
14
|
+
* Returns a single paragraph section if parsing fails.
|
|
15
|
+
*/
|
|
16
|
+
export function parseHtmlIntoSections(html) {
|
|
17
|
+
const sections = [];
|
|
18
|
+
try {
|
|
19
|
+
const doc = new DOMParser().parseFromString(html, 'text/html');
|
|
20
|
+
const body = doc.getElementsByTagName('body')[0];
|
|
21
|
+
if (!body) {
|
|
22
|
+
sections.push({ type: 'paragraph', content: html });
|
|
23
|
+
return sections;
|
|
24
|
+
}
|
|
25
|
+
for (let i = 0; i < body.childNodes.length; i++) {
|
|
26
|
+
const child = body.childNodes[i];
|
|
27
|
+
if (child.nodeType !== 1)
|
|
28
|
+
continue;
|
|
29
|
+
const element = child;
|
|
30
|
+
const tag = element.tagName.toLowerCase();
|
|
31
|
+
const content = element.outerHTML || element.innerHTML;
|
|
32
|
+
// Heading detection
|
|
33
|
+
const headingMatch = tag.match(/^h([1-6])$/);
|
|
34
|
+
if (headingMatch) {
|
|
35
|
+
sections.push({ type: 'heading', level: parseInt(headingMatch[1], 10), content });
|
|
36
|
+
continue;
|
|
37
|
+
}
|
|
38
|
+
// Other element types
|
|
39
|
+
switch (tag) {
|
|
40
|
+
case 'img':
|
|
41
|
+
sections.push({ type: 'image', content });
|
|
42
|
+
break;
|
|
43
|
+
case 'table':
|
|
44
|
+
sections.push({ type: 'table', content });
|
|
45
|
+
break;
|
|
46
|
+
case 'ul':
|
|
47
|
+
case 'ol':
|
|
48
|
+
sections.push({ type: 'list', content });
|
|
49
|
+
break;
|
|
50
|
+
case 'p':
|
|
51
|
+
case 'div':
|
|
52
|
+
sections.push({ type: 'paragraph', content });
|
|
53
|
+
break;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
catch {
|
|
58
|
+
sections.push({ type: 'paragraph', content: html });
|
|
59
|
+
}
|
|
60
|
+
return sections;
|
|
61
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX → HTML Conversion
|
|
3
|
+
*
|
|
4
|
+
* Primary: Direct DOCX XML parsing (`styled-html-parser`) — preserves inline styles
|
|
5
|
+
* (font colours, sizes, families, alignment, highlights, etc.)
|
|
6
|
+
* Fallback: mammoth.js — semantic-only conversion, strips visual styles.
|
|
7
|
+
*
|
|
8
|
+
* @module docx/html
|
|
9
|
+
*/
|
|
10
|
+
import type { DocxParseResult, DocxParseOptions } from './types.js';
|
|
11
|
+
/**
|
|
12
|
+
* Parse a DOCX file to styled HTML.
|
|
13
|
+
*
|
|
14
|
+
* Uses direct XML parsing when `preserveFormatting` is true (default).
|
|
15
|
+
* Falls back to mammoth.js if direct parsing fails or a custom `styleMap` is provided.
|
|
16
|
+
*/
|
|
17
|
+
export declare function parseDocxToHtml(source: string, options?: DocxParseOptions): Promise<DocxParseResult>;
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX → HTML Conversion
|
|
3
|
+
*
|
|
4
|
+
* Primary: Direct DOCX XML parsing (`styled-html-parser`) — preserves inline styles
|
|
5
|
+
* (font colours, sizes, families, alignment, highlights, etc.)
|
|
6
|
+
* Fallback: mammoth.js — semantic-only conversion, strips visual styles.
|
|
7
|
+
*
|
|
8
|
+
* @module docx/html
|
|
9
|
+
*/
|
|
10
|
+
import fs from 'fs/promises';
|
|
11
|
+
import { createRequire } from 'module';
|
|
12
|
+
import { DocxError, DocxErrorCode, withErrorContext } from './errors.js';
|
|
13
|
+
import { DEFAULT_CONVERSION_OPTIONS } from './constants.js';
|
|
14
|
+
import { isUrl } from './utils/paths.js';
|
|
15
|
+
import { convertDocxToStyledHtml } from './styled-html-parser.js';
|
|
16
|
+
import { extractDocxMetadata } from './extractors/metadata.js';
|
|
17
|
+
import { parseHtmlIntoSections } from './extractors/sections.js';
|
|
18
|
+
import { extractImagesFromHtml } from './extractors/images.js';
|
|
19
|
+
const require = createRequire(import.meta.url);
|
|
20
|
+
const mammoth = require('mammoth');
|
|
21
|
+
// ─── Public API ──────────────────────────────────────────────────────────────
|
|
22
|
+
/**
 * Convert a DOCX document into HTML plus derived data.
 *
 * The document bytes are loaded from `source`, converted to HTML, lightly
 * post-processed, and split into sections; metadata is extracted from the
 * same bytes. For non-URL sources the on-disk size is looked up on a
 * best-effort basis (a failing `stat` simply leaves the size undefined).
 * The whole operation runs inside `withErrorContext` with the
 * `DOCX_READ_FAILED` code and `{ path: source }` as context.
 *
 * @param {string} source - File path or URL of the DOCX document.
 * @param {object} [options] - Conversion options; `includeImages`,
 *   `preserveFormatting` and `styleMap` default to the values in
 *   `DEFAULT_CONVERSION_OPTIONS` when undefined.
 * @returns {Promise<object>} `{ html, metadata, images, sections, documentDefaults }`.
 */
export async function parseDocxToHtml(source, options = {}) {
    const runConversion = async () => {
        // Resolve options first; only `undefined` falls back to the defaults.
        const includeImages = options.includeImages !== undefined
            ? options.includeImages
            : DEFAULT_CONVERSION_OPTIONS.includeImages;
        const preserveFormatting = options.preserveFormatting !== undefined
            ? options.preserveFormatting
            : DEFAULT_CONVERSION_OPTIONS.preserveFormatting;
        const styleMap = options.styleMap !== undefined
            ? options.styleMap
            : DEFAULT_CONVERSION_OPTIONS.styleMap;

        const buffer = await loadDocxToBuffer(source);

        // Best-effort size lookup for local files; errors are ignored on purpose.
        const statSizeOrUndefined = async () => {
            try {
                const stats = await fs.stat(source);
                return stats.size;
            } catch {
                return undefined;
            }
        };
        const fileSize = isUrl(source) ? undefined : await statSizeOrUndefined();

        const converted = await convertToHtml(buffer, includeImages, preserveFormatting, styleMap);
        const metadata = await extractDocxMetadata(buffer, fileSize);
        const html = postProcessHtml(converted.html);
        const sections = parseHtmlIntoSections(html);
        return {
            html,
            metadata,
            images: converted.images,
            sections,
            documentDefaults: converted.documentDefaults,
        };
    };
    return withErrorContext(runConversion, DocxErrorCode.DOCX_READ_FAILED, { path: source });
}
|
|
46
|
+
// ─── Buffer Loading ──────────────────────────────────────────────────────────
|
|
47
|
+
/**
 * Load the raw bytes of a DOCX document.
 *
 * URL sources are downloaded with `fetch`; a non-OK response raises a
 * `DocxError` carrying the URL and HTTP status. Any other source is read
 * from disk with `fs.readFile`. The work runs inside `withErrorContext`
 * with the `DOCX_READ_FAILED` code and `{ source }` as context.
 *
 * @param {string} source - File path or URL of the document.
 * @returns {Promise<Buffer>} The document bytes.
 */
async function loadDocxToBuffer(source) {
    const readBytes = async () => {
        // Local file: read straight from disk.
        if (!isUrl(source)) {
            return await fs.readFile(source);
        }
        const response = await fetch(source);
        if (response.ok) {
            return Buffer.from(await response.arrayBuffer());
        }
        throw new DocxError(`Failed to fetch DOCX from URL: ${response.statusText}`, DocxErrorCode.DOCX_READ_FAILED, { url: source, status: response.status });
    };
    return withErrorContext(readBytes, DocxErrorCode.DOCX_READ_FAILED, { source });
}
|
|
59
|
+
// ─── Conversion Dispatch ─────────────────────────────────────────────────────
|
|
60
|
+
/**
 * Choose a converter and run it.
 *
 * The styled XML parser is tried only when formatting should be preserved
 * and no custom style map was supplied. If it is not eligible, or it throws,
 * conversion is delegated to `convertWithMammoth`, and the returned object
 * carries `documentDefaults: undefined`.
 *
 * @param {*} buffer - DOCX bytes handed to the converter.
 * @param {boolean} includeImages - Forwarded to the chosen converter.
 * @param {boolean} preserveFormatting - Whether visual formatting is wanted.
 * @param {string[]} styleMap - Custom style map; non-empty forces mammoth.
 * @returns {Promise<object>} Converter result (`html`, `images`, `documentDefaults`).
 */
async function convertToHtml(buffer, includeImages, preserveFormatting, styleMap) {
    const styledParserEligible = preserveFormatting && styleMap.length === 0;
    if (styledParserEligible) {
        try {
            const styledResult = await convertDocxToStyledHtml(buffer, includeImages);
            return styledResult;
        } catch {
            // Deliberately ignored: the mammoth.js path below is the fallback.
        }
    }
    const fallbackResult = await convertWithMammoth(buffer, includeImages, styleMap, preserveFormatting);
    return { ...fallbackResult, documentDefaults: undefined };
}
|
|
76
|
+
/**
 * Convert DOCX bytes to HTML with mammoth.js (the fallback converter).
 *
 * Options passed to mammoth:
 *   - `convertImage` is set only when `includeImages` is truthy; images are
 *     emitted as `data:` URIs built from their base64 content.
 *   - `styleMap` is a copy of the caller's map when it is non-empty;
 *     otherwise, if `preserveFormatting` is truthy, a built-in map for
 *     headings, title/subtitle, quotes, strong and emphasis is used.
 *
 * @param {*} buffer - DOCX bytes, passed to mammoth as `{ buffer }`.
 * @param {boolean} includeImages - Whether to install the image converter.
 * @param {string[]} styleMap - Custom mammoth style map (may be empty).
 * @param {boolean} preserveFormatting - Enables the built-in style map.
 * @returns {Promise<{html: string, images: *}>} HTML and the images
 *   extracted from it by `extractImagesFromHtml`.
 */
async function convertWithMammoth(buffer, includeImages, styleMap, preserveFormatting) {
    // Built-in mapping used when the caller supplies no style map of their own.
    const builtInStyleMap = [
        "p[style-name='Heading 1'] => h1:fresh",
        "p[style-name='Heading 2'] => h2:fresh",
        "p[style-name='Heading 3'] => h3:fresh",
        "p[style-name='Heading 4'] => h4:fresh",
        "p[style-name='Heading 5'] => h5:fresh",
        "p[style-name='Heading 6'] => h6:fresh",
        "p[style-name='Title'] => h1:fresh",
        "p[style-name='Subtitle'] => h2:fresh",
        "p[style-name='Quote'] => blockquote:fresh",
        "r[style-name='Strong'] => strong",
        "r[style-name='Emphasis'] => em",
    ];
    // Turns an embedded image into <img> attributes with an inline data URI.
    const toDataUriAttributes = (image) => image.read('base64').then((base64Data) => ({
        src: `data:${image.contentType};base64,${base64Data}`,
    }));

    const mammothOptions = {};
    if (includeImages) {
        mammothOptions.convertImage = mammoth.images.imgElement(toDataUriAttributes);
    }
    const hasCustomStyleMap = styleMap.length > 0;
    if (hasCustomStyleMap) {
        // Copy so the caller's array is never shared with mammoth.
        mammothOptions.styleMap = [...styleMap];
    } else if (preserveFormatting) {
        mammothOptions.styleMap = builtInStyleMap;
    }

    const { value: html } = await mammoth.convertToHtml({ buffer }, mammothOptions);
    const images = extractImagesFromHtml(html);
    return { html, images };
}
|
|
107
|
+
// ─── Post-Processing ─────────────────────────────────────────────────────────
|
|
108
|
+
/**
 * Light whitespace cleanup that leaves every attribute untouched.
 *
 * A run of two or more whitespace characters sitting between a closing `>`
 * and the next `<` is collapsed to a single newline, then the whole string
 * is trimmed at both ends.
 *
 * @param {string} html - Markup to tidy.
 * @returns {string} The tidied markup.
 */
function postProcessHtml(html) {
    const interTagWhitespace = />\s{2,}</g;
    const collapsed = html.replace(interTagWhitespace, '>\n<');
    return collapsed.trim();
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX file manipulation tools — barrel exports.
|
|
3
|
+
*/
|
|
4
|
+
export { readDocxOutline } from './read.js';
|
|
5
|
+
export { writeDocxPatched } from './write.js';
|
|
6
|
+
export { createDocxNew } from './create.js';
|
|
7
|
+
export type { DocxContentStructure, DocxContentItem, DocxContentParagraph, DocxContentTable, DocxContentImage, } from './types.js';
|
|
8
|
+
export { readDocx, extractTextFromDocx, getDocxMetadata, extractBodyXml } from './read.js';
|
|
9
|
+
export { writeDocx, modifyDocxContent, replaceBodyXml } from './modify.js';
|
|
10
|
+
export type { DocxMetadata, DocxParagraph, DocxRun, DocxModification, ParagraphOutline, ReadDocxResult, WriteDocxStats, WriteDocxResult, BodySnapshot, DocxOp, OpResult, } from './types.js';
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX file manipulation tools — barrel exports.
|
|
3
|
+
*/
|
|
4
|
+
// Patch-based tools (read_docx / write_docx)
|
|
5
|
+
export { readDocxOutline } from './read.js';
|
|
6
|
+
export { writeDocxPatched } from './write.js';
|
|
7
|
+
export { createDocxNew } from './create.js';
|
|
8
|
+
// Legacy functions (used by read_file, write_file, edit_block handlers)
|
|
9
|
+
export { readDocx, extractTextFromDocx, getDocxMetadata, extractBodyXml } from './read.js';
|
|
10
|
+
export { writeDocx, modifyDocxContent, replaceBodyXml } from './modify.js';
|