@wonderwhy-er/desktop-commander 0.2.34 → 0.2.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/handlers/filesystem-handlers.js +58 -11
- package/dist/handlers/history-handlers.d.ts +7 -0
- package/dist/handlers/history-handlers.js +33 -1
- package/dist/server.js +30 -4
- package/dist/tools/docx/builders/html-builder.d.ts +17 -0
- package/dist/tools/docx/builders/html-builder.js +92 -0
- package/dist/tools/docx/builders/image.d.ts +14 -0
- package/dist/tools/docx/builders/image.js +84 -0
- package/dist/tools/docx/builders/index.d.ts +11 -0
- package/dist/tools/docx/builders/index.js +11 -0
- package/dist/tools/docx/builders/markdown-builder.d.ts +2 -0
- package/dist/tools/docx/builders/markdown-builder.js +260 -0
- package/dist/tools/docx/builders/paragraph.d.ts +12 -0
- package/dist/tools/docx/builders/paragraph.js +29 -0
- package/dist/tools/docx/builders/table.d.ts +8 -0
- package/dist/tools/docx/builders/table.js +94 -0
- package/dist/tools/docx/builders/utils.d.ts +5 -0
- package/dist/tools/docx/builders/utils.js +18 -0
- package/dist/tools/docx/constants.d.ts +32 -0
- package/dist/tools/docx/constants.js +61 -0
- package/dist/tools/docx/converters/markdown-to-html.d.ts +17 -0
- package/dist/tools/docx/converters/markdown-to-html.js +111 -0
- package/dist/tools/docx/create.d.ts +21 -0
- package/dist/tools/docx/create.js +386 -0
- package/dist/tools/docx/dom.d.ts +66 -0
- package/dist/tools/docx/dom.js +228 -0
- package/dist/tools/docx/errors.d.ts +28 -0
- package/dist/tools/docx/errors.js +48 -0
- package/dist/tools/docx/extractors/images.d.ts +14 -0
- package/dist/tools/docx/extractors/images.js +40 -0
- package/dist/tools/docx/extractors/metadata.d.ts +14 -0
- package/dist/tools/docx/extractors/metadata.js +64 -0
- package/dist/tools/docx/extractors/sections.d.ts +14 -0
- package/dist/tools/docx/extractors/sections.js +61 -0
- package/dist/tools/docx/html.d.ts +17 -0
- package/dist/tools/docx/html.js +111 -0
- package/dist/tools/docx/index.d.ts +10 -0
- package/dist/tools/docx/index.js +10 -0
- package/dist/tools/docx/markdown.d.ts +84 -0
- package/dist/tools/docx/markdown.js +507 -0
- package/dist/tools/docx/modify.d.ts +28 -0
- package/dist/tools/docx/modify.js +271 -0
- package/dist/tools/docx/operations/handlers/index.d.ts +39 -0
- package/dist/tools/docx/operations/handlers/index.js +152 -0
- package/dist/tools/docx/operations/html-manipulator.d.ts +24 -0
- package/dist/tools/docx/operations/html-manipulator.js +352 -0
- package/dist/tools/docx/operations/index.d.ts +14 -0
- package/dist/tools/docx/operations/index.js +61 -0
- package/dist/tools/docx/operations/operation-handlers.d.ts +3 -0
- package/dist/tools/docx/operations/operation-handlers.js +67 -0
- package/dist/tools/docx/operations/preprocessor.d.ts +14 -0
- package/dist/tools/docx/operations/preprocessor.js +44 -0
- package/dist/tools/docx/operations/xml-replacer.d.ts +9 -0
- package/dist/tools/docx/operations/xml-replacer.js +35 -0
- package/dist/tools/docx/operations.d.ts +13 -0
- package/dist/tools/docx/operations.js +13 -0
- package/dist/tools/docx/ops/delete-paragraph-at-body-index.d.ts +11 -0
- package/dist/tools/docx/ops/delete-paragraph-at-body-index.js +23 -0
- package/dist/tools/docx/ops/header-replace-text-exact.d.ts +13 -0
- package/dist/tools/docx/ops/header-replace-text-exact.js +55 -0
- package/dist/tools/docx/ops/index.d.ts +17 -0
- package/dist/tools/docx/ops/index.js +67 -0
- package/dist/tools/docx/ops/insert-image-after-text.d.ts +24 -0
- package/dist/tools/docx/ops/insert-image-after-text.js +128 -0
- package/dist/tools/docx/ops/insert-paragraph-after-text.d.ts +12 -0
- package/dist/tools/docx/ops/insert-paragraph-after-text.js +74 -0
- package/dist/tools/docx/ops/insert-table-after-text.d.ts +19 -0
- package/dist/tools/docx/ops/insert-table-after-text.js +57 -0
- package/dist/tools/docx/ops/replace-hyperlink-url.d.ts +12 -0
- package/dist/tools/docx/ops/replace-hyperlink-url.js +37 -0
- package/dist/tools/docx/ops/replace-paragraph-at-body-index.d.ts +9 -0
- package/dist/tools/docx/ops/replace-paragraph-at-body-index.js +25 -0
- package/dist/tools/docx/ops/replace-paragraph-text-exact.d.ts +9 -0
- package/dist/tools/docx/ops/replace-paragraph-text-exact.js +21 -0
- package/dist/tools/docx/ops/set-color-for-paragraph-exact.d.ts +8 -0
- package/dist/tools/docx/ops/set-color-for-paragraph-exact.js +23 -0
- package/dist/tools/docx/ops/set-color-for-style.d.ts +9 -0
- package/dist/tools/docx/ops/set-color-for-style.js +27 -0
- package/dist/tools/docx/ops/set-paragraph-style-at-body-index.d.ts +8 -0
- package/dist/tools/docx/ops/set-paragraph-style-at-body-index.js +57 -0
- package/dist/tools/docx/ops/table-set-cell-text.d.ts +9 -0
- package/dist/tools/docx/ops/table-set-cell-text.js +72 -0
- package/dist/tools/docx/parsers/image-extractor.d.ts +18 -0
- package/dist/tools/docx/parsers/image-extractor.js +61 -0
- package/dist/tools/docx/parsers/index.d.ts +9 -0
- package/dist/tools/docx/parsers/index.js +9 -0
- package/dist/tools/docx/parsers/paragraph-parser.d.ts +2 -0
- package/dist/tools/docx/parsers/paragraph-parser.js +88 -0
- package/dist/tools/docx/parsers/table-parser.d.ts +9 -0
- package/dist/tools/docx/parsers/table-parser.js +72 -0
- package/dist/tools/docx/parsers/xml-parser.d.ts +25 -0
- package/dist/tools/docx/parsers/xml-parser.js +71 -0
- package/dist/tools/docx/parsers/zip-reader.d.ts +23 -0
- package/dist/tools/docx/parsers/zip-reader.js +52 -0
- package/dist/tools/docx/read.d.ts +27 -0
- package/dist/tools/docx/read.js +188 -0
- package/dist/tools/docx/relationships.d.ts +22 -0
- package/dist/tools/docx/relationships.js +76 -0
- package/dist/tools/docx/structure.d.ts +25 -0
- package/dist/tools/docx/structure.js +102 -0
- package/dist/tools/docx/styled-html-parser.d.ts +23 -0
- package/dist/tools/docx/styled-html-parser.js +1262 -0
- package/dist/tools/docx/types.d.ts +184 -0
- package/dist/tools/docx/types.js +5 -0
- package/dist/tools/docx/utils/escaping.d.ts +13 -0
- package/dist/tools/docx/utils/escaping.js +26 -0
- package/dist/tools/docx/utils/images.d.ts +9 -0
- package/dist/tools/docx/utils/images.js +26 -0
- package/dist/tools/docx/utils/index.d.ts +12 -0
- package/dist/tools/docx/utils/index.js +17 -0
- package/dist/tools/docx/utils/markdown.d.ts +13 -0
- package/dist/tools/docx/utils/markdown.js +32 -0
- package/dist/tools/docx/utils/paths.d.ts +15 -0
- package/dist/tools/docx/utils/paths.js +27 -0
- package/dist/tools/docx/utils/versioning.d.ts +25 -0
- package/dist/tools/docx/utils/versioning.js +55 -0
- package/dist/tools/docx/utils.d.ts +101 -0
- package/dist/tools/docx/utils.js +299 -0
- package/dist/tools/docx/validate.d.ts +33 -0
- package/dist/tools/docx/validate.js +49 -0
- package/dist/tools/docx/validators.d.ts +13 -0
- package/dist/tools/docx/validators.js +40 -0
- package/dist/tools/docx/write.d.ts +17 -0
- package/dist/tools/docx/write.js +88 -0
- package/dist/tools/docx/zip.d.ts +21 -0
- package/dist/tools/docx/zip.js +35 -0
- package/dist/tools/schemas.d.ts +13 -0
- package/dist/tools/schemas.js +5 -0
- package/dist/types.d.ts +10 -0
- package/dist/ui/contracts.d.ts +14 -0
- package/dist/ui/contracts.js +18 -0
- package/dist/ui/file-preview/index.html +16 -0
- package/dist/ui/file-preview/preview-runtime.js +13977 -0
- package/dist/ui/file-preview/shared/preview-file-types.d.ts +5 -0
- package/dist/ui/file-preview/shared/preview-file-types.js +57 -0
- package/dist/ui/file-preview/src/app.d.ts +4 -0
- package/dist/ui/file-preview/src/app.js +800 -0
- package/dist/ui/file-preview/src/components/code-viewer.d.ts +6 -0
- package/dist/ui/file-preview/src/components/code-viewer.js +73 -0
- package/dist/ui/file-preview/src/components/highlighting.d.ts +2 -0
- package/dist/ui/file-preview/src/components/highlighting.js +54 -0
- package/dist/ui/file-preview/src/components/html-renderer.d.ts +9 -0
- package/dist/ui/file-preview/src/components/html-renderer.js +63 -0
- package/dist/ui/file-preview/src/components/markdown-renderer.d.ts +1 -0
- package/dist/ui/file-preview/src/components/markdown-renderer.js +21 -0
- package/dist/ui/file-preview/src/components/toolbar.d.ts +6 -0
- package/dist/ui/file-preview/src/components/toolbar.js +75 -0
- package/dist/ui/file-preview/src/image-preview.d.ts +3 -0
- package/dist/ui/file-preview/src/image-preview.js +21 -0
- package/dist/ui/file-preview/src/main.d.ts +1 -0
- package/dist/ui/file-preview/src/main.js +5 -0
- package/dist/ui/file-preview/src/types.d.ts +1 -0
- package/dist/ui/file-preview/src/types.js +1 -0
- package/dist/ui/file-preview/styles.css +764 -0
- package/dist/ui/resources.d.ts +21 -0
- package/dist/ui/resources.js +72 -0
- package/dist/ui/shared/escape-html.d.ts +4 -0
- package/dist/ui/shared/escape-html.js +11 -0
- package/dist/ui/shared/host-lifecycle.d.ts +16 -0
- package/dist/ui/shared/host-lifecycle.js +35 -0
- package/dist/ui/shared/rpc-client.d.ts +14 -0
- package/dist/ui/shared/rpc-client.js +72 -0
- package/dist/ui/shared/theme-adaptation.d.ts +10 -0
- package/dist/ui/shared/theme-adaptation.js +118 -0
- package/dist/ui/shared/tool-header.d.ts +9 -0
- package/dist/ui/shared/tool-header.js +25 -0
- package/dist/ui/shared/tool-shell.d.ts +16 -0
- package/dist/ui/shared/tool-shell.js +65 -0
- package/dist/ui/shared/widget-state.d.ts +28 -0
- package/dist/ui/shared/widget-state.js +60 -0
- package/dist/utils/capture.d.ts +1 -0
- package/dist/utils/capture.js +10 -4
- package/dist/utils/files/docx.d.ts +34 -0
- package/dist/utils/files/docx.js +145 -0
- package/dist/utils/files/text.js +9 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +5 -2
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Op: set_color_for_style
|
|
3
|
+
*
|
|
4
|
+
* For every paragraph whose w:pPr/w:pStyle/@w:val === style,
|
|
5
|
+
* set run-level colour on every w:r in that paragraph.
|
|
6
|
+
* Does NOT modify word/styles.xml — only in-document run formatting.
|
|
7
|
+
*/
|
|
8
|
+
import { getBodyChildren, getParagraphStyle, ensureRunColor } from '../dom.js';
|
|
9
|
+
export function applySetColorForStyle(body, op) {
|
|
10
|
+
const children = getBodyChildren(body);
|
|
11
|
+
let matched = 0;
|
|
12
|
+
for (const child of children) {
|
|
13
|
+
if (child.nodeName !== 'w:p')
|
|
14
|
+
continue;
|
|
15
|
+
if (getParagraphStyle(child) !== op.style)
|
|
16
|
+
continue;
|
|
17
|
+
const runs = child.getElementsByTagName('w:r');
|
|
18
|
+
for (let i = 0; i < runs.length; i++) {
|
|
19
|
+
ensureRunColor(runs.item(i), op.color);
|
|
20
|
+
}
|
|
21
|
+
matched++;
|
|
22
|
+
}
|
|
23
|
+
if (matched === 0) {
|
|
24
|
+
return { op, status: 'skipped', matched: 0, reason: 'no_match' };
|
|
25
|
+
}
|
|
26
|
+
return { op, status: 'applied', matched };
|
|
27
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Op: set_paragraph_style_at_body_index
|
|
3
|
+
*
|
|
4
|
+
* Set (or replace) the paragraph style (w:pPr/w:pStyle) at a given
|
|
5
|
+
* bodyChildIndex. Skips if the child is not a w:p.
|
|
6
|
+
*/
|
|
7
|
+
import type { SetParagraphStyleAtBodyIndexOp, OpResult } from '../types.js';
|
|
8
|
+
export declare function applySetParagraphStyleAtBodyIndex(body: Element, op: SetParagraphStyleAtBodyIndexOp): OpResult;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Op: set_paragraph_style_at_body_index
|
|
3
|
+
*
|
|
4
|
+
* Set (or replace) the paragraph style (w:pPr/w:pStyle) at a given
|
|
5
|
+
* bodyChildIndex. Skips if the child is not a w:p.
|
|
6
|
+
*/
|
|
7
|
+
import { getBodyChildren, nodeListToArray } from '../dom.js';
|
|
8
|
+
export function applySetParagraphStyleAtBodyIndex(body, op) {
|
|
9
|
+
const children = getBodyChildren(body);
|
|
10
|
+
const idx = op.bodyChildIndex;
|
|
11
|
+
if (idx < 0 || idx >= children.length) {
|
|
12
|
+
return { op, status: 'skipped', matched: 0, reason: 'index_out_of_range' };
|
|
13
|
+
}
|
|
14
|
+
const child = children[idx];
|
|
15
|
+
if (child.nodeName !== 'w:p') {
|
|
16
|
+
return { op, status: 'skipped', matched: 0, reason: 'not_a_paragraph' };
|
|
17
|
+
}
|
|
18
|
+
const doc = child.ownerDocument;
|
|
19
|
+
if (!doc)
|
|
20
|
+
return { op, status: 'skipped', matched: 0, reason: 'no_owner_document' };
|
|
21
|
+
// Find or create w:pPr
|
|
22
|
+
let pPr = null;
|
|
23
|
+
for (const n of nodeListToArray(child.childNodes)) {
|
|
24
|
+
if (n.nodeType === 1 && n.nodeName === 'w:pPr') {
|
|
25
|
+
pPr = n;
|
|
26
|
+
break;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
if (!pPr) {
|
|
30
|
+
pPr = doc.createElement('w:pPr');
|
|
31
|
+
if (child.firstChild) {
|
|
32
|
+
child.insertBefore(pPr, child.firstChild);
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
child.appendChild(pPr);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
// Find or create w:pStyle inside pPr
|
|
39
|
+
let pStyle = null;
|
|
40
|
+
for (const n of nodeListToArray(pPr.childNodes)) {
|
|
41
|
+
if (n.nodeType === 1 && n.nodeName === 'w:pStyle') {
|
|
42
|
+
pStyle = n;
|
|
43
|
+
break;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
if (!pStyle) {
|
|
47
|
+
pStyle = doc.createElement('w:pStyle');
|
|
48
|
+
if (pPr.firstChild) {
|
|
49
|
+
pPr.insertBefore(pStyle, pPr.firstChild);
|
|
50
|
+
}
|
|
51
|
+
else {
|
|
52
|
+
pPr.appendChild(pStyle);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
pStyle.setAttribute('w:val', op.style);
|
|
56
|
+
return { op, status: 'applied', matched: 1 };
|
|
57
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Op: table_set_cell_text
|
|
3
|
+
*
|
|
4
|
+
* Set the text content of a specific table cell.
|
|
5
|
+
* Targets by: tableIndex (0-based among w:tbl in body), row, col.
|
|
6
|
+
* Applies minimal text replacement inside the cell's first paragraph.
|
|
7
|
+
*/
|
|
8
|
+
import type { TableSetCellTextOp, OpResult } from '../types.js';
|
|
9
|
+
export declare function applyTableSetCellText(body: Element, op: TableSetCellTextOp): OpResult;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Op: table_set_cell_text
|
|
3
|
+
*
|
|
4
|
+
* Set the text content of a specific table cell.
|
|
5
|
+
* Targets by: tableIndex (0-based among w:tbl in body), row, col.
|
|
6
|
+
* Applies minimal text replacement inside the cell's first paragraph.
|
|
7
|
+
*/
|
|
8
|
+
import { getBodyChildren, nodeListToArray, setParagraphTextMinimal } from '../dom.js';
|
|
9
|
+
export function applyTableSetCellText(body, op) {
|
|
10
|
+
const children = getBodyChildren(body);
|
|
11
|
+
// Find the n-th w:tbl
|
|
12
|
+
let tableCount = 0;
|
|
13
|
+
let table = null;
|
|
14
|
+
for (const child of children) {
|
|
15
|
+
if (child.nodeName === 'w:tbl') {
|
|
16
|
+
if (tableCount === op.tableIndex) {
|
|
17
|
+
table = child;
|
|
18
|
+
break;
|
|
19
|
+
}
|
|
20
|
+
tableCount++;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
if (!table) {
|
|
24
|
+
return { op, status: 'skipped', matched: 0, reason: 'table_not_found' };
|
|
25
|
+
}
|
|
26
|
+
// Find the n-th w:tr
|
|
27
|
+
const rows = [];
|
|
28
|
+
for (const child of nodeListToArray(table.childNodes)) {
|
|
29
|
+
if (child.nodeType === 1 && child.nodeName === 'w:tr') {
|
|
30
|
+
rows.push(child);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
if (op.row < 0 || op.row >= rows.length) {
|
|
34
|
+
return { op, status: 'skipped', matched: 0, reason: 'row_out_of_range' };
|
|
35
|
+
}
|
|
36
|
+
// Find the n-th w:tc in the row
|
|
37
|
+
const cells = [];
|
|
38
|
+
for (const child of nodeListToArray(rows[op.row].childNodes)) {
|
|
39
|
+
if (child.nodeType === 1 && child.nodeName === 'w:tc') {
|
|
40
|
+
cells.push(child);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
if (op.col < 0 || op.col >= cells.length) {
|
|
44
|
+
return { op, status: 'skipped', matched: 0, reason: 'col_out_of_range' };
|
|
45
|
+
}
|
|
46
|
+
const cell = cells[op.col];
|
|
47
|
+
// Find first w:p inside the cell and apply minimal text replacement
|
|
48
|
+
for (const child of nodeListToArray(cell.childNodes)) {
|
|
49
|
+
if (child.nodeType === 1 && child.nodeName === 'w:p') {
|
|
50
|
+
const p = child;
|
|
51
|
+
const tNodes = p.getElementsByTagName('w:t');
|
|
52
|
+
if (tNodes.length > 0) {
|
|
53
|
+
// Existing runs — use minimal replacement
|
|
54
|
+
setParagraphTextMinimal(p, op.text);
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
// Empty cell — create a run
|
|
58
|
+
const doc = cell.ownerDocument;
|
|
59
|
+
if (!doc)
|
|
60
|
+
return { op, status: 'skipped', matched: 0, reason: 'no_owner_document' };
|
|
61
|
+
const r = doc.createElement('w:r');
|
|
62
|
+
const t = doc.createElement('w:t');
|
|
63
|
+
t.setAttribute('xml:space', 'preserve');
|
|
64
|
+
t.textContent = op.text;
|
|
65
|
+
r.appendChild(t);
|
|
66
|
+
p.appendChild(r);
|
|
67
|
+
}
|
|
68
|
+
return { op, status: 'applied', matched: 1 };
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return { op, status: 'skipped', matched: 0, reason: 'no_paragraph_in_cell' };
|
|
72
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Extractor
|
|
3
|
+
* Utilities for extracting and handling images from DOCX files
|
|
4
|
+
*/
|
|
5
|
+
import type { ZipArchive } from './zip-reader.js';
|
|
6
|
+
import type { DocxRelationship } from '../types.js';
|
|
7
|
+
/**
|
|
8
|
+
* Get MIME type from file extension or target path
|
|
9
|
+
*/
|
|
10
|
+
export declare function getMimeTypeForTarget(target: string): string;
|
|
11
|
+
/**
|
|
12
|
+
* Extract all images from a DOCX ZIP archive
|
|
13
|
+
*/
|
|
14
|
+
export declare function extractImagesFromZip(zip: ZipArchive, relMap: Map<string, DocxRelationship>): Map<string, Buffer>;
|
|
15
|
+
/**
|
|
16
|
+
* Resolve image relationship ID from drawing or pict element
|
|
17
|
+
*/
|
|
18
|
+
export declare function resolveImageRelId(element: Element): string | null;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Extractor
|
|
3
|
+
* Utilities for extracting and handling images from DOCX files
|
|
4
|
+
*/
|
|
5
|
+
import path from 'path';
|
|
6
|
+
import { readZipFileBuffer } from './zip-reader.js';
|
|
7
|
+
/**
 * Map the (case-insensitive) file extension of `target` to an image MIME type.
 *
 * @param target - File name or path; only its extension is inspected.
 * @returns The MIME type for png, jpg/jpeg, gif, bmp, webp and svg, otherwise
 *   `'application/octet-stream'`.
 */
export function getMimeTypeForTarget(target) {
    switch (path.extname(target).toLowerCase()) {
        case '.png':
            return 'image/png';
        case '.jpg':
        case '.jpeg':
            return 'image/jpeg';
        case '.gif':
            return 'image/gif';
        case '.bmp':
            return 'image/bmp';
        case '.webp':
            return 'image/webp';
        case '.svg':
            return 'image/svg+xml';
        default:
            return 'application/octet-stream';
    }
}
|
|
23
|
+
/**
|
|
24
|
+
* Extract all images from a DOCX ZIP archive
|
|
25
|
+
*/
|
|
26
|
+
export function extractImagesFromZip(zip, relMap) {
|
|
27
|
+
const images = new Map();
|
|
28
|
+
for (const [relId, rel] of relMap.entries()) {
|
|
29
|
+
if (!rel.type.includes('/image'))
|
|
30
|
+
continue;
|
|
31
|
+
const targetPath = rel.target.startsWith('word/')
|
|
32
|
+
? rel.target
|
|
33
|
+
: `word/${rel.target.replace(/^\/?/, '')}`;
|
|
34
|
+
const imgBuffer = readZipFileBuffer(zip, targetPath);
|
|
35
|
+
if (imgBuffer) {
|
|
36
|
+
images.set(relId, imgBuffer);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
return images;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Resolve image relationship ID from drawing or pict element
|
|
43
|
+
*/
|
|
44
|
+
export function resolveImageRelId(element) {
|
|
45
|
+
// Try drawing element first (newer format)
|
|
46
|
+
const blips = element.getElementsByTagName('a:blip');
|
|
47
|
+
for (let i = 0; i < blips.length; i++) {
|
|
48
|
+
const blip = blips[i];
|
|
49
|
+
const relId = blip.getAttribute('r:embed') || blip.getAttribute('embed');
|
|
50
|
+
if (relId)
|
|
51
|
+
return relId;
|
|
52
|
+
}
|
|
53
|
+
// Try pict element (older format)
|
|
54
|
+
const imagedata = element.getElementsByTagName('v:imagedata');
|
|
55
|
+
for (let i = 0; i < imagedata.length; i++) {
|
|
56
|
+
const relId = imagedata[i].getAttribute('r:id') || imagedata[i].getAttribute('id');
|
|
57
|
+
if (relId)
|
|
58
|
+
return relId;
|
|
59
|
+
}
|
|
60
|
+
return null;
|
|
61
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { createRequire } from 'module';
|
|
2
|
+
const require = createRequire(import.meta.url);
|
|
3
|
+
// @ts-ignore
|
|
4
|
+
import * as docx from 'docx';
|
|
5
|
+
const { Paragraph, TextRun, ImageRun, HeadingLevel } = docx;
|
|
6
|
+
import { getElementChildren } from './xml-parser.js';
|
|
7
|
+
import { resolveImageRelId } from './image-extractor.js';
|
|
8
|
+
export function parseParagraphElement(paragraph, images, headingLevel) {
|
|
9
|
+
const runs = extractRunsFromParagraph(paragraph, images);
|
|
10
|
+
if (runs.length === 0) {
|
|
11
|
+
return null;
|
|
12
|
+
}
|
|
13
|
+
return new Paragraph({
|
|
14
|
+
children: runs,
|
|
15
|
+
heading: headingLevel ? getDocxHeadingLevel(headingLevel) : undefined,
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
function extractRunsFromParagraph(paragraph, images) {
|
|
19
|
+
const runs = [];
|
|
20
|
+
const children = getElementChildren(paragraph);
|
|
21
|
+
for (const child of children) {
|
|
22
|
+
const nodeName = child.nodeName;
|
|
23
|
+
if (nodeName === 'w:r') {
|
|
24
|
+
const textRuns = extractTextRun(child, images);
|
|
25
|
+
runs.push(...textRuns);
|
|
26
|
+
}
|
|
27
|
+
else if (nodeName === 'w:hyperlink') {
|
|
28
|
+
const linkRuns = child.getElementsByTagName('w:r');
|
|
29
|
+
for (let i = 0; i < linkRuns.length; i++) {
|
|
30
|
+
const textRuns = extractTextRun(linkRuns[i], images);
|
|
31
|
+
runs.push(...textRuns);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
return runs;
|
|
36
|
+
}
|
|
37
|
+
function extractTextRun(run, images) {
|
|
38
|
+
const runs = [];
|
|
39
|
+
const rPr = run.getElementsByTagName('w:rPr')[0];
|
|
40
|
+
const isBold = rPr?.getElementsByTagName('w:b').length > 0;
|
|
41
|
+
const isItalic = rPr?.getElementsByTagName('w:i').length > 0;
|
|
42
|
+
const children = getElementChildren(run);
|
|
43
|
+
for (const child of children) {
|
|
44
|
+
const nodeName = child.nodeName;
|
|
45
|
+
if (nodeName === 'w:t') {
|
|
46
|
+
const text = child.textContent || '';
|
|
47
|
+
if (text) {
|
|
48
|
+
runs.push(new TextRun({
|
|
49
|
+
text,
|
|
50
|
+
bold: isBold,
|
|
51
|
+
italics: isItalic,
|
|
52
|
+
}));
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
else if (nodeName === 'w:tab') {
|
|
56
|
+
runs.push(new TextRun({ text: '\t' }));
|
|
57
|
+
}
|
|
58
|
+
else if (nodeName === 'w:br') {
|
|
59
|
+
runs.push(new TextRun({ text: '\n', break: 1 }));
|
|
60
|
+
}
|
|
61
|
+
else if (nodeName === 'w:drawing' || nodeName === 'w:pict') {
|
|
62
|
+
const relId = resolveImageRelId(child);
|
|
63
|
+
if (relId && images.has(relId)) {
|
|
64
|
+
try {
|
|
65
|
+
runs.push(new ImageRun({
|
|
66
|
+
data: images.get(relId),
|
|
67
|
+
transformation: { width: 600, height: 400 },
|
|
68
|
+
}));
|
|
69
|
+
}
|
|
70
|
+
catch (err) {
|
|
71
|
+
// Skip invalid images
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
return runs;
|
|
77
|
+
}
|
|
78
|
+
function getDocxHeadingLevel(level) {
|
|
79
|
+
const levelMap = {
|
|
80
|
+
1: HeadingLevel.HEADING_1,
|
|
81
|
+
2: HeadingLevel.HEADING_2,
|
|
82
|
+
3: HeadingLevel.HEADING_3,
|
|
83
|
+
4: HeadingLevel.HEADING_4,
|
|
84
|
+
5: HeadingLevel.HEADING_5,
|
|
85
|
+
6: HeadingLevel.HEADING_6,
|
|
86
|
+
};
|
|
87
|
+
return levelMap[level] ?? HeadingLevel.HEADING_1;
|
|
88
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Table Parser
|
|
3
|
+
* Parses DOCX table elements to DOCX library Table objects
|
|
4
|
+
*/
|
|
5
|
+
import type { DocxTable } from '../types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Parse a table element to DOCX Table
|
|
8
|
+
*/
|
|
9
|
+
export declare function parseTableElement(table: Element, images: Map<string, Buffer>): DocxTable | null;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Table Parser
|
|
3
|
+
* Parses DOCX table elements to DOCX library Table objects
|
|
4
|
+
*/
|
|
5
|
+
import { createRequire } from 'module';
|
|
6
|
+
const require = createRequire(import.meta.url);
|
|
7
|
+
// @ts-ignore
|
|
8
|
+
import * as docx from 'docx';
|
|
9
|
+
const { Table, TableRow, TableCell, Paragraph, WidthType } = docx;
|
|
10
|
+
import { parseParagraphElement } from './paragraph-parser.js';
|
|
11
|
+
/**
|
|
12
|
+
* Parse a table element to DOCX Table
|
|
13
|
+
*/
|
|
14
|
+
export function parseTableElement(table, images) {
|
|
15
|
+
const rows = [];
|
|
16
|
+
const rowNodes = table.getElementsByTagName('w:tr');
|
|
17
|
+
for (let i = 0; i < rowNodes.length; i++) {
|
|
18
|
+
const rowNode = rowNodes[i];
|
|
19
|
+
const row = parseTableRow(rowNode, images);
|
|
20
|
+
if (row) {
|
|
21
|
+
rows.push(row);
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
if (rows.length === 0) {
|
|
25
|
+
return null;
|
|
26
|
+
}
|
|
27
|
+
return new Table({
|
|
28
|
+
width: { size: 100, type: WidthType.PERCENTAGE },
|
|
29
|
+
rows,
|
|
30
|
+
});
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Parse a table row element
|
|
34
|
+
*/
|
|
35
|
+
function parseTableRow(rowNode, images) {
|
|
36
|
+
const cells = [];
|
|
37
|
+
const cellNodes = rowNode.getElementsByTagName('w:tc');
|
|
38
|
+
for (let j = 0; j < cellNodes.length; j++) {
|
|
39
|
+
const cellNode = cellNodes[j];
|
|
40
|
+
const cell = parseTableCell(cellNode, images);
|
|
41
|
+
if (cell) {
|
|
42
|
+
cells.push(cell);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
if (cells.length === 0) {
|
|
46
|
+
return null;
|
|
47
|
+
}
|
|
48
|
+
return new TableRow({
|
|
49
|
+
children: cells,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Parse a table cell element
|
|
54
|
+
*/
|
|
55
|
+
function parseTableCell(cellNode, images) {
|
|
56
|
+
const cellParagraphs = [];
|
|
57
|
+
const paragraphNodes = cellNode.getElementsByTagName('w:p');
|
|
58
|
+
for (let k = 0; k < paragraphNodes.length; k++) {
|
|
59
|
+
const paraNode = paragraphNodes[k];
|
|
60
|
+
const para = parseParagraphElement(paraNode, images, null);
|
|
61
|
+
if (para) {
|
|
62
|
+
cellParagraphs.push(para);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
// Ensure at least one paragraph per cell
|
|
66
|
+
if (cellParagraphs.length === 0) {
|
|
67
|
+
cellParagraphs.push(new Paragraph({ text: '' }));
|
|
68
|
+
}
|
|
69
|
+
return new TableCell({
|
|
70
|
+
children: cellParagraphs,
|
|
71
|
+
});
|
|
72
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XML Parser Utilities
|
|
3
|
+
* Helper functions for parsing DOCX XML content
|
|
4
|
+
*/
|
|
5
|
+
import type { DocxRelationship } from '../types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Get all element children of a node
|
|
8
|
+
*/
|
|
9
|
+
export declare function getElementChildren(node: Node): Element[];
|
|
10
|
+
/**
|
|
11
|
+
* Get attribute value, checking both direct and namespaced attributes
|
|
12
|
+
*/
|
|
13
|
+
export declare function getAttributeValue(node: Element, name: string): string | null;
|
|
14
|
+
/**
|
|
15
|
+
* Parse XML string to Document
|
|
16
|
+
*/
|
|
17
|
+
export declare function parseXml(xml: string): Document;
|
|
18
|
+
/**
|
|
19
|
+
* Extract relationship map from relationships XML
|
|
20
|
+
*/
|
|
21
|
+
export declare function extractRelationshipMap(relsXml: string | null): Map<string, DocxRelationship>;
|
|
22
|
+
/**
|
|
23
|
+
* Get heading level from paragraph element
|
|
24
|
+
*/
|
|
25
|
+
export declare function getHeadingLevelFromParagraph(paragraph: Element): number | null;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XML Parser Utilities
|
|
3
|
+
* Helper functions for parsing DOCX XML content
|
|
4
|
+
*/
|
|
5
|
+
import { createRequire } from 'module';
|
|
6
|
+
const require = createRequire(import.meta.url);
|
|
7
|
+
const { DOMParser } = require('@xmldom/xmldom');
|
|
8
|
+
/**
|
|
9
|
+
* Get all element children of a node
|
|
10
|
+
*/
|
|
11
|
+
export function getElementChildren(node) {
|
|
12
|
+
const children = [];
|
|
13
|
+
for (let i = 0; i < node.childNodes.length; i++) {
|
|
14
|
+
const child = node.childNodes[i];
|
|
15
|
+
if (child.nodeType === 1) { // ELEMENT_NODE
|
|
16
|
+
children.push(child);
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
return children;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Get attribute value, checking both direct and namespaced attributes
|
|
23
|
+
*/
|
|
24
|
+
export function getAttributeValue(node, name) {
|
|
25
|
+
return node.getAttribute(name) || node.getAttribute(`w:${name}`) || null;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Parse XML string to Document
|
|
29
|
+
*/
|
|
30
|
+
export function parseXml(xml) {
|
|
31
|
+
const parser = new DOMParser();
|
|
32
|
+
return parser.parseFromString(xml, 'application/xml');
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Extract relationship map from relationships XML
|
|
36
|
+
*/
|
|
37
|
+
export function extractRelationshipMap(relsXml) {
|
|
38
|
+
const relMap = new Map();
|
|
39
|
+
if (!relsXml)
|
|
40
|
+
return relMap;
|
|
41
|
+
const relDoc = parseXml(relsXml);
|
|
42
|
+
const rels = relDoc.getElementsByTagName('Relationship');
|
|
43
|
+
for (let i = 0; i < rels.length; i++) {
|
|
44
|
+
const rel = rels[i];
|
|
45
|
+
const id = rel.getAttribute('Id');
|
|
46
|
+
const type = rel.getAttribute('Type') || '';
|
|
47
|
+
const target = rel.getAttribute('Target') || '';
|
|
48
|
+
if (id && target) {
|
|
49
|
+
relMap.set(id, { target, type });
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
return relMap;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Get heading level from paragraph element
|
|
56
|
+
*/
|
|
57
|
+
export function getHeadingLevelFromParagraph(paragraph) {
|
|
58
|
+
const pPr = paragraph.getElementsByTagName('w:pPr')[0];
|
|
59
|
+
if (!pPr)
|
|
60
|
+
return null;
|
|
61
|
+
const pStyle = pPr.getElementsByTagName('w:pStyle')[0];
|
|
62
|
+
if (!pStyle)
|
|
63
|
+
return null;
|
|
64
|
+
const styleVal = getAttributeValue(pStyle, 'val');
|
|
65
|
+
if (!styleVal)
|
|
66
|
+
return null;
|
|
67
|
+
const match = styleVal.match(/heading\s*([1-6])/i);
|
|
68
|
+
if (!match)
|
|
69
|
+
return null;
|
|
70
|
+
return Number(match[1]);
|
|
71
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ZIP File Reader
|
|
3
|
+
* Utilities for reading files from DOCX ZIP archives
|
|
4
|
+
*/
|
|
5
|
+
declare const PizZip: any;
|
|
6
|
+
export type ZipArchive = InstanceType<typeof PizZip>;
|
|
7
|
+
/**
|
|
8
|
+
* Create a ZIP archive from a buffer
|
|
9
|
+
*/
|
|
10
|
+
export declare function createZipFromBuffer(buffer: Buffer): ZipArchive;
|
|
11
|
+
/**
|
|
12
|
+
* Read a text file from a ZIP archive
|
|
13
|
+
*/
|
|
14
|
+
export declare function readZipFileText(zip: ZipArchive, filePath: string): string | null;
|
|
15
|
+
/**
|
|
16
|
+
* Read a binary file from a ZIP archive as Buffer
|
|
17
|
+
*/
|
|
18
|
+
export declare function readZipFileBuffer(zip: ZipArchive, filePath: string): Buffer | null;
|
|
19
|
+
/**
|
|
20
|
+
* Check if a file exists in the ZIP archive
|
|
21
|
+
*/
|
|
22
|
+
export declare function zipFileExists(zip: ZipArchive, filePath: string): boolean;
|
|
23
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ZIP File Reader
|
|
3
|
+
* Utilities for reading files from DOCX ZIP archives
|
|
4
|
+
*/
|
|
5
|
+
import { createRequire } from 'module';
|
|
6
|
+
const require = createRequire(import.meta.url);
|
|
7
|
+
const PizZip = require('pizzip');
|
|
8
|
+
/**
|
|
9
|
+
* Create a ZIP archive from a buffer
|
|
10
|
+
*/
|
|
11
|
+
export function createZipFromBuffer(buffer) {
|
|
12
|
+
return new PizZip(buffer);
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Read a text file from a ZIP archive
|
|
16
|
+
*/
|
|
17
|
+
export function readZipFileText(zip, filePath) {
|
|
18
|
+
const file = zip.file(filePath);
|
|
19
|
+
if (!file)
|
|
20
|
+
return null;
|
|
21
|
+
if (typeof file.asText === 'function') {
|
|
22
|
+
return file.asText();
|
|
23
|
+
}
|
|
24
|
+
if (typeof file.asBinary === 'function') {
|
|
25
|
+
return Buffer.from(file.asBinary(), 'binary').toString('utf8');
|
|
26
|
+
}
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Read a binary file from a ZIP archive as Buffer
|
|
31
|
+
*/
|
|
32
|
+
export function readZipFileBuffer(zip, filePath) {
|
|
33
|
+
const file = zip.file(filePath);
|
|
34
|
+
if (!file)
|
|
35
|
+
return null;
|
|
36
|
+
if (typeof file.asUint8Array === 'function') {
|
|
37
|
+
return Buffer.from(file.asUint8Array());
|
|
38
|
+
}
|
|
39
|
+
if (typeof file.asNodeBuffer === 'function') {
|
|
40
|
+
return file.asNodeBuffer();
|
|
41
|
+
}
|
|
42
|
+
if (typeof file.asBinary === 'function') {
|
|
43
|
+
return Buffer.from(file.asBinary(), 'binary');
|
|
44
|
+
}
|
|
45
|
+
return null;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Check if a file exists in the ZIP archive
|
|
49
|
+
*/
|
|
50
|
+
export function zipFileExists(zip, filePath) {
|
|
51
|
+
return zip.file(filePath) !== null;
|
|
52
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX reading utilities
|
|
3
|
+
* Extracts text, metadata, and compact outlines from DOCX files.
|
|
4
|
+
*/
|
|
5
|
+
import type { DocxMetadata, DocxParagraph, ReadDocxResult } from './types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Return a token-efficient outline of a DOCX file.
|
|
8
|
+
* Every paragraph gets a bodyChildIndex (among ALL w:body children)
|
|
9
|
+
* plus a paragraphIndex (counting only w:p), style id, and text.
|
|
10
|
+
*/
|
|
11
|
+
export declare function readDocxOutline(filePath: string): Promise<ReadDocxResult>;
|
|
12
|
+
/** Extract plain text from DOCX. */
|
|
13
|
+
export declare function extractTextFromDocx(path: string): Promise<string>;
|
|
14
|
+
/** Get comprehensive metadata. */
|
|
15
|
+
export declare function getDocxMetadata(path: string): Promise<DocxMetadata>;
|
|
16
|
+
/** Extract body XML. */
|
|
17
|
+
export declare function extractBodyXml(path: string): Promise<string>;
|
|
18
|
+
/** Read DOCX file with optional pagination. */
|
|
19
|
+
export declare function readDocx(path: string, options?: {
|
|
20
|
+
offset?: number;
|
|
21
|
+
length?: number;
|
|
22
|
+
}): Promise<{
|
|
23
|
+
text: string;
|
|
24
|
+
paragraphs: DocxParagraph[];
|
|
25
|
+
metadata: DocxMetadata;
|
|
26
|
+
bodyXml: string;
|
|
27
|
+
}>;
|