@wonderwhy-er/desktop-commander 0.2.36 → 0.2.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +240 -100
- package/dist/command-manager.js +6 -3
- package/dist/config-field-definitions.d.ts +41 -0
- package/dist/config-field-definitions.js +37 -0
- package/dist/config-manager.d.ts +2 -0
- package/dist/config-manager.js +22 -2
- package/dist/handlers/filesystem-handlers.js +6 -11
- package/dist/handlers/macos-control-handlers.d.ts +16 -0
- package/dist/handlers/macos-control-handlers.js +81 -0
- package/dist/lib.d.ts +10 -0
- package/dist/lib.js +10 -0
- package/dist/remote-device/remote-channel.d.ts +8 -3
- package/dist/remote-device/remote-channel.js +68 -21
- package/dist/search-manager.d.ts +13 -0
- package/dist/search-manager.js +146 -0
- package/dist/server.js +29 -1
- package/dist/test-docx.d.ts +1 -0
- package/dist/tools/config.d.ts +71 -0
- package/dist/tools/config.js +117 -2
- package/dist/tools/docx/builders/table.d.ts +2 -0
- package/dist/tools/docx/builders/table.js +60 -16
- package/dist/tools/docx/dom.d.ts +74 -1
- package/dist/tools/docx/dom.js +221 -1
- package/dist/tools/docx/index.d.ts +2 -2
- package/dist/tools/docx/ops/index.js +3 -0
- package/dist/tools/docx/ops/replace-paragraph-text-exact.d.ts +15 -3
- package/dist/tools/docx/ops/replace-paragraph-text-exact.js +25 -10
- package/dist/tools/docx/ops/replace-table-cell-text.d.ts +25 -0
- package/dist/tools/docx/ops/replace-table-cell-text.js +85 -0
- package/dist/tools/docx/ops/set-color-for-paragraph-exact.d.ts +2 -1
- package/dist/tools/docx/ops/set-color-for-paragraph-exact.js +9 -8
- package/dist/tools/docx/ops/set-color-for-style.d.ts +4 -0
- package/dist/tools/docx/ops/set-color-for-style.js +11 -7
- package/dist/tools/docx/ops/table-set-cell-text.js +8 -40
- package/dist/tools/docx/read.d.ts +2 -2
- package/dist/tools/docx/read.js +137 -17
- package/dist/tools/docx/types.d.ts +32 -3
- package/dist/tools/docx/xml-view-test.d.ts +1 -0
- package/dist/tools/docx/xml-view-test.js +63 -0
- package/dist/tools/docx/xml-view.d.ts +56 -0
- package/dist/tools/docx/xml-view.js +169 -0
- package/dist/tools/edit.js +57 -27
- package/dist/tools/macos-control/ax-adapter.d.ts +55 -0
- package/dist/tools/macos-control/ax-adapter.js +438 -0
- package/dist/tools/macos-control/cdp-adapter.d.ts +23 -0
- package/dist/tools/macos-control/cdp-adapter.js +402 -0
- package/dist/tools/macos-control/orchestrator.d.ts +77 -0
- package/dist/tools/macos-control/orchestrator.js +136 -0
- package/dist/tools/macos-control/role-aliases.d.ts +5 -0
- package/dist/tools/macos-control/role-aliases.js +34 -0
- package/dist/tools/macos-control/types.d.ts +129 -0
- package/dist/tools/macos-control/types.js +1 -0
- package/dist/tools/schemas.d.ts +3 -0
- package/dist/tools/schemas.js +2 -1
- package/dist/types.d.ts +0 -1
- package/dist/ui/config-editor/config-editor-runtime.js +14181 -0
- package/dist/ui/config-editor/index.html +13 -0
- package/dist/ui/config-editor/src/app.d.ts +43 -0
- package/dist/ui/config-editor/src/app.js +840 -0
- package/dist/ui/config-editor/src/array-modal.d.ts +19 -0
- package/dist/ui/config-editor/src/array-modal.js +185 -0
- package/dist/ui/config-editor/src/main.d.ts +1 -0
- package/dist/ui/config-editor/src/main.js +2 -0
- package/dist/ui/config-editor/styles.css +586 -0
- package/dist/ui/file-preview/preview-runtime.js +13337 -752
- package/dist/ui/file-preview/shared/preview-file-types.js +3 -1
- package/dist/ui/file-preview/src/app.d.ts +5 -1
- package/dist/ui/file-preview/src/app.js +114 -200
- package/dist/ui/file-preview/src/components/html-renderer.d.ts +1 -5
- package/dist/ui/file-preview/src/components/html-renderer.js +11 -27
- package/dist/ui/file-preview/styles.css +117 -83
- package/dist/ui/resources.d.ts +7 -0
- package/dist/ui/resources.js +16 -2
- package/dist/ui/shared/compact-row.d.ts +11 -0
- package/dist/ui/shared/compact-row.js +18 -0
- package/dist/ui/shared/host-context.d.ts +15 -0
- package/dist/ui/shared/host-context.js +51 -0
- package/dist/ui/shared/tool-bridge.d.ts +30 -0
- package/dist/ui/shared/tool-bridge.js +137 -0
- package/dist/ui/shared/tool-shell.d.ts +9 -0
- package/dist/ui/shared/tool-shell.js +46 -4
- package/dist/ui/shared/ui-event-tracker.d.ts +9 -0
- package/dist/ui/shared/ui-event-tracker.js +27 -0
- package/dist/utils/capture.js +173 -11
- package/dist/utils/files/base.d.ts +3 -1
- package/dist/utils/files/docx.d.ts +28 -15
- package/dist/utils/files/docx.js +622 -88
- package/dist/utils/files/factory.d.ts +6 -5
- package/dist/utils/files/factory.js +18 -6
- package/dist/utils/system-info.js +1 -1
- package/dist/utils/usageTracker.js +5 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +8 -3
package/dist/tools/docx/read.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
import fs from 'fs/promises';
|
|
6
6
|
import PizZip from 'pizzip';
|
|
7
7
|
import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
|
|
8
|
-
import { nodeListToArray, getParagraphText, getParagraphStyle, getBody, getBodyChildren,
|
|
8
|
+
import { nodeListToArray, getParagraphText, getParagraphStyle, getBody, getBodyChildren, getAllBodyTables, countImages, getTableContent, getTableStyle, getImageReference, } from './dom.js';
|
|
9
9
|
// ═══════════════════════════════════════════════════════════════════════
|
|
10
10
|
// Internal helpers
|
|
11
11
|
// ═══════════════════════════════════════════════════════════════════════
|
|
@@ -16,10 +16,59 @@ async function loadDocx(path) {
|
|
|
16
16
|
// ═══════════════════════════════════════════════════════════════════════
|
|
17
17
|
// readDocxOutline — compact JSON outline (used by read_docx tool)
|
|
18
18
|
// ═══════════════════════════════════════════════════════════════════════
|
|
19
|
+
/**
 * Extract image relationship mappings from word/_rels/document.xml.rels.
 *
 * Only internal relationships whose Type contains "/image" and whose target
 * resolves to a part under word/media/ are returned. Targets may be written
 * relative to the word/ folder ("media/image1.png") or as an absolute part
 * name ("/word/media/image1.png"); both forms map to the same zip entry path.
 *
 * @param zip Opened DOCX archive; only `zip.file(path)` and the returned
 *            entry's `asText()` are used here.
 * @returns Map of relationship id -> media path inside the archive
 *          (e.g. "rId1" -> "word/media/image1.png"). Empty when the rels
 *          part is missing.
 */
function extractImageRelationships(zip) {
    const relsFile = zip.file('word/_rels/document.xml.rels');
    if (!relsFile)
        return new Map();
    const relsDom = new DOMParser().parseFromString(relsFile.asText(), 'application/xml');
    const imageMap = new Map();
    for (const rel of nodeListToArray(relsDom.getElementsByTagName('Relationship'))) {
        const type = rel.getAttribute('Type');
        const id = rel.getAttribute('Id');
        const target = rel.getAttribute('Target');
        if (!type || !type.includes('/image') || !id || !target)
            continue;
        // External targets live outside the package, so there is no zip entry to point at.
        if (rel.getAttribute('TargetMode') === 'External')
            continue;
        // A leading "/" marks an absolute part name; anything else is relative to word/.
        const mediaPath = target.startsWith('/') ? target.slice(1) : `word/${target}`;
        if (mediaPath.startsWith('word/media/')) {
            imageMap.set(id, mediaPath);
        }
    }
    return imageMap;
}
|
|
48
|
+
/**
 * Find the alt text of the image inside a drawing element.
 *
 * The "descr" attribute of the first wp:docPr descendant wins; when it is
 * absent or empty, the first pic:cNvPr descendant is consulted instead.
 *
 * @param drawing Element exposing getElementsByTagName().
 * @returns The first non-empty descr value, or undefined when there is none.
 */
function getImageAltText(drawing) {
    for (const tagName of ['wp:docPr', 'pic:cNvPr']) {
        const holder = drawing.getElementsByTagName(tagName).item(0);
        const description = holder ? holder.getAttribute('descr') : null;
        if (description) {
            return description;
        }
    }
    return undefined;
}
|
|
19
68
|
/**
|
|
20
69
|
* Return a token-efficient outline of a DOCX file.
|
|
21
|
-
*
|
|
22
|
-
*
|
|
70
|
+
* Extracts paragraphs, tables (with full cell content), and images (references only, not binary).
|
|
71
|
+
* Every element gets a bodyChildIndex (among ALL w:body children).
|
|
23
72
|
*/
|
|
24
73
|
export async function readDocxOutline(filePath) {
|
|
25
74
|
const zip = await loadDocx(filePath);
|
|
@@ -30,31 +79,102 @@ export async function readDocxOutline(filePath) {
|
|
|
30
79
|
const dom = new DOMParser().parseFromString(xmlStr, 'application/xml');
|
|
31
80
|
const body = getBody(dom);
|
|
32
81
|
const children = getBodyChildren(body);
|
|
82
|
+
// Extract image relationships (rId -> mediaPath)
|
|
83
|
+
const imageRelationships = extractImageRelationships(zip);
|
|
33
84
|
const paragraphs = [];
|
|
85
|
+
const tables = [];
|
|
86
|
+
const images = [];
|
|
34
87
|
const stylesSet = new Set();
|
|
35
88
|
let paragraphIndex = 0;
|
|
89
|
+
let tableIndex = 0;
|
|
90
|
+
let imageIndex = 0;
|
|
36
91
|
for (let i = 0; i < children.length; i++) {
|
|
37
92
|
const child = children[i];
|
|
38
|
-
if (child.nodeName
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
93
|
+
if (child.nodeName === 'w:p') {
|
|
94
|
+
// Extract paragraph
|
|
95
|
+
const text = getParagraphText(child).trim();
|
|
96
|
+
const style = getParagraphStyle(child);
|
|
97
|
+
if (style)
|
|
98
|
+
stylesSet.add(style);
|
|
99
|
+
paragraphs.push({
|
|
100
|
+
bodyChildIndex: i,
|
|
101
|
+
paragraphIndex,
|
|
102
|
+
style,
|
|
103
|
+
text,
|
|
104
|
+
});
|
|
105
|
+
paragraphIndex++;
|
|
106
|
+
// Check if paragraph contains an image (w:drawing)
|
|
107
|
+
const drawings = child.getElementsByTagName('w:drawing');
|
|
108
|
+
for (let d = 0; d < drawings.length; d++) {
|
|
109
|
+
const drawing = drawings.item(d);
|
|
110
|
+
const imgRef = getImageReference(drawing);
|
|
111
|
+
if (imgRef.rId) {
|
|
112
|
+
const mediaPath = imageRelationships.get(imgRef.rId);
|
|
113
|
+
if (mediaPath) {
|
|
114
|
+
const altText = getImageAltText(drawing);
|
|
115
|
+
images.push({
|
|
116
|
+
bodyChildIndex: i,
|
|
117
|
+
imageIndex,
|
|
118
|
+
mediaPath,
|
|
119
|
+
rId: imgRef.rId,
|
|
120
|
+
altText,
|
|
121
|
+
});
|
|
122
|
+
imageIndex++;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
else if (child.nodeName === 'w:tbl') {
|
|
128
|
+
// Extract table content (direct table in body)
|
|
129
|
+
const tableContent = getTableContent(child);
|
|
130
|
+
const style = getTableStyle(child);
|
|
131
|
+
if (style)
|
|
132
|
+
stylesSet.add(style);
|
|
133
|
+
tables.push({
|
|
134
|
+
bodyChildIndex: i,
|
|
135
|
+
tableIndex,
|
|
136
|
+
style,
|
|
137
|
+
headers: tableContent.headers,
|
|
138
|
+
rows: tableContent.rows,
|
|
139
|
+
});
|
|
140
|
+
tableIndex++;
|
|
141
|
+
}
|
|
142
|
+
else if (child.nodeName === 'w:sdt') {
|
|
143
|
+
// Structured document tag: look inside w:sdtContent for tables that
|
|
144
|
+
// are logically at this body position.
|
|
145
|
+
const sdtContent = child.getElementsByTagName('w:sdtContent').item(0);
|
|
146
|
+
if (sdtContent) {
|
|
147
|
+
for (const sdtChild of nodeListToArray(sdtContent.childNodes)) {
|
|
148
|
+
if (sdtChild.nodeType === 1 &&
|
|
149
|
+
sdtChild.nodeName === 'w:tbl') {
|
|
150
|
+
const tbl = sdtChild;
|
|
151
|
+
const tableContent = getTableContent(tbl);
|
|
152
|
+
const style = getTableStyle(tbl);
|
|
153
|
+
if (style)
|
|
154
|
+
stylesSet.add(style);
|
|
155
|
+
tables.push({
|
|
156
|
+
bodyChildIndex: i,
|
|
157
|
+
tableIndex,
|
|
158
|
+
style,
|
|
159
|
+
headers: tableContent.headers,
|
|
160
|
+
rows: tableContent.rows,
|
|
161
|
+
});
|
|
162
|
+
tableIndex++;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
51
167
|
}
|
|
52
168
|
return {
|
|
53
169
|
path: filePath,
|
|
54
170
|
paragraphs,
|
|
171
|
+
tables,
|
|
172
|
+
images,
|
|
55
173
|
stylesSeen: [...stylesSet].sort(),
|
|
56
174
|
counts: {
|
|
57
|
-
tables
|
|
175
|
+
// Table count should reflect all logical tables, including those
|
|
176
|
+
// wrapped in SDTs, so we reuse the same helper used by ops.
|
|
177
|
+
tables: getAllBodyTables(body).length,
|
|
58
178
|
images: countImages(body),
|
|
59
179
|
bodyChildren: children.length,
|
|
60
180
|
},
|
|
@@ -41,9 +41,25 @@ export interface ParagraphOutline {
|
|
|
41
41
|
style: string | null;
|
|
42
42
|
text: string;
|
|
43
43
|
}
|
|
44
|
+
/** Outline entry describing one table reported by readDocxOutline. */
export interface TableOutline {
    /**
     * Position of the table among all w:body children. For a table wrapped
     * in a w:sdt this is the position of the enclosing w:sdt element.
     */
    bodyChildIndex: number;
    /** Zero-based running index among the tables collected in the outline. */
    tableIndex: number;
    /** Table style id, or null when the table declares none. */
    style: string | null;
    /** Header cell texts, when the table content reports a header row. */
    headers?: string[];
    /** Cell texts, one inner array per row. */
    rows: string[][];
}
|
|
51
|
+
/** Outline entry describing one image reference (never the binary data). */
export interface ImageOutline {
    /** Position, among all w:body children, of the paragraph holding the drawing. */
    bodyChildIndex: number;
    /** Zero-based running index among the images collected in the outline. */
    imageIndex: number;
    /** Path of the media part inside the DOCX archive, e.g. "word/media/image1.png". */
    mediaPath: string;
    /** Relationship id that links the drawing to the media part. */
    rId: string;
    /** Alt text taken from the drawing's descr attribute, when present. */
    altText?: string;
}
|
|
44
58
|
export interface ReadDocxResult {
|
|
45
59
|
path: string;
|
|
46
60
|
paragraphs: ParagraphOutline[];
|
|
61
|
+
tables: TableOutline[];
|
|
62
|
+
images: ImageOutline[];
|
|
47
63
|
stylesSeen: string[];
|
|
48
64
|
counts: {
|
|
49
65
|
tables: number;
|
|
@@ -112,6 +128,11 @@ export interface TableSetCellTextOp {
|
|
|
112
128
|
col: number;
|
|
113
129
|
text: string;
|
|
114
130
|
}
|
|
131
|
+
/**
 * Operation tagged 'replace_table_cell_text'.
 *
 * NOTE(review): presumably replaces table cell text matching `from` with
 * `to`; the matching rules live in the op implementation — confirm there.
 */
export interface ReplaceTableCellTextOp {
    /** Discriminator used by the DocxOp union. */
    type: 'replace_table_cell_text';
    /** Text to look for. */
    from: string;
    /** Replacement text. */
    to: string;
}
|
|
115
136
|
export interface ReplaceHyperlinkUrlOp {
|
|
116
137
|
type: 'replace_hyperlink_url';
|
|
117
138
|
oldUrl: string;
|
|
@@ -152,7 +173,7 @@ export interface InsertImageOp {
|
|
|
152
173
|
/** Alt text for accessibility */
|
|
153
174
|
altText?: string;
|
|
154
175
|
}
|
|
155
|
-
export type DocxOp = ReplaceParagraphTextExactOp | ReplaceParagraphAtBodyIndexOp | SetColorForStyleOp | SetColorForParagraphExactOp | SetParagraphStyleAtBodyIndexOp | InsertParagraphAfterTextOp | DeleteParagraphAtBodyIndexOp | TableSetCellTextOp | ReplaceHyperlinkUrlOp | HeaderReplaceTextExactOp | InsertTableOp | InsertImageOp;
|
|
176
|
+
export type DocxOp = ReplaceParagraphTextExactOp | ReplaceParagraphAtBodyIndexOp | SetColorForStyleOp | SetColorForParagraphExactOp | SetParagraphStyleAtBodyIndexOp | InsertParagraphAfterTextOp | DeleteParagraphAtBodyIndexOp | TableSetCellTextOp | ReplaceTableCellTextOp | ReplaceHyperlinkUrlOp | HeaderReplaceTextExactOp | InsertTableOp | InsertImageOp;
|
|
156
177
|
export interface OpResult {
|
|
157
178
|
op: DocxOp;
|
|
158
179
|
status: 'applied' | 'skipped';
|
|
@@ -164,10 +185,18 @@ export interface DocxContentParagraph {
|
|
|
164
185
|
text: string;
|
|
165
186
|
style?: string | null;
|
|
166
187
|
}
|
|
188
|
+
/**
 * Content of a single table cell. Either:
 * - a plain string (simple case, creates one paragraph), or
 * - an array of paragraphs (several paragraphs, each with its own style).
 */
export type DocxTableCellContent = string | DocxContentParagraph[];
|
|
167
194
|
/** Table content item; each cell may be plain text or a list of paragraphs. */
export interface DocxContentTable {
    /** Discriminator marking this content item as a table. */
    type: 'table';
    /** Optional header cells - strings or arrays of paragraphs. */
    headers?: DocxTableCellContent[];
    /** Data rows - one inner array per row, one entry per cell. */
    rows: DocxTableCellContent[][];
    /** Optional per-column widths (units are not stated here — TODO confirm). */
    colWidths?: number[];
    /** Optional table style name. */
    style?: string;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
 * Quick manual test: read the pretty-printed XML of a DOCX, show the context
 * around a known checklist entry, then tick its checkbox via editDocxXml.
 *
 * Usage: node xml-view-test.js [input.docx] [output.docx]
 * Both paths fall back to the original developer fixture locations.
 */
import { readDocxXml, editDocxXml } from './xml-view.js';
const DEFAULT_INPUT = '/Users/eduardsruzga/Downloads/dcox/Wedding Photo Checklist.docx';
const DEFAULT_OUTPUT = '/Users/eduardsruzga/Downloads/dcox/Wedding Photo Checklist - XML Edit Test.docx';
const MARKER_TEXT = 'Bride hairstyling';
async function main() {
    const [input = DEFAULT_INPUT, output = DEFAULT_OUTPUT] = process.argv.slice(2);
    // 1. Extract and show stats (default offset/length returns the whole document)
    const view = await readDocxXml(input);
    console.log(`Extracted XML: ${view.lineCount} lines, ${view.rawSize} raw chars`);
    // 2. Show context around the marker text with EXACT indentation
    const lines = view.content.split('\n');
    const markerLine = lines.findIndex((line) => line.includes(MARKER_TEXT));
    if (markerLine === -1) {
        throw new Error(`"${MARKER_TEXT}" not found in ${input}`);
    }
    console.log(`\nFound "${MARKER_TEXT}" at line ${markerLine + 1}:`);
    const contextEnd = Math.min(lines.length - 1, markerLine + 5);
    for (let j = Math.max(0, markerLine - 20); j <= contextEnd; j++) {
        console.log(`${String(j + 1).padStart(4)}|${lines[j]}`);
    }
    // 3. Find the checkbox by scanning backwards (at most 25 lines) from the marker
    let checkboxLine = -1;
    for (let j = markerLine - 1; j >= Math.max(0, markerLine - 25); j--) {
        if (lines[j].includes('☐')) {
            checkboxLine = j;
            break;
        }
    }
    if (checkboxLine === -1) {
        // Without this guard lines[-1] is undefined and the replace below throws a TypeError.
        throw new Error(`No ☐ checkbox found in the 25 lines before line ${markerLine + 1}`);
    }
    console.log(`\nCheckbox ☐ found at line ${checkboxLine + 1}: "${lines[checkboxLine]}"`);
    const oldStr = lines[checkboxLine];
    const newStr = oldStr.replace('☐', '☑');
    console.log(`\nold: "${oldStr}"`);
    console.log(`new: "${newStr}"`);
    // Check uniqueness of the checkbox line on its own
    const matches = lines.filter((l) => l === oldStr).length;
    console.log(`Exact line matches in document: ${matches}`);
    if (matches <= 1) {
        const result = await editDocxXml(input, output, oldStr, newStr);
        console.log(`\nEdit result:`, result);
        return;
    }
    // Expand the context downwards until the search string matches exactly once
    for (let ctx = 3; ctx <= 30; ctx++) {
        const contextOld = lines.slice(checkboxLine, checkboxLine + ctx).join('\n');
        const contextMatches = view.content.split(contextOld).length - 1;
        if (contextMatches === 1) {
            console.log(`\nUnique match found with ${ctx}-line context`);
            const contextNew = contextOld.replace('☐', '☑');
            const result = await editDocxXml(input, output, contextOld, contextNew);
            console.log(`Edit result:`, result);
            return;
        }
        console.log(`${ctx} lines of context: ${contextMatches} matches`);
    }
    throw new Error('Could not build a unique search string within 30 lines of context');
}
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX XML View
|
|
3
|
+
*
|
|
4
|
+
* Exposes word/document.xml from a DOCX file as pretty-printed XML that can be
|
|
5
|
+
* read with offset/length pagination and edited with find/replace, just like a
|
|
6
|
+
* text file. All formatting is preserved because we operate on the actual XML.
|
|
7
|
+
*
|
|
8
|
+
* Round-trip: DOCX → unzip → pretty-print → edit → compact → repack → DOCX
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Pretty-print XML, one fragment per line.
 *
 * The input is split wherever a ">" is immediately followed by a "<", and
 * each fragment is indented two spaces per nesting level. Text between tags
 * is never split, so an element such as <w:t>text</w:t> stays on one line.
 *
 * @param xml Compact XML source.
 * @returns The indented, newline-separated form.
 */
export declare function prettyPrintXml(xml: string): string;
|
|
18
|
+
/**
 * Collapse pretty-printed XML back into compact form by removing the line
 * breaks and indentation that separate tags.
 *
 * CRITICAL: callers rely on this not altering whitespace inside <w:t> text
 * nodes (xml:space="preserve" semantics).
 *
 * @param prettyXml Output of prettyPrintXml, possibly edited.
 * @returns Compact XML suitable for writing back to word/document.xml.
 */
export declare function compactXml(prettyXml: string): string;
|
|
27
|
+
/** Result of reading a DOCX's word/document.xml as line-based XML. */
export interface DocxXmlReadResult {
    /** Pretty-printed XML for the requested line window. */
    content: string;
    /** Total number of pretty-printed lines in the whole document. */
    lineCount: number;
    /** Source DOCX path, as passed in. */
    path: string;
    /** Size of the raw (compact) XML in chars. */
    rawSize: number;
}
|
|
37
|
+
/**
 * Read word/document.xml from a DOCX as pretty-printed, line-based XML.
 *
 * @param filePath Path of the DOCX file.
 * @param offset   Zero-based first line to return (default 0). A negative
 *                 value returns the last |offset| lines; `length` is then ignored.
 * @param length   Maximum number of lines to return; defaults to all lines
 *                 from `offset` onwards.
 * @throws Error when the archive has no word/document.xml.
 */
export declare function readDocxXml(filePath: string, offset?: number, length?: number): Promise<DocxXmlReadResult>;
|
|
42
|
+
/**
 * Apply a find/replace edit to a DOCX file's XML and write a new DOCX.
 *
 * The search runs against the pretty-printed XML, so lines copied from a
 * previous read can be used directly as the search string. After editing,
 * the XML is compacted and repacked into the DOCX zip.
 *
 * @param inputPath            DOCX to read.
 * @param outputPath           DOCX to write; only written when status is 'applied'.
 * @param oldStr               Exact text to find in the pretty-printed XML.
 * @param newStr               Replacement text.
 * @param expectedReplacements Number of occurrences that must be found (default 1).
 * @returns status 'applied' on success, 'no_match' when oldStr is not found,
 *          'unexpected_count' when the number of occurrences differs from
 *          expectedReplacements; plus the match count and output path.
 */
export declare function editDocxXml(inputPath: string, outputPath: string, oldStr: string, newStr: string, expectedReplacements?: number): Promise<{
    status: 'applied' | 'no_match' | 'unexpected_count';
    matchCount: number;
    outputPath: string;
    message?: string;
}>;
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX XML View
|
|
3
|
+
*
|
|
4
|
+
* Exposes word/document.xml from a DOCX file as pretty-printed XML that can be
|
|
5
|
+
* read with offset/length pagination and edited with find/replace, just like a
|
|
6
|
+
* text file. All formatting is preserved because we operate on the actual XML.
|
|
7
|
+
*
|
|
8
|
+
* Round-trip: DOCX → unzip → pretty-print → edit → compact → repack → DOCX
|
|
9
|
+
*/
|
|
10
|
+
import fs from 'fs/promises';
|
|
11
|
+
import PizZip from 'pizzip';
|
|
12
|
+
// ════════════════════════════════════════════════════════════════
|
|
13
|
+
// XML pretty-print / compact
|
|
14
|
+
// ════════════════════════════════════════════════════════════════
|
|
15
|
+
/**
 * Pretty-print XML, one fragment per line, two spaces of indent per level.
 *
 * The input is cut only where a ">" is directly followed by a "<", so text
 * between tags is never split. Closing tags step the indent back before they
 * are printed; a fragment deepens the indent for what follows only when it is
 * a bare opening tag (not self-closing, not a "<?...?>" instruction, and not
 * an inline element that also contains its own closing tag).
 *
 * @param xml Compact XML source.
 * @returns The indented, newline-separated form.
 */
export function prettyPrintXml(xml) {
    const INDENT = '  ';
    const output = [];
    let level = 0;
    xml.split(/(?<=>)(?=<)/).forEach((fragment) => {
        const token = fragment.trim();
        if (token === '') {
            return;
        }
        const closesElement = token.startsWith('</');
        const opensElement = !closesElement &&
            !token.endsWith('/>') &&
            !token.includes('</') &&
            !token.startsWith('<?');
        if (closesElement && level > 0) {
            level -= 1;
        }
        output.push(INDENT.repeat(level) + token);
        if (opensElement) {
            level += 1;
        }
    });
    return output.join('\n');
}
|
|
47
|
+
/**
 * Compact pretty-printed XML back to a single line.
 *
 * CRITICAL: Must not introduce or remove whitespace inside <w:t> text nodes
 * or break xml:space="preserve" semantics. Only the separators that
 * pretty-printing adds are removed: a line break (LF or CRLF) plus the
 * following indentation sitting between a ">" and the next "<". Line breaks
 * and leading spaces in the middle of text content are left untouched.
 *
 * @param prettyXml Output of prettyPrintXml, possibly edited.
 * @returns Compact XML suitable for writing back to word/document.xml.
 */
export function compactXml(prettyXml) {
    // ">" + optional trailing blanks/CR + newline + indentation + "<"  ->  "><"
    const joined = prettyXml.replace(/>[^\S\n]*\n\s*</g, '><').trimStart();
    // Drop trailing whitespace only when it contains a line break (e.g. a final
    // newline added by an editor); trailing spaces on the last line are kept.
    const withoutTail = joined.trimEnd();
    return joined.slice(withoutTail.length).includes('\n') ? withoutTail : joined;
}
|
|
59
|
+
/**
 * Read word/document.xml from a DOCX and return a window of its
 * pretty-printed lines.
 *
 * @param filePath Path of the DOCX file.
 * @param offset   Zero-based first line (default 0). A negative value selects
 *                 the last |offset| lines and `length` is ignored.
 * @param length   Maximum number of lines; defaults to everything from `offset`.
 * @returns The selected lines joined with "\n", the total line count, the
 *          source path and the character length of the raw XML.
 * @throws Error when the archive has no word/document.xml.
 */
export async function readDocxXml(filePath, offset = 0, length) {
    const archive = new PizZip(await fs.readFile(filePath));
    const documentPart = archive.file('word/document.xml');
    if (!documentPart) {
        throw new Error('Invalid DOCX: missing word/document.xml');
    }
    const rawXml = documentPart.asText();
    const prettyLines = prettyPrintXml(rawXml).split('\n');
    const lineCount = prettyLines.length;
    // Negative offset = tail mode: start |offset| lines before the end and run to the end.
    const tailRequested = offset < 0;
    const firstLine = tailRequested ? Math.max(0, lineCount + offset) : offset;
    const span = tailRequested ? lineCount - firstLine : (length ?? lineCount);
    return {
        content: prettyLines.slice(firstLine, firstLine + span).join('\n'),
        lineCount,
        path: filePath,
        rawSize: rawXml.length,
    };
}
|
|
94
|
+
/**
 * Apply a find/replace edit to a DOCX file's XML and write a new DOCX.
 *
 * The edit operates on the pretty-printed XML so that line-based context
 * from read_file can be used directly as the search string. After editing,
 * the XML is compacted back and repacked into the DOCX zip.
 *
 * @param inputPath            DOCX to read.
 * @param outputPath           DOCX to write; only written when the edit is applied.
 * @param oldStr               Exact text to find (must be a non-empty string).
 * @param newStr               Replacement text.
 * @param expectedReplacements Number of occurrences that must be present (default 1).
 * @returns result with status, match count, and output path. An empty or
 *          non-string oldStr yields status 'no_match' without touching any file.
 * @throws Error when the archive has no word/document.xml.
 */
export async function editDocxXml(inputPath, outputPath, oldStr, newStr, expectedReplacements = 1) {
    // An empty search string matches at every position; reject it up front
    // instead of letting the occurrence count spin forever.
    if (typeof oldStr !== 'string' || oldStr === '') {
        return {
            status: 'no_match',
            matchCount: 0,
            outputPath,
            message: 'Search string must be a non-empty string',
        };
    }
    // 1. Read + pretty-print
    const buf = await fs.readFile(inputPath);
    const zip = new PizZip(buf);
    const docFile = zip.file('word/document.xml');
    if (!docFile)
        throw new Error('Invalid DOCX: missing word/document.xml');
    const rawXml = docFile.asText();
    const pretty = prettyPrintXml(rawXml);
    // 2. Count exact matches. The search string is used verbatim, so it must
    //    carry the same indentation as the pretty-printed output.
    const matchCount = countOccurrences(pretty, oldStr);
    if (matchCount === 0) {
        return {
            status: 'no_match',
            matchCount: 0,
            outputPath,
            message: `Search string not found in ${inputPath}`,
        };
    }
    if (matchCount !== expectedReplacements) {
        return {
            status: 'unexpected_count',
            matchCount,
            outputPath,
            message: `Expected ${expectedReplacements} occurrence(s) but found ${matchCount}. ` +
                `Add more surrounding context to make the search string unique, ` +
                `or set expected_replacements to ${matchCount} to replace all.`,
        };
    }
    // 3. Apply edit(s)
    let edited = pretty;
    if (expectedReplacements === 1) {
        const idx = edited.indexOf(oldStr);
        edited = edited.substring(0, idx) + newStr + edited.substring(idx + oldStr.length);
    }
    else {
        edited = edited.split(oldStr).join(newStr);
    }
    // 4. Compact + repack
    const compacted = compactXml(edited);
    zip.file('word/document.xml', compacted);
    const outBuf = zip.generate({
        type: 'nodebuffer',
        compression: 'DEFLATE',
        compressionOptions: { level: 6 },
    });
    await fs.writeFile(outputPath, outBuf);
    return {
        status: 'applied',
        matchCount,
        outputPath,
    };
}
|
|
158
|
+
// ════════════════════════════════════════════════════════════════
|
|
159
|
+
// Helpers
|
|
160
|
+
// ════════════════════════════════════════════════════════════════
|
|
161
|
+
/**
 * Count how many times `needle` occurs in `haystack`.
 *
 * Matches are counted at every start position, so overlapping occurrences
 * are all included ("aaa" contains "aa" twice).
 * NOTE(review): the multi-replace path uses split/join, which replaces
 * non-overlapping matches only — confirm the overlap-inclusive count is intended.
 *
 * @param haystack Text to search.
 * @param needle   Text to look for; an empty needle yields 0.
 * @returns Number of occurrences found.
 */
function countOccurrences(haystack, needle) {
    // indexOf('') never returns -1 (it clamps to the string length), so an
    // empty needle would keep the loop below running forever.
    if (needle === '') {
        return 0;
    }
    let count = 0;
    let pos = haystack.indexOf(needle);
    while (pos !== -1) {
        count++;
        pos = haystack.indexOf(needle, pos + 1);
    }
    return count;
}
|
package/dist/tools/edit.js
CHANGED
|
@@ -305,28 +305,36 @@ function highlightDifferences(expected, actual) {
|
|
|
305
305
|
*/
|
|
306
306
|
export async function handleEditBlock(args) {
|
|
307
307
|
const parsed = EditBlockArgsSchema.parse(args);
|
|
308
|
-
// Structured files: Range rewrite
|
|
309
308
|
// Note: Check for truthy range to handle empty strings from AI clients that send all optional params
|
|
310
309
|
const hasRange = parsed.range !== undefined && parsed.range !== '';
|
|
311
310
|
const hasContent = parsed.content !== undefined && parsed.content !== '';
|
|
311
|
+
// Validate path and resolve handler once — used by both dispatch paths below
|
|
312
|
+
let validatedPath;
|
|
313
|
+
let handler;
|
|
314
|
+
try {
|
|
315
|
+
validatedPath = await validatePath(parsed.file_path);
|
|
316
|
+
const { getFileHandler } = await import('../utils/files/factory.js');
|
|
317
|
+
handler = await getFileHandler(validatedPath);
|
|
318
|
+
}
|
|
319
|
+
catch (error) {
|
|
320
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
321
|
+
return createErrorResponse(errorMessage);
|
|
322
|
+
}
|
|
323
|
+
const hasEditRange = 'editRange' in handler && typeof handler.editRange === 'function';
|
|
324
|
+
// Path 1: Range rewrite (Excel, etc.) — range + content
|
|
312
325
|
if (hasRange && hasContent) {
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
// Parse content if it's a JSON string (AI often sends arrays as JSON strings)
|
|
319
|
-
let content = parsed.content;
|
|
320
|
-
if (typeof content === 'string') {
|
|
321
|
-
try {
|
|
322
|
-
content = JSON.parse(content);
|
|
323
|
-
}
|
|
324
|
-
catch {
|
|
325
|
-
// Leave as-is if not valid JSON - let handler decide
|
|
326
|
-
}
|
|
326
|
+
// Parse content if it's a JSON string (AI often sends arrays as JSON strings)
|
|
327
|
+
let content = parsed.content;
|
|
328
|
+
if (typeof content === 'string') {
|
|
329
|
+
try {
|
|
330
|
+
content = JSON.parse(content);
|
|
327
331
|
}
|
|
328
|
-
|
|
329
|
-
|
|
332
|
+
catch {
|
|
333
|
+
// Leave as-is if not valid JSON - let handler decide
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
if (hasEditRange) {
|
|
337
|
+
try {
|
|
330
338
|
// parsed.range is guaranteed non-empty string by hasRange check above
|
|
331
339
|
await handler.editRange(validatedPath, parsed.range, content, parsed.options);
|
|
332
340
|
return {
|
|
@@ -336,23 +344,45 @@ export async function handleEditBlock(args) {
|
|
|
336
344
|
}],
|
|
337
345
|
};
|
|
338
346
|
}
|
|
339
|
-
|
|
340
|
-
|
|
347
|
+
catch (error) {
|
|
348
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
349
|
+
return createErrorResponse(errorMessage);
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
return createErrorResponse(`Range-based editing not supported for ${parsed.file_path}. For text files, use old_string and new_string parameters instead. If your client requires range/content parameters, set them to empty strings ("").`);
|
|
353
|
+
}
|
|
354
|
+
// Path 2: Text replacement — old_string + new_string
|
|
355
|
+
if (parsed.old_string === undefined || parsed.new_string === undefined) {
|
|
356
|
+
return createErrorResponse(`Text replacement requires both old_string and new_string parameters`);
|
|
357
|
+
}
|
|
358
|
+
// If the handler implements editRange it owns text-replacement for its file type
|
|
359
|
+
// (e.g. DocxFileHandler does find/replace on pretty-printed XML rather than raw bytes).
|
|
360
|
+
// Plain text files fall through to performSearchReplace.
|
|
361
|
+
if (hasEditRange) {
|
|
362
|
+
try {
|
|
363
|
+
const result = await handler.editRange(validatedPath, '', {
|
|
364
|
+
old_string: parsed.old_string,
|
|
365
|
+
new_string: parsed.new_string,
|
|
366
|
+
expected_replacements: parsed.expected_replacements,
|
|
367
|
+
});
|
|
368
|
+
if (result.success) {
|
|
369
|
+
return {
|
|
370
|
+
content: [{
|
|
371
|
+
type: "text",
|
|
372
|
+
text: `Successfully applied ${result.editsApplied} edit(s) to ${parsed.file_path}`
|
|
373
|
+
}],
|
|
374
|
+
};
|
|
341
375
|
}
|
|
376
|
+
const errorMsg = result.errors?.map(e => e.error).join('; ') || 'Unknown error';
|
|
377
|
+
return createErrorResponse(errorMsg);
|
|
342
378
|
}
|
|
343
379
|
catch (error) {
|
|
344
380
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
345
381
|
return createErrorResponse(errorMessage);
|
|
346
382
|
}
|
|
347
383
|
}
|
|
348
|
-
|
|
349
|
-
// Validate required parameters for text replacement
|
|
350
|
-
if (parsed.old_string === undefined || parsed.new_string === undefined) {
|
|
351
|
-
return createErrorResponse(`Text replacement requires both old_string and new_string parameters`);
|
|
352
|
-
}
|
|
353
|
-
const searchReplace = {
|
|
384
|
+
return performSearchReplace(parsed.file_path, {
|
|
354
385
|
search: parsed.old_string,
|
|
355
386
|
replace: parsed.new_string
|
|
356
|
-
};
|
|
357
|
-
return performSearchReplace(parsed.file_path, searchReplace, parsed.expected_replacements);
|
|
387
|
+
}, parsed.expected_replacements);
|
|
358
388
|
}
|