@adeu/core 1.7.1 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +14 -13
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -4
- package/dist/index.d.ts +2 -4
- package/dist/index.js +14 -13
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
- package/src/docx/bridge.ts +14 -12
- package/src/sanitize/sanitize.test.ts +134 -104
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adeu/core",
|
|
3
|
-
"version": "1.7.1",
|
|
3
|
+
"version": "1.7.4",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -20,6 +20,9 @@
|
|
|
20
20
|
"url": "https://github.com/dealfluence/adeu.git",
|
|
21
21
|
"directory": "node/packages/core"
|
|
22
22
|
},
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=22.0.0"
|
|
25
|
+
},
|
|
23
26
|
"publishConfig": {
|
|
24
27
|
"access": "public"
|
|
25
28
|
},
|
|
@@ -29,7 +32,7 @@
|
|
|
29
32
|
"dependencies": {
|
|
30
33
|
"@xmldom/xmldom": "^0.9.10",
|
|
31
34
|
"diff-match-patch": "^1.0.5",
|
|
32
|
-
"
|
|
35
|
+
"fflate": "^0.8.2",
|
|
33
36
|
"xpath": "^0.0.34"
|
|
34
37
|
},
|
|
35
38
|
"devDependencies": {
|
package/src/docx/bridge.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
// FILE: node/packages/core/src/docx/bridge.ts
|
|
2
|
+
import { unzipSync, zipSync, strFromU8, strToU8 } from "fflate";
|
|
2
3
|
import {
|
|
3
4
|
parseXml,
|
|
4
5
|
findChild,
|
|
@@ -59,10 +60,10 @@ export class DocxPackage {
|
|
|
59
60
|
public parts: Part[] = [];
|
|
60
61
|
public mainDocumentPart!: Part;
|
|
61
62
|
|
|
62
|
-
constructor(public
|
|
63
|
+
constructor(public unzipped: Record<string, Uint8Array>) {}
|
|
63
64
|
|
|
64
65
|
public getPartByPath(path: string): Part | undefined {
|
|
65
|
-
// Strip leading slash for
|
|
66
|
+
// Strip leading slash for zip compat
|
|
66
67
|
const searchPath = path.startsWith("/") ? path.substring(1) : path;
|
|
67
68
|
return this.parts.find(
|
|
68
69
|
(p) => p.partname === searchPath || p.partname === "/" + searchPath,
|
|
@@ -151,14 +152,15 @@ export class DocumentObject {
|
|
|
151
152
|
public static async load(
|
|
152
153
|
buffer: Buffer | ArrayBuffer,
|
|
153
154
|
): Promise<DocumentObject> {
|
|
154
|
-
const
|
|
155
|
-
const
|
|
155
|
+
const u8 = buffer instanceof Uint8Array ? buffer : new Uint8Array(buffer);
|
|
156
|
+
const unzipped = unzipSync(u8);
|
|
157
|
+
const pkg = new DocxPackage(unzipped);
|
|
156
158
|
|
|
157
159
|
// 1. Load Content Types
|
|
158
|
-
const ctFile =
|
|
160
|
+
const ctFile = unzipped["[Content_Types].xml"];
|
|
159
161
|
let contentTypes: Record<string, string> = {};
|
|
160
162
|
if (ctFile) {
|
|
161
|
-
const ctXml = parseXml(
|
|
163
|
+
const ctXml = parseXml(strFromU8(ctFile));
|
|
162
164
|
const overrides = findAllDescendants(ctXml.documentElement, "Override");
|
|
163
165
|
for (const override of overrides) {
|
|
164
166
|
contentTypes[override.getAttribute("PartName") || ""] =
|
|
@@ -167,9 +169,9 @@ export class DocumentObject {
|
|
|
167
169
|
}
|
|
168
170
|
|
|
169
171
|
// 2. Pre-load all XML parts to allow synchronous traversal later
|
|
170
|
-
for (const [path,
|
|
171
|
-
if (
|
|
172
|
-
const text =
|
|
172
|
+
for (const [path, fileData] of Object.entries(unzipped)) {
|
|
173
|
+
if (path.endsWith(".xml") || path.endsWith(".rels")) {
|
|
174
|
+
const text = strFromU8(fileData);
|
|
173
175
|
const doc = parseXml(text);
|
|
174
176
|
const cType = contentTypes["/" + path] || "application/xml";
|
|
175
177
|
const part = new Part("/" + path, text, doc.documentElement, cType);
|
|
@@ -236,8 +238,8 @@ export class DocumentObject {
|
|
|
236
238
|
xmlStr =
|
|
237
239
|
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + xmlStr;
|
|
238
240
|
}
|
|
239
|
-
this.pkg.
|
|
241
|
+
this.pkg.unzipped[part.partname.substring(1)] = strToU8(xmlStr); // Strip leading slash
|
|
240
242
|
}
|
|
241
|
-
return this.pkg.
|
|
243
|
+
return Buffer.from(zipSync(this.pkg.unzipped));
|
|
242
244
|
}
|
|
243
245
|
}
|
|
@@ -1,60 +1,63 @@
|
|
|
1
|
-
import { describe, it, expect, vi } from
|
|
2
|
-
import { DOMParser } from
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import { finalize_document } from './core.js';
|
|
1
|
+
import { describe, it, expect, vi } from "vitest";
|
|
2
|
+
import { DOMParser } from "@xmldom/xmldom";
|
|
3
|
+
import { DocumentObject, Part, DocxPackage } from "../docx/bridge.js";
|
|
4
|
+
import * as transforms from "./transforms.js";
|
|
5
|
+
import { finalize_document } from "./core.js";
|
|
7
6
|
|
|
8
7
|
// --- Helper to build a lightweight in-memory DocumentObject ---
|
|
9
8
|
function createMockDoc(bodyXml: string): DocumentObject {
|
|
10
9
|
const fullXml = `<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"><w:body>${bodyXml}</w:body></w:document>`;
|
|
11
|
-
const doc = new DOMParser().parseFromString(fullXml,
|
|
12
|
-
const
|
|
13
|
-
const pkg = new DocxPackage(
|
|
14
|
-
|
|
15
|
-
const part = new Part(
|
|
10
|
+
const doc = new DOMParser().parseFromString(fullXml, "text/xml");
|
|
11
|
+
const unzipped: Record<string, Uint8Array> = {};
|
|
12
|
+
const pkg = new DocxPackage(unzipped);
|
|
13
|
+
|
|
14
|
+
const part = new Part(
|
|
15
|
+
"/word/document.xml",
|
|
16
|
+
fullXml,
|
|
17
|
+
doc.documentElement,
|
|
18
|
+
"application/xml",
|
|
19
|
+
);
|
|
16
20
|
pkg.parts.push(part);
|
|
17
21
|
pkg.mainDocumentPart = part;
|
|
18
|
-
|
|
22
|
+
|
|
19
23
|
return new DocumentObject(pkg, part);
|
|
20
24
|
}
|
|
21
25
|
|
|
22
26
|
// --- Transforms Unit Tests ---
|
|
23
|
-
describe(
|
|
24
|
-
|
|
25
|
-
it('should strip RSID attributes and elements', () => {
|
|
27
|
+
describe("Sanitize Transforms", () => {
|
|
28
|
+
it("should strip RSID attributes and elements", () => {
|
|
26
29
|
const doc = createMockDoc(`
|
|
27
30
|
<w:p w:rsidR="00A21F3B" w:rsidP="00B33E21">
|
|
28
31
|
<w:r><w:t>Hello</w:t></w:r>
|
|
29
32
|
</w:p>
|
|
30
33
|
<w:sectPr><w:rsids><w:rsidRoot w:val="00A21F3B"/></w:rsids></w:sectPr>
|
|
31
34
|
`);
|
|
32
|
-
|
|
35
|
+
|
|
33
36
|
const lines = transforms.strip_rsid(doc);
|
|
34
37
|
const xml = doc.element.toString();
|
|
35
|
-
|
|
38
|
+
|
|
36
39
|
expect(lines.length).toBeGreaterThan(0);
|
|
37
|
-
expect(xml).not.toContain(
|
|
38
|
-
expect(xml).not.toContain(
|
|
39
|
-
expect(xml).not.toContain(
|
|
40
|
+
expect(xml).not.toContain("w:rsidR");
|
|
41
|
+
expect(xml).not.toContain("w:rsidP");
|
|
42
|
+
expect(xml).not.toContain("w:rsids");
|
|
40
43
|
});
|
|
41
44
|
|
|
42
|
-
it(
|
|
45
|
+
it("should strip w14:paraId and w14:textId", () => {
|
|
43
46
|
const doc = createMockDoc(`
|
|
44
47
|
<w:p w14:paraId="3F2A91BC" w14:textId="77777777">
|
|
45
48
|
<w:r><w:t>Test</w:t></w:r>
|
|
46
49
|
</w:p>
|
|
47
50
|
`);
|
|
48
|
-
|
|
51
|
+
|
|
49
52
|
const lines = transforms.strip_para_ids(doc);
|
|
50
53
|
const xml = doc.element.toString();
|
|
51
|
-
|
|
54
|
+
|
|
52
55
|
expect(lines.length).toBeGreaterThan(0);
|
|
53
|
-
expect(xml).not.toContain(
|
|
54
|
-
expect(xml).not.toContain(
|
|
56
|
+
expect(xml).not.toContain("w14:paraId");
|
|
57
|
+
expect(xml).not.toContain("w14:textId");
|
|
55
58
|
});
|
|
56
59
|
|
|
57
|
-
it(
|
|
60
|
+
it("should strip hidden text runs", () => {
|
|
58
61
|
const doc = createMockDoc(`
|
|
59
62
|
<w:p>
|
|
60
63
|
<w:r>
|
|
@@ -66,34 +69,43 @@ describe('Sanitize Transforms', () => {
|
|
|
66
69
|
</w:r>
|
|
67
70
|
</w:p>
|
|
68
71
|
`);
|
|
69
|
-
|
|
72
|
+
|
|
70
73
|
const lines = transforms.strip_hidden_text(doc);
|
|
71
74
|
const xml = doc.element.toString();
|
|
72
|
-
|
|
75
|
+
|
|
73
76
|
expect(lines.length).toBeGreaterThan(0);
|
|
74
|
-
expect(xml).not.toContain(
|
|
75
|
-
expect(xml).toContain(
|
|
77
|
+
expect(xml).not.toContain("HiddenSecret");
|
|
78
|
+
expect(xml).toContain("VisibleText");
|
|
76
79
|
});
|
|
77
80
|
|
|
78
|
-
it(
|
|
79
|
-
const doc = createMockDoc(
|
|
80
|
-
|
|
81
|
+
it("should scrub document properties", () => {
|
|
82
|
+
const doc = createMockDoc("<w:p/>");
|
|
83
|
+
|
|
81
84
|
// Mock docProps/app.xml
|
|
82
|
-
const appXml =
|
|
83
|
-
|
|
84
|
-
const
|
|
85
|
+
const appXml =
|
|
86
|
+
"<Properties><TotalTime>15</TotalTime><Template>Confidential.dotm</Template></Properties>";
|
|
87
|
+
const appEl = new DOMParser().parseFromString(
|
|
88
|
+
appXml,
|
|
89
|
+
"text/xml",
|
|
90
|
+
).documentElement;
|
|
91
|
+
const appPart = new Part(
|
|
92
|
+
"/docProps/app.xml",
|
|
93
|
+
appXml,
|
|
94
|
+
appEl,
|
|
95
|
+
"application/xml",
|
|
96
|
+
);
|
|
85
97
|
doc.pkg.parts.push(appPart);
|
|
86
|
-
|
|
98
|
+
|
|
87
99
|
const lines = transforms.scrub_doc_properties(doc);
|
|
88
100
|
const resultXml = appPart._element.toString();
|
|
89
|
-
|
|
101
|
+
|
|
90
102
|
expect(lines.length).toBeGreaterThan(0);
|
|
91
|
-
expect(resultXml).toContain(
|
|
92
|
-
expect(resultXml).toContain(
|
|
93
|
-
expect(resultXml).not.toContain(
|
|
103
|
+
expect(resultXml).toContain("<TotalTime>0</TotalTime>");
|
|
104
|
+
expect(resultXml).toContain("<Template/>");
|
|
105
|
+
expect(resultXml).not.toContain("Confidential.dotm");
|
|
94
106
|
});
|
|
95
107
|
|
|
96
|
-
it(
|
|
108
|
+
it("should strip custom XML parts and data bindings", () => {
|
|
97
109
|
const doc = createMockDoc(`
|
|
98
110
|
<w:p>
|
|
99
111
|
<w:sdt>
|
|
@@ -101,19 +113,26 @@ describe('Sanitize Transforms', () => {
|
|
|
101
113
|
</w:sdt>
|
|
102
114
|
</w:p>
|
|
103
115
|
`);
|
|
104
|
-
|
|
116
|
+
|
|
105
117
|
// Mock custom XML part
|
|
106
|
-
const customPart = new Part(
|
|
118
|
+
const customPart = new Part(
|
|
119
|
+
"/customXml/item1.xml",
|
|
120
|
+
"<t/>",
|
|
121
|
+
new DOMParser().parseFromString("<t/>", "text/xml").documentElement,
|
|
122
|
+
"application/xml",
|
|
123
|
+
);
|
|
107
124
|
doc.pkg.parts.push(customPart);
|
|
108
|
-
|
|
125
|
+
|
|
109
126
|
const lines = transforms.strip_custom_xml(doc);
|
|
110
|
-
|
|
127
|
+
|
|
111
128
|
expect(lines.length).toBeGreaterThan(0);
|
|
112
|
-
expect(
|
|
113
|
-
|
|
129
|
+
expect(
|
|
130
|
+
doc.pkg.parts.find((p) => p.partname.includes("customXml")),
|
|
131
|
+
).toBeUndefined();
|
|
132
|
+
expect(doc.element.toString()).not.toContain("w:dataBinding");
|
|
114
133
|
});
|
|
115
134
|
|
|
116
|
-
it(
|
|
135
|
+
it("should count and accept all tracked changes", () => {
|
|
117
136
|
const doc = createMockDoc(`
|
|
118
137
|
<w:p>
|
|
119
138
|
<w:del w:id="1">
|
|
@@ -124,74 +143,80 @@ describe('Sanitize Transforms', () => {
|
|
|
124
143
|
</w:ins>
|
|
125
144
|
</w:p>
|
|
126
145
|
`);
|
|
127
|
-
|
|
146
|
+
|
|
128
147
|
const [ins, del, fmt] = transforms.count_tracked_changes(doc);
|
|
129
148
|
expect(ins).toBe(1);
|
|
130
149
|
expect(del).toBe(1);
|
|
131
|
-
|
|
150
|
+
|
|
132
151
|
const lines = transforms.accept_all_tracked_changes(doc);
|
|
133
152
|
const xml = doc.element.toString();
|
|
134
|
-
|
|
153
|
+
|
|
135
154
|
expect(lines.length).toBeGreaterThan(0);
|
|
136
|
-
expect(xml).not.toContain(
|
|
137
|
-
expect(xml).not.toContain(
|
|
138
|
-
expect(xml).not.toContain(
|
|
139
|
-
expect(xml).toContain(
|
|
155
|
+
expect(xml).not.toContain("w:del");
|
|
156
|
+
expect(xml).not.toContain("w:ins");
|
|
157
|
+
expect(xml).not.toContain("Vendor"); // Deletion was removed
|
|
158
|
+
expect(xml).toContain("Supplier"); // Insertion was unwrapped
|
|
140
159
|
});
|
|
141
|
-
|
|
142
160
|
});
|
|
143
161
|
|
|
144
162
|
// --- Orchestrator Integration Tests ---
|
|
145
|
-
describe(
|
|
163
|
+
describe("Finalize Document (Core)", () => {
|
|
164
|
+
it("should inject XML locking (Read-Only) into settings.xml", async () => {
|
|
165
|
+
const doc = createMockDoc("<w:p/>");
|
|
146
166
|
|
|
147
|
-
it('should inject XML locking (Read-Only) into settings.xml', async () => {
|
|
148
|
-
const doc = createMockDoc('<w:p/>');
|
|
149
|
-
|
|
150
167
|
// Mock word/settings.xml
|
|
151
|
-
const settingsXml =
|
|
152
|
-
|
|
153
|
-
const
|
|
168
|
+
const settingsXml =
|
|
169
|
+
'<w:settings xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"></w:settings>';
|
|
170
|
+
const settingsEl = new DOMParser().parseFromString(
|
|
171
|
+
settingsXml,
|
|
172
|
+
"text/xml",
|
|
173
|
+
).documentElement;
|
|
174
|
+
const settingsPart = new Part(
|
|
175
|
+
"/word/settings.xml",
|
|
176
|
+
settingsXml,
|
|
177
|
+
settingsEl,
|
|
178
|
+
"application/xml",
|
|
179
|
+
);
|
|
154
180
|
doc.pkg.parts.push(settingsPart);
|
|
155
|
-
|
|
181
|
+
|
|
156
182
|
// Mock the doc.save buffer return
|
|
157
|
-
doc.save = vi.fn().mockResolvedValue(Buffer.from(
|
|
158
|
-
|
|
159
|
-
const res = await finalize_document(doc, {
|
|
160
|
-
filename:
|
|
161
|
-
protection_mode:
|
|
183
|
+
doc.save = vi.fn().mockResolvedValue(Buffer.from("mock"));
|
|
184
|
+
|
|
185
|
+
const res = await finalize_document(doc, {
|
|
186
|
+
filename: "test.docx",
|
|
187
|
+
protection_mode: "read_only",
|
|
162
188
|
});
|
|
163
|
-
|
|
189
|
+
|
|
164
190
|
const finalSettings = settingsPart._element.toString();
|
|
165
191
|
|
|
166
|
-
expect(res.reportText).toContain(
|
|
167
|
-
expect(res.reportText).toContain(
|
|
168
|
-
|
|
192
|
+
expect(res.reportText).toContain("Result: CLEAN");
|
|
193
|
+
expect(res.reportText).toContain("Document locked (Read-Only");
|
|
194
|
+
|
|
169
195
|
// Validate mathematical injection
|
|
170
|
-
expect(finalSettings).toContain(
|
|
196
|
+
expect(finalSettings).toContain("w:documentProtection");
|
|
171
197
|
expect(finalSettings).toContain('w:edit="readOnly"');
|
|
172
198
|
expect(finalSettings).toContain('w:enforcement="1"');
|
|
173
199
|
});
|
|
174
200
|
|
|
175
|
-
it(
|
|
201
|
+
it("should return a blocked status if unaccepted changes remain and accept_all is false", async () => {
|
|
176
202
|
const doc = createMockDoc(`
|
|
177
203
|
<w:p>
|
|
178
204
|
<w:ins w:id="1"><w:r><w:t>Unresolved Edit</w:t></w:r></w:ins>
|
|
179
205
|
</w:p>
|
|
180
206
|
`);
|
|
181
|
-
|
|
182
|
-
const res = await finalize_document(doc, {
|
|
183
|
-
filename:
|
|
184
|
-
sanitize_mode:
|
|
185
|
-
accept_all: false // <-- Should block
|
|
207
|
+
|
|
208
|
+
const res = await finalize_document(doc, {
|
|
209
|
+
filename: "draft.docx",
|
|
210
|
+
sanitize_mode: "full",
|
|
211
|
+
accept_all: false, // <-- Should block
|
|
186
212
|
});
|
|
187
|
-
|
|
188
|
-
expect(res.reportText).toContain(
|
|
189
|
-
expect(res.reportText).toContain(
|
|
213
|
+
|
|
214
|
+
expect(res.reportText).toContain("BLOCKED:");
|
|
215
|
+
expect(res.reportText).toContain("unresolved tracked changes");
|
|
190
216
|
});
|
|
191
217
|
|
|
192
|
-
describe(
|
|
193
|
-
|
|
194
|
-
it('BUG-FRAG-1: Coalesces adjacent identical runs after accepting tracked changes', async () => {
|
|
218
|
+
describe("Resolved Bugs Sanitize Parity Verification", () => {
|
|
219
|
+
it("BUG-FRAG-1: Coalesces adjacent identical runs after accepting tracked changes", async () => {
|
|
195
220
|
const doc = createMockDoc(`
|
|
196
221
|
<w:p>
|
|
197
222
|
<w:r><w:t xml:space="preserve">The term shall be </w:t></w:r>
|
|
@@ -199,39 +224,44 @@ describe('Finalize Document (Core)', () => {
|
|
|
199
224
|
<w:r><w:t xml:space="preserve"> years from the Effective Date.</w:t></w:r>
|
|
200
225
|
</w:p>
|
|
201
226
|
`);
|
|
202
|
-
|
|
203
|
-
doc.save = vi.fn().mockResolvedValue(Buffer.from(
|
|
204
|
-
|
|
227
|
+
|
|
228
|
+
doc.save = vi.fn().mockResolvedValue(Buffer.from("mock"));
|
|
229
|
+
|
|
205
230
|
await finalize_document(doc, {
|
|
206
|
-
filename:
|
|
207
|
-
sanitize_mode:
|
|
208
|
-
accept_all: true
|
|
231
|
+
filename: "test.docx",
|
|
232
|
+
sanitize_mode: "full",
|
|
233
|
+
accept_all: true,
|
|
209
234
|
});
|
|
210
235
|
|
|
211
236
|
const xml = doc.element.toString();
|
|
212
237
|
// We should see a single coalesced string rather than fragmented <w:t> nodes
|
|
213
|
-
expect(xml).toContain(
|
|
238
|
+
expect(xml).toContain(
|
|
239
|
+
"The term shall be five (5) years from the Effective Date.",
|
|
240
|
+
);
|
|
214
241
|
|
|
215
|
-
const runs = doc.element.getElementsByTagName(
|
|
242
|
+
const runs = doc.element.getElementsByTagName("w:r");
|
|
216
243
|
// If they are coalesced properly, there will be exactly 1 run instead of 3
|
|
217
244
|
expect(runs.length).toBe(1);
|
|
218
245
|
});
|
|
219
246
|
|
|
220
|
-
it(
|
|
221
|
-
const doc = createMockDoc(
|
|
247
|
+
it("BUG-NS-1: Strips unused xmlns:w16du namespace declarations during finalization", async () => {
|
|
248
|
+
const doc = createMockDoc("<w:p/>");
|
|
222
249
|
// Manually inject the namespace onto the absolute root as the engine does
|
|
223
|
-
doc.part._element.setAttribute(
|
|
224
|
-
|
|
225
|
-
|
|
250
|
+
doc.part._element.setAttribute(
|
|
251
|
+
"xmlns:w16du",
|
|
252
|
+
"http://schemas.microsoft.com/office/word/2023/wordml/word16du",
|
|
253
|
+
);
|
|
254
|
+
|
|
255
|
+
doc.save = vi.fn().mockResolvedValue(Buffer.from("mock"));
|
|
226
256
|
|
|
227
257
|
await finalize_document(doc, {
|
|
228
|
-
filename:
|
|
229
|
-
sanitize_mode:
|
|
258
|
+
filename: "test.docx",
|
|
259
|
+
sanitize_mode: "full",
|
|
230
260
|
});
|
|
231
261
|
|
|
232
262
|
// The final stringified XML of the root document should NOT contain the unused namespace
|
|
233
263
|
const xml = doc.part._element.toString();
|
|
234
|
-
expect(xml).not.toContain(
|
|
264
|
+
expect(xml).not.toContain("xmlns:w16du");
|
|
235
265
|
});
|
|
236
266
|
});
|
|
237
|
-
});
|
|
267
|
+
});
|