@adeu/core 1.8.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +618 -102
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -2
- package/dist/index.d.ts +15 -2
- package/dist/index.js +618 -102
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/comments.ts +33 -14
- package/src/consistency.test.ts +62 -4
- package/src/diff.ts +42 -6
- package/src/docx/dom.ts +6 -2
- package/src/engine.bugs.test.ts +63 -10
- package/src/engine.feedback.test.ts +144 -0
- package/src/engine.issue23.test.ts +511 -0
- package/src/engine.ts +513 -64
- package/src/sanitize/core.ts +1 -0
- package/src/sanitize/sanitize.test.ts +48 -6
- package/src/sanitize/transforms.ts +88 -1
package/src/sanitize/core.ts
CHANGED
|
@@ -44,6 +44,7 @@ export async function finalize_document(doc: DocumentObject, options: FinalizeOp
|
|
|
44
44
|
const commentsSummary = transforms.get_comments_summary(doc);
|
|
45
45
|
report.comments_removed = commentsSummary.total;
|
|
46
46
|
report.add_transform_lines(transforms.remove_all_comments(doc));
|
|
47
|
+
transforms.eject_comment_parts(doc);
|
|
47
48
|
} else if (options.sanitize_mode === 'keep-markup') {
|
|
48
49
|
// Basic support for keep-markup in TS
|
|
49
50
|
const counts = transforms.count_tracked_changes(doc);
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import { describe, it, expect, vi } from "vitest";
|
|
2
2
|
import { DOMParser } from "@xmldom/xmldom";
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import { resolve, dirname } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
3
6
|
import { DocumentObject, Part, DocxPackage } from "../docx/bridge.js";
|
|
4
7
|
import * as transforms from "./transforms.js";
|
|
5
8
|
import { finalize_document } from "./core.js";
|
|
6
9
|
|
|
10
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
11
|
+
const __dirname = dirname(__filename);
|
|
12
|
+
|
|
7
13
|
// --- Helper to build a lightweight in-memory DocumentObject ---
|
|
8
14
|
function createMockDoc(bodyXml: string): DocumentObject {
|
|
9
15
|
const fullXml = `<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"><w:body>${bodyXml}</w:body></w:document>`;
|
|
@@ -14,7 +20,7 @@ function createMockDoc(bodyXml: string): DocumentObject {
|
|
|
14
20
|
const part = new Part(
|
|
15
21
|
"/word/document.xml",
|
|
16
22
|
fullXml,
|
|
17
|
-
doc.documentElement,
|
|
23
|
+
doc.documentElement! as unknown as Element,
|
|
18
24
|
"application/xml",
|
|
19
25
|
);
|
|
20
26
|
pkg.parts.push(part);
|
|
@@ -87,11 +93,11 @@ describe("Sanitize Transforms", () => {
|
|
|
87
93
|
const appEl = new DOMParser().parseFromString(
|
|
88
94
|
appXml,
|
|
89
95
|
"text/xml",
|
|
90
|
-
).documentElement
|
|
96
|
+
).documentElement!;
|
|
91
97
|
const appPart = new Part(
|
|
92
98
|
"/docProps/app.xml",
|
|
93
99
|
appXml,
|
|
94
|
-
appEl,
|
|
100
|
+
appEl as unknown as Element,
|
|
95
101
|
"application/xml",
|
|
96
102
|
);
|
|
97
103
|
doc.pkg.parts.push(appPart);
|
|
@@ -118,7 +124,7 @@ describe("Sanitize Transforms", () => {
|
|
|
118
124
|
const customPart = new Part(
|
|
119
125
|
"/customXml/item1.xml",
|
|
120
126
|
"<t/>",
|
|
121
|
-
new DOMParser().parseFromString("<t/>", "text/xml").documentElement,
|
|
127
|
+
new DOMParser().parseFromString("<t/>", "text/xml").documentElement! as unknown as Element,
|
|
122
128
|
"application/xml",
|
|
123
129
|
);
|
|
124
130
|
doc.pkg.parts.push(customPart);
|
|
@@ -170,11 +176,11 @@ describe("Finalize Document (Core)", () => {
|
|
|
170
176
|
const settingsEl = new DOMParser().parseFromString(
|
|
171
177
|
settingsXml,
|
|
172
178
|
"text/xml",
|
|
173
|
-
).documentElement
|
|
179
|
+
).documentElement!;
|
|
174
180
|
const settingsPart = new Part(
|
|
175
181
|
"/word/settings.xml",
|
|
176
182
|
settingsXml,
|
|
177
|
-
settingsEl,
|
|
183
|
+
settingsEl as unknown as Element,
|
|
178
184
|
"application/xml",
|
|
179
185
|
);
|
|
180
186
|
doc.pkg.parts.push(settingsPart);
|
|
@@ -263,5 +269,41 @@ describe("Finalize Document (Core)", () => {
|
|
|
263
269
|
const xml = doc.part._element.toString();
|
|
264
270
|
expect(xml).not.toContain("xmlns:w16du");
|
|
265
271
|
});
|
|
272
|
+
|
|
273
|
+
it("BUG-REPRO: finalize_document leaks comment parts in full sanitize mode", async () => {
  // Regression guard: a full sanitize must strip both the in-body comment
  // anchors and the comment parts stored in the package.

  // Arrange: load the golden fixture from disk.
  const goldenPath = resolve(__dirname, "../../../../../shared/fixtures/golden.docx");
  const doc = await DocumentObject.load(readFileSync(goldenPath));

  // Selects the package parts whose content type mentions comments.
  const listCommentParts = () =>
    doc.pkg.parts.filter((p) => p.contentType.includes("comments"));

  // Pre-conditions: the fixture really carries comment parts and anchors,
  // otherwise the assertions after finalize would pass vacuously.
  expect(listCommentParts().length).toBeGreaterThan(0);

  const xmlBefore = doc.element.toString();
  for (const anchorTag of ["w:commentRangeStart", "w:commentReference"]) {
    expect(xmlBefore).toContain(anchorTag);
  }

  // Stub out save so the test never serialises a real archive.
  doc.save = vi.fn().mockResolvedValue(Buffer.from("mock"));

  // Act: run the full sanitize pipeline, accepting all tracked changes.
  await finalize_document(doc, {
    filename: "golden.docx",
    sanitize_mode: "full",
    accept_all: true,
  });

  // Assert: no comment anchor of any kind survives in the body...
  const xmlAfter = doc.element.toString();
  for (const anchorTag of ["w:commentRangeStart", "w:commentRangeEnd", "w:commentReference"]) {
    expect(xmlAfter).not.toContain(anchorTag);
  }

  // ...and no comment part is left in the package.
  expect(listCommentParts().length).toBe(0);
});
|
|
266
308
|
});
|
|
267
309
|
});
|
|
@@ -271,17 +271,104 @@ export function remove_all_comments(doc: DocumentObject): string[] {
|
|
|
271
271
|
cm.deleteComment(cId);
|
|
272
272
|
}
|
|
273
273
|
|
|
274
|
-
for (const tag of ['w:commentRangeStart', 'w:commentRangeEnd'
|
|
274
|
+
for (const tag of ['w:commentRangeStart', 'w:commentRangeEnd']) {
|
|
275
275
|
for (const el of findAllDescendants(doc.element, tag)) {
|
|
276
276
|
el.parentNode?.removeChild(el);
|
|
277
277
|
}
|
|
278
278
|
}
|
|
279
279
|
|
|
280
|
+
const refs = findAllDescendants(doc.element, 'w:commentReference');
|
|
281
|
+
for (const ref of refs) {
|
|
282
|
+
const parent = ref.parentNode as Element | null;
|
|
283
|
+
if (parent) {
|
|
284
|
+
if (parent.tagName === 'w:r' || parent.tagName.endsWith(':r')) {
|
|
285
|
+
const nonRprChildren = Array.from(parent.childNodes).filter(
|
|
286
|
+
(c) => c.nodeType === 1 && (c as Element).tagName !== 'w:rPr' && (c as Element).tagName !== 'rPr'
|
|
287
|
+
);
|
|
288
|
+
if (nonRprChildren.length <= 1) {
|
|
289
|
+
parent.parentNode?.removeChild(parent);
|
|
290
|
+
} else {
|
|
291
|
+
parent.removeChild(ref);
|
|
292
|
+
}
|
|
293
|
+
} else {
|
|
294
|
+
parent.removeChild(ref);
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
280
299
|
const resolvedCount = Object.values(data).filter(c => c.resolved).length;
|
|
281
300
|
const openCount = Object.values(data).filter(c => !c.resolved).length;
|
|
282
301
|
return [`Comments removed: ${keys.length} (${resolvedCount} resolved, ${openCount} open)`].concat(lines);
|
|
283
302
|
}
|
|
284
303
|
|
|
304
|
+
/**
 * Removes every comment-related part from the package so that none of it is
 * left behind after the in-body comment markup has been stripped.
 *
 * A part is treated as comment-related when its partname contains
 * "comments" (case-insensitive). When the package holds no such part the
 * function returns without touching anything. Otherwise it:
 *   1. deletes internal relationships that target a comment part, both from
 *      the `.rels` XML and from the owning part's `rels` map,
 *   2. deletes the matching `Override` entries in `[Content_Types].xml`,
 *   3. drops the comment parts from `pkg.parts`,
 *   4. drops the matching entries from `pkg.unzipped`.
 *
 * @param doc - Document whose package (`doc.pkg`) is mutated in place.
 */
export function eject_comment_parts(doc: DocumentObject): void {
  const pkg = doc.pkg;
  const isCommentPath = (path: string): boolean => path.toLowerCase().includes("comments");

  // Nothing to eject when the package carries no comment parts.
  if (!pkg.parts.some((p) => isCommentPath(p.partname))) return;

  // 1. Sever relationships that point at comment parts.
  for (const part of pkg.parts) {
    if (!part.partname.endsWith(".rels")) continue;

    const toRemove: Element[] = [];
    for (const rel of findAllDescendants(part._element, "Relationship")) {
      // External relationships (e.g. hyperlinks) point at arbitrary URLs, not
      // at package parts. A URL that merely contains "comments" must survive,
      // otherwise the r:id that references it in the body is left dangling.
      if (rel.getAttribute("TargetMode") === "External") continue;

      if (isCommentPath(rel.getAttribute("Target") ?? "")) {
        toRemove.push(rel);
      }
    }
    if (toRemove.length === 0) continue;

    // "/word/_rels/document.xml.rels" -> "/word/document.xml": the part that
    // owns these relationships. Only the trailing ".rels" suffix is stripped.
    const sourcePath = part.partname.replace("/_rels/", "/").replace(/\.rels$/, "");
    const sourcePart = pkg.getPartByPath(sourcePath);

    for (const relEl of toRemove) {
      // Keep the owning part's relationship map in step with the XML.
      const relId = relEl.getAttribute("Id");
      if (sourcePart && relId) sourcePart.rels.delete(relId);
      relEl.parentNode?.removeChild(relEl);
    }
  }

  // 2. Remove overrides from [Content_Types].xml
  const ctPart = pkg.getPartByPath("[Content_Types].xml");
  if (ctPart) {
    // Collect first, then detach, so the traversal result is never mutated
    // while it is being iterated.
    const toRemove: Element[] = [];
    for (const override of findAllDescendants(ctPart._element, "Override")) {
      if (isCommentPath(override.getAttribute("PartName") ?? "")) {
        toRemove.push(override);
      }
    }
    for (const overrideEl of toRemove) {
      overrideEl.parentNode?.removeChild(overrideEl);
    }
  }

  // 3. Remove comment parts from pkg.parts
  pkg.parts = pkg.parts.filter((p) => !isCommentPath(p.partname));

  // 4. Remove comment files from pkg.unzipped as well
  for (const key of Object.keys(pkg.unzipped)) {
    if (isCommentPath(key)) {
      delete pkg.unzipped[key];
    }
  }
}
|
|
371
|
+
|
|
285
372
|
export function replace_comment_authors(doc: DocumentObject, newAuthor: string): string[] {
|
|
286
373
|
const cm = new CommentsManager(doc);
|
|
287
374
|
if (!cm.commentsPart) return [];
|