@adeu/core 1.9.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,6 +44,7 @@ export async function finalize_document(doc: DocumentObject, options: FinalizeOp
44
44
  const commentsSummary = transforms.get_comments_summary(doc);
45
45
  report.comments_removed = commentsSummary.total;
46
46
  report.add_transform_lines(transforms.remove_all_comments(doc));
47
+ transforms.eject_comment_parts(doc);
47
48
  } else if (options.sanitize_mode === 'keep-markup') {
48
49
  // Basic support for keep-markup in TS
49
50
  const counts = transforms.count_tracked_changes(doc);
@@ -1,9 +1,15 @@
1
1
  import { describe, it, expect, vi } from "vitest";
2
2
  import { DOMParser } from "@xmldom/xmldom";
3
+ import { readFileSync } from "node:fs";
4
+ import { resolve, dirname } from "node:path";
5
+ import { fileURLToPath } from "node:url";
3
6
  import { DocumentObject, Part, DocxPackage } from "../docx/bridge.js";
4
7
  import * as transforms from "./transforms.js";
5
8
  import { finalize_document } from "./core.js";
6
9
 
10
+ const __filename = fileURLToPath(import.meta.url);
11
+ const __dirname = dirname(__filename);
12
+
7
13
  // --- Helper to build a lightweight in-memory DocumentObject ---
8
14
  function createMockDoc(bodyXml: string): DocumentObject {
9
15
  const fullXml = `<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"><w:body>${bodyXml}</w:body></w:document>`;
@@ -14,7 +20,7 @@ function createMockDoc(bodyXml: string): DocumentObject {
14
20
  const part = new Part(
15
21
  "/word/document.xml",
16
22
  fullXml,
17
- doc.documentElement,
23
+ doc.documentElement! as unknown as Element,
18
24
  "application/xml",
19
25
  );
20
26
  pkg.parts.push(part);
@@ -87,11 +93,11 @@ describe("Sanitize Transforms", () => {
87
93
  const appEl = new DOMParser().parseFromString(
88
94
  appXml,
89
95
  "text/xml",
90
- ).documentElement;
96
+ ).documentElement!;
91
97
  const appPart = new Part(
92
98
  "/docProps/app.xml",
93
99
  appXml,
94
- appEl,
100
+ appEl as unknown as Element,
95
101
  "application/xml",
96
102
  );
97
103
  doc.pkg.parts.push(appPart);
@@ -118,7 +124,7 @@ describe("Sanitize Transforms", () => {
118
124
  const customPart = new Part(
119
125
  "/customXml/item1.xml",
120
126
  "<t/>",
121
- new DOMParser().parseFromString("<t/>", "text/xml").documentElement,
127
+ new DOMParser().parseFromString("<t/>", "text/xml").documentElement! as unknown as Element,
122
128
  "application/xml",
123
129
  );
124
130
  doc.pkg.parts.push(customPart);
@@ -170,11 +176,11 @@ describe("Finalize Document (Core)", () => {
170
176
  const settingsEl = new DOMParser().parseFromString(
171
177
  settingsXml,
172
178
  "text/xml",
173
- ).documentElement;
179
+ ).documentElement!;
174
180
  const settingsPart = new Part(
175
181
  "/word/settings.xml",
176
182
  settingsXml,
177
- settingsEl,
183
+ settingsEl as unknown as Element,
178
184
  "application/xml",
179
185
  );
180
186
  doc.pkg.parts.push(settingsPart);
@@ -263,5 +269,41 @@ describe("Finalize Document (Core)", () => {
263
269
  const xml = doc.part._element.toString();
264
270
  expect(xml).not.toContain("xmlns:w16du");
265
271
  });
272
+
273
it("BUG-REPRO: finalize_document leaks comment parts in full sanitize mode", async () => {
  // Counts the package parts whose content type mentions "comments".
  const countCommentParts = (target: DocumentObject): number =>
    target.pkg.parts.filter((candidate) => candidate.contentType.includes("comments")).length;

  // Arrange: the golden fixture is expected to carry both comment parts and
  // in-body comment anchors; the pre-condition checks below enforce that.
  const goldenPath = resolve(__dirname, "../../../../../shared/fixtures/golden.docx");
  const doc = await DocumentObject.load(readFileSync(goldenPath));

  expect(countCommentParts(doc)).toBeGreaterThan(0);

  const xmlBefore = doc.element.toString();
  for (const anchor of ["w:commentRangeStart", "w:commentReference"]) {
    expect(xmlBefore).toContain(anchor);
  }

  // Replace doc.save with a stub that resolves to a placeholder buffer.
  doc.save = vi.fn().mockResolvedValue(Buffer.from("mock"));

  // Act: run the full sanitize pipeline with every tracked change accepted.
  await finalize_document(doc, {
    filename: "golden.docx",
    sanitize_mode: "full",
    accept_all: true,
  });

  // Assert: no in-body comment anchor survives...
  const xmlAfter = doc.element.toString();
  for (const anchor of ["w:commentRangeStart", "w:commentRangeEnd", "w:commentReference"]) {
    expect(xmlAfter).not.toContain(anchor);
  }

  // ...and no comment part is left in the package.
  expect(countCommentParts(doc)).toBe(0);
});
266
308
  });
267
309
  });
@@ -271,17 +271,104 @@ export function remove_all_comments(doc: DocumentObject): string[] {
271
271
  cm.deleteComment(cId);
272
272
  }
273
273
 
274
- for (const tag of ['w:commentRangeStart', 'w:commentRangeEnd', 'w:commentReference']) {
274
+ for (const tag of ['w:commentRangeStart', 'w:commentRangeEnd']) {
275
275
  for (const el of findAllDescendants(doc.element, tag)) {
276
276
  el.parentNode?.removeChild(el);
277
277
  }
278
278
  }
279
279
 
280
+ const refs = findAllDescendants(doc.element, 'w:commentReference');
281
+ for (const ref of refs) {
282
+ const parent = ref.parentNode as Element | null;
283
+ if (parent) {
284
+ if (parent.tagName === 'w:r' || parent.tagName.endsWith(':r')) {
285
+ const nonRprChildren = Array.from(parent.childNodes).filter(
286
+ (c) => c.nodeType === 1 && (c as Element).tagName !== 'w:rPr' && (c as Element).tagName !== 'rPr'
287
+ );
288
+ if (nonRprChildren.length <= 1) {
289
+ parent.parentNode?.removeChild(parent);
290
+ } else {
291
+ parent.removeChild(ref);
292
+ }
293
+ } else {
294
+ parent.removeChild(ref);
295
+ }
296
+ }
297
+ }
298
+
280
299
  const resolvedCount = Object.values(data).filter(c => c.resolved).length;
281
300
  const openCount = Object.values(data).filter(c => !c.resolved).length;
282
301
  return [`Comments removed: ${keys.length} (${resolvedCount} resolved, ${openCount} open)`].concat(lines);
283
302
  }
284
303
 
304
/**
 * Maps the part name of a `.rels` part to the part name of the part that owns
 * those relationships, e.g. `/word/_rels/document.xml.rels` becomes
 * `/word/document.xml`.
 *
 * Only the trailing `.rels` suffix and the last `/_rels/` segment are dropped,
 * so other occurrences of those substrings in the path are left untouched.
 */
function relsOwnerPath(relsPartname: string): string {
  const withoutSuffix = relsPartname.endsWith(".rels")
    ? relsPartname.slice(0, -".rels".length)
    : relsPartname;
  const relsDirAt = withoutSuffix.lastIndexOf("/_rels/");
  if (relsDirAt === -1) return withoutSuffix;
  return withoutSuffix.slice(0, relsDirAt) + withoutSuffix.slice(relsDirAt + "/_rels".length);
}

/**
 * Removes comment storage from the package of `doc`, mutating it in place.
 *
 * A name counts as comment-related when it contains `comments`
 * (case-insensitive). The function:
 *  1. returns immediately when no part in `pkg.parts` has such a name;
 *  2. removes matching internal `Relationship` elements from every `.rels`
 *     part and deletes the same id from the owning part's `rels`;
 *  3. removes matching `Override` elements from `[Content_Types].xml`;
 *  4. drops matching parts from `pkg.parts`;
 *  5. deletes matching entries from `pkg.unzipped`.
 *
 * NOTE(review): the substring test also matches unrelated names that merely
 * contain "comments"; matching on relationship or content type would be
 * stricter. Confirm before tightening, since callers may rely on the broad match.
 *
 * @param doc Document whose package (`doc.pkg`) is cleaned.
 */
export function eject_comment_parts(doc: DocumentObject): void {
  const pkg = doc.pkg;
  const isCommentName = (name: string): boolean => name.toLowerCase().includes("comments");

  // 1. Nothing to eject when the package carries no comment part.
  if (!pkg.parts.some((part) => isCommentName(part.partname))) return;

  // 2. Sever relationships that point at comment parts.
  for (const part of pkg.parts) {
    if (!part.partname.endsWith(".rels")) continue;

    // Iterate over a copy because matching elements are detached inside the loop.
    for (const rel of [...findAllDescendants(part._element, "Relationship")]) {
      // External targets are URLs, not package parts. Removing one because its
      // URL happens to contain "comments" would leave its r:id dangling.
      if (rel.getAttribute("TargetMode") === "External") continue;
      if (!isCommentName(rel.getAttribute("Target") ?? "")) continue;

      const relId = rel.getAttribute("Id");
      if (relId) pkg.getPartByPath(relsOwnerPath(part.partname))?.rels.delete(relId);
      rel.parentNode?.removeChild(rel);
    }
  }

  // 3. Remove the content-type overrides declared for comment parts.
  const contentTypes = pkg.getPartByPath("[Content_Types].xml");
  if (contentTypes) {
    for (const override of [...findAllDescendants(contentTypes._element, "Override")]) {
      if (isCommentName(override.getAttribute("PartName") ?? "")) {
        override.parentNode?.removeChild(override);
      }
    }
  }

  // 4. Drop the comment parts themselves.
  pkg.parts = pkg.parts.filter((part) => !isCommentName(part.partname));

  // 5. Drop the matching raw entries from pkg.unzipped as well.
  for (const key of Object.keys(pkg.unzipped)) {
    if (isCommentName(key)) delete pkg.unzipped[key];
  }
}
371
+
285
372
  export function replace_comment_authors(doc: DocumentObject, newAuthor: string): string[] {
286
373
  const cm = new CommentsManager(doc);
287
374
  if (!cm.commentsPart) return [];