npm - @adeu/core - Versions diffs - 1.6.7 → 1.6.9 - Mend

@adeu/core 1.6.7 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/dist/index.cjs +3969 -1859
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +95 -8
package/dist/index.d.ts +95 -8
package/dist/index.js +3966 -1859
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/src/consistency.test.ts +134 -0
package/src/diff.test.ts +13 -1
package/src/diff.ts +220 -47
package/src/docx/bridge.ts +111 -57
package/src/docx/dom.ts +66 -7
package/src/domain.test.ts +280 -0
package/src/domain.ts +264 -10
package/src/engine.bugs.test.ts +481 -0
package/src/engine.ts +1346 -192
package/src/index.ts +7 -8
package/src/ingest.ts +8 -0
package/src/markup.ts +160 -53
package/src/outline.ts +199 -69
package/src/sanitize/core.ts +130 -0
package/src/sanitize/report.ts +125 -0
package/src/sanitize/sanitize.test.ts +237 -0
package/src/sanitize/transforms.ts +452 -0
package/src/utils/docx.ts +292 -158

package/src/outline.ts CHANGED Viewed

@@ -2,18 +2,18 @@
  * Structural outline extractor.
  */
-import { DocumentObject } from './docx/bridge.js';
-import { Paragraph, Table, DocxEvent } from './docx/primitives.js';
-import { build_paragraph_text, extract_table } from './ingest.js';
-import { extract_comments_data } from './comments.js';
-import { findChild } from './docx/dom.js';
+import { DocumentObject } from "./docx/bridge.js";
+import { Paragraph, Table, DocxEvent } from "./docx/primitives.js";
+import { build_paragraph_text, extract_table } from "./ingest.js";
+import { extract_comments_data } from "./comments.js";
+import { findChild } from "./docx/dom.js";
 import {
   _get_style_cache,
   get_paragraph_prefix,
   iter_block_items,
   iter_document_parts,
   iter_paragraph_content,
-} from './utils/docx.js';
+} from "./utils/docx.js";
 const _HEADING_PREFIX_RE = /^(#{1,6}) /;
 const _HEURISTIC_MIN_WORDS = 3;
@@ -40,10 +40,10 @@ export function extract_outline(
   projected_body: string,
   body_pages: string[],
   body_page_offsets: number[],
-  paragraph_offsets: Record<string, [number, number]> | null = null
+  paragraph_offsets: Record<string, [number, number]> | null = null,
 ): OutlineNode[] {
   if (body_pages.length !== body_page_offsets.length) {
-    throw new Error('body_pages and body_page_offsets length mismatch');
+    throw new Error("body_pages and body_page_offsets length mismatch");
   }
   const comments_map = extract_comments_data(doc.pkg);
@@ -69,7 +69,12 @@ export function extract_outline(
     const text = _heading_text(paragraph, comments_map);
     const style = _determine_heading_style(paragraph);
-    const owned_end = _find_owned_end(block_records, heading_indices, h_pos, level);
+    const owned_end = _find_owned_end(
+      block_records,
+      heading_indices,
+      h_pos,
+      level,
+    );
     const owned_blocks = block_records.slice(rec_idx + 1, owned_end);
     const has_table = _direct_has_table(block_records, rec_idx + 1, owned_end);
@@ -83,7 +88,11 @@ export function extract_outline(
   return nodes;
 }
-function _direct_has_table(block_records: _BlockRecord[], range_start: number, range_end: number): boolean {
+function _direct_has_table(
+  block_records: _BlockRecord[],
+  range_start: number,
+  range_end: number,
+): boolean {
   for (let idx = range_start; idx < range_end; idx++) {
     const rec = block_records[idx];
     if (rec.is_paragraph && _is_heading(rec.item)) return false;
@@ -92,7 +101,10 @@ function _direct_has_table(block_records: _BlockRecord[], range_start: number, r
   return false;
 }
-function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[] {
+function _walk_doc_body(
+  doc: DocumentObject,
+  comments_map: any,
+): _BlockRecord[] {
   const parts = Array.from(iter_document_parts(doc));
   let body_start_offset = 0;
   let body_part: any = null;
@@ -128,7 +140,13 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
       if (!is_first_block) cursor += 2;
-      records.push({ item, is_paragraph: true, is_table: false, start_offset: cursor, projected_length: block_len });
+      records.push({
+        item,
+        is_paragraph: true,
+        is_table: false,
+        start_offset: cursor,
+        projected_length: block_len,
+      });
       cursor += block_len;
       is_first_block = false;
     } else if (item instanceof Table) {
@@ -138,7 +156,13 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
       if (!is_first_block) cursor += 2;
       const table_start = cursor;
-      records.push({ item, is_paragraph: false, is_table: true, start_offset: table_start, projected_length: block_len });
+      records.push({
+        item,
+        is_paragraph: false,
+        is_table: true,
+        start_offset: table_start,
+        projected_length: block_len,
+      });
       _record_table_inner_blocks_lite(item, table_start, records, comments_map);
       cursor += block_len;
       is_first_block = false;
@@ -148,7 +172,12 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
   return records;
 }
-function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph, table_start_offset: number, comments_map: any): number {
+function _compute_inner_block_offset(
+  table: Table,
+  target_paragraph: Paragraph,
+  table_start_offset: number,
+  comments_map: any,
+): number {
   const target_el = target_paragraph._element;
   let cursor = table_start_offset;
   let rows_processed = 0;
@@ -165,7 +194,12 @@ function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph,
       if (cells_in_row > 0) cursor += 3;
-      const [new_cursor, found] = _walk_cell_for_offset(cell, target_el, cursor, comments_map);
+      const [new_cursor, found] = _walk_cell_for_offset(
+        cell,
+        target_el,
+        cursor,
+        comments_map,
+      );
       if (found) return new_cursor;
       cursor = new_cursor;
@@ -177,7 +211,12 @@ function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph,
   return table_start_offset;
 }
-function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: number, comments_map: any): [number, boolean] {
+function _walk_cell_for_offset(
+  cell: any,
+  target_el: any,
+  cell_start_cursor: number,
+  comments_map: any,
+): [number, boolean] {
   let cursor = cell_start_cursor;
   let is_first_block = true;
@@ -190,9 +229,15 @@ function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: num
       const p_text = build_paragraph_text(inner_item, comments_map, false);
       cursor += (prefix + p_text).length;
     } else if (inner_item instanceof Table) {
-      const nested_offset = _compute_inner_block_offset(inner_item, new Paragraph(target_el, null), cursor, comments_map);
+      const nested_offset = _compute_inner_block_offset(
+        inner_item,
+        new Paragraph(target_el, null),
+        cursor,
+        comments_map,
+      );
       if (nested_offset !== cursor) {
-        if (_element_is_descendant(target_el, inner_item._element)) return [nested_offset, true];
+        if (_element_is_descendant(target_el, inner_item._element))
+          return [nested_offset, true];
       }
       const table_text = extract_table(inner_item, comments_map, false, 0);
       cursor += table_text ? table_text.length : 0;
@@ -202,7 +247,10 @@ function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: num
   return [cursor, false];
 }
-function _element_is_descendant(target_el: Element, ancestor_el: Element): boolean {
+function _element_is_descendant(
+  target_el: Element,
+  ancestor_el: Element,
+): boolean {
   let cur: Node | null = target_el.parentNode;
   while (cur) {
     if (cur === ancestor_el) return true;
@@ -211,7 +259,12 @@ function _element_is_descendant(target_el: Element, ancestor_el: Element): boole
   return false;
 }
-function _record_table_inner_blocks_lite(table: Table, inherited_offset: number, records: _BlockRecord[], comments_map: any) {
+function _record_table_inner_blocks_lite(
+  table: Table,
+  inherited_offset: number,
+  records: _BlockRecord[],
+  comments_map: any,
+) {
   const seen_cells = new Set();
   for (const row of table.rows) {
     for (const cell of row.cells) {
@@ -220,11 +273,35 @@ function _record_table_inner_blocks_lite(table: Table, inherited_offset: number,
       for (const inner_item of iter_block_items(cell)) {
         if (inner_item instanceof Paragraph) {
-          const true_offset = _is_heading(inner_item) ? _compute_inner_block_offset(table, inner_item, inherited_offset, comments_map) : inherited_offset;
-          records.push({ item: inner_item, is_paragraph: true, is_table: false, start_offset: true_offset, projected_length: 0 });
+          const true_offset = _is_heading(inner_item)
+            ? _compute_inner_block_offset(
+                table,
+                inner_item,
+                inherited_offset,
+                comments_map,
+              )
+            : inherited_offset;
+          records.push({
+            item: inner_item,
+            is_paragraph: true,
+            is_table: false,
+            start_offset: true_offset,
+            projected_length: 0,
+          });
         } else if (inner_item instanceof Table) {
-          records.push({ item: inner_item, is_paragraph: false, is_table: true, start_offset: inherited_offset, projected_length: 0 });
-          _record_table_inner_blocks_lite(inner_item, inherited_offset, records, comments_map);
+          records.push({
+            item: inner_item,
+            is_paragraph: false,
+            is_table: true,
+            start_offset: inherited_offset,
+            projected_length: 0,
+          });
+          _record_table_inner_blocks_lite(
+            inner_item,
+            inherited_offset,
+            records,
+            comments_map,
+          );
         }
       }
     }
@@ -235,19 +312,20 @@ function _project_part(part: any, comments_map: any): string {
   const blocks: string[] = [];
   const c_type = part.constructor.name;
-  if (c_type === 'NotesPart') {
-    const header = part.note_type === 'fn' ? '## Footnotes' : '## Endnotes';
+  if (c_type === "NotesPart") {
+    const header = part.note_type === "fn" ? "## Footnotes" : "## Endnotes";
     blocks.push(`---\n${header}`);
   }
   let is_first_para = true;
   for (const item of iter_block_items(part)) {
-    if (item.constructor.name === 'FootnoteItem') {
+    if (item.constructor.name === "FootnoteItem") {
       const fn_text = _project_part(item, comments_map);
       if (fn_text) blocks.push(fn_text);
     } else if (item instanceof Paragraph) {
       let prefix = get_paragraph_prefix(item);
-      if (is_first_para && c_type === 'FootnoteItem') prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
+      if (is_first_para && c_type === "FootnoteItem")
+        prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
       const p_text = build_paragraph_text(item, comments_map, false);
       blocks.push(prefix + p_text);
       is_first_para = false;
@@ -258,16 +336,19 @@ function _project_part(part: any, comments_map: any): string {
     }
   }
-  return blocks.join('\n\n');
+  return blocks.join("\n\n");
 }
 /**
  * Reports whether a paragraph is treated as a heading: its computed prefix
  * (from get_paragraph_prefix) must match _HEADING_PREFIX_RE, i.e. start with
  * one to six '#' characters followed by a single space.
  */
 function _is_heading(paragraph: Paragraph): boolean {
   return _HEADING_PREFIX_RE.test(get_paragraph_prefix(paragraph));
 }
-function _heading_passes_quality_filter(paragraph: Paragraph, comments_map: any): boolean {
+function _heading_passes_quality_filter(
+  paragraph: Paragraph,
+  comments_map: any,
+): boolean {
   const style = _determine_heading_style(paragraph);
-  if (style !== '(heuristic)') return true;
+  if (style !== "(heuristic)") return true;
   const text = _heading_text(paragraph, comments_map);
   if (!text) return false;
   const word_count = (text.match(/\w+/g) || []).length;
@@ -287,60 +368,109 @@ function _heading_text(paragraph: Paragraph, comments_map: any): string {
 }
 /**
  * Removes CriticMarkup-style annotations from `text`.
  *
  * Spans delimited by {-- --} and {>> <<} are dropped entirely; spans delimited
  * by {++ ++} and {== ==} are unwrapped so only their inner text remains.
  * Matching is non-greedy and may cross line breaks ([\s\S]).
  * A falsy input yields the empty string.
  * The removed and added lines in this hunk differ only in quote style.
  */
 function _strip_critic_markup(text: string): string {
-  if (!text) return '';
-  text = text.replace(/\{--[\s\S]*?--\}/g, '');
-  text = text.replace(/\{>>[\s\S]*?<<\}/g, '');
-  text = text.replace(/\{\+\+([\s\S]*?)\+\+\}/g, '$1');
-  text = text.replace(/\{==([\s\S]*?)==\}/g, '$1');
+  if (!text) return "";
+  text = text.replace(/\{--[\s\S]*?--\}/g, "");
+  text = text.replace(/\{>>[\s\S]*?<<\}/g, "");
+  text = text.replace(/\{\+\+([\s\S]*?)\+\+\}/g, "$1");
+  text = text.replace(/\{==([\s\S]*?)==\}/g, "$1");
   return text;
 }
 /**
  * Unwraps simple inline emphasis markers, keeping the wrapped text.
  *
  * Handled, in order: double-asterisk pairs, double-underscore pairs, then
  * single-underscore pairs. The single-underscore rule only fires when the
  * underscores are not adjacent to a word character on the outside and the
  * wrapped text starts and ends with a non-space character, so identifiers
  * such as snake_case are left alone. `.` does not match newlines here, so
  * markers never pair across lines. A falsy input yields the empty string.
  * The removed and added lines in this hunk differ only in quote style.
  */
 function _strip_inline_formatting(text: string): string {
-  if (!text) return '';
-  text = text.replace(/\*\*(.+?)\*\*/g, '$1');
-  text = text.replace(/__(.+?)__/g, '$1');
-  text = text.replace(/(?<!\w)_(\S(?:.*?\S)?)_(?!\w)/g, '$1');
+  if (!text) return "";
+  text = text.replace(/\*\*(.+?)\*\*/g, "$1");
+  text = text.replace(/__(.+?)__/g, "$1");
+  text = text.replace(/(?<!\w)_(\S(?:.*?\S)?)_(?!\w)/g, "$1");
   return text;
 }
 /**
  * Classifies why a paragraph counts as a heading and returns a label: a style
  * name, "(outline_level)", or "(heuristic)".
  *
  * This hunk interleaves two versions. The added (+) lines:
  *  - take the style id from w:pStyle, else the default paragraph style;
  *  - take the outline level from the paragraph's own w:outlineLvl when its
  *    value is all digits, else from the style-cache entry for that style id;
  *  - use the cached style name, falling back to the raw style id, and rewrite
  *    a leading "heading" (any case) to "Heading" and an exact "title" (any
  *    case) to "Title";
  *  - with an outline level in 0..8, return the normalised name when it starts
  *    with "Heading" or equals "Title", otherwise "(outline_level)";
  *  - without one, return the name when it starts with "Heading", equals
  *    "Title", or contains "Heading" + optional space + a single digit 1-6;
  *    otherwise "(heuristic)".
  * The removed (-) lines only honoured a paragraph-level w:outlineLvl and did
  * no case normalisation.
  *
  * NOTE(review): if a cache entry lacks outline_level the variable becomes
  * undefined rather than null; the 0..8 range check still rejects it — confirm
  * that is the intended handling.
  */
 function _determine_heading_style(paragraph: Paragraph): string {
-  const [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
-  const pPr = findChild(paragraph._element, 'w:pPr');
+  const [style_cache, default_pstyle] = _get_style_cache(
+    paragraph._parent.part || paragraph._parent,
+  );
+  const pPr = findChild(paragraph._element, "w:pPr");
   let style_id = default_pstyle;
   if (pPr) {
-    const oLvl = findChild(pPr, 'w:outlineLvl');
-    if (oLvl && /^\d+$/.test(oLvl.getAttribute('w:val') || '')) {
-      const style = _safe_style_name(paragraph, style_cache, default_pstyle);
-      if (style && (style.startsWith('Heading') || style === 'Title')) return style;
-      return '(outline_level)';
-    }
-    const pStyle = findChild(pPr, 'w:pStyle');
-    if (pStyle) style_id = pStyle.getAttribute('w:val') || default_pstyle;
+    const pStyle = findChild(pPr, "w:pStyle");
+    if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
   }
-  const style_name = (style_id && style_cache && style_cache[style_id]) ? style_cache[style_id].name : null;
-  if (style_name && (style_name.startsWith('Heading') || style_name === 'Title')) return style_name;
+  let outline_level: number | null = null;
+  if (pPr) {
+    const oLvl = findChild(pPr, "w:outlineLvl");
+    if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
+      outline_level = parseInt(oLvl.getAttribute("w:val") as string, 10);
+    }
+  }
-  if (style_name && /Heading[ ]?([1-6])(?![0-9])/.test(style_name)) return style_name;
+  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
+    outline_level = style_cache[style_id].outline_level;
+  }
+  const style_name =
+    style_id && style_cache && style_cache[style_id]
+      ? style_cache[style_id].name
+      : style_id;
+  let normalized_style_name = style_name;
+  if (normalized_style_name && typeof normalized_style_name === "string") {
+    if (normalized_style_name.toLowerCase().startsWith("heading")) {
+      normalized_style_name = normalized_style_name.replace(/^heading/i, "Heading");
+    } else if (normalized_style_name.toLowerCase() === "title") {
+      normalized_style_name = "Title";
+    }
+  }
+  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
+    if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
+      return normalized_style_name;
+    }
+    return "(outline_level)";
+  }
-  return '(heuristic)';
+  if (
+    normalized_style_name &&
+    (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")
+  )
+    return normalized_style_name;
+  if (normalized_style_name && /Heading[ ]?([1-6])(?![0-9])/.test(normalized_style_name))
+    return normalized_style_name;
+  return "(heuristic)";
 }
 /**
  * Resolves the name of a paragraph's style: the id comes from w:pStyle
  * (falling back to default_pstyle) and is looked up in style_cache.
  *
  * On a cache miss the removed (-) line returned null; the added (+) lines
  * return the style id itself instead (which may be empty or undefined when
  * no id could be determined).
  *
  * NOTE(review): the only call visible in this diff sits in the removed lines
  * of _determine_heading_style — confirm this helper is still referenced.
  */
-function _safe_style_name(paragraph: Paragraph, style_cache: any, default_pstyle: any): string | null {
-  const pPr = findChild(paragraph._element, 'w:pPr');
+function _safe_style_name(
+  paragraph: Paragraph,
+  style_cache: any,
+  default_pstyle: any,
+): string | null {
+  const pPr = findChild(paragraph._element, "w:pPr");
   let style_id = default_pstyle;
   if (pPr) {
-    const pStyle = findChild(pPr, 'w:pStyle');
-    if (pStyle) style_id = pStyle.getAttribute('w:val') || default_pstyle;
+    const pStyle = findChild(pPr, "w:pStyle");
+    if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
   }
-  return (style_id && style_cache && style_cache[style_id]) ? style_cache[style_id].name : null;
+  return style_id && style_cache && style_cache[style_id]
+    ? style_cache[style_id].name
+    : style_id;
 }
 /**
  * Finds where the section owned by one heading ends.
  *
  * Scans the headings after position `current_h_pos` in `heading_indices` and
  * returns the block_records index of the first one whose _heading_level is
  * less than or equal to `current_level`. When no such heading follows, the
  * section runs to the end and block_records.length is returned.
  * The removed and added lines in this hunk differ only in line wrapping.
  */
-function _find_owned_end(block_records: _BlockRecord[], heading_indices: number[], current_h_pos: number, current_level: number): number {
-  for (let next_h_pos = current_h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
+function _find_owned_end(
+  block_records: _BlockRecord[],
+  heading_indices: number[],
+  current_h_pos: number,
+  current_level: number,
+): number {
+  for (
+    let next_h_pos = current_h_pos + 1;
+    next_h_pos < heading_indices.length;
+    next_h_pos++
+  ) {
     const next_idx = heading_indices[next_h_pos];
-    if (_heading_level(block_records[next_idx].item) <= current_level) return next_idx;
+    if (_heading_level(block_records[next_idx].item) <= current_level)
+      return next_idx;
   }
   return block_records.length;
 }
@@ -351,12 +481,12 @@ function _collect_footnote_ids(owned_blocks: _BlockRecord[]): string[] {
   for (const rec of owned_blocks) {
     if (!rec.is_paragraph) continue;
     for (const event of iter_paragraph_content(rec.item)) {
-      if (!('type' in event)) continue;
-      let fn_id = '';
-      if (event.type === 'footnote') fn_id = `fn-${event.id}`;
-      else if (event.type === 'endnote') fn_id = `en-${event.id}`;
+      if (!("type" in event)) continue;
+      let fn_id = "";
+      if (event.type === "footnote") fn_id = `fn-${event.id}`;
+      else if (event.type === "endnote") fn_id = `en-${event.id}`;
       else continue;
       if (!seen.has(fn_id)) {
         seen.add(fn_id);
         ordered.push(fn_id);
@@ -374,4 +504,4 @@ function _offset_to_page(offset: number, body_page_offsets: number[]): number {
     else break;
   }
   return page;
-}
+}

package/src/sanitize/core.ts ADDED Viewed

@@ -0,0 +1,130 @@
+import { DocumentObject } from '../docx/bridge.js';
+import { SanitizeReport } from './report.js';
+import * as transforms from './transforms.js';
+import { findAllDescendants } from '../docx/dom.js';
+export interface FinalizeOptions { // Caller-supplied settings read by finalize_document.
+  filename: string; // Passed to the SanitizeReport constructor.
+  sanitize_mode?: 'full' | 'keep-markup' | 'baseline'; // When omitted, finalize_document labels the report 'full'.
+  accept_all?: boolean; // In 'full' mode, must be truthy for a document with tracked changes to proceed; otherwise the result is 'blocked'.
+  protection_mode?: 'read_only' | 'encrypt' | null; // Both values inject a read-only w:documentProtection into word/settings.xml; 'encrypt' also adds a fallback warning.
+  password?: string | null; // Not read by finalize_document as shown in this file.
+  author?: string | null; // Passed to the report; in 'keep-markup' mode also replaces comment and tracked-change authors.
+  export_pdf?: boolean; // When truthy, only a "skipped" warning is added; no PDF is produced here.
+}
+export interface FinalizeResult { // What finalize_document resolves with.
+  reportText: string; // Output of report.render().
+  outBuffer?: Buffer; // Result of doc.save(); absent when the run was blocked before saving.
+}
+/**
+ * Sanitizes `doc` in place, saves it, and returns the rendered sanitize report.
+ *
+ * Steps, in order: mode-specific handling of tracked changes and comments, the
+ * common stripping/scrubbing transforms, optional read-only protection, removal
+ * of an unused `xmlns:w16du` declaration, then `doc.save()`.
+ *
+ * @param doc Document to sanitize; its parts are mutated by the transforms.
+ * @param options See FinalizeOptions. An omitted `sanitize_mode` is treated as 'full'.
+ * @returns The rendered report and the saved buffer. When 'full' mode finds
+ *   tracked changes and `accept_all` is not set, the report is marked 'blocked'
+ *   and the function returns before saving, so `outBuffer` is absent.
+ */
+export async function finalize_document(doc: DocumentObject, options: FinalizeOptions): Promise<FinalizeResult> {
+  // Resolve the mode once. The report used to default to 'full' while the
+  // branches compared the raw option, so an omitted mode was reported as 'full'
+  // even though tracked changes and comments were never processed.
+  const mode = options.sanitize_mode || 'full';
+  const report = new SanitizeReport(options.filename, mode, options.author || null);
+  if (mode === 'full') {
+    // counts is indexed as [insertions, deletions, formatting] (see the message below).
+    const counts = transforms.count_tracked_changes(doc);
+    const total = counts[0] + counts[1] + counts[2];
+    report.tracked_changes_found = total;
+    if (total > 0 && !options.accept_all) {
+      // Refuse to silently accept edits nobody has reviewed.
+      report.status = 'blocked';
+      report.blocked_reason = `Document contains ${total} unresolved tracked changes (${counts[0]} insertions, ${counts[1]} deletions, ${counts[2]} formatting). Review in Word first, or set accept_all=true.`;
+      return { reportText: report.render() };
+    }
+    if (total > 0) {
+      const authors = transforms.get_track_change_authors(doc);
+      if (authors.size > 1) {
+        report.warnings.push(`Multiple authors detected in tracked changes: ${Array.from(authors).sort().join(', ')}. Review per-change list before sending.`);
+      }
+      report.add_transform_lines(transforms.accept_all_tracked_changes(doc));
+      report.tracked_changes_accepted = total;
+    }
+    // Capture the comment count before the comments are removed.
+    const commentsSummary = transforms.get_comments_summary(doc);
+    report.comments_removed = commentsSummary.total;
+    report.add_transform_lines(transforms.remove_all_comments(doc));
+  } else if (mode === 'keep-markup') {
+    // Basic support for keep-markup in TS: markup stays, authors are optionally replaced.
+    const counts = transforms.count_tracked_changes(doc);
+    report.tracked_changes_found = counts[0] + counts[1] + counts[2];
+    report.tracked_changes_kept = report.tracked_changes_found;
+    if (options.author) {
+      report.add_transform_lines(transforms.replace_comment_authors(doc, options.author));
+      report.add_transform_lines(transforms.replace_change_authors(doc, options.author));
+    }
+  }
+  // Common transforms, applied in every mode.
+  report.add_transform_lines(transforms.strip_rsid(doc));
+  report.add_transform_lines(transforms.strip_para_ids(doc));
+  report.add_transform_lines(transforms.strip_proof_errors(doc));
+  report.add_transform_lines(transforms.strip_empty_properties(doc));
+  report.add_transform_lines(transforms.strip_hidden_text(doc));
+  report.add_transform_lines(transforms.coalesce_runs(doc));
+  report.add_transform_lines(transforms.scrub_doc_properties(doc));
+  report.add_transform_lines(transforms.scrub_timestamps(doc));
+  report.add_transform_lines(transforms.strip_custom_xml(doc));
+  report.add_transform_lines(transforms.strip_image_alt_text(doc));
+  const warnings = transforms.audit_hyperlinks(doc);
+  for (const w of warnings) report.warnings.push(w);
+  report.add_transform_lines(transforms.normalize_change_dates(doc));
+  // Protection (Settings injection)
+  if (options.protection_mode === 'read_only' || options.protection_mode === 'encrypt') {
+    if (options.protection_mode === 'encrypt') {
+      report.warnings.push("Encryption mode (AES compound wrappers) is strictly unsupported in the zero-dependency Node engine. Falling back to native Word Read-Only lock.");
+    }
+    const settingsPart = doc.pkg.getPartByPath('word/settings.xml');
+    if (settingsPart) {
+      const docEl = settingsPart._element.ownerDocument!;
+      // Reuse an existing protection element so the part never ends up with two.
+      let prot = transforms.findDescendantsByLocalName(settingsPart._element, 'documentProtection')[0];
+      if (!prot) {
+        prot = docEl.createElement('w:documentProtection');
+        // Word expects documentProtection to be inserted before elements like w:autoFormatOverride, w:styleLockTheme, etc.
+        // For standard robustness without complex XSD enforcement, appendChild generally works.
+        settingsPart._element.appendChild(prot);
+      }
+      prot.setAttribute('w:edit', 'readOnly');
+      prot.setAttribute('w:enforcement', '1');
+      report.structural_lines.push("Document locked (Read-Only enforcement injected into settings.xml)");
+    } else {
+      // Previously this case was silent: the caller asked for a lock and got none.
+      report.warnings.push("Document protection was requested but word/settings.xml was not found; no Read-Only lock was applied.");
+    }
+  }
+  if (options.export_pdf) {
+    report.warnings.push("PDF export requires the Python/Word COM environment and is skipped in this zero-dependency Node agent.");
+  }
+  // Clean up leaked Microsoft namespaces
+  for (const part of doc.pkg.parts) {
+    // Match the exact injection condition from RedlineEngine constructor
+    const isWordXmlPart = part === doc.part || (part.contentType.includes('wordprocessingml') && part.contentType.endsWith('+xml'));
+    if (!isWordXmlPart || !part._element.hasAttribute('xmlns:w16du')) continue;
+    // Root attributes first. The declaration 'xmlns:w16du' does not start with
+    // 'w16du:', so it can never be mistaken for a use of the prefix.
+    let hasW16du = Array.from(part._element.attributes || []).some(a => a.name.startsWith('w16du:'));
+    if (!hasW16du) {
+      // Then every descendant: either the element or one of its attributes may use the prefix.
+      const allNodes = findAllDescendants(part._element, '*');
+      for (const n of allNodes) {
+        if (n.tagName.startsWith('w16du:') || Array.from(n.attributes || []).some(a => a.name.startsWith('w16du:'))) {
+          hasW16du = true;
+          break;
+        }
+      }
+    }
+    // Only drop the declaration when nothing in the part depends on it.
+    if (!hasW16du) part._element.removeAttribute('xmlns:w16du');
+  }
+  if (report.warnings.length > 0) report.status = 'clean_with_warnings';
+  const outBuffer = await doc.save();
+  return { reportText: report.render(), outBuffer };
+}