npm - @adeu/core - Version diff - 1.6.8 → 1.6.9 - Mend

@adeu/core 1.6.8 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/dist/index.cjs +1833 -540
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +75 -1
package/dist/index.d.ts +75 -1
package/dist/index.js +1832 -540
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/src/consistency.test.ts +134 -0
package/src/diff.test.ts +13 -1
package/src/diff.ts +189 -70
package/src/docx/bridge.ts +99 -57
package/src/docx/dom.ts +66 -7
package/src/engine.bugs.test.ts +481 -0
package/src/engine.ts +1346 -192
package/src/index.ts +1 -1
package/src/markup.ts +160 -53
package/src/outline.ts +199 -69
package/src/sanitize/core.ts +26 -0
package/src/sanitize/report.ts +1 -1
package/src/sanitize/sanitize.test.ts +47 -2
package/src/sanitize/transforms.ts +87 -0
package/src/utils/docx.ts +282 -157

package/src/outline.ts CHANGED Viewed

@@ -2,18 +2,18 @@
  * Structural outline extractor.
  */
-import { DocumentObject } from './docx/bridge.js';
-import { Paragraph, Table, DocxEvent } from './docx/primitives.js';
-import { build_paragraph_text, extract_table } from './ingest.js';
-import { extract_comments_data } from './comments.js';
-import { findChild } from './docx/dom.js';
+import { DocumentObject } from "./docx/bridge.js";
+import { Paragraph, Table, DocxEvent } from "./docx/primitives.js";
+import { build_paragraph_text, extract_table } from "./ingest.js";
+import { extract_comments_data } from "./comments.js";
+import { findChild } from "./docx/dom.js";
 import {
   _get_style_cache,
   get_paragraph_prefix,
   iter_block_items,
   iter_document_parts,
   iter_paragraph_content,
-} from './utils/docx.js';
+} from "./utils/docx.js";
 const _HEADING_PREFIX_RE = /^(#{1,6}) /;
 const _HEURISTIC_MIN_WORDS = 3;
@@ -40,10 +40,10 @@ export function extract_outline(
   projected_body: string,
   body_pages: string[],
   body_page_offsets: number[],
-  paragraph_offsets: Record<string, [number, number]> | null = null
+  paragraph_offsets: Record<string, [number, number]> | null = null,
 ): OutlineNode[] {
   if (body_pages.length !== body_page_offsets.length) {
-    throw new Error('body_pages and body_page_offsets length mismatch');
+    throw new Error("body_pages and body_page_offsets length mismatch");
   }
   const comments_map = extract_comments_data(doc.pkg);
@@ -69,7 +69,12 @@ export function extract_outline(
     const text = _heading_text(paragraph, comments_map);
     const style = _determine_heading_style(paragraph);
-    const owned_end = _find_owned_end(block_records, heading_indices, h_pos, level);
+    const owned_end = _find_owned_end(
+      block_records,
+      heading_indices,
+      h_pos,
+      level,
+    );
     const owned_blocks = block_records.slice(rec_idx + 1, owned_end);
     const has_table = _direct_has_table(block_records, rec_idx + 1, owned_end);
@@ -83,7 +88,11 @@ export function extract_outline(
   return nodes;
 }
-function _direct_has_table(block_records: _BlockRecord[], range_start: number, range_end: number): boolean {
+function _direct_has_table(
+  block_records: _BlockRecord[],
+  range_start: number,
+  range_end: number,
+): boolean {
   for (let idx = range_start; idx < range_end; idx++) {
     const rec = block_records[idx];
     if (rec.is_paragraph && _is_heading(rec.item)) return false;
@@ -92,7 +101,10 @@ function _direct_has_table(block_records: _BlockRecord[], range_start: number, r
   return false;
 }
-function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[] {
+function _walk_doc_body(
+  doc: DocumentObject,
+  comments_map: any,
+): _BlockRecord[] {
   const parts = Array.from(iter_document_parts(doc));
   let body_start_offset = 0;
   let body_part: any = null;
@@ -128,7 +140,13 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
       if (!is_first_block) cursor += 2;
-      records.push({ item, is_paragraph: true, is_table: false, start_offset: cursor, projected_length: block_len });
+      records.push({
+        item,
+        is_paragraph: true,
+        is_table: false,
+        start_offset: cursor,
+        projected_length: block_len,
+      });
       cursor += block_len;
       is_first_block = false;
     } else if (item instanceof Table) {
@@ -138,7 +156,13 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
       if (!is_first_block) cursor += 2;
       const table_start = cursor;
-      records.push({ item, is_paragraph: false, is_table: true, start_offset: table_start, projected_length: block_len });
+      records.push({
+        item,
+        is_paragraph: false,
+        is_table: true,
+        start_offset: table_start,
+        projected_length: block_len,
+      });
       _record_table_inner_blocks_lite(item, table_start, records, comments_map);
       cursor += block_len;
       is_first_block = false;
@@ -148,7 +172,12 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
   return records;
 }
-function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph, table_start_offset: number, comments_map: any): number {
+function _compute_inner_block_offset(
+  table: Table,
+  target_paragraph: Paragraph,
+  table_start_offset: number,
+  comments_map: any,
+): number {
   const target_el = target_paragraph._element;
   let cursor = table_start_offset;
   let rows_processed = 0;
@@ -165,7 +194,12 @@ function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph,
       if (cells_in_row > 0) cursor += 3;
-      const [new_cursor, found] = _walk_cell_for_offset(cell, target_el, cursor, comments_map);
+      const [new_cursor, found] = _walk_cell_for_offset(
+        cell,
+        target_el,
+        cursor,
+        comments_map,
+      );
       if (found) return new_cursor;
       cursor = new_cursor;
@@ -177,7 +211,12 @@ function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph,
   return table_start_offset;
 }
-function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: number, comments_map: any): [number, boolean] {
+function _walk_cell_for_offset(
+  cell: any,
+  target_el: any,
+  cell_start_cursor: number,
+  comments_map: any,
+): [number, boolean] {
   let cursor = cell_start_cursor;
   let is_first_block = true;
@@ -190,9 +229,15 @@ function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: num
       const p_text = build_paragraph_text(inner_item, comments_map, false);
       cursor += (prefix + p_text).length;
     } else if (inner_item instanceof Table) {
-      const nested_offset = _compute_inner_block_offset(inner_item, new Paragraph(target_el, null), cursor, comments_map);
+      const nested_offset = _compute_inner_block_offset(
+        inner_item,
+        new Paragraph(target_el, null),
+        cursor,
+        comments_map,
+      );
       if (nested_offset !== cursor) {
-        if (_element_is_descendant(target_el, inner_item._element)) return [nested_offset, true];
+        if (_element_is_descendant(target_el, inner_item._element))
+          return [nested_offset, true];
       }
       const table_text = extract_table(inner_item, comments_map, false, 0);
       cursor += table_text ? table_text.length : 0;
@@ -202,7 +247,10 @@ function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: num
   return [cursor, false];
 }
-function _element_is_descendant(target_el: Element, ancestor_el: Element): boolean {
+function _element_is_descendant(
+  target_el: Element,
+  ancestor_el: Element,
+): boolean {
   let cur: Node | null = target_el.parentNode;
   while (cur) {
     if (cur === ancestor_el) return true;
@@ -211,7 +259,12 @@ function _element_is_descendant(target_el: Element, ancestor_el: Element): boole
   return false;
 }
-function _record_table_inner_blocks_lite(table: Table, inherited_offset: number, records: _BlockRecord[], comments_map: any) {
+function _record_table_inner_blocks_lite(
+  table: Table,
+  inherited_offset: number,
+  records: _BlockRecord[],
+  comments_map: any,
+) {
   const seen_cells = new Set();
   for (const row of table.rows) {
     for (const cell of row.cells) {
@@ -220,11 +273,35 @@ function _record_table_inner_blocks_lite(table: Table, inherited_offset: number,
       for (const inner_item of iter_block_items(cell)) {
         if (inner_item instanceof Paragraph) {
-          const true_offset = _is_heading(inner_item) ? _compute_inner_block_offset(table, inner_item, inherited_offset, comments_map) : inherited_offset;
-          records.push({ item: inner_item, is_paragraph: true, is_table: false, start_offset: true_offset, projected_length: 0 });
+          const true_offset = _is_heading(inner_item)
+            ? _compute_inner_block_offset(
+                table,
+                inner_item,
+                inherited_offset,
+                comments_map,
+              )
+            : inherited_offset;
+          records.push({
+            item: inner_item,
+            is_paragraph: true,
+            is_table: false,
+            start_offset: true_offset,
+            projected_length: 0,
+          });
         } else if (inner_item instanceof Table) {
-          records.push({ item: inner_item, is_paragraph: false, is_table: true, start_offset: inherited_offset, projected_length: 0 });
-          _record_table_inner_blocks_lite(inner_item, inherited_offset, records, comments_map);
+          records.push({
+            item: inner_item,
+            is_paragraph: false,
+            is_table: true,
+            start_offset: inherited_offset,
+            projected_length: 0,
+          });
+          _record_table_inner_blocks_lite(
+            inner_item,
+            inherited_offset,
+            records,
+            comments_map,
+          );
         }
       }
     }
@@ -235,19 +312,20 @@ function _project_part(part: any, comments_map: any): string {
   const blocks: string[] = [];
   const c_type = part.constructor.name;
-  if (c_type === 'NotesPart') {
-    const header = part.note_type === 'fn' ? '## Footnotes' : '## Endnotes';
+  if (c_type === "NotesPart") {
+    const header = part.note_type === "fn" ? "## Footnotes" : "## Endnotes";
     blocks.push(`---\n${header}`);
   }
   let is_first_para = true;
   for (const item of iter_block_items(part)) {
-    if (item.constructor.name === 'FootnoteItem') {
+    if (item.constructor.name === "FootnoteItem") {
       const fn_text = _project_part(item, comments_map);
       if (fn_text) blocks.push(fn_text);
     } else if (item instanceof Paragraph) {
       let prefix = get_paragraph_prefix(item);
-      if (is_first_para && c_type === 'FootnoteItem') prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
+      if (is_first_para && c_type === "FootnoteItem")
+        prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
       const p_text = build_paragraph_text(item, comments_map, false);
       blocks.push(prefix + p_text);
       is_first_para = false;
@@ -258,16 +336,19 @@ function _project_part(part: any, comments_map: any): string {
     }
   }
-  return blocks.join('\n\n');
+  return blocks.join("\n\n");
 }
 function _is_heading(paragraph: Paragraph): boolean {
   return _HEADING_PREFIX_RE.test(get_paragraph_prefix(paragraph));
 }
-function _heading_passes_quality_filter(paragraph: Paragraph, comments_map: any): boolean {
+function _heading_passes_quality_filter(
+  paragraph: Paragraph,
+  comments_map: any,
+): boolean {
   const style = _determine_heading_style(paragraph);
-  if (style !== '(heuristic)') return true;
+  if (style !== "(heuristic)") return true;
   const text = _heading_text(paragraph, comments_map);
   if (!text) return false;
   const word_count = (text.match(/\w+/g) || []).length;
@@ -287,60 +368,109 @@ function _heading_text(paragraph: Paragraph, comments_map: any): string {
 }
 function _strip_critic_markup(text: string): string {
-  if (!text) return '';
-  text = text.replace(/\{--[\s\S]*?--\}/g, '');
-  text = text.replace(/\{>>[\s\S]*?<<\}/g, '');
-  text = text.replace(/\{\+\+([\s\S]*?)\+\+\}/g, '$1');
-  text = text.replace(/\{==([\s\S]*?)==\}/g, '$1');
+  if (!text) return "";
+  text = text.replace(/\{--[\s\S]*?--\}/g, "");
+  text = text.replace(/\{>>[\s\S]*?<<\}/g, "");
+  text = text.replace(/\{\+\+([\s\S]*?)\+\+\}/g, "$1");
+  text = text.replace(/\{==([\s\S]*?)==\}/g, "$1");
   return text;
 }
 function _strip_inline_formatting(text: string): string {
-  if (!text) return '';
-  text = text.replace(/\*\*(.+?)\*\*/g, '$1');
-  text = text.replace(/__(.+?)__/g, '$1');
-  text = text.replace(/(?<!\w)_(\S(?:.*?\S)?)_(?!\w)/g, '$1');
+  if (!text) return "";
+  text = text.replace(/\*\*(.+?)\*\*/g, "$1");
+  text = text.replace(/__(.+?)__/g, "$1");
+  text = text.replace(/(?<!\w)_(\S(?:.*?\S)?)_(?!\w)/g, "$1");
   return text;
 }
 function _determine_heading_style(paragraph: Paragraph): string {
-  const [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
-  const pPr = findChild(paragraph._element, 'w:pPr');
+  const [style_cache, default_pstyle] = _get_style_cache(
+    paragraph._parent.part || paragraph._parent,
+  );
+  const pPr = findChild(paragraph._element, "w:pPr");
   let style_id = default_pstyle;
   if (pPr) {
-    const oLvl = findChild(pPr, 'w:outlineLvl');
-    if (oLvl && /^\d+$/.test(oLvl.getAttribute('w:val') || '')) {
-      const style = _safe_style_name(paragraph, style_cache, default_pstyle);
-      if (style && (style.startsWith('Heading') || style === 'Title')) return style;
-      return '(outline_level)';
-    }
-    const pStyle = findChild(pPr, 'w:pStyle');
-    if (pStyle) style_id = pStyle.getAttribute('w:val') || default_pstyle;
+    const pStyle = findChild(pPr, "w:pStyle");
+    if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
   }
-  const style_name = (style_id && style_cache && style_cache[style_id]) ? style_cache[style_id].name : null;
-  if (style_name && (style_name.startsWith('Heading') || style_name === 'Title')) return style_name;
+  let outline_level: number | null = null;
+  if (pPr) {
+    const oLvl = findChild(pPr, "w:outlineLvl");
+    if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
+      outline_level = parseInt(oLvl.getAttribute("w:val") as string, 10);
+    }
+  }
-  if (style_name && /Heading[ ]?([1-6])(?![0-9])/.test(style_name)) return style_name;
+  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
+    outline_level = style_cache[style_id].outline_level;
+  }
+  const style_name =
+    style_id && style_cache && style_cache[style_id]
+      ? style_cache[style_id].name
+      : style_id;
+  let normalized_style_name = style_name;
+  if (normalized_style_name && typeof normalized_style_name === "string") {
+    if (normalized_style_name.toLowerCase().startsWith("heading")) {
+      normalized_style_name = normalized_style_name.replace(/^heading/i, "Heading");
+    } else if (normalized_style_name.toLowerCase() === "title") {
+      normalized_style_name = "Title";
+    }
+  }
+  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
+    if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
+      return normalized_style_name;
+    }
+    return "(outline_level)";
+  }
-  return '(heuristic)';
+  if (
+    normalized_style_name &&
+    (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")
+  )
+    return normalized_style_name;
+  if (normalized_style_name && /Heading[ ]?([1-6])(?![0-9])/.test(normalized_style_name))
+    return normalized_style_name;
+  return "(heuristic)";
 }
-function _safe_style_name(paragraph: Paragraph, style_cache: any, default_pstyle: any): string | null {
-  const pPr = findChild(paragraph._element, 'w:pPr');
+function _safe_style_name(
+  paragraph: Paragraph,
+  style_cache: any,
+  default_pstyle: any,
+): string | null {
+  const pPr = findChild(paragraph._element, "w:pPr");
   let style_id = default_pstyle;
   if (pPr) {
-    const pStyle = findChild(pPr, 'w:pStyle');
-    if (pStyle) style_id = pStyle.getAttribute('w:val') || default_pstyle;
+    const pStyle = findChild(pPr, "w:pStyle");
+    if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
   }
-  return (style_id && style_cache && style_cache[style_id]) ? style_cache[style_id].name : null;
+  return style_id && style_cache && style_cache[style_id]
+    ? style_cache[style_id].name
+    : style_id;
 }
-function _find_owned_end(block_records: _BlockRecord[], heading_indices: number[], current_h_pos: number, current_level: number): number {
-  for (let next_h_pos = current_h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
+function _find_owned_end(
+  block_records: _BlockRecord[],
+  heading_indices: number[],
+  current_h_pos: number,
+  current_level: number,
+): number {
+  for (
+    let next_h_pos = current_h_pos + 1;
+    next_h_pos < heading_indices.length;
+    next_h_pos++
+  ) {
     const next_idx = heading_indices[next_h_pos];
-    if (_heading_level(block_records[next_idx].item) <= current_level) return next_idx;
+    if (_heading_level(block_records[next_idx].item) <= current_level)
+      return next_idx;
   }
   return block_records.length;
 }
@@ -351,12 +481,12 @@ function _collect_footnote_ids(owned_blocks: _BlockRecord[]): string[] {
   for (const rec of owned_blocks) {
     if (!rec.is_paragraph) continue;
     for (const event of iter_paragraph_content(rec.item)) {
-      if (!('type' in event)) continue;
-      let fn_id = '';
-      if (event.type === 'footnote') fn_id = `fn-${event.id}`;
-      else if (event.type === 'endnote') fn_id = `en-${event.id}`;
+      if (!("type" in event)) continue;
+      let fn_id = "";
+      if (event.type === "footnote") fn_id = `fn-${event.id}`;
+      else if (event.type === "endnote") fn_id = `en-${event.id}`;
       else continue;
       if (!seen.has(fn_id)) {
         seen.add(fn_id);
         ordered.push(fn_id);
@@ -374,4 +504,4 @@ function _offset_to_page(offset: number, body_page_offsets: number[]): number {
     else break;
   }
   return page;
-}
+}

package/src/sanitize/core.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { DocumentObject } from '../docx/bridge.js';
 import { SanitizeReport } from './report.js';
 import * as transforms from './transforms.js';
+import { findAllDescendants } from '../docx/dom.js';
 export interface FinalizeOptions {
   filename: string;
@@ -61,6 +62,7 @@ export async function finalize_document(doc: DocumentObject, options: FinalizeOp
   report.add_transform_lines(transforms.strip_proof_errors(doc));
   report.add_transform_lines(transforms.strip_empty_properties(doc));
   report.add_transform_lines(transforms.strip_hidden_text(doc));
+  report.add_transform_lines(transforms.coalesce_runs(doc));
   report.add_transform_lines(transforms.scrub_doc_properties(doc));
   report.add_transform_lines(transforms.scrub_timestamps(doc));
   report.add_transform_lines(transforms.strip_custom_xml(doc));
@@ -97,6 +99,30 @@ export async function finalize_document(doc: DocumentObject, options: FinalizeOp
     report.warnings.push("PDF export requires the Python/Word COM environment and is skipped in this zero-dependency Node agent.");
   }
+  // Clean up leaked Microsoft namespaces
+  for (const part of doc.pkg.parts) {
+    // Match the exact injection condition from RedlineEngine constructor
+    if (part === doc.part || (part.contentType.includes('wordprocessingml') && part.contentType.endsWith('+xml'))) {
+      if (part._element.hasAttribute('xmlns:w16du')) {
+        let hasW16du = false;
+        // Check root element attributes (excluding the xmlns declaration itself)
+        if (Array.from(part._element.attributes || []).some(a => a.name.startsWith('w16du:') && a.name !== 'xmlns:w16du')) {
+          hasW16du = true;
+        }
+        if (!hasW16du) {
+          const allNodes = findAllDescendants(part._element, '*');
+          for (const n of allNodes) {
+            if (n.tagName.startsWith('w16du:') || Array.from(n.attributes || []).some(a => a.name.startsWith('w16du:'))) {
+              hasW16du = true;
+              break;
+            }
+          }
+        }
+        if (!hasW16du) part._element.removeAttribute('xmlns:w16du');
+      }
+    }
+  }
   if (report.warnings.length > 0) report.status = 'clean_with_warnings';
   const outBuffer = await doc.save();

package/src/sanitize/report.ts CHANGED Viewed

@@ -116,7 +116,7 @@ export class SanitizeReport {
     if (this.warnings.length > 0) {
       lines.push(`Result: CLEAN WITH WARNINGS (${this.warnings.length} warning${this.warnings.length > 1 ? 's' : ''})`);
     } else {
-      lines.push("Result: SECURE & READY TO SEND");
+      lines.push(`Result: CLEAN (${this.tracked_changes_found} changes resolved, ${this.comments_removed} comments removed)`);
     }
     lines.push(sep);

package/src/sanitize/sanitize.test.ts CHANGED Viewed

@@ -162,8 +162,8 @@ describe('Finalize Document (Core)', () => {
     });
     const finalSettings = settingsPart._element.toString();
-    expect(res.reportText).toContain('Result: SECURE & READY TO SEND');
+    expect(res.reportText).toContain('Result: CLEAN');
     expect(res.reportText).toContain('Document locked (Read-Only');
     // Validate mathematical injection
@@ -189,4 +189,49 @@ describe('Finalize Document (Core)', () => {
     expect(res.reportText).toContain('unresolved tracked changes');
   });
+  describe('Resolved Bugs Sanitize Parity Verification', () => {
+    it('BUG-FRAG-1: Coalesces adjacent identical runs after accepting tracked changes', async () => {
+      const doc = createMockDoc(`
+        <w:p>
+          <w:r><w:t xml:space="preserve">The term shall be </w:t></w:r>
+          <w:ins w:id="1"><w:r><w:t>five (5)</w:t></w:r></w:ins>
+          <w:r><w:t xml:space="preserve"> years from the Effective Date.</w:t></w:r>
+        </w:p>
+      `);
+      doc.save = vi.fn().mockResolvedValue(Buffer.from('mock'));
+      await finalize_document(doc, {
+        filename: 'test.docx',
+        sanitize_mode: 'full',
+        accept_all: true
+      });
+      const xml = doc.element.toString();
+      // We should see a single coalesced string rather than fragmented <w:t> nodes
+      expect(xml).toContain('The term shall be five (5) years from the Effective Date.');
+      const runs = doc.element.getElementsByTagName('w:r');
+      // If they are coalesced properly, there will be exactly 1 run instead of 3
+      expect(runs.length).toBe(1);
+    });
+    it('BUG-NS-1: Strips unused xmlns:w16du namespace declarations during finalization', async () => {
+      const doc = createMockDoc('<w:p/>');
+      // Manually inject the namespace onto the absolute root as the engine does
+      doc.part._element.setAttribute('xmlns:w16du', 'http://schemas.microsoft.com/office/word/2023/wordml/word16du');
+      doc.save = vi.fn().mockResolvedValue(Buffer.from('mock'));
+      await finalize_document(doc, {
+        filename: 'test.docx',
+        sanitize_mode: 'full'
+      });
+      // The final stringified XML of the root document should NOT contain the unused namespace
+      const xml = doc.part._element.toString();
+      expect(xml).not.toContain('xmlns:w16du');
+    });
+  });
 });

package/src/sanitize/transforms.ts CHANGED Viewed

@@ -15,6 +15,93 @@ export function findDescendantsByLocalName(element: Element, localName: string):
   return result;
 }
+export function coalesce_runs(doc: DocumentObject): string[] {
+  let count = 0;
+  function areRunsIdentical(rPr1: Element | null, rPr2: Element | null): boolean {
+    const xml1 = rPr1 ? rPr1.toString() : '';
+    const xml2 = rPr2 ? rPr2.toString() : '';
+    return xml1 === xml2;
+  }
+  function hasSpecialContent(run: Element): boolean {
+    const safeTags = ['w:t', 'w:tab', 'w:br', 'w:cr', 'w:delText', 'w:rPr'];
+    for (let i = 0; i < run.childNodes.length; i++) {
+      const child = run.childNodes[i];
+      if (child.nodeType === 1) {
+        const tag = (child as Element).tagName;
+        if (!safeTags.includes(tag)) return true;
+      }
+    }
+    return false;
+  }
+  function coalesceContainer(container: Element) {
+    const children = Array.from(container.childNodes).filter(n => n.nodeType === 1) as Element[];
+    let i = 0;
+    while (i < children.length - 1) {
+      const curr = children[i];
+      const nxt = children[i + 1];
+      if (curr.tagName === 'w:r' && nxt.tagName === 'w:r') {
+        if (!hasSpecialContent(curr) && !hasSpecialContent(nxt)) {
+          const rPr1 = findChild(curr, 'w:rPr');
+          const rPr2 = findChild(nxt, 'w:rPr');
+          if (areRunsIdentical(rPr1, rPr2)) {
+            let last_t: Element | null = null;
+            for (let c = 0; c < curr.childNodes.length; c++) {
+              const child = curr.childNodes[c];
+              if (child.nodeType === 1 && ((child as Element).tagName === 'w:t' || (child as Element).tagName === 'w:delText')) {
+                last_t = child as Element;
+              }
+            }
+            const nxtChildren = Array.from(nxt.childNodes).filter(n => n.nodeType === 1) as Element[];
+            for (const child of nxtChildren) {
+              if (child.tagName === 'w:rPr') continue;
+              if ((child.tagName === 'w:t' || child.tagName === 'w:delText') && last_t && last_t.tagName === child.tagName) {
+                const t1 = last_t.textContent || '';
+                const t2 = child.textContent || '';
+                const combined = t1 + t2;
+                last_t.textContent = combined;
+                if (combined.trim() !== combined) {
+                  last_t.setAttribute('xml:space', 'preserve');
+                }
+              } else {
+                curr.appendChild(child);
+                if (child.tagName === 'w:t' || child.tagName === 'w:delText') {
+                  last_t = child;
+                }
+              }
+            }
+            container.removeChild(nxt);
+            children.splice(i + 1, 1);
+            count++;
+            continue;
+          }
+        }
+      }
+      if (['w:ins', 'w:del', 'w:hyperlink', 'w:sdt', 'w:smartTag', 'w:fldSimple', 'w:sdtContent'].includes(curr.tagName)) {
+        coalesceContainer(curr);
+      }
+      i++;
+    }
+    if (children.length > 0) {
+      const last = children[children.length - 1];
+      if (['w:ins', 'w:del', 'w:hyperlink', 'w:sdt', 'w:smartTag', 'w:fldSimple', 'w:sdtContent'].includes(last.tagName)) {
+        coalesceContainer(last);
+      }
+    }
+  }
+  const paragraphs = findAllDescendants(doc.element, 'w:p');
+  for (const p of paragraphs) coalesceContainer(p);
+  return count ? [`Adjacent identical runs coalesced: ${count}`] : [];
+}
 export function strip_rsid(doc: DocumentObject): string[] {
   let count = 0;
   const rsidAttrs = ['w:rsidR', 'w:rsidRPr', 'w:rsidRDefault', 'w:rsidP', 'w:rsidDel', 'w:rsidSect', 'w:rsidTr'];