npm - @adeu/core - Versions diffs - 1.6.7 → 1.6.9 - Mend

@adeu/core 1.6.7 → 1.6.9

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (25)

package/dist/index.cjs +3969 -1859
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +95 -8
package/dist/index.d.ts +95 -8
package/dist/index.js +3966 -1859
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/src/consistency.test.ts +134 -0
package/src/diff.test.ts +13 -1
package/src/diff.ts +220 -47
package/src/docx/bridge.ts +111 -57
package/src/docx/dom.ts +66 -7
package/src/domain.test.ts +280 -0
package/src/domain.ts +264 -10
package/src/engine.bugs.test.ts +481 -0
package/src/engine.ts +1346 -192
package/src/index.ts +7 -8
package/src/ingest.ts +8 -0
package/src/markup.ts +160 -53
package/src/outline.ts +199 -69
package/src/sanitize/core.ts +130 -0
package/src/sanitize/report.ts +125 -0
package/src/sanitize/sanitize.test.ts +237 -0
package/src/sanitize/transforms.ts +452 -0
package/src/utils/docx.ts +292 -158

package/src/docx/bridge.ts CHANGED Viewed

@@ -1,40 +1,54 @@
-import JSZip from 'jszip';
-import { parseXml, findChild, findAllDescendants, serializeXml } from './dom.js';
+import JSZip from "jszip";
+import {
+  parseXml,
+  findChild,
+  findAllDescendants,
+  serializeXml,
+} from "./dom.js";
 export class Relationship {
   constructor(
     public id: string,
     public type: string,
     public target: string,
-    public isExternal: boolean
+    public isExternal: boolean,
   ) {}
 }
 export class Part {
   public rels: Map<string, Relationship> = new Map();
   public _element: Element;
+  public package?: DocxPackage;
   constructor(
     public partname: string,
     public blob: string,
     element: Element,
-    public contentType: string
+    public contentType: string,
   ) {
     this._element = element;
   }
-  public addRelationship(id: string, type: string, target: string, isExternal: boolean = false) {
+  public addRelationship(
+    id: string,
+    type: string,
+    target: string,
+    isExternal: boolean = false,
+  ) {
     this.rels.set(id, new Relationship(id, type, target, isExternal));
-    // If this part represents a .rels file, update the XML directly
-    if (this._element.tagName === 'Relationships') {
+    // Directly append the relationship element to the document structure
+    if (this.partname.endsWith(".rels")) {
       const doc = this._element.ownerDocument;
       if (doc) {
-        const relEl = doc.createElement('Relationship');
-        relEl.setAttribute('Id', id);
-        relEl.setAttribute('Type', type);
-        relEl.setAttribute('Target', target);
-        if (isExternal) relEl.setAttribute('TargetMode', 'External');
+        // Use strict namespace to ensure it parses successfully on reload
+        const relEl = doc.createElementNS(
+          "http://schemas.openxmlformats.org/package/2006/relationships",
+          "Relationship",
+        );
+        relEl.setAttribute("Id", id);
+        relEl.setAttribute("Type", type);
+        relEl.setAttribute("Target", target);
+        if (isExternal) relEl.setAttribute("TargetMode", "External");
         this._element.appendChild(relEl);
       }
     }
@@ -49,32 +63,44 @@ export class DocxPackage {
   public getPartByPath(path: string): Part | undefined {
     // Strip leading slash for jszip compat
-    const searchPath = path.startsWith('/') ? path.substring(1) : path;
-    return this.parts.find((p) => p.partname === searchPath || p.partname === '/' + searchPath);
+    const searchPath = path.startsWith("/") ? path.substring(1) : path;
+    return this.parts.find(
+      (p) => p.partname === searchPath || p.partname === "/" + searchPath,
+    );
   }
   public nextPartname(pattern: string): string {
     let i = 1;
     while (true) {
-      const candidate = pattern.replace('%d', i === 1 ? '' : i.toString());
+      const candidate = pattern.replace("%d", i === 1 ? "" : i.toString());
       if (!this.getPartByPath(candidate)) return candidate;
       i++;
     }
   }
-  public addPart(partname: string, contentType: string, xmlString: string): Part {
+  public addPart(
+    partname: string,
+    contentType: string,
+    xmlString: string,
+  ): Part {
     const doc = parseXml(xmlString);
-    const part = new Part(partname, xmlString, doc.documentElement, contentType);
+    const part = new Part(
+      partname,
+      xmlString,
+      doc.documentElement,
+      contentType,
+    );
+    part.package = this;
     this.parts.push(part);
     // Update [Content_Types].xml
-    const ctPart = this.getPartByPath('[Content_Types].xml');
+    const ctPart = this.getPartByPath("[Content_Types].xml");
     if (ctPart) {
       const docCT = ctPart._element.ownerDocument;
       if (docCT) {
-        const override = docCT.createElement('Override');
-        override.setAttribute('PartName', partname);
-        override.setAttribute('ContentType', contentType);
+        const override = docCT.createElement("Override");
+        override.setAttribute("PartName", partname);
+        override.setAttribute("ContentType", contentType);
         ctPart._element.appendChild(override);
       }
     }
@@ -83,14 +109,18 @@ export class DocxPackage {
   public getOrCreateRelsPart(sourcePartname: string): Part {
     // e.g., /word/document.xml -> /word/_rels/document.xml.rels
-    const parts = sourcePartname.split('/');
+    const parts = sourcePartname.split("/");
     const file = parts.pop();
-    const relsPath = parts.join('/') + '/_rels/' + file + '.rels';
+    const relsPath = parts.join("/") + "/_rels/" + file + ".rels";
     let relsPart = this.getPartByPath(relsPath);
     if (!relsPart) {
       const xml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>`;
-      relsPart = this.addPart(relsPath, 'application/vnd.openxmlformats-package.relationships+xml', xml);
+      relsPart = this.addPart(
+        relsPath,
+        "application/vnd.openxmlformats-package.relationships+xml",
+        xml,
+      );
     }
     return relsPart;
   }
@@ -98,63 +128,75 @@ export class DocxPackage {
 export class DocumentObject {
   public part: Part;
-  public settings: { oddAndEvenPagesHeaderFooter: boolean } = { oddAndEvenPagesHeaderFooter: false };
+  public settings: { oddAndEvenPagesHeaderFooter: boolean } = {
+    oddAndEvenPagesHeaderFooter: false,
+  };
   // Simplification for the TS port: sections hold header/footer refs
-  public sections: any[] = [];
+  public sections: any[] = [];
-  constructor(public pkg: DocxPackage, part: Part) {
+  constructor(
+    public pkg: DocxPackage,
+    part: Part,
+  ) {
     this.part = part;
   }
   public get element(): Element {
-    return findChild(this.part._element, 'w:body') || this.part._element;
+    return findChild(this.part._element, "w:body") || this.part._element;
   }
   /**
    * Main entrypoint for loading a DOCX buffer into the DOM wrapper.
    */
-  public static async load(buffer: Buffer | ArrayBuffer): Promise<DocumentObject> {
+  public static async load(
+    buffer: Buffer | ArrayBuffer,
+  ): Promise<DocumentObject> {
     const zip = await JSZip.loadAsync(buffer);
     const pkg = new DocxPackage(zip);
     // 1. Load Content Types
-    const ctFile = zip.file('[Content_Types].xml');
+    const ctFile = zip.file("[Content_Types].xml");
     let contentTypes: Record<string, string> = {};
     if (ctFile) {
-      const ctXml = parseXml(await ctFile.async('text'));
-      const overrides = findAllDescendants(ctXml.documentElement, 'Override');
+      const ctXml = parseXml(await ctFile.async("text"));
+      const overrides = findAllDescendants(ctXml.documentElement, "Override");
       for (const override of overrides) {
-        contentTypes[override.getAttribute('PartName') || ''] = override.getAttribute('ContentType') || '';
+        contentTypes[override.getAttribute("PartName") || ""] =
+          override.getAttribute("ContentType") || "";
       }
     }
     // 2. Pre-load all XML parts to allow synchronous traversal later
     for (const [path, file] of Object.entries(zip.files)) {
-      if (!file.dir && (path.endsWith('.xml') || path.endsWith('.rels'))) {
-        const text = await file.async('text');
+      if (!file.dir && (path.endsWith(".xml") || path.endsWith(".rels"))) {
+        const text = await file.async("text");
         const doc = parseXml(text);
-        const cType = contentTypes['/' + path] || 'application/xml';
-        const part = new Part('/' + path, text, doc.documentElement, cType);
+        const cType = contentTypes["/" + path] || "application/xml";
+        const part = new Part("/" + path, text, doc.documentElement, cType);
+        part.package = pkg;
         pkg.parts.push(part);
       }
     }
     // 3. Resolve Relationships for the main document
-    const mainPart = pkg.getPartByPath('word/document.xml');
-    if (!mainPart) throw new Error('Invalid DOCX: Missing word/document.xml');
+    const mainPart = pkg.getPartByPath("word/document.xml");
+    if (!mainPart) throw new Error("Invalid DOCX: Missing word/document.xml");
     pkg.mainDocumentPart = mainPart;
-    const relsPart = pkg.getPartByPath('word/_rels/document.xml.rels');
+    const relsPart = pkg.getPartByPath("word/_rels/document.xml.rels");
     if (relsPart) {
-      const relElements = findAllDescendants(relsPart._element, 'Relationship');
+      const relElements = findAllDescendants(relsPart._element, "Relationship");
       for (const rel of relElements) {
-        const rId = rel.getAttribute('Id');
-        const target = rel.getAttribute('Target');
-        const type = rel.getAttribute('Type');
-        const targetMode = rel.getAttribute('TargetMode');
+        const rId = rel.getAttribute("Id");
+        const target = rel.getAttribute("Target");
+        const type = rel.getAttribute("Type");
+        const targetMode = rel.getAttribute("TargetMode");
         if (rId && target && type) {
-          mainPart.rels.set(rId, new Relationship(rId, type, target, targetMode === 'External'));
+          mainPart.rels.set(
+            rId,
+            new Relationship(rId, type, target, targetMode === "External"),
+          );
         }
       }
     }
@@ -166,24 +208,36 @@ export class DocumentObject {
     let rId = 1;
     while (this.part.rels.has(`rId${rId}`)) rId++;
     const id = `rId${rId}`;
     // In DOCX, targets in .rels are relative to the source part's directory.
     // /word/document.xml relating to /word/comments.xml -> target is "comments.xml"
-    const target = part.partname.split('/').pop()!;
+    const target = part.partname.split("/").pop()!;
     this.part.rels.set(id, new Relationship(id, relType, target, false));
     const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
     relsPart.addRelationship(id, relType, target, false);
   }
+  public relateToExternal(target: string, relType: string): string {
+    let rId = 1;
+    while (this.part.rels.has(`rId${rId}`)) rId++;
+    const id = `rId${rId}`;
+    this.part.rels.set(id, new Relationship(id, relType, target, true));
+    const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
+    relsPart.addRelationship(id, relType, target, true);
+    return id;
+  }
   public async save(): Promise<Buffer> {
     for (const part of this.pkg.parts) {
       let xmlStr = serializeXml(part._element.ownerDocument || part._element);
-      if (!xmlStr.startsWith('<?xml')) {
-        xmlStr = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + xmlStr;
+      if (!xmlStr.startsWith("<?xml")) {
+        xmlStr =
+          '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + xmlStr;
       }
       this.pkg.zip.file(part.partname.substring(1), xmlStr); // Strip leading slash for JSZip
     }
-    return this.pkg.zip.generateAsync({ type: 'nodebuffer' });
+    return this.pkg.zip.generateAsync({ type: "nodebuffer" });
   }
-}
+}

package/src/docx/dom.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
+import { DOMParser, XMLSerializer } from "@xmldom/xmldom";
 /**
  * Simulates docx.oxml.ns.qn. In xmldom, namespaces are preserved in tagName.
@@ -11,7 +11,10 @@ export const qn = (name: string) => name;
 export function findChild(element: Element, tagName: string): Element | null {
   for (let i = 0; i < element.childNodes.length; i++) {
     const child = element.childNodes[i];
-    if (child.nodeType === 1 /* ELEMENT_NODE */ && (child as Element).tagName === tagName) {
+    if (
+      child.nodeType === 1 /* ELEMENT_NODE */ &&
+      (child as Element).tagName === tagName
+    ) {
       return child as Element;
     }
   }
@@ -35,7 +38,10 @@ export function findChildren(element: Element, tagName: string): Element[] {
 /**
  * Simulates lxml element.findall(".//w:tag") - searches ALL descendants.
  */
-export function findAllDescendants(element: Element, tagName: string): Element[] {
+export function findAllDescendants(
+  element: Element,
+  tagName: string,
+): Element[] {
   return Array.from(element.getElementsByTagName(tagName));
 }
@@ -43,12 +49,65 @@ export function findAllDescendants(element: Element, tagName: string): Element[]
  * Parses raw XML strings into xmldom Documents.
  */
 export function parseXml(xmlString: string): Document {
-  return new DOMParser().parseFromString(xmlString, 'text/xml');
+  return new DOMParser().parseFromString(xmlString, "text/xml");
 }
 /**
- * Serializes an xmldom Document or Element back to a string.
+ * Serializes an xmldom Document or Element back to a string,
+ * enforcing deterministic attribute ordering on the root element.
  */
 export function serializeXml(node: Node): string {
-  return new XMLSerializer().serializeToString(node);
-}
+  let xml = new XMLSerializer().serializeToString(node);
+  // BUG-11: Deterministic namespace ordering on root elements.
+  const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
+  const match = rootTagRegex.exec(xml);
+  if (match && !match[1].startsWith("?")) {
+    const index = match.index;
+    const textBefore = xml.substring(0, index);
+    // Ensure this is the absolute root tag (only <?xml...?> allowed before it)
+    const isRoot =
+      !textBefore.includes("<") ||
+      (textBefore.trim().startsWith("<?xml") &&
+        (textBefore.match(/</g) || []).length === 1);
+    if (isRoot) {
+      const fullTag = match[0];
+      const elemStart = `<${match[1]}`;
+      const attrsStr = match[2];
+      const tagEnd = match[3];
+      // Robust extraction matching any quote style and internal spacing
+      const attrRegex = /([a-zA-Z0-9_:]+)\s*=\s*(["'])(.*?)\2/g;
+      const attrs: string[] = [];
+      let m;
+      while ((m = attrRegex.exec(attrsStr)) !== null) {
+        attrs.push(m[0].trim());
+      }
+      // Sort attributes: xmlns definitions first, then standard attributes
+      attrs.sort((a, b) => {
+        const aName = a.split("=")[0].trim();
+        const bName = b.split("=")[0].trim();
+        const aIsXmlns = aName.startsWith("xmlns");
+        const bIsXmlns = bName.startsWith("xmlns");
+        if (aIsXmlns && !bIsXmlns) return -1;
+        if (!aIsXmlns && bIsXmlns) return 1;
+        return aName < bName ? -1 : aName > bName ? 1 : 0;
+      });
+      const newTag =
+        attrs.length > 0
+          ? `${elemStart} ${attrs.join(" ")}${tagEnd}`
+          : `${elemStart}${tagEnd}`;
+      xml =
+        xml.substring(0, index) +
+        newTag +
+        xml.substring(index + fullTag.length);
+    }
+  }
+  return xml;
+}

package/src/domain.test.ts ADDED Viewed

@@ -0,0 +1,280 @@
+import { describe, it, expect } from 'vitest';
+import { createTestDocument, addParagraph } from './test-utils.js';
+import { DocumentObject } from './docx/bridge.js';
+import { extractTextFromBuffer } from './ingest.js';
+import { RedlineEngine, BatchValidationError } from './engine.js';
+import { ModifyText } from './models.js';
+import { split_structural_appendix } from './pagination.js';
+function addBookmark(paragraph: Element, name: string, idVal: string = "0", text: string = "") {
+  const doc = paragraph.ownerDocument!;
+  const start = doc.createElement('w:bookmarkStart');
+  start.setAttribute('w:name', name);
+  start.setAttribute('w:id', idVal);
+  paragraph.appendChild(start);
+  if (text) {
+    const r = doc.createElement('w:r');
+    const t = doc.createElement('w:t');
+    t.textContent = text;
+    if (text.includes(' ')) t.setAttribute('xml:space', 'preserve');
+    r.appendChild(t);
+    paragraph.appendChild(r);
+  }
+  const end = doc.createElement('w:bookmarkEnd');
+  end.setAttribute('w:id', idVal);
+  paragraph.appendChild(end);
+}
+function addCrossReference(paragraph: Element, refName: string, text: string) {
+  const doc = paragraph.ownerDocument!;
+  const fld = doc.createElement('w:fldSimple');
+  fld.setAttribute('w:instr', ` REF ${refName} \\h `);
+  const r = doc.createElement('w:r');
+  const t = doc.createElement('w:t');
+  t.textContent = text;
+  if (text.includes(' ')) t.setAttribute('xml:space', 'preserve');
+  r.appendChild(t);
+  fld.appendChild(r);
+  paragraph.appendChild(fld);
+}
+function addHyperlink(docObj: DocumentObject, paragraph: Element, url: string, text: string) {
+  const idStr = docObj.relateToExternal(url, 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink');
+  const doc = paragraph.ownerDocument!;
+  const hyperlink = doc.createElement('w:hyperlink');
+  hyperlink.setAttribute('r:id', idStr);
+  const r = doc.createElement('w:r');
+  const t = doc.createElement('w:t');
+  t.textContent = text;
+  if (text.includes(' ')) t.setAttribute('xml:space', 'preserve');
+  r.appendChild(t);
+  hyperlink.appendChild(r);
+  paragraph.appendChild(hyperlink);
+}
+function setupFootnotesPart(docObj: DocumentObject) {
+  const fnXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+    <w:footnotes xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
+        <w:footnote w:type="separator" w:id="-1">
+            <w:p><w:r><w:separator/></w:r></w:p>
+        </w:footnote>
+        <w:footnote w:id="1">
+            <w:p><w:r><w:t>Footnote content.</w:t></w:r></w:p>
+        </w:footnote>
+    </w:footnotes>`;
+  const partname = '/word/footnotes.xml';
+  const ctype = 'application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml';
+  const relType = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes';
+  const part = docObj.pkg.addPart(partname, ctype, fnXml);
+  docObj.relateTo(part, relType);
+}
+async function createDomainSemanticsStream() {
+  const doc = await createTestDocument();
+  // 1. Appendix / Definitions
+  const p1 = addParagraph(doc, "1. Definitions");
+  p1.insertBefore(p1.ownerDocument!.createElement('w:pPr'), p1.firstChild);
+  addParagraph(doc, '"Affiliate" means any entity that controls, is controlled by, or is under common control.');
+  addParagraph(doc, "“Confidential Information” means all non-public information disclosed by one party to the other.");
+  addParagraph(doc, "This paragraph does not define anything.");
+  const p2 = addParagraph(doc, "2. Obligations");
+  p2.insertBefore(p2.ownerDocument!.createElement('w:pPr'), p2.firstChild);
+  addParagraph(doc, "The Affiliate shall protect the Confidential Information to the highest standard.");
+  // 3. Bookmarks and Cross-References
+  const p3 = addParagraph(doc, "Subject to ");
+  addBookmark(p3, "MyBookmark_1", "1", "Anchored Clause");
+  const r3 = p3.ownerDocument!.createElement('w:r');
+  const t3 = p3.ownerDocument!.createElement('w:t');
+  t3.textContent = ", the parties agree to...";
+  t3.setAttribute('xml:space', 'preserve');
+  r3.appendChild(t3);
+  p3.appendChild(r3);
+  const p4 = addParagraph(doc, "As strictly stated in ");
+  addCrossReference(p4, "MyBookmark_1", "Anchored Clause");
+  const r4 = p4.ownerDocument!.createElement('w:r');
+  const t4 = p4.ownerDocument!.createElement('w:t');
+  t4.textContent = ", either party may terminate.";
+  t4.setAttribute('xml:space', 'preserve');
+  r4.appendChild(t4);
+  p4.appendChild(r4);
+  // 4. Internal Anchors
+  const pAnchor = addParagraph(doc, "Section 5. Indemnification");
+  addBookmark(pAnchor, "_Ref12345", "0");
+  const pNoise = addParagraph(doc, "Some text.");
+  addBookmark(pNoise, "_GoBack", "2");
+  addBookmark(pNoise, "_Toc123456789", "3");
+  // 5. Footnotes
+  const pFn = addParagraph(doc, "Sentence with footnote");
+  const rFn = pFn.ownerDocument!.createElement('w:r');
+  const ref = pFn.ownerDocument!.createElement('w:footnoteReference');
+  ref.setAttribute('w:id', "1");
+  rFn.appendChild(ref);
+  pFn.appendChild(rFn);
+  setupFootnotesPart(doc);
+  // 6. Links and Cross references
+  const pLink = addParagraph(doc, "Please visit ");
+  addHyperlink(doc, pLink, "https://adeu.com", "Adeu HQ");
+  const pXref = addParagraph(doc, "As detailed in ");
+  addCrossReference(pXref, "_Ref12345", "Section 5");
+  return doc.save();
+}
+describe('Domain Semantics Engine', () => {
+  it('extracts and projects structural appendix and diagnostics correctly', async () => {
+    const buf = await createDomainSemanticsStream();
+    const text = await extractTextFromBuffer(buf);
+    expect(text).toContain("<!-- READONLY_BOUNDARY_START -->");
+    expect(text).toContain("# Document Structure (Read-Only)");
+    // Definitions
+    expect(text).toContain("## Defined Terms");
+    expect(text).toContain('"Affiliate"');
+    expect(text).toContain('"Confidential Information"');
+    expect(text).toContain("used 1 times");
+    // Named Anchors & Back-References
+    expect(text).toContain("## Named Anchors");
+    expect(text).toContain("MyBookmark_1");
+    expect(text).toContain("Anchored to:");
+    expect(text).toContain("Referenced from:");
+    // Internal anchors & Noise suppression
+    expect(text).toContain("{#_Ref12345}");
+    expect(text).toContain("Section 5. Indemnification{#_Ref12345}");
+    expect(text).not.toContain("{#_GoBack}");
+    expect(text).not.toContain("{#_Toc123456789}");
+    // Footnotes
+    expect(text).toContain("[^fn-1]");
+    expect(text).toContain("## Footnotes");
+    expect(text).toContain("[^fn-1]: Footnote content.");
+    // Links
+    expect(text).toContain("[Adeu HQ](https://adeu.com)");
+    expect(text).toContain("[~Section 5~](#_Ref12345)");
+  });
+  const edgeCases = [
+    { target: "# Document Structure (Read-Only)", newText: "# Modified Document Structure", errChecker: (m: string) => m.includes('read-only boundary') || m.includes('appendix') },
+    { target: "Sentence with footnote[^fn-1]", newText: "Sentence with footnote", errChecker: (m: string) => m.includes('footnote') && (m.includes('delete') || m.includes('remove')) },
+    { target: "Sentence with footnote", newText: "Sentence with footnote[^fn-99]", errChecker: (m: string) => m.includes('footnote') && (m.includes('insert') || m.includes('create')) },
+    { target: "Some text.", newText: "Some text.{#_Ref99999}", errChecker: (m: string) => m.includes('anchor') },
+    { target: "Section 5. Indemnification{#_Ref12345}", newText: "Section 5. Indemnification{#_Ref99999}", errChecker: (m: string) => m.includes('anchor') },
+    { target: "[~Section 5~](#_Ref12345)", newText: "[~Section 6~](#_Ref12345)", errChecker: (m: string) => m.includes('cross-reference') || m.includes('rejected') },
+    { target: "[~Section 5~](#_Ref12345)", newText: "[~Section 5~](#_Ref99999)", errChecker: (m: string) => m.includes('dependency corruption') || m.includes('rejected') },
+    { target: "As detailed in [~Section 5~](#_Ref12345)", newText: "As detailed in [~Section 5~](#_Ref12345) and [~Section 6~](#_Ref999)", errChecker: (m: string) => m.includes('cross-reference') || m.includes('read-only') },
+    { target: "As detailed in [~Section 5~](#_Ref12345)", newText: "As detailed in nothing", errChecker: (m: string) => m.includes('cross-reference') || m.includes('delete') },
+    { target: "Please visit [Adeu HQ](https://adeu.com)", newText: "Please visit [Adeu HQ](https://adeu.com) and [Google](https://google.com)", errChecker: (m: string) => m.includes('hyperlink') || m.includes('insert') },
+    { target: "Please visit [Adeu HQ](https://adeu.com)", newText: "Please visit nothing", errChecker: (m: string) => m.includes('hyperlink') || m.includes('delete') },
+  ];
+  for (const tc of edgeCases) {
+    it(`rejects invalid edits: ${tc.target} -> ${tc.newText}`, async () => {
+      const buf = await createDomainSemanticsStream();
+      const doc = await DocumentObject.load(buf);
+      const engine = new RedlineEngine(doc);
+      const edit: ModifyText = { type: 'modify', target_text: tc.target, new_text: tc.newText };
+      let errorThrown = false;
+      try {
+        engine.process_batch([edit]);
+      } catch (e) {
+        errorThrown = true;
+        if (e instanceof BatchValidationError) {
+          const msg = e.errors.join('\n').toLowerCase();
+          expect(tc.errChecker(msg)).toBe(true);
+        } else {
+          throw e; // unexpected error
+        }
+      }
+      expect(errorThrown).toBe(true);
+    });
+  }
+  it('safely edits footnotes and accepts changes', async () => {
+    const buf = await createDomainSemanticsStream();
+    const doc = await DocumentObject.load(buf);
+    const engine = new RedlineEngine(doc);
+    const edit: ModifyText = { type: 'modify', target_text: "Footnote content.", new_text: "This is an edited footnote." };
+    const stats = engine.process_batch([edit]);
+    expect(stats.edits_applied).toBe(1);
+    engine.accept_all_revisions();
+    const outBuf = await doc.save();
+    const cleanText = await extractTextFromBuffer(outBuf, true);
+    expect(cleanText).toContain("[^fn-1]: This is an edited footnote.");
+  });
+  it('extracts defined terms and finds typos correctly', async () => {
+    const doc = await createTestDocument();
+    addParagraph(doc, '"Agreement" means this contract.');
+    addParagraph(doc, "“Party” shall mean either side.");
+    addParagraph(doc, '"Agreement" means another thing.'); // Duplicate
+    addParagraph(doc, 'This contract (hereinafter, the "Contract") is valid.');
+    addParagraph(doc, '"Confidential Information" on salainen asia.');
+    addParagraph(doc, '1.1 "Affiliate" tarkoittaa osakkuusyhtiötä.');
+    addParagraph(doc, 'We will act as the disclosing party (jäljempänä "Discloser").');
+    addParagraph(doc, 'This is a syntax example: ("Heading*") and ("<Term>")');
+    addParagraph(doc, "The Agreement is binding. The Contract is signed.");
+    addParagraph(doc, "There is an Agrement here.");
+    addParagraph(doc, "We shared Confidential Information with the Affiliate. The Discloser is happy.");
+    const buf = await doc.save();
+    const full_text = await extractTextFromBuffer(buf, false);
+    const [, appendix] = split_structural_appendix(full_text);
+    expect(appendix).toContain('"Agreement" \u2014 used');
+    expect(appendix).toContain('"Contract" \u2014 used');
+    expect(appendix).toContain('"Confidential Information" \u2014 used');
+    expect(appendix).toContain('"Affiliate" \u2014 used');
+    expect(appendix).toContain('"Discloser" \u2014 used');
+    expect(appendix).not.toContain('"Party"');
+    expect(appendix).not.toContain('"Heading*"');
+    expect(appendix).not.toContain('"<Term>"');
+    expect(appendix).toContain("[Error] Duplicate Definition: 'Agreement' is defined multiple times.");
+    expect(appendix).toContain("[Info] Possible Typos for 'Agreement': Found 'Agrement'");
+  });
+  it('reduces typo noise for short acronyms', async () => {
+    const doc = await createTestDocument();
+    addParagraph(doc, '"PSUs" means power supply units.');
+    addParagraph(doc, '"CPU" means central processing unit.');
+    addParagraph(doc, '"Party" means the entity.');
+    addParagraph(doc, "We rely on ESAs, LSPs, and GPUs for the servers.");
+    addParagraph(doc, "The GPU is very fast.");
+    addParagraph(doc, "The Pary signed the contract.");
+    addParagraph(doc, "We bought PSUs and a CPU.");
+    addParagraph(doc, "The Party begins today.");
+    const buf = await doc.save();
+    const full_text = await extractTextFromBuffer(buf, false);
+    const [, appendix] = split_structural_appendix(full_text);
+    expect(appendix).toContain("[Info] Possible Typos for 'Party': Found 'Pary'");
+    expect(appendix).not.toContain("'GPU'");
+    expect(appendix).not.toContain("'GPUs'");
+    expect(appendix).not.toContain("'ESAs'");
+    expect(appendix).not.toContain("'LSPs'");
+  });
+});