npm - @adeu/core - Versions diffs - 1.9.0 → 1.10.0 - Mend

@adeu/core 1.9.0 → 1.10.0

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (18)

package/dist/index.cjs +615 -102
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +15 -2
package/dist/index.d.ts +15 -2
package/dist/index.js +615 -102
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/src/comments.ts +33 -14
package/src/consistency.test.ts +62 -4
package/src/diff.ts +42 -6
package/src/docx/dom.ts +2 -2
package/src/engine.bugs.test.ts +38 -0
package/src/engine.feedback.test.ts +144 -0
package/src/engine.issue23.test.ts +511 -0
package/src/engine.ts +513 -64
package/src/sanitize/core.ts +1 -0
package/src/sanitize/sanitize.test.ts +48 -6
package/src/sanitize/transforms.ts +88 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@adeu/core",
-  "version": "1.9.0",
+  "version": "1.10.0",
   "description": "",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",

package/src/comments.ts CHANGED Viewed

@@ -33,27 +33,27 @@ export class CommentsManager {
   constructor(public doc: DocumentObject) {}
-  public get commentsPart() {
+  public get commentsPart(): Part {
     if (!this._commentsPart) {
       this._commentsPart = this._getOrCreateCommentsPart();
       this._ensureNamespaces();
     }
-    return this._commentsPart;
+    return this._commentsPart!;
   }
-  public get extendedPart() {
+  public get extendedPart(): Part {
     if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
-    return this._extendedPart;
+    return this._extendedPart!;
   }
-  public get idsPart() {
+  public get idsPart(): Part {
     if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
-    return this._idsPart;
+    return this._idsPart!;
   }
-  public get extensiblePart() {
+  public get extensiblePart(): Part {
     if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
-    return this._extensiblePart;
+    return this._extensiblePart!;
   }
   public get nextId(): number {
@@ -124,9 +124,28 @@ export class CommentsManager {
   }
   private _ensureNamespaces() {
-    // In TS we use full xml reconstruction if attributes are missing, but xmldom generally tolerates
-    // runtime attributes if the namespace is declared. For absolute safety, if it's completely missing,
-    // we would rebuild. Assuming the parser caught them if they existed.
+    // When the comments part already existed (e.g. a legacy or pandoc-produced
+    // document) its root <w:comments> may omit the namespaces we rely on —
+    // most importantly w14, which qualifies the w14:paraId / w14:textId
+    // attributes we write on each comment paragraph. Without the declaration
+    // the serialised XML is invalid ("Namespace prefix w14 ... is not defined").
+    // Declare any missing namespace prefixes on the existing root element.
+    const root = this._commentsPart?._element;
+    if (!root) return;
+    const required: [string, string][] = [
+      ['xmlns:w', NS.w],
+      ['xmlns:w14', NS.w14],
+      ['xmlns:w15', NS.w15],
+      ['xmlns:w16cid', NS.w16cid],
+      ['xmlns:w16cex', NS.w16cex],
+      ['xmlns:mc', NS.mc],
+    ];
+    for (const [attr, uri] of required) {
+      if (!root.getAttribute(attr)) {
+        root.setAttribute(attr, uri);
+      }
+    }
   }
   private _getNextCommentId(): number {
@@ -273,10 +292,10 @@ export class CommentsManager {
   }
   public deleteComment(commentId: string) {
-    if (!this._commentsPart) return;
+    if (!this.commentsPart) return;
     let commentEl: Element | null = null;
-    for (const c of findAllDescendants(this._commentsPart._element, 'w:comment')) {
+    for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
       if (c.getAttribute('w:id') === commentId) {
         commentEl = c;
         break;
@@ -304,7 +323,7 @@ export class CommentsManager {
           if (child.getAttribute('w15:paraIdParent') === paraId) {
             const childParaId = child.getAttribute('w15:paraId');
             if (childParaId) {
-              for (const c of findAllDescendants(this._commentsPart._element, 'w:comment')) {
+              for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
                 for (const p of findAllDescendants(c, 'w:p')) {
                   if (p.getAttribute('w14:paraId') === childParaId) {
                     const cid = c.getAttribute('w:id');

package/src/consistency.test.ts CHANGED Viewed

@@ -8,12 +8,13 @@ import {
 } from "node:fs";
 import { resolve, dirname } from "node:path";
 import { fileURLToPath } from "node:url";
-import { execSync } from "node:child_process";
+import { execSync, execFileSync } from "node:child_process";
 import { tmpdir } from "node:os";
 import { DocumentObject } from "./docx/bridge.js";
 import { RedlineEngine } from "./engine.js";
 import { extractTextFromBuffer } from "./ingest.js";
+import { serializeXml } from "./docx/dom.js";
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);
@@ -28,10 +29,62 @@ const PYTHON_ABSTRACT_CMD = resolve(
 );
 const PYTHON_DIR = resolve(__dirname, "../../../../python");
+const CT_COMMENTS =
+  "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml";
 function normalizeMdTimestamps(mdText: string): string {
   return mdText.replace(/@ \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/g, "@ DATE");
 }
+function xmllintCheck(xmlContent: string, label: string): void {
+  // Cross-platform lookup: `which` on POSIX, `where` on Windows.
+  const locator = process.platform === "win32" ? "where" : "which";
+  let xmllintBin: string | null = null;
+  try {
+    xmllintBin =
+      execSync(`${locator} xmllint`, { encoding: "utf-8" })
+        .split(/\r?\n/)
+        .map((l) => l.trim())
+        .filter(Boolean)[0] || null;
+  } catch {
+    /* not found */
+  }
+  if (!xmllintBin) {
+    // Optional external XML validation: skip when xmllint is unavailable
+    // (common on Windows). The in-code namespace assertion still runs.
+    return;
+  }
+  const tmpFile = resolve(tmpdir(), `adeu_consistency_${Date.now()}_${label}`);
+  try {
+    writeFileSync(tmpFile, xmlContent, "utf-8");
+    execFileSync(xmllintBin, ["--noout", tmpFile]);
+  } catch (err: any) {
+    throw new Error(
+      `xmllint validation failed for ${label}:\n${err.stderr ?? err.message}`,
+    );
+  } finally {
+    if (existsSync(tmpFile)) unlinkSync(tmpFile);
+  }
+}
+async function validateCommentsXmlNamespaces(
+  outBuffer: Buffer,
+  folder: string,
+): Promise<void> {
+  const doc = await DocumentObject.load(outBuffer);
+  const commentsPart = doc.pkg.parts.find((p) => p.contentType === CT_COMMENTS);
+  if (!commentsPart) return;
+  const commentsXml = serializeXml(
+    commentsPart._element.ownerDocument ?? commentsPart._element,
+  );
+  expect(commentsXml).toContain(
+    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
+  );
+  xmllintCheck(commentsXml, `${folder}_comments.xml`);
+}
 describe("Polyglot Consistency Framework (TS vs Python)", () => {
   if (!existsSync(CORPUS_DIR)) {
     it.skip("Cross-platform test corpus not found", () => {});
@@ -72,7 +125,12 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
           engine.process_batch(testConfig.changes || []);
           outBuffer = await doc.save();
-          // 2. Assert XML Structure Parity (via Python Bridge)
+          // 2. Validate comments XML namespaces when requested by test.json
+          if (testConfig.validate_comments_xml_namespaces) {
+            await validateCommentsXmlNamespaces(outBuffer, folder);
+          }
+          // 3. Assert XML Structure Parity (via Python Bridge)
           const goldenXmlPath = resolve(testDir, "golden_abstract.xml");
           if (existsSync(goldenXmlPath)) {
             const expectedXml = readFileSync(goldenXmlPath, "utf-8");
@@ -103,7 +161,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
           }
         }
-        // 3. Assert Markdown Extraction Parity (Raw View)
+        // 4. Assert Markdown Extraction Parity (Raw View)
         const rawMdPath = resolve(testDir, "golden_raw.md");
         if (existsSync(rawMdPath)) {
           const expectedRaw = readFileSync(rawMdPath, "utf-8").replace(
@@ -116,7 +174,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
           expect(actualRaw).toBe(expectedRaw);
         }
-        // 4. Assert Markdown Extraction Parity (Clean View)
+        // 5. Assert Markdown Extraction Parity (Clean View)
         const cleanMdPath = resolve(testDir, "golden_clean.md");
         if (existsSync(cleanMdPath)) {
           const expectedClean = readFileSync(cleanMdPath, "utf-8").replace(

package/src/diff.ts CHANGED Viewed

@@ -1,6 +1,34 @@
 import diff_match_patch from "diff-match-patch";
 import { ModifyText } from "./models.js";
+function _count_standalone_underscores(s: string): number {
+  let count = 0;
+  let i = 0;
+  const n = s.length;
+  const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
+  while (i < n) {
+    if (s[i] === "_") {
+      // Is it part of "__"?
+      let is_double = false;
+      if ((i > 0 && s[i - 1] === "_") || (i < n - 1 && s[i + 1] === "_")) {
+        is_double = true;
+      }
+      // Is it intra-word?
+      let is_intra = false;
+      if (i > 0 && isAlnum(s[i - 1]) && i < n - 1 && isAlnum(s[i + 1])) {
+        is_intra = true;
+      }
+      if (!is_double && !is_intra) {
+        count++;
+      }
+    }
+    i++;
+  }
+  return count;
+}
 export function trim_common_context(
   target: string,
   new_val: string,
@@ -44,7 +72,7 @@ export function trim_common_context(
     const left = target.substring(0, prefix_len);
     const b_count = (left.match(/\*\*/g) || []).length;
     const u2_count = (left.match(/__/g) || []).length;
-    const u1_count = (left.replace(/__/g, "").match(/_/g) || []).length;
+    const u1_count = _count_standalone_underscores(left);
     if (b_count % 2 !== 0) {
       prefix_len = left.lastIndexOf("**");
@@ -56,14 +84,18 @@ export function trim_common_context(
     }
     if (u1_count % 2 !== 0) {
       let idx = left.length - 1;
+      const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
       while (idx >= 0) {
         if (
           left[idx] === "_" &&
           (idx === 0 || left[idx - 1] !== "_") &&
           (idx === left.length - 1 || left[idx + 1] !== "_")
         ) {
-          prefix_len = idx;
-          break;
+          const is_intra = idx > 0 && isAlnum(left[idx - 1]) && idx < left.length - 1 && isAlnum(left[idx + 1]);
+          if (!is_intra) {
+            prefix_len = idx;
+            break;
+          }
         }
         idx--;
       }
@@ -140,7 +172,7 @@ export function trim_common_context(
     const right = target.substring(target.length - suffix_len);
     const b_count = (right.match(/\*\*/g) || []).length;
     const u2_count = (right.match(/__/g) || []).length;
-    const u1_count = (right.replace(/__/g, "").match(/_/g) || []).length;
+    const u1_count = _count_standalone_underscores(right);
     if (b_count % 2 !== 0) {
       suffix_len -= right.indexOf("**") + 2;
@@ -152,14 +184,18 @@ export function trim_common_context(
     }
     if (u1_count % 2 !== 0) {
       let idx_in_right = 0;
+      const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
       while (idx_in_right < right.length) {
         if (
           right[idx_in_right] === "_" &&
           (idx_in_right === 0 || right[idx_in_right - 1] !== "_") &&
           (idx_in_right === right.length - 1 || right[idx_in_right + 1] !== "_")
         ) {
-          suffix_len -= idx_in_right + 1;
-          break;
+          const is_intra = idx_in_right > 0 && isAlnum(right[idx_in_right - 1]) && idx_in_right < right.length - 1 && isAlnum(right[idx_in_right + 1]);
+          if (!is_intra) {
+            suffix_len -= idx_in_right + 1;
+            break;
+          }
         }
         idx_in_right++;
       }

package/src/docx/dom.ts CHANGED Viewed

@@ -53,7 +53,7 @@ export function parseXml(xmlString: string): Document {
   if (xmlString.startsWith("\uFEFF")) {
     xmlString = xmlString.slice(1);
   }
-  return new DOMParser().parseFromString(xmlString, "text/xml");
+  return new DOMParser().parseFromString(xmlString, "text/xml") as unknown as Document;
 }
 /**
@@ -61,7 +61,7 @@ export function parseXml(xmlString: string): Document {
  * enforcing deterministic attribute ordering on the root element.
  */
 export function serializeXml(node: Node): string {
-  let xml = new XMLSerializer().serializeToString(node);
+  let xml = new XMLSerializer().serializeToString(node as any);
   // BUG-11: Deterministic namespace ordering on root elements.
   const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;

package/src/engine.bugs.test.ts CHANGED Viewed

@@ -493,4 +493,42 @@ describe("Resolved Bugs Core Engine Verification", () => {
     expect(cleanText).not.toContain("Paragraph 2");
     expect(cleanText).toContain("Paragraph 1 ends here. MERGED here.");
   });
+  it("BUG-REPRO: accept_all_revisions leaks comments and in-body comment anchors", async () => {
+    const doc = await createTestDocument();
+    addParagraph(doc, "This is the original text of the agreement.");
+    const engine = new RedlineEngine(doc, "Reviewer AI");
+    // Add a tracked change with a comment attached
+    engine.process_batch([
+      {
+        type: "modify",
+        target_text: "original text",
+        new_text: "updated text",
+        comment: "Should this be updated or kept as original?",
+      },
+    ]);
+    // Pre-condition check: comment parts exist
+    const original_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
+    expect(original_comment_parts.length).toBeGreaterThan(0);
+    const original_xml = doc.element.toString();
+    expect(original_xml).toContain("w:commentRangeStart");
+    expect(original_xml).toContain("w:commentReference");
+    // Accept all
+    engine.accept_all_revisions();
+    // Verify comment removal
+    const final_xml = doc.element.toString();
+    // Assert NO in-body comment anchors survive (anchors must be completely gone)
+    expect(final_xml).not.toContain("w:commentRangeStart");
+    expect(final_xml).not.toContain("w:commentRangeEnd");
+    expect(final_xml).not.toContain("w:commentReference");
+    const final_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
+    expect(final_comment_parts.length).toBe(0);
+  });
 });

package/src/engine.feedback.test.ts ADDED Viewed

@@ -0,0 +1,144 @@
+import { describe, it, expect } from "vitest";
+import { createTestDocument, addParagraph } from "./test-utils.js";
+import { RedlineEngine } from "./engine.js";
+import { extractTextFromBuffer } from "./ingest.js";
+describe("Feedback Layer & Dry Run Verification", () => {
+  it("process_batch returns detailed edit reports", async () => {
+    const doc = await createTestDocument();
+    addParagraph(doc, "The quick brown fox jumps over the lazy dog.");
+    const engine = new RedlineEngine(doc, "Reviewer TS");
+    const stats = (engine as any).process_batch([
+      { type: "modify", target_text: "quick brown fox", new_text: "fast red fox" }
+    ]);
+    expect(stats.edits).toBeDefined();
+    expect(stats.edits.length).toBe(1);
+    const report = stats.edits[0];
+    expect(report.status).toBe("applied");
+    expect(report.target_text).toBe("quick brown fox");
+    expect(report.new_text).toBe("fast red fox");
+    // Previews with context window
+    expect(report.critic_markup).toContain("{--quick brown--}{++fast red++} fox");
+    expect(report.critic_markup).toContain("The ");
+    expect(report.critic_markup).toContain(" jumps over");
+    expect(report.clean_text).toContain("The fast red fox jumps over");
+    expect(stats.engine).toBe("node");
+    expect(stats.version).toBeDefined();
+  });
+  it("punctuation anchor triggers warning", async () => {
+    const doc = await createTestDocument();
+    addParagraph(doc, "Refer to sample_term_name in Section 4.");
+    const engine = new RedlineEngine(doc, "Reviewer TS");
+    const stats = (engine as any).process_batch([
+      { type: "modify", target_text: "sample_term_name", new_text: "validated_term_name" }
+    ]);
+    const report = stats.edits[0];
+    expect(report.warning).not.toBeNull();
+    expect(report.warning.toLowerCase()).toContain("punctuation");
+    expect(report.warning).toContain("sample_term_name");
+  });
+  it("dry_run does not mutate and reports safely", async () => {
+    const doc = await createTestDocument();
+    addParagraph(doc, "Baseline text.");
+    const engine = new RedlineEngine(doc, "Reviewer TS");
+    // 1. Valid Dry Run
+    const stats = (engine as any).process_batch([
+      { type: "modify", target_text: "Baseline", new_text: "Modified Preview" }
+    ], true);
+    expect(stats.edits_applied).toBe(1);
+    expect(stats.edits[0].status).toBe("applied");
+    expect(stats.edits[0].clean_text).toContain("Modified Preview");
+    // Verify original document remains pristine
+    const buf = await doc.save();
+    const cleanText = await extractTextFromBuffer(buf, true);
+    expect(cleanText).not.toContain("Modified Preview");
+    expect(cleanText).toContain("Baseline text");
+    // 2. Invalid Dry Run should not throw and instead report the failure safely
+    const statsInvalid = (engine as any).process_batch([
+      { type: "modify", target_text: "NON_EXISTENT", new_text: "fail" }
+    ], true);
+    expect(statsInvalid.edits_skipped).toBe(1);
+    expect(statsInvalid.edits[0].status).toBe("failed");
+    expect(statsInvalid.edits[0].error).not.toBeNull();
+    expect(statsInvalid.edits[0].error.toLowerCase()).toContain("not found");
+  });
+  it("preview self-consistency on underscore terms", async () => {
+    const doc = await createTestDocument();
+    addParagraph(doc, "ANCHOR_LINE governs the interpretation of this Agreement.");
+    const engine = new RedlineEngine(doc, "Reviewer TS");
+    const stats = (engine as any).process_batch([
+      {
+        type: "modify",
+        target_text: "ANCHOR_LINE governs the interpretation of this Agreement.",
+        new_text: "NEW_PARA inserted before.\n\nANCHOR_LINE governs the interpretation of this Agreement.",
+      }
+    ]);
+    const buf = await doc.save();
+    const cleanDocText = await extractTextFromBuffer(buf, true);
+    const report = stats.edits[0];
+    expect(report.clean_text).not.toBeNull();
+    const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
+    expect(cleanDocText).toContain(cleanPreview);
+  });
+  it("preview does not contain duplicate garbling", async () => {
+     const doc = await createTestDocument();
+     addParagraph(doc, "Payment Terms");
+     const xmlDoc = doc.element.ownerDocument!;
+     const p2 = xmlDoc.createElement("w:p");
+     const del = xmlDoc.createElement("w:del");
+     del.setAttribute("w:id", "900");
+     del.setAttribute("w:author", "Reviewer");
+     del.setAttribute("w:date", "2026-06-01T00:00:00Z");
+     const r = xmlDoc.createElement("w:r");
+     const t = xmlDoc.createElement("w:delText");
+     t.setAttribute("xml:space", "preserve");
+     t.textContent = "DUP_PHRASE shall be paid within thirty days of invoice.";
+     r.appendChild(t);
+     del.appendChild(r);
+     p2.appendChild(del);
+    const firstP = doc.element.getElementsByTagName("w:p")[0];
+    firstP.parentNode!.appendChild(p2);
+     addParagraph(doc, "DUP_PHRASE shall be paid within thirty days of invoice.");
+     addParagraph(doc, "Late payments accrue interest at the statutory rate.");
+     const engine = new RedlineEngine(doc, "Reviewer TS");
+     const stats = (engine as any).process_batch([
+       {
+         type: "modify",
+         target_text: "DUP_PHRASE shall be paid within thirty days of invoice.",
+         new_text: "DUP_PHRASE shall be paid within sixty days of invoice.",
+       }
+     ]);
+    const buf = await doc.save();
+    const cleanDocText = await extractTextFromBuffer(buf, true);
+    const report = stats.edits[0];
+    expect(report.clean_text).not.toBeNull();
+    const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
+    expect(cleanDocText).toContain(cleanPreview);
+   });
+ });