@adeu/core 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adeu/core",
3
- "version": "1.8.0",
3
+ "version": "1.10.0",
4
4
  "description": "",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
package/src/comments.ts CHANGED
@@ -33,27 +33,27 @@ export class CommentsManager {
33
33
 
34
34
  constructor(public doc: DocumentObject) {}
35
35
 
36
- public get commentsPart() {
36
+ public get commentsPart(): Part {
37
37
  if (!this._commentsPart) {
38
38
  this._commentsPart = this._getOrCreateCommentsPart();
39
39
  this._ensureNamespaces();
40
40
  }
41
- return this._commentsPart;
41
+ return this._commentsPart!;
42
42
  }
43
43
 
44
- public get extendedPart() {
44
+ public get extendedPart(): Part {
45
45
  if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
46
- return this._extendedPart;
46
+ return this._extendedPart!;
47
47
  }
48
48
 
49
- public get idsPart() {
49
+ public get idsPart(): Part {
50
50
  if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
51
- return this._idsPart;
51
+ return this._idsPart!;
52
52
  }
53
53
 
54
- public get extensiblePart() {
54
+ public get extensiblePart(): Part {
55
55
  if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
56
- return this._extensiblePart;
56
+ return this._extensiblePart!;
57
57
  }
58
58
 
59
59
  public get nextId(): number {
@@ -124,9 +124,28 @@ export class CommentsManager {
124
124
  }
125
125
 
126
126
  private _ensureNamespaces() {
127
- // In TS we use full xml reconstruction if attributes are missing, but xmldom generally tolerates
128
- // runtime attributes if the namespace is declared. For absolute safety, if it's completely missing,
129
- // we would rebuild. Assuming the parser caught them if they existed.
127
+ // When the comments part already existed (e.g. a legacy or pandoc-produced
128
+ // document) its root <w:comments> may omit the namespaces we rely on
129
+ // most importantly w14, which qualifies the w14:paraId / w14:textId
130
+ // attributes we write on each comment paragraph. Without the declaration
131
+ // the serialised XML is invalid ("Namespace prefix w14 ... is not defined").
132
+ // Declare any missing namespace prefixes on the existing root element.
133
+ const root = this._commentsPart?._element;
134
+ if (!root) return;
135
+
136
+ const required: [string, string][] = [
137
+ ['xmlns:w', NS.w],
138
+ ['xmlns:w14', NS.w14],
139
+ ['xmlns:w15', NS.w15],
140
+ ['xmlns:w16cid', NS.w16cid],
141
+ ['xmlns:w16cex', NS.w16cex],
142
+ ['xmlns:mc', NS.mc],
143
+ ];
144
+ for (const [attr, uri] of required) {
145
+ if (!root.getAttribute(attr)) {
146
+ root.setAttribute(attr, uri);
147
+ }
148
+ }
130
149
  }
131
150
 
132
151
  private _getNextCommentId(): number {
@@ -273,10 +292,10 @@ export class CommentsManager {
273
292
  }
274
293
 
275
294
  public deleteComment(commentId: string) {
276
- if (!this._commentsPart) return;
295
+ if (!this.commentsPart) return;
277
296
 
278
297
  let commentEl: Element | null = null;
279
- for (const c of findAllDescendants(this._commentsPart._element, 'w:comment')) {
298
+ for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
280
299
  if (c.getAttribute('w:id') === commentId) {
281
300
  commentEl = c;
282
301
  break;
@@ -304,7 +323,7 @@ export class CommentsManager {
304
323
  if (child.getAttribute('w15:paraIdParent') === paraId) {
305
324
  const childParaId = child.getAttribute('w15:paraId');
306
325
  if (childParaId) {
307
- for (const c of findAllDescendants(this._commentsPart._element, 'w:comment')) {
326
+ for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
308
327
  for (const p of findAllDescendants(c, 'w:p')) {
309
328
  if (p.getAttribute('w14:paraId') === childParaId) {
310
329
  const cid = c.getAttribute('w:id');
@@ -8,12 +8,13 @@ import {
8
8
  } from "node:fs";
9
9
  import { resolve, dirname } from "node:path";
10
10
  import { fileURLToPath } from "node:url";
11
- import { execSync } from "node:child_process";
11
+ import { execSync, execFileSync } from "node:child_process";
12
12
  import { tmpdir } from "node:os";
13
13
 
14
14
  import { DocumentObject } from "./docx/bridge.js";
15
15
  import { RedlineEngine } from "./engine.js";
16
16
  import { extractTextFromBuffer } from "./ingest.js";
17
+ import { serializeXml } from "./docx/dom.js";
17
18
 
18
19
  const __filename = fileURLToPath(import.meta.url);
19
20
  const __dirname = dirname(__filename);
@@ -28,10 +29,62 @@ const PYTHON_ABSTRACT_CMD = resolve(
28
29
  );
29
30
  const PYTHON_DIR = resolve(__dirname, "../../../../python");
30
31
 
32
+ const CT_COMMENTS =
33
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml";
34
+
31
35
  function normalizeMdTimestamps(mdText: string): string {
32
36
  return mdText.replace(/@ \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/g, "@ DATE");
33
37
  }
34
38
 
39
+ function xmllintCheck(xmlContent: string, label: string): void {
40
+ // Cross-platform lookup: `which` on POSIX, `where` on Windows.
41
+ const locator = process.platform === "win32" ? "where" : "which";
42
+ let xmllintBin: string | null = null;
43
+ try {
44
+ xmllintBin =
45
+ execSync(`${locator} xmllint`, { encoding: "utf-8" })
46
+ .split(/\r?\n/)
47
+ .map((l) => l.trim())
48
+ .filter(Boolean)[0] || null;
49
+ } catch {
50
+ /* not found */
51
+ }
52
+ if (!xmllintBin) {
53
+ // Optional external XML validation: skip when xmllint is unavailable
54
+ // (common on Windows). The in-code namespace assertion still runs.
55
+ return;
56
+ }
57
+ const tmpFile = resolve(tmpdir(), `adeu_consistency_${Date.now()}_${label}`);
58
+ try {
59
+ writeFileSync(tmpFile, xmlContent, "utf-8");
60
+ execFileSync(xmllintBin, ["--noout", tmpFile]);
61
+ } catch (err: any) {
62
+ throw new Error(
63
+ `xmllint validation failed for ${label}:\n${err.stderr ?? err.message}`,
64
+ );
65
+ } finally {
66
+ if (existsSync(tmpFile)) unlinkSync(tmpFile);
67
+ }
68
+ }
69
+
70
+ async function validateCommentsXmlNamespaces(
71
+ outBuffer: Buffer,
72
+ folder: string,
73
+ ): Promise<void> {
74
+ const doc = await DocumentObject.load(outBuffer);
75
+ const commentsPart = doc.pkg.parts.find((p) => p.contentType === CT_COMMENTS);
76
+ if (!commentsPart) return;
77
+
78
+ const commentsXml = serializeXml(
79
+ commentsPart._element.ownerDocument ?? commentsPart._element,
80
+ );
81
+
82
+ expect(commentsXml).toContain(
83
+ 'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
84
+ );
85
+ xmllintCheck(commentsXml, `${folder}_comments.xml`);
86
+ }
87
+
35
88
  describe("Polyglot Consistency Framework (TS vs Python)", () => {
36
89
  if (!existsSync(CORPUS_DIR)) {
37
90
  it.skip("Cross-platform test corpus not found", () => {});
@@ -72,7 +125,12 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
72
125
  engine.process_batch(testConfig.changes || []);
73
126
  outBuffer = await doc.save();
74
127
 
75
- // 2. Assert XML Structure Parity (via Python Bridge)
128
+ // 2. Validate comments XML namespaces when requested by test.json
129
+ if (testConfig.validate_comments_xml_namespaces) {
130
+ await validateCommentsXmlNamespaces(outBuffer, folder);
131
+ }
132
+
133
+ // 3. Assert XML Structure Parity (via Python Bridge)
76
134
  const goldenXmlPath = resolve(testDir, "golden_abstract.xml");
77
135
  if (existsSync(goldenXmlPath)) {
78
136
  const expectedXml = readFileSync(goldenXmlPath, "utf-8");
@@ -103,7 +161,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
103
161
  }
104
162
  }
105
163
 
106
- // 3. Assert Markdown Extraction Parity (Raw View)
164
+ // 4. Assert Markdown Extraction Parity (Raw View)
107
165
  const rawMdPath = resolve(testDir, "golden_raw.md");
108
166
  if (existsSync(rawMdPath)) {
109
167
  const expectedRaw = readFileSync(rawMdPath, "utf-8").replace(
@@ -116,7 +174,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
116
174
  expect(actualRaw).toBe(expectedRaw);
117
175
  }
118
176
 
119
- // 4. Assert Markdown Extraction Parity (Clean View)
177
+ // 5. Assert Markdown Extraction Parity (Clean View)
120
178
  const cleanMdPath = resolve(testDir, "golden_clean.md");
121
179
  if (existsSync(cleanMdPath)) {
122
180
  const expectedClean = readFileSync(cleanMdPath, "utf-8").replace(
package/src/diff.ts CHANGED
@@ -1,6 +1,34 @@
1
1
  import diff_match_patch from "diff-match-patch";
2
2
  import { ModifyText } from "./models.js";
3
3
 
4
+ function _count_standalone_underscores(s: string): number {
5
+ let count = 0;
6
+ let i = 0;
7
+ const n = s.length;
8
+ const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
9
+ while (i < n) {
10
+ if (s[i] === "_") {
11
+ // Is it part of "__"?
12
+ let is_double = false;
13
+ if ((i > 0 && s[i - 1] === "_") || (i < n - 1 && s[i + 1] === "_")) {
14
+ is_double = true;
15
+ }
16
+
17
+ // Is it intra-word?
18
+ let is_intra = false;
19
+ if (i > 0 && isAlnum(s[i - 1]) && i < n - 1 && isAlnum(s[i + 1])) {
20
+ is_intra = true;
21
+ }
22
+
23
+ if (!is_double && !is_intra) {
24
+ count++;
25
+ }
26
+ }
27
+ i++;
28
+ }
29
+ return count;
30
+ }
31
+
4
32
  export function trim_common_context(
5
33
  target: string,
6
34
  new_val: string,
@@ -44,7 +72,7 @@ export function trim_common_context(
44
72
  const left = target.substring(0, prefix_len);
45
73
  const b_count = (left.match(/\*\*/g) || []).length;
46
74
  const u2_count = (left.match(/__/g) || []).length;
47
- const u1_count = (left.replace(/__/g, "").match(/_/g) || []).length;
75
+ const u1_count = _count_standalone_underscores(left);
48
76
 
49
77
  if (b_count % 2 !== 0) {
50
78
  prefix_len = left.lastIndexOf("**");
@@ -56,14 +84,18 @@ export function trim_common_context(
56
84
  }
57
85
  if (u1_count % 2 !== 0) {
58
86
  let idx = left.length - 1;
87
+ const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
59
88
  while (idx >= 0) {
60
89
  if (
61
90
  left[idx] === "_" &&
62
91
  (idx === 0 || left[idx - 1] !== "_") &&
63
92
  (idx === left.length - 1 || left[idx + 1] !== "_")
64
93
  ) {
65
- prefix_len = idx;
66
- break;
94
+ const is_intra = idx > 0 && isAlnum(left[idx - 1]) && idx < left.length - 1 && isAlnum(left[idx + 1]);
95
+ if (!is_intra) {
96
+ prefix_len = idx;
97
+ break;
98
+ }
67
99
  }
68
100
  idx--;
69
101
  }
@@ -140,7 +172,7 @@ export function trim_common_context(
140
172
  const right = target.substring(target.length - suffix_len);
141
173
  const b_count = (right.match(/\*\*/g) || []).length;
142
174
  const u2_count = (right.match(/__/g) || []).length;
143
- const u1_count = (right.replace(/__/g, "").match(/_/g) || []).length;
175
+ const u1_count = _count_standalone_underscores(right);
144
176
 
145
177
  if (b_count % 2 !== 0) {
146
178
  suffix_len -= right.indexOf("**") + 2;
@@ -152,14 +184,18 @@ export function trim_common_context(
152
184
  }
153
185
  if (u1_count % 2 !== 0) {
154
186
  let idx_in_right = 0;
187
+ const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
155
188
  while (idx_in_right < right.length) {
156
189
  if (
157
190
  right[idx_in_right] === "_" &&
158
191
  (idx_in_right === 0 || right[idx_in_right - 1] !== "_") &&
159
192
  (idx_in_right === right.length - 1 || right[idx_in_right + 1] !== "_")
160
193
  ) {
161
- suffix_len -= idx_in_right + 1;
162
- break;
194
+ const is_intra = idx_in_right > 0 && isAlnum(right[idx_in_right - 1]) && idx_in_right < right.length - 1 && isAlnum(right[idx_in_right + 1]);
195
+ if (!is_intra) {
196
+ suffix_len -= idx_in_right + 1;
197
+ break;
198
+ }
163
199
  }
164
200
  idx_in_right++;
165
201
  }
package/src/docx/dom.ts CHANGED
@@ -49,7 +49,11 @@ export function findAllDescendants(
49
49
  * Parses raw XML strings into xmldom Documents.
50
50
  */
51
51
  export function parseXml(xmlString: string): Document {
52
- return new DOMParser().parseFromString(xmlString, "text/xml");
52
+ // Strip UTF-8 BOM if present
53
+ if (xmlString.startsWith("\uFEFF")) {
54
+ xmlString = xmlString.slice(1);
55
+ }
56
+ return new DOMParser().parseFromString(xmlString, "text/xml") as unknown as Document;
53
57
  }
54
58
 
55
59
  /**
@@ -57,7 +61,7 @@ export function parseXml(xmlString: string): Document {
57
61
  * enforcing deterministic attribute ordering on the root element.
58
62
  */
59
63
  export function serializeXml(node: Node): string {
60
- let xml = new XMLSerializer().serializeToString(node);
64
+ let xml = new XMLSerializer().serializeToString(node as any);
61
65
 
62
66
  // BUG-11: Deterministic namespace ordering on root elements.
63
67
  const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
@@ -134,6 +134,12 @@ describe("Resolved Bugs Core Engine Verification", () => {
134
134
  // Direct string equality so Vitest prints the exact diff if they mismatch!
135
135
  expect(serialized).toBe(expected);
136
136
  });
137
+
138
+ it("BUG-BOM-1: parseXml successfully strips leading UTF-8 BOM (\\uFEFF)", () => {
139
+ const rawXml = `\uFEFF<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"></w:document>`;
140
+ const docXml = parseXml(rawXml);
141
+ expect(docXml.documentElement.tagName).toBe("w:document");
142
+ });
137
143
  it("BUG-11b: Sweeps orphaned comment anchors when accepting tracked changes", async () => {
138
144
  const doc = await createTestDocument();
139
145
  addParagraph(doc, "Confidential Information");
@@ -235,7 +241,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
235
241
  "",
236
242
  null,
237
243
  "123",
238
- doc.element.ownerDocument!
244
+ doc.element.ownerDocument!,
239
245
  );
240
246
  expect(ins).toBeNull();
241
247
  });
@@ -245,7 +251,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
245
251
  const p = addParagraph(doc, "Short heading");
246
252
 
247
253
  const fakeCache = {
248
- "CustomHeading": { name: "Custom Heading", outline_level: 2, bold: true }
254
+ CustomHeading: { name: "Custom Heading", outline_level: 2, bold: true },
249
255
  };
250
256
  (doc.pkg as any)._adeu_style_cache = [fakeCache, "Normal"];
251
257
 
@@ -259,7 +265,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
259
265
  const buf = await doc.save();
260
266
  const body = await extractTextFromBuffer(buf, false);
261
267
  const pages = paginate(body, "");
262
-
268
+
263
269
  const outlineNodes = extract_outline(
264
270
  doc,
265
271
  body,
@@ -295,7 +301,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
295
301
  const end = xmlDoc.createElement("w:commentRangeEnd");
296
302
  end.setAttribute("w:id", c_id);
297
303
  p.appendChild(end);
298
-
304
+
299
305
  const ref_run = xmlDoc.createElement("w:r");
300
306
  const ref = xmlDoc.createElement("w:commentReference");
301
307
  ref.setAttribute("w:id", c_id);
@@ -428,10 +434,18 @@ describe("Resolved Bugs Core Engine Verification", () => {
428
434
 
429
435
  // Author B tries to modify Author A's pending insertion
430
436
  const engineB = new RedlineEngine(doc, "Author B");
431
-
437
+
432
438
  expect(() => {
433
- engineB.process_batch([{ type: "modify", target_text: "Inserted by A.", new_text: "Modified by B." }]);
434
- }).toThrowError(/Accept that change first or scope your edit outside of it/);
439
+ engineB.process_batch([
440
+ {
441
+ type: "modify",
442
+ target_text: "Inserted by A.",
443
+ new_text: "Modified by B.",
444
+ },
445
+ ]);
446
+ }).toThrowError(
447
+ /Accept that change first or scope your edit outside of it/,
448
+ );
435
449
  });
436
450
 
437
451
  it("BUG-CROSS-PARA-1: Cross-paragraph modify coalesces paragraphs and tracks para-mark deletion", async () => {
@@ -452,7 +466,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
452
466
 
453
467
  const buf = await doc.save();
454
468
  const cleanText = await extractTextFromBuffer(buf, true);
455
-
469
+
456
470
  expect(cleanText).not.toContain("ends here.\n\n");
457
471
  expect(cleanText).toContain("Clause 1 ends here. MERGED here.");
458
472
  });
@@ -467,15 +481,54 @@ describe("Resolved Bugs Core Engine Verification", () => {
467
481
  engine.process_batch([
468
482
  {
469
483
  type: "modify",
470
- target_text: "ends here.\n\nParagraph 2 is in the middle.\n\nParagraph 3 begins",
484
+ target_text:
485
+ "ends here.\n\nParagraph 2 is in the middle.\n\nParagraph 3 begins",
471
486
  new_text: "ends here. MERGED",
472
487
  },
473
488
  ]);
474
489
 
475
490
  engine.accept_all_revisions();
476
491
  const cleanText = await extractTextFromBuffer(await doc.save(), true);
477
-
492
+
478
493
  expect(cleanText).not.toContain("Paragraph 2");
479
494
  expect(cleanText).toContain("Paragraph 1 ends here. MERGED here.");
480
495
  });
496
+
497
+ it("BUG-REPRO: accept_all_revisions leaks comments and in-body comment anchors", async () => {
498
+ const doc = await createTestDocument();
499
+ addParagraph(doc, "This is the original text of the agreement.");
500
+ const engine = new RedlineEngine(doc, "Reviewer AI");
501
+
502
+ // Add a tracked change with a comment attached
503
+ engine.process_batch([
504
+ {
505
+ type: "modify",
506
+ target_text: "original text",
507
+ new_text: "updated text",
508
+ comment: "Should this be updated or kept as original?",
509
+ },
510
+ ]);
511
+
512
+ // Pre-condition check: comment parts exist
513
+ const original_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
514
+ expect(original_comment_parts.length).toBeGreaterThan(0);
515
+
516
+ const original_xml = doc.element.toString();
517
+ expect(original_xml).toContain("w:commentRangeStart");
518
+ expect(original_xml).toContain("w:commentReference");
519
+
520
+ // Accept all
521
+ engine.accept_all_revisions();
522
+
523
+ // Verify comment removal
524
+ const final_xml = doc.element.toString();
525
+
526
+ // Assert NO in-body comment anchors survive (anchors must be completely gone)
527
+ expect(final_xml).not.toContain("w:commentRangeStart");
528
+ expect(final_xml).not.toContain("w:commentRangeEnd");
529
+ expect(final_xml).not.toContain("w:commentReference");
530
+
531
+ const final_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
532
+ expect(final_comment_parts.length).toBe(0);
533
+ });
481
534
  });
@@ -0,0 +1,144 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { createTestDocument, addParagraph } from "./test-utils.js";
3
+ import { RedlineEngine } from "./engine.js";
4
+ import { extractTextFromBuffer } from "./ingest.js";
5
+
6
+ describe("Feedback Layer & Dry Run Verification", () => {
7
+ it("process_batch returns detailed edit reports", async () => {
8
+ const doc = await createTestDocument();
9
+ addParagraph(doc, "The quick brown fox jumps over the lazy dog.");
10
+ const engine = new RedlineEngine(doc, "Reviewer TS");
11
+
12
+ const stats = (engine as any).process_batch([
13
+ { type: "modify", target_text: "quick brown fox", new_text: "fast red fox" }
14
+ ]);
15
+
16
+ expect(stats.edits).toBeDefined();
17
+ expect(stats.edits.length).toBe(1);
18
+
19
+ const report = stats.edits[0];
20
+ expect(report.status).toBe("applied");
21
+ expect(report.target_text).toBe("quick brown fox");
22
+ expect(report.new_text).toBe("fast red fox");
23
+
24
+ // Previews with context window
25
+ expect(report.critic_markup).toContain("{--quick brown--}{++fast red++} fox");
26
+ expect(report.critic_markup).toContain("The ");
27
+ expect(report.critic_markup).toContain(" jumps over");
28
+
29
+ expect(report.clean_text).toContain("The fast red fox jumps over");
30
+ expect(stats.engine).toBe("node");
31
+ expect(stats.version).toBeDefined();
32
+ });
33
+
34
+ it("punctuation anchor triggers warning", async () => {
35
+ const doc = await createTestDocument();
36
+ addParagraph(doc, "Refer to sample_term_name in Section 4.");
37
+ const engine = new RedlineEngine(doc, "Reviewer TS");
38
+
39
+ const stats = (engine as any).process_batch([
40
+ { type: "modify", target_text: "sample_term_name", new_text: "validated_term_name" }
41
+ ]);
42
+
43
+ const report = stats.edits[0];
44
+ expect(report.warning).not.toBeNull();
45
+ expect(report.warning.toLowerCase()).toContain("punctuation");
46
+ expect(report.warning).toContain("sample_term_name");
47
+ });
48
+
49
+ it("dry_run does not mutate and reports safely", async () => {
50
+ const doc = await createTestDocument();
51
+ addParagraph(doc, "Baseline text.");
52
+ const engine = new RedlineEngine(doc, "Reviewer TS");
53
+
54
+ // 1. Valid Dry Run
55
+ const stats = (engine as any).process_batch([
56
+ { type: "modify", target_text: "Baseline", new_text: "Modified Preview" }
57
+ ], true);
58
+
59
+ expect(stats.edits_applied).toBe(1);
60
+ expect(stats.edits[0].status).toBe("applied");
61
+ expect(stats.edits[0].clean_text).toContain("Modified Preview");
62
+
63
+ // Verify original document remains pristine
64
+ const buf = await doc.save();
65
+ const cleanText = await extractTextFromBuffer(buf, true);
66
+ expect(cleanText).not.toContain("Modified Preview");
67
+ expect(cleanText).toContain("Baseline text");
68
+
69
+ // 2. Invalid Dry Run should not throw and instead report the failure safely
70
+ const statsInvalid = (engine as any).process_batch([
71
+ { type: "modify", target_text: "NON_EXISTENT", new_text: "fail" }
72
+ ], true);
73
+
74
+ expect(statsInvalid.edits_skipped).toBe(1);
75
+ expect(statsInvalid.edits[0].status).toBe("failed");
76
+ expect(statsInvalid.edits[0].error).not.toBeNull();
77
+ expect(statsInvalid.edits[0].error.toLowerCase()).toContain("not found");
78
+ });
79
+
80
+ it("preview self-consistency on underscore terms", async () => {
81
+ const doc = await createTestDocument();
82
+ addParagraph(doc, "ANCHOR_LINE governs the interpretation of this Agreement.");
83
+ const engine = new RedlineEngine(doc, "Reviewer TS");
84
+
85
+ const stats = (engine as any).process_batch([
86
+ {
87
+ type: "modify",
88
+ target_text: "ANCHOR_LINE governs the interpretation of this Agreement.",
89
+ new_text: "NEW_PARA inserted before.\n\nANCHOR_LINE governs the interpretation of this Agreement.",
90
+ }
91
+ ]);
92
+
93
+ const buf = await doc.save();
94
+ const cleanDocText = await extractTextFromBuffer(buf, true);
95
+
96
+ const report = stats.edits[0];
97
+
98
+ expect(report.clean_text).not.toBeNull();
99
+ const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
100
+ expect(cleanDocText).toContain(cleanPreview);
101
+ });
102
+
103
+ it("preview does not contain duplicate garbling", async () => {
104
+ const doc = await createTestDocument();
105
+
106
+ addParagraph(doc, "Payment Terms");
107
+
108
+ const xmlDoc = doc.element.ownerDocument!;
109
+ const p2 = xmlDoc.createElement("w:p");
110
+ const del = xmlDoc.createElement("w:del");
111
+ del.setAttribute("w:id", "900");
112
+ del.setAttribute("w:author", "Reviewer");
113
+ del.setAttribute("w:date", "2026-06-01T00:00:00Z");
114
+ const r = xmlDoc.createElement("w:r");
115
+ const t = xmlDoc.createElement("w:delText");
116
+ t.setAttribute("xml:space", "preserve");
117
+ t.textContent = "DUP_PHRASE shall be paid within thirty days of invoice.";
118
+ r.appendChild(t);
119
+ del.appendChild(r);
120
+ p2.appendChild(del);
121
+ const firstP = doc.element.getElementsByTagName("w:p")[0];
122
+ firstP.parentNode!.appendChild(p2);
123
+
124
+ addParagraph(doc, "DUP_PHRASE shall be paid within thirty days of invoice.");
125
+ addParagraph(doc, "Late payments accrue interest at the statutory rate.");
126
+
127
+ const engine = new RedlineEngine(doc, "Reviewer TS");
128
+ const stats = (engine as any).process_batch([
129
+ {
130
+ type: "modify",
131
+ target_text: "DUP_PHRASE shall be paid within thirty days of invoice.",
132
+ new_text: "DUP_PHRASE shall be paid within sixty days of invoice.",
133
+ }
134
+ ]);
135
+
136
+ const buf = await doc.save();
137
+ const cleanDocText = await extractTextFromBuffer(buf, true);
138
+
139
+ const report = stats.edits[0];
140
+ expect(report.clean_text).not.toBeNull();
141
+ const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
142
+ expect(cleanDocText).toContain(cleanPreview);
143
+ });
144
+ });