@adeu/core 1.9.0 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +639 -105
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -2
- package/dist/index.d.ts +15 -2
- package/dist/index.js +639 -105
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/comments.ts +33 -14
- package/src/consistency.test.ts +62 -4
- package/src/diff.ts +42 -6
- package/src/docx/dom.ts +2 -2
- package/src/engine.bugs.test.ts +38 -0
- package/src/engine.feedback.test.ts +144 -0
- package/src/engine.issue23.test.ts +511 -0
- package/src/engine.ts +614 -82
- package/src/sanitize/core.ts +1 -0
- package/src/sanitize/sanitize.test.ts +48 -6
- package/src/sanitize/transforms.ts +88 -1
package/package.json
CHANGED
package/src/comments.ts
CHANGED
|
@@ -33,27 +33,27 @@ export class CommentsManager {
|
|
|
33
33
|
|
|
34
34
|
constructor(public doc: DocumentObject) {}
|
|
35
35
|
|
|
36
|
-
public get commentsPart() {
|
|
36
|
+
public get commentsPart(): Part {
|
|
37
37
|
if (!this._commentsPart) {
|
|
38
38
|
this._commentsPart = this._getOrCreateCommentsPart();
|
|
39
39
|
this._ensureNamespaces();
|
|
40
40
|
}
|
|
41
|
-
return this._commentsPart
|
|
41
|
+
return this._commentsPart!;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
public get extendedPart() {
|
|
44
|
+
public get extendedPart(): Part {
|
|
45
45
|
if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
|
|
46
|
-
return this._extendedPart
|
|
46
|
+
return this._extendedPart!;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
public get idsPart() {
|
|
49
|
+
public get idsPart(): Part {
|
|
50
50
|
if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
|
|
51
|
-
return this._idsPart
|
|
51
|
+
return this._idsPart!;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
public get extensiblePart() {
|
|
54
|
+
public get extensiblePart(): Part {
|
|
55
55
|
if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
|
|
56
|
-
return this._extensiblePart
|
|
56
|
+
return this._extensiblePart!;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
public get nextId(): number {
|
|
@@ -124,9 +124,28 @@ export class CommentsManager {
|
|
|
124
124
|
}
|
|
125
125
|
|
|
126
126
|
private _ensureNamespaces() {
|
|
127
|
-
//
|
|
128
|
-
//
|
|
129
|
-
//
|
|
127
|
+
// When the comments part already existed (e.g. a legacy or pandoc-produced
|
|
128
|
+
// document) its root <w:comments> may omit the namespaces we rely on —
|
|
129
|
+
// most importantly w14, which qualifies the w14:paraId / w14:textId
|
|
130
|
+
// attributes we write on each comment paragraph. Without the declaration
|
|
131
|
+
// the serialised XML is invalid ("Namespace prefix w14 ... is not defined").
|
|
132
|
+
// Declare any missing namespace prefixes on the existing root element.
|
|
133
|
+
const root = this._commentsPart?._element;
|
|
134
|
+
if (!root) return;
|
|
135
|
+
|
|
136
|
+
const required: [string, string][] = [
|
|
137
|
+
['xmlns:w', NS.w],
|
|
138
|
+
['xmlns:w14', NS.w14],
|
|
139
|
+
['xmlns:w15', NS.w15],
|
|
140
|
+
['xmlns:w16cid', NS.w16cid],
|
|
141
|
+
['xmlns:w16cex', NS.w16cex],
|
|
142
|
+
['xmlns:mc', NS.mc],
|
|
143
|
+
];
|
|
144
|
+
for (const [attr, uri] of required) {
|
|
145
|
+
if (!root.getAttribute(attr)) {
|
|
146
|
+
root.setAttribute(attr, uri);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
130
149
|
}
|
|
131
150
|
|
|
132
151
|
private _getNextCommentId(): number {
|
|
@@ -273,10 +292,10 @@ export class CommentsManager {
|
|
|
273
292
|
}
|
|
274
293
|
|
|
275
294
|
public deleteComment(commentId: string) {
|
|
276
|
-
if (!this.
|
|
295
|
+
if (!this.commentsPart) return;
|
|
277
296
|
|
|
278
297
|
let commentEl: Element | null = null;
|
|
279
|
-
for (const c of findAllDescendants(this.
|
|
298
|
+
for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
|
|
280
299
|
if (c.getAttribute('w:id') === commentId) {
|
|
281
300
|
commentEl = c;
|
|
282
301
|
break;
|
|
@@ -304,7 +323,7 @@ export class CommentsManager {
|
|
|
304
323
|
if (child.getAttribute('w15:paraIdParent') === paraId) {
|
|
305
324
|
const childParaId = child.getAttribute('w15:paraId');
|
|
306
325
|
if (childParaId) {
|
|
307
|
-
for (const c of findAllDescendants(this.
|
|
326
|
+
for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
|
|
308
327
|
for (const p of findAllDescendants(c, 'w:p')) {
|
|
309
328
|
if (p.getAttribute('w14:paraId') === childParaId) {
|
|
310
329
|
const cid = c.getAttribute('w:id');
|
package/src/consistency.test.ts
CHANGED
|
@@ -8,12 +8,13 @@ import {
|
|
|
8
8
|
} from "node:fs";
|
|
9
9
|
import { resolve, dirname } from "node:path";
|
|
10
10
|
import { fileURLToPath } from "node:url";
|
|
11
|
-
import { execSync } from "node:child_process";
|
|
11
|
+
import { execSync, execFileSync } from "node:child_process";
|
|
12
12
|
import { tmpdir } from "node:os";
|
|
13
13
|
|
|
14
14
|
import { DocumentObject } from "./docx/bridge.js";
|
|
15
15
|
import { RedlineEngine } from "./engine.js";
|
|
16
16
|
import { extractTextFromBuffer } from "./ingest.js";
|
|
17
|
+
import { serializeXml } from "./docx/dom.js";
|
|
17
18
|
|
|
18
19
|
const __filename = fileURLToPath(import.meta.url);
|
|
19
20
|
const __dirname = dirname(__filename);
|
|
@@ -28,10 +29,62 @@ const PYTHON_ABSTRACT_CMD = resolve(
|
|
|
28
29
|
);
|
|
29
30
|
const PYTHON_DIR = resolve(__dirname, "../../../../python");
|
|
30
31
|
|
|
32
|
+
const CT_COMMENTS =
|
|
33
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml";
|
|
34
|
+
|
|
31
35
|
function normalizeMdTimestamps(mdText: string): string {
|
|
32
36
|
return mdText.replace(/@ \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/g, "@ DATE");
|
|
33
37
|
}
|
|
34
38
|
|
|
39
|
+
function xmllintCheck(xmlContent: string, label: string): void {
|
|
40
|
+
// Cross-platform lookup: `which` on POSIX, `where` on Windows.
|
|
41
|
+
const locator = process.platform === "win32" ? "where" : "which";
|
|
42
|
+
let xmllintBin: string | null = null;
|
|
43
|
+
try {
|
|
44
|
+
xmllintBin =
|
|
45
|
+
execSync(`${locator} xmllint`, { encoding: "utf-8" })
|
|
46
|
+
.split(/\r?\n/)
|
|
47
|
+
.map((l) => l.trim())
|
|
48
|
+
.filter(Boolean)[0] || null;
|
|
49
|
+
} catch {
|
|
50
|
+
/* not found */
|
|
51
|
+
}
|
|
52
|
+
if (!xmllintBin) {
|
|
53
|
+
// Optional external XML validation: skip when xmllint is unavailable
|
|
54
|
+
// (common on Windows). The in-code namespace assertion still runs.
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
const tmpFile = resolve(tmpdir(), `adeu_consistency_${Date.now()}_${label}`);
|
|
58
|
+
try {
|
|
59
|
+
writeFileSync(tmpFile, xmlContent, "utf-8");
|
|
60
|
+
execFileSync(xmllintBin, ["--noout", tmpFile]);
|
|
61
|
+
} catch (err: any) {
|
|
62
|
+
throw new Error(
|
|
63
|
+
`xmllint validation failed for ${label}:\n${err.stderr ?? err.message}`,
|
|
64
|
+
);
|
|
65
|
+
} finally {
|
|
66
|
+
if (existsSync(tmpFile)) unlinkSync(tmpFile);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
async function validateCommentsXmlNamespaces(
|
|
71
|
+
outBuffer: Buffer,
|
|
72
|
+
folder: string,
|
|
73
|
+
): Promise<void> {
|
|
74
|
+
const doc = await DocumentObject.load(outBuffer);
|
|
75
|
+
const commentsPart = doc.pkg.parts.find((p) => p.contentType === CT_COMMENTS);
|
|
76
|
+
if (!commentsPart) return;
|
|
77
|
+
|
|
78
|
+
const commentsXml = serializeXml(
|
|
79
|
+
commentsPart._element.ownerDocument ?? commentsPart._element,
|
|
80
|
+
);
|
|
81
|
+
|
|
82
|
+
expect(commentsXml).toContain(
|
|
83
|
+
'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
|
|
84
|
+
);
|
|
85
|
+
xmllintCheck(commentsXml, `${folder}_comments.xml`);
|
|
86
|
+
}
|
|
87
|
+
|
|
35
88
|
describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
36
89
|
if (!existsSync(CORPUS_DIR)) {
|
|
37
90
|
it.skip("Cross-platform test corpus not found", () => {});
|
|
@@ -72,7 +125,12 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
|
72
125
|
engine.process_batch(testConfig.changes || []);
|
|
73
126
|
outBuffer = await doc.save();
|
|
74
127
|
|
|
75
|
-
// 2.
|
|
128
|
+
// 2. Validate comments XML namespaces when requested by test.json
|
|
129
|
+
if (testConfig.validate_comments_xml_namespaces) {
|
|
130
|
+
await validateCommentsXmlNamespaces(outBuffer, folder);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// 3. Assert XML Structure Parity (via Python Bridge)
|
|
76
134
|
const goldenXmlPath = resolve(testDir, "golden_abstract.xml");
|
|
77
135
|
if (existsSync(goldenXmlPath)) {
|
|
78
136
|
const expectedXml = readFileSync(goldenXmlPath, "utf-8");
|
|
@@ -103,7 +161,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
|
103
161
|
}
|
|
104
162
|
}
|
|
105
163
|
|
|
106
|
-
//
|
|
164
|
+
// 4. Assert Markdown Extraction Parity (Raw View)
|
|
107
165
|
const rawMdPath = resolve(testDir, "golden_raw.md");
|
|
108
166
|
if (existsSync(rawMdPath)) {
|
|
109
167
|
const expectedRaw = readFileSync(rawMdPath, "utf-8").replace(
|
|
@@ -116,7 +174,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
|
116
174
|
expect(actualRaw).toBe(expectedRaw);
|
|
117
175
|
}
|
|
118
176
|
|
|
119
|
-
//
|
|
177
|
+
// 5. Assert Markdown Extraction Parity (Clean View)
|
|
120
178
|
const cleanMdPath = resolve(testDir, "golden_clean.md");
|
|
121
179
|
if (existsSync(cleanMdPath)) {
|
|
122
180
|
const expectedClean = readFileSync(cleanMdPath, "utf-8").replace(
|
package/src/diff.ts
CHANGED
|
@@ -1,6 +1,34 @@
|
|
|
1
1
|
import diff_match_patch from "diff-match-patch";
|
|
2
2
|
import { ModifyText } from "./models.js";
|
|
3
3
|
|
|
4
|
+
function _count_standalone_underscores(s: string): number {
|
|
5
|
+
let count = 0;
|
|
6
|
+
let i = 0;
|
|
7
|
+
const n = s.length;
|
|
8
|
+
const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
|
|
9
|
+
while (i < n) {
|
|
10
|
+
if (s[i] === "_") {
|
|
11
|
+
// Is it part of "__"?
|
|
12
|
+
let is_double = false;
|
|
13
|
+
if ((i > 0 && s[i - 1] === "_") || (i < n - 1 && s[i + 1] === "_")) {
|
|
14
|
+
is_double = true;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// Is it intra-word?
|
|
18
|
+
let is_intra = false;
|
|
19
|
+
if (i > 0 && isAlnum(s[i - 1]) && i < n - 1 && isAlnum(s[i + 1])) {
|
|
20
|
+
is_intra = true;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (!is_double && !is_intra) {
|
|
24
|
+
count++;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
i++;
|
|
28
|
+
}
|
|
29
|
+
return count;
|
|
30
|
+
}
|
|
31
|
+
|
|
4
32
|
export function trim_common_context(
|
|
5
33
|
target: string,
|
|
6
34
|
new_val: string,
|
|
@@ -44,7 +72,7 @@ export function trim_common_context(
|
|
|
44
72
|
const left = target.substring(0, prefix_len);
|
|
45
73
|
const b_count = (left.match(/\*\*/g) || []).length;
|
|
46
74
|
const u2_count = (left.match(/__/g) || []).length;
|
|
47
|
-
const u1_count = (left
|
|
75
|
+
const u1_count = _count_standalone_underscores(left);
|
|
48
76
|
|
|
49
77
|
if (b_count % 2 !== 0) {
|
|
50
78
|
prefix_len = left.lastIndexOf("**");
|
|
@@ -56,14 +84,18 @@ export function trim_common_context(
|
|
|
56
84
|
}
|
|
57
85
|
if (u1_count % 2 !== 0) {
|
|
58
86
|
let idx = left.length - 1;
|
|
87
|
+
const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
|
|
59
88
|
while (idx >= 0) {
|
|
60
89
|
if (
|
|
61
90
|
left[idx] === "_" &&
|
|
62
91
|
(idx === 0 || left[idx - 1] !== "_") &&
|
|
63
92
|
(idx === left.length - 1 || left[idx + 1] !== "_")
|
|
64
93
|
) {
|
|
65
|
-
|
|
66
|
-
|
|
94
|
+
const is_intra = idx > 0 && isAlnum(left[idx - 1]) && idx < left.length - 1 && isAlnum(left[idx + 1]);
|
|
95
|
+
if (!is_intra) {
|
|
96
|
+
prefix_len = idx;
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
67
99
|
}
|
|
68
100
|
idx--;
|
|
69
101
|
}
|
|
@@ -140,7 +172,7 @@ export function trim_common_context(
|
|
|
140
172
|
const right = target.substring(target.length - suffix_len);
|
|
141
173
|
const b_count = (right.match(/\*\*/g) || []).length;
|
|
142
174
|
const u2_count = (right.match(/__/g) || []).length;
|
|
143
|
-
const u1_count = (right
|
|
175
|
+
const u1_count = _count_standalone_underscores(right);
|
|
144
176
|
|
|
145
177
|
if (b_count % 2 !== 0) {
|
|
146
178
|
suffix_len -= right.indexOf("**") + 2;
|
|
@@ -152,14 +184,18 @@ export function trim_common_context(
|
|
|
152
184
|
}
|
|
153
185
|
if (u1_count % 2 !== 0) {
|
|
154
186
|
let idx_in_right = 0;
|
|
187
|
+
const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
|
|
155
188
|
while (idx_in_right < right.length) {
|
|
156
189
|
if (
|
|
157
190
|
right[idx_in_right] === "_" &&
|
|
158
191
|
(idx_in_right === 0 || right[idx_in_right - 1] !== "_") &&
|
|
159
192
|
(idx_in_right === right.length - 1 || right[idx_in_right + 1] !== "_")
|
|
160
193
|
) {
|
|
161
|
-
|
|
162
|
-
|
|
194
|
+
const is_intra = idx_in_right > 0 && isAlnum(right[idx_in_right - 1]) && idx_in_right < right.length - 1 && isAlnum(right[idx_in_right + 1]);
|
|
195
|
+
if (!is_intra) {
|
|
196
|
+
suffix_len -= idx_in_right + 1;
|
|
197
|
+
break;
|
|
198
|
+
}
|
|
163
199
|
}
|
|
164
200
|
idx_in_right++;
|
|
165
201
|
}
|
package/src/docx/dom.ts
CHANGED
|
@@ -53,7 +53,7 @@ export function parseXml(xmlString: string): Document {
|
|
|
53
53
|
if (xmlString.startsWith("\uFEFF")) {
|
|
54
54
|
xmlString = xmlString.slice(1);
|
|
55
55
|
}
|
|
56
|
-
return new DOMParser().parseFromString(xmlString, "text/xml");
|
|
56
|
+
return new DOMParser().parseFromString(xmlString, "text/xml") as unknown as Document;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
/**
|
|
@@ -61,7 +61,7 @@ export function parseXml(xmlString: string): Document {
|
|
|
61
61
|
* enforcing deterministic attribute ordering on the root element.
|
|
62
62
|
*/
|
|
63
63
|
export function serializeXml(node: Node): string {
|
|
64
|
-
let xml = new XMLSerializer().serializeToString(node);
|
|
64
|
+
let xml = new XMLSerializer().serializeToString(node as any);
|
|
65
65
|
|
|
66
66
|
// BUG-11: Deterministic namespace ordering on root elements.
|
|
67
67
|
const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
|
package/src/engine.bugs.test.ts
CHANGED
|
@@ -493,4 +493,42 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
493
493
|
expect(cleanText).not.toContain("Paragraph 2");
|
|
494
494
|
expect(cleanText).toContain("Paragraph 1 ends here. MERGED here.");
|
|
495
495
|
});
|
|
496
|
+
|
|
497
|
+
it("BUG-REPRO: accept_all_revisions leaks comments and in-body comment anchors", async () => {
|
|
498
|
+
const doc = await createTestDocument();
|
|
499
|
+
addParagraph(doc, "This is the original text of the agreement.");
|
|
500
|
+
const engine = new RedlineEngine(doc, "Reviewer AI");
|
|
501
|
+
|
|
502
|
+
// Add a tracked change with a comment attached
|
|
503
|
+
engine.process_batch([
|
|
504
|
+
{
|
|
505
|
+
type: "modify",
|
|
506
|
+
target_text: "original text",
|
|
507
|
+
new_text: "updated text",
|
|
508
|
+
comment: "Should this be updated or kept as original?",
|
|
509
|
+
},
|
|
510
|
+
]);
|
|
511
|
+
|
|
512
|
+
// Pre-condition check: comment parts exist
|
|
513
|
+
const original_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
|
|
514
|
+
expect(original_comment_parts.length).toBeGreaterThan(0);
|
|
515
|
+
|
|
516
|
+
const original_xml = doc.element.toString();
|
|
517
|
+
expect(original_xml).toContain("w:commentRangeStart");
|
|
518
|
+
expect(original_xml).toContain("w:commentReference");
|
|
519
|
+
|
|
520
|
+
// Accept all
|
|
521
|
+
engine.accept_all_revisions();
|
|
522
|
+
|
|
523
|
+
// Verify comment removal
|
|
524
|
+
const final_xml = doc.element.toString();
|
|
525
|
+
|
|
526
|
+
// Assert NO in-body comment anchors survive (anchors must be completely gone)
|
|
527
|
+
expect(final_xml).not.toContain("w:commentRangeStart");
|
|
528
|
+
expect(final_xml).not.toContain("w:commentRangeEnd");
|
|
529
|
+
expect(final_xml).not.toContain("w:commentReference");
|
|
530
|
+
|
|
531
|
+
const final_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
|
|
532
|
+
expect(final_comment_parts.length).toBe(0);
|
|
533
|
+
});
|
|
496
534
|
});
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { createTestDocument, addParagraph } from "./test-utils.js";
|
|
3
|
+
import { RedlineEngine } from "./engine.js";
|
|
4
|
+
import { extractTextFromBuffer } from "./ingest.js";
|
|
5
|
+
|
|
6
|
+
describe("Feedback Layer & Dry Run Verification", () => {
|
|
7
|
+
it("process_batch returns detailed edit reports", async () => {
|
|
8
|
+
const doc = await createTestDocument();
|
|
9
|
+
addParagraph(doc, "The quick brown fox jumps over the lazy dog.");
|
|
10
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
11
|
+
|
|
12
|
+
const stats = (engine as any).process_batch([
|
|
13
|
+
{ type: "modify", target_text: "quick brown fox", new_text: "fast red fox" }
|
|
14
|
+
]);
|
|
15
|
+
|
|
16
|
+
expect(stats.edits).toBeDefined();
|
|
17
|
+
expect(stats.edits.length).toBe(1);
|
|
18
|
+
|
|
19
|
+
const report = stats.edits[0];
|
|
20
|
+
expect(report.status).toBe("applied");
|
|
21
|
+
expect(report.target_text).toBe("quick brown fox");
|
|
22
|
+
expect(report.new_text).toBe("fast red fox");
|
|
23
|
+
|
|
24
|
+
// Previews with context window
|
|
25
|
+
expect(report.critic_markup).toContain("{--quick brown--}{++fast red++} fox");
|
|
26
|
+
expect(report.critic_markup).toContain("The ");
|
|
27
|
+
expect(report.critic_markup).toContain(" jumps over");
|
|
28
|
+
|
|
29
|
+
expect(report.clean_text).toContain("The fast red fox jumps over");
|
|
30
|
+
expect(stats.engine).toBe("node");
|
|
31
|
+
expect(stats.version).toBeDefined();
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("punctuation anchor triggers warning", async () => {
|
|
35
|
+
const doc = await createTestDocument();
|
|
36
|
+
addParagraph(doc, "Refer to sample_term_name in Section 4.");
|
|
37
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
38
|
+
|
|
39
|
+
const stats = (engine as any).process_batch([
|
|
40
|
+
{ type: "modify", target_text: "sample_term_name", new_text: "validated_term_name" }
|
|
41
|
+
]);
|
|
42
|
+
|
|
43
|
+
const report = stats.edits[0];
|
|
44
|
+
expect(report.warning).not.toBeNull();
|
|
45
|
+
expect(report.warning.toLowerCase()).toContain("punctuation");
|
|
46
|
+
expect(report.warning).toContain("sample_term_name");
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it("dry_run does not mutate and reports safely", async () => {
|
|
50
|
+
const doc = await createTestDocument();
|
|
51
|
+
addParagraph(doc, "Baseline text.");
|
|
52
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
53
|
+
|
|
54
|
+
// 1. Valid Dry Run
|
|
55
|
+
const stats = (engine as any).process_batch([
|
|
56
|
+
{ type: "modify", target_text: "Baseline", new_text: "Modified Preview" }
|
|
57
|
+
], true);
|
|
58
|
+
|
|
59
|
+
expect(stats.edits_applied).toBe(1);
|
|
60
|
+
expect(stats.edits[0].status).toBe("applied");
|
|
61
|
+
expect(stats.edits[0].clean_text).toContain("Modified Preview");
|
|
62
|
+
|
|
63
|
+
// Verify original document remains pristine
|
|
64
|
+
const buf = await doc.save();
|
|
65
|
+
const cleanText = await extractTextFromBuffer(buf, true);
|
|
66
|
+
expect(cleanText).not.toContain("Modified Preview");
|
|
67
|
+
expect(cleanText).toContain("Baseline text");
|
|
68
|
+
|
|
69
|
+
// 2. Invalid Dry Run should not throw and instead report the failure safely
|
|
70
|
+
const statsInvalid = (engine as any).process_batch([
|
|
71
|
+
{ type: "modify", target_text: "NON_EXISTENT", new_text: "fail" }
|
|
72
|
+
], true);
|
|
73
|
+
|
|
74
|
+
expect(statsInvalid.edits_skipped).toBe(1);
|
|
75
|
+
expect(statsInvalid.edits[0].status).toBe("failed");
|
|
76
|
+
expect(statsInvalid.edits[0].error).not.toBeNull();
|
|
77
|
+
expect(statsInvalid.edits[0].error.toLowerCase()).toContain("not found");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it("preview self-consistency on underscore terms", async () => {
|
|
81
|
+
const doc = await createTestDocument();
|
|
82
|
+
addParagraph(doc, "ANCHOR_LINE governs the interpretation of this Agreement.");
|
|
83
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
84
|
+
|
|
85
|
+
const stats = (engine as any).process_batch([
|
|
86
|
+
{
|
|
87
|
+
type: "modify",
|
|
88
|
+
target_text: "ANCHOR_LINE governs the interpretation of this Agreement.",
|
|
89
|
+
new_text: "NEW_PARA inserted before.\n\nANCHOR_LINE governs the interpretation of this Agreement.",
|
|
90
|
+
}
|
|
91
|
+
]);
|
|
92
|
+
|
|
93
|
+
const buf = await doc.save();
|
|
94
|
+
const cleanDocText = await extractTextFromBuffer(buf, true);
|
|
95
|
+
|
|
96
|
+
const report = stats.edits[0];
|
|
97
|
+
|
|
98
|
+
expect(report.clean_text).not.toBeNull();
|
|
99
|
+
const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
|
|
100
|
+
expect(cleanDocText).toContain(cleanPreview);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it("preview does not contain duplicate garbling", async () => {
|
|
104
|
+
const doc = await createTestDocument();
|
|
105
|
+
|
|
106
|
+
addParagraph(doc, "Payment Terms");
|
|
107
|
+
|
|
108
|
+
const xmlDoc = doc.element.ownerDocument!;
|
|
109
|
+
const p2 = xmlDoc.createElement("w:p");
|
|
110
|
+
const del = xmlDoc.createElement("w:del");
|
|
111
|
+
del.setAttribute("w:id", "900");
|
|
112
|
+
del.setAttribute("w:author", "Reviewer");
|
|
113
|
+
del.setAttribute("w:date", "2026-06-01T00:00:00Z");
|
|
114
|
+
const r = xmlDoc.createElement("w:r");
|
|
115
|
+
const t = xmlDoc.createElement("w:delText");
|
|
116
|
+
t.setAttribute("xml:space", "preserve");
|
|
117
|
+
t.textContent = "DUP_PHRASE shall be paid within thirty days of invoice.";
|
|
118
|
+
r.appendChild(t);
|
|
119
|
+
del.appendChild(r);
|
|
120
|
+
p2.appendChild(del);
|
|
121
|
+
const firstP = doc.element.getElementsByTagName("w:p")[0];
|
|
122
|
+
firstP.parentNode!.appendChild(p2);
|
|
123
|
+
|
|
124
|
+
addParagraph(doc, "DUP_PHRASE shall be paid within thirty days of invoice.");
|
|
125
|
+
addParagraph(doc, "Late payments accrue interest at the statutory rate.");
|
|
126
|
+
|
|
127
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
128
|
+
const stats = (engine as any).process_batch([
|
|
129
|
+
{
|
|
130
|
+
type: "modify",
|
|
131
|
+
target_text: "DUP_PHRASE shall be paid within thirty days of invoice.",
|
|
132
|
+
new_text: "DUP_PHRASE shall be paid within sixty days of invoice.",
|
|
133
|
+
}
|
|
134
|
+
]);
|
|
135
|
+
|
|
136
|
+
const buf = await doc.save();
|
|
137
|
+
const cleanDocText = await extractTextFromBuffer(buf, true);
|
|
138
|
+
|
|
139
|
+
const report = stats.edits[0];
|
|
140
|
+
expect(report.clean_text).not.toBeNull();
|
|
141
|
+
const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
|
|
142
|
+
expect(cleanDocText).toContain(cleanPreview);
|
|
143
|
+
});
|
|
144
|
+
});
|