@adeu/core 1.8.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +618 -102
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -2
- package/dist/index.d.ts +15 -2
- package/dist/index.js +618 -102
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/comments.ts +33 -14
- package/src/consistency.test.ts +62 -4
- package/src/diff.ts +42 -6
- package/src/docx/dom.ts +6 -2
- package/src/engine.bugs.test.ts +63 -10
- package/src/engine.feedback.test.ts +144 -0
- package/src/engine.issue23.test.ts +511 -0
- package/src/engine.ts +513 -64
- package/src/sanitize/core.ts +1 -0
- package/src/sanitize/sanitize.test.ts +48 -6
- package/src/sanitize/transforms.ts +88 -1
package/package.json
CHANGED
package/src/comments.ts
CHANGED
|
@@ -33,27 +33,27 @@ export class CommentsManager {
|
|
|
33
33
|
|
|
34
34
|
constructor(public doc: DocumentObject) {}
|
|
35
35
|
|
|
36
|
-
public get commentsPart() {
|
|
36
|
+
public get commentsPart(): Part {
|
|
37
37
|
if (!this._commentsPart) {
|
|
38
38
|
this._commentsPart = this._getOrCreateCommentsPart();
|
|
39
39
|
this._ensureNamespaces();
|
|
40
40
|
}
|
|
41
|
-
return this._commentsPart
|
|
41
|
+
return this._commentsPart!;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
public get extendedPart() {
|
|
44
|
+
public get extendedPart(): Part {
|
|
45
45
|
if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
|
|
46
|
-
return this._extendedPart
|
|
46
|
+
return this._extendedPart!;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
public get idsPart() {
|
|
49
|
+
public get idsPart(): Part {
|
|
50
50
|
if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
|
|
51
|
-
return this._idsPart
|
|
51
|
+
return this._idsPart!;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
public get extensiblePart() {
|
|
54
|
+
public get extensiblePart(): Part {
|
|
55
55
|
if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
|
|
56
|
-
return this._extensiblePart
|
|
56
|
+
return this._extensiblePart!;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
public get nextId(): number {
|
|
@@ -124,9 +124,28 @@ export class CommentsManager {
|
|
|
124
124
|
}
|
|
125
125
|
|
|
126
126
|
private _ensureNamespaces() {
|
|
127
|
-
//
|
|
128
|
-
//
|
|
129
|
-
//
|
|
127
|
+
// When the comments part already existed (e.g. a legacy or pandoc-produced
|
|
128
|
+
// document) its root <w:comments> may omit the namespaces we rely on —
|
|
129
|
+
// most importantly w14, which qualifies the w14:paraId / w14:textId
|
|
130
|
+
// attributes we write on each comment paragraph. Without the declaration
|
|
131
|
+
// the serialised XML is invalid ("Namespace prefix w14 ... is not defined").
|
|
132
|
+
// Declare any missing namespace prefixes on the existing root element.
|
|
133
|
+
const root = this._commentsPart?._element;
|
|
134
|
+
if (!root) return;
|
|
135
|
+
|
|
136
|
+
const required: [string, string][] = [
|
|
137
|
+
['xmlns:w', NS.w],
|
|
138
|
+
['xmlns:w14', NS.w14],
|
|
139
|
+
['xmlns:w15', NS.w15],
|
|
140
|
+
['xmlns:w16cid', NS.w16cid],
|
|
141
|
+
['xmlns:w16cex', NS.w16cex],
|
|
142
|
+
['xmlns:mc', NS.mc],
|
|
143
|
+
];
|
|
144
|
+
for (const [attr, uri] of required) {
|
|
145
|
+
if (!root.getAttribute(attr)) {
|
|
146
|
+
root.setAttribute(attr, uri);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
130
149
|
}
|
|
131
150
|
|
|
132
151
|
private _getNextCommentId(): number {
|
|
@@ -273,10 +292,10 @@ export class CommentsManager {
|
|
|
273
292
|
}
|
|
274
293
|
|
|
275
294
|
public deleteComment(commentId: string) {
|
|
276
|
-
if (!this.
|
|
295
|
+
if (!this.commentsPart) return;
|
|
277
296
|
|
|
278
297
|
let commentEl: Element | null = null;
|
|
279
|
-
for (const c of findAllDescendants(this.
|
|
298
|
+
for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
|
|
280
299
|
if (c.getAttribute('w:id') === commentId) {
|
|
281
300
|
commentEl = c;
|
|
282
301
|
break;
|
|
@@ -304,7 +323,7 @@ export class CommentsManager {
|
|
|
304
323
|
if (child.getAttribute('w15:paraIdParent') === paraId) {
|
|
305
324
|
const childParaId = child.getAttribute('w15:paraId');
|
|
306
325
|
if (childParaId) {
|
|
307
|
-
for (const c of findAllDescendants(this.
|
|
326
|
+
for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
|
|
308
327
|
for (const p of findAllDescendants(c, 'w:p')) {
|
|
309
328
|
if (p.getAttribute('w14:paraId') === childParaId) {
|
|
310
329
|
const cid = c.getAttribute('w:id');
|
package/src/consistency.test.ts
CHANGED
|
@@ -8,12 +8,13 @@ import {
|
|
|
8
8
|
} from "node:fs";
|
|
9
9
|
import { resolve, dirname } from "node:path";
|
|
10
10
|
import { fileURLToPath } from "node:url";
|
|
11
|
-
import { execSync } from "node:child_process";
|
|
11
|
+
import { execSync, execFileSync } from "node:child_process";
|
|
12
12
|
import { tmpdir } from "node:os";
|
|
13
13
|
|
|
14
14
|
import { DocumentObject } from "./docx/bridge.js";
|
|
15
15
|
import { RedlineEngine } from "./engine.js";
|
|
16
16
|
import { extractTextFromBuffer } from "./ingest.js";
|
|
17
|
+
import { serializeXml } from "./docx/dom.js";
|
|
17
18
|
|
|
18
19
|
const __filename = fileURLToPath(import.meta.url);
|
|
19
20
|
const __dirname = dirname(__filename);
|
|
@@ -28,10 +29,62 @@ const PYTHON_ABSTRACT_CMD = resolve(
|
|
|
28
29
|
);
|
|
29
30
|
const PYTHON_DIR = resolve(__dirname, "../../../../python");
|
|
30
31
|
|
|
32
|
+
const CT_COMMENTS =
|
|
33
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml";
|
|
34
|
+
|
|
31
35
|
function normalizeMdTimestamps(mdText: string): string {
|
|
32
36
|
return mdText.replace(/@ \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/g, "@ DATE");
|
|
33
37
|
}
|
|
34
38
|
|
|
39
|
+
function xmllintCheck(xmlContent: string, label: string): void {
|
|
40
|
+
// Cross-platform lookup: `which` on POSIX, `where` on Windows.
|
|
41
|
+
const locator = process.platform === "win32" ? "where" : "which";
|
|
42
|
+
let xmllintBin: string | null = null;
|
|
43
|
+
try {
|
|
44
|
+
xmllintBin =
|
|
45
|
+
execSync(`${locator} xmllint`, { encoding: "utf-8" })
|
|
46
|
+
.split(/\r?\n/)
|
|
47
|
+
.map((l) => l.trim())
|
|
48
|
+
.filter(Boolean)[0] || null;
|
|
49
|
+
} catch {
|
|
50
|
+
/* not found */
|
|
51
|
+
}
|
|
52
|
+
if (!xmllintBin) {
|
|
53
|
+
// Optional external XML validation: skip when xmllint is unavailable
|
|
54
|
+
// (common on Windows). The in-code namespace assertion still runs.
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
const tmpFile = resolve(tmpdir(), `adeu_consistency_${Date.now()}_${label}`);
|
|
58
|
+
try {
|
|
59
|
+
writeFileSync(tmpFile, xmlContent, "utf-8");
|
|
60
|
+
execFileSync(xmllintBin, ["--noout", tmpFile]);
|
|
61
|
+
} catch (err: any) {
|
|
62
|
+
throw new Error(
|
|
63
|
+
`xmllint validation failed for ${label}:\n${err.stderr ?? err.message}`,
|
|
64
|
+
);
|
|
65
|
+
} finally {
|
|
66
|
+
if (existsSync(tmpFile)) unlinkSync(tmpFile);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
async function validateCommentsXmlNamespaces(
|
|
71
|
+
outBuffer: Buffer,
|
|
72
|
+
folder: string,
|
|
73
|
+
): Promise<void> {
|
|
74
|
+
const doc = await DocumentObject.load(outBuffer);
|
|
75
|
+
const commentsPart = doc.pkg.parts.find((p) => p.contentType === CT_COMMENTS);
|
|
76
|
+
if (!commentsPart) return;
|
|
77
|
+
|
|
78
|
+
const commentsXml = serializeXml(
|
|
79
|
+
commentsPart._element.ownerDocument ?? commentsPart._element,
|
|
80
|
+
);
|
|
81
|
+
|
|
82
|
+
expect(commentsXml).toContain(
|
|
83
|
+
'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
|
|
84
|
+
);
|
|
85
|
+
xmllintCheck(commentsXml, `${folder}_comments.xml`);
|
|
86
|
+
}
|
|
87
|
+
|
|
35
88
|
describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
36
89
|
if (!existsSync(CORPUS_DIR)) {
|
|
37
90
|
it.skip("Cross-platform test corpus not found", () => {});
|
|
@@ -72,7 +125,12 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
|
72
125
|
engine.process_batch(testConfig.changes || []);
|
|
73
126
|
outBuffer = await doc.save();
|
|
74
127
|
|
|
75
|
-
// 2.
|
|
128
|
+
// 2. Validate comments XML namespaces when requested by test.json
|
|
129
|
+
if (testConfig.validate_comments_xml_namespaces) {
|
|
130
|
+
await validateCommentsXmlNamespaces(outBuffer, folder);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// 3. Assert XML Structure Parity (via Python Bridge)
|
|
76
134
|
const goldenXmlPath = resolve(testDir, "golden_abstract.xml");
|
|
77
135
|
if (existsSync(goldenXmlPath)) {
|
|
78
136
|
const expectedXml = readFileSync(goldenXmlPath, "utf-8");
|
|
@@ -103,7 +161,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
|
103
161
|
}
|
|
104
162
|
}
|
|
105
163
|
|
|
106
|
-
//
|
|
164
|
+
// 4. Assert Markdown Extraction Parity (Raw View)
|
|
107
165
|
const rawMdPath = resolve(testDir, "golden_raw.md");
|
|
108
166
|
if (existsSync(rawMdPath)) {
|
|
109
167
|
const expectedRaw = readFileSync(rawMdPath, "utf-8").replace(
|
|
@@ -116,7 +174,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
|
|
|
116
174
|
expect(actualRaw).toBe(expectedRaw);
|
|
117
175
|
}
|
|
118
176
|
|
|
119
|
-
//
|
|
177
|
+
// 5. Assert Markdown Extraction Parity (Clean View)
|
|
120
178
|
const cleanMdPath = resolve(testDir, "golden_clean.md");
|
|
121
179
|
if (existsSync(cleanMdPath)) {
|
|
122
180
|
const expectedClean = readFileSync(cleanMdPath, "utf-8").replace(
|
package/src/diff.ts
CHANGED
|
@@ -1,6 +1,34 @@
|
|
|
1
1
|
import diff_match_patch from "diff-match-patch";
|
|
2
2
|
import { ModifyText } from "./models.js";
|
|
3
3
|
|
|
4
|
+
function _count_standalone_underscores(s: string): number {
|
|
5
|
+
let count = 0;
|
|
6
|
+
let i = 0;
|
|
7
|
+
const n = s.length;
|
|
8
|
+
const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
|
|
9
|
+
while (i < n) {
|
|
10
|
+
if (s[i] === "_") {
|
|
11
|
+
// Is it part of "__"?
|
|
12
|
+
let is_double = false;
|
|
13
|
+
if ((i > 0 && s[i - 1] === "_") || (i < n - 1 && s[i + 1] === "_")) {
|
|
14
|
+
is_double = true;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// Is it intra-word?
|
|
18
|
+
let is_intra = false;
|
|
19
|
+
if (i > 0 && isAlnum(s[i - 1]) && i < n - 1 && isAlnum(s[i + 1])) {
|
|
20
|
+
is_intra = true;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (!is_double && !is_intra) {
|
|
24
|
+
count++;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
i++;
|
|
28
|
+
}
|
|
29
|
+
return count;
|
|
30
|
+
}
|
|
31
|
+
|
|
4
32
|
export function trim_common_context(
|
|
5
33
|
target: string,
|
|
6
34
|
new_val: string,
|
|
@@ -44,7 +72,7 @@ export function trim_common_context(
|
|
|
44
72
|
const left = target.substring(0, prefix_len);
|
|
45
73
|
const b_count = (left.match(/\*\*/g) || []).length;
|
|
46
74
|
const u2_count = (left.match(/__/g) || []).length;
|
|
47
|
-
const u1_count = (left
|
|
75
|
+
const u1_count = _count_standalone_underscores(left);
|
|
48
76
|
|
|
49
77
|
if (b_count % 2 !== 0) {
|
|
50
78
|
prefix_len = left.lastIndexOf("**");
|
|
@@ -56,14 +84,18 @@ export function trim_common_context(
|
|
|
56
84
|
}
|
|
57
85
|
if (u1_count % 2 !== 0) {
|
|
58
86
|
let idx = left.length - 1;
|
|
87
|
+
const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
|
|
59
88
|
while (idx >= 0) {
|
|
60
89
|
if (
|
|
61
90
|
left[idx] === "_" &&
|
|
62
91
|
(idx === 0 || left[idx - 1] !== "_") &&
|
|
63
92
|
(idx === left.length - 1 || left[idx + 1] !== "_")
|
|
64
93
|
) {
|
|
65
|
-
|
|
66
|
-
|
|
94
|
+
const is_intra = idx > 0 && isAlnum(left[idx - 1]) && idx < left.length - 1 && isAlnum(left[idx + 1]);
|
|
95
|
+
if (!is_intra) {
|
|
96
|
+
prefix_len = idx;
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
67
99
|
}
|
|
68
100
|
idx--;
|
|
69
101
|
}
|
|
@@ -140,7 +172,7 @@ export function trim_common_context(
|
|
|
140
172
|
const right = target.substring(target.length - suffix_len);
|
|
141
173
|
const b_count = (right.match(/\*\*/g) || []).length;
|
|
142
174
|
const u2_count = (right.match(/__/g) || []).length;
|
|
143
|
-
const u1_count = (right
|
|
175
|
+
const u1_count = _count_standalone_underscores(right);
|
|
144
176
|
|
|
145
177
|
if (b_count % 2 !== 0) {
|
|
146
178
|
suffix_len -= right.indexOf("**") + 2;
|
|
@@ -152,14 +184,18 @@ export function trim_common_context(
|
|
|
152
184
|
}
|
|
153
185
|
if (u1_count % 2 !== 0) {
|
|
154
186
|
let idx_in_right = 0;
|
|
187
|
+
const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
|
|
155
188
|
while (idx_in_right < right.length) {
|
|
156
189
|
if (
|
|
157
190
|
right[idx_in_right] === "_" &&
|
|
158
191
|
(idx_in_right === 0 || right[idx_in_right - 1] !== "_") &&
|
|
159
192
|
(idx_in_right === right.length - 1 || right[idx_in_right + 1] !== "_")
|
|
160
193
|
) {
|
|
161
|
-
|
|
162
|
-
|
|
194
|
+
const is_intra = idx_in_right > 0 && isAlnum(right[idx_in_right - 1]) && idx_in_right < right.length - 1 && isAlnum(right[idx_in_right + 1]);
|
|
195
|
+
if (!is_intra) {
|
|
196
|
+
suffix_len -= idx_in_right + 1;
|
|
197
|
+
break;
|
|
198
|
+
}
|
|
163
199
|
}
|
|
164
200
|
idx_in_right++;
|
|
165
201
|
}
|
package/src/docx/dom.ts
CHANGED
|
@@ -49,7 +49,11 @@ export function findAllDescendants(
|
|
|
49
49
|
* Parses raw XML strings into xmldom Documents.
|
|
50
50
|
*/
|
|
51
51
|
export function parseXml(xmlString: string): Document {
|
|
52
|
-
|
|
52
|
+
// Strip UTF-8 BOM if present
|
|
53
|
+
if (xmlString.startsWith("\uFEFF")) {
|
|
54
|
+
xmlString = xmlString.slice(1);
|
|
55
|
+
}
|
|
56
|
+
return new DOMParser().parseFromString(xmlString, "text/xml") as unknown as Document;
|
|
53
57
|
}
|
|
54
58
|
|
|
55
59
|
/**
|
|
@@ -57,7 +61,7 @@ export function parseXml(xmlString: string): Document {
|
|
|
57
61
|
* enforcing deterministic attribute ordering on the root element.
|
|
58
62
|
*/
|
|
59
63
|
export function serializeXml(node: Node): string {
|
|
60
|
-
let xml = new XMLSerializer().serializeToString(node);
|
|
64
|
+
let xml = new XMLSerializer().serializeToString(node as any);
|
|
61
65
|
|
|
62
66
|
// BUG-11: Deterministic namespace ordering on root elements.
|
|
63
67
|
const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
|
package/src/engine.bugs.test.ts
CHANGED
|
@@ -134,6 +134,12 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
134
134
|
// Direct string equality so Vitest prints the exact diff if they mismatch!
|
|
135
135
|
expect(serialized).toBe(expected);
|
|
136
136
|
});
|
|
137
|
+
|
|
138
|
+
it("BUG-BOM-1: parseXml successfully strips leading UTF-8 BOM (\\uFEFF)", () => {
|
|
139
|
+
const rawXml = `\uFEFF<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"></w:document>`;
|
|
140
|
+
const docXml = parseXml(rawXml);
|
|
141
|
+
expect(docXml.documentElement.tagName).toBe("w:document");
|
|
142
|
+
});
|
|
137
143
|
it("BUG-11b: Sweeps orphaned comment anchors when accepting tracked changes", async () => {
|
|
138
144
|
const doc = await createTestDocument();
|
|
139
145
|
addParagraph(doc, "Confidential Information");
|
|
@@ -235,7 +241,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
235
241
|
"",
|
|
236
242
|
null,
|
|
237
243
|
"123",
|
|
238
|
-
doc.element.ownerDocument
|
|
244
|
+
doc.element.ownerDocument!,
|
|
239
245
|
);
|
|
240
246
|
expect(ins).toBeNull();
|
|
241
247
|
});
|
|
@@ -245,7 +251,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
245
251
|
const p = addParagraph(doc, "Short heading");
|
|
246
252
|
|
|
247
253
|
const fakeCache = {
|
|
248
|
-
|
|
254
|
+
CustomHeading: { name: "Custom Heading", outline_level: 2, bold: true },
|
|
249
255
|
};
|
|
250
256
|
(doc.pkg as any)._adeu_style_cache = [fakeCache, "Normal"];
|
|
251
257
|
|
|
@@ -259,7 +265,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
259
265
|
const buf = await doc.save();
|
|
260
266
|
const body = await extractTextFromBuffer(buf, false);
|
|
261
267
|
const pages = paginate(body, "");
|
|
262
|
-
|
|
268
|
+
|
|
263
269
|
const outlineNodes = extract_outline(
|
|
264
270
|
doc,
|
|
265
271
|
body,
|
|
@@ -295,7 +301,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
295
301
|
const end = xmlDoc.createElement("w:commentRangeEnd");
|
|
296
302
|
end.setAttribute("w:id", c_id);
|
|
297
303
|
p.appendChild(end);
|
|
298
|
-
|
|
304
|
+
|
|
299
305
|
const ref_run = xmlDoc.createElement("w:r");
|
|
300
306
|
const ref = xmlDoc.createElement("w:commentReference");
|
|
301
307
|
ref.setAttribute("w:id", c_id);
|
|
@@ -428,10 +434,18 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
428
434
|
|
|
429
435
|
// Author B tries to modify Author A's pending insertion
|
|
430
436
|
const engineB = new RedlineEngine(doc, "Author B");
|
|
431
|
-
|
|
437
|
+
|
|
432
438
|
expect(() => {
|
|
433
|
-
engineB.process_batch([
|
|
434
|
-
|
|
439
|
+
engineB.process_batch([
|
|
440
|
+
{
|
|
441
|
+
type: "modify",
|
|
442
|
+
target_text: "Inserted by A.",
|
|
443
|
+
new_text: "Modified by B.",
|
|
444
|
+
},
|
|
445
|
+
]);
|
|
446
|
+
}).toThrowError(
|
|
447
|
+
/Accept that change first or scope your edit outside of it/,
|
|
448
|
+
);
|
|
435
449
|
});
|
|
436
450
|
|
|
437
451
|
it("BUG-CROSS-PARA-1: Cross-paragraph modify coalesces paragraphs and tracks para-mark deletion", async () => {
|
|
@@ -452,7 +466,7 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
452
466
|
|
|
453
467
|
const buf = await doc.save();
|
|
454
468
|
const cleanText = await extractTextFromBuffer(buf, true);
|
|
455
|
-
|
|
469
|
+
|
|
456
470
|
expect(cleanText).not.toContain("ends here.\n\n");
|
|
457
471
|
expect(cleanText).toContain("Clause 1 ends here. MERGED here.");
|
|
458
472
|
});
|
|
@@ -467,15 +481,54 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
467
481
|
engine.process_batch([
|
|
468
482
|
{
|
|
469
483
|
type: "modify",
|
|
470
|
-
target_text:
|
|
484
|
+
target_text:
|
|
485
|
+
"ends here.\n\nParagraph 2 is in the middle.\n\nParagraph 3 begins",
|
|
471
486
|
new_text: "ends here. MERGED",
|
|
472
487
|
},
|
|
473
488
|
]);
|
|
474
489
|
|
|
475
490
|
engine.accept_all_revisions();
|
|
476
491
|
const cleanText = await extractTextFromBuffer(await doc.save(), true);
|
|
477
|
-
|
|
492
|
+
|
|
478
493
|
expect(cleanText).not.toContain("Paragraph 2");
|
|
479
494
|
expect(cleanText).toContain("Paragraph 1 ends here. MERGED here.");
|
|
480
495
|
});
|
|
496
|
+
|
|
497
|
+
it("BUG-REPRO: accept_all_revisions leaks comments and in-body comment anchors", async () => {
|
|
498
|
+
const doc = await createTestDocument();
|
|
499
|
+
addParagraph(doc, "This is the original text of the agreement.");
|
|
500
|
+
const engine = new RedlineEngine(doc, "Reviewer AI");
|
|
501
|
+
|
|
502
|
+
// Add a tracked change with a comment attached
|
|
503
|
+
engine.process_batch([
|
|
504
|
+
{
|
|
505
|
+
type: "modify",
|
|
506
|
+
target_text: "original text",
|
|
507
|
+
new_text: "updated text",
|
|
508
|
+
comment: "Should this be updated or kept as original?",
|
|
509
|
+
},
|
|
510
|
+
]);
|
|
511
|
+
|
|
512
|
+
// Pre-condition check: comment parts exist
|
|
513
|
+
const original_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
|
|
514
|
+
expect(original_comment_parts.length).toBeGreaterThan(0);
|
|
515
|
+
|
|
516
|
+
const original_xml = doc.element.toString();
|
|
517
|
+
expect(original_xml).toContain("w:commentRangeStart");
|
|
518
|
+
expect(original_xml).toContain("w:commentReference");
|
|
519
|
+
|
|
520
|
+
// Accept all
|
|
521
|
+
engine.accept_all_revisions();
|
|
522
|
+
|
|
523
|
+
// Verify comment removal
|
|
524
|
+
const final_xml = doc.element.toString();
|
|
525
|
+
|
|
526
|
+
// Assert NO in-body comment anchors survive (anchors must be completely gone)
|
|
527
|
+
expect(final_xml).not.toContain("w:commentRangeStart");
|
|
528
|
+
expect(final_xml).not.toContain("w:commentRangeEnd");
|
|
529
|
+
expect(final_xml).not.toContain("w:commentReference");
|
|
530
|
+
|
|
531
|
+
const final_comment_parts = doc.pkg.parts.filter(p => p.contentType.includes("comments"));
|
|
532
|
+
expect(final_comment_parts.length).toBe(0);
|
|
533
|
+
});
|
|
481
534
|
});
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { createTestDocument, addParagraph } from "./test-utils.js";
|
|
3
|
+
import { RedlineEngine } from "./engine.js";
|
|
4
|
+
import { extractTextFromBuffer } from "./ingest.js";
|
|
5
|
+
|
|
6
|
+
describe("Feedback Layer & Dry Run Verification", () => {
|
|
7
|
+
it("process_batch returns detailed edit reports", async () => {
|
|
8
|
+
const doc = await createTestDocument();
|
|
9
|
+
addParagraph(doc, "The quick brown fox jumps over the lazy dog.");
|
|
10
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
11
|
+
|
|
12
|
+
const stats = (engine as any).process_batch([
|
|
13
|
+
{ type: "modify", target_text: "quick brown fox", new_text: "fast red fox" }
|
|
14
|
+
]);
|
|
15
|
+
|
|
16
|
+
expect(stats.edits).toBeDefined();
|
|
17
|
+
expect(stats.edits.length).toBe(1);
|
|
18
|
+
|
|
19
|
+
const report = stats.edits[0];
|
|
20
|
+
expect(report.status).toBe("applied");
|
|
21
|
+
expect(report.target_text).toBe("quick brown fox");
|
|
22
|
+
expect(report.new_text).toBe("fast red fox");
|
|
23
|
+
|
|
24
|
+
// Previews with context window
|
|
25
|
+
expect(report.critic_markup).toContain("{--quick brown--}{++fast red++} fox");
|
|
26
|
+
expect(report.critic_markup).toContain("The ");
|
|
27
|
+
expect(report.critic_markup).toContain(" jumps over");
|
|
28
|
+
|
|
29
|
+
expect(report.clean_text).toContain("The fast red fox jumps over");
|
|
30
|
+
expect(stats.engine).toBe("node");
|
|
31
|
+
expect(stats.version).toBeDefined();
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("punctuation anchor triggers warning", async () => {
|
|
35
|
+
const doc = await createTestDocument();
|
|
36
|
+
addParagraph(doc, "Refer to sample_term_name in Section 4.");
|
|
37
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
38
|
+
|
|
39
|
+
const stats = (engine as any).process_batch([
|
|
40
|
+
{ type: "modify", target_text: "sample_term_name", new_text: "validated_term_name" }
|
|
41
|
+
]);
|
|
42
|
+
|
|
43
|
+
const report = stats.edits[0];
|
|
44
|
+
expect(report.warning).not.toBeNull();
|
|
45
|
+
expect(report.warning.toLowerCase()).toContain("punctuation");
|
|
46
|
+
expect(report.warning).toContain("sample_term_name");
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it("dry_run does not mutate and reports safely", async () => {
|
|
50
|
+
const doc = await createTestDocument();
|
|
51
|
+
addParagraph(doc, "Baseline text.");
|
|
52
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
53
|
+
|
|
54
|
+
// 1. Valid Dry Run
|
|
55
|
+
const stats = (engine as any).process_batch([
|
|
56
|
+
{ type: "modify", target_text: "Baseline", new_text: "Modified Preview" }
|
|
57
|
+
], true);
|
|
58
|
+
|
|
59
|
+
expect(stats.edits_applied).toBe(1);
|
|
60
|
+
expect(stats.edits[0].status).toBe("applied");
|
|
61
|
+
expect(stats.edits[0].clean_text).toContain("Modified Preview");
|
|
62
|
+
|
|
63
|
+
// Verify original document remains pristine
|
|
64
|
+
const buf = await doc.save();
|
|
65
|
+
const cleanText = await extractTextFromBuffer(buf, true);
|
|
66
|
+
expect(cleanText).not.toContain("Modified Preview");
|
|
67
|
+
expect(cleanText).toContain("Baseline text");
|
|
68
|
+
|
|
69
|
+
// 2. Invalid Dry Run should not throw and instead report the failure safely
|
|
70
|
+
const statsInvalid = (engine as any).process_batch([
|
|
71
|
+
{ type: "modify", target_text: "NON_EXISTENT", new_text: "fail" }
|
|
72
|
+
], true);
|
|
73
|
+
|
|
74
|
+
expect(statsInvalid.edits_skipped).toBe(1);
|
|
75
|
+
expect(statsInvalid.edits[0].status).toBe("failed");
|
|
76
|
+
expect(statsInvalid.edits[0].error).not.toBeNull();
|
|
77
|
+
expect(statsInvalid.edits[0].error.toLowerCase()).toContain("not found");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it("preview self-consistency on underscore terms", async () => {
|
|
81
|
+
const doc = await createTestDocument();
|
|
82
|
+
addParagraph(doc, "ANCHOR_LINE governs the interpretation of this Agreement.");
|
|
83
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
84
|
+
|
|
85
|
+
const stats = (engine as any).process_batch([
|
|
86
|
+
{
|
|
87
|
+
type: "modify",
|
|
88
|
+
target_text: "ANCHOR_LINE governs the interpretation of this Agreement.",
|
|
89
|
+
new_text: "NEW_PARA inserted before.\n\nANCHOR_LINE governs the interpretation of this Agreement.",
|
|
90
|
+
}
|
|
91
|
+
]);
|
|
92
|
+
|
|
93
|
+
const buf = await doc.save();
|
|
94
|
+
const cleanDocText = await extractTextFromBuffer(buf, true);
|
|
95
|
+
|
|
96
|
+
const report = stats.edits[0];
|
|
97
|
+
|
|
98
|
+
expect(report.clean_text).not.toBeNull();
|
|
99
|
+
const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
|
|
100
|
+
expect(cleanDocText).toContain(cleanPreview);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it("preview does not contain duplicate garbling", async () => {
|
|
104
|
+
const doc = await createTestDocument();
|
|
105
|
+
|
|
106
|
+
addParagraph(doc, "Payment Terms");
|
|
107
|
+
|
|
108
|
+
const xmlDoc = doc.element.ownerDocument!;
|
|
109
|
+
const p2 = xmlDoc.createElement("w:p");
|
|
110
|
+
const del = xmlDoc.createElement("w:del");
|
|
111
|
+
del.setAttribute("w:id", "900");
|
|
112
|
+
del.setAttribute("w:author", "Reviewer");
|
|
113
|
+
del.setAttribute("w:date", "2026-06-01T00:00:00Z");
|
|
114
|
+
const r = xmlDoc.createElement("w:r");
|
|
115
|
+
const t = xmlDoc.createElement("w:delText");
|
|
116
|
+
t.setAttribute("xml:space", "preserve");
|
|
117
|
+
t.textContent = "DUP_PHRASE shall be paid within thirty days of invoice.";
|
|
118
|
+
r.appendChild(t);
|
|
119
|
+
del.appendChild(r);
|
|
120
|
+
p2.appendChild(del);
|
|
121
|
+
const firstP = doc.element.getElementsByTagName("w:p")[0];
|
|
122
|
+
firstP.parentNode!.appendChild(p2);
|
|
123
|
+
|
|
124
|
+
addParagraph(doc, "DUP_PHRASE shall be paid within thirty days of invoice.");
|
|
125
|
+
addParagraph(doc, "Late payments accrue interest at the statutory rate.");
|
|
126
|
+
|
|
127
|
+
const engine = new RedlineEngine(doc, "Reviewer TS");
|
|
128
|
+
const stats = (engine as any).process_batch([
|
|
129
|
+
{
|
|
130
|
+
type: "modify",
|
|
131
|
+
target_text: "DUP_PHRASE shall be paid within thirty days of invoice.",
|
|
132
|
+
new_text: "DUP_PHRASE shall be paid within sixty days of invoice.",
|
|
133
|
+
}
|
|
134
|
+
]);
|
|
135
|
+
|
|
136
|
+
const buf = await doc.save();
|
|
137
|
+
const cleanDocText = await extractTextFromBuffer(buf, true);
|
|
138
|
+
|
|
139
|
+
const report = stats.edits[0];
|
|
140
|
+
expect(report.clean_text).not.toBeNull();
|
|
141
|
+
const cleanPreview = report.clean_text.replace(/^\.+|\.+$/g, "");
|
|
142
|
+
expect(cleanDocText).toContain(cleanPreview);
|
|
143
|
+
});
|
|
144
|
+
});
|