@adeu/core 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adeu/core",
3
- "version": "1.9.0",
3
+ "version": "1.10.0",
4
4
  "description": "",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
package/src/comments.ts CHANGED
@@ -33,27 +33,27 @@ export class CommentsManager {
33
33
 
34
34
  constructor(public doc: DocumentObject) {}
35
35
 
36
- public get commentsPart() {
36
+ public get commentsPart(): Part {
37
37
  if (!this._commentsPart) {
38
38
  this._commentsPart = this._getOrCreateCommentsPart();
39
39
  this._ensureNamespaces();
40
40
  }
41
- return this._commentsPart;
41
+ return this._commentsPart!;
42
42
  }
43
43
 
44
- public get extendedPart() {
44
+ public get extendedPart(): Part {
45
45
  if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
46
- return this._extendedPart;
46
+ return this._extendedPart!;
47
47
  }
48
48
 
49
- public get idsPart() {
49
+ public get idsPart(): Part {
50
50
  if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
51
- return this._idsPart;
51
+ return this._idsPart!;
52
52
  }
53
53
 
54
- public get extensiblePart() {
54
+ public get extensiblePart(): Part {
55
55
  if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
56
- return this._extensiblePart;
56
+ return this._extensiblePart!;
57
57
  }
58
58
 
59
59
  public get nextId(): number {
@@ -124,9 +124,28 @@ export class CommentsManager {
124
124
  }
125
125
 
126
126
  private _ensureNamespaces() {
127
- // In TS we use full xml reconstruction if attributes are missing, but xmldom generally tolerates
128
- // runtime attributes if the namespace is declared. For absolute safety, if it's completely missing,
129
- // we would rebuild. Assuming the parser caught them if they existed.
127
+ // When the comments part already existed (e.g. a legacy or pandoc-produced
128
+ // document) its root <w:comments> may omit the namespaces we rely on —
129
+ // most importantly w14, which qualifies the w14:paraId / w14:textId
130
+ // attributes we write on each comment paragraph. Without the declaration
131
+ // the serialised XML is invalid ("Namespace prefix w14 ... is not defined").
132
+ // Declare any missing namespace prefixes on the existing root element.
133
+ const root = this._commentsPart?._element;
134
+ if (!root) return;
135
+
136
+ const required: [string, string][] = [
137
+ ['xmlns:w', NS.w],
138
+ ['xmlns:w14', NS.w14],
139
+ ['xmlns:w15', NS.w15],
140
+ ['xmlns:w16cid', NS.w16cid],
141
+ ['xmlns:w16cex', NS.w16cex],
142
+ ['xmlns:mc', NS.mc],
143
+ ];
144
+ for (const [attr, uri] of required) {
145
+ if (!root.getAttribute(attr)) {
146
+ root.setAttribute(attr, uri);
147
+ }
148
+ }
130
149
  }
131
150
 
132
151
  private _getNextCommentId(): number {
@@ -273,10 +292,10 @@ export class CommentsManager {
273
292
  }
274
293
 
275
294
  public deleteComment(commentId: string) {
276
- if (!this._commentsPart) return;
295
+ if (!this.commentsPart) return;
277
296
 
278
297
  let commentEl: Element | null = null;
279
- for (const c of findAllDescendants(this._commentsPart._element, 'w:comment')) {
298
+ for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
280
299
  if (c.getAttribute('w:id') === commentId) {
281
300
  commentEl = c;
282
301
  break;
@@ -304,7 +323,7 @@ export class CommentsManager {
304
323
  if (child.getAttribute('w15:paraIdParent') === paraId) {
305
324
  const childParaId = child.getAttribute('w15:paraId');
306
325
  if (childParaId) {
307
- for (const c of findAllDescendants(this._commentsPart._element, 'w:comment')) {
326
+ for (const c of findAllDescendants(this.commentsPart._element, 'w:comment')) {
308
327
  for (const p of findAllDescendants(c, 'w:p')) {
309
328
  if (p.getAttribute('w14:paraId') === childParaId) {
310
329
  const cid = c.getAttribute('w:id');
@@ -8,12 +8,13 @@ import {
8
8
  } from "node:fs";
9
9
  import { resolve, dirname } from "node:path";
10
10
  import { fileURLToPath } from "node:url";
11
- import { execSync } from "node:child_process";
11
+ import { execSync, execFileSync } from "node:child_process";
12
12
  import { tmpdir } from "node:os";
13
13
 
14
14
  import { DocumentObject } from "./docx/bridge.js";
15
15
  import { RedlineEngine } from "./engine.js";
16
16
  import { extractTextFromBuffer } from "./ingest.js";
17
+ import { serializeXml } from "./docx/dom.js";
17
18
 
18
19
  const __filename = fileURLToPath(import.meta.url);
19
20
  const __dirname = dirname(__filename);
@@ -28,10 +29,62 @@ const PYTHON_ABSTRACT_CMD = resolve(
28
29
  );
29
30
  const PYTHON_DIR = resolve(__dirname, "../../../../python");
30
31
 
32
+ const CT_COMMENTS =
33
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml";
34
+
31
35
  function normalizeMdTimestamps(mdText: string): string {
32
36
  return mdText.replace(/@ \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/g, "@ DATE");
33
37
  }
34
38
 
39
/**
 * Optional well-formedness check of an XML string using the external
 * `xmllint` binary.
 *
 * The binary is looked up with `which` (POSIX) or `where` (Windows). When it
 * cannot be found the function returns without checking anything. Otherwise
 * the XML is written to a temp file, `xmllint --noout` is run on that file,
 * and the file is removed again whether or not the check passed.
 *
 * @param xmlContent Serialized XML to check.
 * @param label Identifier used in the temp file name and in the error message.
 * @throws Error with the tool's diagnostics when xmllint rejects the document.
 *   A failure to write the temp file propagates unchanged rather than being
 *   reported as a validation failure.
 */
function xmllintCheck(xmlContent: string, label: string): void {
  // Cross-platform lookup: `which` on POSIX, `where` on Windows.
  const locator = process.platform === "win32" ? "where" : "which";
  let xmllintBin: string | null = null;
  try {
    xmllintBin =
      execSync(`${locator} xmllint`, {
        encoding: "utf-8",
        // Setting stdio explicitly stops the locator's "not found" output
        // from being echoed onto the parent's stderr.
        stdio: ["ignore", "pipe", "ignore"],
      })
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean)[0] ?? null;
  } catch {
    /* not found */
  }
  if (!xmllintBin) {
    // Optional external XML validation: skip when xmllint is unavailable
    // (common on Windows).
    return;
  }

  // The label is caller-supplied; keep only file-name-safe characters so a
  // path separator cannot redirect the write. pid + timestamp keeps
  // concurrent runs from sharing a file.
  const safeLabel = label.replace(/[^A-Za-z0-9._-]/g, "_");
  const tmpFile = resolve(
    tmpdir(),
    `adeu_consistency_${process.pid}_${Date.now()}_${safeLabel}`,
  );
  try {
    writeFileSync(tmpFile, xmlContent, "utf-8");
    try {
      // Explicit pipes: diagnostics are captured on the thrown error instead
      // of also being printed by the child.
      execFileSync(xmllintBin, ["--noout", tmpFile], {
        stdio: ["ignore", "pipe", "pipe"],
      });
    } catch (err: unknown) {
      const stderr =
        typeof err === "object" && err !== null && "stderr" in err
          ? String((err as { stderr?: unknown }).stderr ?? "")
          : "";
      // Fall back to the error message when the tool wrote nothing to stderr.
      const detail =
        stderr.trim() !== ""
          ? stderr
          : err instanceof Error
            ? err.message
            : String(err);
      throw new Error(`xmllint validation failed for ${label}:\n${detail}`);
    }
  } finally {
    if (existsSync(tmpFile)) unlinkSync(tmpFile);
  }
}
69
+
70
/**
 * Reloads a saved document buffer and checks the XML of its comments part.
 *
 * Does nothing when the package has no part with the comments content type.
 * Otherwise the part is serialized (from its owner document when there is
 * one), asserted to contain the `w14` namespace declaration, and passed to
 * `xmllintCheck`.
 *
 * @param outBuffer Saved document bytes to reload.
 * @param folder Test folder name, used to label the xmllint run.
 */
async function validateCommentsXmlNamespaces(
  outBuffer: Buffer,
  folder: string,
): Promise<void> {
  const reloaded = await DocumentObject.load(outBuffer);
  const part = reloaded.pkg.parts.find(
    (candidate) => candidate.contentType === CT_COMMENTS,
  );

  if (part) {
    const rootEl = part._element;
    const xml = serializeXml(rootEl.ownerDocument ?? rootEl);
    const w14Declaration =
      'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"';

    expect(xml).toContain(w14Declaration);
    xmllintCheck(xml, `${folder}_comments.xml`);
  }
}
87
+
35
88
  describe("Polyglot Consistency Framework (TS vs Python)", () => {
36
89
  if (!existsSync(CORPUS_DIR)) {
37
90
  it.skip("Cross-platform test corpus not found", () => {});
@@ -72,7 +125,12 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
72
125
  engine.process_batch(testConfig.changes || []);
73
126
  outBuffer = await doc.save();
74
127
 
75
- // 2. Assert XML Structure Parity (via Python Bridge)
128
+ // 2. Validate comments XML namespaces when requested by test.json
129
+ if (testConfig.validate_comments_xml_namespaces) {
130
+ await validateCommentsXmlNamespaces(outBuffer, folder);
131
+ }
132
+
133
+ // 3. Assert XML Structure Parity (via Python Bridge)
76
134
  const goldenXmlPath = resolve(testDir, "golden_abstract.xml");
77
135
  if (existsSync(goldenXmlPath)) {
78
136
  const expectedXml = readFileSync(goldenXmlPath, "utf-8");
@@ -103,7 +161,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
103
161
  }
104
162
  }
105
163
 
106
- // 3. Assert Markdown Extraction Parity (Raw View)
164
+ // 4. Assert Markdown Extraction Parity (Raw View)
107
165
  const rawMdPath = resolve(testDir, "golden_raw.md");
108
166
  if (existsSync(rawMdPath)) {
109
167
  const expectedRaw = readFileSync(rawMdPath, "utf-8").replace(
@@ -116,7 +174,7 @@ describe("Polyglot Consistency Framework (TS vs Python)", () => {
116
174
  expect(actualRaw).toBe(expectedRaw);
117
175
  }
118
176
 
119
- // 4. Assert Markdown Extraction Parity (Clean View)
177
+ // 5. Assert Markdown Extraction Parity (Clean View)
120
178
  const cleanMdPath = resolve(testDir, "golden_clean.md");
121
179
  if (existsSync(cleanMdPath)) {
122
180
  const expectedClean = readFileSync(cleanMdPath, "utf-8").replace(
package/src/diff.ts CHANGED
@@ -1,6 +1,34 @@
1
1
  import diff_match_patch from "diff-match-patch";
2
2
  import { ModifyText } from "./models.js";
3
3
 
4
/**
 * Counts the "standalone" underscores in a string.
 *
 * An underscore is NOT counted when either of these holds:
 *  - it touches another underscore (it is part of a `__` run), or
 *  - it is intra-word: the characters on both sides are ASCII letters or
 *    digits, as in `snake_case`.
 *
 * @param s Text to scan.
 * @returns The number of underscores that are neither part of a run nor
 *   intra-word.
 */
function _count_standalone_underscores(s: string): number {
  // Built once per call; the pattern is not global, so test() is stateless.
  const alnum = /^[a-zA-Z0-9]$/;
  let count = 0;

  for (let i = 0; i < s.length; i++) {
    if (s.charAt(i) !== "_") continue;

    // charAt returns "" outside the string, which is neither "_" nor
    // alphanumeric, so the first and last positions need no special casing.
    const prev = s.charAt(i - 1);
    const next = s.charAt(i + 1);

    const isDouble = prev === "_" || next === "_";
    const isIntra = alnum.test(prev) && alnum.test(next);

    if (!isDouble && !isIntra) count++;
  }

  return count;
}
31
+
4
32
  export function trim_common_context(
5
33
  target: string,
6
34
  new_val: string,
@@ -44,7 +72,7 @@ export function trim_common_context(
44
72
  const left = target.substring(0, prefix_len);
45
73
  const b_count = (left.match(/\*\*/g) || []).length;
46
74
  const u2_count = (left.match(/__/g) || []).length;
47
- const u1_count = (left.replace(/__/g, "").match(/_/g) || []).length;
75
+ const u1_count = _count_standalone_underscores(left);
48
76
 
49
77
  if (b_count % 2 !== 0) {
50
78
  prefix_len = left.lastIndexOf("**");
@@ -56,14 +84,18 @@ export function trim_common_context(
56
84
  }
57
85
  if (u1_count % 2 !== 0) {
58
86
  let idx = left.length - 1;
87
+ const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
59
88
  while (idx >= 0) {
60
89
  if (
61
90
  left[idx] === "_" &&
62
91
  (idx === 0 || left[idx - 1] !== "_") &&
63
92
  (idx === left.length - 1 || left[idx + 1] !== "_")
64
93
  ) {
65
- prefix_len = idx;
66
- break;
94
+ const is_intra = idx > 0 && isAlnum(left[idx - 1]) && idx < left.length - 1 && isAlnum(left[idx + 1]);
95
+ if (!is_intra) {
96
+ prefix_len = idx;
97
+ break;
98
+ }
67
99
  }
68
100
  idx--;
69
101
  }
@@ -140,7 +172,7 @@ export function trim_common_context(
140
172
  const right = target.substring(target.length - suffix_len);
141
173
  const b_count = (right.match(/\*\*/g) || []).length;
142
174
  const u2_count = (right.match(/__/g) || []).length;
143
- const u1_count = (right.replace(/__/g, "").match(/_/g) || []).length;
175
+ const u1_count = _count_standalone_underscores(right);
144
176
 
145
177
  if (b_count % 2 !== 0) {
146
178
  suffix_len -= right.indexOf("**") + 2;
@@ -152,14 +184,18 @@ export function trim_common_context(
152
184
  }
153
185
  if (u1_count % 2 !== 0) {
154
186
  let idx_in_right = 0;
187
+ const isAlnum = (char: string) => /[a-zA-Z0-9]/.test(char);
155
188
  while (idx_in_right < right.length) {
156
189
  if (
157
190
  right[idx_in_right] === "_" &&
158
191
  (idx_in_right === 0 || right[idx_in_right - 1] !== "_") &&
159
192
  (idx_in_right === right.length - 1 || right[idx_in_right + 1] !== "_")
160
193
  ) {
161
- suffix_len -= idx_in_right + 1;
162
- break;
194
+ const is_intra = idx_in_right > 0 && isAlnum(right[idx_in_right - 1]) && idx_in_right < right.length - 1 && isAlnum(right[idx_in_right + 1]);
195
+ if (!is_intra) {
196
+ suffix_len -= idx_in_right + 1;
197
+ break;
198
+ }
163
199
  }
164
200
  idx_in_right++;
165
201
  }
package/src/docx/dom.ts CHANGED
@@ -53,7 +53,7 @@ export function parseXml(xmlString: string): Document {
53
53
  if (xmlString.startsWith("\uFEFF")) {
54
54
  xmlString = xmlString.slice(1);
55
55
  }
56
- return new DOMParser().parseFromString(xmlString, "text/xml");
56
+ return new DOMParser().parseFromString(xmlString, "text/xml") as unknown as Document;
57
57
  }
58
58
 
59
59
  /**
@@ -61,7 +61,7 @@ export function parseXml(xmlString: string): Document {
61
61
  * enforcing deterministic attribute ordering on the root element.
62
62
  */
63
63
  export function serializeXml(node: Node): string {
64
- let xml = new XMLSerializer().serializeToString(node);
64
+ let xml = new XMLSerializer().serializeToString(node as any);
65
65
 
66
66
  // BUG-11: Deterministic namespace ordering on root elements.
67
67
  const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
@@ -493,4 +493,42 @@ describe("Resolved Bugs Core Engine Verification", () => {
493
493
  expect(cleanText).not.toContain("Paragraph 2");
494
494
  expect(cleanText).toContain("Paragraph 1 ends here. MERGED here.");
495
495
  });
496
+
497
// Regression check: once accept_all_revisions() has run, neither the comment
// parts nor the in-body comment anchors created by a commented edit may remain.
it("BUG-REPRO: accept_all_revisions leaks comments and in-body comment anchors", async () => {
  const doc = await createTestDocument();
  addParagraph(doc, "This is the original text of the agreement.");
  const engine = new RedlineEngine(doc, "Reviewer AI");

  // Number of package parts whose content type mentions "comments".
  const countCommentParts = () =>
    doc.pkg.parts.filter((part) => part.contentType.includes("comments")).length;

  // One tracked modification carrying a comment.
  engine.process_batch([
    {
      type: "modify",
      target_text: "original text",
      new_text: "updated text",
      comment: "Should this be updated or kept as original?",
    },
  ]);

  // Pre-conditions: the comment parts and body anchors were actually created.
  expect(countCommentParts()).toBeGreaterThan(0);
  const xmlBefore = doc.element.toString();
  for (const anchor of ["w:commentRangeStart", "w:commentReference"]) {
    expect(xmlBefore).toContain(anchor);
  }

  engine.accept_all_revisions();

  // Post-conditions: every anchor is gone from the body...
  const xmlAfter = doc.element.toString();
  for (const anchor of [
    "w:commentRangeStart",
    "w:commentRangeEnd",
    "w:commentReference",
  ]) {
    expect(xmlAfter).not.toContain(anchor);
  }

  // ...and no comment part is left in the package.
  expect(countCommentParts()).toBe(0);
});
496
534
  });
@@ -0,0 +1,144 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { createTestDocument, addParagraph } from "./test-utils.js";
3
+ import { RedlineEngine } from "./engine.js";
4
+ import { extractTextFromBuffer } from "./ingest.js";
5
+
6
describe("Feedback Layer & Dry Run Verification", () => {
  /** Creates a test document holding one paragraph plus a "Reviewer TS" engine for it. */
  const setup = async (paragraph: string) => {
    const doc = await createTestDocument();
    addParagraph(doc, paragraph);
    const engine = new RedlineEngine(doc, "Reviewer TS");
    return { doc, engine };
  };

  /** Removes runs of dots from both ends of a preview string. */
  const stripEdgeDots = (preview: string): string =>
    preview.replace(/^\.+|\.+$/g, "");

  it("process_batch returns detailed edit reports", async () => {
    const { engine } = await setup("The quick brown fox jumps over the lazy dog.");

    const result = (engine as any).process_batch([
      { type: "modify", target_text: "quick brown fox", new_text: "fast red fox" }
    ]);

    expect(result.edits).toBeDefined();
    expect(result.edits.length).toBe(1);

    const firstEdit = result.edits[0];
    expect(firstEdit.status).toBe("applied");
    expect(firstEdit.target_text).toBe("quick brown fox");
    expect(firstEdit.new_text).toBe("fast red fox");

    // The markup preview carries the change plus text on either side of it.
    expect(firstEdit.critic_markup).toContain("{--quick brown--}{++fast red++} fox");
    expect(firstEdit.critic_markup).toContain("The ");
    expect(firstEdit.critic_markup).toContain(" jumps over");

    expect(firstEdit.clean_text).toContain("The fast red fox jumps over");
    expect(result.engine).toBe("node");
    expect(result.version).toBeDefined();
  });

  it("punctuation anchor triggers warning", async () => {
    const { engine } = await setup("Refer to sample_term_name in Section 4.");

    const result = (engine as any).process_batch([
      { type: "modify", target_text: "sample_term_name", new_text: "validated_term_name" }
    ]);

    const firstEdit = result.edits[0];
    expect(firstEdit.warning).not.toBeNull();
    expect(firstEdit.warning.toLowerCase()).toContain("punctuation");
    expect(firstEdit.warning).toContain("sample_term_name");
  });

  it("dry_run does not mutate and reports safely", async () => {
    const { doc, engine } = await setup("Baseline text.");

    // Part 1: a dry run of an edit that matches.
    const validRun = (engine as any).process_batch([
      { type: "modify", target_text: "Baseline", new_text: "Modified Preview" }
    ], true);

    expect(validRun.edits_applied).toBe(1);
    expect(validRun.edits[0].status).toBe("applied");
    expect(validRun.edits[0].clean_text).toContain("Modified Preview");

    // The saved document must still hold the original wording.
    const savedBytes = await doc.save();
    const savedText = await extractTextFromBuffer(savedBytes, true);
    expect(savedText).not.toContain("Modified Preview");
    expect(savedText).toContain("Baseline text");

    // Part 2: a dry run of an edit that cannot match reports failure
    // in the result instead of throwing.
    const invalidRun = (engine as any).process_batch([
      { type: "modify", target_text: "NON_EXISTENT", new_text: "fail" }
    ], true);

    expect(invalidRun.edits_skipped).toBe(1);
    expect(invalidRun.edits[0].status).toBe("failed");
    expect(invalidRun.edits[0].error).not.toBeNull();
    expect(invalidRun.edits[0].error.toLowerCase()).toContain("not found");
  });

  it("preview self-consistency on underscore terms", async () => {
    const anchorSentence = "ANCHOR_LINE governs the interpretation of this Agreement.";
    const { doc, engine } = await setup(anchorSentence);

    const result = (engine as any).process_batch([
      {
        type: "modify",
        target_text: "ANCHOR_LINE governs the interpretation of this Agreement.",
        new_text: "NEW_PARA inserted before.\n\nANCHOR_LINE governs the interpretation of this Agreement.",
      }
    ]);

    const savedBytes = await doc.save();
    const savedText = await extractTextFromBuffer(savedBytes, true);

    const firstEdit = result.edits[0];

    // The reported clean preview must appear verbatim in the saved document.
    expect(firstEdit.clean_text).not.toBeNull();
    expect(savedText).toContain(stripEdgeDots(firstEdit.clean_text));
  });

  it("preview does not contain duplicate garbling", async () => {
    const doc = await createTestDocument();

    addParagraph(doc, "Payment Terms");

    // Hand-build a paragraph whose only content is a tracked deletion of the
    // same sentence that is added as live text further down.
    const xml = doc.element.ownerDocument!;
    const deletedParagraph = xml.createElement("w:p");
    const deletion = xml.createElement("w:del");
    deletion.setAttribute("w:id", "900");
    deletion.setAttribute("w:author", "Reviewer");
    deletion.setAttribute("w:date", "2026-06-01T00:00:00Z");
    const run = xml.createElement("w:r");
    const deletedText = xml.createElement("w:delText");
    deletedText.setAttribute("xml:space", "preserve");
    deletedText.textContent = "DUP_PHRASE shall be paid within thirty days of invoice.";
    run.appendChild(deletedText);
    deletion.appendChild(run);
    deletedParagraph.appendChild(deletion);
    const firstParagraph = doc.element.getElementsByTagName("w:p")[0];
    firstParagraph.parentNode!.appendChild(deletedParagraph);

    addParagraph(doc, "DUP_PHRASE shall be paid within thirty days of invoice.");
    addParagraph(doc, "Late payments accrue interest at the statutory rate.");

    const engine = new RedlineEngine(doc, "Reviewer TS");
    const result = (engine as any).process_batch([
      {
        type: "modify",
        target_text: "DUP_PHRASE shall be paid within thirty days of invoice.",
        new_text: "DUP_PHRASE shall be paid within sixty days of invoice.",
      }
    ]);

    const savedBytes = await doc.save();
    const savedText = await extractTextFromBuffer(savedBytes, true);

    const firstEdit = result.edits[0];
    expect(firstEdit.clean_text).not.toBeNull();
    expect(savedText).toContain(stripEdgeDots(firstEdit.clean_text));
  });
});