@adeu/core 1.6.7 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,54 @@
1
- import JSZip from 'jszip';
2
- import { parseXml, findChild, findAllDescendants, serializeXml } from './dom.js';
1
+ import JSZip from "jszip";
2
+ import {
3
+ parseXml,
4
+ findChild,
5
+ findAllDescendants,
6
+ serializeXml,
7
+ } from "./dom.js";
3
8
 
4
9
  export class Relationship {
5
10
  constructor(
6
11
  public id: string,
7
12
  public type: string,
8
13
  public target: string,
9
- public isExternal: boolean
14
+ public isExternal: boolean,
10
15
  ) {}
11
16
  }
12
17
 
13
18
  export class Part {
14
19
  public rels: Map<string, Relationship> = new Map();
15
20
  public _element: Element;
16
-
21
+ public package?: DocxPackage;
17
22
  constructor(
18
23
  public partname: string,
19
24
  public blob: string,
20
25
  element: Element,
21
- public contentType: string
26
+ public contentType: string,
22
27
  ) {
23
28
  this._element = element;
24
29
  }
25
30
 
26
- public addRelationship(id: string, type: string, target: string, isExternal: boolean = false) {
31
+ public addRelationship(
32
+ id: string,
33
+ type: string,
34
+ target: string,
35
+ isExternal: boolean = false,
36
+ ) {
27
37
  this.rels.set(id, new Relationship(id, type, target, isExternal));
28
-
29
- // If this part represents a .rels file, update the XML directly
30
- if (this._element.tagName === 'Relationships') {
38
+
39
+ // Directly append the relationship element to the document structure
40
+ if (this.partname.endsWith(".rels")) {
31
41
  const doc = this._element.ownerDocument;
32
42
  if (doc) {
33
- const relEl = doc.createElement('Relationship');
34
- relEl.setAttribute('Id', id);
35
- relEl.setAttribute('Type', type);
36
- relEl.setAttribute('Target', target);
37
- if (isExternal) relEl.setAttribute('TargetMode', 'External');
43
+ // Use strict namespace to ensure it parses successfully on reload
44
+ const relEl = doc.createElementNS(
45
+ "http://schemas.openxmlformats.org/package/2006/relationships",
46
+ "Relationship",
47
+ );
48
+ relEl.setAttribute("Id", id);
49
+ relEl.setAttribute("Type", type);
50
+ relEl.setAttribute("Target", target);
51
+ if (isExternal) relEl.setAttribute("TargetMode", "External");
38
52
  this._element.appendChild(relEl);
39
53
  }
40
54
  }
@@ -49,32 +63,44 @@ export class DocxPackage {
49
63
 
50
64
  public getPartByPath(path: string): Part | undefined {
51
65
  // Strip leading slash for jszip compat
52
- const searchPath = path.startsWith('/') ? path.substring(1) : path;
53
- return this.parts.find((p) => p.partname === searchPath || p.partname === '/' + searchPath);
66
+ const searchPath = path.startsWith("/") ? path.substring(1) : path;
67
+ return this.parts.find(
68
+ (p) => p.partname === searchPath || p.partname === "/" + searchPath,
69
+ );
54
70
  }
55
71
 
56
72
  public nextPartname(pattern: string): string {
57
73
  let i = 1;
58
74
  while (true) {
59
- const candidate = pattern.replace('%d', i === 1 ? '' : i.toString());
75
+ const candidate = pattern.replace("%d", i === 1 ? "" : i.toString());
60
76
  if (!this.getPartByPath(candidate)) return candidate;
61
77
  i++;
62
78
  }
63
79
  }
64
80
 
65
- public addPart(partname: string, contentType: string, xmlString: string): Part {
81
+ public addPart(
82
+ partname: string,
83
+ contentType: string,
84
+ xmlString: string,
85
+ ): Part {
66
86
  const doc = parseXml(xmlString);
67
- const part = new Part(partname, xmlString, doc.documentElement, contentType);
87
+ const part = new Part(
88
+ partname,
89
+ xmlString,
90
+ doc.documentElement,
91
+ contentType,
92
+ );
93
+ part.package = this;
68
94
  this.parts.push(part);
69
95
 
70
96
  // Update [Content_Types].xml
71
- const ctPart = this.getPartByPath('[Content_Types].xml');
97
+ const ctPart = this.getPartByPath("[Content_Types].xml");
72
98
  if (ctPart) {
73
99
  const docCT = ctPart._element.ownerDocument;
74
100
  if (docCT) {
75
- const override = docCT.createElement('Override');
76
- override.setAttribute('PartName', partname);
77
- override.setAttribute('ContentType', contentType);
101
+ const override = docCT.createElement("Override");
102
+ override.setAttribute("PartName", partname);
103
+ override.setAttribute("ContentType", contentType);
78
104
  ctPart._element.appendChild(override);
79
105
  }
80
106
  }
@@ -83,14 +109,18 @@ export class DocxPackage {
83
109
 
84
110
  public getOrCreateRelsPart(sourcePartname: string): Part {
85
111
  // e.g., /word/document.xml -> /word/_rels/document.xml.rels
86
- const parts = sourcePartname.split('/');
112
+ const parts = sourcePartname.split("/");
87
113
  const file = parts.pop();
88
- const relsPath = parts.join('/') + '/_rels/' + file + '.rels';
89
-
114
+ const relsPath = parts.join("/") + "/_rels/" + file + ".rels";
115
+
90
116
  let relsPart = this.getPartByPath(relsPath);
91
117
  if (!relsPart) {
92
118
  const xml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>`;
93
- relsPart = this.addPart(relsPath, 'application/vnd.openxmlformats-package.relationships+xml', xml);
119
+ relsPart = this.addPart(
120
+ relsPath,
121
+ "application/vnd.openxmlformats-package.relationships+xml",
122
+ xml,
123
+ );
94
124
  }
95
125
  return relsPart;
96
126
  }
@@ -98,63 +128,75 @@ export class DocxPackage {
98
128
 
99
129
  export class DocumentObject {
100
130
  public part: Part;
101
- public settings: { oddAndEvenPagesHeaderFooter: boolean } = { oddAndEvenPagesHeaderFooter: false };
131
+ public settings: { oddAndEvenPagesHeaderFooter: boolean } = {
132
+ oddAndEvenPagesHeaderFooter: false,
133
+ };
102
134
  // Simplification for the TS port: sections hold header/footer refs
103
- public sections: any[] = [];
135
+ public sections: any[] = [];
104
136
 
105
/**
 * @param pkg Package wrapper; stored as the public `pkg` property.
 * @param part Part stored as `this.part`.
 */
constructor(
  public pkg: DocxPackage,
  part: Part,
) {
  // `pkg` is assigned by the parameter property; only `part` needs wiring here.
  this.part = part;
}
108
143
 
109
144
  public get element(): Element {
110
- return findChild(this.part._element, 'w:body') || this.part._element;
145
+ return findChild(this.part._element, "w:body") || this.part._element;
111
146
  }
112
147
 
113
148
  /**
114
149
  * Main entrypoint for loading a DOCX buffer into the DOM wrapper.
115
150
  */
116
- public static async load(buffer: Buffer | ArrayBuffer): Promise<DocumentObject> {
151
+ public static async load(
152
+ buffer: Buffer | ArrayBuffer,
153
+ ): Promise<DocumentObject> {
117
154
  const zip = await JSZip.loadAsync(buffer);
118
155
  const pkg = new DocxPackage(zip);
119
156
 
120
157
  // 1. Load Content Types
121
- const ctFile = zip.file('[Content_Types].xml');
158
+ const ctFile = zip.file("[Content_Types].xml");
122
159
  let contentTypes: Record<string, string> = {};
123
160
  if (ctFile) {
124
- const ctXml = parseXml(await ctFile.async('text'));
125
- const overrides = findAllDescendants(ctXml.documentElement, 'Override');
161
+ const ctXml = parseXml(await ctFile.async("text"));
162
+ const overrides = findAllDescendants(ctXml.documentElement, "Override");
126
163
  for (const override of overrides) {
127
- contentTypes[override.getAttribute('PartName') || ''] = override.getAttribute('ContentType') || '';
164
+ contentTypes[override.getAttribute("PartName") || ""] =
165
+ override.getAttribute("ContentType") || "";
128
166
  }
129
167
  }
130
168
 
131
169
  // 2. Pre-load all XML parts to allow synchronous traversal later
132
170
  for (const [path, file] of Object.entries(zip.files)) {
133
- if (!file.dir && (path.endsWith('.xml') || path.endsWith('.rels'))) {
134
- const text = await file.async('text');
171
+ if (!file.dir && (path.endsWith(".xml") || path.endsWith(".rels"))) {
172
+ const text = await file.async("text");
135
173
  const doc = parseXml(text);
136
- const cType = contentTypes['/' + path] || 'application/xml';
137
- const part = new Part('/' + path, text, doc.documentElement, cType);
174
+ const cType = contentTypes["/" + path] || "application/xml";
175
+ const part = new Part("/" + path, text, doc.documentElement, cType);
176
+ part.package = pkg;
138
177
  pkg.parts.push(part);
139
178
  }
140
179
  }
141
180
 
142
181
  // 3. Resolve Relationships for the main document
143
- const mainPart = pkg.getPartByPath('word/document.xml');
144
- if (!mainPart) throw new Error('Invalid DOCX: Missing word/document.xml');
182
+ const mainPart = pkg.getPartByPath("word/document.xml");
183
+ if (!mainPart) throw new Error("Invalid DOCX: Missing word/document.xml");
145
184
  pkg.mainDocumentPart = mainPart;
146
185
 
147
- const relsPart = pkg.getPartByPath('word/_rels/document.xml.rels');
186
+ const relsPart = pkg.getPartByPath("word/_rels/document.xml.rels");
148
187
  if (relsPart) {
149
- const relElements = findAllDescendants(relsPart._element, 'Relationship');
188
+ const relElements = findAllDescendants(relsPart._element, "Relationship");
150
189
  for (const rel of relElements) {
151
- const rId = rel.getAttribute('Id');
152
- const target = rel.getAttribute('Target');
153
- const type = rel.getAttribute('Type');
154
- const targetMode = rel.getAttribute('TargetMode');
155
-
190
+ const rId = rel.getAttribute("Id");
191
+ const target = rel.getAttribute("Target");
192
+ const type = rel.getAttribute("Type");
193
+ const targetMode = rel.getAttribute("TargetMode");
194
+
156
195
  if (rId && target && type) {
157
- mainPart.rels.set(rId, new Relationship(rId, type, target, targetMode === 'External'));
196
+ mainPart.rels.set(
197
+ rId,
198
+ new Relationship(rId, type, target, targetMode === "External"),
199
+ );
158
200
  }
159
201
  }
160
202
  }
@@ -166,24 +208,36 @@ export class DocumentObject {
166
208
  let rId = 1;
167
209
  while (this.part.rels.has(`rId${rId}`)) rId++;
168
210
  const id = `rId${rId}`;
169
-
211
+
170
212
  // In DOCX, targets in .rels are relative to the source part's directory.
171
213
  // /word/document.xml relating to /word/comments.xml -> target is "comments.xml"
172
- const target = part.partname.split('/').pop()!;
173
-
214
+ const target = part.partname.split("/").pop()!;
215
+
174
216
  this.part.rels.set(id, new Relationship(id, relType, target, false));
175
217
  const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
176
218
  relsPart.addRelationship(id, relType, target, false);
177
219
  }
178
220
 
221
/**
 * Registers an external relationship from this document's part to `target`.
 *
 * The relationship is recorded both in the part's in-memory `rels` map and
 * in the part's .rels part (created on demand).
 *
 * @param target Relationship target, stored as given.
 * @param relType Relationship type URI.
 * @returns The allocated relationship id ("rId" followed by the first free number).
 */
public relateToExternal(target: string, relType: string): string {
  // Probe rId1, rId2, ... until an unused id is found.
  let counter = 1;
  let id = `rId${counter}`;
  while (this.part.rels.has(id)) {
    counter += 1;
    id = `rId${counter}`;
  }

  this.part.rels.set(id, new Relationship(id, relType, target, true));
  this.pkg
    .getOrCreateRelsPart(this.part.partname)
    .addRelationship(id, relType, target, true);
  return id;
}
231
+
179
232
  public async save(): Promise<Buffer> {
180
233
  for (const part of this.pkg.parts) {
181
234
  let xmlStr = serializeXml(part._element.ownerDocument || part._element);
182
- if (!xmlStr.startsWith('<?xml')) {
183
- xmlStr = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + xmlStr;
235
+ if (!xmlStr.startsWith("<?xml")) {
236
+ xmlStr =
237
+ '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + xmlStr;
184
238
  }
185
239
  this.pkg.zip.file(part.partname.substring(1), xmlStr); // Strip leading slash for JSZip
186
240
  }
187
- return this.pkg.zip.generateAsync({ type: 'nodebuffer' });
241
+ return this.pkg.zip.generateAsync({ type: "nodebuffer" });
188
242
  }
189
- }
243
+ }
package/src/docx/dom.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
1
+ import { DOMParser, XMLSerializer } from "@xmldom/xmldom";
2
2
 
3
3
  /**
4
4
  * Simulates docx.oxml.ns.qn. In xmldom, namespaces are preserved in tagName.
@@ -11,7 +11,10 @@ export const qn = (name: string) => name;
11
11
  export function findChild(element: Element, tagName: string): Element | null {
12
12
  for (let i = 0; i < element.childNodes.length; i++) {
13
13
  const child = element.childNodes[i];
14
- if (child.nodeType === 1 /* ELEMENT_NODE */ && (child as Element).tagName === tagName) {
14
+ if (
15
+ child.nodeType === 1 /* ELEMENT_NODE */ &&
16
+ (child as Element).tagName === tagName
17
+ ) {
15
18
  return child as Element;
16
19
  }
17
20
  }
@@ -35,7 +38,10 @@ export function findChildren(element: Element, tagName: string): Element[] {
35
38
  /**
36
39
  * Simulates lxml element.findall(".//w:tag") - searches ALL descendants.
37
40
  */
38
- export function findAllDescendants(element: Element, tagName: string): Element[] {
41
+ export function findAllDescendants(
42
+ element: Element,
43
+ tagName: string,
44
+ ): Element[] {
39
45
  return Array.from(element.getElementsByTagName(tagName));
40
46
  }
41
47
 
@@ -43,12 +49,65 @@ export function findAllDescendants(element: Element, tagName: string): Element[]
43
49
  * Parses raw XML strings into xmldom Documents.
44
50
  */
45
51
  export function parseXml(xmlString: string): Document {
46
- return new DOMParser().parseFromString(xmlString, 'text/xml');
52
+ return new DOMParser().parseFromString(xmlString, "text/xml");
47
53
  }
48
54
 
49
55
  /**
50
- * Serializes an xmldom Document or Element back to a string.
56
+ * Serializes an xmldom Document or Element back to a string,
57
+ * enforcing deterministic attribute ordering on the root element.
51
58
  */
52
59
  export function serializeXml(node: Node): string {
53
- return new XMLSerializer().serializeToString(node);
54
- }
60
+ let xml = new XMLSerializer().serializeToString(node);
61
+
62
+ // BUG-11: Deterministic namespace ordering on root elements.
63
+ const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
64
+ const match = rootTagRegex.exec(xml);
65
+
66
+ if (match && !match[1].startsWith("?")) {
67
+ const index = match.index;
68
+ const textBefore = xml.substring(0, index);
69
+
70
+ // Ensure this is the absolute root tag (only <?xml...?> allowed before it)
71
+ const isRoot =
72
+ !textBefore.includes("<") ||
73
+ (textBefore.trim().startsWith("<?xml") &&
74
+ (textBefore.match(/</g) || []).length === 1);
75
+
76
+ if (isRoot) {
77
+ const fullTag = match[0];
78
+ const elemStart = `<${match[1]}`;
79
+ const attrsStr = match[2];
80
+ const tagEnd = match[3];
81
+
82
+ // Robust extraction matching any quote style and internal spacing
83
+ const attrRegex = /([a-zA-Z0-9_:]+)\s*=\s*(["'])(.*?)\2/g;
84
+ const attrs: string[] = [];
85
+ let m;
86
+ while ((m = attrRegex.exec(attrsStr)) !== null) {
87
+ attrs.push(m[0].trim());
88
+ }
89
+
90
+ // Sort attributes: xmlns definitions first, then standard attributes
91
+ attrs.sort((a, b) => {
92
+ const aName = a.split("=")[0].trim();
93
+ const bName = b.split("=")[0].trim();
94
+ const aIsXmlns = aName.startsWith("xmlns");
95
+ const bIsXmlns = bName.startsWith("xmlns");
96
+ if (aIsXmlns && !bIsXmlns) return -1;
97
+ if (!aIsXmlns && bIsXmlns) return 1;
98
+ return aName < bName ? -1 : aName > bName ? 1 : 0;
99
+ });
100
+
101
+ const newTag =
102
+ attrs.length > 0
103
+ ? `${elemStart} ${attrs.join(" ")}${tagEnd}`
104
+ : `${elemStart}${tagEnd}`;
105
+ xml =
106
+ xml.substring(0, index) +
107
+ newTag +
108
+ xml.substring(index + fullTag.length);
109
+ }
110
+ }
111
+
112
+ return xml;
113
+ }
@@ -0,0 +1,280 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { createTestDocument, addParagraph } from './test-utils.js';
3
+ import { DocumentObject } from './docx/bridge.js';
4
+ import { extractTextFromBuffer } from './ingest.js';
5
+ import { RedlineEngine, BatchValidationError } from './engine.js';
6
+ import { ModifyText } from './models.js';
7
+ import { split_structural_appendix } from './pagination.js';
8
+
9
/**
 * Appends a w:bookmarkStart / w:bookmarkEnd pair to `paragraph`.
 * When `text` is non-empty, a run holding that text is placed between them.
 */
function addBookmark(paragraph: Element, name: string, idVal: string = "0", text: string = "") {
  const owner = paragraph.ownerDocument!;

  const bookmarkStart = owner.createElement('w:bookmarkStart');
  bookmarkStart.setAttribute('w:name', name);
  bookmarkStart.setAttribute('w:id', idVal);
  paragraph.appendChild(bookmarkStart);

  if (text.length > 0) {
    const run = owner.createElement('w:r');
    const textEl = owner.createElement('w:t');
    textEl.textContent = text;
    if (text.includes(' ')) {
      textEl.setAttribute('xml:space', 'preserve');
    }
    run.appendChild(textEl);
    paragraph.appendChild(run);
  }

  const bookmarkEnd = owner.createElement('w:bookmarkEnd');
  bookmarkEnd.setAttribute('w:id', idVal);
  paragraph.appendChild(bookmarkEnd);
}
29
+
30
/**
 * Appends a w:fldSimple REF field pointing at `refName` to `paragraph`;
 * the field wraps a single run that shows `text`.
 */
function addCrossReference(paragraph: Element, refName: string, text: string) {
  const owner = paragraph.ownerDocument!;

  const field = owner.createElement('w:fldSimple');
  field.setAttribute('w:instr', ` REF ${refName} \\h `);

  const run = owner.createElement('w:r');
  const textEl = owner.createElement('w:t');
  textEl.textContent = text;
  if (text.includes(' ')) {
    textEl.setAttribute('xml:space', 'preserve');
  }

  run.appendChild(textEl);
  field.appendChild(run);
  paragraph.appendChild(field);
}
42
+
43
/**
 * Registers `url` as an external hyperlink relationship on `docObj` and
 * appends a w:hyperlink (with one run showing `text`) to `paragraph`.
 */
function addHyperlink(docObj: DocumentObject, paragraph: Element, url: string, text: string) {
  const HYPERLINK_REL_TYPE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink';
  // The relationship is created first; its id is what the hyperlink element references.
  const relId = docObj.relateToExternal(url, HYPERLINK_REL_TYPE);

  const owner = paragraph.ownerDocument!;
  const link = owner.createElement('w:hyperlink');
  link.setAttribute('r:id', relId);

  const run = owner.createElement('w:r');
  const textEl = owner.createElement('w:t');
  textEl.textContent = text;
  if (text.includes(' ')) {
    textEl.setAttribute('xml:space', 'preserve');
  }

  run.appendChild(textEl);
  link.appendChild(run);
  paragraph.appendChild(link);
}
57
+
58
/**
 * Adds a /word/footnotes.xml part (a separator footnote plus footnote 1)
 * to the package of `docObj` and relates the document to it.
 */
function setupFootnotesPart(docObj: DocumentObject) {
  const footnotesXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:footnotes xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:footnote w:type="separator" w:id="-1">
<w:p><w:r><w:separator/></w:r></w:p>
</w:footnote>
<w:footnote w:id="1">
<w:p><w:r><w:t>Footnote content.</w:t></w:r></w:p>
</w:footnote>
</w:footnotes>`;

  const footnotesPart = docObj.pkg.addPart(
    '/word/footnotes.xml',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml',
    footnotesXml,
  );
  docObj.relateTo(
    footnotesPart,
    'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes',
  );
}
76
+
77
/**
 * Builds the fixture document shared by the tests below (definitions,
 * bookmarks, cross-references, a footnote and a hyperlink) and returns
 * the result of saving it.
 */
async function createDomainSemanticsStream() {
  const doc = await createTestDocument();

  type Para = ReturnType<typeof addParagraph>;

  // Inserts an empty w:pPr as the first child of the paragraph.
  const prependParagraphProps = (p: Para) => {
    p.insertBefore(p.ownerDocument!.createElement('w:pPr'), p.firstChild);
  };

  // Appends a run whose text element is flagged xml:space="preserve".
  const appendPreservedRun = (p: Para, text: string) => {
    const run = p.ownerDocument!.createElement('w:r');
    const textEl = p.ownerDocument!.createElement('w:t');
    textEl.textContent = text;
    textEl.setAttribute('xml:space', 'preserve');
    run.appendChild(textEl);
    p.appendChild(run);
  };

  // 1. Appendix / Definitions
  prependParagraphProps(addParagraph(doc, "1. Definitions"));
  addParagraph(doc, '"Affiliate" means any entity that controls, is controlled by, or is under common control.');
  addParagraph(doc, "“Confidential Information” means all non-public information disclosed by one party to the other.");
  addParagraph(doc, "This paragraph does not define anything.");

  prependParagraphProps(addParagraph(doc, "2. Obligations"));
  addParagraph(doc, "The Affiliate shall protect the Confidential Information to the highest standard.");

  // 3. Bookmarks and Cross-References
  const bookmarked = addParagraph(doc, "Subject to ");
  addBookmark(bookmarked, "MyBookmark_1", "1", "Anchored Clause");
  appendPreservedRun(bookmarked, ", the parties agree to...");

  const referencing = addParagraph(doc, "As strictly stated in ");
  addCrossReference(referencing, "MyBookmark_1", "Anchored Clause");
  appendPreservedRun(referencing, ", either party may terminate.");

  // 4. Internal Anchors
  const anchorPara = addParagraph(doc, "Section 5. Indemnification");
  addBookmark(anchorPara, "_Ref12345", "0");

  const noisePara = addParagraph(doc, "Some text.");
  addBookmark(noisePara, "_GoBack", "2");
  addBookmark(noisePara, "_Toc123456789", "3");

  // 5. Footnotes
  const footnotePara = addParagraph(doc, "Sentence with footnote");
  const footnoteRun = footnotePara.ownerDocument!.createElement('w:r');
  const footnoteRef = footnotePara.ownerDocument!.createElement('w:footnoteReference');
  footnoteRef.setAttribute('w:id', "1");
  footnoteRun.appendChild(footnoteRef);
  footnotePara.appendChild(footnoteRun);
  setupFootnotesPart(doc);

  // 6. Links and Cross references
  const linkPara = addParagraph(doc, "Please visit ");
  addHyperlink(doc, linkPara, "https://adeu.com", "Adeu HQ");

  const xrefPara = addParagraph(doc, "As detailed in ");
  addCrossReference(xrefPara, "_Ref12345", "Section 5");

  return doc.save();
}
136
+
137
describe('Domain Semantics Engine', () => {
  it('extracts and projects structural appendix and diagnostics correctly', async () => {
    const docxBuffer = await createDomainSemanticsStream();
    const text = await extractTextFromBuffer(docxBuffer);

    // [fragment, mustBePresent] pairs, checked in the listed order.
    const checks: Array<[string, boolean]> = [
      ["<!-- READONLY_BOUNDARY_START -->", true],
      ["# Document Structure (Read-Only)", true],

      // Definitions
      ["## Defined Terms", true],
      ['"Affiliate"', true],
      ['"Confidential Information"', true],
      ["used 1 times", true],

      // Named Anchors & Back-References
      ["## Named Anchors", true],
      ["MyBookmark_1", true],
      ["Anchored to:", true],
      ["Referenced from:", true],

      // Internal anchors & Noise suppression
      ["{#_Ref12345}", true],
      ["Section 5. Indemnification{#_Ref12345}", true],
      ["{#_GoBack}", false],
      ["{#_Toc123456789}", false],

      // Footnotes
      ["[^fn-1]", true],
      ["## Footnotes", true],
      ["[^fn-1]: Footnote content.", true],

      // Links
      ["[Adeu HQ](https://adeu.com)", true],
      ["[~Section 5~](#_Ref12345)", true],
    ];

    for (const [fragment, mustBePresent] of checks) {
      if (mustBePresent) {
        expect(text).toContain(fragment);
      } else {
        expect(text).not.toContain(fragment);
      }
    }
  });

  // Message predicates: true when the (lower-cased) message mentions any needle.
  const mentionsAny = (...needles: string[]) => (m: string) =>
    needles.some((needle) => m.includes(needle));
  // True when the message mentions `required` and at least one of the needles.
  const mentionsWithAny = (required: string, ...needles: string[]) => (m: string) =>
    m.includes(required) && needles.some((needle) => m.includes(needle));

  const edgeCases = [
    {
      target: "# Document Structure (Read-Only)",
      newText: "# Modified Document Structure",
      errChecker: mentionsAny('read-only boundary', 'appendix'),
    },
    {
      target: "Sentence with footnote[^fn-1]",
      newText: "Sentence with footnote",
      errChecker: mentionsWithAny('footnote', 'delete', 'remove'),
    },
    {
      target: "Sentence with footnote",
      newText: "Sentence with footnote[^fn-99]",
      errChecker: mentionsWithAny('footnote', 'insert', 'create'),
    },
    {
      target: "Some text.",
      newText: "Some text.{#_Ref99999}",
      errChecker: mentionsAny('anchor'),
    },
    {
      target: "Section 5. Indemnification{#_Ref12345}",
      newText: "Section 5. Indemnification{#_Ref99999}",
      errChecker: mentionsAny('anchor'),
    },
    {
      target: "[~Section 5~](#_Ref12345)",
      newText: "[~Section 6~](#_Ref12345)",
      errChecker: mentionsAny('cross-reference', 'rejected'),
    },
    {
      target: "[~Section 5~](#_Ref12345)",
      newText: "[~Section 5~](#_Ref99999)",
      errChecker: mentionsAny('dependency corruption', 'rejected'),
    },
    {
      target: "As detailed in [~Section 5~](#_Ref12345)",
      newText: "As detailed in [~Section 5~](#_Ref12345) and [~Section 6~](#_Ref999)",
      errChecker: mentionsAny('cross-reference', 'read-only'),
    },
    {
      target: "As detailed in [~Section 5~](#_Ref12345)",
      newText: "As detailed in nothing",
      errChecker: mentionsAny('cross-reference', 'delete'),
    },
    {
      target: "Please visit [Adeu HQ](https://adeu.com)",
      newText: "Please visit [Adeu HQ](https://adeu.com) and [Google](https://google.com)",
      errChecker: mentionsAny('hyperlink', 'insert'),
    },
    {
      target: "Please visit [Adeu HQ](https://adeu.com)",
      newText: "Please visit nothing",
      errChecker: mentionsAny('hyperlink', 'delete'),
    },
  ];

  edgeCases.forEach((tc) => {
    it(`rejects invalid edits: ${tc.target} -> ${tc.newText}`, async () => {
      const doc = await DocumentObject.load(await createDomainSemanticsStream());
      const engine = new RedlineEngine(doc);
      const edit: ModifyText = { type: 'modify', target_text: tc.target, new_text: tc.newText };

      let threw = false;
      let failure: unknown;
      try {
        engine.process_batch([edit]);
      } catch (e) {
        threw = true;
        failure = e;
      }

      if (threw) {
        // Anything other than a validation failure is an unexpected error.
        if (!(failure instanceof BatchValidationError)) throw failure;
        const msg = failure.errors.join('\n').toLowerCase();
        expect(tc.errChecker(msg)).toBe(true);
      }
      expect(threw).toBe(true);
    });
  });

  it('safely edits footnotes and accepts changes', async () => {
    const doc = await DocumentObject.load(await createDomainSemanticsStream());
    const engine = new RedlineEngine(doc);

    const edit: ModifyText = {
      type: 'modify',
      target_text: "Footnote content.",
      new_text: "This is an edited footnote.",
    };
    const stats = engine.process_batch([edit]);
    expect(stats.edits_applied).toBe(1);

    engine.accept_all_revisions();
    const savedBuffer = await doc.save();
    const cleanText = await extractTextFromBuffer(savedBuffer, true);

    expect(cleanText).toContain("[^fn-1]: This is an edited footnote.");
  });

  it('extracts defined terms and finds typos correctly', async () => {
    const doc = await createTestDocument();
    const paragraphs = [
      '"Agreement" means this contract.',
      "“Party” shall mean either side.",
      '"Agreement" means another thing.', // Duplicate
      'This contract (hereinafter, the "Contract") is valid.',
      '"Confidential Information" on salainen asia.',
      '1.1 "Affiliate" tarkoittaa osakkuusyhtiötä.',
      'We will act as the disclosing party (jäljempänä "Discloser").',
      'This is a syntax example: ("Heading*") and ("<Term>")',

      "The Agreement is binding. The Contract is signed.",
      "There is an Agrement here.",
      "We shared Confidential Information with the Affiliate. The Discloser is happy.",
    ];
    for (const paragraphText of paragraphs) {
      addParagraph(doc, paragraphText);
    }

    const savedBuffer = await doc.save();
    const full_text = await extractTextFromBuffer(savedBuffer, false);
    const [, appendix] = split_structural_appendix(full_text);

    for (const term of ['Agreement', 'Contract', 'Confidential Information', 'Affiliate', 'Discloser']) {
      expect(appendix).toContain(`"${term}" \u2014 used`);
    }

    for (const excluded of ['"Party"', '"Heading*"', '"<Term>"']) {
      expect(appendix).not.toContain(excluded);
    }

    expect(appendix).toContain("[Error] Duplicate Definition: 'Agreement' is defined multiple times.");
    expect(appendix).toContain("[Info] Possible Typos for 'Agreement': Found 'Agrement'");
  });

  it('reduces typo noise for short acronyms', async () => {
    const doc = await createTestDocument();
    const paragraphs = [
      '"PSUs" means power supply units.',
      '"CPU" means central processing unit.',
      '"Party" means the entity.',
      "We rely on ESAs, LSPs, and GPUs for the servers.",
      "The GPU is very fast.",
      "The Pary signed the contract.",
      "We bought PSUs and a CPU.",
      "The Party begins today.",
    ];
    for (const paragraphText of paragraphs) {
      addParagraph(doc, paragraphText);
    }

    const savedBuffer = await doc.save();
    const full_text = await extractTextFromBuffer(savedBuffer, false);
    const [, appendix] = split_structural_appendix(full_text);

    expect(appendix).toContain("[Info] Possible Typos for 'Party': Found 'Pary'");
    for (const acronym of ["'GPU'", "'GPUs'", "'ESAs'", "'LSPs'"]) {
      expect(appendix).not.toContain(acronym);
    }
  });
});