@adeu/core 1.6.7 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
/**
 * Collects the outcome of a document finalization pass and renders it as a
 * plain-text report.
 *
 * Callers fill in the public counters and line buckets (directly, or through
 * {@link SanitizeReport.add_transform_lines}) and then call
 * {@link SanitizeReport.render} to obtain the report text.
 */
export class SanitizeReport {
  /** Document name shown in the report title. */
  public filename: string;
  /** Sanitize mode; `render()` treats the value "keep-markup" specially. */
  public mode: string;
  /** Author name echoed in the header flags and the markup summary, if set. */
  public author: string | null;

  public tracked_changes_found: number = 0;
  public tracked_changes_accepted: number = 0;
  public tracked_changes_kept: number = 0;
  public change_lines: string[] = [];

  public comments_removed: number = 0;
  public comments_kept: number = 0;
  public removed_comment_lines: string[] = [];
  public kept_comment_lines: string[] = [];

  public metadata_lines: string[] = [];
  public structural_lines: string[] = [];
  public warnings: string[] = [];

  /** "clean" by default; the value "blocked" makes `render()` stop early. */
  public status: string = "clean";
  /** Explanation printed after "BLOCKED:" when `status` is "blocked". */
  public blocked_reason: string | null = null;

  // Keyword lists used by add_transform_lines. They are matched against the
  // lower-cased line, so every entry must itself be lower case.
  private static readonly CHANGE_KEYWORDS: readonly string[] = [
    "tracked change", "insertion", "deletion", "accepted",
  ];
  private static readonly COMMENT_KEYWORDS: readonly string[] = [
    "comment", "[open]", "[resolved]",
  ];
  private static readonly KEPT_KEYWORDS: readonly string[] = ["kept", "visible"];
  private static readonly METADATA_KEYWORDS: readonly string[] = [
    "author", "template", "company",
    "manager", "metadata", "timestamp",
    "custom xml", "last modified by", "revision count", "last printed",
  ];
  private static readonly WARNING_KEYWORDS: readonly string[] = ["hyperlink", "warning"];

  /**
   * @param filename Document name shown in the report title.
   * @param mode Sanitize mode; defaults to "full".
   * @param author Optional author name; defaults to `null`.
   */
  constructor(filename: string, mode: string = "full", author: string | null = null) {
    this.filename = filename;
    this.mode = mode;
    this.author = author;
  }

  /** Returns true when `haystack` contains at least one of `keywords`. */
  private static mentionsAny(haystack: string, keywords: readonly string[]): boolean {
    return keywords.some((keyword) => haystack.includes(keyword));
  }

  /**
   * Appends a titled section (blank line, title, then one prefixed row per
   * entry) to `out`. Does nothing when `entries` is empty.
   */
  private static pushSection(
    out: string[],
    title: string,
    entries: readonly string[],
    prefix: string,
  ): void {
    if (entries.length === 0) return;
    out.push("");
    out.push(title);
    for (const entry of entries) out.push(`${prefix}${entry}`);
  }

  /**
   * Sorts free-text transform messages into the report's line buckets using
   * case-insensitive keyword matching.
   *
   * The checks run in a fixed order and the first match wins: tracked-change
   * keywords, then comment keywords (split into kept vs. removed), then
   * metadata keywords, then warning keywords. Anything else is recorded as a
   * structural line.
   *
   * @param lines Messages to classify; each is stored unmodified.
   */
  public add_transform_lines(lines: string[]): void {
    for (const line of lines) {
      const lower = line.toLowerCase();

      if (SanitizeReport.mentionsAny(lower, SanitizeReport.CHANGE_KEYWORDS)) {
        this.change_lines.push(line);
      } else if (SanitizeReport.mentionsAny(lower, SanitizeReport.COMMENT_KEYWORDS)) {
        if (SanitizeReport.mentionsAny(lower, SanitizeReport.KEPT_KEYWORDS)) {
          this.kept_comment_lines.push(line);
        } else {
          this.removed_comment_lines.push(line);
        }
      } else if (SanitizeReport.mentionsAny(lower, SanitizeReport.METADATA_KEYWORDS)) {
        this.metadata_lines.push(line);
      } else if (SanitizeReport.mentionsAny(lower, SanitizeReport.WARNING_KEYWORDS)) {
        this.warnings.push(line);
      } else {
        this.structural_lines.push(line);
      }
    }
  }

  /**
   * Builds the report text.
   *
   * Layout: a header (title plus any applicable flags), then either a
   * "BLOCKED" notice (when `status` is "blocked") or the populated sections
   * followed by a result line.
   *
   * @returns The report as a single newline-joined string.
   */
  public render(): string {
    const sep = "═".repeat(50);
    const lines: string[] = [sep, `Finalization Report: ${this.filename}`];

    const flags: string[] = [];
    if (this.mode === "keep-markup") flags.push("--keep-markup");
    if (this.author) flags.push(`--author "${this.author}"`);
    if (this.tracked_changes_accepted > 0) flags.push("--accept-all");

    if (flags.length > 0) lines.push(flags.join(" "));
    lines.push(sep);

    if (this.status === "blocked") {
      lines.push("");
      // Fall back to a readable placeholder so an unset reason is never
      // rendered as the literal text "null".
      lines.push(`BLOCKED: ${this.blocked_reason ?? "no reason provided"}`);
      lines.push(sep);
      return lines.join("\n");
    }

    if (this.mode === "keep-markup" && (this.tracked_changes_kept > 0 || this.comments_kept > 0)) {
      lines.push("");
      lines.push("VISIBLE TO COUNTERPARTY");
      if (this.tracked_changes_kept > 0) lines.push(` Tracked changes: ${this.tracked_changes_kept}`);
      if (this.comments_kept > 0) {
        lines.push(` Open comments: ${this.comments_kept}`);
        for (const cl of this.kept_comment_lines) lines.push(` ${cl}`);
      }
      if (this.author) lines.push(` Author on all markup: "${this.author}"`);
    }

    SanitizeReport.pushSection(lines, "TRACKED CHANGES", this.change_lines, " ");
    SanitizeReport.pushSection(lines, "COMMENTS (stripped)", this.removed_comment_lines, " ");
    SanitizeReport.pushSection(lines, "METADATA", this.metadata_lines, " ");
    SanitizeReport.pushSection(lines, "STRUCTURAL & PROTECTION", this.structural_lines, " ");
    SanitizeReport.pushSection(lines, "WARNINGS", this.warnings, " ⚠ ");

    lines.push("");
    lines.push(sep);
    if (this.warnings.length > 0) {
      lines.push(`Result: CLEAN WITH WARNINGS (${this.warnings.length} warning${this.warnings.length > 1 ? 's' : ''})`);
    } else {
      lines.push(`Result: CLEAN (${this.tracked_changes_found} changes resolved, ${this.comments_removed} comments removed)`);
    }
    lines.push(sep);

    return lines.join("\n");
  }
}
@@ -0,0 +1,237 @@
1
+ import { describe, it, expect, vi } from 'vitest';
2
+ import { DOMParser } from '@xmldom/xmldom';
3
+ import JSZip from 'jszip';
4
+ import { DocumentObject, Part, DocxPackage } from '../docx/bridge.js';
5
+ import * as transforms from './transforms.js';
6
+ import { finalize_document } from './core.js';
7
+
8
+ // --- Helper to build a lightweight in-memory DocumentObject ---
9
/**
 * Test helper: wraps `bodyXml` in a `w:document`/`w:body` envelope (declaring
 * the `w` and `w14` namespaces), parses it, and registers the result as the
 * main document part of a fresh package backed by an empty JSZip archive.
 *
 * @param bodyXml Markup placed inside `<w:body>`.
 * @returns A DocumentObject built from that package and main part.
 */
function createMockDoc(bodyXml: string): DocumentObject {
  const documentXml = `<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"><w:body>${bodyXml}</w:body></w:document>`;
  const rootElement = new DOMParser().parseFromString(documentXml, 'text/xml').documentElement;

  const archive = new JSZip();
  const docxPackage = new DocxPackage(archive);

  const mainPart = new Part('/word/document.xml', documentXml, rootElement, 'application/xml');
  docxPackage.parts.push(mainPart);
  docxPackage.mainDocumentPart = mainPart;

  return new DocumentObject(docxPackage, mainPart);
}
21
+
22
+ // --- Transforms Unit Tests ---
23
// Unit tests for the individual sanitize transforms. Each test builds a small
// in-memory document, runs one transform, and inspects the serialized XML.
describe('Sanitize Transforms', () => {

  it('should strip RSID attributes and elements', () => {
    const doc = createMockDoc(`
      <w:p w:rsidR="00A21F3B" w:rsidP="00B33E21">
        <w:r><w:t>Hello</w:t></w:r>
      </w:p>
      <w:sectPr><w:rsids><w:rsidRoot w:val="00A21F3B"/></w:rsids></w:sectPr>
    `);

    const lines = transforms.strip_rsid(doc);
    const xml = doc.element.toString();

    // The transform must report what it did and leave no RSID markup behind.
    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('w:rsidR');
    expect(xml).not.toContain('w:rsidP');
    expect(xml).not.toContain('w:rsids');
  });

  it('should strip w14:paraId and w14:textId', () => {
    const doc = createMockDoc(`
      <w:p w14:paraId="3F2A91BC" w14:textId="77777777">
        <w:r><w:t>Test</w:t></w:r>
      </w:p>
    `);

    const lines = transforms.strip_para_ids(doc);
    const xml = doc.element.toString();

    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('w14:paraId');
    expect(xml).not.toContain('w14:textId');
  });

  it('should strip hidden text runs', () => {
    const doc = createMockDoc(`
      <w:p>
        <w:r>
          <w:rPr><w:vanish/></w:rPr>
          <w:t>HiddenSecret</w:t>
        </w:r>
        <w:r>
          <w:t>VisibleText</w:t>
        </w:r>
      </w:p>
    `);

    const lines = transforms.strip_hidden_text(doc);
    const xml = doc.element.toString();

    // Only the run flagged with <w:vanish/> should disappear.
    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('HiddenSecret');
    expect(xml).toContain('VisibleText');
  });

  it('should scrub document properties', () => {
    const doc = createMockDoc('<w:p/>');

    // Mock docProps/app.xml
    const appXml = '<Properties><TotalTime>15</TotalTime><Template>Confidential.dotm</Template></Properties>';
    const appEl = new DOMParser().parseFromString(appXml, 'text/xml').documentElement;
    const appPart = new Part('/docProps/app.xml', appXml, appEl, 'application/xml');
    doc.pkg.parts.push(appPart);

    const lines = transforms.scrub_doc_properties(doc);
    const resultXml = appPart._element.toString();

    // Editing time is expected to be zeroed and the template name emptied.
    expect(lines.length).toBeGreaterThan(0);
    expect(resultXml).toContain('<TotalTime>0</TotalTime>');
    expect(resultXml).toContain('<Template/>');
    expect(resultXml).not.toContain('Confidential.dotm');
  });

  it('should strip custom XML parts and data bindings', () => {
    const doc = createMockDoc(`
      <w:p>
        <w:sdt>
          <w:sdtPr><w:dataBinding w:xpath="/test"/></w:sdtPr>
        </w:sdt>
      </w:p>
    `);

    // Mock custom XML part
    const customPart = new Part('/customXml/item1.xml', '<t/>', new DOMParser().parseFromString('<t/>', 'text/xml').documentElement, 'application/xml');
    doc.pkg.parts.push(customPart);

    const lines = transforms.strip_custom_xml(doc);

    // Both the package part and the binding that referenced it must be gone.
    expect(lines.length).toBeGreaterThan(0);
    expect(doc.pkg.parts.find(p => p.partname.includes('customXml'))).toBeUndefined();
    expect(doc.element.toString()).not.toContain('w:dataBinding');
  });

  it('should count and accept all tracked changes', () => {
    const doc = createMockDoc(`
      <w:p>
        <w:del w:id="1">
          <w:r><w:delText>Vendor</w:delText></w:r>
        </w:del>
        <w:ins w:id="2">
          <w:r><w:t>Supplier</w:t></w:r>
        </w:ins>
      </w:p>
    `);

    // Only the first two counts are asserted here, so the third tuple element
    // is intentionally not bound (avoids an unused local).
    const [ins, del] = transforms.count_tracked_changes(doc);
    expect(ins).toBe(1);
    expect(del).toBe(1);

    const lines = transforms.accept_all_tracked_changes(doc);
    const xml = doc.element.toString();

    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('w:del');
    expect(xml).not.toContain('w:ins');
    expect(xml).not.toContain('Vendor'); // Deletion was removed
    expect(xml).toContain('Supplier'); // Insertion was unwrapped
  });

});
143
+
144
+ // --- Orchestrator Integration Tests ---
145
// Integration tests that drive finalize_document end to end against small
// in-memory documents.
describe('Finalize Document (Core)', () => {

  // Replaces doc.save with a stub that resolves to a tiny placeholder buffer.
  const stubSave = (target: DocumentObject): void => {
    target.save = vi.fn().mockResolvedValue(Buffer.from('mock'));
  };

  it('should inject XML locking (Read-Only) into settings.xml', async () => {
    const mockDoc = createMockDoc('<w:p/>');

    // Attach an empty word/settings.xml part for the protection element to land in.
    const emptySettingsXml = '<w:settings xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"></w:settings>';
    const settingsRoot = new DOMParser().parseFromString(emptySettingsXml, 'text/xml').documentElement;
    const settingsPart = new Part('/word/settings.xml', emptySettingsXml, settingsRoot, 'application/xml');
    mockDoc.pkg.parts.push(settingsPart);

    stubSave(mockDoc);

    const outcome = await finalize_document(mockDoc, {
      filename: 'test.docx',
      protection_mode: 'read_only'
    });

    const serializedSettings = settingsPart._element.toString();

    expect(outcome.reportText).toContain('Result: CLEAN');
    expect(outcome.reportText).toContain('Document locked (Read-Only');

    // The settings part must now carry an enforced read-only protection element.
    expect(serializedSettings).toContain('w:documentProtection');
    expect(serializedSettings).toContain('w:edit="readOnly"');
    expect(serializedSettings).toContain('w:enforcement="1"');
  });

  it('should return a blocked status if unaccepted changes remain and accept_all is false', async () => {
    const draft = createMockDoc(`
      <w:p>
        <w:ins w:id="1"><w:r><w:t>Unresolved Edit</w:t></w:r></w:ins>
      </w:p>
    `);

    const outcome = await finalize_document(draft, {
      filename: 'draft.docx',
      sanitize_mode: 'full',
      accept_all: false // pending insertion + no auto-accept => expect a block
    });

    expect(outcome.reportText).toContain('BLOCKED:');
    expect(outcome.reportText).toContain('unresolved tracked changes');
  });

  describe('Resolved Bugs Sanitize Parity Verification', () => {

    it('BUG-FRAG-1: Coalesces adjacent identical runs after accepting tracked changes', async () => {
      const fragmented = createMockDoc(`
        <w:p>
          <w:r><w:t xml:space="preserve">The term shall be </w:t></w:r>
          <w:ins w:id="1"><w:r><w:t>five (5)</w:t></w:r></w:ins>
          <w:r><w:t xml:space="preserve"> years from the Effective Date.</w:t></w:r>
        </w:p>
      `);

      stubSave(fragmented);

      await finalize_document(fragmented, {
        filename: 'test.docx',
        sanitize_mode: 'full',
        accept_all: true
      });

      // The sentence should appear as one contiguous string, not split across <w:t> nodes.
      const serialized = fragmented.element.toString();
      expect(serialized).toContain('The term shall be five (5) years from the Effective Date.');

      // Three input runs are expected to collapse into exactly one.
      const remainingRuns = fragmented.element.getElementsByTagName('w:r');
      expect(remainingRuns.length).toBe(1);
    });

    it('BUG-NS-1: Strips unused xmlns:w16du namespace declarations during finalization', async () => {
      const mockDoc = createMockDoc('<w:p/>');
      // Declare the w16du namespace on the root element without ever using it.
      mockDoc.part._element.setAttribute('xmlns:w16du', 'http://schemas.microsoft.com/office/word/2023/wordml/word16du');

      stubSave(mockDoc);

      await finalize_document(mockDoc, {
        filename: 'test.docx',
        sanitize_mode: 'full'
      });

      // After finalization the serialized root must no longer declare that namespace.
      const serializedRoot = mockDoc.part._element.toString();
      expect(serializedRoot).not.toContain('xmlns:w16du');
    });
  });
});