@adeu/core 1.6.7 → 1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ingest.ts CHANGED
@@ -49,6 +49,14 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
49
49
  let is_first_block = true;
50
50
  let is_first_para = true;
51
51
 
52
+ if (container.constructor && container.constructor.name === 'NotesPart') {
53
+ const header = container.note_type === 'fn' ? '## Footnotes' : '## Endnotes';
54
+ const sep = `---\n${header}`;
55
+ blocks.push(sep);
56
+ local_cursor += sep.length;
57
+ is_first_block = false;
58
+ }
59
+
52
60
  for (const item of iter_block_items(container)) {
53
61
  if (!is_first_block) local_cursor += 2;
54
62
  const block_start = local_cursor;
@@ -0,0 +1,104 @@
1
+ import { DocumentObject } from '../docx/bridge.js';
2
+ import { SanitizeReport } from './report.js';
3
+ import * as transforms from './transforms.js';
4
+
5
/**
 * Caller-supplied settings for {@link finalize_document}.
 */
export interface FinalizeOptions {
  /** File name printed in the report header. */
  filename: string;
  /**
   * Sanitization depth. When omitted, 'full' is used, matching the mode
   * recorded in the report.
   * - 'full': blocks on unresolved tracked changes (unless `accept_all`) and removes comments.
   * - 'keep-markup': counts tracked changes as kept and, when `author` is set, rewrites markup authors.
   * - 'baseline': runs only the common transforms.
   */
  sanitize_mode?: 'full' | 'keep-markup' | 'baseline';
  /** In 'full' mode, accept outstanding tracked changes instead of blocking on them. */
  accept_all?: boolean;
  /** Requested protection. 'encrypt' is not supported here and falls back to the read-only lock. */
  protection_mode?: 'read_only' | 'encrypt' | null;
  /** Not read by finalize_document in this implementation. */
  password?: string | null;
  /** Recorded in the report; in 'keep-markup' mode also used to replace comment/change authors. */
  author?: string | null;
  /** PDF export is not performed here; requesting it only adds a warning to the report. */
  export_pdf?: boolean;
}

/**
 * Outcome of {@link finalize_document}.
 */
export interface FinalizeResult {
  /** Rendered plain-text report. Always present. */
  reportText: string;
  /** Saved document bytes. Absent when the run was blocked. */
  outBuffer?: Buffer;
}

/**
 * Sanitizes `doc` in place according to `options`, optionally injects a
 * read-only lock into word/settings.xml, saves the document and returns the
 * rendered report together with the saved bytes.
 *
 * In 'full' mode the run is blocked (no transforms, no save, no `outBuffer`)
 * when tracked changes are present and `accept_all` is not set.
 *
 * @param doc Document to sanitize; it is mutated by the transforms.
 * @param options See {@link FinalizeOptions}.
 * @returns The report text and, unless blocked, the saved document buffer.
 */
export async function finalize_document(doc: DocumentObject, options: FinalizeOptions): Promise<FinalizeResult> {
  // Resolve the mode once so that the mode shown in the report and the branch
  // that actually runs can never disagree when sanitize_mode is omitted.
  const mode = options.sanitize_mode ?? 'full';
  const report = new SanitizeReport(options.filename, mode, options.author || null);

  if (mode === 'full') {
    const counts = transforms.count_tracked_changes(doc);
    const total = counts[0] + counts[1] + counts[2];
    report.tracked_changes_found = total;

    // Gate: unresolved changes must be explicitly accepted by the caller.
    if (total > 0 && !options.accept_all) {
      report.status = 'blocked';
      report.blocked_reason = `Document contains ${total} unresolved tracked changes (${counts[0]} insertions, ${counts[1]} deletions, ${counts[2]} formatting). Review in Word first, or set accept_all=true.`;
      return { reportText: report.render() };
    }

    if (total > 0) {
      const authors = transforms.get_track_change_authors(doc);
      if (authors.size > 1) {
        report.warnings.push(`Multiple authors detected in tracked changes: ${Array.from(authors).sort().join(', ')}. Review per-change list before sending.`);
      }
      report.add_transform_lines(transforms.accept_all_tracked_changes(doc));
      report.tracked_changes_accepted = total;
    }

    // The summary is read before removal so the count reflects what was stripped.
    const commentsSummary = transforms.get_comments_summary(doc);
    report.comments_removed = commentsSummary.total;
    report.add_transform_lines(transforms.remove_all_comments(doc));
  } else if (mode === 'keep-markup') {
    // Basic support for keep-markup in TS
    const counts = transforms.count_tracked_changes(doc);
    report.tracked_changes_found = counts[0] + counts[1] + counts[2];
    report.tracked_changes_kept = report.tracked_changes_found;

    if (options.author) {
      report.add_transform_lines(transforms.replace_comment_authors(doc, options.author));
      report.add_transform_lines(transforms.replace_change_authors(doc, options.author));
    }
  }

  // Common transforms (run for every mode)
  report.add_transform_lines(transforms.strip_rsid(doc));
  report.add_transform_lines(transforms.strip_para_ids(doc));
  report.add_transform_lines(transforms.strip_proof_errors(doc));
  report.add_transform_lines(transforms.strip_empty_properties(doc));
  report.add_transform_lines(transforms.strip_hidden_text(doc));
  report.add_transform_lines(transforms.scrub_doc_properties(doc));
  report.add_transform_lines(transforms.scrub_timestamps(doc));
  report.add_transform_lines(transforms.strip_custom_xml(doc));
  report.add_transform_lines(transforms.strip_image_alt_text(doc));

  const warnings = transforms.audit_hyperlinks(doc);
  for (const w of warnings) report.warnings.push(w);

  report.add_transform_lines(transforms.normalize_change_dates(doc));

  // Protection (Settings injection)
  if (options.protection_mode === 'read_only' || options.protection_mode === 'encrypt') {
    if (options.protection_mode === 'encrypt') {
      report.warnings.push("Encryption mode (AES compound wrappers) is strictly unsupported in the zero-dependency Node engine. Falling back to native Word Read-Only lock.");
    }

    const settingsPart = doc.pkg.getPartByPath('word/settings.xml');
    if (settingsPart) {
      const docEl = settingsPart._element.ownerDocument!;
      let prot = transforms.findDescendantsByLocalName(settingsPart._element, 'documentProtection')[0];
      if (!prot) {
        prot = docEl.createElement('w:documentProtection');
        // Word expects documentProtection to be inserted before elements like w:autoFormatOverride, w:styleLockTheme, etc.
        // For standard robustness without complex XSD enforcement, appendChild generally works.
        settingsPart._element.appendChild(prot);
      }
      prot.setAttribute('w:edit', 'readOnly');
      prot.setAttribute('w:enforcement', '1');
      report.structural_lines.push("Document locked (Read-Only enforcement injected into settings.xml)");
    } else {
      // Never report a requested lock as silently successful: without a
      // settings part nothing was injected, so the caller must be told.
      report.warnings.push("Protection was requested but word/settings.xml was not found; the document was NOT locked.");
    }
  }

  if (options.export_pdf) {
    report.warnings.push("PDF export requires the Python/Word COM environment and is skipped in this zero-dependency Node agent.");
  }

  if (report.warnings.length > 0) report.status = 'clean_with_warnings';

  const outBuffer = await doc.save();
  return { reportText: report.render(), outBuffer };
}
@@ -0,0 +1,125 @@
1
/**
 * Accumulates the outcome of a sanitize/finalize run (counts, per-category
 * detail lines, warnings, status) and renders it as a plain-text report.
 */
export class SanitizeReport {
  // Keyword lists used by add_transform_lines to route a line to a section.
  private static readonly CHANGE_MARKERS: readonly string[] = ["tracked change", "insertion", "deletion", "accepted"];
  private static readonly COMMENT_MARKERS: readonly string[] = ["comment", "[open]", "[resolved]"];
  private static readonly KEPT_MARKERS: readonly string[] = ["kept", "visible"];
  private static readonly METADATA_MARKERS: readonly string[] = [
    "author", "template", "company", "manager", "metadata", "timestamp",
    "custom xml", "last modified by", "revision count", "last printed",
  ];
  private static readonly WARNING_MARKERS: readonly string[] = ["hyperlink", "warning"];

  public filename: string;
  public mode: string;
  public author: string | null;

  public tracked_changes_found: number = 0;
  public tracked_changes_accepted: number = 0;
  public tracked_changes_kept: number = 0;
  public change_lines: string[] = [];

  public comments_removed: number = 0;
  public comments_kept: number = 0;
  public removed_comment_lines: string[] = [];
  public kept_comment_lines: string[] = [];

  public metadata_lines: string[] = [];
  public structural_lines: string[] = [];
  public warnings: string[] = [];

  public status: string = "clean";
  public blocked_reason: string | null = null;

  /**
   * @param filename Name printed in the report header.
   * @param mode Sanitize mode; "keep-markup" enables the counterparty-visibility section.
   * @param author Author label echoed in the header flags, or null.
   */
  constructor(filename: string, mode: string = "full", author: string | null = null) {
    this.filename = filename;
    this.mode = mode;
    this.author = author;
  }

  /** True when `text` contains at least one of `needles`. */
  private static contains_any(text: string, needles: readonly string[]): boolean {
    return needles.some((needle) => text.includes(needle));
  }

  /** Appends a blank line, `heading`, and each entry prefixed by `prefix`; no-op for an empty list. */
  private static append_section(out: string[], heading: string, entries: readonly string[], prefix: string = " "): void {
    if (entries.length === 0) return;
    out.push("", heading);
    for (const entry of entries) out.push(`${prefix}${entry}`);
  }

  /**
   * Routes each transform output line to a report section by case-insensitive
   * keyword match. The first matching category wins, checked in this order:
   * tracked changes, comments (kept vs. removed), metadata, warnings, and
   * finally structural as the catch-all.
   *
   * @param lines Lines produced by a transform; null/undefined is treated as "no lines".
   */
  public add_transform_lines(lines: readonly string[] | null | undefined): void {
    // A transform with nothing to report must not crash the whole run.
    if (!lines) return;

    for (const line of lines) {
      const lower = line.toLowerCase();

      if (SanitizeReport.contains_any(lower, SanitizeReport.CHANGE_MARKERS)) {
        this.change_lines.push(line);
        continue;
      }
      if (SanitizeReport.contains_any(lower, SanitizeReport.COMMENT_MARKERS)) {
        const target = SanitizeReport.contains_any(lower, SanitizeReport.KEPT_MARKERS)
          ? this.kept_comment_lines
          : this.removed_comment_lines;
        target.push(line);
        continue;
      }
      if (SanitizeReport.contains_any(lower, SanitizeReport.METADATA_MARKERS)) {
        this.metadata_lines.push(line);
        continue;
      }
      if (SanitizeReport.contains_any(lower, SanitizeReport.WARNING_MARKERS)) {
        this.warnings.push(line);
        continue;
      }
      this.structural_lines.push(line);
    }
  }

  /**
   * Renders the report as newline-joined text.
   *
   * A blocked report contains only the header and the BLOCKED line. Otherwise
   * every non-empty section is printed, followed by a result line that depends
   * on whether any warnings were collected.
   *
   * @returns The full report text.
   */
  public render(): string {
    const sep = "═".repeat(50);
    const out: string[] = [sep, `Finalization Report: ${this.filename}`];

    const flags: string[] = [];
    if (this.mode === "keep-markup") flags.push("--keep-markup");
    if (this.author) flags.push(`--author "${this.author}"`);
    if (this.tracked_changes_accepted > 0) flags.push("--accept-all");
    if (flags.length > 0) out.push(flags.join(" "));
    out.push(sep);

    if (this.status === "blocked") {
      // Never print the literal "null" when the caller forgot to set a reason.
      const reason = this.blocked_reason ?? "no reason was recorded";
      out.push("", `BLOCKED: ${reason}`, sep);
      return out.join("\n");
    }

    const showVisible = this.mode === "keep-markup" && (this.tracked_changes_kept > 0 || this.comments_kept > 0);
    if (showVisible) {
      out.push("", "VISIBLE TO COUNTERPARTY");
      if (this.tracked_changes_kept > 0) out.push(` Tracked changes: ${this.tracked_changes_kept}`);
      if (this.comments_kept > 0) {
        out.push(` Open comments: ${this.comments_kept}`);
        for (const kept of this.kept_comment_lines) out.push(` ${kept}`);
      }
      if (this.author) out.push(` Author on all markup: "${this.author}"`);
    }

    SanitizeReport.append_section(out, "TRACKED CHANGES", this.change_lines);
    SanitizeReport.append_section(out, "COMMENTS (stripped)", this.removed_comment_lines);
    SanitizeReport.append_section(out, "METADATA", this.metadata_lines);
    SanitizeReport.append_section(out, "STRUCTURAL & PROTECTION", this.structural_lines);
    SanitizeReport.append_section(out, "WARNINGS", this.warnings, " ⚠ ");

    out.push("", sep);
    const warningCount = this.warnings.length;
    if (warningCount > 0) {
      out.push(`Result: CLEAN WITH WARNINGS (${warningCount} warning${warningCount > 1 ? 's' : ''})`);
    } else {
      out.push("Result: SECURE & READY TO SEND");
    }
    out.push(sep);

    return out.join("\n");
  }
}
@@ -0,0 +1,192 @@
1
+ import { describe, it, expect, vi } from 'vitest';
2
+ import { DOMParser } from '@xmldom/xmldom';
3
+ import JSZip from 'jszip';
4
+ import { DocumentObject, Part, DocxPackage } from '../docx/bridge.js';
5
+ import * as transforms from './transforms.js';
6
+ import { finalize_document } from './core.js';
7
+
8
+ // --- Helper to build a lightweight in-memory DocumentObject ---
9
/**
 * Test helper: wraps `bodyXml` in a w:document/w:body envelope (declaring the
 * `w` and `w14` prefixes), parses it, and returns a DocumentObject backed by
 * an empty zip whose only part is that main document part.
 */
function createMockDoc(bodyXml: string): DocumentObject {
  const envelopeOpen = '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"><w:body>';
  const envelopeClose = '</w:body></w:document>';
  const documentXml = envelopeOpen + bodyXml + envelopeClose;

  const parsed = new DOMParser().parseFromString(documentXml, 'text/xml');
  const mainPart = new Part('/word/document.xml', documentXml, parsed.documentElement, 'application/xml');

  const pkg = new DocxPackage(new JSZip());
  pkg.parts.push(mainPart);
  pkg.mainDocumentPart = mainPart;

  return new DocumentObject(pkg, mainPart);
}
21
+
22
+ // --- Transforms Unit Tests ---
23
// Unit tests for the individual sanitize transforms. Each case builds a tiny
// in-memory document, runs one transform, and inspects the serialized XML.
describe('Sanitize Transforms', () => {

  it('should strip RSID attributes and elements', () => {
    const doc = createMockDoc(`
      <w:p w:rsidR="00A21F3B" w:rsidP="00B33E21">
        <w:r><w:t>Hello</w:t></w:r>
      </w:p>
      <w:sectPr><w:rsids><w:rsidRoot w:val="00A21F3B"/></w:rsids></w:sectPr>
    `);

    const lines = transforms.strip_rsid(doc);
    const xml = doc.element.toString();

    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('w:rsidR');
    expect(xml).not.toContain('w:rsidP');
    expect(xml).not.toContain('w:rsids');
  });

  it('should strip w14:paraId and w14:textId', () => {
    const doc = createMockDoc(`
      <w:p w14:paraId="3F2A91BC" w14:textId="77777777">
        <w:r><w:t>Test</w:t></w:r>
      </w:p>
    `);

    const lines = transforms.strip_para_ids(doc);
    const xml = doc.element.toString();

    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('w14:paraId');
    expect(xml).not.toContain('w14:textId');
  });

  it('should strip hidden text runs', () => {
    const doc = createMockDoc(`
      <w:p>
        <w:r>
          <w:rPr><w:vanish/></w:rPr>
          <w:t>HiddenSecret</w:t>
        </w:r>
        <w:r>
          <w:t>VisibleText</w:t>
        </w:r>
      </w:p>
    `);

    const lines = transforms.strip_hidden_text(doc);
    const xml = doc.element.toString();

    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('HiddenSecret');
    expect(xml).toContain('VisibleText');
  });

  it('should scrub document properties', () => {
    const doc = createMockDoc('<w:p/>');

    // Mock docProps/app.xml
    const appXml = '<Properties><TotalTime>15</TotalTime><Template>Confidential.dotm</Template></Properties>';
    const appEl = new DOMParser().parseFromString(appXml, 'text/xml').documentElement;
    const appPart = new Part('/docProps/app.xml', appXml, appEl, 'application/xml');
    doc.pkg.parts.push(appPart);

    const lines = transforms.scrub_doc_properties(doc);
    const resultXml = appPart._element.toString();

    expect(lines.length).toBeGreaterThan(0);
    expect(resultXml).toContain('<TotalTime>0</TotalTime>');
    expect(resultXml).toContain('<Template/>');
    expect(resultXml).not.toContain('Confidential.dotm');
  });

  it('should strip custom XML parts and data bindings', () => {
    const doc = createMockDoc(`
      <w:p>
        <w:sdt>
          <w:sdtPr><w:dataBinding w:xpath="/test"/></w:sdtPr>
        </w:sdt>
      </w:p>
    `);

    // Mock custom XML part
    const customPart = new Part('/customXml/item1.xml', '<t/>', new DOMParser().parseFromString('<t/>', 'text/xml').documentElement, 'application/xml');
    doc.pkg.parts.push(customPart);

    const lines = transforms.strip_custom_xml(doc);

    expect(lines.length).toBeGreaterThan(0);
    expect(doc.pkg.parts.find(p => p.partname.includes('customXml'))).toBeUndefined();
    expect(doc.element.toString()).not.toContain('w:dataBinding');
  });

  it('should count and accept all tracked changes', () => {
    const doc = createMockDoc(`
      <w:p>
        <w:del w:id="1">
          <w:r><w:delText>Vendor</w:delText></w:r>
        </w:del>
        <w:ins w:id="2">
          <w:r><w:t>Supplier</w:t></w:r>
        </w:ins>
      </w:p>
    `);

    const [ins, del, fmt] = transforms.count_tracked_changes(doc);
    expect(ins).toBe(1);
    expect(del).toBe(1);
    // The fixture contains no formatting revisions, so the third count must be zero.
    expect(fmt).toBe(0);

    const lines = transforms.accept_all_tracked_changes(doc);
    const xml = doc.element.toString();

    expect(lines.length).toBeGreaterThan(0);
    expect(xml).not.toContain('w:del');
    expect(xml).not.toContain('w:ins');
    expect(xml).not.toContain('Vendor'); // Deletion was removed
    expect(xml).toContain('Supplier'); // Insertion was unwrapped
  });

});
143
+
144
+ // --- Orchestrator Integration Tests ---
145
// Integration tests for the finalize_document orchestrator: they drive the
// whole pipeline on a mock document and check both the report and the output.
describe('Finalize Document (Core)', () => {

  it('should inject XML locking (Read-Only) into settings.xml', async () => {
    const doc = createMockDoc('<w:p/>');

    // Mock word/settings.xml
    const settingsXml = '<w:settings xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"></w:settings>';
    const settingsEl = new DOMParser().parseFromString(settingsXml, 'text/xml').documentElement;
    const settingsPart = new Part('/word/settings.xml', settingsXml, settingsEl, 'application/xml');
    doc.pkg.parts.push(settingsPart);

    // Mock the doc.save buffer return
    const savedBytes = Buffer.from('mock');
    doc.save = vi.fn().mockResolvedValue(savedBytes);

    const res = await finalize_document(doc, {
      filename: 'test.docx',
      protection_mode: 'read_only'
    });

    const finalSettings = settingsPart._element.toString();

    expect(res.reportText).toContain('Result: SECURE & READY TO SEND');
    expect(res.reportText).toContain('Document locked (Read-Only');

    // The protection element and both enforcement attributes must be serialized.
    expect(finalSettings).toContain('w:documentProtection');
    expect(finalSettings).toContain('w:edit="readOnly"');
    expect(finalSettings).toContain('w:enforcement="1"');

    // A successful run saves exactly once and hands the saved bytes back.
    expect(doc.save).toHaveBeenCalledTimes(1);
    expect(res.outBuffer).toEqual(savedBytes);
  });

  it('should return a blocked status if unaccepted changes remain and accept_all is false', async () => {
    const doc = createMockDoc(`
      <w:p>
        <w:ins w:id="1"><w:r><w:t>Unresolved Edit</w:t></w:r></w:ins>
      </w:p>
    `);

    // Spy on save so the test can prove a blocked run never writes output.
    doc.save = vi.fn().mockResolvedValue(Buffer.from('mock'));

    const res = await finalize_document(doc, {
      filename: 'draft.docx',
      sanitize_mode: 'full',
      accept_all: false // <-- Should block
    });

    expect(res.reportText).toContain('BLOCKED:');
    expect(res.reportText).toContain('unresolved tracked changes');

    // Blocked means no document is produced.
    expect(res.outBuffer).toBeUndefined();
    expect(doc.save).not.toHaveBeenCalled();
  });

});