@adeu/core 1.6.7 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/outline.ts CHANGED
@@ -2,18 +2,18 @@
2
2
  * Structural outline extractor.
3
3
  */
4
4
 
5
- import { DocumentObject } from './docx/bridge.js';
6
- import { Paragraph, Table, DocxEvent } from './docx/primitives.js';
7
- import { build_paragraph_text, extract_table } from './ingest.js';
8
- import { extract_comments_data } from './comments.js';
9
- import { findChild } from './docx/dom.js';
5
+ import { DocumentObject } from "./docx/bridge.js";
6
+ import { Paragraph, Table, DocxEvent } from "./docx/primitives.js";
7
+ import { build_paragraph_text, extract_table } from "./ingest.js";
8
+ import { extract_comments_data } from "./comments.js";
9
+ import { findChild } from "./docx/dom.js";
10
10
  import {
11
11
  _get_style_cache,
12
12
  get_paragraph_prefix,
13
13
  iter_block_items,
14
14
  iter_document_parts,
15
15
  iter_paragraph_content,
16
- } from './utils/docx.js';
16
+ } from "./utils/docx.js";
17
17
 
18
18
  const _HEADING_PREFIX_RE = /^(#{1,6}) /;
19
19
  const _HEURISTIC_MIN_WORDS = 3;
@@ -40,10 +40,10 @@ export function extract_outline(
40
40
  projected_body: string,
41
41
  body_pages: string[],
42
42
  body_page_offsets: number[],
43
- paragraph_offsets: Record<string, [number, number]> | null = null
43
+ paragraph_offsets: Record<string, [number, number]> | null = null,
44
44
  ): OutlineNode[] {
45
45
  if (body_pages.length !== body_page_offsets.length) {
46
- throw new Error('body_pages and body_page_offsets length mismatch');
46
+ throw new Error("body_pages and body_page_offsets length mismatch");
47
47
  }
48
48
 
49
49
  const comments_map = extract_comments_data(doc.pkg);
@@ -69,7 +69,12 @@ export function extract_outline(
69
69
  const text = _heading_text(paragraph, comments_map);
70
70
  const style = _determine_heading_style(paragraph);
71
71
 
72
- const owned_end = _find_owned_end(block_records, heading_indices, h_pos, level);
72
+ const owned_end = _find_owned_end(
73
+ block_records,
74
+ heading_indices,
75
+ h_pos,
76
+ level,
77
+ );
73
78
  const owned_blocks = block_records.slice(rec_idx + 1, owned_end);
74
79
 
75
80
  const has_table = _direct_has_table(block_records, rec_idx + 1, owned_end);
@@ -83,7 +88,11 @@ export function extract_outline(
83
88
  return nodes;
84
89
  }
85
90
 
86
- function _direct_has_table(block_records: _BlockRecord[], range_start: number, range_end: number): boolean {
91
+ function _direct_has_table(
92
+ block_records: _BlockRecord[],
93
+ range_start: number,
94
+ range_end: number,
95
+ ): boolean {
87
96
  for (let idx = range_start; idx < range_end; idx++) {
88
97
  const rec = block_records[idx];
89
98
  if (rec.is_paragraph && _is_heading(rec.item)) return false;
@@ -92,7 +101,10 @@ function _direct_has_table(block_records: _BlockRecord[], range_start: number, r
92
101
  return false;
93
102
  }
94
103
 
95
- function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[] {
104
+ function _walk_doc_body(
105
+ doc: DocumentObject,
106
+ comments_map: any,
107
+ ): _BlockRecord[] {
96
108
  const parts = Array.from(iter_document_parts(doc));
97
109
  let body_start_offset = 0;
98
110
  let body_part: any = null;
@@ -128,7 +140,13 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
128
140
 
129
141
  if (!is_first_block) cursor += 2;
130
142
 
131
- records.push({ item, is_paragraph: true, is_table: false, start_offset: cursor, projected_length: block_len });
143
+ records.push({
144
+ item,
145
+ is_paragraph: true,
146
+ is_table: false,
147
+ start_offset: cursor,
148
+ projected_length: block_len,
149
+ });
132
150
  cursor += block_len;
133
151
  is_first_block = false;
134
152
  } else if (item instanceof Table) {
@@ -138,7 +156,13 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
138
156
  if (!is_first_block) cursor += 2;
139
157
 
140
158
  const table_start = cursor;
141
- records.push({ item, is_paragraph: false, is_table: true, start_offset: table_start, projected_length: block_len });
159
+ records.push({
160
+ item,
161
+ is_paragraph: false,
162
+ is_table: true,
163
+ start_offset: table_start,
164
+ projected_length: block_len,
165
+ });
142
166
  _record_table_inner_blocks_lite(item, table_start, records, comments_map);
143
167
  cursor += block_len;
144
168
  is_first_block = false;
@@ -148,7 +172,12 @@ function _walk_doc_body(doc: DocumentObject, comments_map: any): _BlockRecord[]
148
172
  return records;
149
173
  }
150
174
 
151
- function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph, table_start_offset: number, comments_map: any): number {
175
+ function _compute_inner_block_offset(
176
+ table: Table,
177
+ target_paragraph: Paragraph,
178
+ table_start_offset: number,
179
+ comments_map: any,
180
+ ): number {
152
181
  const target_el = target_paragraph._element;
153
182
  let cursor = table_start_offset;
154
183
  let rows_processed = 0;
@@ -165,7 +194,12 @@ function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph,
165
194
 
166
195
  if (cells_in_row > 0) cursor += 3;
167
196
 
168
- const [new_cursor, found] = _walk_cell_for_offset(cell, target_el, cursor, comments_map);
197
+ const [new_cursor, found] = _walk_cell_for_offset(
198
+ cell,
199
+ target_el,
200
+ cursor,
201
+ comments_map,
202
+ );
169
203
  if (found) return new_cursor;
170
204
  cursor = new_cursor;
171
205
 
@@ -177,7 +211,12 @@ function _compute_inner_block_offset(table: Table, target_paragraph: Paragraph,
177
211
  return table_start_offset;
178
212
  }
179
213
 
180
- function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: number, comments_map: any): [number, boolean] {
214
+ function _walk_cell_for_offset(
215
+ cell: any,
216
+ target_el: any,
217
+ cell_start_cursor: number,
218
+ comments_map: any,
219
+ ): [number, boolean] {
181
220
  let cursor = cell_start_cursor;
182
221
  let is_first_block = true;
183
222
 
@@ -190,9 +229,15 @@ function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: num
190
229
  const p_text = build_paragraph_text(inner_item, comments_map, false);
191
230
  cursor += (prefix + p_text).length;
192
231
  } else if (inner_item instanceof Table) {
193
- const nested_offset = _compute_inner_block_offset(inner_item, new Paragraph(target_el, null), cursor, comments_map);
232
+ const nested_offset = _compute_inner_block_offset(
233
+ inner_item,
234
+ new Paragraph(target_el, null),
235
+ cursor,
236
+ comments_map,
237
+ );
194
238
  if (nested_offset !== cursor) {
195
- if (_element_is_descendant(target_el, inner_item._element)) return [nested_offset, true];
239
+ if (_element_is_descendant(target_el, inner_item._element))
240
+ return [nested_offset, true];
196
241
  }
197
242
  const table_text = extract_table(inner_item, comments_map, false, 0);
198
243
  cursor += table_text ? table_text.length : 0;
@@ -202,7 +247,10 @@ function _walk_cell_for_offset(cell: any, target_el: any, cell_start_cursor: num
202
247
  return [cursor, false];
203
248
  }
204
249
 
205
- function _element_is_descendant(target_el: Element, ancestor_el: Element): boolean {
250
+ function _element_is_descendant(
251
+ target_el: Element,
252
+ ancestor_el: Element,
253
+ ): boolean {
206
254
  let cur: Node | null = target_el.parentNode;
207
255
  while (cur) {
208
256
  if (cur === ancestor_el) return true;
@@ -211,7 +259,12 @@ function _element_is_descendant(target_el: Element, ancestor_el: Element): boole
211
259
  return false;
212
260
  }
213
261
 
214
- function _record_table_inner_blocks_lite(table: Table, inherited_offset: number, records: _BlockRecord[], comments_map: any) {
262
+ function _record_table_inner_blocks_lite(
263
+ table: Table,
264
+ inherited_offset: number,
265
+ records: _BlockRecord[],
266
+ comments_map: any,
267
+ ) {
215
268
  const seen_cells = new Set();
216
269
  for (const row of table.rows) {
217
270
  for (const cell of row.cells) {
@@ -220,11 +273,35 @@ function _record_table_inner_blocks_lite(table: Table, inherited_offset: number,
220
273
 
221
274
  for (const inner_item of iter_block_items(cell)) {
222
275
  if (inner_item instanceof Paragraph) {
223
- const true_offset = _is_heading(inner_item) ? _compute_inner_block_offset(table, inner_item, inherited_offset, comments_map) : inherited_offset;
224
- records.push({ item: inner_item, is_paragraph: true, is_table: false, start_offset: true_offset, projected_length: 0 });
276
+ const true_offset = _is_heading(inner_item)
277
+ ? _compute_inner_block_offset(
278
+ table,
279
+ inner_item,
280
+ inherited_offset,
281
+ comments_map,
282
+ )
283
+ : inherited_offset;
284
+ records.push({
285
+ item: inner_item,
286
+ is_paragraph: true,
287
+ is_table: false,
288
+ start_offset: true_offset,
289
+ projected_length: 0,
290
+ });
225
291
  } else if (inner_item instanceof Table) {
226
- records.push({ item: inner_item, is_paragraph: false, is_table: true, start_offset: inherited_offset, projected_length: 0 });
227
- _record_table_inner_blocks_lite(inner_item, inherited_offset, records, comments_map);
292
+ records.push({
293
+ item: inner_item,
294
+ is_paragraph: false,
295
+ is_table: true,
296
+ start_offset: inherited_offset,
297
+ projected_length: 0,
298
+ });
299
+ _record_table_inner_blocks_lite(
300
+ inner_item,
301
+ inherited_offset,
302
+ records,
303
+ comments_map,
304
+ );
228
305
  }
229
306
  }
230
307
  }
@@ -235,19 +312,20 @@ function _project_part(part: any, comments_map: any): string {
235
312
  const blocks: string[] = [];
236
313
  const c_type = part.constructor.name;
237
314
 
238
- if (c_type === 'NotesPart') {
239
- const header = part.note_type === 'fn' ? '## Footnotes' : '## Endnotes';
315
+ if (c_type === "NotesPart") {
316
+ const header = part.note_type === "fn" ? "## Footnotes" : "## Endnotes";
240
317
  blocks.push(`---\n${header}`);
241
318
  }
242
319
 
243
320
  let is_first_para = true;
244
321
  for (const item of iter_block_items(part)) {
245
- if (item.constructor.name === 'FootnoteItem') {
322
+ if (item.constructor.name === "FootnoteItem") {
246
323
  const fn_text = _project_part(item, comments_map);
247
324
  if (fn_text) blocks.push(fn_text);
248
325
  } else if (item instanceof Paragraph) {
249
326
  let prefix = get_paragraph_prefix(item);
250
- if (is_first_para && c_type === 'FootnoteItem') prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
327
+ if (is_first_para && c_type === "FootnoteItem")
328
+ prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
251
329
  const p_text = build_paragraph_text(item, comments_map, false);
252
330
  blocks.push(prefix + p_text);
253
331
  is_first_para = false;
@@ -258,16 +336,19 @@ function _project_part(part: any, comments_map: any): string {
258
336
  }
259
337
  }
260
338
 
261
- return blocks.join('\n\n');
339
+ return blocks.join("\n\n");
262
340
  }
263
341
 
264
342
  function _is_heading(paragraph: Paragraph): boolean {
265
343
  return _HEADING_PREFIX_RE.test(get_paragraph_prefix(paragraph));
266
344
  }
267
345
 
268
- function _heading_passes_quality_filter(paragraph: Paragraph, comments_map: any): boolean {
346
+ function _heading_passes_quality_filter(
347
+ paragraph: Paragraph,
348
+ comments_map: any,
349
+ ): boolean {
269
350
  const style = _determine_heading_style(paragraph);
270
- if (style !== '(heuristic)') return true;
351
+ if (style !== "(heuristic)") return true;
271
352
  const text = _heading_text(paragraph, comments_map);
272
353
  if (!text) return false;
273
354
  const word_count = (text.match(/\w+/g) || []).length;
@@ -287,60 +368,109 @@ function _heading_text(paragraph: Paragraph, comments_map: any): string {
287
368
  }
288
369
 
289
370
  function _strip_critic_markup(text: string): string {
290
- if (!text) return '';
291
- text = text.replace(/\{--[\s\S]*?--\}/g, '');
292
- text = text.replace(/\{>>[\s\S]*?<<\}/g, '');
293
- text = text.replace(/\{\+\+([\s\S]*?)\+\+\}/g, '$1');
294
- text = text.replace(/\{==([\s\S]*?)==\}/g, '$1');
371
+ if (!text) return "";
372
+ text = text.replace(/\{--[\s\S]*?--\}/g, "");
373
+ text = text.replace(/\{>>[\s\S]*?<<\}/g, "");
374
+ text = text.replace(/\{\+\+([\s\S]*?)\+\+\}/g, "$1");
375
+ text = text.replace(/\{==([\s\S]*?)==\}/g, "$1");
295
376
  return text;
296
377
  }
297
378
 
298
379
  function _strip_inline_formatting(text: string): string {
299
- if (!text) return '';
300
- text = text.replace(/\*\*(.+?)\*\*/g, '$1');
301
- text = text.replace(/__(.+?)__/g, '$1');
302
- text = text.replace(/(?<!\w)_(\S(?:.*?\S)?)_(?!\w)/g, '$1');
380
+ if (!text) return "";
381
+ text = text.replace(/\*\*(.+?)\*\*/g, "$1");
382
+ text = text.replace(/__(.+?)__/g, "$1");
383
+ text = text.replace(/(?<!\w)_(\S(?:.*?\S)?)_(?!\w)/g, "$1");
303
384
  return text;
304
385
  }
305
386
 
306
387
  function _determine_heading_style(paragraph: Paragraph): string {
307
- const [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
308
- const pPr = findChild(paragraph._element, 'w:pPr');
388
+ const [style_cache, default_pstyle] = _get_style_cache(
389
+ paragraph._parent.part || paragraph._parent,
390
+ );
391
+ const pPr = findChild(paragraph._element, "w:pPr");
309
392
  let style_id = default_pstyle;
310
-
393
+
311
394
  if (pPr) {
312
- const oLvl = findChild(pPr, 'w:outlineLvl');
313
- if (oLvl && /^\d+$/.test(oLvl.getAttribute('w:val') || '')) {
314
- const style = _safe_style_name(paragraph, style_cache, default_pstyle);
315
- if (style && (style.startsWith('Heading') || style === 'Title')) return style;
316
- return '(outline_level)';
317
- }
318
- const pStyle = findChild(pPr, 'w:pStyle');
319
- if (pStyle) style_id = pStyle.getAttribute('w:val') || default_pstyle;
395
+ const pStyle = findChild(pPr, "w:pStyle");
396
+ if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
320
397
  }
321
398
 
322
- const style_name = (style_id && style_cache && style_cache[style_id]) ? style_cache[style_id].name : null;
323
- if (style_name && (style_name.startsWith('Heading') || style_name === 'Title')) return style_name;
399
+ let outline_level: number | null = null;
400
+ if (pPr) {
401
+ const oLvl = findChild(pPr, "w:outlineLvl");
402
+ if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
403
+ outline_level = parseInt(oLvl.getAttribute("w:val") as string, 10);
404
+ }
405
+ }
324
406
 
325
- if (style_name && /Heading[ ]?([1-6])(?![0-9])/.test(style_name)) return style_name;
407
+ if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
408
+ outline_level = style_cache[style_id].outline_level;
409
+ }
410
+
411
+ const style_name =
412
+ style_id && style_cache && style_cache[style_id]
413
+ ? style_cache[style_id].name
414
+ : style_id;
415
+
416
+ let normalized_style_name = style_name;
417
+ if (normalized_style_name && typeof normalized_style_name === "string") {
418
+ if (normalized_style_name.toLowerCase().startsWith("heading")) {
419
+ normalized_style_name = normalized_style_name.replace(/^heading/i, "Heading");
420
+ } else if (normalized_style_name.toLowerCase() === "title") {
421
+ normalized_style_name = "Title";
422
+ }
423
+ }
424
+
425
+ if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
426
+ if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
427
+ return normalized_style_name;
428
+ }
429
+ return "(outline_level)";
430
+ }
326
431
 
327
- return '(heuristic)';
432
+ if (
433
+ normalized_style_name &&
434
+ (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")
435
+ )
436
+ return normalized_style_name;
437
+
438
+ if (normalized_style_name && /Heading[ ]?([1-6])(?![0-9])/.test(normalized_style_name))
439
+ return normalized_style_name;
440
+
441
+ return "(heuristic)";
328
442
  }
329
443
 
330
- function _safe_style_name(paragraph: Paragraph, style_cache: any, default_pstyle: any): string | null {
331
- const pPr = findChild(paragraph._element, 'w:pPr');
444
+ function _safe_style_name(
445
+ paragraph: Paragraph,
446
+ style_cache: any,
447
+ default_pstyle: any,
448
+ ): string | null {
449
+ const pPr = findChild(paragraph._element, "w:pPr");
332
450
  let style_id = default_pstyle;
333
451
  if (pPr) {
334
- const pStyle = findChild(pPr, 'w:pStyle');
335
- if (pStyle) style_id = pStyle.getAttribute('w:val') || default_pstyle;
452
+ const pStyle = findChild(pPr, "w:pStyle");
453
+ if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
336
454
  }
337
- return (style_id && style_cache && style_cache[style_id]) ? style_cache[style_id].name : null;
455
+ return style_id && style_cache && style_cache[style_id]
456
+ ? style_cache[style_id].name
457
+ : style_id;
338
458
  }
339
459
 
340
- function _find_owned_end(block_records: _BlockRecord[], heading_indices: number[], current_h_pos: number, current_level: number): number {
341
- for (let next_h_pos = current_h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
460
+ function _find_owned_end(
461
+ block_records: _BlockRecord[],
462
+ heading_indices: number[],
463
+ current_h_pos: number,
464
+ current_level: number,
465
+ ): number {
466
+ for (
467
+ let next_h_pos = current_h_pos + 1;
468
+ next_h_pos < heading_indices.length;
469
+ next_h_pos++
470
+ ) {
342
471
  const next_idx = heading_indices[next_h_pos];
343
- if (_heading_level(block_records[next_idx].item) <= current_level) return next_idx;
472
+ if (_heading_level(block_records[next_idx].item) <= current_level)
473
+ return next_idx;
344
474
  }
345
475
  return block_records.length;
346
476
  }
@@ -351,12 +481,12 @@ function _collect_footnote_ids(owned_blocks: _BlockRecord[]): string[] {
351
481
  for (const rec of owned_blocks) {
352
482
  if (!rec.is_paragraph) continue;
353
483
  for (const event of iter_paragraph_content(rec.item)) {
354
- if (!('type' in event)) continue;
355
- let fn_id = '';
356
- if (event.type === 'footnote') fn_id = `fn-${event.id}`;
357
- else if (event.type === 'endnote') fn_id = `en-${event.id}`;
484
+ if (!("type" in event)) continue;
485
+ let fn_id = "";
486
+ if (event.type === "footnote") fn_id = `fn-${event.id}`;
487
+ else if (event.type === "endnote") fn_id = `en-${event.id}`;
358
488
  else continue;
359
-
489
+
360
490
  if (!seen.has(fn_id)) {
361
491
  seen.add(fn_id);
362
492
  ordered.push(fn_id);
@@ -374,4 +504,4 @@ function _offset_to_page(offset: number, body_page_offsets: number[]): number {
374
504
  else break;
375
505
  }
376
506
  return page;
377
- }
507
+ }
@@ -0,0 +1,130 @@
1
+ import { DocumentObject } from '../docx/bridge.js';
2
+ import { SanitizeReport } from './report.js';
3
+ import * as transforms from './transforms.js';
4
+ import { findAllDescendants } from '../docx/dom.js';
5
+
6
/**
 * Caller-supplied settings for `finalize_document`.
 *
 * Only `filename` is required; every other member is optional.
 */
export interface FinalizeOptions {
  /** Document name handed to the `SanitizeReport` constructor. */
  filename: string;

  /**
   * Which sanitisation branch `finalize_document` takes:
   * - `'full'`: tracked changes are counted and (when permitted) accepted, and comments are removed;
   * - `'keep-markup'`: tracked changes are counted and kept; authors are rewritten when `author` is set;
   * - `'baseline'`: neither of the above branches runs, only the common transforms.
   *
   * `finalize_document` passes `'full'` to the report when this is omitted.
   */
  sanitize_mode?: 'full' | 'keep-markup' | 'baseline';

  /**
   * In `'full'` mode, allows unresolved tracked changes to be accepted.
   * When falsy and tracked changes exist, the run is reported as `blocked`.
   */
  accept_all?: boolean;

  /**
   * Requests document protection. Both values result in a read-only lock being
   * written to `word/settings.xml`; `'encrypt'` additionally records a warning
   * that encryption is unsupported.
   */
  protection_mode?: 'read_only' | 'encrypt' | null;

  /** NOTE(review): not read by `finalize_document` as shown in this file — confirm whether any other consumer uses it. */
  password?: string | null;

  /**
   * Author name passed to the report; in `'keep-markup'` mode it also replaces
   * the authors of comments and tracked changes.
   */
  author?: string | null;

  /** When truthy, `finalize_document` only records a warning that PDF export is skipped. */
  export_pdf?: boolean;
}
15
+
16
/** Outcome returned by `finalize_document`. */
export interface FinalizeResult {
  /** Rendered text of the sanitisation report (`report.render()`). */
  reportText: string;

  /**
   * Serialized document produced by `doc.save()`.
   * Absent when the run was blocked and returned before saving.
   */
  outBuffer?: Buffer;
}
20
+
21
/**
 * Sanitizes `doc` in place, optionally injects a read-only lock, and serializes it.
 *
 * Steps, in order:
 *  1. Mode-specific handling of tracked changes and comments (`'full'` or `'keep-markup'`;
 *     `'baseline'` skips this step).
 *  2. The common metadata-stripping transforms, a hyperlink audit, and change-date normalization.
 *  3. Optional read-only protection written into `word/settings.xml`.
 *  4. Removal of an unused `xmlns:w16du` declaration from WordprocessingML part roots.
 *  5. `doc.save()`.
 *
 * @param doc     Document to sanitize; it is mutated by the transforms.
 * @param options See `FinalizeOptions`. An omitted `sanitize_mode` is treated as `'full'`.
 * @returns The rendered report, plus the saved buffer unless the run was blocked.
 *          A run is blocked (no buffer, nothing saved) when mode is `'full'`, tracked
 *          changes exist, and `accept_all` is not set.
 */
export async function finalize_document(doc: DocumentObject, options: FinalizeOptions): Promise<FinalizeResult> {
  // Resolve the effective mode once. Previously the report was labelled with the
  // defaulted mode while the branches below tested the raw option, so an omitted
  // `sanitize_mode` produced a report saying "full" without running the full branch.
  const sanitizeMode = options.sanitize_mode || 'full';
  const report = new SanitizeReport(options.filename, sanitizeMode, options.author || null);

  if (sanitizeMode === 'full') {
    // counts = [insertions, deletions, formatting], per the message built below.
    const counts = transforms.count_tracked_changes(doc);
    const total = counts[0] + counts[1] + counts[2];
    report.tracked_changes_found = total;

    if (total > 0 && !options.accept_all) {
      // Refuse to silently accept changes nobody reviewed; return before any mutation or save.
      report.status = 'blocked';
      report.blocked_reason = `Document contains ${total} unresolved tracked changes (${counts[0]} insertions, ${counts[1]} deletions, ${counts[2]} formatting). Review in Word first, or set accept_all=true.`;
      return { reportText: report.render() };
    }

    if (total > 0) {
      const authors = transforms.get_track_change_authors(doc);
      if (authors.size > 1) {
        report.warnings.push(`Multiple authors detected in tracked changes: ${Array.from(authors).sort().join(', ')}. Review per-change list before sending.`);
      }
      report.add_transform_lines(transforms.accept_all_tracked_changes(doc));
      report.tracked_changes_accepted = total;
    }

    // Read the summary before removal so the report reflects what was present.
    const commentsSummary = transforms.get_comments_summary(doc);
    report.comments_removed = commentsSummary.total;
    report.add_transform_lines(transforms.remove_all_comments(doc));
  } else if (sanitizeMode === 'keep-markup') {
    // Basic support for keep-markup in TS: markup stays, only authorship is rewritten.
    const counts = transforms.count_tracked_changes(doc);
    report.tracked_changes_found = counts[0] + counts[1] + counts[2];
    report.tracked_changes_kept = report.tracked_changes_found;

    if (options.author) {
      report.add_transform_lines(transforms.replace_comment_authors(doc, options.author));
      report.add_transform_lines(transforms.replace_change_authors(doc, options.author));
    }
  }

  // Common transforms, applied in every mode.
  report.add_transform_lines(transforms.strip_rsid(doc));
  report.add_transform_lines(transforms.strip_para_ids(doc));
  report.add_transform_lines(transforms.strip_proof_errors(doc));
  report.add_transform_lines(transforms.strip_empty_properties(doc));
  report.add_transform_lines(transforms.strip_hidden_text(doc));
  report.add_transform_lines(transforms.coalesce_runs(doc));
  report.add_transform_lines(transforms.scrub_doc_properties(doc));
  report.add_transform_lines(transforms.scrub_timestamps(doc));
  report.add_transform_lines(transforms.strip_custom_xml(doc));
  report.add_transform_lines(transforms.strip_image_alt_text(doc));

  const warnings = transforms.audit_hyperlinks(doc);
  for (const w of warnings) report.warnings.push(w);

  report.add_transform_lines(transforms.normalize_change_dates(doc));

  // Protection (Settings injection)
  if (options.protection_mode === 'read_only' || options.protection_mode === 'encrypt') {
    if (options.protection_mode === 'encrypt') {
      report.warnings.push("Encryption mode (AES compound wrappers) is strictly unsupported in the zero-dependency Node engine. Falling back to native Word Read-Only lock.");
    }

    const settingsPart = doc.pkg.getPartByPath('word/settings.xml');
    if (settingsPart) {
      let prot = transforms.findDescendantsByLocalName(settingsPart._element, 'documentProtection')[0];
      if (!prot) {
        // The owner document is only needed when the element has to be created.
        const docEl = settingsPart._element.ownerDocument!;
        prot = docEl.createElement('w:documentProtection');
        // Word expects documentProtection to be inserted before elements like w:autoFormatOverride, w:styleLockTheme, etc.
        // For standard robustness without complex XSD enforcement, appendChild generally works.
        settingsPart._element.appendChild(prot);
      }
      prot.setAttribute('w:edit', 'readOnly');
      prot.setAttribute('w:enforcement', '1');
      report.structural_lines.push("Document locked (Read-Only enforcement injected into settings.xml)");
    }
  }

  if (options.export_pdf) {
    report.warnings.push("PDF export requires the Python/Word COM environment and is skipped in this zero-dependency Node agent.");
  }

  // Clean up leaked Microsoft namespaces: drop `xmlns:w16du` from a part root when
  // nothing in that part actually uses the `w16du:` prefix.
  for (const part of doc.pkg.parts) {
    // Match the exact injection condition from RedlineEngine constructor
    const isCandidate =
      part === doc.part || (part.contentType.includes('wordprocessingml') && part.contentType.endsWith('+xml'));
    if (!isCandidate || !part._element.hasAttribute('xmlns:w16du')) continue;

    // Check root element attributes (excluding the xmlns declaration itself)
    let hasW16du = Array.from(part._element.attributes || []).some(
      a => a.name.startsWith('w16du:') && a.name !== 'xmlns:w16du',
    );

    if (!hasW16du) {
      // Fall back to scanning every descendant for a prefixed tag or attribute.
      const allNodes = findAllDescendants(part._element, '*');
      for (const n of allNodes) {
        if (n.tagName.startsWith('w16du:') || Array.from(n.attributes || []).some(a => a.name.startsWith('w16du:'))) {
          hasW16du = true;
          break;
        }
      }
    }

    if (!hasW16du) part._element.removeAttribute('xmlns:w16du');
  }

  if (report.warnings.length > 0) report.status = 'clean_with_warnings';

  const outBuffer = await doc.save();
  return { reportText: report.render(), outBuffer };
}