@adeu/core 1.6.7 → 1.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3969 -1859
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +95 -8
- package/dist/index.d.ts +95 -8
- package/dist/index.js +3966 -1859
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/consistency.test.ts +134 -0
- package/src/diff.test.ts +13 -1
- package/src/diff.ts +220 -47
- package/src/docx/bridge.ts +111 -57
- package/src/docx/dom.ts +66 -7
- package/src/domain.test.ts +280 -0
- package/src/domain.ts +264 -10
- package/src/engine.bugs.test.ts +481 -0
- package/src/engine.ts +1346 -192
- package/src/index.ts +7 -8
- package/src/ingest.ts +8 -0
- package/src/markup.ts +160 -53
- package/src/outline.ts +199 -69
- package/src/sanitize/core.ts +130 -0
- package/src/sanitize/report.ts +125 -0
- package/src/sanitize/sanitize.test.ts +237 -0
- package/src/sanitize/transforms.ts +452 -0
- package/src/utils/docx.ts +292 -158
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
import { DocumentObject, Part } from '../docx/bridge.js';
|
|
2
|
+
import { findAllDescendants, findChild, findChildren } from '../docx/dom.js';
|
|
3
|
+
import { extract_comments_data, CommentsManager } from '../comments.js';
|
|
4
|
+
import { RedlineEngine } from '../engine.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Collects every descendant of `element` whose tag name is either exactly
 * `localName` or ends with `:<localName>` (i.e. any prefix is accepted).
 *
 * @param element - Root whose descendants are scanned via `getElementsByTagName('*')`.
 * @param localName - Unprefixed tag name to look for.
 * @returns Matching elements in the order the DOM query yields them.
 */
export function findDescendantsByLocalName(element: Element, localName: string): Element[] {
  const prefixedSuffix = ':' + localName;
  return Array.from(element.getElementsByTagName('*')).filter((node) => {
    const name = node.tagName;
    return name === localName || name.endsWith(prefixedSuffix);
  });
}
|
|
17
|
+
|
|
18
|
+
/**
 * Merges adjacent `w:r` runs inside every `w:p` of the document when both runs
 * contain only plain content and their `w:rPr` elements serialize identically.
 *
 * Text is only concatenated into an existing `w:t` / `w:delText` when that
 * element is the LAST element child of the surviving run. Otherwise the
 * incoming element is appended as-is, so the relative order of text and
 * `w:tab` / `w:br` / `w:cr` is preserved. (Previously text could be merged
 * "across" a tab or break, reordering the run's content.)
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns A single summary line when at least one merge happened, else `[]`.
 */
export function coalesce_runs(doc: DocumentObject): string[] {
  let count = 0;

  // Wrapper elements that are searched recursively for further runs.
  const containerTags = ['w:ins', 'w:del', 'w:hyperlink', 'w:sdt', 'w:smartTag', 'w:fldSimple', 'w:sdtContent'];

  const isTextTag = (tag: string): boolean => tag === 'w:t' || tag === 'w:delText';

  // Two runs are considered to share formatting when their rPr serializations match
  // (a missing rPr is compared as the empty string).
  function areRunsIdentical(rPr1: Element | null, rPr2: Element | null): boolean {
    const xml1 = rPr1 ? rPr1.toString() : '';
    const xml2 = rPr2 ? rPr2.toString() : '';
    return xml1 === xml2;
  }

  // A run is "special" (and never merged) if it holds any element outside the safe list.
  function hasSpecialContent(run: Element): boolean {
    const safeTags = ['w:t', 'w:tab', 'w:br', 'w:cr', 'w:delText', 'w:rPr'];
    for (let i = 0; i < run.childNodes.length; i++) {
      const child = run.childNodes[i];
      if (child.nodeType === 1) {
        const tag = (child as Element).tagName;
        if (!safeTags.includes(tag)) return true;
      }
    }
    return false;
  }

  // Returns the run's last element child if it is a text element, else null.
  function trailingText(run: Element): Element | null {
    let tail: Element | null = null;
    for (let c = 0; c < run.childNodes.length; c++) {
      const child = run.childNodes[c];
      if (child.nodeType !== 1) continue;
      const el = child as Element;
      tail = isTextTag(el.tagName) ? el : null;
    }
    return tail;
  }

  // Moves the content of `nxt` (except its rPr) to the end of `curr`, preserving order.
  function mergeRun(curr: Element, nxt: Element): void {
    let last_t = trailingText(curr);
    const nxtChildren = Array.from(nxt.childNodes).filter(n => n.nodeType === 1) as Element[];
    for (const child of nxtChildren) {
      if (child.tagName === 'w:rPr') continue;
      if (isTextTag(child.tagName) && last_t && last_t.tagName === child.tagName) {
        const combined = (last_t.textContent || '') + (child.textContent || '');
        last_t.textContent = combined;
        // Leading/trailing whitespace needs xml:space="preserve" to survive.
        if (combined.trim() !== combined) {
          last_t.setAttribute('xml:space', 'preserve');
        }
      } else {
        curr.appendChild(child);
        // A non-text element (tab, break, ...) ends the current text tail.
        last_t = isTextTag(child.tagName) ? child : null;
      }
    }
  }

  function coalesceContainer(container: Element): void {
    const children = Array.from(container.childNodes).filter(n => n.nodeType === 1) as Element[];
    let i = 0;
    while (i < children.length - 1) {
      const curr = children[i];
      const nxt = children[i + 1];

      const mergeable =
        curr.tagName === 'w:r' &&
        nxt.tagName === 'w:r' &&
        !hasSpecialContent(curr) &&
        !hasSpecialContent(nxt) &&
        areRunsIdentical(findChild(curr, 'w:rPr'), findChild(nxt, 'w:rPr'));

      if (mergeable) {
        mergeRun(curr, nxt);
        container.removeChild(nxt);
        children.splice(i + 1, 1);
        count++;
        // Stay on `curr`: it may also be mergeable with its new neighbour.
        continue;
      }
      i++;
    }

    // Recurse into wrappers; merged-away runs were already spliced out of `children`.
    for (const child of children) {
      if (containerTags.includes(child.tagName)) {
        coalesceContainer(child);
      }
    }
  }

  const paragraphs = findAllDescendants(doc.element, 'w:p');
  for (const p of paragraphs) coalesceContainer(p);

  return count ? [`Adjacent identical runs coalesced: ${count}`] : [];
}
|
|
104
|
+
|
|
105
|
+
/**
 * Removes the listed `w:rsid*` attributes from every element under
 * `doc.element` and detaches any `w:rsids` elements found there.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns One summary line with the combined number of removed attributes
 *          and `w:rsids` elements, or `[]` when nothing was removed.
 */
export function strip_rsid(doc: DocumentObject): string[] {
  const rsidAttrs = ['w:rsidR', 'w:rsidRPr', 'w:rsidRDefault', 'w:rsidP', 'w:rsidDel', 'w:rsidSect', 'w:rsidTr'];
  let removed = 0;

  const nodes = doc.element.getElementsByTagName('*');
  for (let idx = 0; idx < nodes.length; idx++) {
    const node = nodes[idx];
    for (const name of rsidAttrs) {
      if (!node.hasAttribute(name)) continue;
      node.removeAttribute(name);
      removed++;
    }
  }

  for (const rsids of findAllDescendants(doc.element, 'w:rsids')) {
    const parent = rsids.parentNode;
    if (!parent) continue;
    parent.removeChild(rsids);
    removed++;
  }

  if (!removed) return [];
  return [`rsid attributes: ${removed} removed`];
}
|
|
129
|
+
|
|
130
|
+
/**
 * Removes `w14:paraId` and `w14:textId` attributes from every element under
 * `doc.element`.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns One summary line with the number of removed attributes, or `[]`.
 */
export function strip_para_ids(doc: DocumentObject): string[] {
  const idAttrs = ['w14:paraId', 'w14:textId'];
  let removed = 0;

  const nodes = doc.element.getElementsByTagName('*');
  for (let idx = 0; idx < nodes.length; idx++) {
    const node = nodes[idx];
    for (const name of idAttrs) {
      if (!node.hasAttribute(name)) continue;
      node.removeAttribute(name);
      removed++;
    }
  }

  if (!removed) return [];
  return [`Paragraph/text IDs: ${removed} removed`];
}
|
|
144
|
+
|
|
145
|
+
/**
 * Detaches every `w:proofErr` element found under `doc.element`.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns One summary line with the number of markers found, or `[]` when none exist.
 */
export function strip_proof_errors(doc: DocumentObject): string[] {
  const markers = findAllDescendants(doc.element, 'w:proofErr');
  for (const marker of markers) {
    marker.parentNode?.removeChild(marker);
  }
  if (!markers.length) return [];
  return [`Spell check markers: ${markers.length} removed`];
}
|
|
150
|
+
|
|
151
|
+
/**
 * Removes `w:rPr` and `w:pPr` elements that have no children, or whose only
 * child is a whitespace-only text node. `w:rPr` is processed before `w:pPr`.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns One summary line with the number of removed elements, or `[]`.
 */
export function strip_empty_properties(doc: DocumentObject): string[] {
  // True when the element has nothing inside except (at most) one blank text node.
  const isBlank = (el: Element): boolean => {
    const kids = el.childNodes;
    if (kids.length === 0) return true;
    if (kids.length !== 1) return false;
    const only = kids[0];
    return only.nodeType === 3 && !only.textContent?.trim();
  };

  let removed = 0;
  for (const tag of ['w:rPr', 'w:pPr']) {
    for (const props of findAllDescendants(doc.element, tag)) {
      if (!isBlank(props)) continue;
      props.parentNode?.removeChild(props);
      removed++;
    }
  }

  if (!removed) return [];
  return [`Empty property elements: ${removed} removed`];
}
|
|
164
|
+
|
|
165
|
+
/**
 * Removes `w:r` runs whose `w:rPr` turns on `w:vanish` or `w:webHidden`.
 *
 * Both properties are on/off toggles: the element may carry `w:val` set to
 * `0`, `false` or `off`, which explicitly DISABLES hiding. Such runs are
 * visible text and are left untouched (previously the mere presence of the
 * element caused the run to be deleted).
 *
 * Only `w:rPr` elements whose direct parent is a `w:r` are considered, so
 * `w:rPr` nodes with any other parent are ignored.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns One summary line with the number of removed runs, or `[]`.
 */
export function strip_hidden_text(doc: DocumentObject): string[] {
  const offValues = ['0', 'false', 'off'];

  // A toggle is enabled when present and not explicitly switched off via w:val.
  const isEnabled = (toggle: Element | null | undefined): boolean => {
    if (!toggle) return false;
    const val = toggle.getAttribute('w:val');
    // Missing/empty w:val means "on" for on/off properties.
    return !val || !offValues.includes(val);
  };

  let count = 0;
  for (const rPr of findAllDescendants(doc.element, 'w:rPr')) {
    const hidden = isEnabled(findChild(rPr, 'w:vanish')) || isEnabled(findChild(rPr, 'w:webHidden'));
    if (!hidden) continue;

    const run = rPr.parentNode as Element | null;
    if (run && run.tagName === 'w:r' && run.parentNode) {
      run.parentNode.removeChild(run);
      count++;
    }
  }
  return count ? [`Hidden text runs: ${count} removed`] : [];
}
|
|
179
|
+
|
|
180
|
+
/**
 * Counts tracked-change elements under `doc.element`.
 *
 * @param doc - Document to inspect (not modified).
 * @returns A tuple `[insertions, deletions, formattingChanges]`, where the
 *          last entry sums `w:rPrChange`, `w:pPrChange` and `w:sectPrChange`.
 */
export function count_tracked_changes(doc: DocumentObject): [number, number, number] {
  const tally = (tag: string): number => findAllDescendants(doc.element, tag).length;

  const insertions = tally('w:ins');
  const deletions = tally('w:del');

  let formatting = 0;
  for (const tag of ['w:rPrChange', 'w:pPrChange', 'w:sectPrChange']) {
    formatting += tally(tag);
  }

  return [insertions, deletions, formatting];
}
|
|
188
|
+
|
|
189
|
+
/**
 * Gathers the distinct non-empty `w:author` values found on tracked-change
 * elements (`w:ins`, `w:del`, `w:rPrChange`, `w:pPrChange`, `w:sectPrChange`).
 *
 * @param doc - Document to inspect (not modified).
 * @returns The set of author names.
 */
export function get_track_change_authors(doc: DocumentObject): Set<string> {
  const changeTags = ['w:ins', 'w:del', 'w:rPrChange', 'w:pPrChange', 'w:sectPrChange'];
  const found = new Set<string>();

  changeTags.forEach((tag) => {
    findAllDescendants(doc.element, tag).forEach((node) => {
      const who = node.getAttribute('w:author');
      if (who) {
        found.add(who);
      }
    });
  });

  return found;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Concatenates the text of all `w:t` descendants followed by the text of all
 * `w:delText` descendants of `el`.
 *
 * @param el - Element to read text from.
 * @returns The joined text (empty string when there is none).
 */
function _getElementText(el: Element): string {
  let joined = '';
  for (const tag of ['w:t', 'w:delText']) {
    for (const node of findAllDescendants(el, tag)) {
      joined += node.textContent || '';
    }
  }
  return joined;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Produces a single-line preview of `text`: newlines become spaces, the
 * result is trimmed, and anything longer than `maxLen` is cut so that the
 * kept prefix plus a trailing "..." is `maxLen` characters long.
 *
 * @param text - Input string.
 * @param maxLen - Maximum preview length (default 60).
 * @returns The shortened string.
 */
export function _truncate(text: string, maxLen: number = 60): string {
  const flattened = text.split('\n').join(' ').trim();
  const fits = flattened.length <= maxLen;
  return fits ? flattened : flattened.substring(0, maxLen - 3) + "...";
}
|
|
214
|
+
|
|
215
|
+
/**
 * Accepts every tracked revision in the document and reports what was accepted.
 *
 * The `w:ins` / `w:del` elements are captured and described BEFORE the engine
 * runs, then `RedlineEngine.accept_all_revisions()` is invoked, and finally any
 * remaining `w:rPrChange`, `w:pPrChange` and `w:sectPrChange` elements are
 * detached. Formatting changes are removed but not included in the count.
 *
 * @param doc - Document modified in place.
 * @returns A headline with the number of insertions plus deletions followed by
 *          one detail line per change that had text, or `[]` when there were none.
 */
export function accept_all_tracked_changes(doc: DocumentObject): string[] {
  const insertions = findAllDescendants(doc.element, 'w:ins');
  const deletions = findAllDescendants(doc.element, 'w:del');

  const details: string[] = [];
  insertions.forEach((node) => {
    const snippet = _getElementText(node).trim();
    if (snippet) {
      details.push(` Accepted insertion: "${_truncate(snippet, 60)}"`);
    }
  });
  deletions.forEach((node) => {
    const snippet = _getElementText(node).trim();
    if (snippet) {
      details.push(` Accepted deletion of: "${_truncate(snippet, 60)}"`);
    }
  });

  new RedlineEngine(doc).accept_all_revisions();

  // Drop formatting-change records that are still present after the engine ran.
  ['w:rPrChange', 'w:pPrChange', 'w:sectPrChange'].forEach((tag) => {
    findAllDescendants(doc.element, tag).forEach((node) => {
      node.parentNode?.removeChild(node);
    });
  });

  const accepted = insertions.length + deletions.length;
  if (!accepted) return [];
  return [`Tracked changes auto-accepted: ${accepted}`].concat(details);
}
|
|
244
|
+
|
|
245
|
+
/**
 * Summarizes the comments returned by `extract_comments_data(doc.pkg)`.
 *
 * @param doc - Document whose package is inspected (not modified here).
 * @returns An object with `total`, `open`, `resolved` counts and a `comments`
 *          array where each entry is the comment info plus its `id`.
 */
export function get_comments_summary(doc: DocumentObject): any {
  const entries = Object.entries(extract_comments_data(doc.pkg));

  const comments = [];
  let resolved = 0;
  for (const [cId, info] of entries) {
    if (info.resolved) {
      resolved += 1;
    }
    comments.push({ id: cId, ...info });
  }

  // Every comment that is not resolved counts as open.
  const open = comments.length - resolved;
  return { total: comments.length, open, resolved, comments };
}
|
|
259
|
+
|
|
260
|
+
/**
 * Deletes every comment reported by `extract_comments_data(doc.pkg)` through
 * `CommentsManager.deleteComment`, then detaches any remaining
 * `w:commentRangeStart`, `w:commentRangeEnd` and `w:commentReference`
 * elements under `doc.element`.
 *
 * @param doc - Document modified in place.
 * @returns A headline with total/resolved/open counts followed by one line per
 *          removed comment, or `[]` when the document has no comments.
 */
export function remove_all_comments(doc: DocumentObject): string[] {
  const data = extract_comments_data(doc.pkg);
  const entries = Object.entries(data);
  if (entries.length === 0) return [];

  const manager = new CommentsManager(doc);
  const details: string[] = [];

  entries.forEach(([cId, info]) => {
    let status = "[Open]";
    if (info.resolved) {
      status = "[Resolved]";
    }
    details.push(` ${status} "${_truncate(info.text || '', 60)}" (${info.author || 'Unknown'})`);
    manager.deleteComment(cId);
  });

  // Sweep up anchors that are still in the body after the deletions above.
  const anchorTags = ['w:commentRangeStart', 'w:commentRangeEnd', 'w:commentReference'];
  anchorTags.forEach((tag) => {
    findAllDescendants(doc.element, tag).forEach((node) => {
      node.parentNode?.removeChild(node);
    });
  });

  const infos = Object.values(data);
  const resolvedCount = infos.filter((c) => c.resolved).length;
  const openCount = infos.filter((c) => !c.resolved).length;
  const headline = `Comments removed: ${entries.length} (${resolvedCount} resolved, ${openCount} open)`;
  return [headline].concat(details);
}
|
|
284
|
+
|
|
285
|
+
/**
 * Overwrites the `w:author` of every `w:comment` in the comments part with
 * `newAuthor`, and rewrites existing `w:initials` attributes with initials
 * derived from `newAuthor` (first character of each space-separated word,
 * upper-cased). Comments without a non-empty author keep their author
 * attribute as-is.
 *
 * @param doc - Document whose comments part is modified in place.
 * @param newAuthor - Replacement author name.
 * @returns One summary line listing the replaced authors (sorted), or `[]`
 *          when there is no comments part or no author was replaced.
 */
export function replace_comment_authors(doc: DocumentObject, newAuthor: string): string[] {
  const manager = new CommentsManager(doc);
  if (!manager.commentsPart) return [];

  // Derived once; depends only on newAuthor.
  const newInitials = newAuthor
    .split(' ')
    .filter(Boolean)
    .map((word) => word[0])
    .join('')
    .toUpperCase();

  const previous = new Set<string>();
  for (const comment of findAllDescendants(manager.commentsPart._element, 'w:comment')) {
    const current = comment.getAttribute('w:author');
    if (current) {
      previous.add(current);
      comment.setAttribute('w:author', newAuthor);
    }
    if (comment.hasAttribute('w:initials')) {
      comment.setAttribute('w:initials', newInitials);
    }
  }

  if (!previous.size) return [];
  return [`Comment authors replaced: ${Array.from(previous).sort().join(', ')} → "${newAuthor}"`];
}
|
|
304
|
+
|
|
305
|
+
/**
 * Replaces the `w:author` attribute on tracked-change elements with `newAuthor`.
 *
 * Covers `w:ins`, `w:del`, `w:rPrChange`, `w:pPrChange` and `w:sectPrChange`.
 * `w:sectPrChange` was previously skipped, so authors on section-property
 * changes stayed in the document.
 *
 * Elements without a non-empty `w:author` are left untouched.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @param newAuthor - Replacement author name.
 * @returns One summary line listing the replaced authors (sorted), or `[]`.
 */
export function replace_change_authors(doc: DocumentObject, newAuthor: string): string[] {
  const original = new Set<string>();
  for (const tag of ['w:ins', 'w:del', 'w:rPrChange', 'w:pPrChange', 'w:sectPrChange']) {
    for (const el of findAllDescendants(doc.element, tag)) {
      const author = el.getAttribute('w:author');
      if (author) {
        original.add(author);
        el.setAttribute('w:author', newAuthor);
      }
    }
  }
  return original.size ? [`Track change authors replaced: ${Array.from(original).sort().join(', ')} → "${newAuthor}"`] : [];
}
|
|
318
|
+
|
|
319
|
+
/**
 * Overwrites the `w:date` attribute on tracked-change elements with a fixed
 * timestamp so the original edit times are no longer present.
 *
 * Covers `w:ins`, `w:del`, `w:rPrChange`, `w:pPrChange` and `w:sectPrChange`.
 * `w:sectPrChange` was previously skipped, so dates on section-property
 * changes survived.
 *
 * Elements without a `w:date` attribute are not given one.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns One summary line with the number of rewritten timestamps, or `[]`.
 */
export function normalize_change_dates(doc: DocumentObject): string[] {
  let count = 0;
  const fixed = "2025-01-01T00:00:00Z";
  for (const tag of ['w:ins', 'w:del', 'w:rPrChange', 'w:pPrChange', 'w:sectPrChange']) {
    for (const el of findAllDescendants(doc.element, tag)) {
      if (el.hasAttribute('w:date')) {
        el.setAttribute('w:date', fixed);
        count++;
      }
    }
  }
  return count ? [`Track change timestamps: ${count} normalized`] : [];
}
|
|
332
|
+
|
|
333
|
+
/**
 * Clears identifying metadata in `docProps/core.xml` and `docProps/app.xml`.
 *
 * Core part: blanks `creator` and `lastModifiedBy`, and resets `revision` to
 * "1" when it parses to a number greater than 1.
 * App part: sets the listed numeric statistics to "0" (only `TotalTime` is
 * reported) and blanks `Template`, `Manager` and `Company`.
 *
 * @param doc - Document whose package parts are modified in place.
 * @returns A "Metadata scrubbed:" headline followed by one line per reported
 *          value, or `[]` when nothing was reported.
 */
export function scrub_doc_properties(doc: DocumentObject): string[] {
  const report: string[] = [];

  // Empties every non-empty <localName> element under root, logging the old value.
  const blankOut = (root: Element, localName: string, label: string): void => {
    for (const node of findDescendantsByLocalName(root, localName)) {
      if (!node.textContent) continue;
      report.push(`${label}: ${node.textContent}`);
      node.textContent = "";
    }
  };

  const core = doc.pkg.getPartByPath('docProps/core.xml');
  if (core) {
    blankOut(core._element, 'creator', 'Author');
    blankOut(core._element, 'lastModifiedBy', 'Last modified by');

    for (const rev of findDescendantsByLocalName(core._element, 'revision')) {
      if (rev.textContent && parseInt(rev.textContent) > 1) {
        report.push(`Revision count: ${rev.textContent} → 1`);
        rev.textContent = "1";
      }
    }
  }

  const app = doc.pkg.getPartByPath('docProps/app.xml');
  if (app) {
    const appRoot = app._element;

    const counters = ["TotalTime", "Words", "Characters", "Paragraphs", "Lines", "CharactersWithSpaces"];
    for (const field of counters) {
      for (const node of findDescendantsByLocalName(appRoot, field)) {
        if (!node.textContent || node.textContent === "0") continue;
        if (field === "TotalTime") {
          report.push(`Total editing time: ${node.textContent} minutes`);
        }
        node.textContent = "0";
      }
    }

    for (const field of ["Template", "Manager", "Company"]) {
      blankOut(appRoot, field, field);
    }
  }

  if (!report.length) return [];
  return ["Metadata scrubbed:", ...report.map((entry) => ` ${entry}`)];
}
|
|
372
|
+
|
|
373
|
+
/**
 * Sets the `created`, `modified` and `lastPrinted` values in
 * `docProps/core.xml` to the Unix epoch. Empty elements are left alone.
 *
 * @param doc - Document whose core properties part is modified in place.
 * @returns A single-line report when at least one value changed, else `[]`.
 */
export function scrub_timestamps(doc: DocumentObject): string[] {
  const epoch = "1970-01-01T00:00:00Z";
  const core = doc.pkg.getPartByPath('docProps/core.xml');
  if (!core) return [];

  let touched = false;
  for (const localName of ['created', 'modified', 'lastPrinted']) {
    for (const node of findDescendantsByLocalName(core._element, localName)) {
      if (!node.textContent || node.textContent === epoch) continue;
      node.textContent = epoch;
      touched = true;
    }
  }

  return touched ? ["Timestamps normalized to epoch"] : [];
}
|
|
389
|
+
|
|
390
|
+
/**
 * Drops every package part whose name contains `/customXml`, removes
 * relationships targeting `customXml` from the root and document relationship
 * parts, and removes `w:dataBinding` children from all `w:sdtPr` elements.
 *
 * Does nothing (and returns `[]`) when the package has no such parts.
 *
 * @param doc - Document whose package and body are modified in place.
 * @returns One summary line with the number of removed parts, or `[]`.
 */
export function strip_custom_xml(doc: DocumentObject): string[] {
  const customParts = doc.pkg.parts.filter(p => p.partname.includes('/customXml'));
  if (!customParts.length) return [];

  const doomed = new Set(customParts.map(p => p.partname));
  doc.pkg.parts = doc.pkg.parts.filter(p => !doomed.has(p.partname));

  // Detaches every Relationship in the given rels part that points at customXml.
  const removeRelationsTo = (relsPart: Part) => {
    const stale = findAllDescendants(relsPart._element, 'Relationship').filter((rel) => {
      const target = rel.getAttribute('Target');
      return Boolean(target && target.includes('customXml'));
    });
    for (const rel of stale) {
      rel.parentNode?.removeChild(rel);
    }
  };

  const rootRels = doc.pkg.getPartByPath('_rels/.rels');
  if (rootRels) {
    removeRelationsTo(rootRels);
  }

  const docRels = doc.pkg.getOrCreateRelsPart(doc.part.partname);
  if (docRels) {
    removeRelationsTo(docRels);
  }

  for (const sdtPr of findAllDescendants(doc.element, 'w:sdtPr')) {
    for (const binding of findChildren(sdtPr, 'w:dataBinding')) {
      sdtPr.removeChild(binding);
    }
  }

  return [`Custom XML parts: ${customParts.length} removed`];
}
|
|
418
|
+
|
|
419
|
+
/**
 * Removes the `descr` attribute from `docPr` elements when the description
 * is shorter than 10 characters, or contains a "." and is shorter than 60
 * characters. Empty or missing descriptions are ignored.
 *
 * @param doc - Document whose `element` tree is modified in place.
 * @returns One summary line with the number of removed descriptions, or `[]`.
 */
export function strip_image_alt_text(doc: DocumentObject): string[] {
  // Heuristic from the length/"." rules above.
  const shouldDrop = (descr: string): boolean => {
    if (descr.length < 10) return true;
    return descr.includes('.') && descr.length < 60;
  };

  let removed = 0;
  findDescendantsByLocalName(doc.element, 'docPr').forEach((docPr) => {
    const descr = docPr.getAttribute('descr');
    if (!descr || !shouldDrop(descr)) return;
    docPr.removeAttribute('descr');
    removed++;
  });

  if (!removed) return [];
  return [`Image alt text: ${removed} auto-generated descriptions removed`];
}
|
|
434
|
+
|
|
435
|
+
/**
 * Scans the document's relationships for external targets whose URL contains
 * one of a fixed list of substrings (case-insensitive) and emits a warning
 * for each such relationship. Nothing is modified in the document body.
 *
 * @param doc - Document whose relationship part is read via `getOrCreateRelsPart`.
 * @returns One warning line per matching external relationship (possibly empty).
 */
export function audit_hyperlinks(doc: DocumentObject): string[] {
  const internal = ["sharepoint.com", "onedrive.com", ".internal", "intranet", "localhost", "10.", "192.168.", "172.16."];

  const docRels = doc.pkg.getOrCreateRelsPart(doc.part.partname);
  const warnings: string[] = [];

  for (const rel of findAllDescendants(docRels._element, 'Relationship')) {
    if (rel.getAttribute('TargetMode') !== 'External') continue;

    const url = rel.getAttribute('Target') || '';
    const lowered = url.toLowerCase();
    const flagged = internal.some((pattern) => lowered.includes(pattern.toLowerCase()));
    if (flagged) {
      warnings.push(`Hyperlink targets internal URL: ${_truncate(url, 80)}`);
    }
  }

  return warnings;
}
|