@adeu/core 1.6.8 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -0
- package/dist/index.cjs +1833 -540
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +75 -1
- package/dist/index.d.ts +75 -1
- package/dist/index.js +1832 -540
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/consistency.test.ts +134 -0
- package/src/diff.test.ts +13 -1
- package/src/diff.ts +189 -70
- package/src/docx/bridge.ts +99 -57
- package/src/docx/dom.ts +66 -7
- package/src/engine.bugs.test.ts +481 -0
- package/src/engine.ts +1346 -192
- package/src/index.ts +1 -1
- package/src/markup.ts +160 -53
- package/src/outline.ts +199 -69
- package/src/sanitize/core.ts +26 -0
- package/src/sanitize/report.ts +1 -1
- package/src/sanitize/sanitize.test.ts +47 -2
- package/src/sanitize/transforms.ts +87 -0
- package/src/utils/docx.ts +282 -157
package/src/docx/bridge.ts
CHANGED
|
@@ -1,41 +1,54 @@
|
|
|
1
|
-
import JSZip from
|
|
2
|
-
import {
|
|
1
|
+
import JSZip from "jszip";
|
|
2
|
+
import {
|
|
3
|
+
parseXml,
|
|
4
|
+
findChild,
|
|
5
|
+
findAllDescendants,
|
|
6
|
+
serializeXml,
|
|
7
|
+
} from "./dom.js";
|
|
3
8
|
|
|
4
9
|
/**
 * One relationship record: its identifier, its relationship type, the
 * target it points at, and whether that target is flagged as external.
 */
export class Relationship {
  public id: string;
  public type: string;
  public target: string;
  public isExternal: boolean;

  constructor(id: string, type: string, target: string, isExternal: boolean) {
    // Assigned in parameter order so own-property order is unchanged.
    this.id = id;
    this.type = type;
    this.target = target;
    this.isExternal = isExternal;
  }
}
|
|
12
17
|
|
|
13
18
|
export class Part {
|
|
14
19
|
public rels: Map<string, Relationship> = new Map();
|
|
15
20
|
public _element: Element;
|
|
16
|
-
|
|
21
|
+
public package?: DocxPackage;
|
|
17
22
|
constructor(
|
|
18
23
|
public partname: string,
|
|
19
24
|
public blob: string,
|
|
20
25
|
element: Element,
|
|
21
|
-
public contentType: string
|
|
26
|
+
public contentType: string,
|
|
22
27
|
) {
|
|
23
28
|
this._element = element;
|
|
24
29
|
}
|
|
25
30
|
|
|
26
|
-
public addRelationship(
|
|
31
|
+
public addRelationship(
|
|
32
|
+
id: string,
|
|
33
|
+
type: string,
|
|
34
|
+
target: string,
|
|
35
|
+
isExternal: boolean = false,
|
|
36
|
+
) {
|
|
27
37
|
this.rels.set(id, new Relationship(id, type, target, isExternal));
|
|
28
|
-
|
|
38
|
+
|
|
29
39
|
// Directly append the relationship element to the document structure
|
|
30
|
-
if (this.partname.endsWith(
|
|
40
|
+
if (this.partname.endsWith(".rels")) {
|
|
31
41
|
const doc = this._element.ownerDocument;
|
|
32
42
|
if (doc) {
|
|
33
43
|
// Use strict namespace to ensure it parses successfully on reload
|
|
34
|
-
const relEl = doc.createElementNS(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
44
|
+
const relEl = doc.createElementNS(
|
|
45
|
+
"http://schemas.openxmlformats.org/package/2006/relationships",
|
|
46
|
+
"Relationship",
|
|
47
|
+
);
|
|
48
|
+
relEl.setAttribute("Id", id);
|
|
49
|
+
relEl.setAttribute("Type", type);
|
|
50
|
+
relEl.setAttribute("Target", target);
|
|
51
|
+
if (isExternal) relEl.setAttribute("TargetMode", "External");
|
|
39
52
|
this._element.appendChild(relEl);
|
|
40
53
|
}
|
|
41
54
|
}
|
|
@@ -50,32 +63,44 @@ export class DocxPackage {
|
|
|
50
63
|
|
|
51
64
|
/**
 * Looks up a part by name, treating one leading slash as optional on both
 * the query and the stored part name.
 *
 * @param path Part path, with or without a leading "/".
 * @returns The first matching part, or `undefined` when none matches.
 */
public getPartByPath(path: string): Part | undefined {
  // Reduce the query to its slash-less form (the form JSZip uses) ...
  let bare = path;
  if (bare.startsWith("/")) {
    bare = bare.substring(1);
  }
  // ... and also accept stored names that carry the leading slash.
  const rooted = "/" + bare;

  for (const candidate of this.parts) {
    if (candidate.partname === bare || candidate.partname === rooted) {
      return candidate;
    }
  }
  return undefined;
}
|
|
56
71
|
|
|
57
72
|
/**
 * Returns the first part name derived from `pattern` that is not already
 * present in the package.
 *
 * The first "%d" in `pattern` is replaced by "" on the first attempt and by
 * 2, 3, 4, ... on later attempts, so "/word/header%d.xml" yields
 * "/word/header.xml", then "/word/header2.xml", and so on.
 *
 * @param pattern Part name template containing a "%d" placeholder.
 * @returns An unused part name.
 * @throws Error when `pattern` has no "%d" placeholder and that exact name
 *   is already taken. No other candidate can be generated, and the search
 *   would otherwise never terminate.
 */
public nextPartname(pattern: string): string {
  if (!pattern.includes("%d")) {
    // Without a placeholder every attempt produces the same name.
    if (!this.getPartByPath(pattern)) return pattern;
    throw new Error(
      `Cannot derive a free part name from "${pattern}": ` +
        "the pattern has no %d placeholder and the name is already in use",
    );
  }

  for (let i = 1; ; i++) {
    const candidate = pattern.replace("%d", i === 1 ? "" : i.toString());
    if (!this.getPartByPath(candidate)) return candidate;
  }
}
|
|
65
80
|
|
|
66
|
-
public addPart(
|
|
81
|
+
public addPart(
|
|
82
|
+
partname: string,
|
|
83
|
+
contentType: string,
|
|
84
|
+
xmlString: string,
|
|
85
|
+
): Part {
|
|
67
86
|
const doc = parseXml(xmlString);
|
|
68
|
-
const part = new Part(
|
|
87
|
+
const part = new Part(
|
|
88
|
+
partname,
|
|
89
|
+
xmlString,
|
|
90
|
+
doc.documentElement,
|
|
91
|
+
contentType,
|
|
92
|
+
);
|
|
93
|
+
part.package = this;
|
|
69
94
|
this.parts.push(part);
|
|
70
95
|
|
|
71
96
|
// Update [Content_Types].xml
|
|
72
|
-
const ctPart = this.getPartByPath(
|
|
97
|
+
const ctPart = this.getPartByPath("[Content_Types].xml");
|
|
73
98
|
if (ctPart) {
|
|
74
99
|
const docCT = ctPart._element.ownerDocument;
|
|
75
100
|
if (docCT) {
|
|
76
|
-
const override = docCT.createElement(
|
|
77
|
-
override.setAttribute(
|
|
78
|
-
override.setAttribute(
|
|
101
|
+
const override = docCT.createElement("Override");
|
|
102
|
+
override.setAttribute("PartName", partname);
|
|
103
|
+
override.setAttribute("ContentType", contentType);
|
|
79
104
|
ctPart._element.appendChild(override);
|
|
80
105
|
}
|
|
81
106
|
}
|
|
@@ -84,14 +109,18 @@ export class DocxPackage {
|
|
|
84
109
|
|
|
85
110
|
/**
 * Returns the relationships part that belongs to `sourcePartname`, adding an
 * empty one to the package when it does not exist yet.
 *
 * The relationships path is built by inserting "_rels/" before the file name
 * and appending ".rels", e.g.
 * /word/document.xml -> /word/_rels/document.xml.rels
 *
 * @param sourcePartname Name of the part whose relationships are wanted.
 * @returns The existing or newly added relationships part.
 */
public getOrCreateRelsPart(sourcePartname: string): Part {
  // Split at the last "/": everything before it is the directory, the rest
  // is the file name (the whole string when there is no slash at all).
  const cut = sourcePartname.lastIndexOf("/");
  const directory = cut < 0 ? "" : sourcePartname.substring(0, cut);
  const file = sourcePartname.substring(cut + 1);
  const relsPath = `${directory}/_rels/${file}.rels`;

  const existing = this.getPartByPath(relsPath);
  if (existing) {
    return existing;
  }

  const emptyRelationships =
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' +
    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>';
  return this.addPart(
    relsPath,
    "application/vnd.openxmlformats-package.relationships+xml",
    emptyRelationships,
  );
}
|
|
@@ -99,63 +128,75 @@ export class DocxPackage {
|
|
|
99
128
|
|
|
100
129
|
export class DocumentObject {
|
|
101
130
|
public part: Part;
|
|
102
|
-
public settings: { oddAndEvenPagesHeaderFooter: boolean } = {
|
|
131
|
+
public settings: { oddAndEvenPagesHeaderFooter: boolean } = {
|
|
132
|
+
oddAndEvenPagesHeaderFooter: false,
|
|
133
|
+
};
|
|
103
134
|
// Simplification for the TS port: sections hold header/footer refs
|
|
104
|
-
public sections: any[] = [];
|
|
135
|
+
public sections: any[] = [];
|
|
105
136
|
|
|
106
|
-
constructor(
|
|
137
|
+
constructor(
|
|
138
|
+
public pkg: DocxPackage,
|
|
139
|
+
part: Part,
|
|
140
|
+
) {
|
|
107
141
|
this.part = part;
|
|
108
142
|
}
|
|
109
143
|
|
|
110
144
|
/**
 * The `w:body` child of the part's root element, or the root element itself
 * when it has no such child.
 */
public get element(): Element {
  const body = findChild(this.part._element, "w:body");
  if (body) {
    return body;
  }
  return this.part._element;
}
|
|
113
147
|
|
|
114
148
|
/**
|
|
115
149
|
* Main entrypoint for loading a DOCX buffer into the DOM wrapper.
|
|
116
150
|
*/
|
|
117
|
-
public static async load(
|
|
151
|
+
public static async load(
|
|
152
|
+
buffer: Buffer | ArrayBuffer,
|
|
153
|
+
): Promise<DocumentObject> {
|
|
118
154
|
const zip = await JSZip.loadAsync(buffer);
|
|
119
155
|
const pkg = new DocxPackage(zip);
|
|
120
156
|
|
|
121
157
|
// 1. Load Content Types
|
|
122
|
-
const ctFile = zip.file(
|
|
158
|
+
const ctFile = zip.file("[Content_Types].xml");
|
|
123
159
|
let contentTypes: Record<string, string> = {};
|
|
124
160
|
if (ctFile) {
|
|
125
|
-
const ctXml = parseXml(await ctFile.async(
|
|
126
|
-
const overrides = findAllDescendants(ctXml.documentElement,
|
|
161
|
+
const ctXml = parseXml(await ctFile.async("text"));
|
|
162
|
+
const overrides = findAllDescendants(ctXml.documentElement, "Override");
|
|
127
163
|
for (const override of overrides) {
|
|
128
|
-
contentTypes[override.getAttribute(
|
|
164
|
+
contentTypes[override.getAttribute("PartName") || ""] =
|
|
165
|
+
override.getAttribute("ContentType") || "";
|
|
129
166
|
}
|
|
130
167
|
}
|
|
131
168
|
|
|
132
169
|
// 2. Pre-load all XML parts to allow synchronous traversal later
|
|
133
170
|
for (const [path, file] of Object.entries(zip.files)) {
|
|
134
|
-
if (!file.dir && (path.endsWith(
|
|
135
|
-
const text = await file.async(
|
|
171
|
+
if (!file.dir && (path.endsWith(".xml") || path.endsWith(".rels"))) {
|
|
172
|
+
const text = await file.async("text");
|
|
136
173
|
const doc = parseXml(text);
|
|
137
|
-
const cType = contentTypes[
|
|
138
|
-
const part = new Part(
|
|
174
|
+
const cType = contentTypes["/" + path] || "application/xml";
|
|
175
|
+
const part = new Part("/" + path, text, doc.documentElement, cType);
|
|
176
|
+
part.package = pkg;
|
|
139
177
|
pkg.parts.push(part);
|
|
140
178
|
}
|
|
141
179
|
}
|
|
142
180
|
|
|
143
181
|
// 3. Resolve Relationships for the main document
|
|
144
|
-
const mainPart = pkg.getPartByPath(
|
|
145
|
-
if (!mainPart) throw new Error(
|
|
182
|
+
const mainPart = pkg.getPartByPath("word/document.xml");
|
|
183
|
+
if (!mainPart) throw new Error("Invalid DOCX: Missing word/document.xml");
|
|
146
184
|
pkg.mainDocumentPart = mainPart;
|
|
147
185
|
|
|
148
|
-
const relsPart = pkg.getPartByPath(
|
|
186
|
+
const relsPart = pkg.getPartByPath("word/_rels/document.xml.rels");
|
|
149
187
|
if (relsPart) {
|
|
150
|
-
const relElements = findAllDescendants(relsPart._element,
|
|
188
|
+
const relElements = findAllDescendants(relsPart._element, "Relationship");
|
|
151
189
|
for (const rel of relElements) {
|
|
152
|
-
const rId = rel.getAttribute(
|
|
153
|
-
const target = rel.getAttribute(
|
|
154
|
-
const type = rel.getAttribute(
|
|
155
|
-
const targetMode = rel.getAttribute(
|
|
156
|
-
|
|
190
|
+
const rId = rel.getAttribute("Id");
|
|
191
|
+
const target = rel.getAttribute("Target");
|
|
192
|
+
const type = rel.getAttribute("Type");
|
|
193
|
+
const targetMode = rel.getAttribute("TargetMode");
|
|
194
|
+
|
|
157
195
|
if (rId && target && type) {
|
|
158
|
-
mainPart.rels.set(
|
|
196
|
+
mainPart.rels.set(
|
|
197
|
+
rId,
|
|
198
|
+
new Relationship(rId, type, target, targetMode === "External"),
|
|
199
|
+
);
|
|
159
200
|
}
|
|
160
201
|
}
|
|
161
202
|
}
|
|
@@ -167,11 +208,11 @@ export class DocumentObject {
|
|
|
167
208
|
let rId = 1;
|
|
168
209
|
while (this.part.rels.has(`rId${rId}`)) rId++;
|
|
169
210
|
const id = `rId${rId}`;
|
|
170
|
-
|
|
211
|
+
|
|
171
212
|
// In DOCX, targets in .rels are relative to the source part's directory.
|
|
172
213
|
// /word/document.xml relating to /word/comments.xml -> target is "comments.xml"
|
|
173
|
-
const target = part.partname.split(
|
|
174
|
-
|
|
214
|
+
const target = part.partname.split("/").pop()!;
|
|
215
|
+
|
|
175
216
|
this.part.rels.set(id, new Relationship(id, relType, target, false));
|
|
176
217
|
const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
|
|
177
218
|
relsPart.addRelationship(id, relType, target, false);
|
|
@@ -181,7 +222,7 @@ export class DocumentObject {
|
|
|
181
222
|
let rId = 1;
|
|
182
223
|
while (this.part.rels.has(`rId${rId}`)) rId++;
|
|
183
224
|
const id = `rId${rId}`;
|
|
184
|
-
|
|
225
|
+
|
|
185
226
|
this.part.rels.set(id, new Relationship(id, relType, target, true));
|
|
186
227
|
const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
|
|
187
228
|
relsPart.addRelationship(id, relType, target, true);
|
|
@@ -191,11 +232,12 @@ export class DocumentObject {
|
|
|
191
232
|
/**
 * Serializes every part of the package back into the zip archive and
 * generates the archive as a Node buffer.
 *
 * Each part is serialized from its owner document when it has one, otherwise
 * from the element itself, and an XML declaration is prepended when the
 * serialized text does not already start with one.
 *
 * @returns The generated archive.
 */
public async save(): Promise<Buffer> {
  const xmlDeclaration =
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';

  for (const part of this.pkg.parts) {
    let xmlStr = serializeXml(part._element.ownerDocument || part._element);
    if (!xmlStr.startsWith("<?xml")) {
      xmlStr = xmlDeclaration + xmlStr;
    }

    // JSZip entry names have no leading slash. Strip it only when it is
    // actually there: part names without one are accepted by getPartByPath,
    // and blindly dropping the first character would corrupt their name.
    const entryName = part.partname.startsWith("/")
      ? part.partname.substring(1)
      : part.partname;
    this.pkg.zip.file(entryName, xmlStr);
  }

  return this.pkg.zip.generateAsync({ type: "nodebuffer" });
}
|
|
201
|
-
}
|
|
243
|
+
}
|
package/src/docx/dom.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DOMParser, XMLSerializer } from
|
|
1
|
+
import { DOMParser, XMLSerializer } from "@xmldom/xmldom";
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Simulates docx.oxml.ns.qn. In xmldom, namespaces are preserved in tagName.
|
|
@@ -11,7 +11,10 @@ export const qn = (name: string) => name;
|
|
|
11
11
|
export function findChild(element: Element, tagName: string): Element | null {
|
|
12
12
|
for (let i = 0; i < element.childNodes.length; i++) {
|
|
13
13
|
const child = element.childNodes[i];
|
|
14
|
-
if (
|
|
14
|
+
if (
|
|
15
|
+
child.nodeType === 1 /* ELEMENT_NODE */ &&
|
|
16
|
+
(child as Element).tagName === tagName
|
|
17
|
+
) {
|
|
15
18
|
return child as Element;
|
|
16
19
|
}
|
|
17
20
|
}
|
|
@@ -35,7 +38,10 @@ export function findChildren(element: Element, tagName: string): Element[] {
|
|
|
35
38
|
/**
 * Simulates lxml element.findall(".//w:tag"): gathers ALL descendants of
 * `element` that carry the given tag name, at any depth.
 *
 * @param element Element whose descendants are searched.
 * @param tagName Tag name handed to `getElementsByTagName`.
 * @returns A new array holding the matches.
 */
export function findAllDescendants(
  element: Element,
  tagName: string,
): Element[] {
  const matches = element.getElementsByTagName(tagName);
  // Copy the returned collection into a plain array.
  return Array.from(matches);
}
|
|
41
47
|
|
|
@@ -43,12 +49,65 @@ export function findAllDescendants(element: Element, tagName: string): Element[]
|
|
|
43
49
|
* Parses raw XML strings into xmldom Documents.
|
|
44
50
|
*/
|
|
45
51
|
export function parseXml(xmlString: string): Document {
  // A fresh parser per call; the input is always parsed as "text/xml".
  const parser = new DOMParser();
  return parser.parseFromString(xmlString, "text/xml");
}
|
|
48
54
|
|
|
49
55
|
/**
 * Serializes an xmldom Document or Element back to a string,
 * enforcing deterministic attribute ordering on the root element.
 *
 * @param node Document or Element to serialize.
 * @returns The serialized XML with the root start tag's attributes reordered
 *   (see `sortRootAttributes`); all other text is the serializer's output.
 */
export function serializeXml(node: Node): string {
  const xml = new XMLSerializer().serializeToString(node);
  return sortRootAttributes(xml);
}

/**
 * BUG-11: deterministic namespace ordering on root elements.
 *
 * Rewrites the root start tag of `xml` so that `xmlns` declarations come
 * first and all other attributes follow, each group sorted by attribute
 * name. Attribute text (quotes, spacing around "=") is kept verbatim and
 * attributes are joined by single spaces.
 *
 * The input is returned unchanged when the first start tag that carries
 * attributes is not the root (anything other than a single `<?xml ...?>`
 * declaration precedes it) or when no well-formed attributed tag is found.
 */
function sortRootAttributes(xml: string): string {
  // Name characters are matched permissively ("-" and "." included) and
  // attribute values are matched quote-aware, so a ">" or a newline inside a
  // value neither ends the tag early nor makes an attribute disappear.
  // The name class excludes "?", "!" and "/", so declarations, comments,
  // DOCTYPE and closing tags can never be picked up as the tag.
  const rootTagRegex =
    /<([^\s<>\/=?!"']+)((?:\s+[^\s<>\/=?!"']+\s*=\s*(?:"[^"]*"|'[^']*'))+)\s*(\/?>)/;
  const match = rootTagRegex.exec(xml);
  if (match === null) return xml;

  const index = match.index;
  const textBefore = xml.substring(0, index);

  // Ensure this is the absolute root tag (only <?xml...?> allowed before it)
  const isRoot =
    !textBefore.includes("<") ||
    (textBefore.trim().startsWith("<?xml") &&
      (textBefore.match(/</g) || []).length === 1);
  if (!isRoot) return xml;

  const fullTag = match[0];
  const elemStart = `<${match[1]}`;
  const attrsStr = match[2];
  const tagEnd = match[3];

  // Same name/value grammar as in rootTagRegex, so every attribute the tag
  // match accepted is recovered here in full.
  const attrRegex = /([^\s<>\/=?!"']+)\s*=\s*(?:"[^"]*"|'[^']*')/g;
  const attrs: { name: string; text: string }[] = [];
  let m: RegExpExecArray | null;
  while ((m = attrRegex.exec(attrsStr)) !== null) {
    attrs.push({ name: m[1], text: m[0] });
  }

  // Sort attributes: xmlns definitions first, then standard attributes
  attrs.sort((a, b) => {
    const aIsXmlns = a.name.startsWith("xmlns");
    const bIsXmlns = b.name.startsWith("xmlns");
    if (aIsXmlns && !bIsXmlns) return -1;
    if (!aIsXmlns && bIsXmlns) return 1;
    return a.name < b.name ? -1 : a.name > b.name ? 1 : 0;
  });

  const newTag = `${elemStart} ${attrs.map((a) => a.text).join(" ")}${tagEnd}`;
  return (
    xml.substring(0, index) + newTag + xml.substring(index + fullTag.length)
  );
}
|