docxmlater 0.28.1 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +330 -4
- package/dist/core/Document.d.ts +66 -28
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +590 -90
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentGenerator.d.ts +15 -13
- package/dist/core/DocumentGenerator.d.ts.map +1 -1
- package/dist/core/DocumentGenerator.js +74 -13
- package/dist/core/DocumentGenerator.js.map +1 -1
- package/dist/core/DocumentParser.d.ts +40 -13
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +800 -316
- package/dist/core/DocumentParser.js.map +1 -1
- package/dist/core/DocumentValidator.d.ts +2 -1
- package/dist/core/DocumentValidator.d.ts.map +1 -1
- package/dist/core/DocumentValidator.js.map +1 -1
- package/dist/elements/Bookmark.d.ts +1 -0
- package/dist/elements/Bookmark.d.ts.map +1 -1
- package/dist/elements/Bookmark.js +1 -1
- package/dist/elements/Bookmark.js.map +1 -1
- package/dist/elements/Hyperlink.d.ts +1 -0
- package/dist/elements/Hyperlink.d.ts.map +1 -1
- package/dist/elements/Hyperlink.js +23 -0
- package/dist/elements/Hyperlink.js.map +1 -1
- package/dist/elements/ImageManager.d.ts +2 -0
- package/dist/elements/ImageManager.d.ts.map +1 -1
- package/dist/elements/ImageManager.js +22 -0
- package/dist/elements/ImageManager.js.map +1 -1
- package/dist/elements/ImageRun.d.ts +10 -0
- package/dist/elements/ImageRun.d.ts.map +1 -0
- package/dist/elements/ImageRun.js +23 -0
- package/dist/elements/ImageRun.js.map +1 -0
- package/dist/elements/Paragraph.d.ts +16 -1
- package/dist/elements/Paragraph.d.ts.map +1 -1
- package/dist/elements/Paragraph.js +146 -2
- package/dist/elements/Paragraph.js.map +1 -1
- package/dist/elements/Run.d.ts +4 -0
- package/dist/elements/Run.d.ts.map +1 -1
- package/dist/elements/Run.js +27 -1
- package/dist/elements/Run.js.map +1 -1
- package/dist/elements/Section.d.ts.map +1 -1
- package/dist/elements/Section.js +3 -8
- package/dist/elements/Section.js.map +1 -1
- package/dist/elements/StructuredDocumentTag.d.ts +32 -0
- package/dist/elements/StructuredDocumentTag.d.ts.map +1 -0
- package/dist/elements/StructuredDocumentTag.js +94 -0
- package/dist/elements/StructuredDocumentTag.js.map +1 -0
- package/dist/elements/Table.d.ts +24 -0
- package/dist/elements/Table.d.ts.map +1 -1
- package/dist/elements/Table.js +177 -3
- package/dist/elements/Table.js.map +1 -1
- package/dist/elements/TableOfContents.d.ts +33 -0
- package/dist/elements/TableOfContents.d.ts.map +1 -1
- package/dist/elements/TableOfContents.js +129 -1
- package/dist/elements/TableOfContents.js.map +1 -1
- package/dist/formatting/AbstractNumbering.d.ts +1 -0
- package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
- package/dist/formatting/AbstractNumbering.js +30 -0
- package/dist/formatting/AbstractNumbering.js.map +1 -1
- package/dist/formatting/NumberingInstance.d.ts +1 -0
- package/dist/formatting/NumberingInstance.d.ts.map +1 -1
- package/dist/formatting/NumberingInstance.js +16 -0
- package/dist/formatting/NumberingInstance.js.map +1 -1
- package/dist/formatting/NumberingLevel.d.ts +1 -0
- package/dist/formatting/NumberingLevel.d.ts.map +1 -1
- package/dist/formatting/NumberingLevel.js +59 -0
- package/dist/formatting/NumberingLevel.js.map +1 -1
- package/dist/formatting/NumberingManager.d.ts +11 -0
- package/dist/formatting/NumberingManager.d.ts.map +1 -1
- package/dist/formatting/NumberingManager.js +92 -0
- package/dist/formatting/NumberingManager.js.map +1 -1
- package/dist/formatting/Style.d.ts +2 -0
- package/dist/formatting/Style.d.ts.map +1 -1
- package/dist/formatting/Style.js +49 -0
- package/dist/formatting/Style.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -2
- package/dist/index.js.map +1 -1
- package/dist/xml/XMLBuilder.d.ts +4 -1
- package/dist/xml/XMLBuilder.d.ts.map +1 -1
- package/dist/xml/XMLBuilder.js +134 -31
- package/dist/xml/XMLBuilder.js.map +1 -1
- package/dist/xml/XMLParser.d.ts +1 -0
- package/dist/xml/XMLParser.d.ts.map +1 -1
- package/dist/xml/XMLParser.js +75 -42
- package/dist/xml/XMLParser.js.map +1 -1
- package/dist/zip/ZipHandler.d.ts.map +1 -1
- package/dist/zip/ZipHandler.js +6 -1
- package/dist/zip/ZipHandler.js.map +1 -1
- package/dist/zip/ZipReader.d.ts.map +1 -1
- package/dist/zip/ZipReader.js +2 -2
- package/dist/zip/ZipReader.js.map +1 -1
- package/dist/zip/ZipWriter.d.ts.map +1 -1
- package/dist/zip/ZipWriter.js +13 -8
- package/dist/zip/ZipWriter.js.map +1 -1
- package/package.json +1 -1
|
@@ -4,12 +4,17 @@ exports.DocumentParser = void 0;
|
|
|
4
4
|
const types_1 = require("../zip/types");
|
|
5
5
|
const Paragraph_1 = require("../elements/Paragraph");
|
|
6
6
|
const Run_1 = require("../elements/Run");
|
|
7
|
+
const Hyperlink_1 = require("../elements/Hyperlink");
|
|
7
8
|
const Table_1 = require("../elements/Table");
|
|
9
|
+
const TableRow_1 = require("../elements/TableRow");
|
|
8
10
|
const TableCell_1 = require("../elements/TableCell");
|
|
9
|
-
const
|
|
11
|
+
const Section_1 = require("../elements/Section");
|
|
10
12
|
const XMLBuilder_1 = require("../xml/XMLBuilder");
|
|
11
13
|
const XMLParser_1 = require("../xml/XMLParser");
|
|
12
14
|
const RelationshipManager_1 = require("./RelationshipManager");
|
|
15
|
+
const Style_1 = require("../formatting/Style");
|
|
16
|
+
const AbstractNumbering_1 = require("../formatting/AbstractNumbering");
|
|
17
|
+
const NumberingInstance_1 = require("../formatting/NumberingInstance");
|
|
13
18
|
class DocumentParser {
|
|
14
19
|
parseErrors = [];
|
|
15
20
|
strictParsing;
|
|
@@ -22,60 +27,182 @@ class DocumentParser {
|
|
|
22
27
|
clearParseErrors() {
|
|
23
28
|
this.parseErrors = [];
|
|
24
29
|
}
|
|
25
|
-
async parseDocument(zipHandler, relationshipManager) {
|
|
30
|
+
async parseDocument(zipHandler, relationshipManager, imageManager) {
|
|
26
31
|
const docXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
|
|
27
32
|
if (!docXml) {
|
|
28
|
-
throw new Error(
|
|
33
|
+
throw new Error("Invalid document: word/document.xml not found");
|
|
29
34
|
}
|
|
30
35
|
const parsedRelationshipManager = this.parseRelationships(zipHandler, relationshipManager);
|
|
31
36
|
const properties = this.parseProperties(zipHandler);
|
|
32
|
-
const bodyElements = this.parseBodyElements(docXml, parsedRelationshipManager);
|
|
33
|
-
|
|
37
|
+
const bodyElements = await this.parseBodyElements(docXml, parsedRelationshipManager, zipHandler, imageManager);
|
|
38
|
+
const styles = this.parseStyles(zipHandler);
|
|
39
|
+
const numbering = this.parseNumbering(zipHandler);
|
|
40
|
+
const section = this.parseSectionProperties(docXml);
|
|
41
|
+
const namespaces = this.parseNamespaces(docXml);
|
|
42
|
+
return {
|
|
43
|
+
bodyElements,
|
|
44
|
+
properties,
|
|
45
|
+
relationshipManager: parsedRelationshipManager,
|
|
46
|
+
styles,
|
|
47
|
+
abstractNumberings: numbering.abstractNumberings,
|
|
48
|
+
numberingInstances: numbering.numberingInstances,
|
|
49
|
+
section,
|
|
50
|
+
namespaces,
|
|
51
|
+
};
|
|
34
52
|
}
|
|
35
|
-
parseBodyElements(docXml, relationshipManager) {
|
|
53
|
+
async parseBodyElements(docXml, relationshipManager, zipHandler, imageManager) {
|
|
36
54
|
const bodyElements = [];
|
|
37
|
-
try {
|
|
38
|
-
XMLParser_1.XMLParser.validateSize(docXml);
|
|
39
|
-
}
|
|
40
|
-
catch (error) {
|
|
41
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
42
|
-
this.parseErrors.push({ element: 'document', error: err });
|
|
43
|
-
if (this.strictParsing) {
|
|
44
|
-
throw err;
|
|
45
|
-
}
|
|
46
|
-
return bodyElements;
|
|
47
|
-
}
|
|
48
55
|
const bodyContent = XMLParser_1.XMLParser.extractBody(docXml);
|
|
49
56
|
if (!bodyContent) {
|
|
50
57
|
return bodyElements;
|
|
51
58
|
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
const
|
|
55
|
-
|
|
56
|
-
|
|
59
|
+
let pos = 0;
|
|
60
|
+
while (pos < bodyContent.length) {
|
|
61
|
+
const nextP = this.findNextTopLevelTag(bodyContent, "w:p", pos);
|
|
62
|
+
const nextTbl = this.findNextTopLevelTag(bodyContent, "w:tbl", pos);
|
|
63
|
+
const nextSdt = this.findNextTopLevelTag(bodyContent, "w:sdt", pos);
|
|
64
|
+
const candidates = [];
|
|
65
|
+
if (nextP !== -1)
|
|
66
|
+
candidates.push({ type: "p", pos: nextP });
|
|
67
|
+
if (nextTbl !== -1)
|
|
68
|
+
candidates.push({ type: "tbl", pos: nextTbl });
|
|
69
|
+
if (nextSdt !== -1)
|
|
70
|
+
candidates.push({ type: "sdt", pos: nextSdt });
|
|
71
|
+
if (candidates.length === 0)
|
|
72
|
+
break;
|
|
73
|
+
candidates.sort((a, b) => a.pos - b.pos);
|
|
74
|
+
const next = candidates[0];
|
|
75
|
+
if (next) {
|
|
76
|
+
if (next.type === "p") {
|
|
77
|
+
const elementXml = this.extractSingleElement(bodyContent, "w:p", next.pos);
|
|
78
|
+
if (elementXml) {
|
|
79
|
+
const parsed = XMLParser_1.XMLParser.parseToObject(elementXml, { trimValues: false });
|
|
80
|
+
const paragraph = await this.parseParagraphFromObject(parsed["w:p"], relationshipManager, zipHandler, imageManager);
|
|
81
|
+
if (paragraph)
|
|
82
|
+
bodyElements.push(paragraph);
|
|
83
|
+
pos = next.pos + elementXml.length;
|
|
84
|
+
}
|
|
85
|
+
else {
|
|
86
|
+
pos = next.pos + 1;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
else if (next.type === "tbl") {
|
|
90
|
+
const elementXml = this.extractSingleElement(bodyContent, "w:tbl", next.pos);
|
|
91
|
+
if (elementXml) {
|
|
92
|
+
const parsed = XMLParser_1.XMLParser.parseToObject(elementXml, { trimValues: false });
|
|
93
|
+
const table = await this.parseTableFromObject(parsed["w:tbl"], relationshipManager, zipHandler, imageManager);
|
|
94
|
+
if (table)
|
|
95
|
+
bodyElements.push(table);
|
|
96
|
+
pos = next.pos + elementXml.length;
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
pos = next.pos + 1;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
else if (next.type === "sdt") {
|
|
103
|
+
const elementXml = this.extractSingleElement(bodyContent, "w:sdt", next.pos);
|
|
104
|
+
if (elementXml) {
|
|
105
|
+
const parsed = XMLParser_1.XMLParser.parseToObject(elementXml, { trimValues: false });
|
|
106
|
+
const sdt = await this.parseSDTFromObject(parsed["w:sdt"], relationshipManager, zipHandler, imageManager);
|
|
107
|
+
if (sdt)
|
|
108
|
+
bodyElements.push(sdt);
|
|
109
|
+
pos = next.pos + elementXml.length;
|
|
110
|
+
}
|
|
111
|
+
else {
|
|
112
|
+
pos = next.pos + 1;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
this.validateLoadedContent(bodyElements);
|
|
118
|
+
return bodyElements;
|
|
119
|
+
}
|
|
120
|
+
findNextTag(content, tagName, startPos) {
|
|
121
|
+
const tag = `<${tagName}`;
|
|
122
|
+
let pos = content.indexOf(tag, startPos);
|
|
123
|
+
while (pos !== -1) {
|
|
124
|
+
const charAfterTag = content[pos + tag.length];
|
|
125
|
+
if (charAfterTag &&
|
|
126
|
+
charAfterTag !== ">" &&
|
|
127
|
+
charAfterTag !== "/" &&
|
|
128
|
+
charAfterTag !== " " &&
|
|
129
|
+
charAfterTag !== "\t" &&
|
|
130
|
+
charAfterTag !== "\n" &&
|
|
131
|
+
charAfterTag !== "\r") {
|
|
132
|
+
pos = content.indexOf(tag, pos + tag.length);
|
|
133
|
+
continue;
|
|
57
134
|
}
|
|
135
|
+
return pos;
|
|
136
|
+
}
|
|
137
|
+
return -1;
|
|
138
|
+
}
|
|
139
|
+
findNextTopLevelTag(content, tagName, startPos) {
|
|
140
|
+
let pos = startPos;
|
|
141
|
+
while (pos < content.length) {
|
|
142
|
+
const tagPos = this.findNextTag(content, tagName, pos);
|
|
143
|
+
if (tagPos === -1) {
|
|
144
|
+
return -1;
|
|
145
|
+
}
|
|
146
|
+
const isInsideTable = this.isPositionInsideTable(content, tagPos);
|
|
147
|
+
if (!isInsideTable) {
|
|
148
|
+
return tagPos;
|
|
149
|
+
}
|
|
150
|
+
pos = tagPos + 1;
|
|
151
|
+
}
|
|
152
|
+
return -1;
|
|
153
|
+
}
|
|
154
|
+
isPositionInsideTable(content, position) {
|
|
155
|
+
const beforeContent = content.substring(0, position);
|
|
156
|
+
const openTableTags = (beforeContent.match(/<w:tbl[\s>]/g) || []).length;
|
|
157
|
+
const closeTableTags = (beforeContent.match(/<\/w:tbl>/g) || []).length;
|
|
158
|
+
return openTableTags > closeTableTags;
|
|
159
|
+
}
|
|
160
|
+
extractSingleElement(content, tagName, startPos) {
|
|
161
|
+
const openTag = `<${tagName}`;
|
|
162
|
+
const closeTag = `</${tagName}>`;
|
|
163
|
+
const selfClosingEnd = "/>";
|
|
164
|
+
if (!content.substring(startPos).startsWith(openTag)) {
|
|
165
|
+
return "";
|
|
166
|
+
}
|
|
167
|
+
const openEnd = content.indexOf(">", startPos);
|
|
168
|
+
if (openEnd === -1) {
|
|
169
|
+
return "";
|
|
58
170
|
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
171
|
+
if (content.substring(openEnd - 1, openEnd + 1) === selfClosingEnd) {
|
|
172
|
+
return content.substring(startPos, openEnd + 1);
|
|
173
|
+
}
|
|
174
|
+
let depth = 1;
|
|
175
|
+
let pos = openEnd + 1;
|
|
176
|
+
while (pos < content.length && depth > 0) {
|
|
177
|
+
const nextOpen = content.indexOf(openTag, pos);
|
|
178
|
+
const nextClose = content.indexOf(closeTag, pos);
|
|
179
|
+
if (nextClose === -1) {
|
|
180
|
+
return "";
|
|
181
|
+
}
|
|
182
|
+
if (nextOpen !== -1 && nextOpen < nextClose) {
|
|
183
|
+
const charAfter = content[nextOpen + openTag.length];
|
|
184
|
+
if (charAfter === ">" ||
|
|
185
|
+
charAfter === "/" ||
|
|
186
|
+
charAfter === " " ||
|
|
187
|
+
charAfter === "\t" ||
|
|
188
|
+
charAfter === "\n" ||
|
|
189
|
+
charAfter === "\r") {
|
|
190
|
+
depth++;
|
|
191
|
+
pos = nextOpen + openTag.length;
|
|
192
|
+
}
|
|
193
|
+
else {
|
|
194
|
+
pos = nextOpen + openTag.length;
|
|
65
195
|
}
|
|
66
196
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
if (this.strictParsing) {
|
|
73
|
-
throw error;
|
|
197
|
+
else {
|
|
198
|
+
depth--;
|
|
199
|
+
pos = nextClose + closeTag.length;
|
|
200
|
+
if (depth === 0) {
|
|
201
|
+
return content.substring(startPos, pos);
|
|
74
202
|
}
|
|
75
203
|
}
|
|
76
204
|
}
|
|
77
|
-
|
|
78
|
-
return bodyElements;
|
|
205
|
+
return "";
|
|
79
206
|
}
|
|
80
207
|
validateLoadedContent(bodyElements) {
|
|
81
208
|
const paragraphs = bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
|
|
@@ -108,271 +235,158 @@ class DocumentParser {
|
|
|
108
235
|
` - Text content was stripped by another application\n` +
|
|
109
236
|
` - Encoding issues during document creation\n` +
|
|
110
237
|
`Original document structure is preserved, but text may be lost.`);
|
|
111
|
-
this.parseErrors.push({
|
|
238
|
+
this.parseErrors.push({
|
|
239
|
+
element: "document-validation",
|
|
240
|
+
error: warning,
|
|
241
|
+
});
|
|
112
242
|
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
113
243
|
}
|
|
114
244
|
else if (emptyPercentage > 50 && emptyRuns > 5) {
|
|
115
245
|
const warning = new Error(`Document has ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) with no text. ` +
|
|
116
246
|
`This is higher than normal and may indicate partial data loss.`);
|
|
117
|
-
this.parseErrors.push({
|
|
247
|
+
this.parseErrors.push({
|
|
248
|
+
element: "document-validation",
|
|
249
|
+
error: warning,
|
|
250
|
+
});
|
|
118
251
|
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
119
252
|
}
|
|
120
253
|
}
|
|
121
254
|
}
|
|
122
|
-
|
|
255
|
+
async parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager) {
|
|
123
256
|
try {
|
|
124
257
|
const paragraph = new Paragraph_1.Paragraph();
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
const hyperlink = this.parseHyperlink(hyperlinkXml, relationshipManager);
|
|
129
|
-
if (hyperlink) {
|
|
130
|
-
paragraph.addHyperlink(hyperlink);
|
|
131
|
-
}
|
|
258
|
+
const paraId = paraObj["w14:paraId"];
|
|
259
|
+
if (paraId) {
|
|
260
|
+
paragraph.formatting.paraId = paraId;
|
|
132
261
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
262
|
+
this.parseParagraphPropertiesFromObject(paraObj["w:pPr"], paragraph);
|
|
263
|
+
const runs = paraObj["w:r"];
|
|
264
|
+
const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
|
|
265
|
+
for (const child of runChildren) {
|
|
266
|
+
if (child["w:drawing"]) {
|
|
267
|
+
if (zipHandler && imageManager) {
|
|
268
|
+
const imageRun = await this.parseDrawingFromObject(child["w:drawing"], zipHandler, relationshipManager, imageManager);
|
|
269
|
+
if (imageRun) {
|
|
270
|
+
paragraph.addRun(imageRun);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
else {
|
|
275
|
+
const run = this.parseRunFromObject(child);
|
|
276
|
+
if (run) {
|
|
277
|
+
paragraph.addRun(run);
|
|
278
|
+
}
|
|
279
|
+
}
|
|
136
280
|
}
|
|
137
|
-
const
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
281
|
+
const hyperlinks = paraObj["w:hyperlink"];
|
|
282
|
+
const hyperlinkChildren = Array.isArray(hyperlinks) ? hyperlinks : (hyperlinks ? [hyperlinks] : []);
|
|
283
|
+
for (const hyperlinkObj of hyperlinkChildren) {
|
|
284
|
+
const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
|
|
285
|
+
if (hyperlink) {
|
|
286
|
+
paragraph.addHyperlink(hyperlink);
|
|
142
287
|
}
|
|
143
288
|
}
|
|
144
289
|
return paragraph;
|
|
145
290
|
}
|
|
146
291
|
catch (error) {
|
|
147
292
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
148
|
-
this.parseErrors.push({ element:
|
|
293
|
+
this.parseErrors.push({ element: "paragraph", error: err });
|
|
149
294
|
if (this.strictParsing) {
|
|
150
295
|
throw new Error(`Failed to parse paragraph: ${err.message}`);
|
|
151
296
|
}
|
|
152
297
|
return null;
|
|
153
298
|
}
|
|
154
299
|
}
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
if (!pPr) {
|
|
300
|
+
parseParagraphPropertiesFromObject(pPrObj, paragraph) {
|
|
301
|
+
if (!pPrObj)
|
|
158
302
|
return;
|
|
303
|
+
if (pPrObj["w:jc"]?.["@_w:val"]) {
|
|
304
|
+
paragraph.setAlignment(pPrObj["w:jc"]["@_w:val"]);
|
|
159
305
|
}
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
const value = XMLParser_1.XMLParser.extractAttribute(jcElements[0], 'w:val');
|
|
163
|
-
if (value) {
|
|
164
|
-
const validAlignments = ['left', 'center', 'right', 'justify'];
|
|
165
|
-
if (validAlignments.includes(value)) {
|
|
166
|
-
paragraph.setAlignment(value);
|
|
167
|
-
}
|
|
168
|
-
}
|
|
306
|
+
if (pPrObj["w:pStyle"]?.["@_w:val"]) {
|
|
307
|
+
paragraph.setStyle(pPrObj["w:pStyle"]["@_w:val"]);
|
|
169
308
|
}
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
const
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
if (
|
|
184
|
-
paragraph.
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
paragraph.setRightIndent(parseInt(right, 10));
|
|
188
|
-
}
|
|
189
|
-
if (firstLine) {
|
|
190
|
-
paragraph.setFirstLineIndent(parseInt(firstLine, 10));
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
const spacingElements = XMLParser_1.XMLParser.extractElements(pPr, 'w:spacing');
|
|
194
|
-
if (spacingElements.length > 0) {
|
|
195
|
-
const spacingElement = spacingElements[0];
|
|
196
|
-
const before = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:before');
|
|
197
|
-
const after = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:after');
|
|
198
|
-
const line = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:line');
|
|
199
|
-
const lineRule = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:lineRule');
|
|
200
|
-
if (before) {
|
|
201
|
-
paragraph.setSpaceBefore(parseInt(before, 10));
|
|
202
|
-
}
|
|
203
|
-
if (after) {
|
|
204
|
-
paragraph.setSpaceAfter(parseInt(after, 10));
|
|
205
|
-
}
|
|
206
|
-
if (line) {
|
|
207
|
-
let validatedLineRule;
|
|
208
|
-
if (lineRule) {
|
|
209
|
-
const validLineRules = ['auto', 'exact', 'atLeast'];
|
|
210
|
-
if (validLineRules.includes(lineRule)) {
|
|
211
|
-
validatedLineRule = lineRule;
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
paragraph.setLineSpacing(parseInt(line, 10), validatedLineRule);
|
|
309
|
+
if (pPrObj["w:ind"]) {
|
|
310
|
+
const ind = pPrObj["w:ind"];
|
|
311
|
+
if (ind["@_w:left"])
|
|
312
|
+
paragraph.setLeftIndent(parseInt(ind["@_w:left"], 10));
|
|
313
|
+
if (ind["@_w:right"])
|
|
314
|
+
paragraph.setRightIndent(parseInt(ind["@_w:right"], 10));
|
|
315
|
+
if (ind["@_w:firstLine"])
|
|
316
|
+
paragraph.setFirstLineIndent(parseInt(ind["@_w:firstLine"], 10));
|
|
317
|
+
}
|
|
318
|
+
if (pPrObj["w:spacing"]) {
|
|
319
|
+
const spacing = pPrObj["w:spacing"];
|
|
320
|
+
if (spacing["@_w:before"])
|
|
321
|
+
paragraph.setSpaceBefore(parseInt(spacing["@_w:before"], 10));
|
|
322
|
+
if (spacing["@_w:after"])
|
|
323
|
+
paragraph.setSpaceAfter(parseInt(spacing["@_w:after"], 10));
|
|
324
|
+
if (spacing["@_w:line"]) {
|
|
325
|
+
paragraph.setLineSpacing(parseInt(spacing["@_w:line"], 10), spacing["@_w:lineRule"]);
|
|
215
326
|
}
|
|
216
327
|
}
|
|
217
|
-
if (
|
|
328
|
+
if (pPrObj["w:pageBreakBefore"])
|
|
329
|
+
paragraph.formatting.pageBreakBefore = true;
|
|
330
|
+
if (pPrObj["w:keepNext"])
|
|
218
331
|
paragraph.setKeepNext(true);
|
|
219
|
-
if (
|
|
332
|
+
if (pPrObj["w:keepLines"])
|
|
220
333
|
paragraph.setKeepLines(true);
|
|
221
|
-
if (
|
|
222
|
-
paragraph.setPageBreakBefore(true);
|
|
223
|
-
if (XMLParser_1.XMLParser.hasSelfClosingTag(pPr, 'w:contextualSpacing')) {
|
|
334
|
+
if (pPrObj["w:contextualSpacing"])
|
|
224
335
|
paragraph.setContextualSpacing(true);
|
|
336
|
+
if (pPrObj["w:numPr"]) {
|
|
337
|
+
const numPr = pPrObj["w:numPr"];
|
|
338
|
+
const numId = numPr["w:numId"]?.["@_w:val"];
|
|
339
|
+
const ilvl = numPr["w:ilvl"]?.["@_w:val"] || "0";
|
|
340
|
+
if (numId) {
|
|
341
|
+
paragraph.setNumbering(parseInt(numId, 10), parseInt(ilvl, 10));
|
|
342
|
+
}
|
|
225
343
|
}
|
|
226
344
|
}
|
|
227
|
-
|
|
345
|
+
parseRunFromObject(runObj) {
|
|
228
346
|
try {
|
|
229
|
-
const
|
|
230
|
-
|
|
231
|
-
|
|
347
|
+
const textElement = runObj["w:t"];
|
|
348
|
+
let text = (typeof textElement === 'object' && textElement !== null)
|
|
349
|
+
? (textElement["#text"] || "")
|
|
350
|
+
: (textElement || "");
|
|
351
|
+
text = XMLBuilder_1.XMLBuilder.unescapeXml(text);
|
|
352
|
+
const run = new Run_1.Run(text, { cleanXmlFromText: false });
|
|
353
|
+
this.parseRunPropertiesFromObject(runObj["w:rPr"], run);
|
|
232
354
|
return run;
|
|
233
355
|
}
|
|
234
356
|
catch (error) {
|
|
235
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
236
|
-
this.parseErrors.push({ element: 'run', error: err });
|
|
237
|
-
if (this.strictParsing) {
|
|
238
|
-
throw new Error(`Failed to parse run: ${err.message}`);
|
|
239
|
-
}
|
|
240
357
|
return null;
|
|
241
358
|
}
|
|
242
359
|
}
|
|
243
|
-
|
|
244
|
-
const rPr = XMLParser_1.XMLParser.extractBetweenTags(runXml, '<w:rPr', '</w:rPr>');
|
|
245
|
-
if (!rPr) {
|
|
246
|
-
return;
|
|
247
|
-
}
|
|
248
|
-
if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:b')) {
|
|
249
|
-
run.setBold(true);
|
|
250
|
-
}
|
|
251
|
-
if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:i')) {
|
|
252
|
-
run.setItalic(true);
|
|
253
|
-
}
|
|
254
|
-
const underlineElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:u');
|
|
255
|
-
if (underlineElements.length > 0) {
|
|
256
|
-
const value = XMLParser_1.XMLParser.extractAttribute(underlineElements[0], 'w:val');
|
|
257
|
-
if (value) {
|
|
258
|
-
const validUnderlineStyles = [
|
|
259
|
-
'single',
|
|
260
|
-
'double',
|
|
261
|
-
'thick',
|
|
262
|
-
'dotted',
|
|
263
|
-
'dash',
|
|
264
|
-
'dotDash',
|
|
265
|
-
'dotDotDash',
|
|
266
|
-
'wave',
|
|
267
|
-
];
|
|
268
|
-
if (validUnderlineStyles.includes(value) ||
|
|
269
|
-
value === 'true' ||
|
|
270
|
-
value === 'false') {
|
|
271
|
-
const underlineStyle = value;
|
|
272
|
-
run.setUnderline(underlineStyle);
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
else {
|
|
276
|
-
run.setUnderline(true);
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:strike')) {
|
|
280
|
-
run.setStrike(true);
|
|
281
|
-
}
|
|
282
|
-
const vertAlignElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:vertAlign');
|
|
283
|
-
if (vertAlignElements.length > 0) {
|
|
284
|
-
const value = XMLParser_1.XMLParser.extractAttribute(vertAlignElements[0], 'w:val');
|
|
285
|
-
if (value === 'subscript') {
|
|
286
|
-
run.setSubscript(true);
|
|
287
|
-
}
|
|
288
|
-
else if (value === 'superscript') {
|
|
289
|
-
run.setSuperscript(true);
|
|
290
|
-
}
|
|
291
|
-
}
|
|
292
|
-
const fontElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:rFonts');
|
|
293
|
-
if (fontElements.length > 0) {
|
|
294
|
-
const fontName = XMLParser_1.XMLParser.extractAttribute(fontElements[0], 'w:ascii');
|
|
295
|
-
if (fontName) {
|
|
296
|
-
run.setFont(fontName);
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
const sizeElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:sz');
|
|
300
|
-
if (sizeElements.length > 0) {
|
|
301
|
-
const halfPoints = XMLParser_1.XMLParser.extractAttribute(sizeElements[0], 'w:val');
|
|
302
|
-
if (halfPoints) {
|
|
303
|
-
run.setSize(parseInt(halfPoints, 10) / 2);
|
|
304
|
-
}
|
|
305
|
-
}
|
|
306
|
-
const colorElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:color');
|
|
307
|
-
if (colorElements.length > 0) {
|
|
308
|
-
const colorValue = XMLParser_1.XMLParser.extractAttribute(colorElements[0], 'w:val');
|
|
309
|
-
if (colorValue) {
|
|
310
|
-
run.setColor(colorValue);
|
|
311
|
-
}
|
|
312
|
-
}
|
|
313
|
-
const highlightElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:highlight');
|
|
314
|
-
if (highlightElements.length > 0) {
|
|
315
|
-
const value = XMLParser_1.XMLParser.extractAttribute(highlightElements[0], 'w:val');
|
|
316
|
-
if (value) {
|
|
317
|
-
const validHighlightColors = [
|
|
318
|
-
'yellow',
|
|
319
|
-
'green',
|
|
320
|
-
'cyan',
|
|
321
|
-
'magenta',
|
|
322
|
-
'blue',
|
|
323
|
-
'red',
|
|
324
|
-
'darkBlue',
|
|
325
|
-
'darkCyan',
|
|
326
|
-
'darkGreen',
|
|
327
|
-
'darkMagenta',
|
|
328
|
-
'darkRed',
|
|
329
|
-
'darkYellow',
|
|
330
|
-
'darkGray',
|
|
331
|
-
'lightGray',
|
|
332
|
-
'black',
|
|
333
|
-
'white',
|
|
334
|
-
];
|
|
335
|
-
if (validHighlightColors.includes(value)) {
|
|
336
|
-
const highlightColor = value;
|
|
337
|
-
run.setHighlight(highlightColor);
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
}
|
|
341
|
-
if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:smallCaps')) {
|
|
342
|
-
run.setSmallCaps(true);
|
|
343
|
-
}
|
|
344
|
-
if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:caps')) {
|
|
345
|
-
run.setAllCaps(true);
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
parseHyperlink(hyperlinkXml, relationshipManager) {
|
|
360
|
+
parseHyperlinkFromObject(hyperlinkObj, relationshipManager) {
|
|
349
361
|
try {
|
|
350
|
-
const relationshipId =
|
|
351
|
-
const anchor =
|
|
352
|
-
const tooltip =
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
362
|
+
const relationshipId = hyperlinkObj["@_r:id"];
|
|
363
|
+
const anchor = hyperlinkObj["@_w:anchor"];
|
|
364
|
+
const tooltip = hyperlinkObj["@_w:tooltip"];
|
|
365
|
+
const runs = hyperlinkObj["w:r"];
|
|
366
|
+
const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
|
|
367
|
+
const text = runChildren
|
|
368
|
+
.map((runObj) => {
|
|
369
|
+
const textElement = runObj["w:t"];
|
|
370
|
+
let runText = (typeof textElement === 'object' && textElement !== null)
|
|
371
|
+
? (textElement["#text"] || "")
|
|
372
|
+
: (textElement || "");
|
|
373
|
+
return XMLBuilder_1.XMLBuilder.unescapeXml(runText);
|
|
374
|
+
})
|
|
375
|
+
.join('');
|
|
376
|
+
let formatting = {};
|
|
377
|
+
if (runChildren.length > 0 && runChildren[0]["w:rPr"]) {
|
|
378
|
+
const tempRun = new Run_1.Run('');
|
|
379
|
+
this.parseRunPropertiesFromObject(runChildren[0]["w:rPr"], tempRun);
|
|
380
|
+
formatting = tempRun.getFormatting();
|
|
367
381
|
}
|
|
368
382
|
let url;
|
|
369
383
|
if (relationshipId) {
|
|
370
384
|
const relationship = relationshipManager.getRelationship(relationshipId);
|
|
371
|
-
if (relationship
|
|
385
|
+
if (relationship) {
|
|
372
386
|
url = relationship.getTarget();
|
|
373
387
|
}
|
|
374
388
|
}
|
|
375
|
-
|
|
389
|
+
const hyperlink = new Hyperlink_1.Hyperlink({
|
|
376
390
|
url,
|
|
377
391
|
anchor,
|
|
378
392
|
text: text || url || anchor || 'Link',
|
|
@@ -380,73 +394,112 @@ class DocumentParser {
|
|
|
380
394
|
tooltip,
|
|
381
395
|
relationshipId,
|
|
382
396
|
});
|
|
397
|
+
return hyperlink;
|
|
383
398
|
}
|
|
384
399
|
catch (error) {
|
|
385
|
-
|
|
386
|
-
this.parseErrors.push({ element: 'hyperlink', error: err });
|
|
387
|
-
if (this.strictParsing) {
|
|
388
|
-
throw new Error(`Failed to parse hyperlink: ${err.message}`);
|
|
389
|
-
}
|
|
400
|
+
console.warn('[DocumentParser] Failed to parse hyperlink:', error);
|
|
390
401
|
return null;
|
|
391
402
|
}
|
|
392
403
|
}
|
|
393
|
-
|
|
404
|
+
parseRunPropertiesFromObject(rPrObj, run) {
|
|
405
|
+
if (!rPrObj)
|
|
406
|
+
return;
|
|
407
|
+
if (rPrObj["w:b"])
|
|
408
|
+
run.setBold(true);
|
|
409
|
+
if (rPrObj["w:i"])
|
|
410
|
+
run.setItalic(true);
|
|
411
|
+
if (rPrObj["w:strike"])
|
|
412
|
+
run.setStrike(true);
|
|
413
|
+
if (rPrObj["w:smallCaps"])
|
|
414
|
+
run.setSmallCaps(true);
|
|
415
|
+
if (rPrObj["w:caps"])
|
|
416
|
+
run.setAllCaps(true);
|
|
417
|
+
if (rPrObj["w:u"]) {
|
|
418
|
+
const uVal = rPrObj["w:u"]["@_w:val"];
|
|
419
|
+
run.setUnderline(uVal || true);
|
|
420
|
+
}
|
|
421
|
+
if (rPrObj["w:vertAlign"]) {
|
|
422
|
+
const val = rPrObj["w:vertAlign"]["@_w:val"];
|
|
423
|
+
if (val === "subscript")
|
|
424
|
+
run.setSubscript(true);
|
|
425
|
+
if (val === "superscript")
|
|
426
|
+
run.setSuperscript(true);
|
|
427
|
+
}
|
|
428
|
+
if (rPrObj["w:rFonts"]) {
|
|
429
|
+
run.setFont(rPrObj["w:rFonts"]["@_w:ascii"]);
|
|
430
|
+
}
|
|
431
|
+
if (rPrObj["w:sz"]) {
|
|
432
|
+
run.setSize(parseInt(rPrObj["w:sz"]["@_w:val"], 10) / 2);
|
|
433
|
+
}
|
|
434
|
+
if (rPrObj["w:color"]) {
|
|
435
|
+
run.setColor(rPrObj["w:color"]["@_w:val"]);
|
|
436
|
+
}
|
|
437
|
+
if (rPrObj["w:highlight"]) {
|
|
438
|
+
run.setHighlight(rPrObj["w:highlight"]["@_w:val"]);
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
async parseDrawingFromObject(_drawingObj, _zipHandler, _relationshipManager, _imageManager) {
|
|
442
|
+
return null;
|
|
443
|
+
}
|
|
444
|
+
async parseTableFromObject(tableObj, relationshipManager, zipHandler, imageManager) {
|
|
394
445
|
try {
|
|
395
|
-
const
|
|
396
|
-
const rows = [];
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
const
|
|
400
|
-
|
|
401
|
-
|
|
446
|
+
const table = new Table_1.Table();
|
|
447
|
+
const rows = tableObj["w:tr"];
|
|
448
|
+
const rowChildren = Array.isArray(rows) ? rows : (rows ? [rows] : []);
|
|
449
|
+
for (const rowObj of rowChildren) {
|
|
450
|
+
const row = await this.parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager);
|
|
451
|
+
if (row) {
|
|
452
|
+
table.addRow(row);
|
|
402
453
|
}
|
|
403
|
-
if (cells.length > 0) {
|
|
404
|
-
rows.push(cells);
|
|
405
|
-
}
|
|
406
|
-
}
|
|
407
|
-
if (rows.length === 0) {
|
|
408
|
-
return null;
|
|
409
|
-
}
|
|
410
|
-
const firstRow = rows[0];
|
|
411
|
-
if (!firstRow || firstRow.length === 0) {
|
|
412
|
-
return null;
|
|
413
454
|
}
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
for (const paraElement of paraElements) {
|
|
431
|
-
const para = this.parseParagraph(paraElement, relationshipManager);
|
|
432
|
-
if (para) {
|
|
433
|
-
cell.addParagraph(para);
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
}
|
|
455
|
+
return table;
|
|
456
|
+
}
|
|
457
|
+
catch (error) {
|
|
458
|
+
console.warn('[DocumentParser] Failed to parse table:', error);
|
|
459
|
+
return null;
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
async parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager) {
|
|
463
|
+
try {
|
|
464
|
+
const row = new TableRow_1.TableRow();
|
|
465
|
+
const cells = rowObj["w:tc"];
|
|
466
|
+
const cellChildren = Array.isArray(cells) ? cells : (cells ? [cells] : []);
|
|
467
|
+
for (const cellObj of cellChildren) {
|
|
468
|
+
const cell = await this.parseTableCellFromObject(cellObj, relationshipManager, zipHandler, imageManager);
|
|
469
|
+
if (cell) {
|
|
470
|
+
row.addCell(cell);
|
|
437
471
|
}
|
|
438
472
|
}
|
|
439
|
-
return
|
|
473
|
+
return row;
|
|
440
474
|
}
|
|
441
475
|
catch (error) {
|
|
442
|
-
|
|
443
|
-
|
|
476
|
+
console.warn('[DocumentParser] Failed to parse table row:', error);
|
|
477
|
+
return null;
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
async parseTableCellFromObject(cellObj, relationshipManager, zipHandler, imageManager) {
|
|
481
|
+
try {
|
|
482
|
+
const cell = new TableCell_1.TableCell();
|
|
483
|
+
const paragraphs = cellObj["w:p"];
|
|
484
|
+
const paraChildren = Array.isArray(paragraphs) ? paragraphs : (paragraphs ? [paragraphs] : []);
|
|
485
|
+
for (const paraObj of paraChildren) {
|
|
486
|
+
const paragraph = await this.parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager);
|
|
487
|
+
if (paragraph) {
|
|
488
|
+
cell.addParagraph(paragraph);
|
|
489
|
+
}
|
|
444
490
|
}
|
|
491
|
+
return cell;
|
|
492
|
+
}
|
|
493
|
+
catch (error) {
|
|
494
|
+
console.warn('[DocumentParser] Failed to parse table cell:', error);
|
|
445
495
|
return null;
|
|
446
496
|
}
|
|
447
497
|
}
|
|
498
|
+
async parseSDTFromObject(_sdtObj, _relationshipManager, _zipHandler, _imageManager) {
|
|
499
|
+
return null;
|
|
500
|
+
}
|
|
448
501
|
parseRelationships(zipHandler, relationshipManager) {
|
|
449
|
-
const relsPath =
|
|
502
|
+
const relsPath = "word/_rels/document.xml.rels";
|
|
450
503
|
const relsXml = zipHandler.getFileAsString(relsPath);
|
|
451
504
|
if (relsXml) {
|
|
452
505
|
return RelationshipManager_1.RelationshipManager.fromXml(relsXml);
|
|
@@ -463,38 +516,448 @@ class DocumentParser {
|
|
|
463
516
|
return tagContent ? XMLBuilder_1.XMLBuilder.unescapeXml(tagContent) : undefined;
|
|
464
517
|
};
|
|
465
518
|
const properties = {
|
|
466
|
-
title: extractTag(coreXml,
|
|
467
|
-
subject: extractTag(coreXml,
|
|
468
|
-
creator: extractTag(coreXml,
|
|
469
|
-
keywords: extractTag(coreXml,
|
|
470
|
-
description: extractTag(coreXml,
|
|
471
|
-
lastModifiedBy: extractTag(coreXml,
|
|
519
|
+
title: extractTag(coreXml, "dc:title"),
|
|
520
|
+
subject: extractTag(coreXml, "dc:subject"),
|
|
521
|
+
creator: extractTag(coreXml, "dc:creator"),
|
|
522
|
+
keywords: extractTag(coreXml, "cp:keywords"),
|
|
523
|
+
description: extractTag(coreXml, "dc:description"),
|
|
524
|
+
lastModifiedBy: extractTag(coreXml, "cp:lastModifiedBy"),
|
|
472
525
|
};
|
|
473
|
-
const revisionStr = extractTag(coreXml,
|
|
526
|
+
const revisionStr = extractTag(coreXml, "cp:revision");
|
|
474
527
|
if (revisionStr) {
|
|
475
528
|
properties.revision = parseInt(revisionStr, 10);
|
|
476
529
|
}
|
|
477
|
-
const createdStr = extractTag(coreXml,
|
|
530
|
+
const createdStr = extractTag(coreXml, "dcterms:created");
|
|
478
531
|
if (createdStr) {
|
|
479
532
|
properties.created = new Date(createdStr);
|
|
480
533
|
}
|
|
481
|
-
const modifiedStr = extractTag(coreXml,
|
|
534
|
+
const modifiedStr = extractTag(coreXml, "dcterms:modified");
|
|
482
535
|
if (modifiedStr) {
|
|
483
536
|
properties.modified = new Date(modifiedStr);
|
|
484
537
|
}
|
|
485
538
|
return properties;
|
|
486
539
|
}
|
|
540
|
+
parseStyles(zipHandler) {
|
|
541
|
+
const styles = [];
|
|
542
|
+
const stylesXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.STYLES);
|
|
543
|
+
if (!stylesXml) {
|
|
544
|
+
return styles;
|
|
545
|
+
}
|
|
546
|
+
try {
|
|
547
|
+
const styleElements = XMLParser_1.XMLParser.extractElements(stylesXml, "w:style");
|
|
548
|
+
for (const styleXml of styleElements) {
|
|
549
|
+
try {
|
|
550
|
+
const style = this.parseStyle(styleXml);
|
|
551
|
+
if (style) {
|
|
552
|
+
styles.push(style);
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
catch (error) {
|
|
556
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
557
|
+
this.parseErrors.push({ element: "style", error: err });
|
|
558
|
+
if (this.strictParsing) {
|
|
559
|
+
throw error;
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
catch (error) {
|
|
565
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
566
|
+
this.parseErrors.push({ element: "styles.xml", error: err });
|
|
567
|
+
if (this.strictParsing) {
|
|
568
|
+
throw new Error(`Failed to parse styles.xml: ${err.message}`);
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
return styles;
|
|
572
|
+
}
|
|
573
|
+
parseNumbering(zipHandler) {
|
|
574
|
+
const abstractNumberings = [];
|
|
575
|
+
const numberingInstances = [];
|
|
576
|
+
const numberingXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.NUMBERING);
|
|
577
|
+
if (!numberingXml) {
|
|
578
|
+
return { abstractNumberings, numberingInstances };
|
|
579
|
+
}
|
|
580
|
+
try {
|
|
581
|
+
const abstractNumElements = XMLParser_1.XMLParser.extractElements(numberingXml, "w:abstractNum");
|
|
582
|
+
for (const abstractNumXml of abstractNumElements) {
|
|
583
|
+
try {
|
|
584
|
+
const abstractNum = AbstractNumbering_1.AbstractNumbering.fromXML(abstractNumXml);
|
|
585
|
+
abstractNumberings.push(abstractNum);
|
|
586
|
+
}
|
|
587
|
+
catch (error) {
|
|
588
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
589
|
+
this.parseErrors.push({ element: "abstractNum", error: err });
|
|
590
|
+
if (this.strictParsing) {
|
|
591
|
+
throw error;
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
const numElements = XMLParser_1.XMLParser.extractElements(numberingXml, "w:num");
|
|
596
|
+
for (const numXml of numElements) {
|
|
597
|
+
try {
|
|
598
|
+
const instance = NumberingInstance_1.NumberingInstance.fromXML(numXml);
|
|
599
|
+
numberingInstances.push(instance);
|
|
600
|
+
}
|
|
601
|
+
catch (error) {
|
|
602
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
603
|
+
this.parseErrors.push({ element: "num", error: err });
|
|
604
|
+
if (this.strictParsing) {
|
|
605
|
+
throw error;
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
catch (error) {
|
|
611
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
612
|
+
this.parseErrors.push({ element: "numbering.xml", error: err });
|
|
613
|
+
if (this.strictParsing) {
|
|
614
|
+
throw new Error(`Failed to parse numbering.xml: ${err.message}`);
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
return { abstractNumberings, numberingInstances };
|
|
618
|
+
}
|
|
619
|
+
parseSectionProperties(docXml) {
|
|
620
|
+
try {
|
|
621
|
+
const bodyElements = XMLParser_1.XMLParser.extractElements(docXml, "w:body");
|
|
622
|
+
if (bodyElements.length === 0) {
|
|
623
|
+
return null;
|
|
624
|
+
}
|
|
625
|
+
const bodyContent = bodyElements[0];
|
|
626
|
+
if (!bodyContent) {
|
|
627
|
+
return null;
|
|
628
|
+
}
|
|
629
|
+
const sectPrElements = XMLParser_1.XMLParser.extractElements(bodyContent, "w:sectPr");
|
|
630
|
+
if (sectPrElements.length === 0) {
|
|
631
|
+
return null;
|
|
632
|
+
}
|
|
633
|
+
const sectPr = sectPrElements[sectPrElements.length - 1];
|
|
634
|
+
if (!sectPr) {
|
|
635
|
+
return null;
|
|
636
|
+
}
|
|
637
|
+
const sectionProps = {};
|
|
638
|
+
const pgSzElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:pgSz");
|
|
639
|
+
if (pgSzElements.length > 0) {
|
|
640
|
+
const pgSz = pgSzElements[0];
|
|
641
|
+
if (pgSz) {
|
|
642
|
+
const width = XMLParser_1.XMLParser.extractAttribute(pgSz, "w:w");
|
|
643
|
+
const height = XMLParser_1.XMLParser.extractAttribute(pgSz, "w:h");
|
|
644
|
+
const orient = XMLParser_1.XMLParser.extractAttribute(pgSz, "w:orient");
|
|
645
|
+
if (width && height) {
|
|
646
|
+
sectionProps.pageSize = {
|
|
647
|
+
width: parseInt(width, 10),
|
|
648
|
+
height: parseInt(height, 10),
|
|
649
|
+
orientation: orient === "landscape" ? "landscape" : "portrait",
|
|
650
|
+
};
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
const pgMarElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:pgMar");
|
|
655
|
+
if (pgMarElements.length > 0) {
|
|
656
|
+
const pgMar = pgMarElements[0];
|
|
657
|
+
if (pgMar) {
|
|
658
|
+
const top = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:top");
|
|
659
|
+
const bottom = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:bottom");
|
|
660
|
+
const left = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:left");
|
|
661
|
+
const right = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:right");
|
|
662
|
+
const header = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:header");
|
|
663
|
+
const footer = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:footer");
|
|
664
|
+
const gutter = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:gutter");
|
|
665
|
+
if (top && bottom && left && right) {
|
|
666
|
+
sectionProps.margins = {
|
|
667
|
+
top: parseInt(top, 10),
|
|
668
|
+
bottom: parseInt(bottom, 10),
|
|
669
|
+
left: parseInt(left, 10),
|
|
670
|
+
right: parseInt(right, 10),
|
|
671
|
+
header: header ? parseInt(header, 10) : undefined,
|
|
672
|
+
footer: footer ? parseInt(footer, 10) : undefined,
|
|
673
|
+
gutter: gutter ? parseInt(gutter, 10) : undefined,
|
|
674
|
+
};
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
const colsElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:cols");
|
|
679
|
+
if (colsElements.length > 0) {
|
|
680
|
+
const cols = colsElements[0];
|
|
681
|
+
if (cols) {
|
|
682
|
+
const num = XMLParser_1.XMLParser.extractAttribute(cols, "w:num");
|
|
683
|
+
const space = XMLParser_1.XMLParser.extractAttribute(cols, "w:space");
|
|
684
|
+
const equalWidth = XMLParser_1.XMLParser.extractAttribute(cols, "w:equalWidth");
|
|
685
|
+
if (num) {
|
|
686
|
+
sectionProps.columns = {
|
|
687
|
+
count: parseInt(num, 10),
|
|
688
|
+
space: space ? parseInt(space, 10) : undefined,
|
|
689
|
+
equalWidth: equalWidth === "1" || equalWidth === "true",
|
|
690
|
+
};
|
|
691
|
+
}
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
const typeElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:type");
|
|
695
|
+
if (typeElements.length > 0) {
|
|
696
|
+
const type = typeElements[0];
|
|
697
|
+
if (type) {
|
|
698
|
+
const typeVal = XMLParser_1.XMLParser.extractAttribute(type, "w:val");
|
|
699
|
+
if (typeVal) {
|
|
700
|
+
sectionProps.type = typeVal;
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
const pgNumTypeElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:pgNumType");
|
|
705
|
+
if (pgNumTypeElements.length > 0) {
|
|
706
|
+
const pgNumType = pgNumTypeElements[0];
|
|
707
|
+
if (pgNumType) {
|
|
708
|
+
const start = XMLParser_1.XMLParser.extractAttribute(pgNumType, "w:start");
|
|
709
|
+
const fmt = XMLParser_1.XMLParser.extractAttribute(pgNumType, "w:fmt");
|
|
710
|
+
sectionProps.pageNumbering = {
|
|
711
|
+
start: start ? parseInt(start, 10) : undefined,
|
|
712
|
+
format: fmt,
|
|
713
|
+
};
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
if (XMLParser_1.XMLParser.hasSelfClosingTag(sectPr, "w:titlePg")) {
|
|
717
|
+
sectionProps.titlePage = true;
|
|
718
|
+
}
|
|
719
|
+
const headerRefs = XMLParser_1.XMLParser.extractElements(sectPr, "w:headerReference");
|
|
720
|
+
if (headerRefs.length > 0) {
|
|
721
|
+
sectionProps.headers = {};
|
|
722
|
+
for (const headerRef of headerRefs) {
|
|
723
|
+
const type = XMLParser_1.XMLParser.extractAttribute(headerRef, "w:type");
|
|
724
|
+
const rId = XMLParser_1.XMLParser.extractAttribute(headerRef, "r:id");
|
|
725
|
+
if (type && rId) {
|
|
726
|
+
if (type === "default")
|
|
727
|
+
sectionProps.headers.default = rId;
|
|
728
|
+
else if (type === "first")
|
|
729
|
+
sectionProps.headers.first = rId;
|
|
730
|
+
else if (type === "even")
|
|
731
|
+
sectionProps.headers.even = rId;
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
const footerRefs = XMLParser_1.XMLParser.extractElements(sectPr, "w:footerReference");
|
|
736
|
+
if (footerRefs.length > 0) {
|
|
737
|
+
sectionProps.footers = {};
|
|
738
|
+
for (const footerRef of footerRefs) {
|
|
739
|
+
const type = XMLParser_1.XMLParser.extractAttribute(footerRef, "w:type");
|
|
740
|
+
const rId = XMLParser_1.XMLParser.extractAttribute(footerRef, "r:id");
|
|
741
|
+
if (type && rId) {
|
|
742
|
+
if (type === "default")
|
|
743
|
+
sectionProps.footers.default = rId;
|
|
744
|
+
else if (type === "first")
|
|
745
|
+
sectionProps.footers.first = rId;
|
|
746
|
+
else if (type === "even")
|
|
747
|
+
sectionProps.footers.even = rId;
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
return new Section_1.Section(sectionProps);
|
|
752
|
+
}
|
|
753
|
+
catch (error) {
|
|
754
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
755
|
+
this.parseErrors.push({ element: "sectPr", error: err });
|
|
756
|
+
if (this.strictParsing) {
|
|
757
|
+
throw new Error(`Failed to parse section properties: ${err.message}`);
|
|
758
|
+
}
|
|
759
|
+
return null;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
parseStyle(styleXml) {
|
|
763
|
+
const typeAttr = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:type");
|
|
764
|
+
const styleId = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:styleId") || "";
|
|
765
|
+
const defaultAttr = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:default");
|
|
766
|
+
const customStyleAttr = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:customStyle");
|
|
767
|
+
if (!styleId || !typeAttr) {
|
|
768
|
+
return null;
|
|
769
|
+
}
|
|
770
|
+
const nameElement = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:name", "</w:name>");
|
|
771
|
+
const name = nameElement
|
|
772
|
+
? XMLParser_1.XMLParser.extractAttribute(`<w:name${nameElement}`, "w:val") || styleId
|
|
773
|
+
: styleId;
|
|
774
|
+
const basedOnElement = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:basedOn", "</w:basedOn>");
|
|
775
|
+
const basedOn = basedOnElement
|
|
776
|
+
? XMLParser_1.XMLParser.extractAttribute(`<w:basedOn${basedOnElement}`, "w:val")
|
|
777
|
+
: undefined;
|
|
778
|
+
const nextElement = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:next", "</w:next>");
|
|
779
|
+
const next = nextElement
|
|
780
|
+
? XMLParser_1.XMLParser.extractAttribute(`<w:next${nextElement}`, "w:val")
|
|
781
|
+
: undefined;
|
|
782
|
+
let paragraphFormatting;
|
|
783
|
+
const pPrXml = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:pPr>", "</w:pPr>");
|
|
784
|
+
if (pPrXml) {
|
|
785
|
+
paragraphFormatting = this.parseParagraphFormattingFromXml(pPrXml);
|
|
786
|
+
}
|
|
787
|
+
let runFormatting;
|
|
788
|
+
const rPrXml = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:rPr>", "</w:rPr>");
|
|
789
|
+
if (rPrXml) {
|
|
790
|
+
runFormatting = this.parseRunFormattingFromXml(rPrXml);
|
|
791
|
+
}
|
|
792
|
+
const properties = {
|
|
793
|
+
styleId,
|
|
794
|
+
name,
|
|
795
|
+
type: typeAttr,
|
|
796
|
+
basedOn,
|
|
797
|
+
next,
|
|
798
|
+
isDefault: defaultAttr === "1" || defaultAttr === "true",
|
|
799
|
+
customStyle: customStyleAttr === "1" || customStyleAttr === "true",
|
|
800
|
+
paragraphFormatting,
|
|
801
|
+
runFormatting,
|
|
802
|
+
};
|
|
803
|
+
return Style_1.Style.create(properties);
|
|
804
|
+
}
|
|
805
|
+
parseParagraphFormattingFromXml(pPrXml) {
|
|
806
|
+
const formatting = {};
|
|
807
|
+
const jcElement = XMLParser_1.XMLParser.extractBetweenTags(pPrXml, "<w:jc", "/>");
|
|
808
|
+
if (jcElement) {
|
|
809
|
+
const alignment = XMLParser_1.XMLParser.extractAttribute(`<w:jc${jcElement}`, "w:val");
|
|
810
|
+
if (alignment) {
|
|
811
|
+
formatting.alignment = alignment;
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
const spacingElement = XMLParser_1.XMLParser.extractBetweenTags(pPrXml, "<w:spacing", "/>");
|
|
815
|
+
if (spacingElement) {
|
|
816
|
+
const before = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:before");
|
|
817
|
+
const after = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:after");
|
|
818
|
+
const line = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:line");
|
|
819
|
+
const lineRule = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:lineRule");
|
|
820
|
+
let validatedLineRule;
|
|
821
|
+
if (lineRule) {
|
|
822
|
+
const validLineRules = ["auto", "exact", "atLeast"];
|
|
823
|
+
if (validLineRules.includes(lineRule)) {
|
|
824
|
+
validatedLineRule = lineRule;
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
formatting.spacing = {
|
|
828
|
+
before: before ? parseInt(before, 10) : undefined,
|
|
829
|
+
after: after ? parseInt(after, 10) : undefined,
|
|
830
|
+
line: line ? parseInt(line, 10) : validatedLineRule ? 240 : undefined,
|
|
831
|
+
lineRule: validatedLineRule,
|
|
832
|
+
};
|
|
833
|
+
}
|
|
834
|
+
const indElement = XMLParser_1.XMLParser.extractBetweenTags(pPrXml, "<w:ind", "/>");
|
|
835
|
+
if (indElement) {
|
|
836
|
+
const left = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:left");
|
|
837
|
+
const right = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:right");
|
|
838
|
+
const firstLine = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:firstLine");
|
|
839
|
+
const hanging = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:hanging");
|
|
840
|
+
formatting.indentation = {
|
|
841
|
+
left: left ? parseInt(left, 10) : undefined,
|
|
842
|
+
right: right ? parseInt(right, 10) : undefined,
|
|
843
|
+
firstLine: firstLine ? parseInt(firstLine, 10) : undefined,
|
|
844
|
+
hanging: hanging ? parseInt(hanging, 10) : undefined,
|
|
845
|
+
};
|
|
846
|
+
}
|
|
847
|
+
if (pPrXml.includes("<w:keepNext/>") || pPrXml.includes("<w:keepNext ")) {
|
|
848
|
+
formatting.keepNext = true;
|
|
849
|
+
}
|
|
850
|
+
if (pPrXml.includes("<w:keepLines/>") || pPrXml.includes("<w:keepLines ")) {
|
|
851
|
+
formatting.keepLines = true;
|
|
852
|
+
}
|
|
853
|
+
if (pPrXml.includes("<w:pageBreakBefore/>") ||
|
|
854
|
+
pPrXml.includes("<w:pageBreakBefore ")) {
|
|
855
|
+
formatting.pageBreakBefore = true;
|
|
856
|
+
}
|
|
857
|
+
return formatting;
|
|
858
|
+
}
|
|
859
|
+
parseRunFormattingFromXml(rPrXml) {
|
|
860
|
+
const formatting = {};
|
|
861
|
+
if (rPrXml.includes("<w:b/>") || rPrXml.includes("<w:b ")) {
|
|
862
|
+
formatting.bold = true;
|
|
863
|
+
}
|
|
864
|
+
if (rPrXml.includes("<w:i/>") || rPrXml.includes("<w:i ")) {
|
|
865
|
+
formatting.italic = true;
|
|
866
|
+
}
|
|
867
|
+
if (rPrXml.includes("<w:strike/>") || rPrXml.includes("<w:strike ")) {
|
|
868
|
+
formatting.strike = true;
|
|
869
|
+
}
|
|
870
|
+
if (rPrXml.includes("<w:smallCaps/>") || rPrXml.includes("<w:smallCaps ")) {
|
|
871
|
+
formatting.smallCaps = true;
|
|
872
|
+
}
|
|
873
|
+
if (rPrXml.includes("<w:caps/>") || rPrXml.includes("<w:caps ")) {
|
|
874
|
+
formatting.allCaps = true;
|
|
875
|
+
}
|
|
876
|
+
const uElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:u", "/>");
|
|
877
|
+
if (uElement) {
|
|
878
|
+
const uVal = XMLParser_1.XMLParser.extractAttribute(`<w:u${uElement}`, "w:val");
|
|
879
|
+
if (uVal === "single" ||
|
|
880
|
+
uVal === "double" ||
|
|
881
|
+
uVal === "thick" ||
|
|
882
|
+
uVal === "dotted" ||
|
|
883
|
+
uVal === "dash") {
|
|
884
|
+
formatting.underline = uVal;
|
|
885
|
+
}
|
|
886
|
+
else {
|
|
887
|
+
formatting.underline = true;
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
const vertAlignElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:vertAlign", "/>");
|
|
891
|
+
if (vertAlignElement) {
|
|
892
|
+
const val = XMLParser_1.XMLParser.extractAttribute(`<w:vertAlign${vertAlignElement}`, "w:val");
|
|
893
|
+
if (val === "subscript") {
|
|
894
|
+
formatting.subscript = true;
|
|
895
|
+
}
|
|
896
|
+
else if (val === "superscript") {
|
|
897
|
+
formatting.superscript = true;
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
const rFontsElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:rFonts", "/>");
|
|
901
|
+
if (rFontsElement) {
|
|
902
|
+
const ascii = XMLParser_1.XMLParser.extractAttribute(`<w:rFonts${rFontsElement}`, "w:ascii");
|
|
903
|
+
if (ascii) {
|
|
904
|
+
formatting.font = ascii;
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
const szElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:sz", "/>");
|
|
908
|
+
if (szElement) {
|
|
909
|
+
const val = XMLParser_1.XMLParser.extractAttribute(`<w:sz${szElement}`, "w:val");
|
|
910
|
+
if (val) {
|
|
911
|
+
formatting.size = parseInt(val, 10) / 2;
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
const colorElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:color", "/>");
|
|
915
|
+
if (colorElement) {
|
|
916
|
+
const val = XMLParser_1.XMLParser.extractAttribute(`<w:color${colorElement}`, "w:val");
|
|
917
|
+
if (val && val !== "auto") {
|
|
918
|
+
formatting.color = val;
|
|
919
|
+
}
|
|
920
|
+
}
|
|
921
|
+
const highlightElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:highlight", "/>");
|
|
922
|
+
if (highlightElement) {
|
|
923
|
+
const val = XMLParser_1.XMLParser.extractAttribute(`<w:highlight${highlightElement}`, "w:val");
|
|
924
|
+
if (val) {
|
|
925
|
+
const validHighlights = [
|
|
926
|
+
"yellow",
|
|
927
|
+
"green",
|
|
928
|
+
"cyan",
|
|
929
|
+
"magenta",
|
|
930
|
+
"blue",
|
|
931
|
+
"red",
|
|
932
|
+
"darkBlue",
|
|
933
|
+
"darkCyan",
|
|
934
|
+
"darkGreen",
|
|
935
|
+
"darkMagenta",
|
|
936
|
+
"darkRed",
|
|
937
|
+
"darkYellow",
|
|
938
|
+
"darkGray",
|
|
939
|
+
"lightGray",
|
|
940
|
+
"black",
|
|
941
|
+
"white",
|
|
942
|
+
];
|
|
943
|
+
if (validHighlights.includes(val)) {
|
|
944
|
+
formatting.highlight = val;
|
|
945
|
+
}
|
|
946
|
+
}
|
|
947
|
+
}
|
|
948
|
+
return formatting;
|
|
949
|
+
}
|
|
487
950
|
static getRawXml(zipHandler, partName) {
|
|
488
951
|
try {
|
|
489
952
|
const file = zipHandler.getFile(partName);
|
|
490
953
|
if (!file) {
|
|
491
954
|
return null;
|
|
492
955
|
}
|
|
493
|
-
if (typeof file.content ===
|
|
956
|
+
if (typeof file.content === "string") {
|
|
494
957
|
return file.content;
|
|
495
958
|
}
|
|
496
959
|
if (Buffer.isBuffer(file.content)) {
|
|
497
|
-
return file.content.toString(
|
|
960
|
+
return file.content.toString("utf8");
|
|
498
961
|
}
|
|
499
962
|
return null;
|
|
500
963
|
}
|
|
@@ -504,10 +967,12 @@ class DocumentParser {
|
|
|
504
967
|
}
|
|
505
968
|
static setRawXml(zipHandler, partName, xmlContent) {
|
|
506
969
|
try {
|
|
507
|
-
if (typeof xmlContent !==
|
|
970
|
+
if (typeof xmlContent !== "string") {
|
|
508
971
|
return false;
|
|
509
972
|
}
|
|
510
|
-
zipHandler.addFile(partName, Buffer.from(xmlContent,
|
|
973
|
+
zipHandler.addFile(partName, Buffer.from(xmlContent, "utf8"), {
|
|
974
|
+
binary: true,
|
|
975
|
+
});
|
|
511
976
|
return true;
|
|
512
977
|
}
|
|
513
978
|
catch (error) {
|
|
@@ -516,7 +981,7 @@ class DocumentParser {
|
|
|
516
981
|
}
|
|
517
982
|
static getRelationships(zipHandler, partName) {
|
|
518
983
|
try {
|
|
519
|
-
const lastSlash = partName.lastIndexOf(
|
|
984
|
+
const lastSlash = partName.lastIndexOf("/");
|
|
520
985
|
const relsPath = lastSlash === -1
|
|
521
986
|
? `_rels/${partName}.rels`
|
|
522
987
|
: `${partName.substring(0, lastSlash)}/_rels/${partName.substring(lastSlash + 1)}.rels`;
|
|
@@ -552,6 +1017,25 @@ class DocumentParser {
|
|
|
552
1017
|
return [];
|
|
553
1018
|
}
|
|
554
1019
|
}
|
|
1020
|
+
parseNamespaces(docXml) {
|
|
1021
|
+
const namespaces = {};
|
|
1022
|
+
const docTagMatch = docXml.match(/<w:document([^>]+)>/);
|
|
1023
|
+
if (docTagMatch && docTagMatch[1]) {
|
|
1024
|
+
const attributes = docTagMatch[1];
|
|
1025
|
+
const nsPattern = /xmlns:([^=]+)="([^"]+)"/g;
|
|
1026
|
+
let match;
|
|
1027
|
+
while ((match = nsPattern.exec(attributes)) !== null) {
|
|
1028
|
+
if (match[1] && match[2]) {
|
|
1029
|
+
namespaces[match[1]] = match[2];
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
const defaultNsMatch = attributes.match(/xmlns="([^"]+)"/);
|
|
1033
|
+
if (defaultNsMatch && defaultNsMatch[1]) {
|
|
1034
|
+
namespaces["xmlns"] = defaultNsMatch[1];
|
|
1035
|
+
}
|
|
1036
|
+
}
|
|
1037
|
+
return namespaces;
|
|
1038
|
+
}
|
|
555
1039
|
}
|
|
556
1040
|
exports.DocumentParser = DocumentParser;
|
|
557
1041
|
//# sourceMappingURL=DocumentParser.js.map
|