docxmlater 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -21
- package/dist/core/Document.d.ts +8 -20
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +49 -535
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentGenerator.d.ts +22 -0
- package/dist/core/DocumentGenerator.d.ts.map +1 -0
- package/dist/core/DocumentGenerator.js +161 -0
- package/dist/core/DocumentGenerator.js.map +1 -0
- package/dist/core/DocumentParser.d.ts +32 -0
- package/dist/core/DocumentParser.d.ts.map +1 -0
- package/dist/core/DocumentParser.js +404 -0
- package/dist/core/DocumentParser.js.map +1 -0
- package/dist/core/DocumentValidator.d.ts +46 -0
- package/dist/core/DocumentValidator.d.ts.map +1 -0
- package/dist/core/DocumentValidator.js +223 -0
- package/dist/core/DocumentValidator.js.map +1 -0
- package/dist/core/RelationshipManager.d.ts.map +1 -1
- package/dist/core/RelationshipManager.js +19 -3
- package/dist/core/RelationshipManager.js.map +1 -1
- package/dist/elements/Image.d.ts +5 -1
- package/dist/elements/Image.d.ts.map +1 -1
- package/dist/elements/Image.js +118 -12
- package/dist/elements/Image.js.map +1 -1
- package/dist/elements/ImageManager.d.ts +11 -3
- package/dist/elements/ImageManager.d.ts.map +1 -1
- package/dist/elements/ImageManager.js +72 -6
- package/dist/elements/ImageManager.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +10 -1
- package/dist/utils/validation.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DocumentParser = void 0;
|
|
4
|
+
const types_1 = require("../zip/types");
|
|
5
|
+
const Paragraph_1 = require("../elements/Paragraph");
|
|
6
|
+
const Run_1 = require("../elements/Run");
|
|
7
|
+
const Hyperlink_1 = require("../elements/Hyperlink");
|
|
8
|
+
const XMLBuilder_1 = require("../xml/XMLBuilder");
|
|
9
|
+
const XMLParser_1 = require("../xml/XMLParser");
|
|
10
|
+
const RelationshipManager_1 = require("./RelationshipManager");
|
|
11
|
+
class DocumentParser {
|
|
12
|
+
constructor(strictParsing = false) {
|
|
13
|
+
this.parseErrors = [];
|
|
14
|
+
this.strictParsing = strictParsing;
|
|
15
|
+
}
|
|
16
|
+
getParseErrors() {
|
|
17
|
+
return [...this.parseErrors];
|
|
18
|
+
}
|
|
19
|
+
clearParseErrors() {
|
|
20
|
+
this.parseErrors = [];
|
|
21
|
+
}
|
|
22
|
+
async parseDocument(zipHandler, relationshipManager) {
|
|
23
|
+
const docXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
|
|
24
|
+
if (!docXml) {
|
|
25
|
+
throw new Error('Invalid document: word/document.xml not found');
|
|
26
|
+
}
|
|
27
|
+
const parsedRelationshipManager = this.parseRelationships(zipHandler, relationshipManager);
|
|
28
|
+
const properties = this.parseProperties(zipHandler);
|
|
29
|
+
const bodyElements = this.parseBodyElements(docXml, parsedRelationshipManager);
|
|
30
|
+
return { bodyElements, properties, relationshipManager: parsedRelationshipManager };
|
|
31
|
+
}
|
|
32
|
+
parseBodyElements(docXml, relationshipManager) {
|
|
33
|
+
const bodyElements = [];
|
|
34
|
+
try {
|
|
35
|
+
XMLParser_1.XMLParser.validateSize(docXml);
|
|
36
|
+
}
|
|
37
|
+
catch (error) {
|
|
38
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
39
|
+
this.parseErrors.push({ element: 'document', error: err });
|
|
40
|
+
if (this.strictParsing) {
|
|
41
|
+
throw err;
|
|
42
|
+
}
|
|
43
|
+
return bodyElements;
|
|
44
|
+
}
|
|
45
|
+
const bodyContent = XMLParser_1.XMLParser.extractBody(docXml);
|
|
46
|
+
if (!bodyContent) {
|
|
47
|
+
return bodyElements;
|
|
48
|
+
}
|
|
49
|
+
const paragraphXmls = XMLParser_1.XMLParser.extractElements(bodyContent, 'w:p');
|
|
50
|
+
for (const paraXml of paragraphXmls) {
|
|
51
|
+
const paragraph = this.parseParagraph(paraXml, relationshipManager);
|
|
52
|
+
if (paragraph) {
|
|
53
|
+
bodyElements.push(paragraph);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
const hasTable = bodyContent.includes('<w:tbl');
|
|
57
|
+
if (hasTable) {
|
|
58
|
+
const err = new Error('Document contains tables which are not yet fully supported in Phase 2. Tables will be ignored.');
|
|
59
|
+
this.parseErrors.push({ element: 'table', error: err });
|
|
60
|
+
if (this.strictParsing) {
|
|
61
|
+
throw err;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
this.validateLoadedContent(bodyElements);
|
|
65
|
+
return bodyElements;
|
|
66
|
+
}
|
|
67
|
+
validateLoadedContent(bodyElements) {
|
|
68
|
+
const paragraphs = bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
|
|
69
|
+
if (paragraphs.length === 0) {
|
|
70
|
+
return;
|
|
71
|
+
}
|
|
72
|
+
let totalRuns = 0;
|
|
73
|
+
let emptyRuns = 0;
|
|
74
|
+
let runsWithText = 0;
|
|
75
|
+
for (const para of paragraphs) {
|
|
76
|
+
const runs = para.getRuns();
|
|
77
|
+
totalRuns += runs.length;
|
|
78
|
+
for (const run of runs) {
|
|
79
|
+
const text = run.getText();
|
|
80
|
+
if (text.length === 0) {
|
|
81
|
+
emptyRuns++;
|
|
82
|
+
}
|
|
83
|
+
else {
|
|
84
|
+
runsWithText++;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
if (totalRuns > 0) {
|
|
89
|
+
const emptyPercentage = (emptyRuns / totalRuns) * 100;
|
|
90
|
+
if (emptyPercentage > 90 && emptyRuns > 10) {
|
|
91
|
+
const warning = new Error(`WARNING: Document appears to be corrupted or empty. ` +
|
|
92
|
+
`${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) have no text content. ` +
|
|
93
|
+
`This may indicate:\n` +
|
|
94
|
+
` - The document was already corrupted before loading\n` +
|
|
95
|
+
` - Text content was stripped by another application\n` +
|
|
96
|
+
` - Encoding issues during document creation\n` +
|
|
97
|
+
`Original document structure is preserved, but text may be lost.`);
|
|
98
|
+
this.parseErrors.push({ element: 'document-validation', error: warning });
|
|
99
|
+
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
100
|
+
}
|
|
101
|
+
else if (emptyPercentage > 50 && emptyRuns > 5) {
|
|
102
|
+
const warning = new Error(`Document has ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) with no text. ` +
|
|
103
|
+
`This is higher than normal and may indicate partial data loss.`);
|
|
104
|
+
this.parseErrors.push({ element: 'document-validation', error: warning });
|
|
105
|
+
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
parseParagraph(paraXml, relationshipManager) {
|
|
110
|
+
try {
|
|
111
|
+
const paragraph = new Paragraph_1.Paragraph();
|
|
112
|
+
this.parseParagraphProperties(paraXml, paragraph);
|
|
113
|
+
const hyperlinkXmls = XMLParser_1.XMLParser.extractElements(paraXml, 'w:hyperlink');
|
|
114
|
+
for (const hyperlinkXml of hyperlinkXmls) {
|
|
115
|
+
const hyperlink = this.parseHyperlink(hyperlinkXml, relationshipManager);
|
|
116
|
+
if (hyperlink) {
|
|
117
|
+
paragraph.addHyperlink(hyperlink);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
let paraXmlWithoutHyperlinks = paraXml;
|
|
121
|
+
for (const hyperlinkXml of hyperlinkXmls) {
|
|
122
|
+
paraXmlWithoutHyperlinks = paraXmlWithoutHyperlinks.replace(hyperlinkXml, '');
|
|
123
|
+
}
|
|
124
|
+
const runXmls = XMLParser_1.XMLParser.extractElements(paraXmlWithoutHyperlinks, 'w:r');
|
|
125
|
+
for (const runXml of runXmls) {
|
|
126
|
+
const run = this.parseRun(runXml);
|
|
127
|
+
if (run) {
|
|
128
|
+
paragraph.addRun(run);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
return paragraph;
|
|
132
|
+
}
|
|
133
|
+
catch (error) {
|
|
134
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
135
|
+
this.parseErrors.push({ element: 'paragraph', error: err });
|
|
136
|
+
if (this.strictParsing) {
|
|
137
|
+
throw new Error(`Failed to parse paragraph: ${err.message}`);
|
|
138
|
+
}
|
|
139
|
+
return null;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
parseParagraphProperties(paraXml, paragraph) {
|
|
143
|
+
const pPrMatch = paraXml.match(/<w:pPr[^>]*>([\s\S]*?)<\/w:pPr>/);
|
|
144
|
+
if (!pPrMatch || !pPrMatch[1]) {
|
|
145
|
+
return;
|
|
146
|
+
}
|
|
147
|
+
const pPr = pPrMatch[1];
|
|
148
|
+
const alignMatch = pPr.match(/<w:jc\s+w:val="([^"]+)"/);
|
|
149
|
+
if (alignMatch && alignMatch[1]) {
|
|
150
|
+
const value = alignMatch[1];
|
|
151
|
+
const validAlignments = ['left', 'center', 'right', 'justify'];
|
|
152
|
+
if (validAlignments.includes(value)) {
|
|
153
|
+
const alignment = value;
|
|
154
|
+
paragraph.setAlignment(alignment);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
const styleMatch = pPr.match(/<w:pStyle\s+w:val="([^"]+)"/);
|
|
158
|
+
if (styleMatch && styleMatch[1]) {
|
|
159
|
+
paragraph.setStyle(styleMatch[1]);
|
|
160
|
+
}
|
|
161
|
+
const indMatch = pPr.match(/<w:ind([^>]+)\/>/);
|
|
162
|
+
if (indMatch && indMatch[1]) {
|
|
163
|
+
const indStr = indMatch[1];
|
|
164
|
+
const leftMatch = indStr.match(/w:left="(\d+)"/);
|
|
165
|
+
const rightMatch = indStr.match(/w:right="(\d+)"/);
|
|
166
|
+
const firstLineMatch = indStr.match(/w:firstLine="(\d+)"/);
|
|
167
|
+
if (leftMatch && leftMatch[1]) {
|
|
168
|
+
paragraph.setLeftIndent(parseInt(leftMatch[1], 10));
|
|
169
|
+
}
|
|
170
|
+
if (rightMatch && rightMatch[1]) {
|
|
171
|
+
paragraph.setRightIndent(parseInt(rightMatch[1], 10));
|
|
172
|
+
}
|
|
173
|
+
if (firstLineMatch && firstLineMatch[1]) {
|
|
174
|
+
paragraph.setFirstLineIndent(parseInt(firstLineMatch[1], 10));
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
const spacingMatch = pPr.match(/<w:spacing([^>]+)\/>/);
|
|
178
|
+
if (spacingMatch && spacingMatch[1]) {
|
|
179
|
+
const spacingStr = spacingMatch[1];
|
|
180
|
+
const beforeMatch = spacingStr.match(/w:before="(\d+)"/);
|
|
181
|
+
const afterMatch = spacingStr.match(/w:after="(\d+)"/);
|
|
182
|
+
const lineMatch = spacingStr.match(/w:line="(\d+)"/);
|
|
183
|
+
if (beforeMatch && beforeMatch[1]) {
|
|
184
|
+
paragraph.setSpaceBefore(parseInt(beforeMatch[1], 10));
|
|
185
|
+
}
|
|
186
|
+
if (afterMatch && afterMatch[1]) {
|
|
187
|
+
paragraph.setSpaceAfter(parseInt(afterMatch[1], 10));
|
|
188
|
+
}
|
|
189
|
+
if (lineMatch && lineMatch[1]) {
|
|
190
|
+
const lineRule = spacingStr.match(/w:lineRule="([^"]+)"/);
|
|
191
|
+
let validatedLineRule;
|
|
192
|
+
if (lineRule && lineRule[1]) {
|
|
193
|
+
const value = lineRule[1];
|
|
194
|
+
const validLineRules = ['auto', 'exact', 'atLeast'];
|
|
195
|
+
if (validLineRules.includes(value)) {
|
|
196
|
+
validatedLineRule = value;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
paragraph.setLineSpacing(parseInt(lineMatch[1], 10), validatedLineRule);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
if (pPr.includes('<w:keepNext'))
|
|
203
|
+
paragraph.setKeepNext(true);
|
|
204
|
+
if (pPr.includes('<w:keepLines'))
|
|
205
|
+
paragraph.setKeepLines(true);
|
|
206
|
+
if (pPr.includes('<w:pageBreakBefore'))
|
|
207
|
+
paragraph.setPageBreakBefore(true);
|
|
208
|
+
}
|
|
209
|
+
parseRun(runXml) {
|
|
210
|
+
try {
|
|
211
|
+
const text = XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
|
|
212
|
+
const run = new Run_1.Run(text);
|
|
213
|
+
this.parseRunProperties(runXml, run);
|
|
214
|
+
return run;
|
|
215
|
+
}
|
|
216
|
+
catch (error) {
|
|
217
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
218
|
+
this.parseErrors.push({ element: 'run', error: err });
|
|
219
|
+
if (this.strictParsing) {
|
|
220
|
+
throw new Error(`Failed to parse run: ${err.message}`);
|
|
221
|
+
}
|
|
222
|
+
return null;
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
parseRunProperties(runXml, run) {
|
|
226
|
+
const rPrMatch = runXml.match(/<w:rPr[^>]*>([\s\S]*?)<\/w:rPr>/);
|
|
227
|
+
if (!rPrMatch || !rPrMatch[1]) {
|
|
228
|
+
return;
|
|
229
|
+
}
|
|
230
|
+
const rPr = rPrMatch[1];
|
|
231
|
+
if (rPr.includes('<w:b/>') || rPr.includes('<w:b ')) {
|
|
232
|
+
run.setBold(true);
|
|
233
|
+
}
|
|
234
|
+
if (rPr.includes('<w:i/>') || rPr.includes('<w:i ')) {
|
|
235
|
+
run.setItalic(true);
|
|
236
|
+
}
|
|
237
|
+
const underlineMatch = rPr.match(/<w:u\s+w:val="([^"]+)"/);
|
|
238
|
+
if (underlineMatch && underlineMatch[1]) {
|
|
239
|
+
const value = underlineMatch[1];
|
|
240
|
+
const validUnderlineStyles = [
|
|
241
|
+
'single',
|
|
242
|
+
'double',
|
|
243
|
+
'thick',
|
|
244
|
+
'dotted',
|
|
245
|
+
'dash',
|
|
246
|
+
'dotDash',
|
|
247
|
+
'dotDotDash',
|
|
248
|
+
'wave',
|
|
249
|
+
];
|
|
250
|
+
if (validUnderlineStyles.includes(value) ||
|
|
251
|
+
value === 'true' ||
|
|
252
|
+
value === 'false') {
|
|
253
|
+
const underlineStyle = value;
|
|
254
|
+
run.setUnderline(underlineStyle);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
else if (rPr.includes('<w:u/>')) {
|
|
258
|
+
run.setUnderline(true);
|
|
259
|
+
}
|
|
260
|
+
if (rPr.includes('<w:strike/>') || rPr.includes('<w:strike ')) {
|
|
261
|
+
run.setStrike(true);
|
|
262
|
+
}
|
|
263
|
+
const vertAlignMatch = rPr.match(/<w:vertAlign\s+w:val="([^"]+)"/);
|
|
264
|
+
if (vertAlignMatch && vertAlignMatch[1]) {
|
|
265
|
+
if (vertAlignMatch[1] === 'subscript') {
|
|
266
|
+
run.setSubscript(true);
|
|
267
|
+
}
|
|
268
|
+
else if (vertAlignMatch[1] === 'superscript') {
|
|
269
|
+
run.setSuperscript(true);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
const fontMatch = rPr.match(/<w:rFonts[^>]+w:ascii="([^"]+)"/);
|
|
273
|
+
if (fontMatch && fontMatch[1]) {
|
|
274
|
+
run.setFont(fontMatch[1]);
|
|
275
|
+
}
|
|
276
|
+
const sizeMatch = rPr.match(/<w:sz\s+w:val="(\d+)"/);
|
|
277
|
+
if (sizeMatch && sizeMatch[1]) {
|
|
278
|
+
const halfPoints = parseInt(sizeMatch[1], 10);
|
|
279
|
+
run.setSize(halfPoints / 2);
|
|
280
|
+
}
|
|
281
|
+
const colorMatch = rPr.match(/<w:color\s+w:val="([^"]+)"/);
|
|
282
|
+
if (colorMatch && colorMatch[1]) {
|
|
283
|
+
run.setColor(colorMatch[1]);
|
|
284
|
+
}
|
|
285
|
+
const highlightMatch = rPr.match(/<w:highlight\s+w:val="([^"]+)"/);
|
|
286
|
+
if (highlightMatch && highlightMatch[1]) {
|
|
287
|
+
const value = highlightMatch[1];
|
|
288
|
+
const validHighlightColors = [
|
|
289
|
+
'yellow',
|
|
290
|
+
'green',
|
|
291
|
+
'cyan',
|
|
292
|
+
'magenta',
|
|
293
|
+
'blue',
|
|
294
|
+
'red',
|
|
295
|
+
'darkBlue',
|
|
296
|
+
'darkCyan',
|
|
297
|
+
'darkGreen',
|
|
298
|
+
'darkMagenta',
|
|
299
|
+
'darkRed',
|
|
300
|
+
'darkYellow',
|
|
301
|
+
'darkGray',
|
|
302
|
+
'lightGray',
|
|
303
|
+
'black',
|
|
304
|
+
'white',
|
|
305
|
+
];
|
|
306
|
+
if (validHighlightColors.includes(value)) {
|
|
307
|
+
const highlightColor = value;
|
|
308
|
+
run.setHighlight(highlightColor);
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
if (rPr.includes('<w:smallCaps/>') || rPr.includes('<w:smallCaps ')) {
|
|
312
|
+
run.setSmallCaps(true);
|
|
313
|
+
}
|
|
314
|
+
if (rPr.includes('<w:caps/>') || rPr.includes('<w:caps ')) {
|
|
315
|
+
run.setAllCaps(true);
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
parseHyperlink(hyperlinkXml, relationshipManager) {
|
|
319
|
+
try {
|
|
320
|
+
const relationshipId = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'r:id');
|
|
321
|
+
const anchor = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:anchor');
|
|
322
|
+
const tooltip = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:tooltip');
|
|
323
|
+
if (!relationshipId && !anchor) {
|
|
324
|
+
return null;
|
|
325
|
+
}
|
|
326
|
+
const runXmls = XMLParser_1.XMLParser.extractElements(hyperlinkXml, 'w:r');
|
|
327
|
+
let text = '';
|
|
328
|
+
let formatting;
|
|
329
|
+
for (const runXml of runXmls) {
|
|
330
|
+
text += XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
|
|
331
|
+
if (!formatting) {
|
|
332
|
+
const run = this.parseRun(runXml);
|
|
333
|
+
if (run) {
|
|
334
|
+
formatting = run.getFormatting();
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
let url;
|
|
339
|
+
if (relationshipId) {
|
|
340
|
+
const relationship = relationshipManager.getRelationship(relationshipId);
|
|
341
|
+
if (relationship && relationship.getType().includes('hyperlink')) {
|
|
342
|
+
url = relationship.getTarget();
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
return new Hyperlink_1.Hyperlink({
|
|
346
|
+
url,
|
|
347
|
+
anchor,
|
|
348
|
+
text: text || 'Link',
|
|
349
|
+
formatting,
|
|
350
|
+
tooltip,
|
|
351
|
+
relationshipId,
|
|
352
|
+
});
|
|
353
|
+
}
|
|
354
|
+
catch (error) {
|
|
355
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
356
|
+
this.parseErrors.push({ element: 'hyperlink', error: err });
|
|
357
|
+
if (this.strictParsing) {
|
|
358
|
+
throw new Error(`Failed to parse hyperlink: ${err.message}`);
|
|
359
|
+
}
|
|
360
|
+
return null;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
parseRelationships(zipHandler, relationshipManager) {
|
|
364
|
+
const relsPath = 'word/_rels/document.xml.rels';
|
|
365
|
+
const relsXml = zipHandler.getFileAsString(relsPath);
|
|
366
|
+
if (relsXml) {
|
|
367
|
+
return RelationshipManager_1.RelationshipManager.fromXml(relsXml);
|
|
368
|
+
}
|
|
369
|
+
return relationshipManager;
|
|
370
|
+
}
|
|
371
|
+
parseProperties(zipHandler) {
|
|
372
|
+
const coreXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.CORE_PROPS);
|
|
373
|
+
if (!coreXml) {
|
|
374
|
+
return {};
|
|
375
|
+
}
|
|
376
|
+
const extractTag = (xml, tag) => {
|
|
377
|
+
const match = xml.match(new RegExp(`<${tag}[^>]*>([^<]*)</${tag}>`));
|
|
378
|
+
return match && match[1] ? XMLBuilder_1.XMLBuilder.unescapeXml(match[1]) : undefined;
|
|
379
|
+
};
|
|
380
|
+
const properties = {
|
|
381
|
+
title: extractTag(coreXml, 'dc:title'),
|
|
382
|
+
subject: extractTag(coreXml, 'dc:subject'),
|
|
383
|
+
creator: extractTag(coreXml, 'dc:creator'),
|
|
384
|
+
keywords: extractTag(coreXml, 'cp:keywords'),
|
|
385
|
+
description: extractTag(coreXml, 'dc:description'),
|
|
386
|
+
lastModifiedBy: extractTag(coreXml, 'cp:lastModifiedBy'),
|
|
387
|
+
};
|
|
388
|
+
const revisionStr = extractTag(coreXml, 'cp:revision');
|
|
389
|
+
if (revisionStr) {
|
|
390
|
+
properties.revision = parseInt(revisionStr, 10);
|
|
391
|
+
}
|
|
392
|
+
const createdStr = extractTag(coreXml, 'dcterms:created');
|
|
393
|
+
if (createdStr) {
|
|
394
|
+
properties.created = new Date(createdStr);
|
|
395
|
+
}
|
|
396
|
+
const modifiedStr = extractTag(coreXml, 'dcterms:modified');
|
|
397
|
+
if (modifiedStr) {
|
|
398
|
+
properties.modified = new Date(modifiedStr);
|
|
399
|
+
}
|
|
400
|
+
return properties;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
exports.DocumentParser = DocumentParser;
|
|
404
|
+
//# sourceMappingURL=DocumentParser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentParser.js","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":";;;AAMA,wCAA0C;AAC1C,qDAAkD;AAClD,yCAAqD;AACrD,qDAAkD;AAClD,kDAA+C;AAC/C,gDAA6C;AAC7C,+DAA4D;AAmB5D,MAAa,cAAc;IAIzB,YAAY,gBAAyB,KAAK;QAHlC,gBAAW,GAAiB,EAAE,CAAC;QAIrC,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAKD,cAAc;QACZ,OAAO,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC;IAC/B,CAAC;IAKD,gBAAgB;QACd,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC;IACxB,CAAC;IAQD,KAAK,CAAC,aAAa,CACjB,UAAsB,EACtB,mBAAwC;QAOxC,MAAM,MAAM,GAAG,UAAU,CAAC,eAAe,CAAC,kBAAU,CAAC,QAAQ,CAAC,CAAC;QAC/D,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnE,CAAC;QAGD,MAAM,yBAAyB,GAAG,IAAI,CAAC,kBAAkB,CAAC,UAAU,EAAE,mBAAmB,CAAC,CAAC;QAG3F,MAAM,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QAGpD,MAAM,YAAY,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;QAE/E,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,CAAC;IACtF,CAAC;IAOO,iBAAiB,CACvB,MAAc,EACd,mBAAwC;QAExC,MAAM,YAAY,GAAkB,EAAE,CAAC;QAGvC,IAAI,CAAC;YACH,qBAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACjC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACtE,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAC3D,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,MAAM,GAAG,CAAC;YACZ,CAAC;YACD,OAAO,YAAY,CAAC;QACtB,CAAC;QAGD,MAAM,WAAW,GAAG,qBAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAClD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,YAAY,CAAC;QACtB,CAAC;QAGD,MAAM,aAAa,GAAG,qBAAS,CAAC,eAAe,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;QAEpE,KAAK,MAAM,OAAO,IAAI,aAAa,EAAE,CAAC;YACpC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,mBAAmB,CAAC,CAAC;YACpE,IAAI,SAAS,EAAE,CAAC;gBACd,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC;QAGD,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,IAAI,KAAK,CACnB,gGAAgG,CACjG,CAAC;YACF,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YACxD,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,MAAM,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAGD,IAAI,CAAC,qBAAqB,CAAC,YAAY,CAAC,CAAC;QAEzC,OAAO,YAAY,CAAC;IACtB,CAAC;IAMO,qBAAqB,CAAC,YAA2B;QACvD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,CACpC,CAAC,EAAE,EAAmB,EAAE,CAAC,EAAE,YAAY,qBAAS,CACjD,CAAC;QAEF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO;QACT,CAAC;QAGD,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;YAC5B,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC;YAEzB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;gBACvB,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,EAAE,CAAC;gBAC3B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACtB,SAAS,EAAE,CAAC;gBACd,CAAC;qBAAM,CAAC;oBACN,YAAY,EAAE,CAAC;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;QAGD,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,eAAe,GAAG,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC;YAEtD,IAAI,eAAe,GAAG,EAAE,IAAI,SAAS,GAAG,EAAE,EAAE,CAAC;gBAC3C,MAAM,OAAO,GAAG,IAAI,KAAK,CACvB,sDAAsD;oBACpD,GAAG,SAAS,WAAW,SAAS,UAAU,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,2BAA2B;oBAC/F,sBAAsB;oBACtB,yDAAyD;oBACzD,wDAAwD;oBACxD,gDAAgD;oBAChD,iEAAiE,CACpE,CAAC;gBACF,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,qBAAqB,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;gBAG1E,OAAO,CAAC,IAAI,CAAC,2BAA2B,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC;YAC/D,CAAC;iBAAM,IAAI,eAAe,GAAG,EAAE,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBACjD,MAAM,OAAO,GAAG,IAAI,KAAK,CACvB,gBAAgB,SAAS,WAAW,SAAS,UAAU,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,mBAAmB;oBAClG,gEAAgE,CACnE,CAAC;gBACF,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,qBAAqB,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;gBAC1E,OAAO,CAAC,IAAI,CAAC,2BAA2B,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;IACH,CAAC;IAKO,cAAc,CACpB,OAAe,EACf,mBAAwC;QAExC,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,qBAAS,EAAE,CAAC;YAGlC,IAAI,CAAC,wBAAwB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAGlD,MAAM,aAAa,GAAG,qBAAS,CAAC,eAAe,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;YAGxE,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,mBAAmB,CAAC,CAAC;gBACzE,IAAI,SAAS,EAAE,CAAC;oBACd,SAAS,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;gBACpC,CAAC;YACH,CAAC;YAKD,IAAI,wBAAwB,GAAG,OAAO,CAAC;YACvC,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;gBACzC,wBAAwB,GAAG,wBAAwB,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YAChF,CAAC;YAED,MAAM,OAAO,GAAG,qBAAS,CAAC,eAAe,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;YAG3E,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAClC,IAAI,GAAG,EAAE,CAAC;oBACR,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACtE,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAE5D,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YAC/D,CAAC;YAGD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAKO,wBAAwB,CAAC,OAAe,EAAE,SAAoB;QACpE,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAClE,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9B,OAAO;QACT,CAAC;QAED,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAGxB,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACxD,IAAI,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhC,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,eAAe,GAAG,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;YAC/D,IAAI,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACpC,MAAM,SAAS,GAAG,KAAgD,CAAC;gBACnE,SAAS,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YACpC,CAAC;QAEH,CAAC;QAGD,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC5D,IAAI,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;YAChC,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC;QAGD,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QAC/C,IAAI,QAAQ,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YACjD,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YACnD,MAAM,cAAc,GAAG,MAAM,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;YAE3D,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9B,SAAS,CAAC,aAAa,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;gBAChC,SAAS,CAAC,cAAc,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YACxD,CAAC;YACD,IAAI,cAAc,IAAI,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;gBACxC,SAAS,CAAC,kBAAkB,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAGD,MAAM,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;QACvD,IAAI,YAAY,IAAI,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC;YACpC,MAAM,UAAU,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;YACzD,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YACvD,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YAErD,IAAI,WAAW,IAAI,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClC,SAAS,CAAC,cAAc,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YACzD,CAAC;YACD,IAAI,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;gBAChC,SAAS,CAAC,aAAa,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YACvD,CAAC;YACD,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9B,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;gBAE1D,IAAI,iBAA2D,CAAC;gBAChE,IAAI,QAAQ,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;oBAC1B,MAAM,cAAc,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;oBACpD,IAAI,cAAc,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;wBACnC,iBAAiB,GAAG,KAAqC,CAAC;oBAC5D,CAAC;gBACH,CAAC;gBACD,SAAS,CAAC,cAAc,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,iBAAiB,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;QAGD,IAAI,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC;YAAE,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC7D,IAAI,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC;YAAE,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QAC/D,IAAI,GAAG,CAAC,QAAQ,CAAC,oBAAoB,CAAC;YAAE,SAAS,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAC7E,CAAC;IAKO,QAAQ,CAAC,MAAc;QAC7B,IAAI,CAAC;YAEH,MAAM,IAAI,GAAG,uBAAU,CAAC,WAAW,CAAC,qBAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC;YAGnE,MAAM,GAAG,GAAG,IAAI,SAAG,CAAC,IAAI,CAAC,CAAC;YAG1B,IAAI,CAAC,kBAAkB,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAErC,OAAO,GAAG,CAAC;QACb,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACtE,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAEtD,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YACzD,CAAC;YAGD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAKO,kBAAkB,CAAC,MAAc,EAAE,GAAQ;QACjD,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACjE,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9B,OAAO;QACT,CAAC;QAED,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAGxB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACpD,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;QAGD,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACpD,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACtB,CAAC;QAGD,MAAM,cAAc,GAAG,GAAG,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC3D,IAAI,cAAc,IAAI,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;YAExC,MAAM,KAAK,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,oBAAoB,GAAG;gBAC3B,QAAQ;gBACR,QAAQ;gBACR,OAAO;gBACP,QAAQ;gBACR,MAAM;gBACN,SAAS;gBACT,YAAY;gBACZ,MAAM;aACP,CAAC;YACF,IACE,oBAAoB,CAAC,QAAQ,CAAC,KAAK,CAAC;gBACpC,KAAK,KAAK,MAAM;gBAChB,KAAK,KAAK,OAAO,EACjB,CAAC;gBACD,MAAM,cAAc,GAAG,KAAmC,CAAC;gBAC3D,GAAG,CAAC,YAAY,CAAC,cAAc,CAAC,CAAC;YACnC,CAAC;QAEH,CAAC;aAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACzB,CAAC;QAGD,IAAI,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC9D,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACtB,CAAC;QAGD,MAAM,cAAc,GAAG,GAAG,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACnE,IAAI,cAAc,IAAI,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;YACxC,IAAI,cAAc,CAAC,CAAC,CAAC,KAAK,WAAW,EAAE,CAAC;gBACtC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YACzB,CAAC;iBAAM,IAAI,cAAc,CAAC,CAAC,CAAC,KAAK,aAAa,EAAE,CAAC;gBAC/C,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAGD,MAAM,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAC/D,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9B,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5B,CAAC;QAGD,MAAM,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACrD,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9B,MAAM,UAAU,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC9C,GAAG,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;QAC9B,CAAC;QAGD,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC3D,IAAI,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;YAChC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9B,CAAC;QAGD,MAAM,cAAc,GAAG,GAAG,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACnE,IAAI,cAAc,IAAI,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;YAExC,MAAM,KAAK,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,oBAAoB,GAAG;gBAC3B,QAAQ;gBACR,OAAO;gBACP,MAAM;gBACN,SAAS;gBACT,MAAM;gBACN,KAAK;gBACL,UAAU;gBACV,UAAU;gBACV,WAAW;gBACX,aAAa;gBACb,SAAS;gBACT,YAAY;gBACZ,UAAU;gBACV,WAAW;gBACX,OAAO;gBACP,OAAO;aACR,CAAC;YACF,IAAI,oBAAoB,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzC,MAAM,cAAc,GAAG,KAAmC,CAAC;gBAC3D,GAAG,CAAC,YAAY,CAAC,cAAc,CAAC,CAAC;YACnC,CAAC;QAEH,CAAC;QAGD,IAAI,GAAG,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;YACpE,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACzB,CAAC;QAGD,IAAI,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1D,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAMO,cAAc,CACpB,YAAoB,EACpB,mBAAwC;QAExC,IAAI,CAAC;YAEH,MAAM,cAAc,GAAG,qBAAS,CAAC,gBAAgB,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;YACxE,MAAM,MAAM,GAAG,qBAAS,CAAC,gBAAgB,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;YACpE,MAAM,OAAO,GAAG,qBAAS,CAAC,gBAAgB,CAAC,YAAY,EAAE,WAAW,CAAC,CAAC;YAGtE,IAAI,CAAC,cAAc,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC/B,OAAO,IAAI,CAAC;YACd,CAAC;YAGD,MAAM,OAAO,GAAG,qBAAS,CAAC,eAAe,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;YAC/D,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,IAAI,UAAqC,CAAC;YAE1C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAE7B,IAAI,IAAI,uBAAU,CAAC,WAAW,CAAC,qBAAS,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC;gBAG9D,IAAI,CAAC,UAAU,EAAE,CAAC;oBAChB,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;oBAClC,IAAI,GAAG,EAAE,CAAC;wBACR,UAAU,GAAG,GAAG,CAAC,aAAa,EAAE,CAAC;oBACnC,CAAC;gBACH,CAAC;YACH,CAAC;YAGD,IAAI,GAAuB,CAAC;YAC5B,IAAI,cAAc,EAAE,CAAC;gBACnB,MAAM,YAAY,GAAG,mBAAmB,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;gBACzE,IAAI,YAAY,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;oBACjE,GAAG,GAAG,YAAY,CAAC,SAAS,EAAE,CAAC;gBACjC,CAAC;YACH,CAAC;YAGD,OAAO,IAAI,qBAAS,CAAC;gBACnB,GAAG;gBACH,MAAM;gBACN,IAAI,EAAE,IAAI,IAAI,MAAM;gBACpB,UAAU;gBACV,OAAO;gBACP,cAAc;aACf,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACtE,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAE5D,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YAC/D,CAAC;YAGD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAOO,kBAAkB,CACxB,UAAsB,EACtB,mBAAwC;QAExC,MAAM,QAAQ,GAAG,8BAA8B,CAAC;QAChD,MAAM,OAAO,GAAG,UAAU,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QAErD,IAAI,OAAO,EAAE,CAAC;YAEZ,OAAO,yCAAmB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAID,OAAO,mBAAmB,CAAC;IAC7B,CAAC;IAKO,eAAe,CAAC,UAAsB;QAC5C,MAAM,OAAO,GAAG,UAAU,CAAC,eAAe,CAAC,kBAAU,CAAC,UAAU,CAAC,CAAC;QAClE,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,EAAE,CAAC;QACZ,CAAC;QAGD,MAAM,UAAU,GAAG,CAAC,GAAW,EAAE,GAAW,EAAsB,EAAE;YAClE,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,IAAI,GAAG,kBAAkB,GAAG,GAAG,CAAC,CAAC,CAAC;YACrE,OAAO,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,uBAAU,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC1E,CAAC,CAAC;QAEF,MAAM,UAAU,GAAuB;YACrC,KAAK,EAAE,UAAU,CAAC,OAAO,EAAE,UAAU,CAAC;YACtC,OAAO,EAAE,UAAU,CAAC,OAAO,EAAE,YAAY,CAAC;YAC1C,OAAO,EAAE,UAAU,CAAC,OAAO,EAAE,YAAY,CAAC;YAC1C,QAAQ,EAAE,UAAU,CAAC,OAAO,EAAE,aAAa,CAAC;YAC5C,WAAW,EAAE,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC;YAClD,cAAc,EAAE,UAAU,CAAC,OAAO,EAAE,mBAAmB,CAAC;SACzD,CAAC;QAGF,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QACvD,IAAI,WAAW,EAAE,CAAC;YAChB,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QAClD,CAAC;QAGD,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC;QAC1D,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC,OAAO,GAAG,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC;QAC5D,IAAI,WAAW,EAAE,CAAC;YAChB,UAAU,CAAC,QAAQ,GAAG,IAAI,IAAI,CAAC,WAAW,CAAC,CAAC;QAC9C,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;CACF;AArlBD,wCAqlBC"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { Paragraph } from '../elements/Paragraph';
|
|
2
|
+
import { Table } from '../elements/Table';
|
|
3
|
+
import { TableOfContentsElement } from '../elements/TableOfContentsElement';
|
|
4
|
+
import { ImageManager } from '../elements/ImageManager';
|
|
5
|
+
import { DocumentProperties } from './Document';
|
|
6
|
+
export interface MemoryOptions {
|
|
7
|
+
maxMemoryUsagePercent?: number;
|
|
8
|
+
maxRssMB?: number;
|
|
9
|
+
useAbsoluteLimit?: boolean;
|
|
10
|
+
}
|
|
11
|
+
type BodyElement = Paragraph | Table | TableOfContentsElement;
|
|
12
|
+
export interface SizeEstimate {
|
|
13
|
+
paragraphs: number;
|
|
14
|
+
tables: number;
|
|
15
|
+
images: number;
|
|
16
|
+
estimatedXmlBytes: number;
|
|
17
|
+
imageBytes: number;
|
|
18
|
+
totalEstimatedBytes: number;
|
|
19
|
+
totalEstimatedMB: number;
|
|
20
|
+
warning?: string;
|
|
21
|
+
}
|
|
22
|
+
export declare class DocumentValidator {
|
|
23
|
+
private maxMemoryUsagePercent;
|
|
24
|
+
private maxRssMB;
|
|
25
|
+
private useAbsoluteLimit;
|
|
26
|
+
constructor(maxMemoryUsagePercent?: number, options?: MemoryOptions);
|
|
27
|
+
static validateProperties(properties: DocumentProperties): DocumentProperties;
|
|
28
|
+
validateBeforeSave(bodyElements: BodyElement[]): void;
|
|
29
|
+
checkMemoryThreshold(): void;
|
|
30
|
+
estimateSize(bodyElements: BodyElement[], imageManager: ImageManager): SizeEstimate;
|
|
31
|
+
getSizeStats(bodyElements: BodyElement[], imageManager: ImageManager): {
|
|
32
|
+
elements: {
|
|
33
|
+
paragraphs: number;
|
|
34
|
+
tables: number;
|
|
35
|
+
images: number;
|
|
36
|
+
};
|
|
37
|
+
size: {
|
|
38
|
+
xml: string;
|
|
39
|
+
images: string;
|
|
40
|
+
total: string;
|
|
41
|
+
};
|
|
42
|
+
warnings: string[];
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
export {};
|
|
46
|
+
//# sourceMappingURL=DocumentValidator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentValidator.d.ts","sourceRoot":"","sources":["../../src/core/DocumentValidator.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAKhD,MAAM,WAAW,aAAa;IAE5B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAKD,KAAK,WAAW,GAAG,SAAS,GAAG,KAAK,GAAG,sBAAsB,CAAC;AAK9D,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,gBAAgB,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAKD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,qBAAqB,CAAS;IACtC,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,gBAAgB,CAAU;gBAEtB,qBAAqB,GAAE,MAAW,EAAE,OAAO,GAAE,aAAkB;IAqB3E,MAAM,CAAC,kBAAkB,CAAC,UAAU,EAAE,kBAAkB,GAAG,kBAAkB;IA2H7E,kBAAkB,CAAC,YAAY,EAAE,WAAW,EAAE,GAAG,IAAI;IAgDrD,oBAAoB,IAAI,IAAI;IAgD5B,YAAY,CAAC,YAAY,EAAE,WAAW,EAAE,EAAE,YAAY,EAAE,YAAY,GAAG,YAAY;IAqDnF,YAAY,CAAC,YAAY,EAAE,WAAW,EAAE,EAAE,YAAY,EAAE,YAAY,GAAG;QACrE,QAAQ,EAAE;YAAE,UAAU,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAA;SAAE,CAAC;QACjE,IAAI,EAAE;YAAE,GAAG,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC;QACrD,QAAQ,EAAE,MAAM,EAAE,CAAC;KACpB;CA6BF"}
|