docxmlater 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -0
- package/dist/core/Document.d.ts +9 -21
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +65 -590
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentGenerator.d.ts +22 -0
- package/dist/core/DocumentGenerator.d.ts.map +1 -0
- package/dist/core/DocumentGenerator.js +161 -0
- package/dist/core/DocumentGenerator.js.map +1 -0
- package/dist/core/DocumentParser.d.ts +32 -0
- package/dist/core/DocumentParser.d.ts.map +1 -0
- package/dist/core/DocumentParser.js +404 -0
- package/dist/core/DocumentParser.js.map +1 -0
- package/dist/core/DocumentValidator.d.ts +46 -0
- package/dist/core/DocumentValidator.d.ts.map +1 -0
- package/dist/core/DocumentValidator.js +223 -0
- package/dist/core/DocumentValidator.js.map +1 -0
- package/dist/core/RelationshipManager.d.ts.map +1 -1
- package/dist/core/RelationshipManager.js +19 -3
- package/dist/core/RelationshipManager.js.map +1 -1
- package/dist/elements/Hyperlink.d.ts +1 -0
- package/dist/elements/Hyperlink.d.ts.map +1 -1
- package/dist/elements/Hyperlink.js +27 -2
- package/dist/elements/Hyperlink.js.map +1 -1
- package/dist/elements/Image.d.ts +5 -1
- package/dist/elements/Image.d.ts.map +1 -1
- package/dist/elements/Image.js +118 -12
- package/dist/elements/Image.js.map +1 -1
- package/dist/elements/ImageManager.d.ts +11 -3
- package/dist/elements/ImageManager.d.ts.map +1 -1
- package/dist/elements/ImageManager.js +72 -6
- package/dist/elements/ImageManager.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +10 -1
- package/dist/utils/validation.js.map +1 -1
- package/package.json +1 -2
package/dist/core/Document.js
CHANGED
|
@@ -41,7 +41,6 @@ const Table_1 = require("../elements/Table");
|
|
|
41
41
|
const Section_1 = require("../elements/Section");
|
|
42
42
|
const ImageManager_1 = require("../elements/ImageManager");
|
|
43
43
|
const HeaderFooterManager_1 = require("../elements/HeaderFooterManager");
|
|
44
|
-
const Hyperlink_1 = require("../elements/Hyperlink");
|
|
45
44
|
const TableOfContents_1 = require("../elements/TableOfContents");
|
|
46
45
|
const TableOfContentsElement_1 = require("../elements/TableOfContentsElement");
|
|
47
46
|
const BookmarkManager_1 = require("../elements/BookmarkManager");
|
|
@@ -49,17 +48,20 @@ const Revision_1 = require("../elements/Revision");
|
|
|
49
48
|
const RevisionManager_1 = require("../elements/RevisionManager");
|
|
50
49
|
const CommentManager_1 = require("../elements/CommentManager");
|
|
51
50
|
const Run_1 = require("../elements/Run");
|
|
52
|
-
const
|
|
53
|
-
const XMLParser_1 = require("../xml/XMLParser");
|
|
51
|
+
const Hyperlink_1 = require("../elements/Hyperlink");
|
|
54
52
|
const StylesManager_1 = require("../formatting/StylesManager");
|
|
55
53
|
const NumberingManager_1 = require("../formatting/NumberingManager");
|
|
56
54
|
const RelationshipManager_1 = require("./RelationshipManager");
|
|
57
|
-
|
|
55
|
+
const DocumentParser_1 = require("./DocumentParser");
|
|
56
|
+
const DocumentGenerator_1 = require("./DocumentGenerator");
|
|
57
|
+
const DocumentValidator_1 = require("./DocumentValidator");
|
|
58
|
+
class ImageRun extends Run_1.Run {
|
|
58
59
|
constructor(image) {
|
|
59
|
-
|
|
60
|
+
super('');
|
|
61
|
+
this.imageElement = image;
|
|
60
62
|
}
|
|
61
63
|
toXML() {
|
|
62
|
-
const drawing = this.
|
|
64
|
+
const drawing = this.imageElement.toXML();
|
|
63
65
|
return {
|
|
64
66
|
name: 'w:r',
|
|
65
67
|
children: [drawing]
|
|
@@ -69,15 +71,25 @@ class ImageRun {
|
|
|
69
71
|
class Document {
|
|
70
72
|
constructor(zipHandler, options = {}, initDefaults = true) {
|
|
71
73
|
this.bodyElements = [];
|
|
72
|
-
this.parseErrors = [];
|
|
73
74
|
this.zipHandler = zipHandler || new ZipHandler_1.ZipHandler();
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
this.
|
|
75
|
+
const strictParsing = options.strictParsing ?? false;
|
|
76
|
+
const memoryPercent = options.maxMemoryUsagePercent ?? 80;
|
|
77
|
+
this.parser = new DocumentParser_1.DocumentParser(strictParsing);
|
|
78
|
+
this.generator = new DocumentGenerator_1.DocumentGenerator();
|
|
79
|
+
this.validator = new DocumentValidator_1.DocumentValidator(memoryPercent, {
|
|
80
|
+
maxMemoryUsagePercent: options.maxMemoryUsagePercent,
|
|
81
|
+
maxRssMB: options.maxRssMB,
|
|
82
|
+
useAbsoluteLimit: options.useAbsoluteMemoryLimit,
|
|
83
|
+
});
|
|
84
|
+
this.properties = options.properties ? DocumentValidator_1.DocumentValidator.validateProperties(options.properties) : {};
|
|
77
85
|
this.stylesManager = StylesManager_1.StylesManager.create();
|
|
78
86
|
this.numberingManager = NumberingManager_1.NumberingManager.create();
|
|
79
87
|
this.section = Section_1.Section.createLetter();
|
|
80
|
-
this.imageManager = ImageManager_1.ImageManager.create(
|
|
88
|
+
this.imageManager = ImageManager_1.ImageManager.create({
|
|
89
|
+
maxImageCount: options.maxImageCount,
|
|
90
|
+
maxTotalImageSizeMB: options.maxTotalImageSizeMB,
|
|
91
|
+
maxSingleImageSizeMB: options.maxSingleImageSizeMB,
|
|
92
|
+
});
|
|
81
93
|
this.relationshipManager = RelationshipManager_1.RelationshipManager.create();
|
|
82
94
|
this.headerFooterManager = HeaderFooterManager_1.HeaderFooterManager.create();
|
|
83
95
|
this.bookmarkManager = BookmarkManager_1.BookmarkManager.create();
|
|
@@ -108,346 +120,20 @@ class Document {
|
|
|
108
120
|
return doc;
|
|
109
121
|
}
|
|
110
122
|
initializeRequiredFiles() {
|
|
111
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generateContentTypes());
|
|
112
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generateRels());
|
|
113
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generateDocumentXml());
|
|
123
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generator.generateContentTypes());
|
|
124
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generator.generateRels());
|
|
125
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generator.generateDocumentXml(this.bodyElements, this.section));
|
|
114
126
|
this.zipHandler.addFile('word/_rels/document.xml.rels', this.relationshipManager.generateXml());
|
|
115
127
|
this.zipHandler.addFile(types_1.DOCX_PATHS.STYLES, this.stylesManager.generateStylesXml());
|
|
116
128
|
this.zipHandler.addFile(types_1.DOCX_PATHS.NUMBERING, this.numberingManager.generateNumberingXml());
|
|
117
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generateCoreProps());
|
|
118
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generateAppProps());
|
|
129
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generator.generateCoreProps(this.properties));
|
|
130
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generator.generateAppProps());
|
|
119
131
|
}
|
|
120
132
|
async parseDocument() {
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
this.parseRelationships();
|
|
126
|
-
this.parseProperties();
|
|
127
|
-
this.parseBodyElements(docXml);
|
|
128
|
-
}
|
|
129
|
-
parseBodyElements(docXml) {
|
|
130
|
-
this.bodyElements = [];
|
|
131
|
-
try {
|
|
132
|
-
XMLParser_1.XMLParser.validateSize(docXml);
|
|
133
|
-
}
|
|
134
|
-
catch (error) {
|
|
135
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
136
|
-
this.parseErrors.push({ element: 'document', error: err });
|
|
137
|
-
if (this.strictParsing) {
|
|
138
|
-
throw err;
|
|
139
|
-
}
|
|
140
|
-
return;
|
|
141
|
-
}
|
|
142
|
-
const bodyContent = XMLParser_1.XMLParser.extractBody(docXml);
|
|
143
|
-
if (!bodyContent) {
|
|
144
|
-
return;
|
|
145
|
-
}
|
|
146
|
-
const paragraphXmls = XMLParser_1.XMLParser.extractElements(bodyContent, 'w:p');
|
|
147
|
-
for (const paraXml of paragraphXmls) {
|
|
148
|
-
const paragraph = this.parseParagraph(paraXml);
|
|
149
|
-
if (paragraph) {
|
|
150
|
-
this.bodyElements.push(paragraph);
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
const hasTable = bodyContent.includes('<w:tbl');
|
|
154
|
-
if (hasTable) {
|
|
155
|
-
const err = new Error('Document contains tables which are not yet fully supported in Phase 2. Tables will be ignored.');
|
|
156
|
-
this.parseErrors.push({ element: 'table', error: err });
|
|
157
|
-
if (this.strictParsing) {
|
|
158
|
-
throw err;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
this.validateLoadedContent();
|
|
162
|
-
}
|
|
163
|
-
validateLoadedContent() {
|
|
164
|
-
const paragraphs = this.bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
|
|
165
|
-
if (paragraphs.length === 0) {
|
|
166
|
-
return;
|
|
167
|
-
}
|
|
168
|
-
let totalRuns = 0;
|
|
169
|
-
let emptyRuns = 0;
|
|
170
|
-
let runsWithText = 0;
|
|
171
|
-
for (const para of paragraphs) {
|
|
172
|
-
const runs = para.getRuns();
|
|
173
|
-
totalRuns += runs.length;
|
|
174
|
-
for (const run of runs) {
|
|
175
|
-
const text = run.getText();
|
|
176
|
-
if (text.length === 0) {
|
|
177
|
-
emptyRuns++;
|
|
178
|
-
}
|
|
179
|
-
else {
|
|
180
|
-
runsWithText++;
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
if (totalRuns > 0) {
|
|
185
|
-
const emptyPercentage = (emptyRuns / totalRuns) * 100;
|
|
186
|
-
if (emptyPercentage > 90 && emptyRuns > 10) {
|
|
187
|
-
const warning = new Error(`WARNING: Document appears to be corrupted or empty. ` +
|
|
188
|
-
`${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) have no text content. ` +
|
|
189
|
-
`This may indicate:\n` +
|
|
190
|
-
` - The document was already corrupted before loading\n` +
|
|
191
|
-
` - Text content was stripped by another application\n` +
|
|
192
|
-
` - Encoding issues during document creation\n` +
|
|
193
|
-
`Original document structure is preserved, but text may be lost.`);
|
|
194
|
-
this.parseErrors.push({ element: 'document-validation', error: warning });
|
|
195
|
-
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
196
|
-
}
|
|
197
|
-
else if (emptyPercentage > 50 && emptyRuns > 5) {
|
|
198
|
-
const warning = new Error(`Document has ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) with no text. ` +
|
|
199
|
-
`This is higher than normal and may indicate partial data loss.`);
|
|
200
|
-
this.parseErrors.push({ element: 'document-validation', error: warning });
|
|
201
|
-
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
parseParagraph(paraXml) {
|
|
206
|
-
try {
|
|
207
|
-
const paragraph = new Paragraph_1.Paragraph();
|
|
208
|
-
this.parseParagraphProperties(paraXml, paragraph);
|
|
209
|
-
const hyperlinkXmls = XMLParser_1.XMLParser.extractElements(paraXml, 'w:hyperlink');
|
|
210
|
-
for (const hyperlinkXml of hyperlinkXmls) {
|
|
211
|
-
const hyperlink = this.parseHyperlink(hyperlinkXml);
|
|
212
|
-
if (hyperlink) {
|
|
213
|
-
paragraph.addHyperlink(hyperlink);
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
let paraXmlWithoutHyperlinks = paraXml;
|
|
217
|
-
for (const hyperlinkXml of hyperlinkXmls) {
|
|
218
|
-
paraXmlWithoutHyperlinks = paraXmlWithoutHyperlinks.replace(hyperlinkXml, '');
|
|
219
|
-
}
|
|
220
|
-
const runXmls = XMLParser_1.XMLParser.extractElements(paraXmlWithoutHyperlinks, 'w:r');
|
|
221
|
-
for (const runXml of runXmls) {
|
|
222
|
-
const run = this.parseRun(runXml);
|
|
223
|
-
if (run) {
|
|
224
|
-
paragraph.addRun(run);
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
return paragraph;
|
|
228
|
-
}
|
|
229
|
-
catch (error) {
|
|
230
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
231
|
-
this.parseErrors.push({ element: 'paragraph', error: err });
|
|
232
|
-
if (this.strictParsing) {
|
|
233
|
-
throw new Error(`Failed to parse paragraph: ${err.message}`);
|
|
234
|
-
}
|
|
235
|
-
return null;
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
parseParagraphProperties(paraXml, paragraph) {
|
|
239
|
-
const pPrMatch = paraXml.match(/<w:pPr[^>]*>([\s\S]*?)<\/w:pPr>/);
|
|
240
|
-
if (!pPrMatch || !pPrMatch[1]) {
|
|
241
|
-
return;
|
|
242
|
-
}
|
|
243
|
-
const pPr = pPrMatch[1];
|
|
244
|
-
const alignMatch = pPr.match(/<w:jc\s+w:val="([^"]+)"/);
|
|
245
|
-
if (alignMatch && alignMatch[1]) {
|
|
246
|
-
const alignment = alignMatch[1];
|
|
247
|
-
paragraph.setAlignment(alignment);
|
|
248
|
-
}
|
|
249
|
-
const styleMatch = pPr.match(/<w:pStyle\s+w:val="([^"]+)"/);
|
|
250
|
-
if (styleMatch && styleMatch[1]) {
|
|
251
|
-
paragraph.setStyle(styleMatch[1]);
|
|
252
|
-
}
|
|
253
|
-
const indMatch = pPr.match(/<w:ind([^>]+)\/>/);
|
|
254
|
-
if (indMatch && indMatch[1]) {
|
|
255
|
-
const indStr = indMatch[1];
|
|
256
|
-
const leftMatch = indStr.match(/w:left="(\d+)"/);
|
|
257
|
-
const rightMatch = indStr.match(/w:right="(\d+)"/);
|
|
258
|
-
const firstLineMatch = indStr.match(/w:firstLine="(\d+)"/);
|
|
259
|
-
if (leftMatch && leftMatch[1]) {
|
|
260
|
-
paragraph.setLeftIndent(parseInt(leftMatch[1], 10));
|
|
261
|
-
}
|
|
262
|
-
if (rightMatch && rightMatch[1]) {
|
|
263
|
-
paragraph.setRightIndent(parseInt(rightMatch[1], 10));
|
|
264
|
-
}
|
|
265
|
-
if (firstLineMatch && firstLineMatch[1]) {
|
|
266
|
-
paragraph.setFirstLineIndent(parseInt(firstLineMatch[1], 10));
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
const spacingMatch = pPr.match(/<w:spacing([^>]+)\/>/);
|
|
270
|
-
if (spacingMatch && spacingMatch[1]) {
|
|
271
|
-
const spacingStr = spacingMatch[1];
|
|
272
|
-
const beforeMatch = spacingStr.match(/w:before="(\d+)"/);
|
|
273
|
-
const afterMatch = spacingStr.match(/w:after="(\d+)"/);
|
|
274
|
-
const lineMatch = spacingStr.match(/w:line="(\d+)"/);
|
|
275
|
-
if (beforeMatch && beforeMatch[1]) {
|
|
276
|
-
paragraph.setSpaceBefore(parseInt(beforeMatch[1], 10));
|
|
277
|
-
}
|
|
278
|
-
if (afterMatch && afterMatch[1]) {
|
|
279
|
-
paragraph.setSpaceAfter(parseInt(afterMatch[1], 10));
|
|
280
|
-
}
|
|
281
|
-
if (lineMatch && lineMatch[1]) {
|
|
282
|
-
const lineRule = spacingStr.match(/w:lineRule="([^"]+)"/);
|
|
283
|
-
paragraph.setLineSpacing(parseInt(lineMatch[1], 10), lineRule && lineRule[1] ? lineRule[1] : undefined);
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
if (pPr.includes('<w:keepNext'))
|
|
287
|
-
paragraph.setKeepNext(true);
|
|
288
|
-
if (pPr.includes('<w:keepLines'))
|
|
289
|
-
paragraph.setKeepLines(true);
|
|
290
|
-
if (pPr.includes('<w:pageBreakBefore'))
|
|
291
|
-
paragraph.setPageBreakBefore(true);
|
|
292
|
-
}
|
|
293
|
-
parseRun(runXml) {
|
|
294
|
-
try {
|
|
295
|
-
const text = XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
|
|
296
|
-
const run = new Run_1.Run(text);
|
|
297
|
-
this.parseRunProperties(runXml, run);
|
|
298
|
-
return run;
|
|
299
|
-
}
|
|
300
|
-
catch (error) {
|
|
301
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
302
|
-
this.parseErrors.push({ element: 'run', error: err });
|
|
303
|
-
if (this.strictParsing) {
|
|
304
|
-
throw new Error(`Failed to parse run: ${err.message}`);
|
|
305
|
-
}
|
|
306
|
-
return null;
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
parseRunProperties(runXml, run) {
|
|
310
|
-
const rPrMatch = runXml.match(/<w:rPr[^>]*>([\s\S]*?)<\/w:rPr>/);
|
|
311
|
-
if (!rPrMatch || !rPrMatch[1]) {
|
|
312
|
-
return;
|
|
313
|
-
}
|
|
314
|
-
const rPr = rPrMatch[1];
|
|
315
|
-
if (rPr.includes('<w:b/>') || rPr.includes('<w:b ')) {
|
|
316
|
-
run.setBold(true);
|
|
317
|
-
}
|
|
318
|
-
if (rPr.includes('<w:i/>') || rPr.includes('<w:i ')) {
|
|
319
|
-
run.setItalic(true);
|
|
320
|
-
}
|
|
321
|
-
const underlineMatch = rPr.match(/<w:u\s+w:val="([^"]+)"/);
|
|
322
|
-
if (underlineMatch && underlineMatch[1]) {
|
|
323
|
-
const underlineStyle = underlineMatch[1];
|
|
324
|
-
run.setUnderline(underlineStyle);
|
|
325
|
-
}
|
|
326
|
-
else if (rPr.includes('<w:u/>')) {
|
|
327
|
-
run.setUnderline(true);
|
|
328
|
-
}
|
|
329
|
-
if (rPr.includes('<w:strike/>') || rPr.includes('<w:strike ')) {
|
|
330
|
-
run.setStrike(true);
|
|
331
|
-
}
|
|
332
|
-
const vertAlignMatch = rPr.match(/<w:vertAlign\s+w:val="([^"]+)"/);
|
|
333
|
-
if (vertAlignMatch && vertAlignMatch[1]) {
|
|
334
|
-
if (vertAlignMatch[1] === 'subscript') {
|
|
335
|
-
run.setSubscript(true);
|
|
336
|
-
}
|
|
337
|
-
else if (vertAlignMatch[1] === 'superscript') {
|
|
338
|
-
run.setSuperscript(true);
|
|
339
|
-
}
|
|
340
|
-
}
|
|
341
|
-
const fontMatch = rPr.match(/<w:rFonts[^>]+w:ascii="([^"]+)"/);
|
|
342
|
-
if (fontMatch && fontMatch[1]) {
|
|
343
|
-
run.setFont(fontMatch[1]);
|
|
344
|
-
}
|
|
345
|
-
const sizeMatch = rPr.match(/<w:sz\s+w:val="(\d+)"/);
|
|
346
|
-
if (sizeMatch && sizeMatch[1]) {
|
|
347
|
-
const halfPoints = parseInt(sizeMatch[1], 10);
|
|
348
|
-
run.setSize(halfPoints / 2);
|
|
349
|
-
}
|
|
350
|
-
const colorMatch = rPr.match(/<w:color\s+w:val="([^"]+)"/);
|
|
351
|
-
if (colorMatch && colorMatch[1]) {
|
|
352
|
-
run.setColor(colorMatch[1]);
|
|
353
|
-
}
|
|
354
|
-
const highlightMatch = rPr.match(/<w:highlight\s+w:val="([^"]+)"/);
|
|
355
|
-
if (highlightMatch && highlightMatch[1]) {
|
|
356
|
-
const highlightColor = highlightMatch[1];
|
|
357
|
-
run.setHighlight(highlightColor);
|
|
358
|
-
}
|
|
359
|
-
if (rPr.includes('<w:smallCaps/>') || rPr.includes('<w:smallCaps ')) {
|
|
360
|
-
run.setSmallCaps(true);
|
|
361
|
-
}
|
|
362
|
-
if (rPr.includes('<w:caps/>') || rPr.includes('<w:caps ')) {
|
|
363
|
-
run.setAllCaps(true);
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
parseHyperlink(hyperlinkXml) {
|
|
367
|
-
try {
|
|
368
|
-
const relationshipId = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'r:id');
|
|
369
|
-
const anchor = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:anchor');
|
|
370
|
-
const tooltip = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:tooltip');
|
|
371
|
-
if (!relationshipId && !anchor) {
|
|
372
|
-
return null;
|
|
373
|
-
}
|
|
374
|
-
const runXmls = XMLParser_1.XMLParser.extractElements(hyperlinkXml, 'w:r');
|
|
375
|
-
let text = '';
|
|
376
|
-
let formatting;
|
|
377
|
-
for (const runXml of runXmls) {
|
|
378
|
-
text += XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
|
|
379
|
-
if (!formatting) {
|
|
380
|
-
const run = this.parseRun(runXml);
|
|
381
|
-
if (run) {
|
|
382
|
-
formatting = run.getFormatting();
|
|
383
|
-
}
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
|
-
let url;
|
|
387
|
-
if (relationshipId) {
|
|
388
|
-
const relationship = this.relationshipManager.getRelationship(relationshipId);
|
|
389
|
-
if (relationship && relationship.getType().includes('hyperlink')) {
|
|
390
|
-
url = relationship.getTarget();
|
|
391
|
-
}
|
|
392
|
-
}
|
|
393
|
-
return new Hyperlink_1.Hyperlink({
|
|
394
|
-
url,
|
|
395
|
-
anchor,
|
|
396
|
-
text: text || 'Link',
|
|
397
|
-
formatting,
|
|
398
|
-
tooltip,
|
|
399
|
-
relationshipId,
|
|
400
|
-
});
|
|
401
|
-
}
|
|
402
|
-
catch (error) {
|
|
403
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
404
|
-
this.parseErrors.push({ element: 'hyperlink', error: err });
|
|
405
|
-
if (this.strictParsing) {
|
|
406
|
-
throw new Error(`Failed to parse hyperlink: ${err.message}`);
|
|
407
|
-
}
|
|
408
|
-
return null;
|
|
409
|
-
}
|
|
410
|
-
}
|
|
411
|
-
parseRelationships() {
|
|
412
|
-
const relsPath = 'word/_rels/document.xml.rels';
|
|
413
|
-
const relsXml = this.zipHandler.getFileAsString(relsPath);
|
|
414
|
-
if (relsXml) {
|
|
415
|
-
this.relationshipManager = RelationshipManager_1.RelationshipManager.fromXml(relsXml);
|
|
416
|
-
}
|
|
417
|
-
else {
|
|
418
|
-
this.relationshipManager.addStyles();
|
|
419
|
-
this.relationshipManager.addNumbering();
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
parseProperties() {
|
|
423
|
-
const coreXml = this.zipHandler.getFileAsString(types_1.DOCX_PATHS.CORE_PROPS);
|
|
424
|
-
if (!coreXml) {
|
|
425
|
-
return;
|
|
426
|
-
}
|
|
427
|
-
const extractTag = (xml, tag) => {
|
|
428
|
-
const match = xml.match(new RegExp(`<${tag}[^>]*>([^<]*)</${tag}>`));
|
|
429
|
-
return match && match[1] ? XMLBuilder_1.XMLBuilder.unescapeXml(match[1]) : undefined;
|
|
430
|
-
};
|
|
431
|
-
this.properties = {
|
|
432
|
-
title: extractTag(coreXml, 'dc:title'),
|
|
433
|
-
subject: extractTag(coreXml, 'dc:subject'),
|
|
434
|
-
creator: extractTag(coreXml, 'dc:creator'),
|
|
435
|
-
keywords: extractTag(coreXml, 'cp:keywords'),
|
|
436
|
-
description: extractTag(coreXml, 'dc:description'),
|
|
437
|
-
lastModifiedBy: extractTag(coreXml, 'cp:lastModifiedBy'),
|
|
438
|
-
};
|
|
439
|
-
const revisionStr = extractTag(coreXml, 'cp:revision');
|
|
440
|
-
if (revisionStr) {
|
|
441
|
-
this.properties.revision = parseInt(revisionStr, 10);
|
|
442
|
-
}
|
|
443
|
-
const createdStr = extractTag(coreXml, 'dcterms:created');
|
|
444
|
-
if (createdStr) {
|
|
445
|
-
this.properties.created = new Date(createdStr);
|
|
446
|
-
}
|
|
447
|
-
const modifiedStr = extractTag(coreXml, 'dcterms:modified');
|
|
448
|
-
if (modifiedStr) {
|
|
449
|
-
this.properties.modified = new Date(modifiedStr);
|
|
450
|
-
}
|
|
133
|
+
const result = await this.parser.parseDocument(this.zipHandler, this.relationshipManager);
|
|
134
|
+
this.bodyElements = result.bodyElements;
|
|
135
|
+
this.properties = result.properties;
|
|
136
|
+
this.relationshipManager = result.relationshipManager;
|
|
451
137
|
}
|
|
452
138
|
addParagraph(paragraph) {
|
|
453
139
|
this.bodyElements.push(paragraph);
|
|
@@ -504,48 +190,21 @@ class Document {
|
|
|
504
190
|
return this;
|
|
505
191
|
}
|
|
506
192
|
setProperties(properties) {
|
|
507
|
-
|
|
193
|
+
const validated = DocumentValidator_1.DocumentValidator.validateProperties(properties);
|
|
194
|
+
this.properties = { ...this.properties, ...validated };
|
|
508
195
|
return this;
|
|
509
196
|
}
|
|
510
197
|
getProperties() {
|
|
511
198
|
return { ...this.properties };
|
|
512
199
|
}
|
|
513
|
-
validateBeforeSave() {
|
|
514
|
-
const paragraphs = this.getParagraphs();
|
|
515
|
-
if (paragraphs.length === 0) {
|
|
516
|
-
console.warn('\nDocXML Save Warning:\n' +
|
|
517
|
-
'Document has no paragraphs. You are saving an empty document.\n');
|
|
518
|
-
return;
|
|
519
|
-
}
|
|
520
|
-
let totalRuns = 0;
|
|
521
|
-
let emptyRuns = 0;
|
|
522
|
-
for (const para of paragraphs) {
|
|
523
|
-
const runs = para.getRuns();
|
|
524
|
-
totalRuns += runs.length;
|
|
525
|
-
for (const run of runs) {
|
|
526
|
-
if (run.getText().length === 0) {
|
|
527
|
-
emptyRuns++;
|
|
528
|
-
}
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
if (totalRuns > 0) {
|
|
532
|
-
const emptyPercentage = (emptyRuns / totalRuns) * 100;
|
|
533
|
-
if (emptyPercentage > 90 && emptyRuns > 10) {
|
|
534
|
-
console.warn('\nDocXML Save Warning:\n' +
|
|
535
|
-
`You are about to save a document where ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) are empty.\n` +
|
|
536
|
-
'This may result in a document with no visible text content.\n' +
|
|
537
|
-
'If this is unintentional, please review the document before saving.\n');
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
}
|
|
541
200
|
async save(filePath) {
|
|
542
201
|
const tempPath = `${filePath}.tmp.${Date.now()}`;
|
|
543
202
|
try {
|
|
544
|
-
this.validateBeforeSave();
|
|
545
|
-
this.checkMemoryThreshold();
|
|
203
|
+
this.validator.validateBeforeSave(this.bodyElements);
|
|
204
|
+
this.validator.checkMemoryThreshold();
|
|
546
205
|
await this.imageManager.loadAllImageData();
|
|
547
|
-
this.checkMemoryThreshold();
|
|
548
|
-
const sizeInfo = this.estimateSize();
|
|
206
|
+
this.validator.checkMemoryThreshold();
|
|
207
|
+
const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
|
|
549
208
|
if (sizeInfo.warning) {
|
|
550
209
|
console.warn(`DocXML Warning: ${sizeInfo.warning}`);
|
|
551
210
|
}
|
|
@@ -579,11 +238,11 @@ class Document {
|
|
|
579
238
|
}
|
|
580
239
|
async toBuffer() {
|
|
581
240
|
try {
|
|
582
|
-
this.validateBeforeSave();
|
|
583
|
-
this.checkMemoryThreshold();
|
|
241
|
+
this.validator.validateBeforeSave(this.bodyElements);
|
|
242
|
+
this.validator.checkMemoryThreshold();
|
|
584
243
|
await this.imageManager.loadAllImageData();
|
|
585
|
-
this.checkMemoryThreshold();
|
|
586
|
-
const sizeInfo = this.estimateSize();
|
|
244
|
+
this.validator.checkMemoryThreshold();
|
|
245
|
+
const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
|
|
587
246
|
if (sizeInfo.warning) {
|
|
588
247
|
console.warn(`DocXML Warning: ${sizeInfo.warning}`);
|
|
589
248
|
}
|
|
@@ -605,11 +264,11 @@ class Document {
|
|
|
605
264
|
}
|
|
606
265
|
}
|
|
607
266
|
updateDocumentXml() {
|
|
608
|
-
const xml = this.generateDocumentXml();
|
|
267
|
+
const xml = this.generator.generateDocumentXml(this.bodyElements, this.section);
|
|
609
268
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.DOCUMENT, xml);
|
|
610
269
|
}
|
|
611
270
|
updateCoreProps() {
|
|
612
|
-
const xml = this.generateCoreProps();
|
|
271
|
+
const xml = this.generator.generateCoreProps(this.properties);
|
|
613
272
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.CORE_PROPS, xml);
|
|
614
273
|
}
|
|
615
274
|
updateStylesXml() {
|
|
@@ -620,78 +279,6 @@ class Document {
|
|
|
620
279
|
const xml = this.numberingManager.generateNumberingXml();
|
|
621
280
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.NUMBERING, xml);
|
|
622
281
|
}
|
|
623
|
-
generateContentTypes() {
|
|
624
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
625
|
-
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
|
|
626
|
-
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
|
|
627
|
-
<Default Extension="xml" ContentType="application/xml"/>
|
|
628
|
-
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
|
|
629
|
-
<Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
|
|
630
|
-
<Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>
|
|
631
|
-
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
|
|
632
|
-
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
|
|
633
|
-
</Types>`;
|
|
634
|
-
}
|
|
635
|
-
generateRels() {
|
|
636
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
637
|
-
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
|
|
638
|
-
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
|
|
639
|
-
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
|
|
640
|
-
<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
|
|
641
|
-
</Relationships>`;
|
|
642
|
-
}
|
|
643
|
-
generateDocumentXml() {
|
|
644
|
-
const bodyXmls = [];
|
|
645
|
-
for (const element of this.bodyElements) {
|
|
646
|
-
const xml = element.toXML();
|
|
647
|
-
if (Array.isArray(xml)) {
|
|
648
|
-
bodyXmls.push(...xml);
|
|
649
|
-
}
|
|
650
|
-
else {
|
|
651
|
-
bodyXmls.push(xml);
|
|
652
|
-
}
|
|
653
|
-
}
|
|
654
|
-
bodyXmls.push(this.section.toXML());
|
|
655
|
-
return XMLBuilder_1.XMLBuilder.createDocument(bodyXmls);
|
|
656
|
-
}
|
|
657
|
-
generateCoreProps() {
|
|
658
|
-
const now = new Date();
|
|
659
|
-
const created = this.properties.created || now;
|
|
660
|
-
const modified = this.properties.modified || now;
|
|
661
|
-
const formatDate = (date) => {
|
|
662
|
-
return date.toISOString();
|
|
663
|
-
};
|
|
664
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
665
|
-
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
|
|
666
|
-
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
|
667
|
-
xmlns:dcterms="http://purl.org/dc/terms/"
|
|
668
|
-
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
|
|
669
|
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
|
670
|
-
<dc:title>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.title || '')}</dc:title>
|
|
671
|
-
<dc:subject>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.subject || '')}</dc:subject>
|
|
672
|
-
<dc:creator>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.creator || 'DocXML')}</dc:creator>
|
|
673
|
-
<cp:keywords>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.keywords || '')}</cp:keywords>
|
|
674
|
-
<dc:description>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.description || '')}</dc:description>
|
|
675
|
-
<cp:lastModifiedBy>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.lastModifiedBy || this.properties.creator || 'DocXML')}</cp:lastModifiedBy>
|
|
676
|
-
<cp:revision>${this.properties.revision || 1}</cp:revision>
|
|
677
|
-
<dcterms:created xsi:type="dcterms:W3CDTF">${formatDate(created)}</dcterms:created>
|
|
678
|
-
<dcterms:modified xsi:type="dcterms:W3CDTF">${formatDate(modified)}</dcterms:modified>
|
|
679
|
-
</cp:coreProperties>`;
|
|
680
|
-
}
|
|
681
|
-
generateAppProps() {
|
|
682
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
683
|
-
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
|
|
684
|
-
xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
|
|
685
|
-
<Application>DocXML</Application>
|
|
686
|
-
<DocSecurity>0</DocSecurity>
|
|
687
|
-
<ScaleCrop>false</ScaleCrop>
|
|
688
|
-
<Company></Company>
|
|
689
|
-
<LinksUpToDate>false</LinksUpToDate>
|
|
690
|
-
<SharedDoc>false</SharedDoc>
|
|
691
|
-
<HyperlinksChanged>false</HyperlinksChanged>
|
|
692
|
-
<AppVersion>0.1.0</AppVersion>
|
|
693
|
-
</Properties>`;
|
|
694
|
-
}
|
|
695
282
|
getStylesManager() {
|
|
696
283
|
return this.stylesManager;
|
|
697
284
|
}
|
|
@@ -800,38 +387,7 @@ class Document {
|
|
|
800
387
|
return this.relationshipManager;
|
|
801
388
|
}
|
|
802
389
|
processHyperlinks() {
|
|
803
|
-
|
|
804
|
-
const headers = this.headerFooterManager.getAllHeaders();
|
|
805
|
-
const footers = this.headerFooterManager.getAllFooters();
|
|
806
|
-
for (const header of headers) {
|
|
807
|
-
for (const element of header.header.getElements()) {
|
|
808
|
-
if (element instanceof Paragraph_1.Paragraph) {
|
|
809
|
-
this.processHyperlinksInParagraph(element);
|
|
810
|
-
}
|
|
811
|
-
}
|
|
812
|
-
}
|
|
813
|
-
for (const footer of footers) {
|
|
814
|
-
for (const element of footer.footer.getElements()) {
|
|
815
|
-
if (element instanceof Paragraph_1.Paragraph) {
|
|
816
|
-
this.processHyperlinksInParagraph(element);
|
|
817
|
-
}
|
|
818
|
-
}
|
|
819
|
-
}
|
|
820
|
-
for (const para of paragraphs) {
|
|
821
|
-
this.processHyperlinksInParagraph(para);
|
|
822
|
-
}
|
|
823
|
-
}
|
|
824
|
-
processHyperlinksInParagraph(paragraph) {
|
|
825
|
-
const content = paragraph.getContent();
|
|
826
|
-
for (const item of content) {
|
|
827
|
-
if (item instanceof Hyperlink_1.Hyperlink && item.isExternal() && !item.getRelationshipId()) {
|
|
828
|
-
const url = item.getUrl();
|
|
829
|
-
if (url) {
|
|
830
|
-
const relationship = this.relationshipManager.addHyperlink(url);
|
|
831
|
-
item.setRelationshipId(relationship.getId());
|
|
832
|
-
}
|
|
833
|
-
}
|
|
834
|
-
}
|
|
390
|
+
this.generator.processHyperlinks(this.bodyElements, this.headerFooterManager, this.relationshipManager);
|
|
835
391
|
}
|
|
836
392
|
saveImages() {
|
|
837
393
|
const images = this.imageManager.getAllImages();
|
|
@@ -871,43 +427,9 @@ class Document {
|
|
|
871
427
|
}
|
|
872
428
|
}
|
|
873
429
|
updateContentTypesWithImagesHeadersFootersAndComments() {
|
|
874
|
-
const contentTypes = this.generateContentTypesWithImagesHeadersFootersAndComments();
|
|
430
|
+
const contentTypes = this.generator.generateContentTypesWithImagesHeadersFootersAndComments(this.imageManager, this.headerFooterManager, this.commentManager);
|
|
875
431
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.CONTENT_TYPES, contentTypes);
|
|
876
432
|
}
|
|
877
|
-
generateContentTypesWithImagesHeadersFootersAndComments() {
|
|
878
|
-
const images = this.imageManager.getAllImages();
|
|
879
|
-
const headers = this.headerFooterManager.getAllHeaders();
|
|
880
|
-
const footers = this.headerFooterManager.getAllFooters();
|
|
881
|
-
const hasComments = this.commentManager.getCount() > 0;
|
|
882
|
-
const extensions = new Set();
|
|
883
|
-
for (const entry of images) {
|
|
884
|
-
extensions.add(entry.image.getExtension());
|
|
885
|
-
}
|
|
886
|
-
let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
|
|
887
|
-
xml += '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">\n';
|
|
888
|
-
xml += ' <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>\n';
|
|
889
|
-
xml += ' <Default Extension="xml" ContentType="application/xml"/>\n';
|
|
890
|
-
for (const ext of extensions) {
|
|
891
|
-
const mimeType = ImageManager_1.ImageManager.getMimeType(ext);
|
|
892
|
-
xml += ` <Default Extension="${ext}" ContentType="${mimeType}"/>\n`;
|
|
893
|
-
}
|
|
894
|
-
xml += ' <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>\n';
|
|
895
|
-
xml += ' <Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>\n';
|
|
896
|
-
xml += ' <Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>\n';
|
|
897
|
-
for (const entry of headers) {
|
|
898
|
-
xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>\n`;
|
|
899
|
-
}
|
|
900
|
-
for (const entry of footers) {
|
|
901
|
-
xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>\n`;
|
|
902
|
-
}
|
|
903
|
-
if (hasComments) {
|
|
904
|
-
xml += ' <Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n';
|
|
905
|
-
}
|
|
906
|
-
xml += ' <Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>\n';
|
|
907
|
-
xml += ' <Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>\n';
|
|
908
|
-
xml += '</Types>';
|
|
909
|
-
return xml;
|
|
910
|
-
}
|
|
911
433
|
getBookmarkManager() {
|
|
912
434
|
return this.bookmarkManager;
|
|
913
435
|
}
|
|
@@ -1020,53 +542,30 @@ class Document {
|
|
|
1020
542
|
return this.revisionManager.getAllDeletions();
|
|
1021
543
|
}
|
|
1022
544
|
getParseWarnings() {
|
|
1023
|
-
return
|
|
1024
|
-
}
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
const
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
545
|
+
return this.parser.getParseErrors();
|
|
546
|
+
}
|
|
547
|
+
updateHyperlinkUrls(urlMap) {
|
|
548
|
+
let updatedCount = 0;
|
|
549
|
+
for (const para of this.getParagraphs()) {
|
|
550
|
+
for (const content of para.getContent()) {
|
|
551
|
+
if (content instanceof Hyperlink_1.Hyperlink && content.isExternal()) {
|
|
552
|
+
const currentUrl = content.getUrl();
|
|
553
|
+
if (currentUrl && urlMap.has(currentUrl)) {
|
|
554
|
+
const newUrl = urlMap.get(currentUrl);
|
|
555
|
+
content.setUrl(newUrl);
|
|
556
|
+
updatedCount++;
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
}
|
|
1035
560
|
}
|
|
561
|
+
return updatedCount;
|
|
1036
562
|
}
|
|
1037
563
|
estimateSize() {
|
|
1038
|
-
|
|
1039
|
-
const tableCount = this.getTableCount();
|
|
1040
|
-
const imageCount = this.imageManager.getImageCount();
|
|
1041
|
-
const estimatedXml = (paragraphCount * 200) + (tableCount * 1000) + 50000;
|
|
1042
|
-
const imageBytes = this.imageManager.getTotalSize();
|
|
1043
|
-
const totalBytes = estimatedXml + imageBytes;
|
|
1044
|
-
const totalMB = totalBytes / (1024 * 1024);
|
|
1045
|
-
const WARNING_MB = 50;
|
|
1046
|
-
const ERROR_MB = 100;
|
|
1047
|
-
let warning;
|
|
1048
|
-
if (totalMB > ERROR_MB) {
|
|
1049
|
-
warning = `Document size (${totalMB.toFixed(1)}MB) exceeds recommended maximum of ${ERROR_MB}MB. ` +
|
|
1050
|
-
`This may cause memory issues. Consider splitting into multiple documents or optimizing images.`;
|
|
1051
|
-
}
|
|
1052
|
-
else if (totalMB > WARNING_MB) {
|
|
1053
|
-
warning = `Document size (${totalMB.toFixed(1)}MB) exceeds ${WARNING_MB}MB. ` +
|
|
1054
|
-
`Large documents may take longer to process and use significant memory.`;
|
|
1055
|
-
}
|
|
1056
|
-
return {
|
|
1057
|
-
paragraphs: paragraphCount,
|
|
1058
|
-
tables: tableCount,
|
|
1059
|
-
images: imageCount,
|
|
1060
|
-
estimatedXmlBytes: estimatedXml,
|
|
1061
|
-
imageBytes,
|
|
1062
|
-
totalEstimatedBytes: totalBytes,
|
|
1063
|
-
totalEstimatedMB: parseFloat(totalMB.toFixed(2)),
|
|
1064
|
-
warning,
|
|
1065
|
-
};
|
|
564
|
+
return this.validator.estimateSize(this.bodyElements, this.imageManager);
|
|
1066
565
|
}
|
|
1067
566
|
dispose() {
|
|
1068
567
|
this.bodyElements = [];
|
|
1069
|
-
this.
|
|
568
|
+
this.parser.clearParseErrors();
|
|
1070
569
|
this.stylesManager = StylesManager_1.StylesManager.create();
|
|
1071
570
|
this.numberingManager = NumberingManager_1.NumberingManager.create();
|
|
1072
571
|
this.imageManager.clear();
|
|
@@ -1078,31 +577,7 @@ class Document {
|
|
|
1078
577
|
this.commentManager.clear();
|
|
1079
578
|
}
|
|
1080
579
|
getSizeStats() {
|
|
1081
|
-
|
|
1082
|
-
const warnings = [];
|
|
1083
|
-
if (estimate.warning) {
|
|
1084
|
-
warnings.push(estimate.warning);
|
|
1085
|
-
}
|
|
1086
|
-
const formatBytes = (bytes) => {
|
|
1087
|
-
if (bytes < 1024)
|
|
1088
|
-
return `${bytes} B`;
|
|
1089
|
-
if (bytes < 1024 * 1024)
|
|
1090
|
-
return `${(bytes / 1024).toFixed(1)} KB`;
|
|
1091
|
-
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
|
1092
|
-
};
|
|
1093
|
-
return {
|
|
1094
|
-
elements: {
|
|
1095
|
-
paragraphs: estimate.paragraphs,
|
|
1096
|
-
tables: estimate.tables,
|
|
1097
|
-
images: estimate.images,
|
|
1098
|
-
},
|
|
1099
|
-
size: {
|
|
1100
|
-
xml: formatBytes(estimate.estimatedXmlBytes),
|
|
1101
|
-
images: formatBytes(estimate.imageBytes),
|
|
1102
|
-
total: formatBytes(estimate.totalEstimatedBytes),
|
|
1103
|
-
},
|
|
1104
|
-
warnings,
|
|
1105
|
-
};
|
|
580
|
+
return this.validator.getSizeStats(this.bodyElements, this.imageManager);
|
|
1106
581
|
}
|
|
1107
582
|
}
|
|
1108
583
|
exports.Document = Document;
|