docxmlater 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -21
- package/dist/core/Document.d.ts +8 -20
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +49 -535
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentGenerator.d.ts +22 -0
- package/dist/core/DocumentGenerator.d.ts.map +1 -0
- package/dist/core/DocumentGenerator.js +161 -0
- package/dist/core/DocumentGenerator.js.map +1 -0
- package/dist/core/DocumentParser.d.ts +32 -0
- package/dist/core/DocumentParser.d.ts.map +1 -0
- package/dist/core/DocumentParser.js +404 -0
- package/dist/core/DocumentParser.js.map +1 -0
- package/dist/core/DocumentValidator.d.ts +46 -0
- package/dist/core/DocumentValidator.d.ts.map +1 -0
- package/dist/core/DocumentValidator.js +223 -0
- package/dist/core/DocumentValidator.js.map +1 -0
- package/dist/core/RelationshipManager.d.ts.map +1 -1
- package/dist/core/RelationshipManager.js +19 -3
- package/dist/core/RelationshipManager.js.map +1 -1
- package/dist/elements/Image.d.ts +5 -1
- package/dist/elements/Image.d.ts.map +1 -1
- package/dist/elements/Image.js +118 -12
- package/dist/elements/Image.js.map +1 -1
- package/dist/elements/ImageManager.d.ts +11 -3
- package/dist/elements/ImageManager.d.ts.map +1 -1
- package/dist/elements/ImageManager.js +72 -6
- package/dist/elements/ImageManager.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +10 -1
- package/dist/utils/validation.js.map +1 -1
- package/package.json +1 -1
package/dist/core/Document.js
CHANGED
|
@@ -41,7 +41,6 @@ const Table_1 = require("../elements/Table");
|
|
|
41
41
|
const Section_1 = require("../elements/Section");
|
|
42
42
|
const ImageManager_1 = require("../elements/ImageManager");
|
|
43
43
|
const HeaderFooterManager_1 = require("../elements/HeaderFooterManager");
|
|
44
|
-
const Hyperlink_1 = require("../elements/Hyperlink");
|
|
45
44
|
const TableOfContents_1 = require("../elements/TableOfContents");
|
|
46
45
|
const TableOfContentsElement_1 = require("../elements/TableOfContentsElement");
|
|
47
46
|
const BookmarkManager_1 = require("../elements/BookmarkManager");
|
|
@@ -49,17 +48,19 @@ const Revision_1 = require("../elements/Revision");
|
|
|
49
48
|
const RevisionManager_1 = require("../elements/RevisionManager");
|
|
50
49
|
const CommentManager_1 = require("../elements/CommentManager");
|
|
51
50
|
const Run_1 = require("../elements/Run");
|
|
52
|
-
const XMLBuilder_1 = require("../xml/XMLBuilder");
|
|
53
|
-
const XMLParser_1 = require("../xml/XMLParser");
|
|
54
51
|
const StylesManager_1 = require("../formatting/StylesManager");
|
|
55
52
|
const NumberingManager_1 = require("../formatting/NumberingManager");
|
|
56
53
|
const RelationshipManager_1 = require("./RelationshipManager");
|
|
57
|
-
|
|
54
|
+
const DocumentParser_1 = require("./DocumentParser");
|
|
55
|
+
const DocumentGenerator_1 = require("./DocumentGenerator");
|
|
56
|
+
const DocumentValidator_1 = require("./DocumentValidator");
|
|
57
|
+
class ImageRun extends Run_1.Run {
|
|
58
58
|
constructor(image) {
|
|
59
|
-
|
|
59
|
+
super('');
|
|
60
|
+
this.imageElement = image;
|
|
60
61
|
}
|
|
61
62
|
toXML() {
|
|
62
|
-
const drawing = this.
|
|
63
|
+
const drawing = this.imageElement.toXML();
|
|
63
64
|
return {
|
|
64
65
|
name: 'w:r',
|
|
65
66
|
children: [drawing]
|
|
@@ -69,15 +70,25 @@ class ImageRun {
|
|
|
69
70
|
class Document {
|
|
70
71
|
constructor(zipHandler, options = {}, initDefaults = true) {
|
|
71
72
|
this.bodyElements = [];
|
|
72
|
-
this.parseErrors = [];
|
|
73
73
|
this.zipHandler = zipHandler || new ZipHandler_1.ZipHandler();
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
this.
|
|
74
|
+
const strictParsing = options.strictParsing ?? false;
|
|
75
|
+
const memoryPercent = options.maxMemoryUsagePercent ?? 80;
|
|
76
|
+
this.parser = new DocumentParser_1.DocumentParser(strictParsing);
|
|
77
|
+
this.generator = new DocumentGenerator_1.DocumentGenerator();
|
|
78
|
+
this.validator = new DocumentValidator_1.DocumentValidator(memoryPercent, {
|
|
79
|
+
maxMemoryUsagePercent: options.maxMemoryUsagePercent,
|
|
80
|
+
maxRssMB: options.maxRssMB,
|
|
81
|
+
useAbsoluteLimit: options.useAbsoluteMemoryLimit,
|
|
82
|
+
});
|
|
83
|
+
this.properties = options.properties ? DocumentValidator_1.DocumentValidator.validateProperties(options.properties) : {};
|
|
77
84
|
this.stylesManager = StylesManager_1.StylesManager.create();
|
|
78
85
|
this.numberingManager = NumberingManager_1.NumberingManager.create();
|
|
79
86
|
this.section = Section_1.Section.createLetter();
|
|
80
|
-
this.imageManager = ImageManager_1.ImageManager.create(
|
|
87
|
+
this.imageManager = ImageManager_1.ImageManager.create({
|
|
88
|
+
maxImageCount: options.maxImageCount,
|
|
89
|
+
maxTotalImageSizeMB: options.maxTotalImageSizeMB,
|
|
90
|
+
maxSingleImageSizeMB: options.maxSingleImageSizeMB,
|
|
91
|
+
});
|
|
81
92
|
this.relationshipManager = RelationshipManager_1.RelationshipManager.create();
|
|
82
93
|
this.headerFooterManager = HeaderFooterManager_1.HeaderFooterManager.create();
|
|
83
94
|
this.bookmarkManager = BookmarkManager_1.BookmarkManager.create();
|
|
@@ -108,290 +119,20 @@ class Document {
|
|
|
108
119
|
return doc;
|
|
109
120
|
}
|
|
110
121
|
initializeRequiredFiles() {
|
|
111
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generateContentTypes());
|
|
112
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generateRels());
|
|
113
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generateDocumentXml());
|
|
122
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generator.generateContentTypes());
|
|
123
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generator.generateRels());
|
|
124
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generator.generateDocumentXml(this.bodyElements, this.section));
|
|
114
125
|
this.zipHandler.addFile('word/_rels/document.xml.rels', this.relationshipManager.generateXml());
|
|
115
126
|
this.zipHandler.addFile(types_1.DOCX_PATHS.STYLES, this.stylesManager.generateStylesXml());
|
|
116
127
|
this.zipHandler.addFile(types_1.DOCX_PATHS.NUMBERING, this.numberingManager.generateNumberingXml());
|
|
117
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generateCoreProps());
|
|
118
|
-
this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generateAppProps());
|
|
128
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generator.generateCoreProps(this.properties));
|
|
129
|
+
this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generator.generateAppProps());
|
|
119
130
|
}
|
|
120
131
|
async parseDocument() {
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
this.parseRelationships();
|
|
126
|
-
this.parseProperties();
|
|
127
|
-
this.parseBodyElements(docXml);
|
|
128
|
-
}
|
|
129
|
-
parseBodyElements(docXml) {
|
|
130
|
-
this.bodyElements = [];
|
|
131
|
-
try {
|
|
132
|
-
XMLParser_1.XMLParser.validateSize(docXml);
|
|
133
|
-
}
|
|
134
|
-
catch (error) {
|
|
135
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
136
|
-
this.parseErrors.push({ element: 'document', error: err });
|
|
137
|
-
if (this.strictParsing) {
|
|
138
|
-
throw err;
|
|
139
|
-
}
|
|
140
|
-
return;
|
|
141
|
-
}
|
|
142
|
-
const bodyContent = XMLParser_1.XMLParser.extractBody(docXml);
|
|
143
|
-
if (!bodyContent) {
|
|
144
|
-
return;
|
|
145
|
-
}
|
|
146
|
-
const paragraphXmls = XMLParser_1.XMLParser.extractElements(bodyContent, 'w:p');
|
|
147
|
-
for (const paraXml of paragraphXmls) {
|
|
148
|
-
const paragraph = this.parseParagraph(paraXml);
|
|
149
|
-
if (paragraph) {
|
|
150
|
-
this.bodyElements.push(paragraph);
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
const hasTable = bodyContent.includes('<w:tbl');
|
|
154
|
-
if (hasTable) {
|
|
155
|
-
const err = new Error('Document contains tables which are not yet fully supported in Phase 2. Tables will be ignored.');
|
|
156
|
-
this.parseErrors.push({ element: 'table', error: err });
|
|
157
|
-
if (this.strictParsing) {
|
|
158
|
-
throw err;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
this.validateLoadedContent();
|
|
162
|
-
}
|
|
163
|
-
validateLoadedContent() {
|
|
164
|
-
const paragraphs = this.bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
|
|
165
|
-
if (paragraphs.length === 0) {
|
|
166
|
-
return;
|
|
167
|
-
}
|
|
168
|
-
let totalRuns = 0;
|
|
169
|
-
let emptyRuns = 0;
|
|
170
|
-
let runsWithText = 0;
|
|
171
|
-
for (const para of paragraphs) {
|
|
172
|
-
const runs = para.getRuns();
|
|
173
|
-
totalRuns += runs.length;
|
|
174
|
-
for (const run of runs) {
|
|
175
|
-
const text = run.getText();
|
|
176
|
-
if (text.length === 0) {
|
|
177
|
-
emptyRuns++;
|
|
178
|
-
}
|
|
179
|
-
else {
|
|
180
|
-
runsWithText++;
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
if (totalRuns > 0) {
|
|
185
|
-
const emptyPercentage = (emptyRuns / totalRuns) * 100;
|
|
186
|
-
if (emptyPercentage > 90 && emptyRuns > 10) {
|
|
187
|
-
const warning = new Error(`WARNING: Document appears to be corrupted or empty. ` +
|
|
188
|
-
`${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) have no text content. ` +
|
|
189
|
-
`This may indicate:\n` +
|
|
190
|
-
` - The document was already corrupted before loading\n` +
|
|
191
|
-
` - Text content was stripped by another application\n` +
|
|
192
|
-
` - Encoding issues during document creation\n` +
|
|
193
|
-
`Original document structure is preserved, but text may be lost.`);
|
|
194
|
-
this.parseErrors.push({ element: 'document-validation', error: warning });
|
|
195
|
-
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
196
|
-
}
|
|
197
|
-
else if (emptyPercentage > 50 && emptyRuns > 5) {
|
|
198
|
-
const warning = new Error(`Document has ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) with no text. ` +
|
|
199
|
-
`This is higher than normal and may indicate partial data loss.`);
|
|
200
|
-
this.parseErrors.push({ element: 'document-validation', error: warning });
|
|
201
|
-
console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
parseParagraph(paraXml) {
|
|
206
|
-
try {
|
|
207
|
-
const paragraph = new Paragraph_1.Paragraph();
|
|
208
|
-
this.parseParagraphProperties(paraXml, paragraph);
|
|
209
|
-
const runXmls = XMLParser_1.XMLParser.extractElements(paraXml, 'w:r');
|
|
210
|
-
for (const runXml of runXmls) {
|
|
211
|
-
const run = this.parseRun(runXml);
|
|
212
|
-
if (run) {
|
|
213
|
-
paragraph.addRun(run);
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
return paragraph;
|
|
217
|
-
}
|
|
218
|
-
catch (error) {
|
|
219
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
220
|
-
this.parseErrors.push({ element: 'paragraph', error: err });
|
|
221
|
-
if (this.strictParsing) {
|
|
222
|
-
throw new Error(`Failed to parse paragraph: ${err.message}`);
|
|
223
|
-
}
|
|
224
|
-
return null;
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
parseParagraphProperties(paraXml, paragraph) {
|
|
228
|
-
const pPrMatch = paraXml.match(/<w:pPr[^>]*>([\s\S]*?)<\/w:pPr>/);
|
|
229
|
-
if (!pPrMatch || !pPrMatch[1]) {
|
|
230
|
-
return;
|
|
231
|
-
}
|
|
232
|
-
const pPr = pPrMatch[1];
|
|
233
|
-
const alignMatch = pPr.match(/<w:jc\s+w:val="([^"]+)"/);
|
|
234
|
-
if (alignMatch && alignMatch[1]) {
|
|
235
|
-
const alignment = alignMatch[1];
|
|
236
|
-
paragraph.setAlignment(alignment);
|
|
237
|
-
}
|
|
238
|
-
const styleMatch = pPr.match(/<w:pStyle\s+w:val="([^"]+)"/);
|
|
239
|
-
if (styleMatch && styleMatch[1]) {
|
|
240
|
-
paragraph.setStyle(styleMatch[1]);
|
|
241
|
-
}
|
|
242
|
-
const indMatch = pPr.match(/<w:ind([^>]+)\/>/);
|
|
243
|
-
if (indMatch && indMatch[1]) {
|
|
244
|
-
const indStr = indMatch[1];
|
|
245
|
-
const leftMatch = indStr.match(/w:left="(\d+)"/);
|
|
246
|
-
const rightMatch = indStr.match(/w:right="(\d+)"/);
|
|
247
|
-
const firstLineMatch = indStr.match(/w:firstLine="(\d+)"/);
|
|
248
|
-
if (leftMatch && leftMatch[1]) {
|
|
249
|
-
paragraph.setLeftIndent(parseInt(leftMatch[1], 10));
|
|
250
|
-
}
|
|
251
|
-
if (rightMatch && rightMatch[1]) {
|
|
252
|
-
paragraph.setRightIndent(parseInt(rightMatch[1], 10));
|
|
253
|
-
}
|
|
254
|
-
if (firstLineMatch && firstLineMatch[1]) {
|
|
255
|
-
paragraph.setFirstLineIndent(parseInt(firstLineMatch[1], 10));
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
const spacingMatch = pPr.match(/<w:spacing([^>]+)\/>/);
|
|
259
|
-
if (spacingMatch && spacingMatch[1]) {
|
|
260
|
-
const spacingStr = spacingMatch[1];
|
|
261
|
-
const beforeMatch = spacingStr.match(/w:before="(\d+)"/);
|
|
262
|
-
const afterMatch = spacingStr.match(/w:after="(\d+)"/);
|
|
263
|
-
const lineMatch = spacingStr.match(/w:line="(\d+)"/);
|
|
264
|
-
if (beforeMatch && beforeMatch[1]) {
|
|
265
|
-
paragraph.setSpaceBefore(parseInt(beforeMatch[1], 10));
|
|
266
|
-
}
|
|
267
|
-
if (afterMatch && afterMatch[1]) {
|
|
268
|
-
paragraph.setSpaceAfter(parseInt(afterMatch[1], 10));
|
|
269
|
-
}
|
|
270
|
-
if (lineMatch && lineMatch[1]) {
|
|
271
|
-
const lineRule = spacingStr.match(/w:lineRule="([^"]+)"/);
|
|
272
|
-
paragraph.setLineSpacing(parseInt(lineMatch[1], 10), lineRule && lineRule[1] ? lineRule[1] : undefined);
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
if (pPr.includes('<w:keepNext'))
|
|
276
|
-
paragraph.setKeepNext(true);
|
|
277
|
-
if (pPr.includes('<w:keepLines'))
|
|
278
|
-
paragraph.setKeepLines(true);
|
|
279
|
-
if (pPr.includes('<w:pageBreakBefore'))
|
|
280
|
-
paragraph.setPageBreakBefore(true);
|
|
281
|
-
}
|
|
282
|
-
parseRun(runXml) {
|
|
283
|
-
try {
|
|
284
|
-
const text = XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
|
|
285
|
-
const run = new Run_1.Run(text);
|
|
286
|
-
this.parseRunProperties(runXml, run);
|
|
287
|
-
return run;
|
|
288
|
-
}
|
|
289
|
-
catch (error) {
|
|
290
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
291
|
-
this.parseErrors.push({ element: 'run', error: err });
|
|
292
|
-
if (this.strictParsing) {
|
|
293
|
-
throw new Error(`Failed to parse run: ${err.message}`);
|
|
294
|
-
}
|
|
295
|
-
return null;
|
|
296
|
-
}
|
|
297
|
-
}
|
|
298
|
-
parseRunProperties(runXml, run) {
|
|
299
|
-
const rPrMatch = runXml.match(/<w:rPr[^>]*>([\s\S]*?)<\/w:rPr>/);
|
|
300
|
-
if (!rPrMatch || !rPrMatch[1]) {
|
|
301
|
-
return;
|
|
302
|
-
}
|
|
303
|
-
const rPr = rPrMatch[1];
|
|
304
|
-
if (rPr.includes('<w:b/>') || rPr.includes('<w:b ')) {
|
|
305
|
-
run.setBold(true);
|
|
306
|
-
}
|
|
307
|
-
if (rPr.includes('<w:i/>') || rPr.includes('<w:i ')) {
|
|
308
|
-
run.setItalic(true);
|
|
309
|
-
}
|
|
310
|
-
const underlineMatch = rPr.match(/<w:u\s+w:val="([^"]+)"/);
|
|
311
|
-
if (underlineMatch && underlineMatch[1]) {
|
|
312
|
-
const underlineStyle = underlineMatch[1];
|
|
313
|
-
run.setUnderline(underlineStyle);
|
|
314
|
-
}
|
|
315
|
-
else if (rPr.includes('<w:u/>')) {
|
|
316
|
-
run.setUnderline(true);
|
|
317
|
-
}
|
|
318
|
-
if (rPr.includes('<w:strike/>') || rPr.includes('<w:strike ')) {
|
|
319
|
-
run.setStrike(true);
|
|
320
|
-
}
|
|
321
|
-
const vertAlignMatch = rPr.match(/<w:vertAlign\s+w:val="([^"]+)"/);
|
|
322
|
-
if (vertAlignMatch && vertAlignMatch[1]) {
|
|
323
|
-
if (vertAlignMatch[1] === 'subscript') {
|
|
324
|
-
run.setSubscript(true);
|
|
325
|
-
}
|
|
326
|
-
else if (vertAlignMatch[1] === 'superscript') {
|
|
327
|
-
run.setSuperscript(true);
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
const fontMatch = rPr.match(/<w:rFonts[^>]+w:ascii="([^"]+)"/);
|
|
331
|
-
if (fontMatch && fontMatch[1]) {
|
|
332
|
-
run.setFont(fontMatch[1]);
|
|
333
|
-
}
|
|
334
|
-
const sizeMatch = rPr.match(/<w:sz\s+w:val="(\d+)"/);
|
|
335
|
-
if (sizeMatch && sizeMatch[1]) {
|
|
336
|
-
const halfPoints = parseInt(sizeMatch[1], 10);
|
|
337
|
-
run.setSize(halfPoints / 2);
|
|
338
|
-
}
|
|
339
|
-
const colorMatch = rPr.match(/<w:color\s+w:val="([^"]+)"/);
|
|
340
|
-
if (colorMatch && colorMatch[1]) {
|
|
341
|
-
run.setColor(colorMatch[1]);
|
|
342
|
-
}
|
|
343
|
-
const highlightMatch = rPr.match(/<w:highlight\s+w:val="([^"]+)"/);
|
|
344
|
-
if (highlightMatch && highlightMatch[1]) {
|
|
345
|
-
const highlightColor = highlightMatch[1];
|
|
346
|
-
run.setHighlight(highlightColor);
|
|
347
|
-
}
|
|
348
|
-
if (rPr.includes('<w:smallCaps/>') || rPr.includes('<w:smallCaps ')) {
|
|
349
|
-
run.setSmallCaps(true);
|
|
350
|
-
}
|
|
351
|
-
if (rPr.includes('<w:caps/>') || rPr.includes('<w:caps ')) {
|
|
352
|
-
run.setAllCaps(true);
|
|
353
|
-
}
|
|
354
|
-
}
|
|
355
|
-
parseRelationships() {
|
|
356
|
-
const relsPath = 'word/_rels/document.xml.rels';
|
|
357
|
-
const relsXml = this.zipHandler.getFileAsString(relsPath);
|
|
358
|
-
if (relsXml) {
|
|
359
|
-
this.relationshipManager = RelationshipManager_1.RelationshipManager.fromXml(relsXml);
|
|
360
|
-
}
|
|
361
|
-
else {
|
|
362
|
-
this.relationshipManager.addStyles();
|
|
363
|
-
this.relationshipManager.addNumbering();
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
parseProperties() {
|
|
367
|
-
const coreXml = this.zipHandler.getFileAsString(types_1.DOCX_PATHS.CORE_PROPS);
|
|
368
|
-
if (!coreXml) {
|
|
369
|
-
return;
|
|
370
|
-
}
|
|
371
|
-
const extractTag = (xml, tag) => {
|
|
372
|
-
const match = xml.match(new RegExp(`<${tag}[^>]*>([^<]*)</${tag}>`));
|
|
373
|
-
return match && match[1] ? XMLBuilder_1.XMLBuilder.unescapeXml(match[1]) : undefined;
|
|
374
|
-
};
|
|
375
|
-
this.properties = {
|
|
376
|
-
title: extractTag(coreXml, 'dc:title'),
|
|
377
|
-
subject: extractTag(coreXml, 'dc:subject'),
|
|
378
|
-
creator: extractTag(coreXml, 'dc:creator'),
|
|
379
|
-
keywords: extractTag(coreXml, 'cp:keywords'),
|
|
380
|
-
description: extractTag(coreXml, 'dc:description'),
|
|
381
|
-
lastModifiedBy: extractTag(coreXml, 'cp:lastModifiedBy'),
|
|
382
|
-
};
|
|
383
|
-
const revisionStr = extractTag(coreXml, 'cp:revision');
|
|
384
|
-
if (revisionStr) {
|
|
385
|
-
this.properties.revision = parseInt(revisionStr, 10);
|
|
386
|
-
}
|
|
387
|
-
const createdStr = extractTag(coreXml, 'dcterms:created');
|
|
388
|
-
if (createdStr) {
|
|
389
|
-
this.properties.created = new Date(createdStr);
|
|
390
|
-
}
|
|
391
|
-
const modifiedStr = extractTag(coreXml, 'dcterms:modified');
|
|
392
|
-
if (modifiedStr) {
|
|
393
|
-
this.properties.modified = new Date(modifiedStr);
|
|
394
|
-
}
|
|
132
|
+
const result = await this.parser.parseDocument(this.zipHandler, this.relationshipManager);
|
|
133
|
+
this.bodyElements = result.bodyElements;
|
|
134
|
+
this.properties = result.properties;
|
|
135
|
+
this.relationshipManager = result.relationshipManager;
|
|
395
136
|
}
|
|
396
137
|
addParagraph(paragraph) {
|
|
397
138
|
this.bodyElements.push(paragraph);
|
|
@@ -448,48 +189,21 @@ class Document {
|
|
|
448
189
|
return this;
|
|
449
190
|
}
|
|
450
191
|
setProperties(properties) {
|
|
451
|
-
|
|
192
|
+
const validated = DocumentValidator_1.DocumentValidator.validateProperties(properties);
|
|
193
|
+
this.properties = { ...this.properties, ...validated };
|
|
452
194
|
return this;
|
|
453
195
|
}
|
|
454
196
|
getProperties() {
|
|
455
197
|
return { ...this.properties };
|
|
456
198
|
}
|
|
457
|
-
validateBeforeSave() {
|
|
458
|
-
const paragraphs = this.getParagraphs();
|
|
459
|
-
if (paragraphs.length === 0) {
|
|
460
|
-
console.warn('\nDocXML Save Warning:\n' +
|
|
461
|
-
'Document has no paragraphs. You are saving an empty document.\n');
|
|
462
|
-
return;
|
|
463
|
-
}
|
|
464
|
-
let totalRuns = 0;
|
|
465
|
-
let emptyRuns = 0;
|
|
466
|
-
for (const para of paragraphs) {
|
|
467
|
-
const runs = para.getRuns();
|
|
468
|
-
totalRuns += runs.length;
|
|
469
|
-
for (const run of runs) {
|
|
470
|
-
if (run.getText().length === 0) {
|
|
471
|
-
emptyRuns++;
|
|
472
|
-
}
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
if (totalRuns > 0) {
|
|
476
|
-
const emptyPercentage = (emptyRuns / totalRuns) * 100;
|
|
477
|
-
if (emptyPercentage > 90 && emptyRuns > 10) {
|
|
478
|
-
console.warn('\nDocXML Save Warning:\n' +
|
|
479
|
-
`You are about to save a document where ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) are empty.\n` +
|
|
480
|
-
'This may result in a document with no visible text content.\n' +
|
|
481
|
-
'If this is unintentional, please review the document before saving.\n');
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
}
|
|
485
199
|
async save(filePath) {
|
|
486
200
|
const tempPath = `${filePath}.tmp.${Date.now()}`;
|
|
487
201
|
try {
|
|
488
|
-
this.validateBeforeSave();
|
|
489
|
-
this.checkMemoryThreshold();
|
|
202
|
+
this.validator.validateBeforeSave(this.bodyElements);
|
|
203
|
+
this.validator.checkMemoryThreshold();
|
|
490
204
|
await this.imageManager.loadAllImageData();
|
|
491
|
-
this.checkMemoryThreshold();
|
|
492
|
-
const sizeInfo = this.estimateSize();
|
|
205
|
+
this.validator.checkMemoryThreshold();
|
|
206
|
+
const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
|
|
493
207
|
if (sizeInfo.warning) {
|
|
494
208
|
console.warn(`DocXML Warning: ${sizeInfo.warning}`);
|
|
495
209
|
}
|
|
@@ -523,11 +237,11 @@ class Document {
|
|
|
523
237
|
}
|
|
524
238
|
async toBuffer() {
|
|
525
239
|
try {
|
|
526
|
-
this.validateBeforeSave();
|
|
527
|
-
this.checkMemoryThreshold();
|
|
240
|
+
this.validator.validateBeforeSave(this.bodyElements);
|
|
241
|
+
this.validator.checkMemoryThreshold();
|
|
528
242
|
await this.imageManager.loadAllImageData();
|
|
529
|
-
this.checkMemoryThreshold();
|
|
530
|
-
const sizeInfo = this.estimateSize();
|
|
243
|
+
this.validator.checkMemoryThreshold();
|
|
244
|
+
const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
|
|
531
245
|
if (sizeInfo.warning) {
|
|
532
246
|
console.warn(`DocXML Warning: ${sizeInfo.warning}`);
|
|
533
247
|
}
|
|
@@ -549,11 +263,11 @@ class Document {
|
|
|
549
263
|
}
|
|
550
264
|
}
|
|
551
265
|
updateDocumentXml() {
|
|
552
|
-
const xml = this.generateDocumentXml();
|
|
266
|
+
const xml = this.generator.generateDocumentXml(this.bodyElements, this.section);
|
|
553
267
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.DOCUMENT, xml);
|
|
554
268
|
}
|
|
555
269
|
updateCoreProps() {
|
|
556
|
-
const xml = this.generateCoreProps();
|
|
270
|
+
const xml = this.generator.generateCoreProps(this.properties);
|
|
557
271
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.CORE_PROPS, xml);
|
|
558
272
|
}
|
|
559
273
|
updateStylesXml() {
|
|
@@ -564,78 +278,6 @@ class Document {
|
|
|
564
278
|
const xml = this.numberingManager.generateNumberingXml();
|
|
565
279
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.NUMBERING, xml);
|
|
566
280
|
}
|
|
567
|
-
generateContentTypes() {
|
|
568
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
569
|
-
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
|
|
570
|
-
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
|
|
571
|
-
<Default Extension="xml" ContentType="application/xml"/>
|
|
572
|
-
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
|
|
573
|
-
<Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
|
|
574
|
-
<Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>
|
|
575
|
-
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
|
|
576
|
-
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
|
|
577
|
-
</Types>`;
|
|
578
|
-
}
|
|
579
|
-
generateRels() {
|
|
580
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
581
|
-
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
|
|
582
|
-
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
|
|
583
|
-
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
|
|
584
|
-
<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
|
|
585
|
-
</Relationships>`;
|
|
586
|
-
}
|
|
587
|
-
generateDocumentXml() {
|
|
588
|
-
const bodyXmls = [];
|
|
589
|
-
for (const element of this.bodyElements) {
|
|
590
|
-
const xml = element.toXML();
|
|
591
|
-
if (Array.isArray(xml)) {
|
|
592
|
-
bodyXmls.push(...xml);
|
|
593
|
-
}
|
|
594
|
-
else {
|
|
595
|
-
bodyXmls.push(xml);
|
|
596
|
-
}
|
|
597
|
-
}
|
|
598
|
-
bodyXmls.push(this.section.toXML());
|
|
599
|
-
return XMLBuilder_1.XMLBuilder.createDocument(bodyXmls);
|
|
600
|
-
}
|
|
601
|
-
generateCoreProps() {
|
|
602
|
-
const now = new Date();
|
|
603
|
-
const created = this.properties.created || now;
|
|
604
|
-
const modified = this.properties.modified || now;
|
|
605
|
-
const formatDate = (date) => {
|
|
606
|
-
return date.toISOString();
|
|
607
|
-
};
|
|
608
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
609
|
-
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
|
|
610
|
-
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
|
611
|
-
xmlns:dcterms="http://purl.org/dc/terms/"
|
|
612
|
-
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
|
|
613
|
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
|
614
|
-
<dc:title>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.title || '')}</dc:title>
|
|
615
|
-
<dc:subject>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.subject || '')}</dc:subject>
|
|
616
|
-
<dc:creator>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.creator || 'DocXML')}</dc:creator>
|
|
617
|
-
<cp:keywords>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.keywords || '')}</cp:keywords>
|
|
618
|
-
<dc:description>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.description || '')}</dc:description>
|
|
619
|
-
<cp:lastModifiedBy>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.lastModifiedBy || this.properties.creator || 'DocXML')}</cp:lastModifiedBy>
|
|
620
|
-
<cp:revision>${this.properties.revision || 1}</cp:revision>
|
|
621
|
-
<dcterms:created xsi:type="dcterms:W3CDTF">${formatDate(created)}</dcterms:created>
|
|
622
|
-
<dcterms:modified xsi:type="dcterms:W3CDTF">${formatDate(modified)}</dcterms:modified>
|
|
623
|
-
</cp:coreProperties>`;
|
|
624
|
-
}
|
|
625
|
-
generateAppProps() {
|
|
626
|
-
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
627
|
-
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
|
|
628
|
-
xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
|
|
629
|
-
<Application>DocXML</Application>
|
|
630
|
-
<DocSecurity>0</DocSecurity>
|
|
631
|
-
<ScaleCrop>false</ScaleCrop>
|
|
632
|
-
<Company></Company>
|
|
633
|
-
<LinksUpToDate>false</LinksUpToDate>
|
|
634
|
-
<SharedDoc>false</SharedDoc>
|
|
635
|
-
<HyperlinksChanged>false</HyperlinksChanged>
|
|
636
|
-
<AppVersion>0.1.0</AppVersion>
|
|
637
|
-
</Properties>`;
|
|
638
|
-
}
|
|
639
281
|
getStylesManager() {
|
|
640
282
|
return this.stylesManager;
|
|
641
283
|
}
|
|
@@ -744,38 +386,7 @@ class Document {
|
|
|
744
386
|
return this.relationshipManager;
|
|
745
387
|
}
|
|
746
388
|
processHyperlinks() {
|
|
747
|
-
|
|
748
|
-
const headers = this.headerFooterManager.getAllHeaders();
|
|
749
|
-
const footers = this.headerFooterManager.getAllFooters();
|
|
750
|
-
for (const header of headers) {
|
|
751
|
-
for (const element of header.header.getElements()) {
|
|
752
|
-
if (element instanceof Paragraph_1.Paragraph) {
|
|
753
|
-
this.processHyperlinksInParagraph(element);
|
|
754
|
-
}
|
|
755
|
-
}
|
|
756
|
-
}
|
|
757
|
-
for (const footer of footers) {
|
|
758
|
-
for (const element of footer.footer.getElements()) {
|
|
759
|
-
if (element instanceof Paragraph_1.Paragraph) {
|
|
760
|
-
this.processHyperlinksInParagraph(element);
|
|
761
|
-
}
|
|
762
|
-
}
|
|
763
|
-
}
|
|
764
|
-
for (const para of paragraphs) {
|
|
765
|
-
this.processHyperlinksInParagraph(para);
|
|
766
|
-
}
|
|
767
|
-
}
|
|
768
|
-
processHyperlinksInParagraph(paragraph) {
|
|
769
|
-
const content = paragraph.getContent();
|
|
770
|
-
for (const item of content) {
|
|
771
|
-
if (item instanceof Hyperlink_1.Hyperlink && item.isExternal() && !item.getRelationshipId()) {
|
|
772
|
-
const url = item.getUrl();
|
|
773
|
-
if (url) {
|
|
774
|
-
const relationship = this.relationshipManager.addHyperlink(url);
|
|
775
|
-
item.setRelationshipId(relationship.getId());
|
|
776
|
-
}
|
|
777
|
-
}
|
|
778
|
-
}
|
|
389
|
+
this.generator.processHyperlinks(this.bodyElements, this.headerFooterManager, this.relationshipManager);
|
|
779
390
|
}
|
|
780
391
|
saveImages() {
|
|
781
392
|
const images = this.imageManager.getAllImages();
|
|
@@ -815,43 +426,9 @@ class Document {
|
|
|
815
426
|
}
|
|
816
427
|
}
|
|
817
428
|
updateContentTypesWithImagesHeadersFootersAndComments() {
|
|
818
|
-
const contentTypes = this.generateContentTypesWithImagesHeadersFootersAndComments();
|
|
429
|
+
const contentTypes = this.generator.generateContentTypesWithImagesHeadersFootersAndComments(this.imageManager, this.headerFooterManager, this.commentManager);
|
|
819
430
|
this.zipHandler.updateFile(types_1.DOCX_PATHS.CONTENT_TYPES, contentTypes);
|
|
820
431
|
}
|
|
821
|
-
generateContentTypesWithImagesHeadersFootersAndComments() {
|
|
822
|
-
const images = this.imageManager.getAllImages();
|
|
823
|
-
const headers = this.headerFooterManager.getAllHeaders();
|
|
824
|
-
const footers = this.headerFooterManager.getAllFooters();
|
|
825
|
-
const hasComments = this.commentManager.getCount() > 0;
|
|
826
|
-
const extensions = new Set();
|
|
827
|
-
for (const entry of images) {
|
|
828
|
-
extensions.add(entry.image.getExtension());
|
|
829
|
-
}
|
|
830
|
-
let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
|
|
831
|
-
xml += '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">\n';
|
|
832
|
-
xml += ' <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>\n';
|
|
833
|
-
xml += ' <Default Extension="xml" ContentType="application/xml"/>\n';
|
|
834
|
-
for (const ext of extensions) {
|
|
835
|
-
const mimeType = ImageManager_1.ImageManager.getMimeType(ext);
|
|
836
|
-
xml += ` <Default Extension="${ext}" ContentType="${mimeType}"/>\n`;
|
|
837
|
-
}
|
|
838
|
-
xml += ' <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>\n';
|
|
839
|
-
xml += ' <Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>\n';
|
|
840
|
-
xml += ' <Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>\n';
|
|
841
|
-
for (const entry of headers) {
|
|
842
|
-
xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>\n`;
|
|
843
|
-
}
|
|
844
|
-
for (const entry of footers) {
|
|
845
|
-
xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>\n`;
|
|
846
|
-
}
|
|
847
|
-
if (hasComments) {
|
|
848
|
-
xml += ' <Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n';
|
|
849
|
-
}
|
|
850
|
-
xml += ' <Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>\n';
|
|
851
|
-
xml += ' <Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>\n';
|
|
852
|
-
xml += '</Types>';
|
|
853
|
-
return xml;
|
|
854
|
-
}
|
|
855
432
|
getBookmarkManager() {
|
|
856
433
|
return this.bookmarkManager;
|
|
857
434
|
}
|
|
@@ -964,53 +541,14 @@ class Document {
|
|
|
964
541
|
return this.revisionManager.getAllDeletions();
|
|
965
542
|
}
|
|
966
543
|
getParseWarnings() {
|
|
967
|
-
return
|
|
968
|
-
}
|
|
969
|
-
checkMemoryThreshold() {
|
|
970
|
-
const { heapUsed, heapTotal } = process.memoryUsage();
|
|
971
|
-
const usagePercent = (heapUsed / heapTotal) * 100;
|
|
972
|
-
if (usagePercent > this.maxMemoryUsagePercent) {
|
|
973
|
-
throw new Error(`Memory usage critical (${usagePercent.toFixed(1)}% of ${(heapTotal / 1024 / 1024).toFixed(0)}MB heap). ` +
|
|
974
|
-
`Cannot process document safely. Consider:\n` +
|
|
975
|
-
`- Reducing document size\n` +
|
|
976
|
-
`- Optimizing/compressing images\n` +
|
|
977
|
-
`- Splitting into multiple documents\n` +
|
|
978
|
-
`- Increasing Node.js heap size (--max-old-space-size)`);
|
|
979
|
-
}
|
|
544
|
+
return this.parser.getParseErrors();
|
|
980
545
|
}
|
|
981
546
|
estimateSize() {
|
|
982
|
-
|
|
983
|
-
const tableCount = this.getTableCount();
|
|
984
|
-
const imageCount = this.imageManager.getImageCount();
|
|
985
|
-
const estimatedXml = (paragraphCount * 200) + (tableCount * 1000) + 50000;
|
|
986
|
-
const imageBytes = this.imageManager.getTotalSize();
|
|
987
|
-
const totalBytes = estimatedXml + imageBytes;
|
|
988
|
-
const totalMB = totalBytes / (1024 * 1024);
|
|
989
|
-
const WARNING_MB = 50;
|
|
990
|
-
const ERROR_MB = 100;
|
|
991
|
-
let warning;
|
|
992
|
-
if (totalMB > ERROR_MB) {
|
|
993
|
-
warning = `Document size (${totalMB.toFixed(1)}MB) exceeds recommended maximum of ${ERROR_MB}MB. ` +
|
|
994
|
-
`This may cause memory issues. Consider splitting into multiple documents or optimizing images.`;
|
|
995
|
-
}
|
|
996
|
-
else if (totalMB > WARNING_MB) {
|
|
997
|
-
warning = `Document size (${totalMB.toFixed(1)}MB) exceeds ${WARNING_MB}MB. ` +
|
|
998
|
-
`Large documents may take longer to process and use significant memory.`;
|
|
999
|
-
}
|
|
1000
|
-
return {
|
|
1001
|
-
paragraphs: paragraphCount,
|
|
1002
|
-
tables: tableCount,
|
|
1003
|
-
images: imageCount,
|
|
1004
|
-
estimatedXmlBytes: estimatedXml,
|
|
1005
|
-
imageBytes,
|
|
1006
|
-
totalEstimatedBytes: totalBytes,
|
|
1007
|
-
totalEstimatedMB: parseFloat(totalMB.toFixed(2)),
|
|
1008
|
-
warning,
|
|
1009
|
-
};
|
|
547
|
+
return this.validator.estimateSize(this.bodyElements, this.imageManager);
|
|
1010
548
|
}
|
|
1011
549
|
dispose() {
|
|
1012
550
|
this.bodyElements = [];
|
|
1013
|
-
this.
|
|
551
|
+
this.parser.clearParseErrors();
|
|
1014
552
|
this.stylesManager = StylesManager_1.StylesManager.create();
|
|
1015
553
|
this.numberingManager = NumberingManager_1.NumberingManager.create();
|
|
1016
554
|
this.imageManager.clear();
|
|
@@ -1022,31 +560,7 @@ class Document {
|
|
|
1022
560
|
this.commentManager.clear();
|
|
1023
561
|
}
|
|
1024
562
|
getSizeStats() {
|
|
1025
|
-
|
|
1026
|
-
const warnings = [];
|
|
1027
|
-
if (estimate.warning) {
|
|
1028
|
-
warnings.push(estimate.warning);
|
|
1029
|
-
}
|
|
1030
|
-
const formatBytes = (bytes) => {
|
|
1031
|
-
if (bytes < 1024)
|
|
1032
|
-
return `${bytes} B`;
|
|
1033
|
-
if (bytes < 1024 * 1024)
|
|
1034
|
-
return `${(bytes / 1024).toFixed(1)} KB`;
|
|
1035
|
-
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
|
1036
|
-
};
|
|
1037
|
-
return {
|
|
1038
|
-
elements: {
|
|
1039
|
-
paragraphs: estimate.paragraphs,
|
|
1040
|
-
tables: estimate.tables,
|
|
1041
|
-
images: estimate.images,
|
|
1042
|
-
},
|
|
1043
|
-
size: {
|
|
1044
|
-
xml: formatBytes(estimate.estimatedXmlBytes),
|
|
1045
|
-
images: formatBytes(estimate.imageBytes),
|
|
1046
|
-
total: formatBytes(estimate.totalEstimatedBytes),
|
|
1047
|
-
},
|
|
1048
|
-
warnings,
|
|
1049
|
-
};
|
|
563
|
+
return this.validator.getSizeStats(this.bodyElements, this.imageManager);
|
|
1050
564
|
}
|
|
1051
565
|
}
|
|
1052
566
|
exports.Document = Document;
|