docxmlater 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +69 -0
  2. package/dist/core/Document.d.ts +9 -21
  3. package/dist/core/Document.d.ts.map +1 -1
  4. package/dist/core/Document.js +65 -590
  5. package/dist/core/Document.js.map +1 -1
  6. package/dist/core/DocumentGenerator.d.ts +22 -0
  7. package/dist/core/DocumentGenerator.d.ts.map +1 -0
  8. package/dist/core/DocumentGenerator.js +161 -0
  9. package/dist/core/DocumentGenerator.js.map +1 -0
  10. package/dist/core/DocumentParser.d.ts +32 -0
  11. package/dist/core/DocumentParser.d.ts.map +1 -0
  12. package/dist/core/DocumentParser.js +404 -0
  13. package/dist/core/DocumentParser.js.map +1 -0
  14. package/dist/core/DocumentValidator.d.ts +46 -0
  15. package/dist/core/DocumentValidator.d.ts.map +1 -0
  16. package/dist/core/DocumentValidator.js +223 -0
  17. package/dist/core/DocumentValidator.js.map +1 -0
  18. package/dist/core/RelationshipManager.d.ts.map +1 -1
  19. package/dist/core/RelationshipManager.js +19 -3
  20. package/dist/core/RelationshipManager.js.map +1 -1
  21. package/dist/elements/Hyperlink.d.ts +1 -0
  22. package/dist/elements/Hyperlink.d.ts.map +1 -1
  23. package/dist/elements/Hyperlink.js +27 -2
  24. package/dist/elements/Hyperlink.js.map +1 -1
  25. package/dist/elements/Image.d.ts +5 -1
  26. package/dist/elements/Image.d.ts.map +1 -1
  27. package/dist/elements/Image.js +118 -12
  28. package/dist/elements/Image.js.map +1 -1
  29. package/dist/elements/ImageManager.d.ts +11 -3
  30. package/dist/elements/ImageManager.d.ts.map +1 -1
  31. package/dist/elements/ImageManager.js +72 -6
  32. package/dist/elements/ImageManager.js.map +1 -1
  33. package/dist/index.d.ts +3 -0
  34. package/dist/index.d.ts.map +1 -1
  35. package/dist/index.js +7 -1
  36. package/dist/index.js.map +1 -1
  37. package/dist/utils/validation.d.ts.map +1 -1
  38. package/dist/utils/validation.js +10 -1
  39. package/dist/utils/validation.js.map +1 -1
  40. package/package.json +1 -2
@@ -41,7 +41,6 @@ const Table_1 = require("../elements/Table");
41
41
  const Section_1 = require("../elements/Section");
42
42
  const ImageManager_1 = require("../elements/ImageManager");
43
43
  const HeaderFooterManager_1 = require("../elements/HeaderFooterManager");
44
- const Hyperlink_1 = require("../elements/Hyperlink");
45
44
  const TableOfContents_1 = require("../elements/TableOfContents");
46
45
  const TableOfContentsElement_1 = require("../elements/TableOfContentsElement");
47
46
  const BookmarkManager_1 = require("../elements/BookmarkManager");
@@ -49,17 +48,20 @@ const Revision_1 = require("../elements/Revision");
49
48
  const RevisionManager_1 = require("../elements/RevisionManager");
50
49
  const CommentManager_1 = require("../elements/CommentManager");
51
50
  const Run_1 = require("../elements/Run");
52
- const XMLBuilder_1 = require("../xml/XMLBuilder");
53
- const XMLParser_1 = require("../xml/XMLParser");
51
+ const Hyperlink_1 = require("../elements/Hyperlink");
54
52
  const StylesManager_1 = require("../formatting/StylesManager");
55
53
  const NumberingManager_1 = require("../formatting/NumberingManager");
56
54
  const RelationshipManager_1 = require("./RelationshipManager");
57
- class ImageRun {
55
+ const DocumentParser_1 = require("./DocumentParser");
56
+ const DocumentGenerator_1 = require("./DocumentGenerator");
57
+ const DocumentValidator_1 = require("./DocumentValidator");
58
+ class ImageRun extends Run_1.Run {
58
59
  constructor(image) {
59
- this.image = image;
60
+ super('');
61
+ this.imageElement = image;
60
62
  }
61
63
  toXML() {
62
- const drawing = this.image.toXML();
64
+ const drawing = this.imageElement.toXML();
63
65
  return {
64
66
  name: 'w:r',
65
67
  children: [drawing]
@@ -69,15 +71,25 @@ class ImageRun {
69
71
  class Document {
70
72
  constructor(zipHandler, options = {}, initDefaults = true) {
71
73
  this.bodyElements = [];
72
- this.parseErrors = [];
73
74
  this.zipHandler = zipHandler || new ZipHandler_1.ZipHandler();
74
- this.properties = options.properties || {};
75
- this.maxMemoryUsagePercent = options.maxMemoryUsagePercent ?? 80;
76
- this.strictParsing = options.strictParsing ?? false;
75
+ const strictParsing = options.strictParsing ?? false;
76
+ const memoryPercent = options.maxMemoryUsagePercent ?? 80;
77
+ this.parser = new DocumentParser_1.DocumentParser(strictParsing);
78
+ this.generator = new DocumentGenerator_1.DocumentGenerator();
79
+ this.validator = new DocumentValidator_1.DocumentValidator(memoryPercent, {
80
+ maxMemoryUsagePercent: options.maxMemoryUsagePercent,
81
+ maxRssMB: options.maxRssMB,
82
+ useAbsoluteLimit: options.useAbsoluteMemoryLimit,
83
+ });
84
+ this.properties = options.properties ? DocumentValidator_1.DocumentValidator.validateProperties(options.properties) : {};
77
85
  this.stylesManager = StylesManager_1.StylesManager.create();
78
86
  this.numberingManager = NumberingManager_1.NumberingManager.create();
79
87
  this.section = Section_1.Section.createLetter();
80
- this.imageManager = ImageManager_1.ImageManager.create();
88
+ this.imageManager = ImageManager_1.ImageManager.create({
89
+ maxImageCount: options.maxImageCount,
90
+ maxTotalImageSizeMB: options.maxTotalImageSizeMB,
91
+ maxSingleImageSizeMB: options.maxSingleImageSizeMB,
92
+ });
81
93
  this.relationshipManager = RelationshipManager_1.RelationshipManager.create();
82
94
  this.headerFooterManager = HeaderFooterManager_1.HeaderFooterManager.create();
83
95
  this.bookmarkManager = BookmarkManager_1.BookmarkManager.create();
@@ -108,346 +120,20 @@ class Document {
108
120
  return doc;
109
121
  }
110
122
  initializeRequiredFiles() {
111
- this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generateContentTypes());
112
- this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generateRels());
113
- this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generateDocumentXml());
123
+ this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generator.generateContentTypes());
124
+ this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generator.generateRels());
125
+ this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generator.generateDocumentXml(this.bodyElements, this.section));
114
126
  this.zipHandler.addFile('word/_rels/document.xml.rels', this.relationshipManager.generateXml());
115
127
  this.zipHandler.addFile(types_1.DOCX_PATHS.STYLES, this.stylesManager.generateStylesXml());
116
128
  this.zipHandler.addFile(types_1.DOCX_PATHS.NUMBERING, this.numberingManager.generateNumberingXml());
117
- this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generateCoreProps());
118
- this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generateAppProps());
129
+ this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generator.generateCoreProps(this.properties));
130
+ this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generator.generateAppProps());
119
131
  }
120
132
  async parseDocument() {
121
- const docXml = this.zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
122
- if (!docXml) {
123
- throw new Error('Invalid document: word/document.xml not found');
124
- }
125
- this.parseRelationships();
126
- this.parseProperties();
127
- this.parseBodyElements(docXml);
128
- }
129
- parseBodyElements(docXml) {
130
- this.bodyElements = [];
131
- try {
132
- XMLParser_1.XMLParser.validateSize(docXml);
133
- }
134
- catch (error) {
135
- const err = error instanceof Error ? error : new Error(String(error));
136
- this.parseErrors.push({ element: 'document', error: err });
137
- if (this.strictParsing) {
138
- throw err;
139
- }
140
- return;
141
- }
142
- const bodyContent = XMLParser_1.XMLParser.extractBody(docXml);
143
- if (!bodyContent) {
144
- return;
145
- }
146
- const paragraphXmls = XMLParser_1.XMLParser.extractElements(bodyContent, 'w:p');
147
- for (const paraXml of paragraphXmls) {
148
- const paragraph = this.parseParagraph(paraXml);
149
- if (paragraph) {
150
- this.bodyElements.push(paragraph);
151
- }
152
- }
153
- const hasTable = bodyContent.includes('<w:tbl');
154
- if (hasTable) {
155
- const err = new Error('Document contains tables which are not yet fully supported in Phase 2. Tables will be ignored.');
156
- this.parseErrors.push({ element: 'table', error: err });
157
- if (this.strictParsing) {
158
- throw err;
159
- }
160
- }
161
- this.validateLoadedContent();
162
- }
163
- validateLoadedContent() {
164
- const paragraphs = this.bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
165
- if (paragraphs.length === 0) {
166
- return;
167
- }
168
- let totalRuns = 0;
169
- let emptyRuns = 0;
170
- let runsWithText = 0;
171
- for (const para of paragraphs) {
172
- const runs = para.getRuns();
173
- totalRuns += runs.length;
174
- for (const run of runs) {
175
- const text = run.getText();
176
- if (text.length === 0) {
177
- emptyRuns++;
178
- }
179
- else {
180
- runsWithText++;
181
- }
182
- }
183
- }
184
- if (totalRuns > 0) {
185
- const emptyPercentage = (emptyRuns / totalRuns) * 100;
186
- if (emptyPercentage > 90 && emptyRuns > 10) {
187
- const warning = new Error(`WARNING: Document appears to be corrupted or empty. ` +
188
- `${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) have no text content. ` +
189
- `This may indicate:\n` +
190
- ` - The document was already corrupted before loading\n` +
191
- ` - Text content was stripped by another application\n` +
192
- ` - Encoding issues during document creation\n` +
193
- `Original document structure is preserved, but text may be lost.`);
194
- this.parseErrors.push({ element: 'document-validation', error: warning });
195
- console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
196
- }
197
- else if (emptyPercentage > 50 && emptyRuns > 5) {
198
- const warning = new Error(`Document has ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) with no text. ` +
199
- `This is higher than normal and may indicate partial data loss.`);
200
- this.parseErrors.push({ element: 'document-validation', error: warning });
201
- console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
202
- }
203
- }
204
- }
205
- parseParagraph(paraXml) {
206
- try {
207
- const paragraph = new Paragraph_1.Paragraph();
208
- this.parseParagraphProperties(paraXml, paragraph);
209
- const hyperlinkXmls = XMLParser_1.XMLParser.extractElements(paraXml, 'w:hyperlink');
210
- for (const hyperlinkXml of hyperlinkXmls) {
211
- const hyperlink = this.parseHyperlink(hyperlinkXml);
212
- if (hyperlink) {
213
- paragraph.addHyperlink(hyperlink);
214
- }
215
- }
216
- let paraXmlWithoutHyperlinks = paraXml;
217
- for (const hyperlinkXml of hyperlinkXmls) {
218
- paraXmlWithoutHyperlinks = paraXmlWithoutHyperlinks.replace(hyperlinkXml, '');
219
- }
220
- const runXmls = XMLParser_1.XMLParser.extractElements(paraXmlWithoutHyperlinks, 'w:r');
221
- for (const runXml of runXmls) {
222
- const run = this.parseRun(runXml);
223
- if (run) {
224
- paragraph.addRun(run);
225
- }
226
- }
227
- return paragraph;
228
- }
229
- catch (error) {
230
- const err = error instanceof Error ? error : new Error(String(error));
231
- this.parseErrors.push({ element: 'paragraph', error: err });
232
- if (this.strictParsing) {
233
- throw new Error(`Failed to parse paragraph: ${err.message}`);
234
- }
235
- return null;
236
- }
237
- }
238
- parseParagraphProperties(paraXml, paragraph) {
239
- const pPrMatch = paraXml.match(/<w:pPr[^>]*>([\s\S]*?)<\/w:pPr>/);
240
- if (!pPrMatch || !pPrMatch[1]) {
241
- return;
242
- }
243
- const pPr = pPrMatch[1];
244
- const alignMatch = pPr.match(/<w:jc\s+w:val="([^"]+)"/);
245
- if (alignMatch && alignMatch[1]) {
246
- const alignment = alignMatch[1];
247
- paragraph.setAlignment(alignment);
248
- }
249
- const styleMatch = pPr.match(/<w:pStyle\s+w:val="([^"]+)"/);
250
- if (styleMatch && styleMatch[1]) {
251
- paragraph.setStyle(styleMatch[1]);
252
- }
253
- const indMatch = pPr.match(/<w:ind([^>]+)\/>/);
254
- if (indMatch && indMatch[1]) {
255
- const indStr = indMatch[1];
256
- const leftMatch = indStr.match(/w:left="(\d+)"/);
257
- const rightMatch = indStr.match(/w:right="(\d+)"/);
258
- const firstLineMatch = indStr.match(/w:firstLine="(\d+)"/);
259
- if (leftMatch && leftMatch[1]) {
260
- paragraph.setLeftIndent(parseInt(leftMatch[1], 10));
261
- }
262
- if (rightMatch && rightMatch[1]) {
263
- paragraph.setRightIndent(parseInt(rightMatch[1], 10));
264
- }
265
- if (firstLineMatch && firstLineMatch[1]) {
266
- paragraph.setFirstLineIndent(parseInt(firstLineMatch[1], 10));
267
- }
268
- }
269
- const spacingMatch = pPr.match(/<w:spacing([^>]+)\/>/);
270
- if (spacingMatch && spacingMatch[1]) {
271
- const spacingStr = spacingMatch[1];
272
- const beforeMatch = spacingStr.match(/w:before="(\d+)"/);
273
- const afterMatch = spacingStr.match(/w:after="(\d+)"/);
274
- const lineMatch = spacingStr.match(/w:line="(\d+)"/);
275
- if (beforeMatch && beforeMatch[1]) {
276
- paragraph.setSpaceBefore(parseInt(beforeMatch[1], 10));
277
- }
278
- if (afterMatch && afterMatch[1]) {
279
- paragraph.setSpaceAfter(parseInt(afterMatch[1], 10));
280
- }
281
- if (lineMatch && lineMatch[1]) {
282
- const lineRule = spacingStr.match(/w:lineRule="([^"]+)"/);
283
- paragraph.setLineSpacing(parseInt(lineMatch[1], 10), lineRule && lineRule[1] ? lineRule[1] : undefined);
284
- }
285
- }
286
- if (pPr.includes('<w:keepNext'))
287
- paragraph.setKeepNext(true);
288
- if (pPr.includes('<w:keepLines'))
289
- paragraph.setKeepLines(true);
290
- if (pPr.includes('<w:pageBreakBefore'))
291
- paragraph.setPageBreakBefore(true);
292
- }
293
- parseRun(runXml) {
294
- try {
295
- const text = XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
296
- const run = new Run_1.Run(text);
297
- this.parseRunProperties(runXml, run);
298
- return run;
299
- }
300
- catch (error) {
301
- const err = error instanceof Error ? error : new Error(String(error));
302
- this.parseErrors.push({ element: 'run', error: err });
303
- if (this.strictParsing) {
304
- throw new Error(`Failed to parse run: ${err.message}`);
305
- }
306
- return null;
307
- }
308
- }
309
- parseRunProperties(runXml, run) {
310
- const rPrMatch = runXml.match(/<w:rPr[^>]*>([\s\S]*?)<\/w:rPr>/);
311
- if (!rPrMatch || !rPrMatch[1]) {
312
- return;
313
- }
314
- const rPr = rPrMatch[1];
315
- if (rPr.includes('<w:b/>') || rPr.includes('<w:b ')) {
316
- run.setBold(true);
317
- }
318
- if (rPr.includes('<w:i/>') || rPr.includes('<w:i ')) {
319
- run.setItalic(true);
320
- }
321
- const underlineMatch = rPr.match(/<w:u\s+w:val="([^"]+)"/);
322
- if (underlineMatch && underlineMatch[1]) {
323
- const underlineStyle = underlineMatch[1];
324
- run.setUnderline(underlineStyle);
325
- }
326
- else if (rPr.includes('<w:u/>')) {
327
- run.setUnderline(true);
328
- }
329
- if (rPr.includes('<w:strike/>') || rPr.includes('<w:strike ')) {
330
- run.setStrike(true);
331
- }
332
- const vertAlignMatch = rPr.match(/<w:vertAlign\s+w:val="([^"]+)"/);
333
- if (vertAlignMatch && vertAlignMatch[1]) {
334
- if (vertAlignMatch[1] === 'subscript') {
335
- run.setSubscript(true);
336
- }
337
- else if (vertAlignMatch[1] === 'superscript') {
338
- run.setSuperscript(true);
339
- }
340
- }
341
- const fontMatch = rPr.match(/<w:rFonts[^>]+w:ascii="([^"]+)"/);
342
- if (fontMatch && fontMatch[1]) {
343
- run.setFont(fontMatch[1]);
344
- }
345
- const sizeMatch = rPr.match(/<w:sz\s+w:val="(\d+)"/);
346
- if (sizeMatch && sizeMatch[1]) {
347
- const halfPoints = parseInt(sizeMatch[1], 10);
348
- run.setSize(halfPoints / 2);
349
- }
350
- const colorMatch = rPr.match(/<w:color\s+w:val="([^"]+)"/);
351
- if (colorMatch && colorMatch[1]) {
352
- run.setColor(colorMatch[1]);
353
- }
354
- const highlightMatch = rPr.match(/<w:highlight\s+w:val="([^"]+)"/);
355
- if (highlightMatch && highlightMatch[1]) {
356
- const highlightColor = highlightMatch[1];
357
- run.setHighlight(highlightColor);
358
- }
359
- if (rPr.includes('<w:smallCaps/>') || rPr.includes('<w:smallCaps ')) {
360
- run.setSmallCaps(true);
361
- }
362
- if (rPr.includes('<w:caps/>') || rPr.includes('<w:caps ')) {
363
- run.setAllCaps(true);
364
- }
365
- }
366
- parseHyperlink(hyperlinkXml) {
367
- try {
368
- const relationshipId = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'r:id');
369
- const anchor = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:anchor');
370
- const tooltip = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:tooltip');
371
- if (!relationshipId && !anchor) {
372
- return null;
373
- }
374
- const runXmls = XMLParser_1.XMLParser.extractElements(hyperlinkXml, 'w:r');
375
- let text = '';
376
- let formatting;
377
- for (const runXml of runXmls) {
378
- text += XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
379
- if (!formatting) {
380
- const run = this.parseRun(runXml);
381
- if (run) {
382
- formatting = run.getFormatting();
383
- }
384
- }
385
- }
386
- let url;
387
- if (relationshipId) {
388
- const relationship = this.relationshipManager.getRelationship(relationshipId);
389
- if (relationship && relationship.getType().includes('hyperlink')) {
390
- url = relationship.getTarget();
391
- }
392
- }
393
- return new Hyperlink_1.Hyperlink({
394
- url,
395
- anchor,
396
- text: text || 'Link',
397
- formatting,
398
- tooltip,
399
- relationshipId,
400
- });
401
- }
402
- catch (error) {
403
- const err = error instanceof Error ? error : new Error(String(error));
404
- this.parseErrors.push({ element: 'hyperlink', error: err });
405
- if (this.strictParsing) {
406
- throw new Error(`Failed to parse hyperlink: ${err.message}`);
407
- }
408
- return null;
409
- }
410
- }
411
- parseRelationships() {
412
- const relsPath = 'word/_rels/document.xml.rels';
413
- const relsXml = this.zipHandler.getFileAsString(relsPath);
414
- if (relsXml) {
415
- this.relationshipManager = RelationshipManager_1.RelationshipManager.fromXml(relsXml);
416
- }
417
- else {
418
- this.relationshipManager.addStyles();
419
- this.relationshipManager.addNumbering();
420
- }
421
- }
422
- parseProperties() {
423
- const coreXml = this.zipHandler.getFileAsString(types_1.DOCX_PATHS.CORE_PROPS);
424
- if (!coreXml) {
425
- return;
426
- }
427
- const extractTag = (xml, tag) => {
428
- const match = xml.match(new RegExp(`<${tag}[^>]*>([^<]*)</${tag}>`));
429
- return match && match[1] ? XMLBuilder_1.XMLBuilder.unescapeXml(match[1]) : undefined;
430
- };
431
- this.properties = {
432
- title: extractTag(coreXml, 'dc:title'),
433
- subject: extractTag(coreXml, 'dc:subject'),
434
- creator: extractTag(coreXml, 'dc:creator'),
435
- keywords: extractTag(coreXml, 'cp:keywords'),
436
- description: extractTag(coreXml, 'dc:description'),
437
- lastModifiedBy: extractTag(coreXml, 'cp:lastModifiedBy'),
438
- };
439
- const revisionStr = extractTag(coreXml, 'cp:revision');
440
- if (revisionStr) {
441
- this.properties.revision = parseInt(revisionStr, 10);
442
- }
443
- const createdStr = extractTag(coreXml, 'dcterms:created');
444
- if (createdStr) {
445
- this.properties.created = new Date(createdStr);
446
- }
447
- const modifiedStr = extractTag(coreXml, 'dcterms:modified');
448
- if (modifiedStr) {
449
- this.properties.modified = new Date(modifiedStr);
450
- }
133
+ const result = await this.parser.parseDocument(this.zipHandler, this.relationshipManager);
134
+ this.bodyElements = result.bodyElements;
135
+ this.properties = result.properties;
136
+ this.relationshipManager = result.relationshipManager;
451
137
  }
452
138
  addParagraph(paragraph) {
453
139
  this.bodyElements.push(paragraph);
@@ -504,48 +190,21 @@ class Document {
504
190
  return this;
505
191
  }
506
192
  setProperties(properties) {
507
- this.properties = { ...this.properties, ...properties };
193
+ const validated = DocumentValidator_1.DocumentValidator.validateProperties(properties);
194
+ this.properties = { ...this.properties, ...validated };
508
195
  return this;
509
196
  }
510
197
  getProperties() {
511
198
  return { ...this.properties };
512
199
  }
513
- validateBeforeSave() {
514
- const paragraphs = this.getParagraphs();
515
- if (paragraphs.length === 0) {
516
- console.warn('\nDocXML Save Warning:\n' +
517
- 'Document has no paragraphs. You are saving an empty document.\n');
518
- return;
519
- }
520
- let totalRuns = 0;
521
- let emptyRuns = 0;
522
- for (const para of paragraphs) {
523
- const runs = para.getRuns();
524
- totalRuns += runs.length;
525
- for (const run of runs) {
526
- if (run.getText().length === 0) {
527
- emptyRuns++;
528
- }
529
- }
530
- }
531
- if (totalRuns > 0) {
532
- const emptyPercentage = (emptyRuns / totalRuns) * 100;
533
- if (emptyPercentage > 90 && emptyRuns > 10) {
534
- console.warn('\nDocXML Save Warning:\n' +
535
- `You are about to save a document where ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) are empty.\n` +
536
- 'This may result in a document with no visible text content.\n' +
537
- 'If this is unintentional, please review the document before saving.\n');
538
- }
539
- }
540
- }
541
200
  async save(filePath) {
542
201
  const tempPath = `${filePath}.tmp.${Date.now()}`;
543
202
  try {
544
- this.validateBeforeSave();
545
- this.checkMemoryThreshold();
203
+ this.validator.validateBeforeSave(this.bodyElements);
204
+ this.validator.checkMemoryThreshold();
546
205
  await this.imageManager.loadAllImageData();
547
- this.checkMemoryThreshold();
548
- const sizeInfo = this.estimateSize();
206
+ this.validator.checkMemoryThreshold();
207
+ const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
549
208
  if (sizeInfo.warning) {
550
209
  console.warn(`DocXML Warning: ${sizeInfo.warning}`);
551
210
  }
@@ -579,11 +238,11 @@ class Document {
579
238
  }
580
239
  async toBuffer() {
581
240
  try {
582
- this.validateBeforeSave();
583
- this.checkMemoryThreshold();
241
+ this.validator.validateBeforeSave(this.bodyElements);
242
+ this.validator.checkMemoryThreshold();
584
243
  await this.imageManager.loadAllImageData();
585
- this.checkMemoryThreshold();
586
- const sizeInfo = this.estimateSize();
244
+ this.validator.checkMemoryThreshold();
245
+ const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
587
246
  if (sizeInfo.warning) {
588
247
  console.warn(`DocXML Warning: ${sizeInfo.warning}`);
589
248
  }
@@ -605,11 +264,11 @@ class Document {
605
264
  }
606
265
  }
607
266
  updateDocumentXml() {
608
- const xml = this.generateDocumentXml();
267
+ const xml = this.generator.generateDocumentXml(this.bodyElements, this.section);
609
268
  this.zipHandler.updateFile(types_1.DOCX_PATHS.DOCUMENT, xml);
610
269
  }
611
270
  updateCoreProps() {
612
- const xml = this.generateCoreProps();
271
+ const xml = this.generator.generateCoreProps(this.properties);
613
272
  this.zipHandler.updateFile(types_1.DOCX_PATHS.CORE_PROPS, xml);
614
273
  }
615
274
  updateStylesXml() {
@@ -620,78 +279,6 @@ class Document {
620
279
  const xml = this.numberingManager.generateNumberingXml();
621
280
  this.zipHandler.updateFile(types_1.DOCX_PATHS.NUMBERING, xml);
622
281
  }
623
- generateContentTypes() {
624
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
625
- <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
626
- <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
627
- <Default Extension="xml" ContentType="application/xml"/>
628
- <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
629
- <Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
630
- <Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>
631
- <Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
632
- <Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
633
- </Types>`;
634
- }
635
- generateRels() {
636
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
637
- <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
638
- <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
639
- <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
640
- <Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
641
- </Relationships>`;
642
- }
643
- generateDocumentXml() {
644
- const bodyXmls = [];
645
- for (const element of this.bodyElements) {
646
- const xml = element.toXML();
647
- if (Array.isArray(xml)) {
648
- bodyXmls.push(...xml);
649
- }
650
- else {
651
- bodyXmls.push(xml);
652
- }
653
- }
654
- bodyXmls.push(this.section.toXML());
655
- return XMLBuilder_1.XMLBuilder.createDocument(bodyXmls);
656
- }
657
- generateCoreProps() {
658
- const now = new Date();
659
- const created = this.properties.created || now;
660
- const modified = this.properties.modified || now;
661
- const formatDate = (date) => {
662
- return date.toISOString();
663
- };
664
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
665
- <cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
666
- xmlns:dc="http://purl.org/dc/elements/1.1/"
667
- xmlns:dcterms="http://purl.org/dc/terms/"
668
- xmlns:dcmitype="http://purl.org/dc/dcmitype/"
669
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
670
- <dc:title>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.title || '')}</dc:title>
671
- <dc:subject>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.subject || '')}</dc:subject>
672
- <dc:creator>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.creator || 'DocXML')}</dc:creator>
673
- <cp:keywords>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.keywords || '')}</cp:keywords>
674
- <dc:description>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.description || '')}</dc:description>
675
- <cp:lastModifiedBy>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.lastModifiedBy || this.properties.creator || 'DocXML')}</cp:lastModifiedBy>
676
- <cp:revision>${this.properties.revision || 1}</cp:revision>
677
- <dcterms:created xsi:type="dcterms:W3CDTF">${formatDate(created)}</dcterms:created>
678
- <dcterms:modified xsi:type="dcterms:W3CDTF">${formatDate(modified)}</dcterms:modified>
679
- </cp:coreProperties>`;
680
- }
681
- generateAppProps() {
682
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
683
- <Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
684
- xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
685
- <Application>DocXML</Application>
686
- <DocSecurity>0</DocSecurity>
687
- <ScaleCrop>false</ScaleCrop>
688
- <Company></Company>
689
- <LinksUpToDate>false</LinksUpToDate>
690
- <SharedDoc>false</SharedDoc>
691
- <HyperlinksChanged>false</HyperlinksChanged>
692
- <AppVersion>0.1.0</AppVersion>
693
- </Properties>`;
694
- }
695
282
  getStylesManager() {
696
283
  return this.stylesManager;
697
284
  }
@@ -800,38 +387,7 @@ class Document {
800
387
  return this.relationshipManager;
801
388
  }
802
389
  processHyperlinks() {
803
- const paragraphs = this.getParagraphs();
804
- const headers = this.headerFooterManager.getAllHeaders();
805
- const footers = this.headerFooterManager.getAllFooters();
806
- for (const header of headers) {
807
- for (const element of header.header.getElements()) {
808
- if (element instanceof Paragraph_1.Paragraph) {
809
- this.processHyperlinksInParagraph(element);
810
- }
811
- }
812
- }
813
- for (const footer of footers) {
814
- for (const element of footer.footer.getElements()) {
815
- if (element instanceof Paragraph_1.Paragraph) {
816
- this.processHyperlinksInParagraph(element);
817
- }
818
- }
819
- }
820
- for (const para of paragraphs) {
821
- this.processHyperlinksInParagraph(para);
822
- }
823
- }
824
- processHyperlinksInParagraph(paragraph) {
825
- const content = paragraph.getContent();
826
- for (const item of content) {
827
- if (item instanceof Hyperlink_1.Hyperlink && item.isExternal() && !item.getRelationshipId()) {
828
- const url = item.getUrl();
829
- if (url) {
830
- const relationship = this.relationshipManager.addHyperlink(url);
831
- item.setRelationshipId(relationship.getId());
832
- }
833
- }
834
- }
390
+ this.generator.processHyperlinks(this.bodyElements, this.headerFooterManager, this.relationshipManager);
835
391
  }
836
392
  saveImages() {
837
393
  const images = this.imageManager.getAllImages();
@@ -871,43 +427,9 @@ class Document {
871
427
  }
872
428
  }
873
429
  updateContentTypesWithImagesHeadersFootersAndComments() {
874
- const contentTypes = this.generateContentTypesWithImagesHeadersFootersAndComments();
430
+ const contentTypes = this.generator.generateContentTypesWithImagesHeadersFootersAndComments(this.imageManager, this.headerFooterManager, this.commentManager);
875
431
  this.zipHandler.updateFile(types_1.DOCX_PATHS.CONTENT_TYPES, contentTypes);
876
432
  }
877
- generateContentTypesWithImagesHeadersFootersAndComments() {
878
- const images = this.imageManager.getAllImages();
879
- const headers = this.headerFooterManager.getAllHeaders();
880
- const footers = this.headerFooterManager.getAllFooters();
881
- const hasComments = this.commentManager.getCount() > 0;
882
- const extensions = new Set();
883
- for (const entry of images) {
884
- extensions.add(entry.image.getExtension());
885
- }
886
- let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
887
- xml += '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">\n';
888
- xml += ' <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>\n';
889
- xml += ' <Default Extension="xml" ContentType="application/xml"/>\n';
890
- for (const ext of extensions) {
891
- const mimeType = ImageManager_1.ImageManager.getMimeType(ext);
892
- xml += ` <Default Extension="${ext}" ContentType="${mimeType}"/>\n`;
893
- }
894
- xml += ' <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>\n';
895
- xml += ' <Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>\n';
896
- xml += ' <Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>\n';
897
- for (const entry of headers) {
898
- xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>\n`;
899
- }
900
- for (const entry of footers) {
901
- xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>\n`;
902
- }
903
- if (hasComments) {
904
- xml += ' <Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n';
905
- }
906
- xml += ' <Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>\n';
907
- xml += ' <Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>\n';
908
- xml += '</Types>';
909
- return xml;
910
- }
911
433
  getBookmarkManager() {
912
434
  return this.bookmarkManager;
913
435
  }
@@ -1020,53 +542,30 @@ class Document {
1020
542
  return this.revisionManager.getAllDeletions();
1021
543
  }
1022
544
  getParseWarnings() {
1023
- return [...this.parseErrors];
1024
- }
1025
- checkMemoryThreshold() {
1026
- const { heapUsed, heapTotal } = process.memoryUsage();
1027
- const usagePercent = (heapUsed / heapTotal) * 100;
1028
- if (usagePercent > this.maxMemoryUsagePercent) {
1029
- throw new Error(`Memory usage critical (${usagePercent.toFixed(1)}% of ${(heapTotal / 1024 / 1024).toFixed(0)}MB heap). ` +
1030
- `Cannot process document safely. Consider:\n` +
1031
- `- Reducing document size\n` +
1032
- `- Optimizing/compressing images\n` +
1033
- `- Splitting into multiple documents\n` +
1034
- `- Increasing Node.js heap size (--max-old-space-size)`);
545
+ return this.parser.getParseErrors();
546
+ }
547
+ updateHyperlinkUrls(urlMap) {
548
+ let updatedCount = 0;
549
+ for (const para of this.getParagraphs()) {
550
+ for (const content of para.getContent()) {
551
+ if (content instanceof Hyperlink_1.Hyperlink && content.isExternal()) {
552
+ const currentUrl = content.getUrl();
553
+ if (currentUrl && urlMap.has(currentUrl)) {
554
+ const newUrl = urlMap.get(currentUrl);
555
+ content.setUrl(newUrl);
556
+ updatedCount++;
557
+ }
558
+ }
559
+ }
1035
560
  }
561
+ return updatedCount;
1036
562
  }
1037
563
  estimateSize() {
1038
- const paragraphCount = this.getParagraphCount();
1039
- const tableCount = this.getTableCount();
1040
- const imageCount = this.imageManager.getImageCount();
1041
- const estimatedXml = (paragraphCount * 200) + (tableCount * 1000) + 50000;
1042
- const imageBytes = this.imageManager.getTotalSize();
1043
- const totalBytes = estimatedXml + imageBytes;
1044
- const totalMB = totalBytes / (1024 * 1024);
1045
- const WARNING_MB = 50;
1046
- const ERROR_MB = 100;
1047
- let warning;
1048
- if (totalMB > ERROR_MB) {
1049
- warning = `Document size (${totalMB.toFixed(1)}MB) exceeds recommended maximum of ${ERROR_MB}MB. ` +
1050
- `This may cause memory issues. Consider splitting into multiple documents or optimizing images.`;
1051
- }
1052
- else if (totalMB > WARNING_MB) {
1053
- warning = `Document size (${totalMB.toFixed(1)}MB) exceeds ${WARNING_MB}MB. ` +
1054
- `Large documents may take longer to process and use significant memory.`;
1055
- }
1056
- return {
1057
- paragraphs: paragraphCount,
1058
- tables: tableCount,
1059
- images: imageCount,
1060
- estimatedXmlBytes: estimatedXml,
1061
- imageBytes,
1062
- totalEstimatedBytes: totalBytes,
1063
- totalEstimatedMB: parseFloat(totalMB.toFixed(2)),
1064
- warning,
1065
- };
564
+ return this.validator.estimateSize(this.bodyElements, this.imageManager);
1066
565
  }
1067
566
  dispose() {
1068
567
  this.bodyElements = [];
1069
- this.parseErrors = [];
568
+ this.parser.clearParseErrors();
1070
569
  this.stylesManager = StylesManager_1.StylesManager.create();
1071
570
  this.numberingManager = NumberingManager_1.NumberingManager.create();
1072
571
  this.imageManager.clear();
@@ -1078,31 +577,7 @@ class Document {
1078
577
  this.commentManager.clear();
1079
578
  }
1080
579
  getSizeStats() {
1081
- const estimate = this.estimateSize();
1082
- const warnings = [];
1083
- if (estimate.warning) {
1084
- warnings.push(estimate.warning);
1085
- }
1086
- const formatBytes = (bytes) => {
1087
- if (bytes < 1024)
1088
- return `${bytes} B`;
1089
- if (bytes < 1024 * 1024)
1090
- return `${(bytes / 1024).toFixed(1)} KB`;
1091
- return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
1092
- };
1093
- return {
1094
- elements: {
1095
- paragraphs: estimate.paragraphs,
1096
- tables: estimate.tables,
1097
- images: estimate.images,
1098
- },
1099
- size: {
1100
- xml: formatBytes(estimate.estimatedXmlBytes),
1101
- images: formatBytes(estimate.imageBytes),
1102
- total: formatBytes(estimate.totalEstimatedBytes),
1103
- },
1104
- warnings,
1105
- };
580
+ return this.validator.getSizeStats(this.bodyElements, this.imageManager);
1106
581
  }
1107
582
  }
1108
583
  exports.Document = Document;