docxmlater 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. package/README.md +22 -21
  2. package/dist/core/Document.d.ts +8 -20
  3. package/dist/core/Document.d.ts.map +1 -1
  4. package/dist/core/Document.js +49 -535
  5. package/dist/core/Document.js.map +1 -1
  6. package/dist/core/DocumentGenerator.d.ts +22 -0
  7. package/dist/core/DocumentGenerator.d.ts.map +1 -0
  8. package/dist/core/DocumentGenerator.js +161 -0
  9. package/dist/core/DocumentGenerator.js.map +1 -0
  10. package/dist/core/DocumentParser.d.ts +32 -0
  11. package/dist/core/DocumentParser.d.ts.map +1 -0
  12. package/dist/core/DocumentParser.js +404 -0
  13. package/dist/core/DocumentParser.js.map +1 -0
  14. package/dist/core/DocumentValidator.d.ts +46 -0
  15. package/dist/core/DocumentValidator.d.ts.map +1 -0
  16. package/dist/core/DocumentValidator.js +223 -0
  17. package/dist/core/DocumentValidator.js.map +1 -0
  18. package/dist/core/RelationshipManager.d.ts.map +1 -1
  19. package/dist/core/RelationshipManager.js +19 -3
  20. package/dist/core/RelationshipManager.js.map +1 -1
  21. package/dist/elements/Image.d.ts +5 -1
  22. package/dist/elements/Image.d.ts.map +1 -1
  23. package/dist/elements/Image.js +118 -12
  24. package/dist/elements/Image.js.map +1 -1
  25. package/dist/elements/ImageManager.d.ts +11 -3
  26. package/dist/elements/ImageManager.d.ts.map +1 -1
  27. package/dist/elements/ImageManager.js +72 -6
  28. package/dist/elements/ImageManager.js.map +1 -1
  29. package/dist/index.d.ts +3 -0
  30. package/dist/index.d.ts.map +1 -1
  31. package/dist/index.js +7 -1
  32. package/dist/index.js.map +1 -1
  33. package/dist/utils/validation.d.ts.map +1 -1
  34. package/dist/utils/validation.js +10 -1
  35. package/dist/utils/validation.js.map +1 -1
  36. package/package.json +1 -1
@@ -41,7 +41,6 @@ const Table_1 = require("../elements/Table");
41
41
  const Section_1 = require("../elements/Section");
42
42
  const ImageManager_1 = require("../elements/ImageManager");
43
43
  const HeaderFooterManager_1 = require("../elements/HeaderFooterManager");
44
- const Hyperlink_1 = require("../elements/Hyperlink");
45
44
  const TableOfContents_1 = require("../elements/TableOfContents");
46
45
  const TableOfContentsElement_1 = require("../elements/TableOfContentsElement");
47
46
  const BookmarkManager_1 = require("../elements/BookmarkManager");
@@ -49,17 +48,19 @@ const Revision_1 = require("../elements/Revision");
49
48
  const RevisionManager_1 = require("../elements/RevisionManager");
50
49
  const CommentManager_1 = require("../elements/CommentManager");
51
50
  const Run_1 = require("../elements/Run");
52
- const XMLBuilder_1 = require("../xml/XMLBuilder");
53
- const XMLParser_1 = require("../xml/XMLParser");
54
51
  const StylesManager_1 = require("../formatting/StylesManager");
55
52
  const NumberingManager_1 = require("../formatting/NumberingManager");
56
53
  const RelationshipManager_1 = require("./RelationshipManager");
57
- class ImageRun {
54
+ const DocumentParser_1 = require("./DocumentParser");
55
+ const DocumentGenerator_1 = require("./DocumentGenerator");
56
+ const DocumentValidator_1 = require("./DocumentValidator");
57
+ class ImageRun extends Run_1.Run {
58
58
  constructor(image) {
59
- this.image = image;
59
+ super('');
60
+ this.imageElement = image;
60
61
  }
61
62
  toXML() {
62
- const drawing = this.image.toXML();
63
+ const drawing = this.imageElement.toXML();
63
64
  return {
64
65
  name: 'w:r',
65
66
  children: [drawing]
@@ -69,15 +70,25 @@ class ImageRun {
69
70
  class Document {
70
71
  constructor(zipHandler, options = {}, initDefaults = true) {
71
72
  this.bodyElements = [];
72
- this.parseErrors = [];
73
73
  this.zipHandler = zipHandler || new ZipHandler_1.ZipHandler();
74
- this.properties = options.properties || {};
75
- this.maxMemoryUsagePercent = options.maxMemoryUsagePercent ?? 80;
76
- this.strictParsing = options.strictParsing ?? false;
74
+ const strictParsing = options.strictParsing ?? false;
75
+ const memoryPercent = options.maxMemoryUsagePercent ?? 80;
76
+ this.parser = new DocumentParser_1.DocumentParser(strictParsing);
77
+ this.generator = new DocumentGenerator_1.DocumentGenerator();
78
+ this.validator = new DocumentValidator_1.DocumentValidator(memoryPercent, {
79
+ maxMemoryUsagePercent: options.maxMemoryUsagePercent,
80
+ maxRssMB: options.maxRssMB,
81
+ useAbsoluteLimit: options.useAbsoluteMemoryLimit,
82
+ });
83
+ this.properties = options.properties ? DocumentValidator_1.DocumentValidator.validateProperties(options.properties) : {};
77
84
  this.stylesManager = StylesManager_1.StylesManager.create();
78
85
  this.numberingManager = NumberingManager_1.NumberingManager.create();
79
86
  this.section = Section_1.Section.createLetter();
80
- this.imageManager = ImageManager_1.ImageManager.create();
87
+ this.imageManager = ImageManager_1.ImageManager.create({
88
+ maxImageCount: options.maxImageCount,
89
+ maxTotalImageSizeMB: options.maxTotalImageSizeMB,
90
+ maxSingleImageSizeMB: options.maxSingleImageSizeMB,
91
+ });
81
92
  this.relationshipManager = RelationshipManager_1.RelationshipManager.create();
82
93
  this.headerFooterManager = HeaderFooterManager_1.HeaderFooterManager.create();
83
94
  this.bookmarkManager = BookmarkManager_1.BookmarkManager.create();
@@ -108,290 +119,20 @@ class Document {
108
119
  return doc;
109
120
  }
110
121
  initializeRequiredFiles() {
111
- this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generateContentTypes());
112
- this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generateRels());
113
- this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generateDocumentXml());
122
+ this.zipHandler.addFile(types_1.DOCX_PATHS.CONTENT_TYPES, this.generator.generateContentTypes());
123
+ this.zipHandler.addFile(types_1.DOCX_PATHS.RELS, this.generator.generateRels());
124
+ this.zipHandler.addFile(types_1.DOCX_PATHS.DOCUMENT, this.generator.generateDocumentXml(this.bodyElements, this.section));
114
125
  this.zipHandler.addFile('word/_rels/document.xml.rels', this.relationshipManager.generateXml());
115
126
  this.zipHandler.addFile(types_1.DOCX_PATHS.STYLES, this.stylesManager.generateStylesXml());
116
127
  this.zipHandler.addFile(types_1.DOCX_PATHS.NUMBERING, this.numberingManager.generateNumberingXml());
117
- this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generateCoreProps());
118
- this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generateAppProps());
128
+ this.zipHandler.addFile(types_1.DOCX_PATHS.CORE_PROPS, this.generator.generateCoreProps(this.properties));
129
+ this.zipHandler.addFile(types_1.DOCX_PATHS.APP_PROPS, this.generator.generateAppProps());
119
130
  }
120
131
  async parseDocument() {
121
- const docXml = this.zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
122
- if (!docXml) {
123
- throw new Error('Invalid document: word/document.xml not found');
124
- }
125
- this.parseRelationships();
126
- this.parseProperties();
127
- this.parseBodyElements(docXml);
128
- }
129
- parseBodyElements(docXml) {
130
- this.bodyElements = [];
131
- try {
132
- XMLParser_1.XMLParser.validateSize(docXml);
133
- }
134
- catch (error) {
135
- const err = error instanceof Error ? error : new Error(String(error));
136
- this.parseErrors.push({ element: 'document', error: err });
137
- if (this.strictParsing) {
138
- throw err;
139
- }
140
- return;
141
- }
142
- const bodyContent = XMLParser_1.XMLParser.extractBody(docXml);
143
- if (!bodyContent) {
144
- return;
145
- }
146
- const paragraphXmls = XMLParser_1.XMLParser.extractElements(bodyContent, 'w:p');
147
- for (const paraXml of paragraphXmls) {
148
- const paragraph = this.parseParagraph(paraXml);
149
- if (paragraph) {
150
- this.bodyElements.push(paragraph);
151
- }
152
- }
153
- const hasTable = bodyContent.includes('<w:tbl');
154
- if (hasTable) {
155
- const err = new Error('Document contains tables which are not yet fully supported in Phase 2. Tables will be ignored.');
156
- this.parseErrors.push({ element: 'table', error: err });
157
- if (this.strictParsing) {
158
- throw err;
159
- }
160
- }
161
- this.validateLoadedContent();
162
- }
163
- validateLoadedContent() {
164
- const paragraphs = this.bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
165
- if (paragraphs.length === 0) {
166
- return;
167
- }
168
- let totalRuns = 0;
169
- let emptyRuns = 0;
170
- let runsWithText = 0;
171
- for (const para of paragraphs) {
172
- const runs = para.getRuns();
173
- totalRuns += runs.length;
174
- for (const run of runs) {
175
- const text = run.getText();
176
- if (text.length === 0) {
177
- emptyRuns++;
178
- }
179
- else {
180
- runsWithText++;
181
- }
182
- }
183
- }
184
- if (totalRuns > 0) {
185
- const emptyPercentage = (emptyRuns / totalRuns) * 100;
186
- if (emptyPercentage > 90 && emptyRuns > 10) {
187
- const warning = new Error(`WARNING: Document appears to be corrupted or empty. ` +
188
- `${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) have no text content. ` +
189
- `This may indicate:\n` +
190
- ` - The document was already corrupted before loading\n` +
191
- ` - Text content was stripped by another application\n` +
192
- ` - Encoding issues during document creation\n` +
193
- `Original document structure is preserved, but text may be lost.`);
194
- this.parseErrors.push({ element: 'document-validation', error: warning });
195
- console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
196
- }
197
- else if (emptyPercentage > 50 && emptyRuns > 5) {
198
- const warning = new Error(`Document has ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) with no text. ` +
199
- `This is higher than normal and may indicate partial data loss.`);
200
- this.parseErrors.push({ element: 'document-validation', error: warning });
201
- console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
202
- }
203
- }
204
- }
205
- parseParagraph(paraXml) {
206
- try {
207
- const paragraph = new Paragraph_1.Paragraph();
208
- this.parseParagraphProperties(paraXml, paragraph);
209
- const runXmls = XMLParser_1.XMLParser.extractElements(paraXml, 'w:r');
210
- for (const runXml of runXmls) {
211
- const run = this.parseRun(runXml);
212
- if (run) {
213
- paragraph.addRun(run);
214
- }
215
- }
216
- return paragraph;
217
- }
218
- catch (error) {
219
- const err = error instanceof Error ? error : new Error(String(error));
220
- this.parseErrors.push({ element: 'paragraph', error: err });
221
- if (this.strictParsing) {
222
- throw new Error(`Failed to parse paragraph: ${err.message}`);
223
- }
224
- return null;
225
- }
226
- }
227
- parseParagraphProperties(paraXml, paragraph) {
228
- const pPrMatch = paraXml.match(/<w:pPr[^>]*>([\s\S]*?)<\/w:pPr>/);
229
- if (!pPrMatch || !pPrMatch[1]) {
230
- return;
231
- }
232
- const pPr = pPrMatch[1];
233
- const alignMatch = pPr.match(/<w:jc\s+w:val="([^"]+)"/);
234
- if (alignMatch && alignMatch[1]) {
235
- const alignment = alignMatch[1];
236
- paragraph.setAlignment(alignment);
237
- }
238
- const styleMatch = pPr.match(/<w:pStyle\s+w:val="([^"]+)"/);
239
- if (styleMatch && styleMatch[1]) {
240
- paragraph.setStyle(styleMatch[1]);
241
- }
242
- const indMatch = pPr.match(/<w:ind([^>]+)\/>/);
243
- if (indMatch && indMatch[1]) {
244
- const indStr = indMatch[1];
245
- const leftMatch = indStr.match(/w:left="(\d+)"/);
246
- const rightMatch = indStr.match(/w:right="(\d+)"/);
247
- const firstLineMatch = indStr.match(/w:firstLine="(\d+)"/);
248
- if (leftMatch && leftMatch[1]) {
249
- paragraph.setLeftIndent(parseInt(leftMatch[1], 10));
250
- }
251
- if (rightMatch && rightMatch[1]) {
252
- paragraph.setRightIndent(parseInt(rightMatch[1], 10));
253
- }
254
- if (firstLineMatch && firstLineMatch[1]) {
255
- paragraph.setFirstLineIndent(parseInt(firstLineMatch[1], 10));
256
- }
257
- }
258
- const spacingMatch = pPr.match(/<w:spacing([^>]+)\/>/);
259
- if (spacingMatch && spacingMatch[1]) {
260
- const spacingStr = spacingMatch[1];
261
- const beforeMatch = spacingStr.match(/w:before="(\d+)"/);
262
- const afterMatch = spacingStr.match(/w:after="(\d+)"/);
263
- const lineMatch = spacingStr.match(/w:line="(\d+)"/);
264
- if (beforeMatch && beforeMatch[1]) {
265
- paragraph.setSpaceBefore(parseInt(beforeMatch[1], 10));
266
- }
267
- if (afterMatch && afterMatch[1]) {
268
- paragraph.setSpaceAfter(parseInt(afterMatch[1], 10));
269
- }
270
- if (lineMatch && lineMatch[1]) {
271
- const lineRule = spacingStr.match(/w:lineRule="([^"]+)"/);
272
- paragraph.setLineSpacing(parseInt(lineMatch[1], 10), lineRule && lineRule[1] ? lineRule[1] : undefined);
273
- }
274
- }
275
- if (pPr.includes('<w:keepNext'))
276
- paragraph.setKeepNext(true);
277
- if (pPr.includes('<w:keepLines'))
278
- paragraph.setKeepLines(true);
279
- if (pPr.includes('<w:pageBreakBefore'))
280
- paragraph.setPageBreakBefore(true);
281
- }
282
- parseRun(runXml) {
283
- try {
284
- const text = XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
285
- const run = new Run_1.Run(text);
286
- this.parseRunProperties(runXml, run);
287
- return run;
288
- }
289
- catch (error) {
290
- const err = error instanceof Error ? error : new Error(String(error));
291
- this.parseErrors.push({ element: 'run', error: err });
292
- if (this.strictParsing) {
293
- throw new Error(`Failed to parse run: ${err.message}`);
294
- }
295
- return null;
296
- }
297
- }
298
- parseRunProperties(runXml, run) {
299
- const rPrMatch = runXml.match(/<w:rPr[^>]*>([\s\S]*?)<\/w:rPr>/);
300
- if (!rPrMatch || !rPrMatch[1]) {
301
- return;
302
- }
303
- const rPr = rPrMatch[1];
304
- if (rPr.includes('<w:b/>') || rPr.includes('<w:b ')) {
305
- run.setBold(true);
306
- }
307
- if (rPr.includes('<w:i/>') || rPr.includes('<w:i ')) {
308
- run.setItalic(true);
309
- }
310
- const underlineMatch = rPr.match(/<w:u\s+w:val="([^"]+)"/);
311
- if (underlineMatch && underlineMatch[1]) {
312
- const underlineStyle = underlineMatch[1];
313
- run.setUnderline(underlineStyle);
314
- }
315
- else if (rPr.includes('<w:u/>')) {
316
- run.setUnderline(true);
317
- }
318
- if (rPr.includes('<w:strike/>') || rPr.includes('<w:strike ')) {
319
- run.setStrike(true);
320
- }
321
- const vertAlignMatch = rPr.match(/<w:vertAlign\s+w:val="([^"]+)"/);
322
- if (vertAlignMatch && vertAlignMatch[1]) {
323
- if (vertAlignMatch[1] === 'subscript') {
324
- run.setSubscript(true);
325
- }
326
- else if (vertAlignMatch[1] === 'superscript') {
327
- run.setSuperscript(true);
328
- }
329
- }
330
- const fontMatch = rPr.match(/<w:rFonts[^>]+w:ascii="([^"]+)"/);
331
- if (fontMatch && fontMatch[1]) {
332
- run.setFont(fontMatch[1]);
333
- }
334
- const sizeMatch = rPr.match(/<w:sz\s+w:val="(\d+)"/);
335
- if (sizeMatch && sizeMatch[1]) {
336
- const halfPoints = parseInt(sizeMatch[1], 10);
337
- run.setSize(halfPoints / 2);
338
- }
339
- const colorMatch = rPr.match(/<w:color\s+w:val="([^"]+)"/);
340
- if (colorMatch && colorMatch[1]) {
341
- run.setColor(colorMatch[1]);
342
- }
343
- const highlightMatch = rPr.match(/<w:highlight\s+w:val="([^"]+)"/);
344
- if (highlightMatch && highlightMatch[1]) {
345
- const highlightColor = highlightMatch[1];
346
- run.setHighlight(highlightColor);
347
- }
348
- if (rPr.includes('<w:smallCaps/>') || rPr.includes('<w:smallCaps ')) {
349
- run.setSmallCaps(true);
350
- }
351
- if (rPr.includes('<w:caps/>') || rPr.includes('<w:caps ')) {
352
- run.setAllCaps(true);
353
- }
354
- }
355
- parseRelationships() {
356
- const relsPath = 'word/_rels/document.xml.rels';
357
- const relsXml = this.zipHandler.getFileAsString(relsPath);
358
- if (relsXml) {
359
- this.relationshipManager = RelationshipManager_1.RelationshipManager.fromXml(relsXml);
360
- }
361
- else {
362
- this.relationshipManager.addStyles();
363
- this.relationshipManager.addNumbering();
364
- }
365
- }
366
- parseProperties() {
367
- const coreXml = this.zipHandler.getFileAsString(types_1.DOCX_PATHS.CORE_PROPS);
368
- if (!coreXml) {
369
- return;
370
- }
371
- const extractTag = (xml, tag) => {
372
- const match = xml.match(new RegExp(`<${tag}[^>]*>([^<]*)</${tag}>`));
373
- return match && match[1] ? XMLBuilder_1.XMLBuilder.unescapeXml(match[1]) : undefined;
374
- };
375
- this.properties = {
376
- title: extractTag(coreXml, 'dc:title'),
377
- subject: extractTag(coreXml, 'dc:subject'),
378
- creator: extractTag(coreXml, 'dc:creator'),
379
- keywords: extractTag(coreXml, 'cp:keywords'),
380
- description: extractTag(coreXml, 'dc:description'),
381
- lastModifiedBy: extractTag(coreXml, 'cp:lastModifiedBy'),
382
- };
383
- const revisionStr = extractTag(coreXml, 'cp:revision');
384
- if (revisionStr) {
385
- this.properties.revision = parseInt(revisionStr, 10);
386
- }
387
- const createdStr = extractTag(coreXml, 'dcterms:created');
388
- if (createdStr) {
389
- this.properties.created = new Date(createdStr);
390
- }
391
- const modifiedStr = extractTag(coreXml, 'dcterms:modified');
392
- if (modifiedStr) {
393
- this.properties.modified = new Date(modifiedStr);
394
- }
132
+ const result = await this.parser.parseDocument(this.zipHandler, this.relationshipManager);
133
+ this.bodyElements = result.bodyElements;
134
+ this.properties = result.properties;
135
+ this.relationshipManager = result.relationshipManager;
395
136
  }
396
137
  addParagraph(paragraph) {
397
138
  this.bodyElements.push(paragraph);
@@ -448,48 +189,21 @@ class Document {
448
189
  return this;
449
190
  }
450
191
  setProperties(properties) {
451
- this.properties = { ...this.properties, ...properties };
192
+ const validated = DocumentValidator_1.DocumentValidator.validateProperties(properties);
193
+ this.properties = { ...this.properties, ...validated };
452
194
  return this;
453
195
  }
454
196
  getProperties() {
455
197
  return { ...this.properties };
456
198
  }
457
- validateBeforeSave() {
458
- const paragraphs = this.getParagraphs();
459
- if (paragraphs.length === 0) {
460
- console.warn('\nDocXML Save Warning:\n' +
461
- 'Document has no paragraphs. You are saving an empty document.\n');
462
- return;
463
- }
464
- let totalRuns = 0;
465
- let emptyRuns = 0;
466
- for (const para of paragraphs) {
467
- const runs = para.getRuns();
468
- totalRuns += runs.length;
469
- for (const run of runs) {
470
- if (run.getText().length === 0) {
471
- emptyRuns++;
472
- }
473
- }
474
- }
475
- if (totalRuns > 0) {
476
- const emptyPercentage = (emptyRuns / totalRuns) * 100;
477
- if (emptyPercentage > 90 && emptyRuns > 10) {
478
- console.warn('\nDocXML Save Warning:\n' +
479
- `You are about to save a document where ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) are empty.\n` +
480
- 'This may result in a document with no visible text content.\n' +
481
- 'If this is unintentional, please review the document before saving.\n');
482
- }
483
- }
484
- }
485
199
  async save(filePath) {
486
200
  const tempPath = `${filePath}.tmp.${Date.now()}`;
487
201
  try {
488
- this.validateBeforeSave();
489
- this.checkMemoryThreshold();
202
+ this.validator.validateBeforeSave(this.bodyElements);
203
+ this.validator.checkMemoryThreshold();
490
204
  await this.imageManager.loadAllImageData();
491
- this.checkMemoryThreshold();
492
- const sizeInfo = this.estimateSize();
205
+ this.validator.checkMemoryThreshold();
206
+ const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
493
207
  if (sizeInfo.warning) {
494
208
  console.warn(`DocXML Warning: ${sizeInfo.warning}`);
495
209
  }
@@ -523,11 +237,11 @@ class Document {
523
237
  }
524
238
  async toBuffer() {
525
239
  try {
526
- this.validateBeforeSave();
527
- this.checkMemoryThreshold();
240
+ this.validator.validateBeforeSave(this.bodyElements);
241
+ this.validator.checkMemoryThreshold();
528
242
  await this.imageManager.loadAllImageData();
529
- this.checkMemoryThreshold();
530
- const sizeInfo = this.estimateSize();
243
+ this.validator.checkMemoryThreshold();
244
+ const sizeInfo = this.validator.estimateSize(this.bodyElements, this.imageManager);
531
245
  if (sizeInfo.warning) {
532
246
  console.warn(`DocXML Warning: ${sizeInfo.warning}`);
533
247
  }
@@ -549,11 +263,11 @@ class Document {
549
263
  }
550
264
  }
551
265
  updateDocumentXml() {
552
- const xml = this.generateDocumentXml();
266
+ const xml = this.generator.generateDocumentXml(this.bodyElements, this.section);
553
267
  this.zipHandler.updateFile(types_1.DOCX_PATHS.DOCUMENT, xml);
554
268
  }
555
269
  updateCoreProps() {
556
- const xml = this.generateCoreProps();
270
+ const xml = this.generator.generateCoreProps(this.properties);
557
271
  this.zipHandler.updateFile(types_1.DOCX_PATHS.CORE_PROPS, xml);
558
272
  }
559
273
  updateStylesXml() {
@@ -564,78 +278,6 @@ class Document {
564
278
  const xml = this.numberingManager.generateNumberingXml();
565
279
  this.zipHandler.updateFile(types_1.DOCX_PATHS.NUMBERING, xml);
566
280
  }
567
- generateContentTypes() {
568
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
569
- <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
570
- <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
571
- <Default Extension="xml" ContentType="application/xml"/>
572
- <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
573
- <Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
574
- <Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>
575
- <Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
576
- <Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
577
- </Types>`;
578
- }
579
- generateRels() {
580
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
581
- <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
582
- <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
583
- <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
584
- <Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
585
- </Relationships>`;
586
- }
587
- generateDocumentXml() {
588
- const bodyXmls = [];
589
- for (const element of this.bodyElements) {
590
- const xml = element.toXML();
591
- if (Array.isArray(xml)) {
592
- bodyXmls.push(...xml);
593
- }
594
- else {
595
- bodyXmls.push(xml);
596
- }
597
- }
598
- bodyXmls.push(this.section.toXML());
599
- return XMLBuilder_1.XMLBuilder.createDocument(bodyXmls);
600
- }
601
- generateCoreProps() {
602
- const now = new Date();
603
- const created = this.properties.created || now;
604
- const modified = this.properties.modified || now;
605
- const formatDate = (date) => {
606
- return date.toISOString();
607
- };
608
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
609
- <cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
610
- xmlns:dc="http://purl.org/dc/elements/1.1/"
611
- xmlns:dcterms="http://purl.org/dc/terms/"
612
- xmlns:dcmitype="http://purl.org/dc/dcmitype/"
613
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
614
- <dc:title>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.title || '')}</dc:title>
615
- <dc:subject>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.subject || '')}</dc:subject>
616
- <dc:creator>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.creator || 'DocXML')}</dc:creator>
617
- <cp:keywords>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.keywords || '')}</cp:keywords>
618
- <dc:description>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.description || '')}</dc:description>
619
- <cp:lastModifiedBy>${XMLBuilder_1.XMLBuilder.escapeXmlText(this.properties.lastModifiedBy || this.properties.creator || 'DocXML')}</cp:lastModifiedBy>
620
- <cp:revision>${this.properties.revision || 1}</cp:revision>
621
- <dcterms:created xsi:type="dcterms:W3CDTF">${formatDate(created)}</dcterms:created>
622
- <dcterms:modified xsi:type="dcterms:W3CDTF">${formatDate(modified)}</dcterms:modified>
623
- </cp:coreProperties>`;
624
- }
625
- generateAppProps() {
626
- return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
627
- <Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
628
- xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
629
- <Application>DocXML</Application>
630
- <DocSecurity>0</DocSecurity>
631
- <ScaleCrop>false</ScaleCrop>
632
- <Company></Company>
633
- <LinksUpToDate>false</LinksUpToDate>
634
- <SharedDoc>false</SharedDoc>
635
- <HyperlinksChanged>false</HyperlinksChanged>
636
- <AppVersion>0.1.0</AppVersion>
637
- </Properties>`;
638
- }
639
281
  getStylesManager() {
640
282
  return this.stylesManager;
641
283
  }
@@ -744,38 +386,7 @@ class Document {
744
386
  return this.relationshipManager;
745
387
  }
746
388
  processHyperlinks() {
747
- const paragraphs = this.getParagraphs();
748
- const headers = this.headerFooterManager.getAllHeaders();
749
- const footers = this.headerFooterManager.getAllFooters();
750
- for (const header of headers) {
751
- for (const element of header.header.getElements()) {
752
- if (element instanceof Paragraph_1.Paragraph) {
753
- this.processHyperlinksInParagraph(element);
754
- }
755
- }
756
- }
757
- for (const footer of footers) {
758
- for (const element of footer.footer.getElements()) {
759
- if (element instanceof Paragraph_1.Paragraph) {
760
- this.processHyperlinksInParagraph(element);
761
- }
762
- }
763
- }
764
- for (const para of paragraphs) {
765
- this.processHyperlinksInParagraph(para);
766
- }
767
- }
768
- processHyperlinksInParagraph(paragraph) {
769
- const content = paragraph.getContent();
770
- for (const item of content) {
771
- if (item instanceof Hyperlink_1.Hyperlink && item.isExternal() && !item.getRelationshipId()) {
772
- const url = item.getUrl();
773
- if (url) {
774
- const relationship = this.relationshipManager.addHyperlink(url);
775
- item.setRelationshipId(relationship.getId());
776
- }
777
- }
778
- }
389
+ this.generator.processHyperlinks(this.bodyElements, this.headerFooterManager, this.relationshipManager);
779
390
  }
780
391
  saveImages() {
781
392
  const images = this.imageManager.getAllImages();
@@ -815,43 +426,9 @@ class Document {
815
426
  }
816
427
  }
817
428
  updateContentTypesWithImagesHeadersFootersAndComments() {
818
- const contentTypes = this.generateContentTypesWithImagesHeadersFootersAndComments();
429
+ const contentTypes = this.generator.generateContentTypesWithImagesHeadersFootersAndComments(this.imageManager, this.headerFooterManager, this.commentManager);
819
430
  this.zipHandler.updateFile(types_1.DOCX_PATHS.CONTENT_TYPES, contentTypes);
820
431
  }
821
- generateContentTypesWithImagesHeadersFootersAndComments() {
822
- const images = this.imageManager.getAllImages();
823
- const headers = this.headerFooterManager.getAllHeaders();
824
- const footers = this.headerFooterManager.getAllFooters();
825
- const hasComments = this.commentManager.getCount() > 0;
826
- const extensions = new Set();
827
- for (const entry of images) {
828
- extensions.add(entry.image.getExtension());
829
- }
830
- let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
831
- xml += '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">\n';
832
- xml += ' <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>\n';
833
- xml += ' <Default Extension="xml" ContentType="application/xml"/>\n';
834
- for (const ext of extensions) {
835
- const mimeType = ImageManager_1.ImageManager.getMimeType(ext);
836
- xml += ` <Default Extension="${ext}" ContentType="${mimeType}"/>\n`;
837
- }
838
- xml += ' <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>\n';
839
- xml += ' <Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>\n';
840
- xml += ' <Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>\n';
841
- for (const entry of headers) {
842
- xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>\n`;
843
- }
844
- for (const entry of footers) {
845
- xml += ` <Override PartName="/word/${entry.filename}" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>\n`;
846
- }
847
- if (hasComments) {
848
- xml += ' <Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n';
849
- }
850
- xml += ' <Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>\n';
851
- xml += ' <Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>\n';
852
- xml += '</Types>';
853
- return xml;
854
- }
855
432
  getBookmarkManager() {
856
433
  return this.bookmarkManager;
857
434
  }
@@ -964,53 +541,14 @@ class Document {
964
541
  return this.revisionManager.getAllDeletions();
965
542
  }
966
543
  getParseWarnings() {
967
- return [...this.parseErrors];
968
- }
969
- checkMemoryThreshold() {
970
- const { heapUsed, heapTotal } = process.memoryUsage();
971
- const usagePercent = (heapUsed / heapTotal) * 100;
972
- if (usagePercent > this.maxMemoryUsagePercent) {
973
- throw new Error(`Memory usage critical (${usagePercent.toFixed(1)}% of ${(heapTotal / 1024 / 1024).toFixed(0)}MB heap). ` +
974
- `Cannot process document safely. Consider:\n` +
975
- `- Reducing document size\n` +
976
- `- Optimizing/compressing images\n` +
977
- `- Splitting into multiple documents\n` +
978
- `- Increasing Node.js heap size (--max-old-space-size)`);
979
- }
544
+ return this.parser.getParseErrors();
980
545
  }
981
546
  estimateSize() {
982
- const paragraphCount = this.getParagraphCount();
983
- const tableCount = this.getTableCount();
984
- const imageCount = this.imageManager.getImageCount();
985
- const estimatedXml = (paragraphCount * 200) + (tableCount * 1000) + 50000;
986
- const imageBytes = this.imageManager.getTotalSize();
987
- const totalBytes = estimatedXml + imageBytes;
988
- const totalMB = totalBytes / (1024 * 1024);
989
- const WARNING_MB = 50;
990
- const ERROR_MB = 100;
991
- let warning;
992
- if (totalMB > ERROR_MB) {
993
- warning = `Document size (${totalMB.toFixed(1)}MB) exceeds recommended maximum of ${ERROR_MB}MB. ` +
994
- `This may cause memory issues. Consider splitting into multiple documents or optimizing images.`;
995
- }
996
- else if (totalMB > WARNING_MB) {
997
- warning = `Document size (${totalMB.toFixed(1)}MB) exceeds ${WARNING_MB}MB. ` +
998
- `Large documents may take longer to process and use significant memory.`;
999
- }
1000
- return {
1001
- paragraphs: paragraphCount,
1002
- tables: tableCount,
1003
- images: imageCount,
1004
- estimatedXmlBytes: estimatedXml,
1005
- imageBytes,
1006
- totalEstimatedBytes: totalBytes,
1007
- totalEstimatedMB: parseFloat(totalMB.toFixed(2)),
1008
- warning,
1009
- };
547
+ return this.validator.estimateSize(this.bodyElements, this.imageManager);
1010
548
  }
1011
549
  dispose() {
1012
550
  this.bodyElements = [];
1013
- this.parseErrors = [];
551
+ this.parser.clearParseErrors();
1014
552
  this.stylesManager = StylesManager_1.StylesManager.create();
1015
553
  this.numberingManager = NumberingManager_1.NumberingManager.create();
1016
554
  this.imageManager.clear();
@@ -1022,31 +560,7 @@ class Document {
1022
560
  this.commentManager.clear();
1023
561
  }
1024
562
  getSizeStats() {
1025
- const estimate = this.estimateSize();
1026
- const warnings = [];
1027
- if (estimate.warning) {
1028
- warnings.push(estimate.warning);
1029
- }
1030
- const formatBytes = (bytes) => {
1031
- if (bytes < 1024)
1032
- return `${bytes} B`;
1033
- if (bytes < 1024 * 1024)
1034
- return `${(bytes / 1024).toFixed(1)} KB`;
1035
- return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
1036
- };
1037
- return {
1038
- elements: {
1039
- paragraphs: estimate.paragraphs,
1040
- tables: estimate.tables,
1041
- images: estimate.images,
1042
- },
1043
- size: {
1044
- xml: formatBytes(estimate.estimatedXmlBytes),
1045
- images: formatBytes(estimate.imageBytes),
1046
- total: formatBytes(estimate.totalEstimatedBytes),
1047
- },
1048
- warnings,
1049
- };
563
+ return this.validator.getSizeStats(this.bodyElements, this.imageManager);
1050
564
  }
1051
565
  }
1052
566
  exports.Document = Document;