docxmlater 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81):
  1. package/README.md +228 -31
  2. package/dist/core/BaseManager.d.ts +21 -0
  3. package/dist/core/BaseManager.d.ts.map +1 -0
  4. package/dist/core/BaseManager.js +86 -0
  5. package/dist/core/BaseManager.js.map +1 -0
  6. package/dist/core/Document.d.ts +18 -1
  7. package/dist/core/Document.d.ts.map +1 -1
  8. package/dist/core/Document.js +389 -0
  9. package/dist/core/Document.js.map +1 -1
  10. package/dist/core/DocumentParser.d.ts +4 -0
  11. package/dist/core/DocumentParser.d.ts.map +1 -1
  12. package/dist/core/DocumentParser.js +649 -103
  13. package/dist/core/DocumentParser.js.map +1 -1
  14. package/dist/elements/Comment.d.ts +1 -0
  15. package/dist/elements/Comment.d.ts.map +1 -1
  16. package/dist/elements/Comment.js +3 -0
  17. package/dist/elements/Comment.js.map +1 -1
  18. package/dist/elements/Field.d.ts +12 -0
  19. package/dist/elements/Field.d.ts.map +1 -1
  20. package/dist/elements/Field.js +34 -1
  21. package/dist/elements/Field.js.map +1 -1
  22. package/dist/elements/FieldHelpers.d.ts +7 -0
  23. package/dist/elements/FieldHelpers.d.ts.map +1 -0
  24. package/dist/elements/FieldHelpers.js +62 -0
  25. package/dist/elements/FieldHelpers.js.map +1 -0
  26. package/dist/elements/Hyperlink.d.ts +1 -0
  27. package/dist/elements/Hyperlink.d.ts.map +1 -1
  28. package/dist/elements/Hyperlink.js +6 -1
  29. package/dist/elements/Hyperlink.js.map +1 -1
  30. package/dist/elements/Paragraph.d.ts +6 -0
  31. package/dist/elements/Paragraph.d.ts.map +1 -1
  32. package/dist/elements/Paragraph.js +48 -2
  33. package/dist/elements/Paragraph.js.map +1 -1
  34. package/dist/elements/Run.d.ts +15 -3
  35. package/dist/elements/Run.d.ts.map +1 -1
  36. package/dist/elements/Run.js +122 -54
  37. package/dist/elements/Run.js.map +1 -1
  38. package/dist/elements/Section.d.ts +9 -0
  39. package/dist/elements/Section.d.ts.map +1 -1
  40. package/dist/elements/Section.js +21 -0
  41. package/dist/elements/Section.js.map +1 -1
  42. package/dist/elements/Table.d.ts +0 -1
  43. package/dist/elements/Table.d.ts.map +1 -1
  44. package/dist/elements/Table.js +18 -21
  45. package/dist/elements/Table.js.map +1 -1
  46. package/dist/elements/TableCell.d.ts +0 -1
  47. package/dist/elements/TableCell.d.ts.map +1 -1
  48. package/dist/elements/TableCell.js +4 -16
  49. package/dist/elements/TableCell.js.map +1 -1
  50. package/dist/elements/TableOfContents.d.ts +5 -0
  51. package/dist/elements/TableOfContents.d.ts.map +1 -1
  52. package/dist/elements/TableOfContents.js +31 -4
  53. package/dist/elements/TableOfContents.js.map +1 -1
  54. package/dist/formatting/NumberingLevel.d.ts +2 -0
  55. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  56. package/dist/formatting/NumberingLevel.js +8 -0
  57. package/dist/formatting/NumberingLevel.js.map +1 -1
  58. package/dist/index.d.ts +3 -1
  59. package/dist/index.d.ts.map +1 -1
  60. package/dist/index.js +17 -3
  61. package/dist/index.js.map +1 -1
  62. package/dist/utils/diagnostics.d.ts +20 -0
  63. package/dist/utils/diagnostics.d.ts.map +1 -0
  64. package/dist/utils/diagnostics.js +80 -0
  65. package/dist/utils/diagnostics.js.map +1 -0
  66. package/dist/utils/formatting.d.ts +7 -0
  67. package/dist/utils/formatting.d.ts.map +1 -0
  68. package/dist/utils/formatting.js +93 -0
  69. package/dist/utils/formatting.js.map +1 -0
  70. package/dist/utils/validation.d.ts +1 -0
  71. package/dist/utils/validation.d.ts.map +1 -1
  72. package/dist/utils/validation.js +12 -0
  73. package/dist/utils/validation.js.map +1 -1
  74. package/dist/xml/XMLBuilder.d.ts +25 -0
  75. package/dist/xml/XMLBuilder.d.ts.map +1 -1
  76. package/dist/xml/XMLBuilder.js +72 -0
  77. package/dist/xml/XMLBuilder.js.map +1 -1
  78. package/dist/xml/XMLParser.d.ts.map +1 -1
  79. package/dist/xml/XMLParser.js +15 -0
  80. package/dist/xml/XMLParser.js.map +1 -1
  81. package/package.json +1 -1
@@ -42,6 +42,9 @@ const Hyperlink_1 = require("../elements/Hyperlink");
42
42
  const Table_1 = require("../elements/Table");
43
43
  const TableRow_1 = require("../elements/TableRow");
44
44
  const TableCell_1 = require("../elements/TableCell");
45
+ const TableOfContentsElement_1 = require("../elements/TableOfContentsElement");
46
+ const TableOfContents_1 = require("../elements/TableOfContents");
47
+ const StructuredDocumentTag_1 = require("../elements/StructuredDocumentTag");
45
48
  const ImageRun_1 = require("../elements/ImageRun");
46
49
  const Section_1 = require("../elements/Section");
47
50
  const XMLBuilder_1 = require("../xml/XMLBuilder");
@@ -50,6 +53,7 @@ const RelationshipManager_1 = require("./RelationshipManager");
50
53
  const Style_1 = require("../formatting/Style");
51
54
  const AbstractNumbering_1 = require("../formatting/AbstractNumbering");
52
55
  const NumberingInstance_1 = require("../formatting/NumberingInstance");
56
+ const diagnostics_1 = require("../utils/diagnostics");
53
57
  class DocumentParser {
54
58
  parseErrors = [];
55
59
  strictParsing;
@@ -111,8 +115,7 @@ class DocumentParser {
111
115
  if (next.type === "p") {
112
116
  const elementXml = this.extractSingleElement(bodyContent, "w:p", next.pos);
113
117
  if (elementXml) {
114
- const parsed = XMLParser_1.XMLParser.parseToObject(elementXml, { trimValues: false });
115
- const paragraph = await this.parseParagraphFromObject(parsed["w:p"], relationshipManager, zipHandler, imageManager);
118
+ const paragraph = await this.parseParagraphWithOrder(elementXml, relationshipManager, zipHandler, imageManager);
116
119
  if (paragraph)
117
120
  bodyElements.push(paragraph);
118
121
  pos = next.pos + elementXml.length;
@@ -193,51 +196,10 @@ class DocumentParser {
193
196
  return openTableTags > closeTableTags;
194
197
  }
195
198
  extractSingleElement(content, tagName, startPos) {
196
- const openTag = `<${tagName}`;
197
- const closeTag = `</${tagName}>`;
198
- const selfClosingEnd = "/>";
199
- if (!content.substring(startPos).startsWith(openTag)) {
200
- return "";
201
- }
202
- const openEnd = content.indexOf(">", startPos);
203
- if (openEnd === -1) {
204
- return "";
205
- }
206
- if (content.substring(openEnd - 1, openEnd + 1) === selfClosingEnd) {
207
- return content.substring(startPos, openEnd + 1);
208
- }
209
- let depth = 1;
210
- let pos = openEnd + 1;
211
- while (pos < content.length && depth > 0) {
212
- const nextOpen = content.indexOf(openTag, pos);
213
- const nextClose = content.indexOf(closeTag, pos);
214
- if (nextClose === -1) {
215
- return "";
216
- }
217
- if (nextOpen !== -1 && nextOpen < nextClose) {
218
- const charAfter = content[nextOpen + openTag.length];
219
- if (charAfter === ">" ||
220
- charAfter === "/" ||
221
- charAfter === " " ||
222
- charAfter === "\t" ||
223
- charAfter === "\n" ||
224
- charAfter === "\r") {
225
- depth++;
226
- pos = nextOpen + openTag.length;
227
- }
228
- else {
229
- pos = nextOpen + openTag.length;
230
- }
231
- }
232
- else {
233
- depth--;
234
- pos = nextClose + closeTag.length;
235
- if (depth === 0) {
236
- return content.substring(startPos, pos);
237
- }
238
- }
239
- }
240
- return "";
199
+ const remainingContent = content.substring(startPos);
200
+ const elements = XMLParser_1.XMLParser.extractElements(remainingContent, tagName);
201
+ const extracted = elements.length > 0 ? elements[0] : "";
202
+ return extracted;
241
203
  }
242
204
  validateLoadedContent(bodyElements) {
243
205
  const paragraphs = bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
@@ -287,6 +249,124 @@ class DocumentParser {
287
249
  }
288
250
  }
289
251
  }
252
+ async parseParagraphWithOrder(paraXml, relationshipManager, zipHandler, imageManager) {
253
+ try {
254
+ const paragraph = new Paragraph_1.Paragraph();
255
+ const paraObj = XMLParser_1.XMLParser.parseToObject(paraXml, { trimValues: false });
256
+ const pElement = paraObj["w:p"];
257
+ if (!pElement) {
258
+ return null;
259
+ }
260
+ this.parseParagraphPropertiesFromObject(pElement["w:pPr"], paragraph);
261
+ const paraId = pElement["w14:paraId"];
262
+ if (paraId) {
263
+ paragraph.formatting.paraId = paraId;
264
+ }
265
+ const orderedChildren = pElement["_orderedChildren"];
266
+ if (orderedChildren && orderedChildren.length > 0) {
267
+ for (const childInfo of orderedChildren) {
268
+ const elementType = childInfo.type;
269
+ const elementIndex = childInfo.index;
270
+ if (elementType === "w:r") {
271
+ const runs = pElement["w:r"];
272
+ const runArray = Array.isArray(runs) ? runs : (runs ? [runs] : []);
273
+ if (elementIndex < runArray.length) {
274
+ const child = runArray[elementIndex];
275
+ if (child["w:drawing"]) {
276
+ if (zipHandler && imageManager) {
277
+ const imageRun = await this.parseDrawingFromObject(child["w:drawing"], zipHandler, relationshipManager, imageManager);
278
+ if (imageRun) {
279
+ paragraph.addRun(imageRun);
280
+ }
281
+ }
282
+ }
283
+ else {
284
+ const run = this.parseRunFromObject(child);
285
+ if (run) {
286
+ paragraph.addRun(run);
287
+ }
288
+ }
289
+ }
290
+ }
291
+ else if (elementType === "w:hyperlink") {
292
+ const hyperlinks = pElement["w:hyperlink"];
293
+ const hyperlinkArray = Array.isArray(hyperlinks) ? hyperlinks : (hyperlinks ? [hyperlinks] : []);
294
+ if (elementIndex < hyperlinkArray.length) {
295
+ const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
296
+ if (hyperlink) {
297
+ paragraph.addHyperlink(hyperlink);
298
+ }
299
+ }
300
+ }
301
+ else if (elementType === "w:fldSimple") {
302
+ const fields = pElement["w:fldSimple"];
303
+ const fieldArray = Array.isArray(fields) ? fields : (fields ? [fields] : []);
304
+ if (elementIndex < fieldArray.length) {
305
+ const field = this.parseSimpleFieldFromObject(fieldArray[elementIndex]);
306
+ if (field) {
307
+ paragraph.addField(field);
308
+ }
309
+ }
310
+ }
311
+ }
312
+ }
313
+ else {
314
+ const runs = pElement["w:r"];
315
+ const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
316
+ for (const child of runChildren) {
317
+ if (child["w:drawing"]) {
318
+ if (zipHandler && imageManager) {
319
+ const imageRun = await this.parseDrawingFromObject(child["w:drawing"], zipHandler, relationshipManager, imageManager);
320
+ if (imageRun) {
321
+ paragraph.addRun(imageRun);
322
+ }
323
+ }
324
+ }
325
+ else {
326
+ const run = this.parseRunFromObject(child);
327
+ if (run) {
328
+ paragraph.addRun(run);
329
+ }
330
+ }
331
+ }
332
+ const hyperlinks = pElement["w:hyperlink"];
333
+ const hyperlinkChildren = Array.isArray(hyperlinks) ? hyperlinks : (hyperlinks ? [hyperlinks] : []);
334
+ for (const hyperlinkObj of hyperlinkChildren) {
335
+ const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
336
+ if (hyperlink) {
337
+ paragraph.addHyperlink(hyperlink);
338
+ }
339
+ }
340
+ const fields = pElement["w:fldSimple"];
341
+ const fieldChildren = Array.isArray(fields) ? fields : (fields ? [fields] : []);
342
+ for (const fieldObj of fieldChildren) {
343
+ const field = this.parseSimpleFieldFromObject(fieldObj);
344
+ if (field) {
345
+ paragraph.addField(field);
346
+ }
347
+ }
348
+ }
349
+ const runs = paragraph.getRuns();
350
+ const runData = runs.map(run => ({
351
+ text: run.getText(),
352
+ rtl: run.getFormatting().rtl,
353
+ }));
354
+ const bidi = paragraph.getFormatting().bidi;
355
+ (0, diagnostics_1.logParagraphContent)('parsing', -1, runData, bidi);
356
+ if (bidi) {
357
+ (0, diagnostics_1.logTextDirection)(`Paragraph has BiDi enabled`);
358
+ }
359
+ return paragraph;
360
+ }
361
+ catch (error) {
362
+ const err = error instanceof Error ? error : new Error(String(error));
363
+ this.parseErrors.push({ element: "paragraph", error: err });
364
+ if (this.strictParsing) {
365
+ throw new Error(`Failed to parse paragraph: ${err.message}`);
366
+ }
367
+ return null;
368
+ }
369
+ }
290
370
  async parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager) {
291
371
  try {
292
372
  const paragraph = new Paragraph_1.Paragraph();
@@ -295,38 +375,88 @@ class DocumentParser {
295
375
  paragraph.formatting.paraId = paraId;
296
376
  }
297
377
  this.parseParagraphPropertiesFromObject(paraObj["w:pPr"], paragraph);
298
- const runs = paraObj["w:r"];
299
- const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
300
- for (const child of runChildren) {
301
- if (child["w:drawing"]) {
302
- if (zipHandler && imageManager) {
303
- const imageRun = await this.parseDrawingFromObject(child["w:drawing"], zipHandler, relationshipManager, imageManager);
304
- if (imageRun) {
305
- paragraph.addRun(imageRun);
378
+ const orderedChildren = paraObj["_orderedChildren"];
379
+ if (orderedChildren && orderedChildren.length > 0) {
380
+ for (const childInfo of orderedChildren) {
381
+ const elementType = childInfo.type;
382
+ const elementIndex = childInfo.index;
383
+ if (elementType === "w:r") {
384
+ const runs = paraObj["w:r"];
385
+ const runArray = Array.isArray(runs) ? runs : (runs ? [runs] : []);
386
+ if (elementIndex < runArray.length) {
387
+ const child = runArray[elementIndex];
388
+ if (child["w:drawing"]) {
389
+ if (zipHandler && imageManager) {
390
+ const imageRun = await this.parseDrawingFromObject(child["w:drawing"], zipHandler, relationshipManager, imageManager);
391
+ if (imageRun) {
392
+ paragraph.addRun(imageRun);
393
+ }
394
+ }
395
+ }
396
+ else {
397
+ const run = this.parseRunFromObject(child);
398
+ if (run) {
399
+ paragraph.addRun(run);
400
+ }
401
+ }
306
402
  }
307
403
  }
308
- }
309
- else {
310
- const run = this.parseRunFromObject(child);
311
- if (run) {
312
- paragraph.addRun(run);
404
+ else if (elementType === "w:hyperlink") {
405
+ const hyperlinks = paraObj["w:hyperlink"];
406
+ const hyperlinkArray = Array.isArray(hyperlinks) ? hyperlinks : (hyperlinks ? [hyperlinks] : []);
407
+ if (elementIndex < hyperlinkArray.length) {
408
+ const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
409
+ if (hyperlink) {
410
+ paragraph.addHyperlink(hyperlink);
411
+ }
412
+ }
413
+ }
414
+ else if (elementType === "w:fldSimple") {
415
+ const fields = paraObj["w:fldSimple"];
416
+ const fieldArray = Array.isArray(fields) ? fields : (fields ? [fields] : []);
417
+ if (elementIndex < fieldArray.length) {
418
+ const field = this.parseSimpleFieldFromObject(fieldArray[elementIndex]);
419
+ if (field) {
420
+ paragraph.addField(field);
421
+ }
422
+ }
313
423
  }
314
424
  }
315
425
  }
316
- const hyperlinks = paraObj["w:hyperlink"];
317
- const hyperlinkChildren = Array.isArray(hyperlinks) ? hyperlinks : (hyperlinks ? [hyperlinks] : []);
318
- for (const hyperlinkObj of hyperlinkChildren) {
319
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
320
- if (hyperlink) {
321
- paragraph.addHyperlink(hyperlink);
426
+ else {
427
+ const runs = paraObj["w:r"];
428
+ const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
429
+ for (const child of runChildren) {
430
+ if (child["w:drawing"]) {
431
+ if (zipHandler && imageManager) {
432
+ const imageRun = await this.parseDrawingFromObject(child["w:drawing"], zipHandler, relationshipManager, imageManager);
433
+ if (imageRun) {
434
+ paragraph.addRun(imageRun);
435
+ }
436
+ }
437
+ }
438
+ else {
439
+ const run = this.parseRunFromObject(child);
440
+ if (run) {
441
+ paragraph.addRun(run);
442
+ }
443
+ }
322
444
  }
323
- }
324
- const fields = paraObj["w:fldSimple"];
325
- const fieldChildren = Array.isArray(fields) ? fields : (fields ? [fields] : []);
326
- for (const fieldObj of fieldChildren) {
327
- const field = this.parseSimpleFieldFromObject(fieldObj);
328
- if (field) {
329
- paragraph.addField(field);
445
+ const hyperlinks = paraObj["w:hyperlink"];
446
+ const hyperlinkChildren = Array.isArray(hyperlinks) ? hyperlinks : (hyperlinks ? [hyperlinks] : []);
447
+ for (const hyperlinkObj of hyperlinkChildren) {
448
+ const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
449
+ if (hyperlink) {
450
+ paragraph.addHyperlink(hyperlink);
451
+ }
452
+ }
453
+ const fields = paraObj["w:fldSimple"];
454
+ const fieldChildren = Array.isArray(fields) ? fields : (fields ? [fields] : []);
455
+ for (const fieldObj of fieldChildren) {
456
+ const field = this.parseSimpleFieldFromObject(fieldObj);
457
+ if (field) {
458
+ paragraph.addField(field);
459
+ }
330
460
  }
331
461
  }
332
462
  return paragraph;
@@ -585,13 +715,89 @@ class DocumentParser {
585
715
  }
586
716
  parseRunFromObject(runObj) {
587
717
  try {
588
- const textElement = runObj["w:t"];
589
- let text = (typeof textElement === 'object' && textElement !== null)
590
- ? (textElement["#text"] || "")
591
- : (textElement || "");
592
- text = XMLBuilder_1.XMLBuilder.unescapeXml(text);
593
- const run = new Run_1.Run(text, { cleanXmlFromText: false });
718
+ const content = [];
719
+ if (runObj["_orderedChildren"]) {
720
+ for (const child of runObj["_orderedChildren"]) {
721
+ const elementType = child.type;
722
+ const elementIndex = child.index;
723
+ switch (elementType) {
724
+ case 'w:t': {
725
+ const textElements = Array.isArray(runObj["w:t"]) ? runObj["w:t"] : [runObj["w:t"]];
726
+ const te = textElements[elementIndex];
727
+ if (te !== undefined && te !== null) {
728
+ let text = (typeof te === 'object' && te !== null)
729
+ ? (te["#text"] || "")
730
+ : (te || "");
731
+ text = XMLBuilder_1.XMLBuilder.unescapeXml(text);
732
+ if (text) {
733
+ content.push({ type: 'text', value: text });
734
+ }
735
+ }
736
+ break;
737
+ }
738
+ case 'w:tab':
739
+ content.push({ type: 'tab' });
740
+ break;
741
+ case 'w:br': {
742
+ const brElement = runObj["w:br"];
743
+ const breakType = brElement?.['@_w:type'];
744
+ content.push({ type: 'break', breakType });
745
+ break;
746
+ }
747
+ case 'w:cr':
748
+ content.push({ type: 'carriageReturn' });
749
+ break;
750
+ case 'w:softHyphen':
751
+ content.push({ type: 'softHyphen' });
752
+ break;
753
+ case 'w:noBreakHyphen':
754
+ content.push({ type: 'noBreakHyphen' });
755
+ break;
756
+ case 'w:rPr':
757
+ break;
758
+ }
759
+ }
760
+ }
761
+ else {
762
+ const textElement = runObj["w:t"];
763
+ if (textElement !== undefined && textElement !== null) {
764
+ const textElements = Array.isArray(textElement) ? textElement : [textElement];
765
+ for (const te of textElements) {
766
+ let text = (typeof te === 'object' && te !== null)
767
+ ? (te["#text"] || "")
768
+ : (te || "");
769
+ text = XMLBuilder_1.XMLBuilder.unescapeXml(text);
770
+ if (text) {
771
+ content.push({ type: 'text', value: text });
772
+ }
773
+ }
774
+ }
775
+ if (runObj["w:tab"] !== undefined) {
776
+ content.push({ type: 'tab' });
777
+ }
778
+ if (runObj["w:br"] !== undefined) {
779
+ const brElement = runObj["w:br"];
780
+ const breakType = brElement?.['@_w:type'];
781
+ content.push({ type: 'break', breakType });
782
+ }
783
+ if (runObj["w:cr"] !== undefined) {
784
+ content.push({ type: 'carriageReturn' });
785
+ }
786
+ if (runObj["w:softHyphen"] !== undefined) {
787
+ content.push({ type: 'softHyphen' });
788
+ }
789
+ if (runObj["w:noBreakHyphen"] !== undefined) {
790
+ content.push({ type: 'noBreakHyphen' });
791
+ }
792
+ }
793
+ const run = Run_1.Run.createFromContent(content, { cleanXmlFromText: false });
594
794
  this.parseRunPropertiesFromObject(runObj["w:rPr"], run);
795
+ const text = run.getText();
796
+ const formatting = run.getFormatting();
797
+ if (formatting.rtl) {
798
+ (0, diagnostics_1.logTextDirection)(`Run with RTL: "${text}"`);
799
+ }
800
+ (0, diagnostics_1.logParsing)(`Parsed run: "${text}" (${content.length} content element(s))`, { rtl: formatting.rtl || false });
595
801
  return run;
596
802
  }
597
803
  catch (error) {
@@ -605,20 +811,15 @@ class DocumentParser {
605
811
  const tooltip = hyperlinkObj["@_w:tooltip"];
606
812
  const runs = hyperlinkObj["w:r"];
607
813
  const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
608
- const text = runChildren
609
- .map((runObj) => {
610
- const textElement = runObj["w:t"];
611
- let runText = (typeof textElement === 'object' && textElement !== null)
612
- ? (textElement["#text"] || "")
613
- : (textElement || "");
614
- return XMLBuilder_1.XMLBuilder.unescapeXml(runText);
615
- })
616
- .join('');
814
+ let parsedRun = null;
815
+ let text = '';
617
816
  let formatting = {};
618
- if (runChildren.length > 0 && runChildren[0]["w:rPr"]) {
619
- const tempRun = new Run_1.Run('');
620
- this.parseRunPropertiesFromObject(runChildren[0]["w:rPr"], tempRun);
621
- formatting = tempRun.getFormatting();
817
+ if (runChildren.length > 0) {
818
+ parsedRun = this.parseRunFromObject(runChildren[0]);
819
+ if (parsedRun) {
820
+ text = parsedRun.getText();
821
+ formatting = parsedRun.getFormatting();
822
+ }
622
823
  }
623
824
  let url;
624
825
  if (relationshipId) {
@@ -627,14 +828,23 @@ class DocumentParser {
627
828
  url = relationship.getTarget();
628
829
  }
629
830
  }
831
+ let displayText = text || url || '[Link]';
832
+ if (!text && anchor) {
833
+ console.warn(`[DocumentParser] Hyperlink to anchor "${anchor}" has no display text. ` +
834
+ `Using placeholder "[Link]" to prevent bookmark ID from appearing as visible text. ` +
835
+ `This may indicate a corrupted TOC or malformed hyperlink in the source document.`);
836
+ }
630
837
  const hyperlink = new Hyperlink_1.Hyperlink({
631
838
  url,
632
839
  anchor,
633
- text: text || url || anchor || 'Link',
840
+ text: displayText,
634
841
  formatting,
635
842
  tooltip,
636
843
  relationshipId,
637
844
  });
845
+ if (parsedRun && parsedRun.getContent().length > 1) {
846
+ hyperlink.setRun(parsedRun);
847
+ }
638
848
  return hyperlink;
639
849
  }
640
850
  catch (error) {
@@ -726,21 +936,29 @@ class DocumentParser {
726
936
  run.setVanish(true);
727
937
  if (rPrObj["w:specVanish"])
728
938
  run.setSpecVanish(true);
729
- if (rPrObj["w:rtl"])
939
+ const checkBooleanProp = (prop) => {
940
+ if (!prop)
941
+ return false;
942
+ const val = prop["@_w:val"];
943
+ if (val === undefined)
944
+ return true;
945
+ return val === "1" || val === 1 || val === "true" || val === true;
946
+ };
947
+ if (checkBooleanProp(rPrObj["w:rtl"]))
730
948
  run.setRTL(true);
731
- if (rPrObj["w:b"])
949
+ if (checkBooleanProp(rPrObj["w:b"]))
732
950
  run.setBold(true);
733
- if (rPrObj["w:bCs"])
951
+ if (checkBooleanProp(rPrObj["w:bCs"]))
734
952
  run.setComplexScriptBold(true);
735
- if (rPrObj["w:i"])
953
+ if (checkBooleanProp(rPrObj["w:i"]))
736
954
  run.setItalic(true);
737
- if (rPrObj["w:iCs"])
955
+ if (checkBooleanProp(rPrObj["w:iCs"]))
738
956
  run.setComplexScriptItalic(true);
739
- if (rPrObj["w:strike"])
957
+ if (checkBooleanProp(rPrObj["w:strike"]))
740
958
  run.setStrike(true);
741
- if (rPrObj["w:smallCaps"])
959
+ if (checkBooleanProp(rPrObj["w:smallCaps"]))
742
960
  run.setSmallCaps(true);
743
- if (rPrObj["w:caps"])
961
+ if (checkBooleanProp(rPrObj["w:caps"]))
744
962
  run.setAllCaps(true);
745
963
  if (rPrObj["w:u"]) {
746
964
  const uVal = rPrObj["w:u"]["@_w:val"];
@@ -1125,6 +1343,12 @@ class DocumentParser {
1125
1343
  table.setCellSpacingType(spacingType);
1126
1344
  }
1127
1345
  }
1346
+ if (tblPrObj["w:jc"]) {
1347
+ const alignment = tblPrObj["w:jc"]["@_w:val"];
1348
+ if (alignment) {
1349
+ table.setAlignment(alignment);
1350
+ }
1351
+ }
1128
1352
  }
1129
1353
  async parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager) {
1130
1354
  try {
@@ -1382,8 +1606,330 @@ class DocumentParser {
1382
1606
  return null;
1383
1607
  }
1384
1608
  }
1385
- async parseSDTFromObject(_sdtObj, _relationshipManager, _zipHandler, _imageManager) {
1386
- return null;
1609
+ async parseSDTFromObject(sdtObj, relationshipManager, zipHandler, imageManager) {
1610
+ try {
1611
+ if (!sdtObj)
1612
+ return null;
1613
+ const properties = {};
1614
+ const sdtPr = sdtObj['w:sdtPr'];
1615
+ if (sdtPr) {
1616
+ const idElement = sdtPr['w:id'];
1617
+ if (idElement && idElement['@_w:val']) {
1618
+ properties.id = parseInt(idElement['@_w:val'], 10);
1619
+ }
1620
+ const tagElement = sdtPr['w:tag'];
1621
+ if (tagElement && tagElement['@_w:val']) {
1622
+ properties.tag = tagElement['@_w:val'];
1623
+ }
1624
+ const lockElement = sdtPr['w:lock'];
1625
+ if (lockElement && lockElement['@_w:val']) {
1626
+ properties.lock = lockElement['@_w:val'];
1627
+ }
1628
+ const aliasElement = sdtPr['w:alias'];
1629
+ if (aliasElement && aliasElement['@_w:val']) {
1630
+ properties.alias = aliasElement['@_w:val'];
1631
+ }
1632
+ if (sdtPr['w:richText']) {
1633
+ properties.controlType = 'richText';
1634
+ }
1635
+ else if (sdtPr['w:text']) {
1636
+ properties.controlType = 'plainText';
1637
+ const textElement = sdtPr['w:text'];
1638
+ properties.plainText = {
1639
+ multiLine: textElement?.['@_w:multiLine'] === '1' || textElement?.['@_w:multiLine'] === 'true'
1640
+ };
1641
+ }
1642
+ else if (sdtPr['w:comboBox']) {
1643
+ properties.controlType = 'comboBox';
1644
+ const comboBoxElement = sdtPr['w:comboBox'];
1645
+ properties.comboBox = this.parseListItems(comboBoxElement);
1646
+ }
1647
+ else if (sdtPr['w:dropDownList']) {
1648
+ properties.controlType = 'dropDownList';
1649
+ const dropDownElement = sdtPr['w:dropDownList'];
1650
+ properties.dropDownList = this.parseListItems(dropDownElement);
1651
+ }
1652
+ else if (sdtPr['w:date']) {
1653
+ properties.controlType = 'datePicker';
1654
+ const dateElement = sdtPr['w:date'];
1655
+ properties.datePicker = {
1656
+ dateFormat: dateElement?.['w:dateFormat']?.['@_w:val'],
1657
+ fullDate: dateElement?.['w:fullDate']?.['@_w:val'] ? new Date(dateElement['w:fullDate']['@_w:val']) : undefined,
1658
+ lid: dateElement?.['w:lid']?.['@_w:val'],
1659
+ calendar: dateElement?.['w:calendar']?.['@_w:val']
1660
+ };
1661
+ }
1662
+ else if (sdtPr['w14:checkbox']) {
1663
+ properties.controlType = 'checkbox';
1664
+ const checkboxElement = sdtPr['w14:checkbox'];
1665
+ properties.checkbox = {
1666
+ checked: checkboxElement?.['w14:checked']?.['@_w14:val'] === '1' || checkboxElement?.['w14:checked']?.['@_w14:val'] === 'true',
1667
+ checkedState: checkboxElement?.['w14:checkedState']?.['@_w14:val'],
1668
+ uncheckedState: checkboxElement?.['w14:uncheckedState']?.['@_w14:val']
1669
+ };
1670
+ }
1671
+ else if (sdtPr['w:picture']) {
1672
+ properties.controlType = 'picture';
1673
+ }
1674
+ else if (sdtPr['w:docPartObj']) {
1675
+ properties.controlType = 'buildingBlock';
1676
+ const docPartObj = sdtPr['w:docPartObj'];
1677
+ properties.buildingBlock = {
1678
+ gallery: docPartObj?.['w:docPartGallery']?.['@_w:val'],
1679
+ category: docPartObj?.['w:docPartCategory']?.['@_w:val']
1680
+ };
1681
+ }
1682
+ else if (sdtPr['w:group']) {
1683
+ properties.controlType = 'group';
1684
+ }
1685
+ }
1686
+ const content = [];
1687
+ const sdtContent = sdtObj['w:sdtContent'];
1688
+ if (sdtContent) {
1689
+ const orderedChildren = sdtContent['_orderedChildren'];
1690
+ if (orderedChildren && orderedChildren.length > 0) {
1691
+ for (const childInfo of orderedChildren) {
1692
+ const elementType = childInfo.type;
1693
+ const elementIndex = childInfo.index;
1694
+ if (elementType === 'w:p') {
1695
+ const paragraphs = sdtContent['w:p'];
1696
+ const paraArray = Array.isArray(paragraphs) ? paragraphs : (paragraphs ? [paragraphs] : []);
1697
+ if (elementIndex < paraArray.length) {
1698
+ const paraXml = this.objectToXml({ 'w:p': paraArray[elementIndex] });
1699
+ const para = await this.parseParagraphWithOrder(paraXml, relationshipManager, zipHandler, imageManager);
1700
+ if (para)
1701
+ content.push(para);
1702
+ }
1703
+ }
1704
+ else if (elementType === 'w:tbl') {
1705
+ const tables = sdtContent['w:tbl'];
1706
+ const tableArray = Array.isArray(tables) ? tables : (tables ? [tables] : []);
1707
+ if (elementIndex < tableArray.length) {
1708
+ const tableObj = tableArray[elementIndex];
1709
+ const table = await this.parseTableFromObject(tableObj, relationshipManager, zipHandler, imageManager);
1710
+ if (table)
1711
+ content.push(table);
1712
+ }
1713
+ }
1714
+ else if (elementType === 'w:sdt') {
1715
+ const sdts = sdtContent['w:sdt'];
1716
+ const sdtArray = Array.isArray(sdts) ? sdts : (sdts ? [sdts] : []);
1717
+ if (elementIndex < sdtArray.length) {
1718
+ const nestedSdt = await this.parseSDTFromObject(sdtArray[elementIndex], relationshipManager, zipHandler, imageManager);
1719
+ if (nestedSdt)
1720
+ content.push(nestedSdt);
1721
+ }
1722
+ }
1723
+ }
1724
+ }
1725
+ else {
1726
+ const paragraphs = sdtContent['w:p'];
1727
+ const paraArray = Array.isArray(paragraphs) ? paragraphs : (paragraphs ? [paragraphs] : []);
1728
+ for (const paraObj of paraArray) {
1729
+ const paraXml = this.objectToXml({ 'w:p': paraObj });
1730
+ const para = await this.parseParagraphWithOrder(paraXml, relationshipManager, zipHandler, imageManager);
1731
+ if (para)
1732
+ content.push(para);
1733
+ }
1734
+ const tables = sdtContent['w:tbl'];
1735
+ const tableArray = Array.isArray(tables) ? tables : (tables ? [tables] : []);
1736
+ for (const tableObj of tableArray) {
1737
+ const table = await this.parseTableFromObject(tableObj, relationshipManager, zipHandler, imageManager);
1738
+ if (table)
1739
+ content.push(table);
1740
+ }
1741
+ const nestedSdts = sdtContent['w:sdt'];
1742
+ const sdtArray = Array.isArray(nestedSdts) ? nestedSdts : (nestedSdts ? [nestedSdts] : []);
1743
+ for (const nestedSdtObj of sdtArray) {
1744
+ const nestedSdt = await this.parseSDTFromObject(nestedSdtObj, relationshipManager, zipHandler, imageManager);
1745
+ if (nestedSdt)
1746
+ content.push(nestedSdt);
1747
+ }
1748
+ }
1749
+ }
1750
+ if (properties.buildingBlock?.gallery === 'Table of Contents') {
1751
+ const toc = this.parseTOCFromSDTContent(content, properties, sdtContent);
1752
+ if (toc) {
1753
+ return new TableOfContentsElement_1.TableOfContentsElement(toc);
1754
+ }
1755
+ }
1756
+ return new StructuredDocumentTag_1.StructuredDocumentTag(properties, content);
1757
+ }
1758
+ catch (error) {
1759
+ console.warn('[DocumentParser] Failed to parse SDT:', error);
1760
+ return null;
1761
+ }
1762
+ }
1763
+ parseTOCFromSDTContent(content, properties, sdtContent) {
1764
+ try {
1765
+ let title;
1766
+ let fieldInstruction;
1767
+ for (const element of content) {
1768
+ if (element instanceof Paragraph_1.Paragraph) {
1769
+ const style = element.getStyle();
1770
+ if (style === 'TOCHeading') {
1771
+ const runs = element.getRuns();
1772
+ title = runs.map(r => r.getText()).join('');
1773
+ }
1774
+ }
1775
+ }
1776
+ const paragraphs = sdtContent['w:p'];
1777
+ const paraArray = Array.isArray(paragraphs) ? paragraphs : (paragraphs ? [paragraphs] : []);
1778
+ for (const paraObj of paraArray) {
1779
+ const runs = paraObj['w:r'];
1780
+ const runArray = Array.isArray(runs) ? runs : (runs ? [runs] : []);
1781
+ for (const runObj of runArray) {
1782
+ const instrText = runObj['w:instrText'];
1783
+ if (instrText) {
1784
+ if (typeof instrText === 'string') {
1785
+ fieldInstruction = instrText.trim();
1786
+ }
1787
+ else if (instrText['#text']) {
1788
+ fieldInstruction = instrText['#text'].trim();
1789
+ }
1790
+ if (fieldInstruction)
1791
+ break;
1792
+ }
1793
+ }
1794
+ if (fieldInstruction)
1795
+ break;
1796
+ }
1797
+ if (!fieldInstruction) {
1798
+ console.warn('[DocumentParser] No TOC field instruction found in SDT content');
1799
+ return null;
1800
+ }
1801
+ const tocOptions = { title };
1802
+ if (fieldInstruction.includes('\\h')) {
1803
+ tocOptions.useHyperlinks = true;
1804
+ }
1805
+ if (fieldInstruction.includes('\\n')) {
1806
+ tocOptions.showPageNumbers = false;
1807
+ }
1808
+ if (fieldInstruction.includes('\\z')) {
1809
+ tocOptions.hideInWebLayout = true;
1810
+ }
1811
+ const outlineMatch = fieldInstruction.match(/\\o\s+"(\d+)-(\d+)"/);
1812
+ if (outlineMatch && outlineMatch[1] && outlineMatch[2]) {
1813
+ tocOptions.minLevel = parseInt(outlineMatch[1], 10);
1814
+ tocOptions.maxLevel = parseInt(outlineMatch[2], 10);
1815
+ }
1816
+ const stylesMatch = fieldInstruction.match(/\\t\s+"([^"]+)"/);
1817
+ if (stylesMatch && stylesMatch[1]) {
1818
+ const stylesStr = stylesMatch[1];
1819
+ const styles = [];
1820
+ const parts = stylesStr.split(',').filter(p => p.trim());
1821
+ for (let i = 0; i < parts.length; i += 2) {
1822
+ const styleName = parts[i];
1823
+ const levelStr = parts[i + 1];
1824
+ if (styleName && levelStr) {
1825
+ styles.push({
1826
+ styleName: styleName.trim(),
1827
+ level: parseInt(levelStr.trim(), 10)
1828
+ });
1829
+ }
1830
+ }
1831
+ if (styles.length > 0) {
1832
+ tocOptions.includeStyles = styles;
1833
+ }
1834
+ }
1835
+ return new TableOfContents_1.TableOfContents(tocOptions);
1836
+ }
1837
+ catch (error) {
1838
+ console.warn('[DocumentParser] Failed to parse TOC from SDT content:', error);
1839
+ return null;
1840
+ }
1841
+ }
1842
+ parseListItems(element) {
1843
+ const items = [];
1844
+ const listItems = element?.['w:listItem'];
1845
+ const itemArray = Array.isArray(listItems) ? listItems : (listItems ? [listItems] : []);
1846
+ for (const item of itemArray) {
1847
+ if (item['@_w:displayText'] && item['@_w:value']) {
1848
+ items.push({
1849
+ displayText: item['@_w:displayText'],
1850
+ value: item['@_w:value']
1851
+ });
1852
+ }
1853
+ }
1854
+ return {
1855
+ items,
1856
+ lastValue: element?.['@_w:lastValue']
1857
+ };
1858
+ }
1859
+ objectToXml(obj) {
1860
+ const buildXml = (o, name) => {
1861
+ if (typeof o === 'string')
1862
+ return o;
1863
+ if (typeof o !== 'object')
1864
+ return String(o);
1865
+ const keys = Object.keys(o);
1866
+ if (keys.length === 0 && !name)
1867
+ return '';
1868
+ const tagName = name || keys[0];
1869
+ const element = name ? o : o[tagName];
1870
+ let xml = `<${tagName}`;
1871
+ if (element && typeof element === 'object') {
1872
+ for (const key of Object.keys(element)) {
1873
+ if (key.startsWith('@_')) {
1874
+ const attrName = key.substring(2);
1875
+ xml += ` ${attrName}="${element[key]}"`;
1876
+ }
1877
+ }
1878
+ }
1879
+ const hasChildren = element && typeof element === 'object' &&
1880
+ Object.keys(element).some(k => !k.startsWith('@_') && k !== '#text' && k !== '_orderedChildren');
1881
+ if (!hasChildren && (!element || !element['#text'])) {
1882
+ xml += '/>';
1883
+ }
1884
+ else {
1885
+ xml += '>';
1886
+ if (element && element['#text']) {
1887
+ xml += element['#text'];
1888
+ }
1889
+ if (element && typeof element === 'object') {
1890
+ const orderedChildren = element['_orderedChildren'];
1891
+ if (orderedChildren && orderedChildren.length > 0) {
1892
+ for (const childInfo of orderedChildren) {
1893
+ const childType = childInfo.type;
1894
+ const childIndex = childInfo.index;
1895
+ if (element[childType] !== undefined) {
1896
+ const children = element[childType];
1897
+ if (Array.isArray(children)) {
1898
+ if (childIndex < children.length) {
1899
+ const childXml = buildXml(children[childIndex], childType);
1900
+ xml += childXml;
1901
+ }
1902
+ }
1903
+ else {
1904
+ if (childIndex === 0) {
1905
+ const childXml = buildXml(children, childType);
1906
+ xml += childXml;
1907
+ }
1908
+ }
1909
+ }
1910
+ }
1911
+ }
1912
+ else {
1913
+ for (const key of Object.keys(element)) {
1914
+ if (!key.startsWith('@_') && key !== '#text' && key !== '_orderedChildren') {
1915
+ const children = element[key];
1916
+ if (Array.isArray(children)) {
1917
+ for (const child of children) {
1918
+ xml += buildXml(child, key);
1919
+ }
1920
+ }
1921
+ else {
1922
+ xml += buildXml(children, key);
1923
+ }
1924
+ }
1925
+ }
1926
+ }
1927
+ }
1928
+ xml += `</${tagName}>`;
1929
+ }
1930
+ return xml;
1931
+ };
1932
+ return buildXml(obj);
1387
1933
  }
1388
1934
  parseRelationships(zipHandler, relationshipManager) {
1389
1935
  const relsPath = "word/_rels/document.xml.rels";