docxmlater 0.28.1 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/README.md +330 -4
  2. package/dist/core/Document.d.ts +66 -28
  3. package/dist/core/Document.d.ts.map +1 -1
  4. package/dist/core/Document.js +590 -90
  5. package/dist/core/Document.js.map +1 -1
  6. package/dist/core/DocumentGenerator.d.ts +15 -13
  7. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  8. package/dist/core/DocumentGenerator.js +74 -13
  9. package/dist/core/DocumentGenerator.js.map +1 -1
  10. package/dist/core/DocumentParser.d.ts +40 -13
  11. package/dist/core/DocumentParser.d.ts.map +1 -1
  12. package/dist/core/DocumentParser.js +800 -316
  13. package/dist/core/DocumentParser.js.map +1 -1
  14. package/dist/core/DocumentValidator.d.ts +2 -1
  15. package/dist/core/DocumentValidator.d.ts.map +1 -1
  16. package/dist/core/DocumentValidator.js.map +1 -1
  17. package/dist/elements/Bookmark.d.ts +1 -0
  18. package/dist/elements/Bookmark.d.ts.map +1 -1
  19. package/dist/elements/Bookmark.js +1 -1
  20. package/dist/elements/Bookmark.js.map +1 -1
  21. package/dist/elements/Hyperlink.d.ts +1 -0
  22. package/dist/elements/Hyperlink.d.ts.map +1 -1
  23. package/dist/elements/Hyperlink.js +23 -0
  24. package/dist/elements/Hyperlink.js.map +1 -1
  25. package/dist/elements/ImageManager.d.ts +2 -0
  26. package/dist/elements/ImageManager.d.ts.map +1 -1
  27. package/dist/elements/ImageManager.js +22 -0
  28. package/dist/elements/ImageManager.js.map +1 -1
  29. package/dist/elements/ImageRun.d.ts +10 -0
  30. package/dist/elements/ImageRun.d.ts.map +1 -0
  31. package/dist/elements/ImageRun.js +23 -0
  32. package/dist/elements/ImageRun.js.map +1 -0
  33. package/dist/elements/Paragraph.d.ts +16 -1
  34. package/dist/elements/Paragraph.d.ts.map +1 -1
  35. package/dist/elements/Paragraph.js +146 -2
  36. package/dist/elements/Paragraph.js.map +1 -1
  37. package/dist/elements/Run.d.ts +4 -0
  38. package/dist/elements/Run.d.ts.map +1 -1
  39. package/dist/elements/Run.js +27 -1
  40. package/dist/elements/Run.js.map +1 -1
  41. package/dist/elements/Section.d.ts.map +1 -1
  42. package/dist/elements/Section.js +3 -8
  43. package/dist/elements/Section.js.map +1 -1
  44. package/dist/elements/StructuredDocumentTag.d.ts +32 -0
  45. package/dist/elements/StructuredDocumentTag.d.ts.map +1 -0
  46. package/dist/elements/StructuredDocumentTag.js +94 -0
  47. package/dist/elements/StructuredDocumentTag.js.map +1 -0
  48. package/dist/elements/Table.d.ts +24 -0
  49. package/dist/elements/Table.d.ts.map +1 -1
  50. package/dist/elements/Table.js +177 -3
  51. package/dist/elements/Table.js.map +1 -1
  52. package/dist/elements/TableOfContents.d.ts +33 -0
  53. package/dist/elements/TableOfContents.d.ts.map +1 -1
  54. package/dist/elements/TableOfContents.js +129 -1
  55. package/dist/elements/TableOfContents.js.map +1 -1
  56. package/dist/formatting/AbstractNumbering.d.ts +1 -0
  57. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  58. package/dist/formatting/AbstractNumbering.js +30 -0
  59. package/dist/formatting/AbstractNumbering.js.map +1 -1
  60. package/dist/formatting/NumberingInstance.d.ts +1 -0
  61. package/dist/formatting/NumberingInstance.d.ts.map +1 -1
  62. package/dist/formatting/NumberingInstance.js +16 -0
  63. package/dist/formatting/NumberingInstance.js.map +1 -1
  64. package/dist/formatting/NumberingLevel.d.ts +1 -0
  65. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  66. package/dist/formatting/NumberingLevel.js +59 -0
  67. package/dist/formatting/NumberingLevel.js.map +1 -1
  68. package/dist/formatting/NumberingManager.d.ts +11 -0
  69. package/dist/formatting/NumberingManager.d.ts.map +1 -1
  70. package/dist/formatting/NumberingManager.js +92 -0
  71. package/dist/formatting/NumberingManager.js.map +1 -1
  72. package/dist/formatting/Style.d.ts +2 -0
  73. package/dist/formatting/Style.d.ts.map +1 -1
  74. package/dist/formatting/Style.js +49 -0
  75. package/dist/formatting/Style.js.map +1 -1
  76. package/dist/index.d.ts +2 -0
  77. package/dist/index.d.ts.map +1 -1
  78. package/dist/index.js +6 -2
  79. package/dist/index.js.map +1 -1
  80. package/dist/xml/XMLBuilder.d.ts +4 -1
  81. package/dist/xml/XMLBuilder.d.ts.map +1 -1
  82. package/dist/xml/XMLBuilder.js +134 -31
  83. package/dist/xml/XMLBuilder.js.map +1 -1
  84. package/dist/xml/XMLParser.d.ts +1 -0
  85. package/dist/xml/XMLParser.d.ts.map +1 -1
  86. package/dist/xml/XMLParser.js +75 -42
  87. package/dist/xml/XMLParser.js.map +1 -1
  88. package/dist/zip/ZipHandler.d.ts.map +1 -1
  89. package/dist/zip/ZipHandler.js +6 -1
  90. package/dist/zip/ZipHandler.js.map +1 -1
  91. package/dist/zip/ZipReader.d.ts.map +1 -1
  92. package/dist/zip/ZipReader.js +2 -2
  93. package/dist/zip/ZipReader.js.map +1 -1
  94. package/dist/zip/ZipWriter.d.ts.map +1 -1
  95. package/dist/zip/ZipWriter.js +13 -8
  96. package/dist/zip/ZipWriter.js.map +1 -1
  97. package/package.json +1 -1
@@ -4,12 +4,17 @@ exports.DocumentParser = void 0;
4
4
  const types_1 = require("../zip/types");
5
5
  const Paragraph_1 = require("../elements/Paragraph");
6
6
  const Run_1 = require("../elements/Run");
7
+ const Hyperlink_1 = require("../elements/Hyperlink");
7
8
  const Table_1 = require("../elements/Table");
9
+ const TableRow_1 = require("../elements/TableRow");
8
10
  const TableCell_1 = require("../elements/TableCell");
9
- const Hyperlink_1 = require("../elements/Hyperlink");
11
+ const Section_1 = require("../elements/Section");
10
12
  const XMLBuilder_1 = require("../xml/XMLBuilder");
11
13
  const XMLParser_1 = require("../xml/XMLParser");
12
14
  const RelationshipManager_1 = require("./RelationshipManager");
15
+ const Style_1 = require("../formatting/Style");
16
+ const AbstractNumbering_1 = require("../formatting/AbstractNumbering");
17
+ const NumberingInstance_1 = require("../formatting/NumberingInstance");
13
18
  class DocumentParser {
14
19
  parseErrors = [];
15
20
  strictParsing;
@@ -22,60 +27,182 @@ class DocumentParser {
22
27
  clearParseErrors() {
23
28
  this.parseErrors = [];
24
29
  }
25
- async parseDocument(zipHandler, relationshipManager) {
30
+ async parseDocument(zipHandler, relationshipManager, imageManager) {
26
31
  const docXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
27
32
  if (!docXml) {
28
- throw new Error('Invalid document: word/document.xml not found');
33
+ throw new Error("Invalid document: word/document.xml not found");
29
34
  }
30
35
  const parsedRelationshipManager = this.parseRelationships(zipHandler, relationshipManager);
31
36
  const properties = this.parseProperties(zipHandler);
32
- const bodyElements = this.parseBodyElements(docXml, parsedRelationshipManager);
33
- return { bodyElements, properties, relationshipManager: parsedRelationshipManager };
37
+ const bodyElements = await this.parseBodyElements(docXml, parsedRelationshipManager, zipHandler, imageManager);
38
+ const styles = this.parseStyles(zipHandler);
39
+ const numbering = this.parseNumbering(zipHandler);
40
+ const section = this.parseSectionProperties(docXml);
41
+ const namespaces = this.parseNamespaces(docXml);
42
+ return {
43
+ bodyElements,
44
+ properties,
45
+ relationshipManager: parsedRelationshipManager,
46
+ styles,
47
+ abstractNumberings: numbering.abstractNumberings,
48
+ numberingInstances: numbering.numberingInstances,
49
+ section,
50
+ namespaces,
51
+ };
34
52
  }
35
- parseBodyElements(docXml, relationshipManager) {
53
+ async parseBodyElements(docXml, relationshipManager, zipHandler, imageManager) {
36
54
  const bodyElements = [];
37
- try {
38
- XMLParser_1.XMLParser.validateSize(docXml);
39
- }
40
- catch (error) {
41
- const err = error instanceof Error ? error : new Error(String(error));
42
- this.parseErrors.push({ element: 'document', error: err });
43
- if (this.strictParsing) {
44
- throw err;
45
- }
46
- return bodyElements;
47
- }
48
55
  const bodyContent = XMLParser_1.XMLParser.extractBody(docXml);
49
56
  if (!bodyContent) {
50
57
  return bodyElements;
51
58
  }
52
- const paragraphXmls = XMLParser_1.XMLParser.extractElements(bodyContent, 'w:p');
53
- for (const paraXml of paragraphXmls) {
54
- const paragraph = this.parseParagraph(paraXml, relationshipManager);
55
- if (paragraph) {
56
- bodyElements.push(paragraph);
59
+ let pos = 0;
60
+ while (pos < bodyContent.length) {
61
+ const nextP = this.findNextTopLevelTag(bodyContent, "w:p", pos);
62
+ const nextTbl = this.findNextTopLevelTag(bodyContent, "w:tbl", pos);
63
+ const nextSdt = this.findNextTopLevelTag(bodyContent, "w:sdt", pos);
64
+ const candidates = [];
65
+ if (nextP !== -1)
66
+ candidates.push({ type: "p", pos: nextP });
67
+ if (nextTbl !== -1)
68
+ candidates.push({ type: "tbl", pos: nextTbl });
69
+ if (nextSdt !== -1)
70
+ candidates.push({ type: "sdt", pos: nextSdt });
71
+ if (candidates.length === 0)
72
+ break;
73
+ candidates.sort((a, b) => a.pos - b.pos);
74
+ const next = candidates[0];
75
+ if (next) {
76
+ if (next.type === "p") {
77
+ const elementXml = this.extractSingleElement(bodyContent, "w:p", next.pos);
78
+ if (elementXml) {
79
+ const parsed = XMLParser_1.XMLParser.parseToObject(elementXml, { trimValues: false });
80
+ const paragraph = await this.parseParagraphFromObject(parsed["w:p"], relationshipManager, zipHandler, imageManager);
81
+ if (paragraph)
82
+ bodyElements.push(paragraph);
83
+ pos = next.pos + elementXml.length;
84
+ }
85
+ else {
86
+ pos = next.pos + 1;
87
+ }
88
+ }
89
+ else if (next.type === "tbl") {
90
+ const elementXml = this.extractSingleElement(bodyContent, "w:tbl", next.pos);
91
+ if (elementXml) {
92
+ const parsed = XMLParser_1.XMLParser.parseToObject(elementXml, { trimValues: false });
93
+ const table = await this.parseTableFromObject(parsed["w:tbl"], relationshipManager, zipHandler, imageManager);
94
+ if (table)
95
+ bodyElements.push(table);
96
+ pos = next.pos + elementXml.length;
97
+ }
98
+ else {
99
+ pos = next.pos + 1;
100
+ }
101
+ }
102
+ else if (next.type === "sdt") {
103
+ const elementXml = this.extractSingleElement(bodyContent, "w:sdt", next.pos);
104
+ if (elementXml) {
105
+ const parsed = XMLParser_1.XMLParser.parseToObject(elementXml, { trimValues: false });
106
+ const sdt = await this.parseSDTFromObject(parsed["w:sdt"], relationshipManager, zipHandler, imageManager);
107
+ if (sdt)
108
+ bodyElements.push(sdt);
109
+ pos = next.pos + elementXml.length;
110
+ }
111
+ else {
112
+ pos = next.pos + 1;
113
+ }
114
+ }
115
+ }
116
+ }
117
+ this.validateLoadedContent(bodyElements);
118
+ return bodyElements;
119
+ }
120
+ findNextTag(content, tagName, startPos) {
121
+ const tag = `<${tagName}`;
122
+ let pos = content.indexOf(tag, startPos);
123
+ while (pos !== -1) {
124
+ const charAfterTag = content[pos + tag.length];
125
+ if (charAfterTag &&
126
+ charAfterTag !== ">" &&
127
+ charAfterTag !== "/" &&
128
+ charAfterTag !== " " &&
129
+ charAfterTag !== "\t" &&
130
+ charAfterTag !== "\n" &&
131
+ charAfterTag !== "\r") {
132
+ pos = content.indexOf(tag, pos + tag.length);
133
+ continue;
57
134
  }
135
+ return pos;
136
+ }
137
+ return -1;
138
+ }
139
+ findNextTopLevelTag(content, tagName, startPos) {
140
+ let pos = startPos;
141
+ while (pos < content.length) {
142
+ const tagPos = this.findNextTag(content, tagName, pos);
143
+ if (tagPos === -1) {
144
+ return -1;
145
+ }
146
+ const isInsideTable = this.isPositionInsideTable(content, tagPos);
147
+ if (!isInsideTable) {
148
+ return tagPos;
149
+ }
150
+ pos = tagPos + 1;
151
+ }
152
+ return -1;
153
+ }
154
+ isPositionInsideTable(content, position) {
155
+ const beforeContent = content.substring(0, position);
156
+ const openTableTags = (beforeContent.match(/<w:tbl[\s>]/g) || []).length;
157
+ const closeTableTags = (beforeContent.match(/<\/w:tbl>/g) || []).length;
158
+ return openTableTags > closeTableTags;
159
+ }
160
+ extractSingleElement(content, tagName, startPos) {
161
+ const openTag = `<${tagName}`;
162
+ const closeTag = `</${tagName}>`;
163
+ const selfClosingEnd = "/>";
164
+ if (!content.substring(startPos).startsWith(openTag)) {
165
+ return "";
166
+ }
167
+ const openEnd = content.indexOf(">", startPos);
168
+ if (openEnd === -1) {
169
+ return "";
58
170
  }
59
- const tableElements = XMLParser_1.XMLParser.extractElements(bodyContent, 'w:tbl');
60
- for (const tableElement of tableElements) {
61
- try {
62
- const table = this.parseTable(tableElement, relationshipManager);
63
- if (table) {
64
- bodyElements.push(table);
171
+ if (content.substring(openEnd - 1, openEnd + 1) === selfClosingEnd) {
172
+ return content.substring(startPos, openEnd + 1);
173
+ }
174
+ let depth = 1;
175
+ let pos = openEnd + 1;
176
+ while (pos < content.length && depth > 0) {
177
+ const nextOpen = content.indexOf(openTag, pos);
178
+ const nextClose = content.indexOf(closeTag, pos);
179
+ if (nextClose === -1) {
180
+ return "";
181
+ }
182
+ if (nextOpen !== -1 && nextOpen < nextClose) {
183
+ const charAfter = content[nextOpen + openTag.length];
184
+ if (charAfter === ">" ||
185
+ charAfter === "/" ||
186
+ charAfter === " " ||
187
+ charAfter === "\t" ||
188
+ charAfter === "\n" ||
189
+ charAfter === "\r") {
190
+ depth++;
191
+ pos = nextOpen + openTag.length;
192
+ }
193
+ else {
194
+ pos = nextOpen + openTag.length;
65
195
  }
66
196
  }
67
- catch (error) {
68
- this.parseErrors.push({
69
- element: 'table',
70
- error: error instanceof Error ? error : new Error(String(error))
71
- });
72
- if (this.strictParsing) {
73
- throw error;
197
+ else {
198
+ depth--;
199
+ pos = nextClose + closeTag.length;
200
+ if (depth === 0) {
201
+ return content.substring(startPos, pos);
74
202
  }
75
203
  }
76
204
  }
77
- this.validateLoadedContent(bodyElements);
78
- return bodyElements;
205
+ return "";
79
206
  }
80
207
  validateLoadedContent(bodyElements) {
81
208
  const paragraphs = bodyElements.filter((el) => el instanceof Paragraph_1.Paragraph);
@@ -108,271 +235,158 @@ class DocumentParser {
108
235
  ` - Text content was stripped by another application\n` +
109
236
  ` - Encoding issues during document creation\n` +
110
237
  `Original document structure is preserved, but text may be lost.`);
111
- this.parseErrors.push({ element: 'document-validation', error: warning });
238
+ this.parseErrors.push({
239
+ element: "document-validation",
240
+ error: warning,
241
+ });
112
242
  console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
113
243
  }
114
244
  else if (emptyPercentage > 50 && emptyRuns > 5) {
115
245
  const warning = new Error(`Document has ${emptyRuns} out of ${totalRuns} runs (${emptyPercentage.toFixed(1)}%) with no text. ` +
116
246
  `This is higher than normal and may indicate partial data loss.`);
117
- this.parseErrors.push({ element: 'document-validation', error: warning });
247
+ this.parseErrors.push({
248
+ element: "document-validation",
249
+ error: warning,
250
+ });
118
251
  console.warn(`\nDocXML Load Warning:\n${warning.message}\n`);
119
252
  }
120
253
  }
121
254
  }
122
- parseParagraph(paraXml, relationshipManager) {
255
+ async parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager) {
123
256
  try {
124
257
  const paragraph = new Paragraph_1.Paragraph();
125
- this.parseParagraphProperties(paraXml, paragraph);
126
- const hyperlinkXmls = XMLParser_1.XMLParser.extractElements(paraXml, 'w:hyperlink');
127
- for (const hyperlinkXml of hyperlinkXmls) {
128
- const hyperlink = this.parseHyperlink(hyperlinkXml, relationshipManager);
129
- if (hyperlink) {
130
- paragraph.addHyperlink(hyperlink);
131
- }
258
+ const paraId = paraObj["w14:paraId"];
259
+ if (paraId) {
260
+ paragraph.formatting.paraId = paraId;
132
261
  }
133
- let paraXmlWithoutHyperlinks = paraXml;
134
- for (const hyperlinkXml of hyperlinkXmls) {
135
- paraXmlWithoutHyperlinks = paraXmlWithoutHyperlinks.replace(hyperlinkXml, '');
262
+ this.parseParagraphPropertiesFromObject(paraObj["w:pPr"], paragraph);
263
+ const runs = paraObj["w:r"];
264
+ const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
265
+ for (const child of runChildren) {
266
+ if (child["w:drawing"]) {
267
+ if (zipHandler && imageManager) {
268
+ const imageRun = await this.parseDrawingFromObject(child["w:drawing"], zipHandler, relationshipManager, imageManager);
269
+ if (imageRun) {
270
+ paragraph.addRun(imageRun);
271
+ }
272
+ }
273
+ }
274
+ else {
275
+ const run = this.parseRunFromObject(child);
276
+ if (run) {
277
+ paragraph.addRun(run);
278
+ }
279
+ }
136
280
  }
137
- const runXmls = XMLParser_1.XMLParser.extractElements(paraXmlWithoutHyperlinks, 'w:r');
138
- for (const runXml of runXmls) {
139
- const run = this.parseRun(runXml);
140
- if (run) {
141
- paragraph.addRun(run);
281
+ const hyperlinks = paraObj["w:hyperlink"];
282
+ const hyperlinkChildren = Array.isArray(hyperlinks) ? hyperlinks : (hyperlinks ? [hyperlinks] : []);
283
+ for (const hyperlinkObj of hyperlinkChildren) {
284
+ const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
285
+ if (hyperlink) {
286
+ paragraph.addHyperlink(hyperlink);
142
287
  }
143
288
  }
144
289
  return paragraph;
145
290
  }
146
291
  catch (error) {
147
292
  const err = error instanceof Error ? error : new Error(String(error));
148
- this.parseErrors.push({ element: 'paragraph', error: err });
293
+ this.parseErrors.push({ element: "paragraph", error: err });
149
294
  if (this.strictParsing) {
150
295
  throw new Error(`Failed to parse paragraph: ${err.message}`);
151
296
  }
152
297
  return null;
153
298
  }
154
299
  }
155
- parseParagraphProperties(paraXml, paragraph) {
156
- const pPr = XMLParser_1.XMLParser.extractBetweenTags(paraXml, '<w:pPr', '</w:pPr>');
157
- if (!pPr) {
300
+ parseParagraphPropertiesFromObject(pPrObj, paragraph) {
301
+ if (!pPrObj)
158
302
  return;
303
+ if (pPrObj["w:jc"]?.["@_w:val"]) {
304
+ paragraph.setAlignment(pPrObj["w:jc"]["@_w:val"]);
159
305
  }
160
- const jcElements = XMLParser_1.XMLParser.extractElements(pPr, 'w:jc');
161
- if (jcElements.length > 0) {
162
- const value = XMLParser_1.XMLParser.extractAttribute(jcElements[0], 'w:val');
163
- if (value) {
164
- const validAlignments = ['left', 'center', 'right', 'justify'];
165
- if (validAlignments.includes(value)) {
166
- paragraph.setAlignment(value);
167
- }
168
- }
306
+ if (pPrObj["w:pStyle"]?.["@_w:val"]) {
307
+ paragraph.setStyle(pPrObj["w:pStyle"]["@_w:val"]);
169
308
  }
170
- const styleElements = XMLParser_1.XMLParser.extractElements(pPr, 'w:pStyle');
171
- if (styleElements.length > 0) {
172
- const styleId = XMLParser_1.XMLParser.extractAttribute(styleElements[0], 'w:val');
173
- if (styleId) {
174
- paragraph.setStyle(styleId);
175
- }
176
- }
177
- const indElements = XMLParser_1.XMLParser.extractElements(pPr, 'w:ind');
178
- if (indElements.length > 0) {
179
- const indElement = indElements[0];
180
- const left = XMLParser_1.XMLParser.extractAttribute(indElement, 'w:left');
181
- const right = XMLParser_1.XMLParser.extractAttribute(indElement, 'w:right');
182
- const firstLine = XMLParser_1.XMLParser.extractAttribute(indElement, 'w:firstLine');
183
- if (left) {
184
- paragraph.setLeftIndent(parseInt(left, 10));
185
- }
186
- if (right) {
187
- paragraph.setRightIndent(parseInt(right, 10));
188
- }
189
- if (firstLine) {
190
- paragraph.setFirstLineIndent(parseInt(firstLine, 10));
191
- }
192
- }
193
- const spacingElements = XMLParser_1.XMLParser.extractElements(pPr, 'w:spacing');
194
- if (spacingElements.length > 0) {
195
- const spacingElement = spacingElements[0];
196
- const before = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:before');
197
- const after = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:after');
198
- const line = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:line');
199
- const lineRule = XMLParser_1.XMLParser.extractAttribute(spacingElement, 'w:lineRule');
200
- if (before) {
201
- paragraph.setSpaceBefore(parseInt(before, 10));
202
- }
203
- if (after) {
204
- paragraph.setSpaceAfter(parseInt(after, 10));
205
- }
206
- if (line) {
207
- let validatedLineRule;
208
- if (lineRule) {
209
- const validLineRules = ['auto', 'exact', 'atLeast'];
210
- if (validLineRules.includes(lineRule)) {
211
- validatedLineRule = lineRule;
212
- }
213
- }
214
- paragraph.setLineSpacing(parseInt(line, 10), validatedLineRule);
309
+ if (pPrObj["w:ind"]) {
310
+ const ind = pPrObj["w:ind"];
311
+ if (ind["@_w:left"])
312
+ paragraph.setLeftIndent(parseInt(ind["@_w:left"], 10));
313
+ if (ind["@_w:right"])
314
+ paragraph.setRightIndent(parseInt(ind["@_w:right"], 10));
315
+ if (ind["@_w:firstLine"])
316
+ paragraph.setFirstLineIndent(parseInt(ind["@_w:firstLine"], 10));
317
+ }
318
+ if (pPrObj["w:spacing"]) {
319
+ const spacing = pPrObj["w:spacing"];
320
+ if (spacing["@_w:before"])
321
+ paragraph.setSpaceBefore(parseInt(spacing["@_w:before"], 10));
322
+ if (spacing["@_w:after"])
323
+ paragraph.setSpaceAfter(parseInt(spacing["@_w:after"], 10));
324
+ if (spacing["@_w:line"]) {
325
+ paragraph.setLineSpacing(parseInt(spacing["@_w:line"], 10), spacing["@_w:lineRule"]);
215
326
  }
216
327
  }
217
- if (XMLParser_1.XMLParser.hasSelfClosingTag(pPr, 'w:keepNext'))
328
+ if (pPrObj["w:pageBreakBefore"])
329
+ paragraph.formatting.pageBreakBefore = true;
330
+ if (pPrObj["w:keepNext"])
218
331
  paragraph.setKeepNext(true);
219
- if (XMLParser_1.XMLParser.hasSelfClosingTag(pPr, 'w:keepLines'))
332
+ if (pPrObj["w:keepLines"])
220
333
  paragraph.setKeepLines(true);
221
- if (XMLParser_1.XMLParser.hasSelfClosingTag(pPr, 'w:pageBreakBefore'))
222
- paragraph.setPageBreakBefore(true);
223
- if (XMLParser_1.XMLParser.hasSelfClosingTag(pPr, 'w:contextualSpacing')) {
334
+ if (pPrObj["w:contextualSpacing"])
224
335
  paragraph.setContextualSpacing(true);
336
+ if (pPrObj["w:numPr"]) {
337
+ const numPr = pPrObj["w:numPr"];
338
+ const numId = numPr["w:numId"]?.["@_w:val"];
339
+ const ilvl = numPr["w:ilvl"]?.["@_w:val"] || "0";
340
+ if (numId) {
341
+ paragraph.setNumbering(parseInt(numId, 10), parseInt(ilvl, 10));
342
+ }
225
343
  }
226
344
  }
227
- parseRun(runXml) {
345
+ parseRunFromObject(runObj) {
228
346
  try {
229
- const text = XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
230
- const run = new Run_1.Run(text);
231
- this.parseRunProperties(runXml, run);
347
+ const textElement = runObj["w:t"];
348
+ let text = (typeof textElement === 'object' && textElement !== null)
349
+ ? (textElement["#text"] || "")
350
+ : (textElement || "");
351
+ text = XMLBuilder_1.XMLBuilder.unescapeXml(text);
352
+ const run = new Run_1.Run(text, { cleanXmlFromText: false });
353
+ this.parseRunPropertiesFromObject(runObj["w:rPr"], run);
232
354
  return run;
233
355
  }
234
356
  catch (error) {
235
- const err = error instanceof Error ? error : new Error(String(error));
236
- this.parseErrors.push({ element: 'run', error: err });
237
- if (this.strictParsing) {
238
- throw new Error(`Failed to parse run: ${err.message}`);
239
- }
240
357
  return null;
241
358
  }
242
359
  }
243
- parseRunProperties(runXml, run) {
244
- const rPr = XMLParser_1.XMLParser.extractBetweenTags(runXml, '<w:rPr', '</w:rPr>');
245
- if (!rPr) {
246
- return;
247
- }
248
- if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:b')) {
249
- run.setBold(true);
250
- }
251
- if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:i')) {
252
- run.setItalic(true);
253
- }
254
- const underlineElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:u');
255
- if (underlineElements.length > 0) {
256
- const value = XMLParser_1.XMLParser.extractAttribute(underlineElements[0], 'w:val');
257
- if (value) {
258
- const validUnderlineStyles = [
259
- 'single',
260
- 'double',
261
- 'thick',
262
- 'dotted',
263
- 'dash',
264
- 'dotDash',
265
- 'dotDotDash',
266
- 'wave',
267
- ];
268
- if (validUnderlineStyles.includes(value) ||
269
- value === 'true' ||
270
- value === 'false') {
271
- const underlineStyle = value;
272
- run.setUnderline(underlineStyle);
273
- }
274
- }
275
- else {
276
- run.setUnderline(true);
277
- }
278
- }
279
- if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:strike')) {
280
- run.setStrike(true);
281
- }
282
- const vertAlignElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:vertAlign');
283
- if (vertAlignElements.length > 0) {
284
- const value = XMLParser_1.XMLParser.extractAttribute(vertAlignElements[0], 'w:val');
285
- if (value === 'subscript') {
286
- run.setSubscript(true);
287
- }
288
- else if (value === 'superscript') {
289
- run.setSuperscript(true);
290
- }
291
- }
292
- const fontElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:rFonts');
293
- if (fontElements.length > 0) {
294
- const fontName = XMLParser_1.XMLParser.extractAttribute(fontElements[0], 'w:ascii');
295
- if (fontName) {
296
- run.setFont(fontName);
297
- }
298
- }
299
- const sizeElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:sz');
300
- if (sizeElements.length > 0) {
301
- const halfPoints = XMLParser_1.XMLParser.extractAttribute(sizeElements[0], 'w:val');
302
- if (halfPoints) {
303
- run.setSize(parseInt(halfPoints, 10) / 2);
304
- }
305
- }
306
- const colorElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:color');
307
- if (colorElements.length > 0) {
308
- const colorValue = XMLParser_1.XMLParser.extractAttribute(colorElements[0], 'w:val');
309
- if (colorValue) {
310
- run.setColor(colorValue);
311
- }
312
- }
313
- const highlightElements = XMLParser_1.XMLParser.extractElements(rPr, 'w:highlight');
314
- if (highlightElements.length > 0) {
315
- const value = XMLParser_1.XMLParser.extractAttribute(highlightElements[0], 'w:val');
316
- if (value) {
317
- const validHighlightColors = [
318
- 'yellow',
319
- 'green',
320
- 'cyan',
321
- 'magenta',
322
- 'blue',
323
- 'red',
324
- 'darkBlue',
325
- 'darkCyan',
326
- 'darkGreen',
327
- 'darkMagenta',
328
- 'darkRed',
329
- 'darkYellow',
330
- 'darkGray',
331
- 'lightGray',
332
- 'black',
333
- 'white',
334
- ];
335
- if (validHighlightColors.includes(value)) {
336
- const highlightColor = value;
337
- run.setHighlight(highlightColor);
338
- }
339
- }
340
- }
341
- if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:smallCaps')) {
342
- run.setSmallCaps(true);
343
- }
344
- if (XMLParser_1.XMLParser.hasSelfClosingTag(rPr, 'w:caps')) {
345
- run.setAllCaps(true);
346
- }
347
- }
348
- parseHyperlink(hyperlinkXml, relationshipManager) {
360
+ parseHyperlinkFromObject(hyperlinkObj, relationshipManager) {
349
361
  try {
350
- const relationshipId = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'r:id');
351
- const anchor = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:anchor');
352
- const tooltip = XMLParser_1.XMLParser.extractAttribute(hyperlinkXml, 'w:tooltip');
353
- if (!relationshipId && !anchor) {
354
- return null;
355
- }
356
- const runXmls = XMLParser_1.XMLParser.extractElements(hyperlinkXml, 'w:r');
357
- let text = '';
358
- let formatting;
359
- for (const runXml of runXmls) {
360
- text += XMLBuilder_1.XMLBuilder.unescapeXml(XMLParser_1.XMLParser.extractText(runXml));
361
- if (!formatting) {
362
- const run = this.parseRun(runXml);
363
- if (run) {
364
- formatting = run.getFormatting();
365
- }
366
- }
362
+ const relationshipId = hyperlinkObj["@_r:id"];
363
+ const anchor = hyperlinkObj["@_w:anchor"];
364
+ const tooltip = hyperlinkObj["@_w:tooltip"];
365
+ const runs = hyperlinkObj["w:r"];
366
+ const runChildren = Array.isArray(runs) ? runs : (runs ? [runs] : []);
367
+ const text = runChildren
368
+ .map((runObj) => {
369
+ const textElement = runObj["w:t"];
370
+ let runText = (typeof textElement === 'object' && textElement !== null)
371
+ ? (textElement["#text"] || "")
372
+ : (textElement || "");
373
+ return XMLBuilder_1.XMLBuilder.unescapeXml(runText);
374
+ })
375
+ .join('');
376
+ let formatting = {};
377
+ if (runChildren.length > 0 && runChildren[0]["w:rPr"]) {
378
+ const tempRun = new Run_1.Run('');
379
+ this.parseRunPropertiesFromObject(runChildren[0]["w:rPr"], tempRun);
380
+ formatting = tempRun.getFormatting();
367
381
  }
368
382
  let url;
369
383
  if (relationshipId) {
370
384
  const relationship = relationshipManager.getRelationship(relationshipId);
371
- if (relationship && relationship.getType().includes('hyperlink')) {
385
+ if (relationship) {
372
386
  url = relationship.getTarget();
373
387
  }
374
388
  }
375
- return new Hyperlink_1.Hyperlink({
389
+ const hyperlink = new Hyperlink_1.Hyperlink({
376
390
  url,
377
391
  anchor,
378
392
  text: text || url || anchor || 'Link',
@@ -380,73 +394,112 @@ class DocumentParser {
380
394
  tooltip,
381
395
  relationshipId,
382
396
  });
397
+ return hyperlink;
383
398
  }
384
399
  catch (error) {
385
- const err = error instanceof Error ? error : new Error(String(error));
386
- this.parseErrors.push({ element: 'hyperlink', error: err });
387
- if (this.strictParsing) {
388
- throw new Error(`Failed to parse hyperlink: ${err.message}`);
389
- }
400
+ console.warn('[DocumentParser] Failed to parse hyperlink:', error);
390
401
  return null;
391
402
  }
392
403
  }
393
- parseTable(tableXml, relationshipManager) {
404
+ parseRunPropertiesFromObject(rPrObj, run) {
405
+ if (!rPrObj)
406
+ return;
407
+ if (rPrObj["w:b"])
408
+ run.setBold(true);
409
+ if (rPrObj["w:i"])
410
+ run.setItalic(true);
411
+ if (rPrObj["w:strike"])
412
+ run.setStrike(true);
413
+ if (rPrObj["w:smallCaps"])
414
+ run.setSmallCaps(true);
415
+ if (rPrObj["w:caps"])
416
+ run.setAllCaps(true);
417
+ if (rPrObj["w:u"]) {
418
+ const uVal = rPrObj["w:u"]["@_w:val"];
419
+ run.setUnderline(uVal || true);
420
+ }
421
+ if (rPrObj["w:vertAlign"]) {
422
+ const val = rPrObj["w:vertAlign"]["@_w:val"];
423
+ if (val === "subscript")
424
+ run.setSubscript(true);
425
+ if (val === "superscript")
426
+ run.setSuperscript(true);
427
+ }
428
+ if (rPrObj["w:rFonts"]) {
429
+ run.setFont(rPrObj["w:rFonts"]["@_w:ascii"]);
430
+ }
431
+ if (rPrObj["w:sz"]) {
432
+ run.setSize(parseInt(rPrObj["w:sz"]["@_w:val"], 10) / 2);
433
+ }
434
+ if (rPrObj["w:color"]) {
435
+ run.setColor(rPrObj["w:color"]["@_w:val"]);
436
+ }
437
+ if (rPrObj["w:highlight"]) {
438
+ run.setHighlight(rPrObj["w:highlight"]["@_w:val"]);
439
+ }
440
+ }
441
+ async parseDrawingFromObject(_drawingObj, _zipHandler, _relationshipManager, _imageManager) {
442
+ return null;
443
+ }
444
+ async parseTableFromObject(tableObj, relationshipManager, zipHandler, imageManager) {
394
445
  try {
395
- const rowElements = XMLParser_1.XMLParser.extractElements(tableXml, 'w:tr');
396
- const rows = [];
397
- for (const rowElement of rowElements) {
398
- const cellElements = XMLParser_1.XMLParser.extractElements(rowElement, 'w:tc');
399
- const cells = [];
400
- for (const cellElement of cellElements) {
401
- cells.push(cellElement);
446
+ const table = new Table_1.Table();
447
+ const rows = tableObj["w:tr"];
448
+ const rowChildren = Array.isArray(rows) ? rows : (rows ? [rows] : []);
449
+ for (const rowObj of rowChildren) {
450
+ const row = await this.parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager);
451
+ if (row) {
452
+ table.addRow(row);
402
453
  }
403
- if (cells.length > 0) {
404
- rows.push(cells);
405
- }
406
- }
407
- if (rows.length === 0) {
408
- return null;
409
- }
410
- const firstRow = rows[0];
411
- if (!firstRow || firstRow.length === 0) {
412
- return null;
413
454
  }
414
- const colCount = firstRow.length;
415
- const rowCount = rows.length;
416
- const table = new Table_1.Table(rowCount, colCount);
417
- for (let rIdx = 0; rIdx < rows.length; rIdx++) {
418
- const row = table.getRows()[rIdx];
419
- if (!row)
420
- continue;
421
- const cells = row.getCells();
422
- const cellContents = rows[rIdx];
423
- if (!cellContents)
424
- continue;
425
- for (let cIdx = 0; cIdx < cellContents.length && cIdx < cells.length; cIdx++) {
426
- const cell = cells[cIdx];
427
- if (cell instanceof TableCell_1.TableCell) {
428
- const cellXml = cellContents[cIdx] || '';
429
- const paraElements = XMLParser_1.XMLParser.extractElements(cellXml, 'w:p');
430
- for (const paraElement of paraElements) {
431
- const para = this.parseParagraph(paraElement, relationshipManager);
432
- if (para) {
433
- cell.addParagraph(para);
434
- }
435
- }
436
- }
455
+ return table;
456
+ }
457
+ catch (error) {
458
+ console.warn('[DocumentParser] Failed to parse table:', error);
459
+ return null;
460
+ }
461
+ }
462
+ async parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager) {
463
+ try {
464
+ const row = new TableRow_1.TableRow();
465
+ const cells = rowObj["w:tc"];
466
+ const cellChildren = Array.isArray(cells) ? cells : (cells ? [cells] : []);
467
+ for (const cellObj of cellChildren) {
468
+ const cell = await this.parseTableCellFromObject(cellObj, relationshipManager, zipHandler, imageManager);
469
+ if (cell) {
470
+ row.addCell(cell);
437
471
  }
438
472
  }
439
- return table;
473
+ return row;
440
474
  }
441
475
  catch (error) {
442
- if (this.strictParsing) {
443
- throw error;
476
+ console.warn('[DocumentParser] Failed to parse table row:', error);
477
+ return null;
478
+ }
479
+ }
480
+ async parseTableCellFromObject(cellObj, relationshipManager, zipHandler, imageManager) {
481
+ try {
482
+ const cell = new TableCell_1.TableCell();
483
+ const paragraphs = cellObj["w:p"];
484
+ const paraChildren = Array.isArray(paragraphs) ? paragraphs : (paragraphs ? [paragraphs] : []);
485
+ for (const paraObj of paraChildren) {
486
+ const paragraph = await this.parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager);
487
+ if (paragraph) {
488
+ cell.addParagraph(paragraph);
489
+ }
444
490
  }
491
+ return cell;
492
+ }
493
+ catch (error) {
494
+ console.warn('[DocumentParser] Failed to parse table cell:', error);
445
495
  return null;
446
496
  }
447
497
  }
498
+ async parseSDTFromObject(_sdtObj, _relationshipManager, _zipHandler, _imageManager) {
499
+ return null;
500
+ }
448
501
  parseRelationships(zipHandler, relationshipManager) {
449
- const relsPath = 'word/_rels/document.xml.rels';
502
+ const relsPath = "word/_rels/document.xml.rels";
450
503
  const relsXml = zipHandler.getFileAsString(relsPath);
451
504
  if (relsXml) {
452
505
  return RelationshipManager_1.RelationshipManager.fromXml(relsXml);
@@ -463,38 +516,448 @@ class DocumentParser {
463
516
  return tagContent ? XMLBuilder_1.XMLBuilder.unescapeXml(tagContent) : undefined;
464
517
  };
465
518
  const properties = {
466
- title: extractTag(coreXml, 'dc:title'),
467
- subject: extractTag(coreXml, 'dc:subject'),
468
- creator: extractTag(coreXml, 'dc:creator'),
469
- keywords: extractTag(coreXml, 'cp:keywords'),
470
- description: extractTag(coreXml, 'dc:description'),
471
- lastModifiedBy: extractTag(coreXml, 'cp:lastModifiedBy'),
519
+ title: extractTag(coreXml, "dc:title"),
520
+ subject: extractTag(coreXml, "dc:subject"),
521
+ creator: extractTag(coreXml, "dc:creator"),
522
+ keywords: extractTag(coreXml, "cp:keywords"),
523
+ description: extractTag(coreXml, "dc:description"),
524
+ lastModifiedBy: extractTag(coreXml, "cp:lastModifiedBy"),
472
525
  };
473
- const revisionStr = extractTag(coreXml, 'cp:revision');
526
+ const revisionStr = extractTag(coreXml, "cp:revision");
474
527
  if (revisionStr) {
475
528
  properties.revision = parseInt(revisionStr, 10);
476
529
  }
477
- const createdStr = extractTag(coreXml, 'dcterms:created');
530
+ const createdStr = extractTag(coreXml, "dcterms:created");
478
531
  if (createdStr) {
479
532
  properties.created = new Date(createdStr);
480
533
  }
481
- const modifiedStr = extractTag(coreXml, 'dcterms:modified');
534
+ const modifiedStr = extractTag(coreXml, "dcterms:modified");
482
535
  if (modifiedStr) {
483
536
  properties.modified = new Date(modifiedStr);
484
537
  }
485
538
  return properties;
486
539
  }
540
+ parseStyles(zipHandler) {
541
+ const styles = [];
542
+ const stylesXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.STYLES);
543
+ if (!stylesXml) {
544
+ return styles;
545
+ }
546
+ try {
547
+ const styleElements = XMLParser_1.XMLParser.extractElements(stylesXml, "w:style");
548
+ for (const styleXml of styleElements) {
549
+ try {
550
+ const style = this.parseStyle(styleXml);
551
+ if (style) {
552
+ styles.push(style);
553
+ }
554
+ }
555
+ catch (error) {
556
+ const err = error instanceof Error ? error : new Error(String(error));
557
+ this.parseErrors.push({ element: "style", error: err });
558
+ if (this.strictParsing) {
559
+ throw error;
560
+ }
561
+ }
562
+ }
563
+ }
564
+ catch (error) {
565
+ const err = error instanceof Error ? error : new Error(String(error));
566
+ this.parseErrors.push({ element: "styles.xml", error: err });
567
+ if (this.strictParsing) {
568
+ throw new Error(`Failed to parse styles.xml: ${err.message}`);
569
+ }
570
+ }
571
+ return styles;
572
+ }
573
+ parseNumbering(zipHandler) {
574
+ const abstractNumberings = [];
575
+ const numberingInstances = [];
576
+ const numberingXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.NUMBERING);
577
+ if (!numberingXml) {
578
+ return { abstractNumberings, numberingInstances };
579
+ }
580
+ try {
581
+ const abstractNumElements = XMLParser_1.XMLParser.extractElements(numberingXml, "w:abstractNum");
582
+ for (const abstractNumXml of abstractNumElements) {
583
+ try {
584
+ const abstractNum = AbstractNumbering_1.AbstractNumbering.fromXML(abstractNumXml);
585
+ abstractNumberings.push(abstractNum);
586
+ }
587
+ catch (error) {
588
+ const err = error instanceof Error ? error : new Error(String(error));
589
+ this.parseErrors.push({ element: "abstractNum", error: err });
590
+ if (this.strictParsing) {
591
+ throw error;
592
+ }
593
+ }
594
+ }
595
+ const numElements = XMLParser_1.XMLParser.extractElements(numberingXml, "w:num");
596
+ for (const numXml of numElements) {
597
+ try {
598
+ const instance = NumberingInstance_1.NumberingInstance.fromXML(numXml);
599
+ numberingInstances.push(instance);
600
+ }
601
+ catch (error) {
602
+ const err = error instanceof Error ? error : new Error(String(error));
603
+ this.parseErrors.push({ element: "num", error: err });
604
+ if (this.strictParsing) {
605
+ throw error;
606
+ }
607
+ }
608
+ }
609
+ }
610
+ catch (error) {
611
+ const err = error instanceof Error ? error : new Error(String(error));
612
+ this.parseErrors.push({ element: "numbering.xml", error: err });
613
+ if (this.strictParsing) {
614
+ throw new Error(`Failed to parse numbering.xml: ${err.message}`);
615
+ }
616
+ }
617
+ return { abstractNumberings, numberingInstances };
618
+ }
619
+ parseSectionProperties(docXml) {
620
+ try {
621
+ const bodyElements = XMLParser_1.XMLParser.extractElements(docXml, "w:body");
622
+ if (bodyElements.length === 0) {
623
+ return null;
624
+ }
625
+ const bodyContent = bodyElements[0];
626
+ if (!bodyContent) {
627
+ return null;
628
+ }
629
+ const sectPrElements = XMLParser_1.XMLParser.extractElements(bodyContent, "w:sectPr");
630
+ if (sectPrElements.length === 0) {
631
+ return null;
632
+ }
633
+ const sectPr = sectPrElements[sectPrElements.length - 1];
634
+ if (!sectPr) {
635
+ return null;
636
+ }
637
+ const sectionProps = {};
638
+ const pgSzElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:pgSz");
639
+ if (pgSzElements.length > 0) {
640
+ const pgSz = pgSzElements[0];
641
+ if (pgSz) {
642
+ const width = XMLParser_1.XMLParser.extractAttribute(pgSz, "w:w");
643
+ const height = XMLParser_1.XMLParser.extractAttribute(pgSz, "w:h");
644
+ const orient = XMLParser_1.XMLParser.extractAttribute(pgSz, "w:orient");
645
+ if (width && height) {
646
+ sectionProps.pageSize = {
647
+ width: parseInt(width, 10),
648
+ height: parseInt(height, 10),
649
+ orientation: orient === "landscape" ? "landscape" : "portrait",
650
+ };
651
+ }
652
+ }
653
+ }
654
+ const pgMarElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:pgMar");
655
+ if (pgMarElements.length > 0) {
656
+ const pgMar = pgMarElements[0];
657
+ if (pgMar) {
658
+ const top = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:top");
659
+ const bottom = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:bottom");
660
+ const left = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:left");
661
+ const right = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:right");
662
+ const header = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:header");
663
+ const footer = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:footer");
664
+ const gutter = XMLParser_1.XMLParser.extractAttribute(pgMar, "w:gutter");
665
+ if (top && bottom && left && right) {
666
+ sectionProps.margins = {
667
+ top: parseInt(top, 10),
668
+ bottom: parseInt(bottom, 10),
669
+ left: parseInt(left, 10),
670
+ right: parseInt(right, 10),
671
+ header: header ? parseInt(header, 10) : undefined,
672
+ footer: footer ? parseInt(footer, 10) : undefined,
673
+ gutter: gutter ? parseInt(gutter, 10) : undefined,
674
+ };
675
+ }
676
+ }
677
+ }
678
+ const colsElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:cols");
679
+ if (colsElements.length > 0) {
680
+ const cols = colsElements[0];
681
+ if (cols) {
682
+ const num = XMLParser_1.XMLParser.extractAttribute(cols, "w:num");
683
+ const space = XMLParser_1.XMLParser.extractAttribute(cols, "w:space");
684
+ const equalWidth = XMLParser_1.XMLParser.extractAttribute(cols, "w:equalWidth");
685
+ if (num) {
686
+ sectionProps.columns = {
687
+ count: parseInt(num, 10),
688
+ space: space ? parseInt(space, 10) : undefined,
689
+ equalWidth: equalWidth === "1" || equalWidth === "true",
690
+ };
691
+ }
692
+ }
693
+ }
694
+ const typeElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:type");
695
+ if (typeElements.length > 0) {
696
+ const type = typeElements[0];
697
+ if (type) {
698
+ const typeVal = XMLParser_1.XMLParser.extractAttribute(type, "w:val");
699
+ if (typeVal) {
700
+ sectionProps.type = typeVal;
701
+ }
702
+ }
703
+ }
704
+ const pgNumTypeElements = XMLParser_1.XMLParser.extractElements(sectPr, "w:pgNumType");
705
+ if (pgNumTypeElements.length > 0) {
706
+ const pgNumType = pgNumTypeElements[0];
707
+ if (pgNumType) {
708
+ const start = XMLParser_1.XMLParser.extractAttribute(pgNumType, "w:start");
709
+ const fmt = XMLParser_1.XMLParser.extractAttribute(pgNumType, "w:fmt");
710
+ sectionProps.pageNumbering = {
711
+ start: start ? parseInt(start, 10) : undefined,
712
+ format: fmt,
713
+ };
714
+ }
715
+ }
716
+ if (XMLParser_1.XMLParser.hasSelfClosingTag(sectPr, "w:titlePg")) {
717
+ sectionProps.titlePage = true;
718
+ }
719
+ const headerRefs = XMLParser_1.XMLParser.extractElements(sectPr, "w:headerReference");
720
+ if (headerRefs.length > 0) {
721
+ sectionProps.headers = {};
722
+ for (const headerRef of headerRefs) {
723
+ const type = XMLParser_1.XMLParser.extractAttribute(headerRef, "w:type");
724
+ const rId = XMLParser_1.XMLParser.extractAttribute(headerRef, "r:id");
725
+ if (type && rId) {
726
+ if (type === "default")
727
+ sectionProps.headers.default = rId;
728
+ else if (type === "first")
729
+ sectionProps.headers.first = rId;
730
+ else if (type === "even")
731
+ sectionProps.headers.even = rId;
732
+ }
733
+ }
734
+ }
735
+ const footerRefs = XMLParser_1.XMLParser.extractElements(sectPr, "w:footerReference");
736
+ if (footerRefs.length > 0) {
737
+ sectionProps.footers = {};
738
+ for (const footerRef of footerRefs) {
739
+ const type = XMLParser_1.XMLParser.extractAttribute(footerRef, "w:type");
740
+ const rId = XMLParser_1.XMLParser.extractAttribute(footerRef, "r:id");
741
+ if (type && rId) {
742
+ if (type === "default")
743
+ sectionProps.footers.default = rId;
744
+ else if (type === "first")
745
+ sectionProps.footers.first = rId;
746
+ else if (type === "even")
747
+ sectionProps.footers.even = rId;
748
+ }
749
+ }
750
+ }
751
+ return new Section_1.Section(sectionProps);
752
+ }
753
+ catch (error) {
754
+ const err = error instanceof Error ? error : new Error(String(error));
755
+ this.parseErrors.push({ element: "sectPr", error: err });
756
+ if (this.strictParsing) {
757
+ throw new Error(`Failed to parse section properties: ${err.message}`);
758
+ }
759
+ return null;
760
+ }
761
+ }
762
+ parseStyle(styleXml) {
763
+ const typeAttr = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:type");
764
+ const styleId = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:styleId") || "";
765
+ const defaultAttr = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:default");
766
+ const customStyleAttr = XMLParser_1.XMLParser.extractAttribute(styleXml, "w:customStyle");
767
+ if (!styleId || !typeAttr) {
768
+ return null;
769
+ }
770
+ const nameElement = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:name", "</w:name>");
771
+ const name = nameElement
772
+ ? XMLParser_1.XMLParser.extractAttribute(`<w:name${nameElement}`, "w:val") || styleId
773
+ : styleId;
774
+ const basedOnElement = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:basedOn", "</w:basedOn>");
775
+ const basedOn = basedOnElement
776
+ ? XMLParser_1.XMLParser.extractAttribute(`<w:basedOn${basedOnElement}`, "w:val")
777
+ : undefined;
778
+ const nextElement = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:next", "</w:next>");
779
+ const next = nextElement
780
+ ? XMLParser_1.XMLParser.extractAttribute(`<w:next${nextElement}`, "w:val")
781
+ : undefined;
782
+ let paragraphFormatting;
783
+ const pPrXml = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:pPr>", "</w:pPr>");
784
+ if (pPrXml) {
785
+ paragraphFormatting = this.parseParagraphFormattingFromXml(pPrXml);
786
+ }
787
+ let runFormatting;
788
+ const rPrXml = XMLParser_1.XMLParser.extractBetweenTags(styleXml, "<w:rPr>", "</w:rPr>");
789
+ if (rPrXml) {
790
+ runFormatting = this.parseRunFormattingFromXml(rPrXml);
791
+ }
792
+ const properties = {
793
+ styleId,
794
+ name,
795
+ type: typeAttr,
796
+ basedOn,
797
+ next,
798
+ isDefault: defaultAttr === "1" || defaultAttr === "true",
799
+ customStyle: customStyleAttr === "1" || customStyleAttr === "true",
800
+ paragraphFormatting,
801
+ runFormatting,
802
+ };
803
+ return Style_1.Style.create(properties);
804
+ }
805
+ parseParagraphFormattingFromXml(pPrXml) {
806
+ const formatting = {};
807
+ const jcElement = XMLParser_1.XMLParser.extractBetweenTags(pPrXml, "<w:jc", "/>");
808
+ if (jcElement) {
809
+ const alignment = XMLParser_1.XMLParser.extractAttribute(`<w:jc${jcElement}`, "w:val");
810
+ if (alignment) {
811
+ formatting.alignment = alignment;
812
+ }
813
+ }
814
+ const spacingElement = XMLParser_1.XMLParser.extractBetweenTags(pPrXml, "<w:spacing", "/>");
815
+ if (spacingElement) {
816
+ const before = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:before");
817
+ const after = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:after");
818
+ const line = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:line");
819
+ const lineRule = XMLParser_1.XMLParser.extractAttribute(`<w:spacing${spacingElement}`, "w:lineRule");
820
+ let validatedLineRule;
821
+ if (lineRule) {
822
+ const validLineRules = ["auto", "exact", "atLeast"];
823
+ if (validLineRules.includes(lineRule)) {
824
+ validatedLineRule = lineRule;
825
+ }
826
+ }
827
+ formatting.spacing = {
828
+ before: before ? parseInt(before, 10) : undefined,
829
+ after: after ? parseInt(after, 10) : undefined,
830
+ line: line ? parseInt(line, 10) : validatedLineRule ? 240 : undefined,
831
+ lineRule: validatedLineRule,
832
+ };
833
+ }
834
+ const indElement = XMLParser_1.XMLParser.extractBetweenTags(pPrXml, "<w:ind", "/>");
835
+ if (indElement) {
836
+ const left = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:left");
837
+ const right = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:right");
838
+ const firstLine = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:firstLine");
839
+ const hanging = XMLParser_1.XMLParser.extractAttribute(`<w:ind${indElement}`, "w:hanging");
840
+ formatting.indentation = {
841
+ left: left ? parseInt(left, 10) : undefined,
842
+ right: right ? parseInt(right, 10) : undefined,
843
+ firstLine: firstLine ? parseInt(firstLine, 10) : undefined,
844
+ hanging: hanging ? parseInt(hanging, 10) : undefined,
845
+ };
846
+ }
847
+ if (pPrXml.includes("<w:keepNext/>") || pPrXml.includes("<w:keepNext ")) {
848
+ formatting.keepNext = true;
849
+ }
850
+ if (pPrXml.includes("<w:keepLines/>") || pPrXml.includes("<w:keepLines ")) {
851
+ formatting.keepLines = true;
852
+ }
853
+ if (pPrXml.includes("<w:pageBreakBefore/>") ||
854
+ pPrXml.includes("<w:pageBreakBefore ")) {
855
+ formatting.pageBreakBefore = true;
856
+ }
857
+ return formatting;
858
+ }
859
+ parseRunFormattingFromXml(rPrXml) {
860
+ const formatting = {};
861
+ if (rPrXml.includes("<w:b/>") || rPrXml.includes("<w:b ")) {
862
+ formatting.bold = true;
863
+ }
864
+ if (rPrXml.includes("<w:i/>") || rPrXml.includes("<w:i ")) {
865
+ formatting.italic = true;
866
+ }
867
+ if (rPrXml.includes("<w:strike/>") || rPrXml.includes("<w:strike ")) {
868
+ formatting.strike = true;
869
+ }
870
+ if (rPrXml.includes("<w:smallCaps/>") || rPrXml.includes("<w:smallCaps ")) {
871
+ formatting.smallCaps = true;
872
+ }
873
+ if (rPrXml.includes("<w:caps/>") || rPrXml.includes("<w:caps ")) {
874
+ formatting.allCaps = true;
875
+ }
876
+ const uElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:u", "/>");
877
+ if (uElement) {
878
+ const uVal = XMLParser_1.XMLParser.extractAttribute(`<w:u${uElement}`, "w:val");
879
+ if (uVal === "single" ||
880
+ uVal === "double" ||
881
+ uVal === "thick" ||
882
+ uVal === "dotted" ||
883
+ uVal === "dash") {
884
+ formatting.underline = uVal;
885
+ }
886
+ else {
887
+ formatting.underline = true;
888
+ }
889
+ }
890
+ const vertAlignElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:vertAlign", "/>");
891
+ if (vertAlignElement) {
892
+ const val = XMLParser_1.XMLParser.extractAttribute(`<w:vertAlign${vertAlignElement}`, "w:val");
893
+ if (val === "subscript") {
894
+ formatting.subscript = true;
895
+ }
896
+ else if (val === "superscript") {
897
+ formatting.superscript = true;
898
+ }
899
+ }
900
+ const rFontsElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:rFonts", "/>");
901
+ if (rFontsElement) {
902
+ const ascii = XMLParser_1.XMLParser.extractAttribute(`<w:rFonts${rFontsElement}`, "w:ascii");
903
+ if (ascii) {
904
+ formatting.font = ascii;
905
+ }
906
+ }
907
+ const szElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:sz", "/>");
908
+ if (szElement) {
909
+ const val = XMLParser_1.XMLParser.extractAttribute(`<w:sz${szElement}`, "w:val");
910
+ if (val) {
911
+ formatting.size = parseInt(val, 10) / 2;
912
+ }
913
+ }
914
+ const colorElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:color", "/>");
915
+ if (colorElement) {
916
+ const val = XMLParser_1.XMLParser.extractAttribute(`<w:color${colorElement}`, "w:val");
917
+ if (val && val !== "auto") {
918
+ formatting.color = val;
919
+ }
920
+ }
921
+ const highlightElement = XMLParser_1.XMLParser.extractBetweenTags(rPrXml, "<w:highlight", "/>");
922
+ if (highlightElement) {
923
+ const val = XMLParser_1.XMLParser.extractAttribute(`<w:highlight${highlightElement}`, "w:val");
924
+ if (val) {
925
+ const validHighlights = [
926
+ "yellow",
927
+ "green",
928
+ "cyan",
929
+ "magenta",
930
+ "blue",
931
+ "red",
932
+ "darkBlue",
933
+ "darkCyan",
934
+ "darkGreen",
935
+ "darkMagenta",
936
+ "darkRed",
937
+ "darkYellow",
938
+ "darkGray",
939
+ "lightGray",
940
+ "black",
941
+ "white",
942
+ ];
943
+ if (validHighlights.includes(val)) {
944
+ formatting.highlight = val;
945
+ }
946
+ }
947
+ }
948
+ return formatting;
949
+ }
487
950
  static getRawXml(zipHandler, partName) {
488
951
  try {
489
952
  const file = zipHandler.getFile(partName);
490
953
  if (!file) {
491
954
  return null;
492
955
  }
493
- if (typeof file.content === 'string') {
956
+ if (typeof file.content === "string") {
494
957
  return file.content;
495
958
  }
496
959
  if (Buffer.isBuffer(file.content)) {
497
- return file.content.toString('utf8');
960
+ return file.content.toString("utf8");
498
961
  }
499
962
  return null;
500
963
  }
@@ -504,10 +967,12 @@ class DocumentParser {
504
967
  }
505
968
  static setRawXml(zipHandler, partName, xmlContent) {
506
969
  try {
507
- if (typeof xmlContent !== 'string') {
970
+ if (typeof xmlContent !== "string") {
508
971
  return false;
509
972
  }
510
- zipHandler.addFile(partName, Buffer.from(xmlContent, 'utf8'), { binary: true });
973
+ zipHandler.addFile(partName, Buffer.from(xmlContent, "utf8"), {
974
+ binary: true,
975
+ });
511
976
  return true;
512
977
  }
513
978
  catch (error) {
@@ -516,7 +981,7 @@ class DocumentParser {
516
981
  }
517
982
  static getRelationships(zipHandler, partName) {
518
983
  try {
519
- const lastSlash = partName.lastIndexOf('/');
984
+ const lastSlash = partName.lastIndexOf("/");
520
985
  const relsPath = lastSlash === -1
521
986
  ? `_rels/${partName}.rels`
522
987
  : `${partName.substring(0, lastSlash)}/_rels/${partName.substring(lastSlash + 1)}.rels`;
@@ -552,6 +1017,25 @@ class DocumentParser {
552
1017
  return [];
553
1018
  }
554
1019
  }
1020
+ parseNamespaces(docXml) {
1021
+ const namespaces = {};
1022
+ const docTagMatch = docXml.match(/<w:document([^>]+)>/);
1023
+ if (docTagMatch && docTagMatch[1]) {
1024
+ const attributes = docTagMatch[1];
1025
+ const nsPattern = /xmlns:([^=]+)="([^"]+)"/g;
1026
+ let match;
1027
+ while ((match = nsPattern.exec(attributes)) !== null) {
1028
+ if (match[1] && match[2]) {
1029
+ namespaces[match[1]] = match[2];
1030
+ }
1031
+ }
1032
+ const defaultNsMatch = attributes.match(/xmlns="([^"]+)"/);
1033
+ if (defaultNsMatch && defaultNsMatch[1]) {
1034
+ namespaces["xmlns"] = defaultNsMatch[1];
1035
+ }
1036
+ }
1037
+ return namespaces;
1038
+ }
555
1039
  }
556
1040
  exports.DocumentParser = DocumentParser;
557
1041
  //# sourceMappingURL=DocumentParser.js.map