docxmlater 10.1.4 → 10.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372) hide show
  1. package/README.md +759 -754
  2. package/dist/constants/legacyCompatFlags.js +1 -1
  3. package/dist/constants/legacyCompatFlags.js.map +1 -1
  4. package/dist/constants/limits.js.map +1 -1
  5. package/dist/core/Document.d.ts +51 -50
  6. package/dist/core/Document.d.ts.map +1 -1
  7. package/dist/core/Document.js +486 -471
  8. package/dist/core/Document.js.map +1 -1
  9. package/dist/core/DocumentContent.d.ts +9 -9
  10. package/dist/core/DocumentContent.d.ts.map +1 -1
  11. package/dist/core/DocumentContent.js +1 -1
  12. package/dist/core/DocumentContent.js.map +1 -1
  13. package/dist/core/DocumentGenerator.d.ts +11 -11
  14. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  15. package/dist/core/DocumentGenerator.js +251 -251
  16. package/dist/core/DocumentGenerator.js.map +1 -1
  17. package/dist/core/DocumentIdManager.js.map +1 -1
  18. package/dist/core/DocumentParser.d.ts +15 -15
  19. package/dist/core/DocumentParser.d.ts.map +1 -1
  20. package/dist/core/DocumentParser.js +2123 -2155
  21. package/dist/core/DocumentParser.js.map +1 -1
  22. package/dist/core/DocumentValidator.d.ts.map +1 -1
  23. package/dist/core/DocumentValidator.js +2 -5
  24. package/dist/core/DocumentValidator.js.map +1 -1
  25. package/dist/core/Relationship.js.map +1 -1
  26. package/dist/core/RelationshipManager.d.ts.map +1 -1
  27. package/dist/core/RelationshipManager.js +3 -3
  28. package/dist/core/RelationshipManager.js.map +1 -1
  29. package/dist/elements/AlternateContent.js.map +1 -1
  30. package/dist/elements/Bookmark.d.ts.map +1 -1
  31. package/dist/elements/Bookmark.js +3 -1
  32. package/dist/elements/Bookmark.js.map +1 -1
  33. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  34. package/dist/elements/BookmarkManager.js.map +1 -1
  35. package/dist/elements/Comment.d.ts.map +1 -1
  36. package/dist/elements/Comment.js +9 -6
  37. package/dist/elements/Comment.js.map +1 -1
  38. package/dist/elements/CommentManager.d.ts.map +1 -1
  39. package/dist/elements/CommentManager.js +18 -17
  40. package/dist/elements/CommentManager.js.map +1 -1
  41. package/dist/elements/CommonTypes.d.ts +21 -21
  42. package/dist/elements/CommonTypes.d.ts.map +1 -1
  43. package/dist/elements/CommonTypes.js +56 -56
  44. package/dist/elements/CommonTypes.js.map +1 -1
  45. package/dist/elements/CustomXml.js.map +1 -1
  46. package/dist/elements/Endnote.d.ts.map +1 -1
  47. package/dist/elements/Endnote.js +6 -6
  48. package/dist/elements/Endnote.js.map +1 -1
  49. package/dist/elements/EndnoteManager.d.ts.map +1 -1
  50. package/dist/elements/EndnoteManager.js +6 -7
  51. package/dist/elements/EndnoteManager.js.map +1 -1
  52. package/dist/elements/Field.d.ts.map +1 -1
  53. package/dist/elements/Field.js +82 -25
  54. package/dist/elements/Field.js.map +1 -1
  55. package/dist/elements/FieldHelpers.d.ts.map +1 -1
  56. package/dist/elements/FieldHelpers.js.map +1 -1
  57. package/dist/elements/FontManager.d.ts.map +1 -1
  58. package/dist/elements/FontManager.js +1 -1
  59. package/dist/elements/FontManager.js.map +1 -1
  60. package/dist/elements/Footer.js +2 -2
  61. package/dist/elements/Footer.js.map +1 -1
  62. package/dist/elements/Footnote.d.ts.map +1 -1
  63. package/dist/elements/Footnote.js +6 -6
  64. package/dist/elements/Footnote.js.map +1 -1
  65. package/dist/elements/FootnoteManager.d.ts.map +1 -1
  66. package/dist/elements/FootnoteManager.js +6 -7
  67. package/dist/elements/FootnoteManager.js.map +1 -1
  68. package/dist/elements/Header.js +2 -2
  69. package/dist/elements/Header.js.map +1 -1
  70. package/dist/elements/HeaderFooterManager.js.map +1 -1
  71. package/dist/elements/Hyperlink.d.ts +5 -3
  72. package/dist/elements/Hyperlink.d.ts.map +1 -1
  73. package/dist/elements/Hyperlink.js +134 -76
  74. package/dist/elements/Hyperlink.js.map +1 -1
  75. package/dist/elements/Image.d.ts.map +1 -1
  76. package/dist/elements/Image.js +238 -106
  77. package/dist/elements/Image.js.map +1 -1
  78. package/dist/elements/ImageManager.d.ts.map +1 -1
  79. package/dist/elements/ImageManager.js +1 -1
  80. package/dist/elements/ImageManager.js.map +1 -1
  81. package/dist/elements/ImageRun.js +1 -1
  82. package/dist/elements/ImageRun.js.map +1 -1
  83. package/dist/elements/MathElement.js.map +1 -1
  84. package/dist/elements/Paragraph.d.ts +24 -24
  85. package/dist/elements/Paragraph.d.ts.map +1 -1
  86. package/dist/elements/Paragraph.js +181 -188
  87. package/dist/elements/Paragraph.js.map +1 -1
  88. package/dist/elements/PreservedElement.js.map +1 -1
  89. package/dist/elements/PropertyChangeTypes.d.ts.map +1 -1
  90. package/dist/elements/PropertyChangeTypes.js +6 -6
  91. package/dist/elements/PropertyChangeTypes.js.map +1 -1
  92. package/dist/elements/RangeMarker.d.ts.map +1 -1
  93. package/dist/elements/RangeMarker.js.map +1 -1
  94. package/dist/elements/Revision.d.ts.map +1 -1
  95. package/dist/elements/Revision.js +4 -5
  96. package/dist/elements/Revision.js.map +1 -1
  97. package/dist/elements/RevisionContent.js.map +1 -1
  98. package/dist/elements/RevisionManager.d.ts.map +1 -1
  99. package/dist/elements/RevisionManager.js +40 -48
  100. package/dist/elements/RevisionManager.js.map +1 -1
  101. package/dist/elements/Run.d.ts +16 -16
  102. package/dist/elements/Run.d.ts.map +1 -1
  103. package/dist/elements/Run.js +256 -238
  104. package/dist/elements/Run.js.map +1 -1
  105. package/dist/elements/Section.d.ts.map +1 -1
  106. package/dist/elements/Section.js +36 -11
  107. package/dist/elements/Section.js.map +1 -1
  108. package/dist/elements/Shape.d.ts.map +1 -1
  109. package/dist/elements/Shape.js.map +1 -1
  110. package/dist/elements/StructuredDocumentTag.d.ts +6 -6
  111. package/dist/elements/StructuredDocumentTag.d.ts.map +1 -1
  112. package/dist/elements/StructuredDocumentTag.js +99 -104
  113. package/dist/elements/StructuredDocumentTag.js.map +1 -1
  114. package/dist/elements/Table.d.ts +11 -11
  115. package/dist/elements/Table.d.ts.map +1 -1
  116. package/dist/elements/Table.js +102 -107
  117. package/dist/elements/Table.js.map +1 -1
  118. package/dist/elements/TableCell.d.ts +10 -10
  119. package/dist/elements/TableCell.d.ts.map +1 -1
  120. package/dist/elements/TableCell.js +105 -106
  121. package/dist/elements/TableCell.js.map +1 -1
  122. package/dist/elements/TableGridChange.d.ts.map +1 -1
  123. package/dist/elements/TableGridChange.js.map +1 -1
  124. package/dist/elements/TableOfContents.d.ts.map +1 -1
  125. package/dist/elements/TableOfContents.js +4 -4
  126. package/dist/elements/TableOfContents.js.map +1 -1
  127. package/dist/elements/TableOfContentsElement.js.map +1 -1
  128. package/dist/elements/TableRow.d.ts.map +1 -1
  129. package/dist/elements/TableRow.js +13 -6
  130. package/dist/elements/TableRow.js.map +1 -1
  131. package/dist/elements/TextBox.d.ts.map +1 -1
  132. package/dist/elements/TextBox.js +3 -5
  133. package/dist/elements/TextBox.js.map +1 -1
  134. package/dist/formatting/AbstractNumbering.d.ts +4 -4
  135. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  136. package/dist/formatting/AbstractNumbering.js +54 -49
  137. package/dist/formatting/AbstractNumbering.js.map +1 -1
  138. package/dist/formatting/NumberingInstance.d.ts.map +1 -1
  139. package/dist/formatting/NumberingInstance.js +1 -3
  140. package/dist/formatting/NumberingInstance.js.map +1 -1
  141. package/dist/formatting/NumberingLevel.d.ts +5 -5
  142. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  143. package/dist/formatting/NumberingLevel.js +119 -125
  144. package/dist/formatting/NumberingLevel.js.map +1 -1
  145. package/dist/formatting/NumberingManager.d.ts +1 -0
  146. package/dist/formatting/NumberingManager.d.ts.map +1 -1
  147. package/dist/formatting/NumberingManager.js +27 -9
  148. package/dist/formatting/NumberingManager.js.map +1 -1
  149. package/dist/formatting/Style.d.ts +11 -11
  150. package/dist/formatting/Style.d.ts.map +1 -1
  151. package/dist/formatting/Style.js +219 -247
  152. package/dist/formatting/Style.js.map +1 -1
  153. package/dist/formatting/StylesManager.d.ts +2 -2
  154. package/dist/formatting/StylesManager.d.ts.map +1 -1
  155. package/dist/formatting/StylesManager.js +96 -102
  156. package/dist/formatting/StylesManager.js.map +1 -1
  157. package/dist/helpers/CleanupHelper.d.ts +1 -1
  158. package/dist/helpers/CleanupHelper.d.ts.map +1 -1
  159. package/dist/helpers/CleanupHelper.js +6 -6
  160. package/dist/helpers/CleanupHelper.js.map +1 -1
  161. package/dist/images/ImageOptimizer.js +7 -7
  162. package/dist/images/ImageOptimizer.js.map +1 -1
  163. package/dist/index.d.ts +9 -9
  164. package/dist/index.d.ts.map +1 -1
  165. package/dist/index.js.map +1 -1
  166. package/dist/managers/DrawingManager.js.map +1 -1
  167. package/dist/tracking/DocumentTrackingContext.d.ts.map +1 -1
  168. package/dist/tracking/DocumentTrackingContext.js +23 -7
  169. package/dist/tracking/DocumentTrackingContext.js.map +1 -1
  170. package/dist/tracking/TrackingContext.d.ts.map +1 -1
  171. package/dist/tracking/TrackingContext.js.map +1 -1
  172. package/dist/types/compatibility-types.js.map +1 -1
  173. package/dist/types/formatting.js.map +1 -1
  174. package/dist/types/list-types.d.ts +6 -6
  175. package/dist/types/list-types.js.map +1 -1
  176. package/dist/types/settings-types.js.map +1 -1
  177. package/dist/types/styleConfig.d.ts +2 -2
  178. package/dist/types/styleConfig.js.map +1 -1
  179. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  180. package/dist/utils/ChangelogGenerator.js +97 -101
  181. package/dist/utils/ChangelogGenerator.js.map +1 -1
  182. package/dist/utils/CompatibilityUpgrader.d.ts.map +1 -1
  183. package/dist/utils/CompatibilityUpgrader.js +1 -1
  184. package/dist/utils/CompatibilityUpgrader.js.map +1 -1
  185. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  186. package/dist/utils/InMemoryRevisionAcceptor.js +1 -6
  187. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  188. package/dist/utils/MoveOperationHelper.d.ts.map +1 -1
  189. package/dist/utils/MoveOperationHelper.js +1 -1
  190. package/dist/utils/MoveOperationHelper.js.map +1 -1
  191. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  192. package/dist/utils/RevisionAwareProcessor.js +2 -4
  193. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  194. package/dist/utils/RevisionWalker.d.ts.map +1 -1
  195. package/dist/utils/RevisionWalker.js +4 -12
  196. package/dist/utils/RevisionWalker.js.map +1 -1
  197. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  198. package/dist/utils/SelectiveRevisionAcceptor.js +2 -6
  199. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  200. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  201. package/dist/utils/ShadingResolver.js +1 -1
  202. package/dist/utils/ShadingResolver.js.map +1 -1
  203. package/dist/utils/acceptRevisions.d.ts.map +1 -1
  204. package/dist/utils/acceptRevisions.js +23 -12
  205. package/dist/utils/acceptRevisions.js.map +1 -1
  206. package/dist/utils/cnfStyleDecoder.d.ts +1 -1
  207. package/dist/utils/cnfStyleDecoder.d.ts.map +1 -1
  208. package/dist/utils/cnfStyleDecoder.js +40 -40
  209. package/dist/utils/cnfStyleDecoder.js.map +1 -1
  210. package/dist/utils/corruptionDetection.d.ts.map +1 -1
  211. package/dist/utils/corruptionDetection.js.map +1 -1
  212. package/dist/utils/dateFormatting.js.map +1 -1
  213. package/dist/utils/deepClone.js +1 -1
  214. package/dist/utils/deepClone.js.map +1 -1
  215. package/dist/utils/diagnostics.d.ts.map +1 -1
  216. package/dist/utils/diagnostics.js +1 -1
  217. package/dist/utils/diagnostics.js.map +1 -1
  218. package/dist/utils/errorHandling.js.map +1 -1
  219. package/dist/utils/formatting.d.ts.map +1 -1
  220. package/dist/utils/formatting.js +10 -2
  221. package/dist/utils/formatting.js.map +1 -1
  222. package/dist/utils/list-detection.d.ts +2 -2
  223. package/dist/utils/list-detection.d.ts.map +1 -1
  224. package/dist/utils/list-detection.js +21 -23
  225. package/dist/utils/list-detection.js.map +1 -1
  226. package/dist/utils/logger.d.ts.map +1 -1
  227. package/dist/utils/logger.js +12 -7
  228. package/dist/utils/logger.js.map +1 -1
  229. package/dist/utils/parsingHelpers.js.map +1 -1
  230. package/dist/utils/stripTrackedChanges.d.ts.map +1 -1
  231. package/dist/utils/stripTrackedChanges.js +3 -3
  232. package/dist/utils/stripTrackedChanges.js.map +1 -1
  233. package/dist/utils/textDiff.d.ts +1 -1
  234. package/dist/utils/textDiff.js +8 -8
  235. package/dist/utils/textDiff.js.map +1 -1
  236. package/dist/utils/units.js.map +1 -1
  237. package/dist/utils/validation.d.ts.map +1 -1
  238. package/dist/utils/validation.js +24 -7
  239. package/dist/utils/validation.js.map +1 -1
  240. package/dist/utils/xmlSanitization.d.ts.map +1 -1
  241. package/dist/utils/xmlSanitization.js +3 -3
  242. package/dist/utils/xmlSanitization.js.map +1 -1
  243. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  244. package/dist/validation/RevisionAutoFixer.js +5 -5
  245. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  246. package/dist/validation/RevisionValidator.d.ts.map +1 -1
  247. package/dist/validation/RevisionValidator.js +7 -9
  248. package/dist/validation/RevisionValidator.js.map +1 -1
  249. package/dist/validation/ValidationRules.js +3 -3
  250. package/dist/validation/ValidationRules.js.map +1 -1
  251. package/dist/validation/index.js.map +1 -1
  252. package/dist/xml/XMLBuilder.d.ts +1 -1
  253. package/dist/xml/XMLBuilder.d.ts.map +1 -1
  254. package/dist/xml/XMLBuilder.js +98 -100
  255. package/dist/xml/XMLBuilder.js.map +1 -1
  256. package/dist/xml/XMLParser.d.ts.map +1 -1
  257. package/dist/xml/XMLParser.js +61 -66
  258. package/dist/xml/XMLParser.js.map +1 -1
  259. package/dist/zip/ZipHandler.d.ts.map +1 -1
  260. package/dist/zip/ZipHandler.js.map +1 -1
  261. package/dist/zip/ZipReader.d.ts.map +1 -1
  262. package/dist/zip/ZipReader.js +1 -3
  263. package/dist/zip/ZipReader.js.map +1 -1
  264. package/dist/zip/ZipWriter.d.ts +1 -1
  265. package/dist/zip/ZipWriter.d.ts.map +1 -1
  266. package/dist/zip/ZipWriter.js +28 -36
  267. package/dist/zip/ZipWriter.js.map +1 -1
  268. package/dist/zip/types.js +1 -1
  269. package/dist/zip/types.js.map +1 -1
  270. package/package.json +92 -92
  271. package/src/__tests__/helper-methods.test.ts +512 -512
  272. package/src/constants/legacyCompatFlags.ts +138 -138
  273. package/src/constants/limits.ts +50 -50
  274. package/src/core/Document.ts +1010 -1145
  275. package/src/core/DocumentContent.ts +461 -467
  276. package/src/core/DocumentGenerator.ts +1133 -1104
  277. package/src/core/DocumentIdManager.ts +158 -158
  278. package/src/core/DocumentParser.ts +2347 -2716
  279. package/src/core/DocumentValidator.ts +363 -372
  280. package/src/core/Relationship.ts +367 -367
  281. package/src/core/RelationshipManager.ts +429 -428
  282. package/src/elements/AlternateContent.ts +42 -42
  283. package/src/elements/Bookmark.ts +212 -210
  284. package/src/elements/BookmarkManager.ts +247 -250
  285. package/src/elements/Comment.ts +356 -359
  286. package/src/elements/CommentManager.ts +499 -502
  287. package/src/elements/CommonTypes.ts +524 -549
  288. package/src/elements/CustomXml.ts +36 -36
  289. package/src/elements/Endnote.ts +221 -217
  290. package/src/elements/EndnoteManager.ts +246 -249
  291. package/src/elements/Field.ts +1292 -1233
  292. package/src/elements/FieldHelpers.ts +329 -333
  293. package/src/elements/FontManager.ts +336 -339
  294. package/src/elements/Footer.ts +269 -269
  295. package/src/elements/Footnote.ts +221 -217
  296. package/src/elements/FootnoteManager.ts +246 -249
  297. package/src/elements/Header.ts +269 -269
  298. package/src/elements/HeaderFooterManager.ts +219 -219
  299. package/src/elements/Hyperlink.ts +1288 -1193
  300. package/src/elements/Image.ts +1982 -1756
  301. package/src/elements/ImageManager.ts +437 -432
  302. package/src/elements/ImageRun.ts +59 -59
  303. package/src/elements/MathElement.ts +65 -65
  304. package/src/elements/Paragraph.ts +4347 -4287
  305. package/src/elements/PreservedElement.ts +53 -53
  306. package/src/elements/PropertyChangeTypes.ts +458 -442
  307. package/src/elements/RangeMarker.ts +382 -400
  308. package/src/elements/Revision.ts +1198 -1217
  309. package/src/elements/RevisionContent.ts +73 -73
  310. package/src/elements/RevisionManager.ts +1070 -1070
  311. package/src/elements/Run.ts +3103 -3073
  312. package/src/elements/Section.ts +1521 -1421
  313. package/src/elements/Shape.ts +884 -873
  314. package/src/elements/StructuredDocumentTag.ts +1176 -1207
  315. package/src/elements/Table.ts +2468 -2524
  316. package/src/elements/TableCell.ts +1617 -1621
  317. package/src/elements/TableGridChange.ts +149 -151
  318. package/src/elements/TableOfContents.ts +701 -691
  319. package/src/elements/TableOfContentsElement.ts +89 -89
  320. package/src/elements/TableRow.ts +960 -929
  321. package/src/elements/TextBox.ts +766 -768
  322. package/src/formatting/AbstractNumbering.ts +580 -579
  323. package/src/formatting/NumberingInstance.ts +295 -299
  324. package/src/formatting/NumberingLevel.ts +981 -1040
  325. package/src/formatting/NumberingManager.ts +875 -827
  326. package/src/formatting/Style.ts +1785 -1879
  327. package/src/formatting/StylesManager.ts +1090 -1130
  328. package/src/helpers/CleanupHelper.ts +524 -524
  329. package/src/images/ImageOptimizer.ts +274 -274
  330. package/src/index.ts +559 -554
  331. package/src/managers/DrawingManager.ts +319 -319
  332. package/src/tracking/DocumentTrackingContext.ts +687 -674
  333. package/src/tracking/TrackingContext.ts +175 -173
  334. package/src/types/compatibility-types.ts +49 -49
  335. package/src/types/formatting.ts +210 -210
  336. package/src/types/list-types.ts +14 -14
  337. package/src/types/settings-types.ts +59 -59
  338. package/src/types/styleConfig.ts +189 -189
  339. package/src/utils/ChangelogGenerator.ts +1583 -1581
  340. package/src/utils/CompatibilityUpgrader.ts +235 -237
  341. package/src/utils/InMemoryRevisionAcceptor.ts +691 -696
  342. package/src/utils/MoveOperationHelper.ts +233 -238
  343. package/src/utils/RevisionAwareProcessor.ts +518 -526
  344. package/src/utils/RevisionWalker.ts +427 -457
  345. package/src/utils/SelectiveRevisionAcceptor.ts +662 -683
  346. package/src/utils/ShadingResolver.ts +105 -107
  347. package/src/utils/acceptRevisions.ts +723 -714
  348. package/src/utils/cnfStyleDecoder.ts +212 -217
  349. package/src/utils/corruptionDetection.ts +346 -345
  350. package/src/utils/dateFormatting.ts +20 -20
  351. package/src/utils/deepClone.ts +77 -78
  352. package/src/utils/diagnostics.ts +125 -129
  353. package/src/utils/errorHandling.ts +80 -80
  354. package/src/utils/formatting.ts +220 -213
  355. package/src/utils/list-detection.ts +32 -42
  356. package/src/utils/logger.ts +412 -404
  357. package/src/utils/parsingHelpers.ts +190 -190
  358. package/src/utils/stripTrackedChanges.ts +356 -353
  359. package/src/utils/textDiff.ts +100 -100
  360. package/src/utils/units.ts +421 -421
  361. package/src/utils/validation.ts +553 -542
  362. package/src/utils/xmlSanitization.ts +179 -182
  363. package/src/validation/RevisionAutoFixer.ts +541 -542
  364. package/src/validation/RevisionValidator.ts +470 -460
  365. package/src/validation/ValidationRules.ts +338 -338
  366. package/src/validation/index.ts +30 -30
  367. package/src/xml/XMLBuilder.ts +857 -871
  368. package/src/xml/XMLParser.ts +877 -919
  369. package/src/zip/ZipHandler.ts +629 -637
  370. package/src/zip/ZipReader.ts +295 -299
  371. package/src/zip/ZipWriter.ts +374 -390
  372. package/src/zip/types.ts +116 -116
@@ -1,919 +1,877 @@
1
- /**
2
- * XMLParser - Simple position-based XML parser
3
- * Avoids regex backtracking issues that can cause ReDoS attacks
4
- * Completes the DocXML framework (XMLBuilder + XMLParser)
5
- */
6
-
7
- import { getGlobalLogger, createScopedLogger, ILogger } from "../utils/logger";
8
- import { XMLBuilder } from "./XMLBuilder";
9
-
10
- // Create scoped logger for XMLParser operations
11
- function getLogger(): ILogger {
12
- return createScopedLogger(getGlobalLogger(), 'XMLParser');
13
- }
14
-
15
- /**
16
- * Default maximum nesting depth for XML parsing.
17
- * Prevents stack overflow on deeply nested documents.
18
- */
19
- export const DEFAULT_MAX_NESTING_DEPTH = 256;
20
-
21
- /**
22
- * Options for XML-to-object parsing
23
- */
24
- export interface ParseToObjectOptions {
25
- /** Ignore attributes (default: false) */
26
- ignoreAttributes?: boolean;
27
-
28
- /** Attribute name prefix (default: '@_') */
29
- attributeNamePrefix?: string;
30
-
31
- /** Text node property name (default: '#text') */
32
- textNodeName?: string;
33
-
34
- /** Remove namespace prefixes from element names (default: false) */
35
- ignoreNamespace?: boolean;
36
-
37
- /** Parse numeric attribute values (default: true) */
38
- parseAttributeValue?: boolean;
39
-
40
- /** Trim whitespace from text values (default: true) */
41
- trimValues?: boolean;
42
-
43
- /** Always return arrays for elements (default: false) */
44
- alwaysArray?: boolean;
45
-
46
- /** Maximum nesting depth (default: 256). Prevents stack overflow on deeply nested documents. */
47
- maxNestingDepth?: number;
48
- }
49
-
50
- /**
51
- * Parsed XML object structure
52
- * Can be a string, object, array, or nested structure
53
- */
54
- export type ParsedXMLValue =
55
- | string
56
- | number
57
- | boolean
58
- | ParsedXMLObject
59
- | ParsedXMLObject[]
60
- | null
61
- | undefined;
62
-
63
- /**
64
- * Parsed XML object with dynamic keys
65
- */
66
- export interface ParsedXMLObject {
67
- [key: string]: ParsedXMLValue;
68
- }
69
-
70
- /**
71
- * Internal structure for tracking parsed elements during parsing
72
- */
73
- interface ParsedElement {
74
- name: string;
75
- value: ParsedXMLValue;
76
- }
77
-
78
- /**
79
- * Simple XML parser using position-based parsing instead of regex
80
- * Prevents catastrophic backtracking (ReDoS attacks) by avoiding nested regex patterns
81
- */
82
- export class XMLParser {
83
- /**
84
- * Extracts the body content from a Word document XML
85
- * @param docXml - The complete document.xml content
86
- * @returns The body content, or empty string if not found
87
- */
88
- static extractBody(docXml: string): string {
89
- const startTag = "<w:body";
90
- const endTag = "</w:body>";
91
-
92
- const startIdx = docXml.indexOf(startTag);
93
- if (startIdx === -1) return "";
94
-
95
- // Find the closing > of opening tag
96
- const openEnd = docXml.indexOf(">", startIdx);
97
- if (openEnd === -1) return "";
98
-
99
- // Find matching closing tag
100
- const endIdx = docXml.indexOf(endTag, openEnd);
101
- if (endIdx === -1) return "";
102
-
103
- return docXml.substring(openEnd + 1, endIdx);
104
- }
105
-
106
- /**
107
- * Extracts all elements of a given type using position-based parsing
108
- * Handles nested tags correctly by tracking depth
109
- * @param xml - XML content to parse
110
- * @param tagName - Tag name to extract (e.g., 'w:p', 'w:r')
111
- * @returns Array of XML strings for each element
112
- */
113
- static extractElements(xml: string, tagName: string): string[] {
114
- const elements: string[] = [];
115
- const openTag = `<${tagName}`;
116
- const closeTag = `</${tagName}>`;
117
- const selfClosingEnd = "/>";
118
-
119
- let pos = 0;
120
- while (pos < xml.length) {
121
- const startIdx = xml.indexOf(openTag, pos);
122
- if (startIdx === -1) break;
123
-
124
- // Verify this is the exact tag (not a prefix match like <w:p matching <w:pPr>)
125
- // The character after the tag name must be either '>', '/', whitespace, or '=' (for attributes)
126
- const charAfterTag = xml[startIdx + openTag.length];
127
- if (
128
- charAfterTag &&
129
- charAfterTag !== ">" &&
130
- charAfterTag !== "/" &&
131
- charAfterTag !== " " &&
132
- charAfterTag !== "\t" &&
133
- charAfterTag !== "\n" &&
134
- charAfterTag !== "\r" &&
135
- charAfterTag !== "="
136
- ) {
137
- // This is a prefix match (e.g., <w:pPr> when looking for <w:p>), skip it (Issue #5)
138
- pos = startIdx + openTag.length;
139
- continue;
140
- }
141
-
142
- // Find the end of opening tag
143
- const openEnd = xml.indexOf(">", startIdx);
144
- if (openEnd === -1) break;
145
-
146
- // Check if self-closing
147
- if (xml.substring(openEnd - 1, openEnd + 1) === selfClosingEnd) {
148
- elements.push(xml.substring(startIdx, openEnd + 1));
149
- pos = openEnd + 1;
150
- continue;
151
- }
152
-
153
- // Find matching closing tag (handle nesting)
154
- let depth = 1;
155
- let searchPos = openEnd + 1;
156
-
157
- while (depth > 0 && searchPos < xml.length) {
158
- // Find next potential opening tag
159
- let nextOpen = -1;
160
- let openSearchPos = searchPos;
161
- while (true) {
162
- const candidateOpen = xml.indexOf(openTag, openSearchPos);
163
- if (candidateOpen === -1) {
164
- break;
165
- }
166
- // Verify it's an exact match (not a prefix)
167
- const charAfter = xml[candidateOpen + openTag.length];
168
- if (
169
- charAfter &&
170
- charAfter !== ">" &&
171
- charAfter !== "/" &&
172
- charAfter !== " " &&
173
- charAfter !== "\t" &&
174
- charAfter !== "\n" &&
175
- charAfter !== "\r"
176
- ) {
177
- // Prefix match, keep searching
178
- openSearchPos = candidateOpen + openTag.length;
179
- continue;
180
- }
181
- nextOpen = candidateOpen;
182
- break;
183
- }
184
-
185
- const nextClose = xml.indexOf(closeTag, searchPos);
186
-
187
- if (nextClose === -1) break;
188
-
189
- if (nextOpen !== -1 && nextOpen < nextClose) {
190
- depth++;
191
- searchPos = nextOpen + openTag.length;
192
- } else {
193
- depth--;
194
- if (depth === 0) {
195
- elements.push(xml.substring(startIdx, nextClose + closeTag.length));
196
- pos = nextClose + closeTag.length;
197
- } else {
198
- searchPos = nextClose + closeTag.length;
199
- }
200
- }
201
- }
202
-
203
- if (depth > 0) {
204
- // Unclosed tag - skip it
205
- pos = startIdx + openTag.length;
206
- }
207
- }
208
-
209
- return elements;
210
- }
211
-
212
- /**
213
- * Extracts attribute value from an XML string
214
- * @param xml - XML content
215
- * @param attributeName - Attribute name (e.g., 'w:val')
216
- * @returns Attribute value or undefined
217
- */
218
- static extractAttribute(
219
- xml: string,
220
- attributeName: string
221
- ): string | undefined {
222
- // Use simple indexOf for bounded string search (safe)
223
- const attrPattern = `${attributeName}="`;
224
- const startIdx = xml.indexOf(attrPattern);
225
- if (startIdx === -1) return undefined;
226
-
227
- const valueStart = startIdx + attrPattern.length;
228
- const valueEnd = xml.indexOf('"', valueStart);
229
- if (valueEnd === -1) return undefined;
230
-
231
- const rawValue = xml.substring(valueStart, valueEnd);
232
- // Unescape XML entities to get the actual value
233
- // This prevents double-escaping when the value is later re-serialized
234
- return XMLBuilder.unescapeXml(rawValue);
235
- }
236
-
237
- /**
238
- * Checks if an XML string contains a self-closing tag
239
- * @param xml - XML content
240
- * @param tagName - Tag name to check
241
- * @returns True if the tag exists as self-closing
242
- */
243
- static hasSelfClosingTag(xml: string, tagName: string): boolean {
244
- return xml.includes(`<${tagName}/>`) || xml.includes(`<${tagName} `);
245
- }
246
-
247
- /**
248
- * Checks if a boolean property tag is enabled (w:val="1" or w:val="true")
249
- * Per ECMA-376, boolean properties can be:
250
- * - Present with w:val="1" or w:val="true" (enabled)
251
- * - Present with w:val="0" or w:val="false" (explicitly disabled)
252
- * - Absent (disabled by default)
253
- *
254
- * @param xml - XML content to search
255
- * @param tagName - Tag name (e.g., 'w:keepNext')
256
- * @returns True if tag exists with w:val="1" or w:val="true", false otherwise
257
- *
258
- * @example
259
- * hasBooleanProperty('<w:pPr><w:keepNext w:val="1"/></w:pPr>', 'w:keepNext'); // true
260
- * hasBooleanProperty('<w:pPr><w:keepNext w:val="0"/></w:pPr>', 'w:keepNext'); // false
261
- * hasBooleanProperty('<w:pPr><w:spacing/></w:pPr>', 'w:keepNext'); // false
262
- */
263
- static hasBooleanProperty(xml: string, tagName: string): boolean {
264
- // Check for tag with w:val="1" or w:val="true"
265
- if (
266
- xml.includes(`<${tagName} w:val="1"`) ||
267
- xml.includes(`<${tagName} w:val="true"`)
268
- ) {
269
- return true;
270
- }
271
-
272
- // Check for self-closing tag without w:val attribute (means true per ECMA-376)
273
- // Example: <w:b/> means bold=true
274
- if (xml.includes(`<${tagName}/>`)) {
275
- return true;
276
- }
277
-
278
- return false;
279
- }
280
-
281
- /**
282
- * Extracts text content from within tags
283
- * Finds all <w:t>...</w:t> tags and extracts their text
284
- * @param xml - XML content
285
- * @returns Combined text content
286
- */
287
- static extractText(xml: string): string {
288
- const texts: string[] = [];
289
- const openTag = "<w:t";
290
- const closeTag = "</w:t>";
291
-
292
- let pos = 0;
293
- while (pos < xml.length) {
294
- const startIdx = xml.indexOf(openTag, pos);
295
- if (startIdx === -1) break;
296
-
297
- // Find the end of opening tag
298
- const openEnd = xml.indexOf(">", startIdx);
299
- if (openEnd === -1) break;
300
-
301
- // Find closing tag
302
- const closeIdx = xml.indexOf(closeTag, openEnd);
303
- if (closeIdx === -1) break;
304
-
305
- // Extract text between tags
306
- const text = xml.substring(openEnd + 1, closeIdx);
307
- texts.push(text);
308
-
309
- pos = closeIdx + closeTag.length;
310
- }
311
-
312
- return texts.join("");
313
- }
314
-
315
- /**
316
- * Validates input size to prevent excessive memory usage
317
- * @param xml - XML content
318
- * @param maxSize - Maximum size in bytes (default: 10MB)
319
- * @throws Error if XML exceeds max size
320
- */
321
- static validateSize(xml: string, maxSize: number = 10 * 1024 * 1024): void {
322
- if (xml.length > maxSize) {
323
- throw new Error(
324
- `XML content too large for parsing (${(
325
- xml.length /
326
- 1024 /
327
- 1024
328
- ).toFixed(1)}MB). ` +
329
- `Maximum allowed: ${(maxSize / 1024 / 1024).toFixed(0)}MB`
330
- );
331
- }
332
- }
333
-
334
- /**
335
- * Extracts content between two specific tags
336
- * More efficient than regex for large documents
337
- * @param xml - XML content
338
- * @param startTag - Opening tag (e.g., '<w:pPr')
339
- * @param endTag - Closing tag (e.g., '</w:pPr>')
340
- * @returns Content between tags, or undefined if not found
341
- */
342
- static extractBetweenTags(
343
- xml: string,
344
- startTag: string,
345
- endTag: string
346
- ): string | undefined {
347
- const startIdx = xml.indexOf(startTag);
348
- if (startIdx === -1) return undefined;
349
-
350
- // Find the end of the opening tag
351
- const openEnd = xml.indexOf(">", startIdx);
352
- if (openEnd === -1) return undefined;
353
-
354
- // Find the closing tag
355
- const endIdx = xml.indexOf(endTag, openEnd);
356
- if (endIdx === -1) return undefined;
357
-
358
- return xml.substring(openEnd + 1, endIdx);
359
- }
360
-
361
- /**
362
- * Extracts a complete self-closing tag with its attributes
363
- * Handles cases where multiple similar tags exist (e.g., <w:sz.../> and <w:szCs.../>)
364
- *
365
- * @param xml - XML string to search
366
- * @param tagName - Tag name to find (e.g., "w:color", "w:sz")
367
- * @returns The complete tag content (attributes portion) or undefined if not found
368
- *
369
- * @example
370
- * const xml = '<w:sz w:val="36"/><w:color w:val="FF0000"/>';
371
- * const colorTag = XMLParser.extractSelfClosingTag(xml, 'w:color');
372
- * // Returns: ' w:val="FF0000"'
373
- */
374
- static extractSelfClosingTag(
375
- xml: string,
376
- tagName: string
377
- ): string | undefined {
378
- const startPattern = `<${tagName}`;
379
- let searchPos = 0;
380
-
381
- // Search for the exact tag (not tags that start with this pattern)
382
- while (true) {
383
- const startIdx = xml.indexOf(startPattern, searchPos);
384
- if (startIdx === -1) return undefined;
385
-
386
- // Check what character follows the tag name
387
- const charAfterTag = xml[startIdx + startPattern.length];
388
-
389
- // Valid separators after tag name: space, '/', or '>'
390
- if (charAfterTag === ' ' || charAfterTag === '/' || charAfterTag === '>') {
391
- // Found the exact tag, now find its end
392
- const endIdx = xml.indexOf('/>', startIdx);
393
- if (endIdx === -1) {
394
- // Try finding a closing tag instead (non-self-closing)
395
- const closeTagStart = xml.indexOf('>', startIdx);
396
- if (closeTagStart === -1) return undefined;
397
-
398
- // Return attributes portion
399
- return xml.substring(startIdx + startPattern.length, closeTagStart);
400
- }
401
-
402
- // Return attributes portion (between tag name and />)
403
- return xml.substring(startIdx + startPattern.length, endIdx);
404
- }
405
-
406
- // Not the exact tag (e.g., found "w:sz" when looking for "w:s")
407
- // Continue searching
408
- searchPos = startIdx + 1;
409
- }
410
- }
411
-
412
- /**
413
- * Parse XML string to JavaScript object
414
- * Compatible with fast-xml-parser output format
415
- *
416
- * @param xml - XML string to parse
417
- * @param options - Parsing options
418
- * @returns Parsed JavaScript object
419
- *
420
- * @example
421
- * const xml = '<Relationships><Relationship Id="rId1" Target="https://example.com"/></Relationships>';
422
- * const obj = XMLParser.parseToObject(xml);
423
- * // Returns: { Relationships: { Relationship: { '@_Id': 'rId1', '@_Target': 'https://example.com' } } }
424
- *
425
- * @example
426
- * // Multiple elements become arrays
427
- * const xml = '<Items><Item id="1"/><Item id="2"/></Items>';
428
- * const obj = XMLParser.parseToObject(xml);
429
- * // Returns: { Items: { Item: [{ '@_id': '1' }, { '@_id': '2' }] } }
430
- */
431
- static parseToObject(
432
- xml: string,
433
- options?: ParseToObjectOptions
434
- ): ParsedXMLObject {
435
- const logger = getLogger();
436
- logger.debug('Parsing XML to object', { xmlSize: xml.length });
437
-
438
- // Default options
439
- const opts: Required<ParseToObjectOptions> = {
440
- ignoreAttributes: options?.ignoreAttributes ?? false,
441
- attributeNamePrefix: options?.attributeNamePrefix ?? "@_",
442
- textNodeName: options?.textNodeName ?? "#text",
443
- ignoreNamespace: options?.ignoreNamespace ?? false,
444
- parseAttributeValue: options?.parseAttributeValue ?? true,
445
- trimValues: options?.trimValues ?? true,
446
- alwaysArray: options?.alwaysArray ?? false,
447
- maxNestingDepth: options?.maxNestingDepth ?? DEFAULT_MAX_NESTING_DEPTH,
448
- };
449
-
450
- // Validate input size
451
- XMLParser.validateSize(xml);
452
-
453
- // Remove XML declaration and trim
454
- xml = xml.replace(/<\?xml[^>]*\?>\s*/g, "").trim();
455
-
456
- if (!xml) {
457
- return {};
458
- }
459
-
460
- // Parse root element (start at depth 0)
461
- const result = XMLParser.parseElementToObject(xml, 0, opts, 0);
462
- logger.debug('XML parsed to object');
463
- return result.value as ParsedXMLObject;
464
- }
465
-
466
- /**
467
- * Parses a single XML element into an object
468
- * @private
469
- */
470
- private static parseElementToObject(
471
- xml: string,
472
- startPos: number,
473
- options: Required<ParseToObjectOptions>,
474
- depth: number
475
- ): { value: ParsedXMLValue; endPos: number } {
476
- // Check nesting depth to prevent stack overflow
477
- if (depth > options.maxNestingDepth) {
478
- throw new Error(
479
- `XML nesting depth exceeds maximum of ${options.maxNestingDepth}. ` +
480
- `This may indicate malformed XML or an attack attempt. ` +
481
- `Use the maxNestingDepth option to increase the limit if needed.`
482
- );
483
- }
484
-
485
- // Find opening tag
486
- const openTagStart = xml.indexOf("<", startPos);
487
- if (openTagStart === -1) {
488
- return { value: {}, endPos: xml.length };
489
- }
490
-
491
- // Skip comments
492
- if (xml.substring(openTagStart, openTagStart + 4) === "<!--") {
493
- const commentEnd = xml.indexOf("-->", openTagStart + 4);
494
- if (commentEnd !== -1) {
495
- return XMLParser.parseElementToObject(xml, commentEnd + 3, options, depth);
496
- }
497
- return { value: {}, endPos: xml.length };
498
- }
499
-
500
- // Extract element name
501
- const nameMatch = /^([a-zA-Z0-9:_-]+)/.exec(xml
502
- .substring(openTagStart + 1));
503
- if (!nameMatch) {
504
- return { value: {}, endPos: openTagStart + 1 };
505
- }
506
-
507
- const originalElementName: string = nameMatch[1] || "";
508
- let elementName: string = originalElementName;
509
- const tagHeaderEnd = xml.indexOf(">", openTagStart);
510
- if (tagHeaderEnd === -1) {
511
- return { value: {}, endPos: xml.length };
512
- }
513
-
514
- // Remove namespace if requested (but keep original for offset calculations)
515
- if (options.ignoreNamespace && elementName.includes(":")) {
516
- elementName = elementName.split(":")[1] || elementName;
517
- }
518
-
519
- // Extract attributes using ORIGINAL element name length for correct offset
520
- const tagHeader = xml.substring(
521
- openTagStart + 1 + originalElementName.length,
522
- tagHeaderEnd
523
- );
524
- const attributes = XMLParser.extractAttributesFromTag(tagHeader, options);
525
-
526
- // Check if self-closing
527
- const isSelfClosing =
528
- tagHeader.trim().endsWith("/") || xml[tagHeaderEnd - 1] === "/";
529
-
530
- if (isSelfClosing) {
531
- // Self-closing tag - return object with attributes only
532
- const elementValue: ParsedXMLObject = { ...attributes };
533
- return {
534
- value: { [elementName]: elementValue },
535
- endPos: tagHeaderEnd + 1,
536
- };
537
- }
538
-
539
- // Find closing tag (use original name with namespace for correct matching)
540
- const closingTag = `</${originalElementName}>`;
541
- const contentStart = tagHeaderEnd + 1;
542
- const closingTagPos = XMLParser.findClosingTag(
543
- xml,
544
- originalElementName,
545
- contentStart
546
- );
547
-
548
- if (closingTagPos === -1) {
549
- // No closing tag found - treat as self-closing
550
- return {
551
- value: { [elementName]: { ...attributes } },
552
- endPos: tagHeaderEnd + 1,
553
- };
554
- }
555
-
556
- // Extract content between tags
557
- const content = xml.substring(contentStart, closingTagPos);
558
-
559
- // Parse content (children or text)
560
- const children: ParsedElement[] = [];
561
- let textContent = "";
562
- let pos = 0;
563
-
564
- while (pos < content.length) {
565
- const nextTag = content.indexOf("<", pos);
566
-
567
- if (nextTag === -1) {
568
- // No more tags - rest is text
569
- const text = content.substring(pos);
570
- // When trimValues is false, preserve whitespace-only text
571
- // When trimValues is true, only include text that has non-whitespace content
572
- if (text.length > 0 && (!options.trimValues || text.trim())) {
573
- // Unescape XML entities in text content (e.g., &lt; -> <)
574
- textContent += XMLBuilder.unescapeXml(text);
575
- }
576
- break;
577
- }
578
-
579
- // Collect text before next tag
580
- if (nextTag > pos) {
581
- const text = content.substring(pos, nextTag);
582
- // When trimValues is false, preserve whitespace-only text
583
- // When trimValues is true, only include text that has non-whitespace content
584
- if (text.length > 0 && (!options.trimValues || text.trim())) {
585
- // Unescape XML entities in text content (e.g., &lt; -> <)
586
- textContent += XMLBuilder.unescapeXml(text);
587
- }
588
- }
589
-
590
- // Parse child element (increment depth for children)
591
- const childResult = XMLParser.parseElementToObject(
592
- content,
593
- nextTag,
594
- options,
595
- depth + 1
596
- );
597
- const childObj = childResult.value as ParsedXMLObject;
598
-
599
- // Extract child name and value
600
- const childKeys = Object.keys(childObj);
601
- if (childKeys.length > 0) {
602
- const childName = childKeys[0];
603
- if (childName) {
604
- const childValue = childObj[childName];
605
- children.push({ name: childName, value: childValue });
606
- }
607
- }
608
-
609
- pos = childResult.endPos;
610
- }
611
-
612
- // Build element value
613
- let elementValue: ParsedXMLValue = {};
614
-
615
- // Add attributes
616
- if (!options.ignoreAttributes && Object.keys(attributes).length > 0) {
617
- elementValue = { ...attributes };
618
- }
619
-
620
- // Add text content
621
- // When trimValues is false, include whitespace-only text
622
- // When trimValues is true, only include text with non-whitespace content
623
- if (textContent.length > 0 && (!options.trimValues || textContent.trim())) {
624
- const text = options.trimValues ? textContent.trim() : textContent;
625
- if (typeof elementValue === "object" && !Array.isArray(elementValue)) {
626
- if (Object.keys(elementValue).length === 0) {
627
- // Only text, no attributes - return as direct value if simple
628
- elementValue = text;
629
- } else {
630
- // Text with attributes
631
- (elementValue)[options.textNodeName] = text;
632
- }
633
- }
634
- }
635
-
636
- // Add children
637
- if (children.length > 0) {
638
- const coalescedChildren = XMLParser.coalesceChildren(children, options);
639
- if (typeof elementValue === "object" && !Array.isArray(elementValue)) {
640
- elementValue = { ...elementValue, ...coalescedChildren };
641
- } else {
642
- elementValue = coalescedChildren;
643
- }
644
- }
645
-
646
- // If element has no content, attributes, or children - return empty object
647
- if (
648
- typeof elementValue === "object" &&
649
- !Array.isArray(elementValue) &&
650
- Object.keys(elementValue).length === 0
651
- ) {
652
- elementValue = {};
653
- }
654
-
655
- return {
656
- value: { [elementName]: elementValue },
657
- endPos: closingTagPos + closingTag.length,
658
- };
659
- }
660
-
661
- /**
662
- * Extracts attributes from a tag header
663
- * @private
664
- */
665
- private static extractAttributesFromTag(
666
- tagHeader: string,
667
- options: Required<ParseToObjectOptions>
668
- ): Record<string, string | number | boolean> {
669
- const attributes: Record<string, string | number | boolean> = {};
670
-
671
- if (options.ignoreAttributes) {
672
- return attributes;
673
- }
674
-
675
- // Simple attribute extraction using position-based parsing
676
- let pos = 0;
677
- while (pos < tagHeader.length) {
678
- // Skip whitespace
679
- while (pos < tagHeader.length) {
680
- const char = tagHeader[pos];
681
- if (char && /\s/.test(char)) {
682
- pos++;
683
- } else {
684
- break;
685
- }
686
- }
687
-
688
- if (pos >= tagHeader.length || tagHeader[pos] === "/") {
689
- break;
690
- }
691
-
692
- // Extract attribute name
693
- const nameStart = pos;
694
- while (pos < tagHeader.length) {
695
- const char = tagHeader[pos];
696
- if (char && /[a-zA-Z0-9:_-]/.test(char)) {
697
- pos++;
698
- } else {
699
- break;
700
- }
701
- }
702
-
703
- if (pos === nameStart) {
704
- break;
705
- }
706
-
707
- let attrName = tagHeader.substring(nameStart, pos);
708
-
709
- // Skip whitespace and '='
710
- while (pos < tagHeader.length) {
711
- const char = tagHeader[pos];
712
- if (char && /[\s=]/.test(char)) {
713
- pos++;
714
- } else {
715
- break;
716
- }
717
- }
718
-
719
- // Extract attribute value
720
- let attrValue = "";
721
- if (
722
- pos < tagHeader.length &&
723
- (tagHeader[pos] === '"' || tagHeader[pos] === "'")
724
- ) {
725
- const quote = tagHeader[pos];
726
- pos++; // Skip opening quote
727
- const valueStart = pos;
728
-
729
- while (pos < tagHeader.length && tagHeader[pos] !== quote) {
730
- pos++;
731
- }
732
-
733
- attrValue = tagHeader.substring(valueStart, pos);
734
- pos++; // Skip closing quote
735
- }
736
-
737
- // Remove namespace from attribute name if requested
738
- if (options.ignoreNamespace && attrName.includes(":")) {
739
- attrName = attrName.split(":")[1] || attrName;
740
- }
741
-
742
- // Add prefix to attribute name
743
- const prefixedName = options.attributeNamePrefix + attrName;
744
-
745
- // Parse attribute value
746
- attributes[prefixedName] = options.parseAttributeValue
747
- ? XMLParser.parseValue(attrValue)
748
- : attrValue;
749
- }
750
-
751
- return attributes;
752
- }
753
-
754
- /**
755
- * Finds the closing tag for an element, handling nesting
756
- * @private
757
- */
758
- private static findClosingTag(
759
- xml: string,
760
- elementName: string,
761
- startPos: number
762
- ): number {
763
- const openTag = `<${elementName}`;
764
- const closeTag = `</${elementName}>`;
765
- let depth = 1;
766
- let pos = startPos;
767
-
768
- while (depth > 0 && pos < xml.length) {
769
- const nextClose = xml.indexOf(closeTag, pos);
770
-
771
- if (nextClose === -1) {
772
- return -1; // No closing tag found
773
- }
774
-
775
- // Find the next REAL opening tag (not a prefix match like <w:pPrChange for <w:pPr)
776
- // Must search for all potential matches and verify each one
777
- let realOpenPos = -1;
778
- let searchPos = pos;
779
- while (searchPos < nextClose) {
780
- const candidateOpen = xml.indexOf(openTag, searchPos);
781
- if (candidateOpen === -1 || candidateOpen >= nextClose) {
782
- break; // No more candidates before the closing tag
783
- }
784
-
785
- const charAfter = xml[candidateOpen + openTag.length];
786
- if (
787
- charAfter === ">" ||
788
- charAfter === " " ||
789
- charAfter === "/" ||
790
- charAfter === "\t" ||
791
- charAfter === "\n" ||
792
- charAfter === "\r"
793
- ) {
794
- // This looks like a real opening tag - but check if it's self-closing
795
- // Self-closing tags like <w:rPr/> should NOT increase depth
796
- const tagEnd = xml.indexOf(">", candidateOpen);
797
- if (tagEnd !== -1 && xml[tagEnd - 1] === "/") {
798
- // Self-closing tag - skip it (don't affect depth)
799
- searchPos = tagEnd + 1;
800
- continue;
801
- }
802
- // This is a real opening tag (not self-closing)
803
- realOpenPos = candidateOpen;
804
- break;
805
- }
806
-
807
- // False positive (e.g., <w:pPrChange when looking for <w:pPr)
808
- // Keep searching from after this position
809
- searchPos = candidateOpen + openTag.length;
810
- }
811
-
812
- if (realOpenPos !== -1) {
813
- // Found a real opening tag before the closing tag - increase depth
814
- depth++;
815
- pos = realOpenPos + openTag.length;
816
- } else {
817
- // No real opening tag before this closing tag - decrease depth
818
- depth--;
819
- if (depth === 0) {
820
- return nextClose;
821
- }
822
- pos = nextClose + closeTag.length;
823
- }
824
- }
825
-
826
- return -1;
827
- }
828
-
829
- /**
830
- * Coalesces children with duplicate names into arrays
831
- * @private
832
- */
833
- private static coalesceChildren(
834
- children: ParsedElement[],
835
- options: Required<ParseToObjectOptions>
836
- ): ParsedXMLObject {
837
- const result: ParsedXMLObject = {};
838
- const nameCounts: Record<string, number> = {};
839
- const nameIndices: Record<string, number> = {};
840
-
841
- // Track element order for correct run content parsing (tabs, breaks, text)
842
- // This is critical for preserving the order of mixed content like: text -> tab -> text
843
- const orderedChildren: { type: string; index: number }[] = [];
844
-
845
- // Count occurrences of each child name
846
- for (const child of children) {
847
- nameCounts[child.name] = (nameCounts[child.name] || 0) + 1;
848
- }
849
-
850
- // Build result object while tracking order
851
- for (const child of children) {
852
- const shouldBeArray =
853
- options.alwaysArray || (nameCounts[child.name] || 0) > 1;
854
-
855
- // Track element order with its index in the array
856
- const currentIndex = nameIndices[child.name] || 0;
857
- orderedChildren.push({ type: child.name, index: currentIndex });
858
- nameIndices[child.name] = currentIndex + 1;
859
-
860
- if (shouldBeArray) {
861
- if (!result[child.name]) {
862
- result[child.name] = [];
863
- }
864
- (result[child.name] as ParsedXMLValue[]).push(child.value);
865
- } else {
866
- result[child.name] = child.value;
867
- }
868
- }
869
-
870
- // Add _orderedChildren to track element order (used by DocumentParser for runs)
871
- if (orderedChildren.length > 0) {
872
- result._orderedChildren = orderedChildren;
873
- }
874
-
875
- return result;
876
- }
877
-
878
- /**
879
- * Parses a string value to number or boolean if applicable
880
- * @private
881
- */
882
- private static parseValue(value: string): string | number | boolean {
883
- if (value === "true") return true;
884
- if (value === "false") return false;
885
-
886
- // Preserve 6-character hex color codes (OpenXML standard for colors)
887
- // This includes "000000" (black) which should stay as a string
888
- if (/^[0-9A-Fa-f]{6}$/.test(value)) {
889
- return value.toUpperCase(); // Normalize to uppercase per Microsoft convention
890
- }
891
-
892
- // Preserve long digit-only strings (e.g., cnfStyle binary strings like "100000000000")
893
- // These should not be converted to numbers to avoid losing leading zeros
894
- if (/^\d{7,}$/.test(value)) {
895
- return value; // Keep as string for values with 7+ digits
896
- }
897
-
898
- // Try parsing as number
899
- // 3-character values like "240" will be parsed as numbers
900
- // 6-character hex values are already handled above
901
- if (/^-?\d+$/.test(value)) {
902
- const num = parseInt(value, 10);
903
- if (!isNaN(num)) return num;
904
- }
905
-
906
- if (/^-?\d+\.\d+$/.test(value)) {
907
- const num = parseFloat(value);
908
- if (!isNaN(num)) return num;
909
- }
910
-
911
- // Preserve 3-character hex codes (like "F0A") that have letters
912
- // Pure numeric 3-char values (like "240") are already parsed as numbers above
913
- if (/^[0-9A-Fa-f]{3}$/.test(value) && /[A-Fa-f]/.test(value)) {
914
- return value.toUpperCase();
915
- }
916
-
917
- return value;
918
- }
919
- }
1
+ /**
2
+ * XMLParser - Simple position-based XML parser
3
+ * Avoids regex backtracking issues that can cause ReDoS attacks
4
+ * Completes the DocXML framework (XMLBuilder + XMLParser)
5
+ */
6
+
7
+ import { getGlobalLogger, createScopedLogger, ILogger } from '../utils/logger';
8
+ import { XMLBuilder } from './XMLBuilder';
9
+
10
+ // Create scoped logger for XMLParser operations
11
+ function getLogger(): ILogger {
12
+ return createScopedLogger(getGlobalLogger(), 'XMLParser');
13
+ }
14
+
15
+ /**
16
+ * Default maximum nesting depth for XML parsing.
17
+ * Prevents stack overflow on deeply nested documents.
18
+ */
19
+ export const DEFAULT_MAX_NESTING_DEPTH = 256;
20
+
21
+ /**
22
+ * Options for XML-to-object parsing
23
+ */
24
+ export interface ParseToObjectOptions {
25
+ /** Ignore attributes (default: false) */
26
+ ignoreAttributes?: boolean;
27
+
28
+ /** Attribute name prefix (default: '@_') */
29
+ attributeNamePrefix?: string;
30
+
31
+ /** Text node property name (default: '#text') */
32
+ textNodeName?: string;
33
+
34
+ /** Remove namespace prefixes from element names (default: false) */
35
+ ignoreNamespace?: boolean;
36
+
37
+ /** Parse numeric attribute values (default: true) */
38
+ parseAttributeValue?: boolean;
39
+
40
+ /** Trim whitespace from text values (default: true) */
41
+ trimValues?: boolean;
42
+
43
+ /** Always return arrays for elements (default: false) */
44
+ alwaysArray?: boolean;
45
+
46
+ /** Maximum nesting depth (default: 256). Prevents stack overflow on deeply nested documents. */
47
+ maxNestingDepth?: number;
48
+ }
49
+
50
+ /**
51
+ * Parsed XML object structure
52
+ * Can be a string, object, array, or nested structure
53
+ */
54
+ export type ParsedXMLValue =
55
+ | string
56
+ | number
57
+ | boolean
58
+ | ParsedXMLObject
59
+ | ParsedXMLObject[]
60
+ | null
61
+ | undefined;
62
+
63
+ /**
64
+ * Parsed XML object with dynamic keys
65
+ */
66
+ export interface ParsedXMLObject {
67
+ [key: string]: ParsedXMLValue;
68
+ }
69
+
70
+ /**
71
+ * Internal structure for tracking parsed elements during parsing
72
+ */
73
+ interface ParsedElement {
74
+ name: string;
75
+ value: ParsedXMLValue;
76
+ }
77
+
78
+ /**
79
+ * Simple XML parser using position-based parsing instead of regex
80
+ * Prevents catastrophic backtracking (ReDoS attacks) by avoiding nested regex patterns
81
+ */
82
+ export class XMLParser {
83
+ /**
84
+ * Extracts the body content from a Word document XML
85
+ * @param docXml - The complete document.xml content
86
+ * @returns The body content, or empty string if not found
87
+ */
88
+ static extractBody(docXml: string): string {
89
+ const startTag = '<w:body';
90
+ const endTag = '</w:body>';
91
+
92
+ const startIdx = docXml.indexOf(startTag);
93
+ if (startIdx === -1) return '';
94
+
95
+ // Find the closing > of opening tag
96
+ const openEnd = docXml.indexOf('>', startIdx);
97
+ if (openEnd === -1) return '';
98
+
99
+ // Find matching closing tag
100
+ const endIdx = docXml.indexOf(endTag, openEnd);
101
+ if (endIdx === -1) return '';
102
+
103
+ return docXml.substring(openEnd + 1, endIdx);
104
+ }
105
+
106
+ /**
107
+ * Extracts all elements of a given type using position-based parsing
108
+ * Handles nested tags correctly by tracking depth
109
+ * @param xml - XML content to parse
110
+ * @param tagName - Tag name to extract (e.g., 'w:p', 'w:r')
111
+ * @returns Array of XML strings for each element
112
+ */
113
+ static extractElements(xml: string, tagName: string): string[] {
114
+ const elements: string[] = [];
115
+ const openTag = `<${tagName}`;
116
+ const closeTag = `</${tagName}>`;
117
+ const selfClosingEnd = '/>';
118
+
119
+ let pos = 0;
120
+ while (pos < xml.length) {
121
+ const startIdx = xml.indexOf(openTag, pos);
122
+ if (startIdx === -1) break;
123
+
124
+ // Verify this is the exact tag (not a prefix match like <w:p matching <w:pPr>)
125
+ // The character after the tag name must be either '>', '/', whitespace, or '=' (for attributes)
126
+ const charAfterTag = xml[startIdx + openTag.length];
127
+ if (
128
+ charAfterTag &&
129
+ charAfterTag !== '>' &&
130
+ charAfterTag !== '/' &&
131
+ charAfterTag !== ' ' &&
132
+ charAfterTag !== '\t' &&
133
+ charAfterTag !== '\n' &&
134
+ charAfterTag !== '\r' &&
135
+ charAfterTag !== '='
136
+ ) {
137
+ // This is a prefix match (e.g., <w:pPr> when looking for <w:p>), skip it (Issue #5)
138
+ pos = startIdx + openTag.length;
139
+ continue;
140
+ }
141
+
142
+ // Find the end of opening tag
143
+ const openEnd = xml.indexOf('>', startIdx);
144
+ if (openEnd === -1) break;
145
+
146
+ // Check if self-closing
147
+ if (xml.substring(openEnd - 1, openEnd + 1) === selfClosingEnd) {
148
+ elements.push(xml.substring(startIdx, openEnd + 1));
149
+ pos = openEnd + 1;
150
+ continue;
151
+ }
152
+
153
+ // Find matching closing tag (handle nesting)
154
+ let depth = 1;
155
+ let searchPos = openEnd + 1;
156
+
157
+ while (depth > 0 && searchPos < xml.length) {
158
+ // Find next potential opening tag
159
+ let nextOpen = -1;
160
+ let openSearchPos = searchPos;
161
+ while (true) {
162
+ const candidateOpen = xml.indexOf(openTag, openSearchPos);
163
+ if (candidateOpen === -1) {
164
+ break;
165
+ }
166
+ // Verify it's an exact match (not a prefix)
167
+ const charAfter = xml[candidateOpen + openTag.length];
168
+ if (
169
+ charAfter &&
170
+ charAfter !== '>' &&
171
+ charAfter !== '/' &&
172
+ charAfter !== ' ' &&
173
+ charAfter !== '\t' &&
174
+ charAfter !== '\n' &&
175
+ charAfter !== '\r'
176
+ ) {
177
+ // Prefix match, keep searching
178
+ openSearchPos = candidateOpen + openTag.length;
179
+ continue;
180
+ }
181
+ nextOpen = candidateOpen;
182
+ break;
183
+ }
184
+
185
+ const nextClose = xml.indexOf(closeTag, searchPos);
186
+
187
+ if (nextClose === -1) break;
188
+
189
+ if (nextOpen !== -1 && nextOpen < nextClose) {
190
+ depth++;
191
+ searchPos = nextOpen + openTag.length;
192
+ } else {
193
+ depth--;
194
+ if (depth === 0) {
195
+ elements.push(xml.substring(startIdx, nextClose + closeTag.length));
196
+ pos = nextClose + closeTag.length;
197
+ } else {
198
+ searchPos = nextClose + closeTag.length;
199
+ }
200
+ }
201
+ }
202
+
203
+ if (depth > 0) {
204
+ // Unclosed tag - skip it
205
+ pos = startIdx + openTag.length;
206
+ }
207
+ }
208
+
209
+ return elements;
210
+ }
211
+
212
+ /**
213
+ * Extracts attribute value from an XML string
214
+ * @param xml - XML content
215
+ * @param attributeName - Attribute name (e.g., 'w:val')
216
+ * @returns Attribute value or undefined
217
+ */
218
+ static extractAttribute(xml: string, attributeName: string): string | undefined {
219
+ // Use simple indexOf for bounded string search (safe)
220
+ const attrPattern = `${attributeName}="`;
221
+ const startIdx = xml.indexOf(attrPattern);
222
+ if (startIdx === -1) return undefined;
223
+
224
+ const valueStart = startIdx + attrPattern.length;
225
+ const valueEnd = xml.indexOf('"', valueStart);
226
+ if (valueEnd === -1) return undefined;
227
+
228
+ const rawValue = xml.substring(valueStart, valueEnd);
229
+ // Unescape XML entities to get the actual value
230
+ // This prevents double-escaping when the value is later re-serialized
231
+ return XMLBuilder.unescapeXml(rawValue);
232
+ }
233
+
234
+ /**
235
+ * Checks if an XML string contains a self-closing tag
236
+ * @param xml - XML content
237
+ * @param tagName - Tag name to check
238
+ * @returns True if the tag exists as self-closing
239
+ */
240
+ static hasSelfClosingTag(xml: string, tagName: string): boolean {
241
+ return xml.includes(`<${tagName}/>`) || xml.includes(`<${tagName} `);
242
+ }
243
+
244
+ /**
245
+ * Checks if a boolean property tag is enabled (w:val="1" or w:val="true")
246
+ * Per ECMA-376, boolean properties can be:
247
+ * - Present with w:val="1" or w:val="true" (enabled)
248
+ * - Present with w:val="0" or w:val="false" (explicitly disabled)
249
+ * - Absent (disabled by default)
250
+ *
251
+ * @param xml - XML content to search
252
+ * @param tagName - Tag name (e.g., 'w:keepNext')
253
+ * @returns True if tag exists with w:val="1" or w:val="true", false otherwise
254
+ *
255
+ * @example
256
+ * hasBooleanProperty('<w:pPr><w:keepNext w:val="1"/></w:pPr>', 'w:keepNext'); // true
257
+ * hasBooleanProperty('<w:pPr><w:keepNext w:val="0"/></w:pPr>', 'w:keepNext'); // false
258
+ * hasBooleanProperty('<w:pPr><w:spacing/></w:pPr>', 'w:keepNext'); // false
259
+ */
260
+ static hasBooleanProperty(xml: string, tagName: string): boolean {
261
+ // Check for tag with w:val="1" or w:val="true"
262
+ if (xml.includes(`<${tagName} w:val="1"`) || xml.includes(`<${tagName} w:val="true"`)) {
263
+ return true;
264
+ }
265
+
266
+ // Check for self-closing tag without w:val attribute (means true per ECMA-376)
267
+ // Example: <w:b/> means bold=true
268
+ if (xml.includes(`<${tagName}/>`)) {
269
+ return true;
270
+ }
271
+
272
+ return false;
273
+ }
274
+
275
+ /**
276
+ * Extracts text content from within tags
277
+ * Finds all <w:t>...</w:t> tags and extracts their text
278
+ * @param xml - XML content
279
+ * @returns Combined text content
280
+ */
281
+ static extractText(xml: string): string {
282
+ const texts: string[] = [];
283
+ const openTag = '<w:t';
284
+ const closeTag = '</w:t>';
285
+
286
+ let pos = 0;
287
+ while (pos < xml.length) {
288
+ const startIdx = xml.indexOf(openTag, pos);
289
+ if (startIdx === -1) break;
290
+
291
+ // Find the end of opening tag
292
+ const openEnd = xml.indexOf('>', startIdx);
293
+ if (openEnd === -1) break;
294
+
295
+ // Find closing tag
296
+ const closeIdx = xml.indexOf(closeTag, openEnd);
297
+ if (closeIdx === -1) break;
298
+
299
+ // Extract text between tags
300
+ const text = xml.substring(openEnd + 1, closeIdx);
301
+ texts.push(text);
302
+
303
+ pos = closeIdx + closeTag.length;
304
+ }
305
+
306
+ return texts.join('');
307
+ }
308
+
309
+ /**
310
+ * Validates input size to prevent excessive memory usage
311
+ * @param xml - XML content
312
+ * @param maxSize - Maximum size in bytes (default: 10MB)
313
+ * @throws Error if XML exceeds max size
314
+ */
315
+ static validateSize(xml: string, maxSize: number = 10 * 1024 * 1024): void {
316
+ if (xml.length > maxSize) {
317
+ throw new Error(
318
+ `XML content too large for parsing (${(xml.length / 1024 / 1024).toFixed(1)}MB). ` +
319
+ `Maximum allowed: ${(maxSize / 1024 / 1024).toFixed(0)}MB`
320
+ );
321
+ }
322
+ }
323
+
324
+ /**
325
+ * Extracts content between two specific tags
326
+ * More efficient than regex for large documents
327
+ * @param xml - XML content
328
+ * @param startTag - Opening tag (e.g., '<w:pPr')
329
+ * @param endTag - Closing tag (e.g., '</w:pPr>')
330
+ * @returns Content between tags, or undefined if not found
331
+ */
332
+ static extractBetweenTags(xml: string, startTag: string, endTag: string): string | undefined {
333
+ const startIdx = xml.indexOf(startTag);
334
+ if (startIdx === -1) return undefined;
335
+
336
+ // Find the end of the opening tag
337
+ const openEnd = xml.indexOf('>', startIdx);
338
+ if (openEnd === -1) return undefined;
339
+
340
+ // Find the closing tag
341
+ const endIdx = xml.indexOf(endTag, openEnd);
342
+ if (endIdx === -1) return undefined;
343
+
344
+ return xml.substring(openEnd + 1, endIdx);
345
+ }
346
+
347
+ /**
348
+ * Extracts a complete self-closing tag with its attributes
349
+ * Handles cases where multiple similar tags exist (e.g., <w:sz.../> and <w:szCs.../>)
350
+ *
351
+ * @param xml - XML string to search
352
+ * @param tagName - Tag name to find (e.g., "w:color", "w:sz")
353
+ * @returns The complete tag content (attributes portion) or undefined if not found
354
+ *
355
+ * @example
356
+ * const xml = '<w:sz w:val="36"/><w:color w:val="FF0000"/>';
357
+ * const colorTag = XMLParser.extractSelfClosingTag(xml, 'w:color');
358
+ * // Returns: ' w:val="FF0000"'
359
+ */
360
+ static extractSelfClosingTag(xml: string, tagName: string): string | undefined {
361
+ const startPattern = `<${tagName}`;
362
+ let searchPos = 0;
363
+
364
+ // Search for the exact tag (not tags that start with this pattern)
365
+ while (true) {
366
+ const startIdx = xml.indexOf(startPattern, searchPos);
367
+ if (startIdx === -1) return undefined;
368
+
369
+ // Check what character follows the tag name
370
+ const charAfterTag = xml[startIdx + startPattern.length];
371
+
372
+ // Valid separators after tag name: space, '/', or '>'
373
+ if (charAfterTag === ' ' || charAfterTag === '/' || charAfterTag === '>') {
374
+ // Found the exact tag, now find its end
375
+ const endIdx = xml.indexOf('/>', startIdx);
376
+ if (endIdx === -1) {
377
+ // Try finding a closing tag instead (non-self-closing)
378
+ const closeTagStart = xml.indexOf('>', startIdx);
379
+ if (closeTagStart === -1) return undefined;
380
+
381
+ // Return attributes portion
382
+ return xml.substring(startIdx + startPattern.length, closeTagStart);
383
+ }
384
+
385
+ // Return attributes portion (between tag name and />)
386
+ return xml.substring(startIdx + startPattern.length, endIdx);
387
+ }
388
+
389
+ // Not the exact tag (e.g., found "w:sz" when looking for "w:s")
390
+ // Continue searching
391
+ searchPos = startIdx + 1;
392
+ }
393
+ }
394
+
395
/**
 * Converts an XML string into a plain JavaScript object.
 * The output shape follows the fast-xml-parser conventions: attributes are
 * stored under prefixed keys and repeated sibling elements become arrays.
 *
 * Steps: log the input size, fill in option defaults, run the input through
 * XMLParser.validateSize, remove any XML declaration, then parse the root
 * element at nesting depth 0.
 *
 * @param xml - XML string to parse
 * @param options - Parsing options; every omitted option falls back to its default
 * @returns Parsed JavaScript object, or `{}` when nothing is left after the
 *          XML declaration is removed and the input is trimmed
 *
 * @example
 * const xml = '<Relationships><Relationship Id="rId1" Target="https://example.com"/></Relationships>';
 * const obj = XMLParser.parseToObject(xml);
 * // Returns: { Relationships: { Relationship: { '@_Id': 'rId1', '@_Target': 'https://example.com' } } }
 *
 * @example
 * // Multiple elements become arrays
 * const xml = '<Items><Item id="1"/><Item id="2"/></Items>';
 * const obj = XMLParser.parseToObject(xml);
 * // Returns: { Items: { Item: [{ '@_id': '1' }, { '@_id': '2' }] } }
 */
static parseToObject(xml: string, options?: ParseToObjectOptions): ParsedXMLObject {
  const log = getLogger();
  log.debug('Parsing XML to object', { xmlSize: xml.length });

  // Resolve every option up front so the recursive parser never deals with undefined.
  const resolved: Required<ParseToObjectOptions> = {
    maxNestingDepth: options?.maxNestingDepth ?? DEFAULT_MAX_NESTING_DEPTH,
    alwaysArray: options?.alwaysArray ?? false,
    trimValues: options?.trimValues ?? true,
    parseAttributeValue: options?.parseAttributeValue ?? true,
    ignoreNamespace: options?.ignoreNamespace ?? false,
    textNodeName: options?.textNodeName ?? '#text',
    attributeNamePrefix: options?.attributeNamePrefix ?? '@_',
    ignoreAttributes: options?.ignoreAttributes ?? false,
  };

  // Size validation runs on the raw input, before any stripping.
  XMLParser.validateSize(xml);

  // Drop every <?xml ... ?> declaration (plus trailing whitespace), then trim.
  const body = xml.replace(/<\?xml[^>]*\?>\s*/g, '').trim();
  if (body.length === 0) {
    return {};
  }

  // The root element is parsed from offset 0 at depth 0.
  const { value } = XMLParser.parseElementToObject(body, 0, resolved, 0);
  log.debug('XML parsed to object');
  return value as ParsedXMLObject;
}
445
+
446
/**
 * Parses the first element found at or after `startPos` into an object of the
 * form `{ [elementName]: value }` and reports where parsing stopped.
 *
 * The value is:
 * - an object of attributes for self-closing elements and for elements whose
 *   closing tag cannot be found,
 * - a plain string when the element has text but no attributes and no children,
 * - otherwise an object combining attributes, text (under `options.textNodeName`)
 *   and coalesced children. Text is dropped when an element has children but
 *   no attributes.
 *
 * Comments in front of the element are skipped in a loop, so a long run of
 * comments cannot grow the call stack.
 *
 * @param xml - Text to parse
 * @param startPos - Offset in `xml` at which to start looking for `<`
 * @param options - Fully resolved parsing options
 * @param depth - Current nesting depth (0 for the root element)
 * @returns The parsed value and the offset just past the consumed input
 * @throws Error when `depth` exceeds `options.maxNestingDepth`
 * @private
 */
private static parseElementToObject(
  xml: string,
  startPos: number,
  options: Required<ParseToObjectOptions>,
  depth: number
): { value: ParsedXMLValue; endPos: number } {
  // Check nesting depth to prevent stack overflow
  if (depth > options.maxNestingDepth) {
    throw new Error(
      `XML nesting depth exceeds maximum of ${options.maxNestingDepth}. ` +
        `This may indicate malformed XML or an attack attempt. ` +
        `Use the maxNestingDepth option to increase the limit if needed.`
    );
  }

  // Find the opening tag, stepping over any comments that precede it.
  // This is a loop rather than recursion: skipping a comment does not
  // increase `depth`, so a recursive skip would be unbounded.
  let openTagStart = xml.indexOf('<', startPos);
  while (openTagStart !== -1 && xml.startsWith('<!--', openTagStart)) {
    const commentEnd = xml.indexOf('-->', openTagStart + 4);
    if (commentEnd === -1) {
      // Unterminated comment - nothing more to parse
      return { value: {}, endPos: xml.length };
    }
    openTagStart = xml.indexOf('<', commentEnd + 3);
  }
  if (openTagStart === -1) {
    return { value: {}, endPos: xml.length };
  }

  // Extract element name; anything that does not start with a name character
  // (closing tags, declarations, ...) yields an empty result one char ahead.
  const nameMatch = /^([a-zA-Z0-9:_-]+)/.exec(xml.substring(openTagStart + 1));
  if (!nameMatch) {
    return { value: {}, endPos: openTagStart + 1 };
  }

  const originalElementName: string = nameMatch[1] || '';
  const tagHeaderEnd = xml.indexOf('>', openTagStart);
  if (tagHeaderEnd === -1) {
    return { value: {}, endPos: xml.length };
  }

  // Remove namespace if requested (the original name is still needed for
  // offsets and for matching the closing tag)
  let elementName: string = originalElementName;
  if (options.ignoreNamespace && elementName.includes(':')) {
    elementName = elementName.split(':')[1] || elementName;
  }

  // Everything between the element name and '>' holds the attributes
  const tagHeader = xml.substring(openTagStart + 1 + originalElementName.length, tagHeaderEnd);
  const attributes = XMLParser.extractAttributesFromTag(tagHeader, options);

  // Self-closing when the header ends with '/'. The character before '>' is
  // always part of tagHeader, so checking tagHeader alone is sufficient.
  if (tagHeader.trim().endsWith('/')) {
    const selfClosingValue: ParsedXMLObject = { ...attributes };
    return {
      value: { [elementName]: selfClosingValue },
      endPos: tagHeaderEnd + 1,
    };
  }

  // Find closing tag (use original name with namespace for correct matching)
  const closingTag = `</${originalElementName}>`;
  const contentStart = tagHeaderEnd + 1;
  const closingTagPos = XMLParser.findClosingTag(xml, originalElementName, contentStart);

  if (closingTagPos === -1) {
    // No closing tag found - treat as self-closing
    return {
      value: { [elementName]: { ...attributes } },
      endPos: tagHeaderEnd + 1,
    };
  }

  // Child offsets below are relative to `content`, not to `xml`
  const content = xml.substring(contentStart, closingTagPos);
  const children: ParsedElement[] = [];
  let textContent = '';

  // Appends one raw text segment. With trimValues on, whitespace-only segments
  // are skipped; with it off, they are preserved. XML entities are unescaped.
  const collectText = (segment: string): void => {
    if (segment.length > 0 && (!options.trimValues || segment.trim())) {
      textContent += XMLBuilder.unescapeXml(segment);
    }
  };

  let pos = 0;
  while (pos < content.length) {
    const nextTag = content.indexOf('<', pos);

    if (nextTag === -1) {
      // No more tags - rest is text
      collectText(content.substring(pos));
      break;
    }

    // Collect text before next tag
    if (nextTag > pos) {
      collectText(content.substring(pos, nextTag));
    }

    // Parse child element (increment depth for children)
    const childResult = XMLParser.parseElementToObject(content, nextTag, options, depth + 1);
    const childObj = childResult.value as ParsedXMLObject;

    // A child result has at most one key: the child's element name
    const childName = Object.keys(childObj)[0];
    if (childName) {
      children.push({ name: childName, value: childObj[childName] });
    }

    pos = childResult.endPos;
  }

  // Build element value, starting from the attributes (if any)
  const hasAttributes = !options.ignoreAttributes && Object.keys(attributes).length > 0;
  let elementValue: ParsedXMLValue = hasAttributes ? { ...attributes } : {};

  // Add text content
  if (textContent.length > 0 && (!options.trimValues || textContent.trim())) {
    const text = options.trimValues ? textContent.trim() : textContent;
    if (hasAttributes) {
      // Text with attributes - store it under the text node key
      elementValue = { ...attributes, [options.textNodeName]: text };
    } else {
      // Only text, no attributes - use the string itself as the value
      elementValue = text;
    }
  }

  // Add children. When the value so far is a bare string (text without
  // attributes) the children replace it.
  if (children.length > 0) {
    const coalescedChildren = XMLParser.coalesceChildren(children, options);
    if (typeof elementValue === 'object' && !Array.isArray(elementValue)) {
      elementValue = { ...elementValue, ...coalescedChildren };
    } else {
      elementValue = coalescedChildren;
    }
  }

  return {
    value: { [elementName]: elementValue },
    endPos: closingTagPos + closingTag.length,
  };
}
626
+
627
/**
 * Reads the attributes out of a tag header (the text between the element
 * name and the closing '>').
 *
 * Scanning stops at the end of the header, at a '/', or at the first
 * character that cannot start an attribute name. A name that is not followed
 * by a quoted value is recorded with an empty string value.
 *
 * @param tagHeader - Tag text after the element name, without the final '>'
 * @param options - Resolved parsing options (ignoreAttributes, ignoreNamespace,
 *                  attributeNamePrefix and parseAttributeValue are consulted)
 * @returns Map from prefixed attribute name to its value; empty when
 *          `options.ignoreAttributes` is set
 * @private
 */
private static extractAttributesFromTag(
  tagHeader: string,
  options: Required<ParseToObjectOptions>
): Record<string, string | number | boolean> {
  const attributes: Record<string, string | number | boolean> = {};
  if (options.ignoreAttributes) {
    return attributes;
  }

  const length = tagHeader.length;
  let cursor = 0;

  // Moves the cursor forward while the current character matches `pattern`.
  const advanceWhile = (pattern: RegExp): void => {
    while (cursor < length && pattern.test(tagHeader.charAt(cursor))) {
      cursor++;
    }
  };

  for (;;) {
    // Leading whitespace before the next attribute
    advanceWhile(/\s/);
    if (cursor >= length || tagHeader.charAt(cursor) === '/') {
      break;
    }

    // Attribute name
    const nameStart = cursor;
    advanceWhile(/[a-zA-Z0-9:_-]/);
    if (cursor === nameStart) {
      // Not a name character - give up on the rest of the header
      break;
    }
    const rawName = tagHeader.substring(nameStart, cursor);

    // Whitespace and '=' between name and value
    advanceWhile(/[\s=]/);

    // Quoted value; an unterminated quote takes everything up to the end
    let rawValue = '';
    const opener = tagHeader.charAt(cursor);
    if (opener === '"' || opener === "'") {
      const valueStart = cursor + 1;
      const closer = tagHeader.indexOf(opener, valueStart);
      const valueEnd = closer === -1 ? length : closer;
      rawValue = tagHeader.substring(valueStart, valueEnd);
      cursor = valueEnd + 1; // step past the closing quote
    }

    // Optionally drop the namespace prefix, then apply the attribute prefix
    const localName =
      options.ignoreNamespace && rawName.includes(':')
        ? rawName.split(':')[1] || rawName
        : rawName;
    const key = options.attributeNamePrefix + localName;

    attributes[key] = options.parseAttributeValue ? XMLParser.parseValue(rawValue) : rawValue;
  }

  return attributes;
}
716
+
717
/**
 * Locates the closing tag that matches an already-opened element, counting
 * nested elements of the same name.
 *
 * @param xml - Text to search
 * @param elementName - Element name exactly as written (including any namespace prefix)
 * @param startPos - Offset just after the element's opening tag
 * @returns Offset of the matching `</elementName>`, or -1 when there is none
 * @private
 */
private static findClosingTag(xml: string, elementName: string, startPos: number): number {
  const openTag = `<${elementName}`;
  const closeTag = `</${elementName}>`;

  // True when `ch` can legally follow a tag name, which rules out prefix
  // matches such as <w:pPrChange when looking for <w:pPr.
  const endsTagName = (ch: string | undefined): boolean =>
    ch === '>' || ch === ' ' || ch === '/' || ch === '\t' || ch === '\n' || ch === '\r';

  // Returns the offset of the first same-named opening tag in [from, limit)
  // that is neither a prefix match nor self-closing, or -1 if there is none.
  const findNestedOpen = (from: number, limit: number): number => {
    let cursor = from;
    while (cursor < limit) {
      const candidate = xml.indexOf(openTag, cursor);
      if (candidate === -1 || candidate >= limit) {
        return -1;
      }

      if (!endsTagName(xml[candidate + openTag.length])) {
        // Prefix match only - keep looking after it
        cursor = candidate + openTag.length;
        continue;
      }

      const tagEnd = xml.indexOf('>', candidate);
      const selfClosing = tagEnd !== -1 && xml[tagEnd - 1] === '/';
      if (!selfClosing) {
        return candidate;
      }

      // Self-closing tags such as <w:rPr/> never need a closing tag - skip
      cursor = tagEnd + 1;
    }
    return -1;
  };

  let openCount = 1;
  let pos = startPos;

  while (openCount > 0 && pos < xml.length) {
    const nextClose = xml.indexOf(closeTag, pos);
    if (nextClose === -1) {
      return -1; // No closing tag found
    }

    const nestedOpen = findNestedOpen(pos, nextClose);
    if (nestedOpen !== -1) {
      // A nested element opens before this closing tag - it owns one closer
      openCount++;
      pos = nestedOpen + openTag.length;
      continue;
    }

    // This closing tag ends the innermost open element
    openCount--;
    if (openCount === 0) {
      return nextClose;
    }
    pos = nextClose + closeTag.length;
  }

  return -1;
}
787
+
788
/**
 * Merges a list of parsed child elements into one object keyed by element name.
 *
 * A name that occurs more than once (or any name when `options.alwaysArray`
 * is set) maps to an array of values in document order; otherwise it maps to
 * the single value. When there is at least one child, the result also gets an
 * `_orderedChildren` list recording, for each child in document order, its
 * name (`type`) and its index among the children of that name.
 *
 * Counters are kept in Maps and values are written as own properties, so
 * element names that collide with Object.prototype members (`constructor`,
 * `toString`, `__proto__`, ...) are handled like any other name.
 *
 * @param children - Parsed children in document order
 * @param options - Resolved parsing options (only `alwaysArray` is consulted)
 * @returns Object holding the coalesced children
 * @private
 */
private static coalesceChildren(
  children: ParsedElement[],
  options: Required<ParseToObjectOptions>
): ParsedXMLObject {
  const result: ParsedXMLObject = {};

  // Writes an own, enumerable property. Plain assignment would go through the
  // inherited `__proto__` setter for that key instead of storing the child.
  const setOwn = (key: string, value: unknown): void => {
    Object.defineProperty(result, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  };

  // Count occurrences of each child name
  const totals = new Map<string, number>();
  for (const child of children) {
    totals.set(child.name, (totals.get(child.name) ?? 0) + 1);
  }

  // Track element order for correct run content parsing (tabs, breaks, text).
  // This preserves the order of mixed content like: text -> tab -> text
  const orderedChildren: { type: string; index: number }[] = [];
  const nextIndex = new Map<string, number>();
  const buckets = new Map<string, ParsedXMLValue[]>();

  for (const child of children) {
    // Record the child's position among same-named siblings
    const index = nextIndex.get(child.name) ?? 0;
    nextIndex.set(child.name, index + 1);
    orderedChildren.push({ type: child.name, index });

    const shouldBeArray = options.alwaysArray || (totals.get(child.name) ?? 0) > 1;
    if (!shouldBeArray) {
      setOwn(child.name, child.value);
      continue;
    }

    let bucket = buckets.get(child.name);
    if (bucket === undefined) {
      bucket = [];
      buckets.set(child.name, bucket);
      setOwn(child.name, bucket);
    }
    bucket.push(child.value);
  }

  // Add _orderedChildren to track element order (used by DocumentParser for runs)
  if (orderedChildren.length > 0) {
    result._orderedChildren = orderedChildren;
  }

  return result;
}
835
+
836
/**
 * Converts an attribute string to a boolean or number when its text has that
 * shape, and otherwise returns a string.
 *
 * Rules, applied in order:
 * 1. `"true"` / `"false"` become booleans.
 * 2. Exactly six hex digits (including all-digit values such as "000000")
 *    stay a string, upper-cased.
 * 3. Seven or more digits stay a string unchanged, which keeps leading zeros
 *    in values like "100000000000".
 * 4. Optionally signed integers and decimals become numbers.
 * 5. Exactly three hex digits containing at least one letter stay a string,
 *    upper-cased.
 * 6. Anything else is returned unchanged.
 *
 * @param value - Raw attribute text
 * @returns The converted value
 * @private
 */
private static parseValue(value: string): string | number | boolean {
  switch (value) {
    case 'true':
      return true;
    case 'false':
      return false;
  }

  // Six hex digits are treated as a color code, never as a number
  const isSixDigitHex = /^[0-9A-Fa-f]{6}$/.test(value);
  if (isSixDigitHex) {
    return value.toUpperCase();
  }

  // Long digit runs stay textual so leading zeros survive
  const isLongDigitRun = /^\d{7,}$/.test(value);
  if (isLongDigitRun) {
    return value;
  }

  // Integers (three-digit values like "240" land here)
  if (/^-?\d+$/.test(value)) {
    return parseInt(value, 10);
  }

  // Decimals
  if (/^-?\d+\.\d+$/.test(value)) {
    return parseFloat(value);
  }

  // Three hex digits with a letter (like "F0A"); all-digit ones were parsed above
  const isShortHexWithLetter = /^[0-9A-Fa-f]{3}$/.test(value) && /[A-Fa-f]/.test(value);
  return isShortHexWithLetter ? value.toUpperCase() : value;
}
877
+ }