docxmlater 10.0.1 → 10.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. package/README.md +3 -2
  2. package/dist/constants/legacyCompatFlags.d.ts.map +1 -1
  3. package/dist/constants/legacyCompatFlags.js.map +1 -1
  4. package/dist/constants/limits.d.ts +0 -27
  5. package/dist/constants/limits.d.ts.map +1 -1
  6. package/dist/constants/limits.js +13 -13
  7. package/dist/constants/limits.js.map +1 -1
  8. package/dist/core/Document.d.ts +24 -19
  9. package/dist/core/Document.d.ts.map +1 -1
  10. package/dist/core/Document.js +272 -71
  11. package/dist/core/Document.js.map +1 -1
  12. package/dist/core/DocumentContent.d.ts.map +1 -1
  13. package/dist/core/DocumentContent.js.map +1 -1
  14. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  15. package/dist/core/DocumentGenerator.js +59 -24
  16. package/dist/core/DocumentGenerator.js.map +1 -1
  17. package/dist/core/DocumentIdManager.d.ts.map +1 -1
  18. package/dist/core/DocumentIdManager.js.map +1 -1
  19. package/dist/core/DocumentParser.d.ts +6 -6
  20. package/dist/core/DocumentParser.d.ts.map +1 -1
  21. package/dist/core/DocumentParser.js +60 -54
  22. package/dist/core/DocumentParser.js.map +1 -1
  23. package/dist/core/DocumentValidator.d.ts.map +1 -1
  24. package/dist/core/DocumentValidator.js.map +1 -1
  25. package/dist/core/Relationship.d.ts.map +1 -1
  26. package/dist/core/Relationship.js +1 -1
  27. package/dist/core/Relationship.js.map +1 -1
  28. package/dist/core/RelationshipManager.js +3 -3
  29. package/dist/core/RelationshipManager.js.map +1 -1
  30. package/dist/elements/AlternateContent.js.map +1 -1
  31. package/dist/elements/Bookmark.d.ts.map +1 -1
  32. package/dist/elements/Bookmark.js.map +1 -1
  33. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  34. package/dist/elements/BookmarkManager.js.map +1 -1
  35. package/dist/elements/Comment.js +1 -1
  36. package/dist/elements/Comment.js.map +1 -1
  37. package/dist/elements/CommentManager.d.ts.map +1 -1
  38. package/dist/elements/CommentManager.js +8 -2
  39. package/dist/elements/CommentManager.js.map +1 -1
  40. package/dist/elements/CommonTypes.d.ts.map +1 -1
  41. package/dist/elements/CommonTypes.js +1 -2
  42. package/dist/elements/CommonTypes.js.map +1 -1
  43. package/dist/elements/CustomXml.js.map +1 -1
  44. package/dist/elements/Endnote.d.ts.map +1 -1
  45. package/dist/elements/Endnote.js.map +1 -1
  46. package/dist/elements/EndnoteManager.d.ts.map +1 -1
  47. package/dist/elements/EndnoteManager.js.map +1 -1
  48. package/dist/elements/Field.d.ts.map +1 -1
  49. package/dist/elements/Field.js +31 -28
  50. package/dist/elements/Field.js.map +1 -1
  51. package/dist/elements/FieldHelpers.d.ts.map +1 -1
  52. package/dist/elements/FieldHelpers.js +6 -6
  53. package/dist/elements/FieldHelpers.js.map +1 -1
  54. package/dist/elements/FontManager.d.ts.map +1 -1
  55. package/dist/elements/FontManager.js.map +1 -1
  56. package/dist/elements/Footer.js.map +1 -1
  57. package/dist/elements/Footnote.d.ts.map +1 -1
  58. package/dist/elements/Footnote.js.map +1 -1
  59. package/dist/elements/FootnoteManager.d.ts.map +1 -1
  60. package/dist/elements/FootnoteManager.js.map +1 -1
  61. package/dist/elements/Header.js.map +1 -1
  62. package/dist/elements/HeaderFooterManager.js.map +1 -1
  63. package/dist/elements/Hyperlink.d.ts.map +1 -1
  64. package/dist/elements/Hyperlink.js +5 -5
  65. package/dist/elements/Hyperlink.js.map +1 -1
  66. package/dist/elements/Image.d.ts +2 -2
  67. package/dist/elements/Image.d.ts.map +1 -1
  68. package/dist/elements/Image.js +21 -5
  69. package/dist/elements/Image.js.map +1 -1
  70. package/dist/elements/ImageManager.d.ts.map +1 -1
  71. package/dist/elements/ImageManager.js +2 -2
  72. package/dist/elements/ImageManager.js.map +1 -1
  73. package/dist/elements/ImageRun.js.map +1 -1
  74. package/dist/elements/MathElement.js.map +1 -1
  75. package/dist/elements/Paragraph.d.ts.map +1 -1
  76. package/dist/elements/Paragraph.js +128 -117
  77. package/dist/elements/Paragraph.js.map +1 -1
  78. package/dist/elements/PreservedElement.js.map +1 -1
  79. package/dist/elements/PropertyChangeTypes.js.map +1 -1
  80. package/dist/elements/RangeMarker.js.map +1 -1
  81. package/dist/elements/Revision.d.ts +1 -0
  82. package/dist/elements/Revision.d.ts.map +1 -1
  83. package/dist/elements/Revision.js +44 -5
  84. package/dist/elements/Revision.js.map +1 -1
  85. package/dist/elements/RevisionContent.js.map +1 -1
  86. package/dist/elements/RevisionManager.d.ts.map +1 -1
  87. package/dist/elements/RevisionManager.js.map +1 -1
  88. package/dist/elements/Run.d.ts.map +1 -1
  89. package/dist/elements/Run.js +1 -3
  90. package/dist/elements/Run.js.map +1 -1
  91. package/dist/elements/Section.d.ts.map +1 -1
  92. package/dist/elements/Section.js +127 -118
  93. package/dist/elements/Section.js.map +1 -1
  94. package/dist/elements/Shape.d.ts.map +1 -1
  95. package/dist/elements/Shape.js +21 -0
  96. package/dist/elements/Shape.js.map +1 -1
  97. package/dist/elements/StructuredDocumentTag.d.ts.map +1 -1
  98. package/dist/elements/StructuredDocumentTag.js +20 -8
  99. package/dist/elements/StructuredDocumentTag.js.map +1 -1
  100. package/dist/elements/Table.d.ts +2 -2
  101. package/dist/elements/Table.d.ts.map +1 -1
  102. package/dist/elements/Table.js +29 -35
  103. package/dist/elements/Table.js.map +1 -1
  104. package/dist/elements/TableCell.d.ts +2 -2
  105. package/dist/elements/TableCell.d.ts.map +1 -1
  106. package/dist/elements/TableCell.js +63 -67
  107. package/dist/elements/TableCell.js.map +1 -1
  108. package/dist/elements/TableGridChange.js.map +1 -1
  109. package/dist/elements/TableOfContents.d.ts +6 -6
  110. package/dist/elements/TableOfContents.d.ts.map +1 -1
  111. package/dist/elements/TableOfContents.js.map +1 -1
  112. package/dist/elements/TableOfContentsElement.js.map +1 -1
  113. package/dist/elements/TableRow.d.ts.map +1 -1
  114. package/dist/elements/TableRow.js +65 -47
  115. package/dist/elements/TableRow.js.map +1 -1
  116. package/dist/elements/TextBox.d.ts.map +1 -1
  117. package/dist/elements/TextBox.js +1 -1
  118. package/dist/elements/TextBox.js.map +1 -1
  119. package/dist/formatting/AbstractNumbering.d.ts +1 -1
  120. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  121. package/dist/formatting/AbstractNumbering.js +11 -11
  122. package/dist/formatting/AbstractNumbering.js.map +1 -1
  123. package/dist/formatting/NumberingInstance.d.ts.map +1 -1
  124. package/dist/formatting/NumberingInstance.js +4 -4
  125. package/dist/formatting/NumberingInstance.js.map +1 -1
  126. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  127. package/dist/formatting/NumberingLevel.js +26 -26
  128. package/dist/formatting/NumberingLevel.js.map +1 -1
  129. package/dist/formatting/NumberingManager.d.ts +1 -1
  130. package/dist/formatting/NumberingManager.d.ts.map +1 -1
  131. package/dist/formatting/NumberingManager.js.map +1 -1
  132. package/dist/formatting/Style.d.ts.map +1 -1
  133. package/dist/formatting/Style.js +87 -95
  134. package/dist/formatting/Style.js.map +1 -1
  135. package/dist/formatting/StylesManager.d.ts +3 -3
  136. package/dist/formatting/StylesManager.d.ts.map +1 -1
  137. package/dist/formatting/StylesManager.js.map +1 -1
  138. package/dist/helpers/CleanupHelper.d.ts.map +1 -1
  139. package/dist/helpers/CleanupHelper.js +1 -7
  140. package/dist/helpers/CleanupHelper.js.map +1 -1
  141. package/dist/images/ImageOptimizer.js.map +1 -1
  142. package/dist/index.js.map +1 -1
  143. package/dist/managers/DrawingManager.d.ts.map +1 -1
  144. package/dist/managers/DrawingManager.js.map +1 -1
  145. package/dist/tracking/DocumentTrackingContext.js.map +1 -1
  146. package/dist/tracking/TrackingContext.js.map +1 -1
  147. package/dist/types/compatibility-types.js.map +1 -1
  148. package/dist/types/formatting.js.map +1 -1
  149. package/dist/types/list-types.d.ts +4 -4
  150. package/dist/types/list-types.d.ts.map +1 -1
  151. package/dist/types/list-types.js.map +1 -1
  152. package/dist/types/settings-types.js.map +1 -1
  153. package/dist/types/styleConfig.js.map +1 -1
  154. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  155. package/dist/utils/ChangelogGenerator.js.map +1 -1
  156. package/dist/utils/CompatibilityUpgrader.d.ts.map +1 -1
  157. package/dist/utils/CompatibilityUpgrader.js +7 -7
  158. package/dist/utils/CompatibilityUpgrader.js.map +1 -1
  159. package/dist/utils/InMemoryRevisionAcceptor.js +1 -1
  160. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  161. package/dist/utils/MoveOperationHelper.js.map +1 -1
  162. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  163. package/dist/utils/RevisionWalker.js.map +1 -1
  164. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  165. package/dist/utils/ShadingResolver.js +1 -1
  166. package/dist/utils/ShadingResolver.js.map +1 -1
  167. package/dist/utils/acceptRevisions.d.ts +0 -28
  168. package/dist/utils/acceptRevisions.d.ts.map +1 -1
  169. package/dist/utils/acceptRevisions.js +5 -7
  170. package/dist/utils/acceptRevisions.js.map +1 -1
  171. package/dist/utils/cnfStyleDecoder.js +1 -1
  172. package/dist/utils/cnfStyleDecoder.js.map +1 -1
  173. package/dist/utils/corruptionDetection.js.map +1 -1
  174. package/dist/utils/dateFormatting.js.map +1 -1
  175. package/dist/utils/deepClone.d.ts +0 -1
  176. package/dist/utils/deepClone.d.ts.map +1 -1
  177. package/dist/utils/deepClone.js +0 -7
  178. package/dist/utils/deepClone.js.map +1 -1
  179. package/dist/utils/diagnostics.d.ts +2 -2
  180. package/dist/utils/diagnostics.d.ts.map +1 -1
  181. package/dist/utils/diagnostics.js.map +1 -1
  182. package/dist/utils/errorHandling.js.map +1 -1
  183. package/dist/utils/formatting.js.map +1 -1
  184. package/dist/utils/list-detection.d.ts +2 -2
  185. package/dist/utils/list-detection.d.ts.map +1 -1
  186. package/dist/utils/list-detection.js +3 -3
  187. package/dist/utils/list-detection.js.map +1 -1
  188. package/dist/utils/logger.d.ts +2 -4
  189. package/dist/utils/logger.d.ts.map +1 -1
  190. package/dist/utils/logger.js +0 -2
  191. package/dist/utils/logger.js.map +1 -1
  192. package/dist/utils/parsingHelpers.js.map +1 -1
  193. package/dist/utils/stripTrackedChanges.d.ts +0 -19
  194. package/dist/utils/stripTrackedChanges.d.ts.map +1 -1
  195. package/dist/utils/stripTrackedChanges.js +0 -2
  196. package/dist/utils/stripTrackedChanges.js.map +1 -1
  197. package/dist/utils/textDiff.js.map +1 -1
  198. package/dist/utils/units.js.map +1 -1
  199. package/dist/utils/validation.d.ts.map +1 -1
  200. package/dist/utils/validation.js.map +1 -1
  201. package/dist/utils/xmlSanitization.js.map +1 -1
  202. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  203. package/dist/validation/RevisionValidator.js.map +1 -1
  204. package/dist/validation/ValidationRules.js.map +1 -1
  205. package/dist/validation/index.js.map +1 -1
  206. package/dist/xml/XMLBuilder.d.ts.map +1 -1
  207. package/dist/xml/XMLBuilder.js +10 -0
  208. package/dist/xml/XMLBuilder.js.map +1 -1
  209. package/dist/xml/XMLParser.d.ts.map +1 -1
  210. package/dist/xml/XMLParser.js +4 -5
  211. package/dist/xml/XMLParser.js.map +1 -1
  212. package/dist/zip/ZipHandler.js.map +1 -1
  213. package/dist/zip/ZipReader.js.map +1 -1
  214. package/dist/zip/ZipWriter.js.map +1 -1
  215. package/dist/zip/errors.js.map +1 -1
  216. package/dist/zip/types.js.map +1 -1
  217. package/package.json +34 -4
  218. package/src/__tests__/helper-methods.test.ts +512 -0
  219. package/src/constants/legacyCompatFlags.ts +138 -0
  220. package/src/constants/limits.ts +50 -0
  221. package/src/core/CLAUDE.md +109 -0
  222. package/src/core/Document.ts +15569 -0
  223. package/src/core/DocumentContent.ts +467 -0
  224. package/src/core/DocumentGenerator.ts +1104 -0
  225. package/src/core/DocumentIdManager.ts +158 -0
  226. package/src/core/DocumentParser.ts +10107 -0
  227. package/src/core/DocumentValidator.ts +372 -0
  228. package/src/core/Relationship.ts +367 -0
  229. package/src/core/RelationshipManager.ts +428 -0
  230. package/src/elements/AlternateContent.ts +42 -0
  231. package/src/elements/Bookmark.ts +210 -0
  232. package/src/elements/BookmarkManager.ts +250 -0
  233. package/src/elements/CLAUDE.md +126 -0
  234. package/src/elements/Comment.ts +359 -0
  235. package/src/elements/CommentManager.ts +502 -0
  236. package/src/elements/CommonTypes.ts +549 -0
  237. package/src/elements/CustomXml.ts +36 -0
  238. package/src/elements/Endnote.ts +217 -0
  239. package/src/elements/EndnoteManager.ts +249 -0
  240. package/src/elements/Field.ts +1233 -0
  241. package/src/elements/FieldHelpers.ts +333 -0
  242. package/src/elements/FontManager.ts +339 -0
  243. package/src/elements/Footer.ts +269 -0
  244. package/src/elements/Footnote.ts +217 -0
  245. package/src/elements/FootnoteManager.ts +249 -0
  246. package/src/elements/Header.ts +269 -0
  247. package/src/elements/HeaderFooterManager.ts +219 -0
  248. package/src/elements/Hyperlink.ts +1146 -0
  249. package/src/elements/Image.ts +1756 -0
  250. package/src/elements/ImageManager.ts +432 -0
  251. package/src/elements/ImageRun.ts +59 -0
  252. package/src/elements/MathElement.ts +65 -0
  253. package/src/elements/Paragraph.ts +4227 -0
  254. package/src/elements/PreservedElement.ts +53 -0
  255. package/src/elements/PropertyChangeTypes.ts +442 -0
  256. package/src/elements/RangeMarker.ts +400 -0
  257. package/src/elements/Revision.ts +1217 -0
  258. package/src/elements/RevisionContent.ts +73 -0
  259. package/src/elements/RevisionManager.ts +1070 -0
  260. package/src/elements/Run.ts +3068 -0
  261. package/src/elements/Section.ts +1421 -0
  262. package/src/elements/Shape.ts +873 -0
  263. package/src/elements/StructuredDocumentTag.ts +978 -0
  264. package/src/elements/Table.ts +2524 -0
  265. package/src/elements/TableCell.ts +1586 -0
  266. package/src/elements/TableGridChange.ts +151 -0
  267. package/src/elements/TableOfContents.ts +691 -0
  268. package/src/elements/TableOfContentsElement.ts +89 -0
  269. package/src/elements/TableRow.ts +906 -0
  270. package/src/elements/TextBox.ts +768 -0
  271. package/src/formatting/AbstractNumbering.ts +548 -0
  272. package/src/formatting/CLAUDE.md +74 -0
  273. package/src/formatting/NumberingInstance.ts +212 -0
  274. package/src/formatting/NumberingLevel.ts +1006 -0
  275. package/src/formatting/NumberingManager.ts +827 -0
  276. package/src/formatting/Style.ts +1833 -0
  277. package/src/formatting/StylesManager.ts +1005 -0
  278. package/src/helpers/CleanupHelper.ts +524 -0
  279. package/src/images/ImageOptimizer.ts +274 -0
  280. package/src/index.ts +554 -0
  281. package/src/managers/CLAUDE.md +47 -0
  282. package/src/managers/DrawingManager.ts +319 -0
  283. package/src/tracking/DocumentTrackingContext.ts +643 -0
  284. package/src/tracking/TrackingContext.ts +173 -0
  285. package/src/types/compatibility-types.ts +49 -0
  286. package/src/types/formatting.ts +210 -0
  287. package/src/types/list-types.ts +152 -0
  288. package/src/types/settings-types.ts +59 -0
  289. package/src/types/styleConfig.ts +189 -0
  290. package/src/utils/CLAUDE.md +153 -0
  291. package/src/utils/ChangelogGenerator.ts +1581 -0
  292. package/src/utils/CompatibilityUpgrader.ts +237 -0
  293. package/src/utils/InMemoryRevisionAcceptor.ts +668 -0
  294. package/src/utils/MoveOperationHelper.ts +238 -0
  295. package/src/utils/RevisionAwareProcessor.ts +526 -0
  296. package/src/utils/RevisionWalker.ts +457 -0
  297. package/src/utils/SelectiveRevisionAcceptor.ts +613 -0
  298. package/src/utils/ShadingResolver.ts +107 -0
  299. package/src/utils/acceptRevisions.ts +714 -0
  300. package/src/utils/cnfStyleDecoder.ts +217 -0
  301. package/src/utils/corruptionDetection.ts +345 -0
  302. package/src/utils/dateFormatting.ts +20 -0
  303. package/src/utils/deepClone.ts +78 -0
  304. package/src/utils/diagnostics.ts +129 -0
  305. package/src/utils/errorHandling.ts +80 -0
  306. package/src/utils/formatting.ts +213 -0
  307. package/src/utils/list-detection.ts +274 -0
  308. package/src/utils/logger.ts +404 -0
  309. package/src/utils/parsingHelpers.ts +190 -0
  310. package/src/utils/stripTrackedChanges.ts +353 -0
  311. package/src/utils/textDiff.ts +100 -0
  312. package/src/utils/units.ts +421 -0
  313. package/src/utils/validation.ts +542 -0
  314. package/src/utils/xmlSanitization.ts +182 -0
  315. package/src/validation/RevisionAutoFixer.ts +542 -0
  316. package/src/validation/RevisionValidator.ts +460 -0
  317. package/src/validation/ValidationRules.ts +338 -0
  318. package/src/validation/index.ts +30 -0
  319. package/src/xml/CLAUDE.md +65 -0
  320. package/src/xml/XMLBuilder.ts +871 -0
  321. package/src/xml/XMLParser.ts +919 -0
  322. package/src/zip/CLAUDE.md +55 -0
  323. package/src/zip/ZipHandler.ts +637 -0
  324. package/src/zip/ZipReader.ts +299 -0
  325. package/src/zip/ZipWriter.ts +390 -0
  326. package/src/zip/errors.ts +69 -0
  327. package/src/zip/types.ts +116 -0
  328. package/dist/core/ListNormalizer.d.ts +0 -23
  329. package/dist/core/ListNormalizer.d.ts.map +0 -1
  330. package/dist/core/ListNormalizer.js +0 -624
  331. package/dist/core/ListNormalizer.js.map +0 -1
  332. package/dist/images/index.d.ts +0 -2
  333. package/dist/images/index.d.ts.map +0 -1
  334. package/dist/images/index.js +0 -8
  335. package/dist/images/index.js.map +0 -1
  336. package/dist/ms-doc/cfb/CFBReader.d.ts +0 -35
  337. package/dist/ms-doc/cfb/CFBReader.d.ts.map +0 -1
  338. package/dist/ms-doc/cfb/CFBReader.js +0 -360
  339. package/dist/ms-doc/cfb/CFBReader.js.map +0 -1
  340. package/dist/ms-doc/converter/DocToDocxConverter.d.ts +0 -55
  341. package/dist/ms-doc/converter/DocToDocxConverter.d.ts.map +0 -1
  342. package/dist/ms-doc/converter/DocToDocxConverter.js +0 -324
  343. package/dist/ms-doc/converter/DocToDocxConverter.js.map +0 -1
  344. package/dist/ms-doc/fib/FIB.d.ts +0 -18
  345. package/dist/ms-doc/fib/FIB.d.ts.map +0 -1
  346. package/dist/ms-doc/fib/FIB.js +0 -342
  347. package/dist/ms-doc/fib/FIB.js.map +0 -1
  348. package/dist/ms-doc/fields/FieldParser.d.ts +0 -31
  349. package/dist/ms-doc/fields/FieldParser.d.ts.map +0 -1
  350. package/dist/ms-doc/fields/FieldParser.js +0 -266
  351. package/dist/ms-doc/fields/FieldParser.js.map +0 -1
  352. package/dist/ms-doc/images/PictureExtractor.d.ts +0 -22
  353. package/dist/ms-doc/images/PictureExtractor.d.ts.map +0 -1
  354. package/dist/ms-doc/images/PictureExtractor.js +0 -233
  355. package/dist/ms-doc/images/PictureExtractor.js.map +0 -1
  356. package/dist/ms-doc/index.d.ts +0 -20
  357. package/dist/ms-doc/index.d.ts.map +0 -1
  358. package/dist/ms-doc/index.js +0 -59
  359. package/dist/ms-doc/index.js.map +0 -1
  360. package/dist/ms-doc/properties/SPRM.d.ts +0 -210
  361. package/dist/ms-doc/properties/SPRM.d.ts.map +0 -1
  362. package/dist/ms-doc/properties/SPRM.js +0 -633
  363. package/dist/ms-doc/properties/SPRM.js.map +0 -1
  364. package/dist/ms-doc/sections/SectionParser.d.ts +0 -25
  365. package/dist/ms-doc/sections/SectionParser.d.ts.map +0 -1
  366. package/dist/ms-doc/sections/SectionParser.js +0 -214
  367. package/dist/ms-doc/sections/SectionParser.js.map +0 -1
  368. package/dist/ms-doc/styles/StyleSheet.d.ts +0 -23
  369. package/dist/ms-doc/styles/StyleSheet.d.ts.map +0 -1
  370. package/dist/ms-doc/styles/StyleSheet.js +0 -268
  371. package/dist/ms-doc/styles/StyleSheet.js.map +0 -1
  372. package/dist/ms-doc/subdocuments/SubdocumentParser.d.ts +0 -61
  373. package/dist/ms-doc/subdocuments/SubdocumentParser.d.ts.map +0 -1
  374. package/dist/ms-doc/subdocuments/SubdocumentParser.js +0 -208
  375. package/dist/ms-doc/subdocuments/SubdocumentParser.js.map +0 -1
  376. package/dist/ms-doc/tables/TableParser.d.ts +0 -29
  377. package/dist/ms-doc/tables/TableParser.d.ts.map +0 -1
  378. package/dist/ms-doc/tables/TableParser.js +0 -176
  379. package/dist/ms-doc/tables/TableParser.js.map +0 -1
  380. package/dist/ms-doc/text/PieceTable.d.ts +0 -21
  381. package/dist/ms-doc/text/PieceTable.d.ts.map +0 -1
  382. package/dist/ms-doc/text/PieceTable.js +0 -171
  383. package/dist/ms-doc/text/PieceTable.js.map +0 -1
  384. package/dist/ms-doc/types/Constants.d.ts +0 -99
  385. package/dist/ms-doc/types/Constants.d.ts.map +0 -1
  386. package/dist/ms-doc/types/Constants.js +0 -102
  387. package/dist/ms-doc/types/Constants.js.map +0 -1
  388. package/dist/ms-doc/types/DocTypes.d.ts +0 -368
  389. package/dist/ms-doc/types/DocTypes.d.ts.map +0 -1
  390. package/dist/ms-doc/types/DocTypes.js +0 -3
  391. package/dist/ms-doc/types/DocTypes.js.map +0 -1
  392. package/dist/tracking/index.d.ts +0 -3
  393. package/dist/tracking/index.d.ts.map +0 -1
  394. package/dist/tracking/index.js +0 -6
  395. package/dist/tracking/index.js.map +0 -1
@@ -0,0 +1,919 @@
1
+ /**
2
+ * XMLParser - Simple position-based XML parser
3
+ * Avoids regex backtracking issues that can cause ReDoS attacks
4
+ * Completes the DocXML framework (XMLBuilder + XMLParser)
5
+ */
6
+
7
+ import { getGlobalLogger, createScopedLogger, ILogger } from "../utils/logger";
8
+ import { XMLBuilder } from "./XMLBuilder";
9
+
10
// Returns a logger derived from the global logger and scoped to the 'XMLParser' label.
function getLogger(): ILogger {
  const rootLogger = getGlobalLogger();
  return createScopedLogger(rootLogger, 'XMLParser');
}
14
+
15
/**
 * Nesting-depth limit used when a caller does not supply `maxNestingDepth`.
 * Bounding the depth guards against stack overflow on deeply nested XML.
 */
export const DEFAULT_MAX_NESTING_DEPTH = 256;
20
+
21
/**
 * Settings that control how XML is converted into a plain object tree.
 * Every field is optional; the documented default applies when it is omitted.
 */
export interface ParseToObjectOptions {
  /** When true, attributes are skipped. Default: false. */
  ignoreAttributes?: boolean;

  /** Prefix prepended to attribute keys. Default: '@_'. */
  attributeNamePrefix?: string;

  /** Property name under which text content is stored. Default: '#text'. */
  textNodeName?: string;

  /** When true, namespace prefixes are stripped from element names. Default: false. */
  ignoreNamespace?: boolean;

  /** When true, numeric attribute values are parsed. Default: true. */
  parseAttributeValue?: boolean;

  /** When true, surrounding whitespace is trimmed from text values. Default: true. */
  trimValues?: boolean;

  /** When true, elements are always returned as arrays. Default: false. */
  alwaysArray?: boolean;

  /** Upper bound on element nesting (default: 256); guards against stack overflow on deeply nested documents. */
  maxNestingDepth?: number;
}
49
+
50
/**
 * Any value that can appear in a parsed XML tree: a primitive, a nested
 * object, an array of nested objects, or an absent value.
 */
export type ParsedXMLValue =
  | ParsedXMLObject
  | ParsedXMLObject[]
  | string
  | number
  | boolean
  | null
  | undefined;
62
+
63
/**
 * A parsed XML node: an open-ended dictionary whose keys are strings and
 * whose values are any ParsedXMLValue.
 */
export interface ParsedXMLObject {
  [key: string]: ParsedXMLValue;
}
69
+
70
/**
 * Parser-internal pairing of an element name with its parsed value,
 * used to track elements while parsing is in progress.
 */
interface ParsedElement {
  name: string;
  value: ParsedXMLValue;
}
77
+
78
+ /**
79
+ * Simple XML parser using position-based parsing instead of regex
80
+ * Prevents catastrophic backtracking (ReDoS attacks) by avoiding nested regex patterns
81
+ */
82
+ export class XMLParser {
83
/**
 * Returns the inner XML of the document body.
 *
 * The search locates the first "<w:body" occurrence, skips to the ">" that
 * ends that opening tag, and reads up to the first "</w:body>" after it.
 *
 * @param docXml - Full document.xml content
 * @returns Everything between the opening and closing body tags, or an
 *          empty string when any of the three markers cannot be found
 */
static extractBody(docXml: string): string {
  const bodyOpen = docXml.indexOf("<w:body");
  // Each lookup is only attempted when the previous one succeeded.
  const bodyOpenEnd = bodyOpen === -1 ? -1 : docXml.indexOf(">", bodyOpen);
  const bodyClose =
    bodyOpenEnd === -1 ? -1 : docXml.indexOf("</w:body>", bodyOpenEnd);

  return bodyClose === -1 ? "" : docXml.substring(bodyOpenEnd + 1, bodyClose);
}
105
+
106
/**
 * Extracts every outermost element with the given tag name using
 * position-based scanning (no regular expressions).
 *
 * Nested elements of the same name are kept inside their outermost match;
 * depth is tracked so the correct closing tag is paired with each opening
 * tag. Nested self-closing elements (e.g. `<a><a/></a>`) do not open a new
 * nesting level, so the outer element is returned intact.
 *
 * @param xml - XML content to scan
 * @param tagName - Tag name to extract (e.g., 'w:p', 'w:r')
 * @returns The full XML text of each matching element, in document order.
 *          An element whose closing tag cannot be found is skipped.
 */
static extractElements(xml: string, tagName: string): string[] {
  const elements: string[] = [];
  const openTag = `<${tagName}`;
  const closeTag = `</${tagName}>`;

  // A real opening tag is followed by '>', '/', whitespace or '=' (or the end
  // of the input). Anything else means we hit a longer name such as <w:pPr>
  // while looking for <w:p>. The same rule is used for outer and nested tags.
  const endsTagName = (ch: string | undefined): boolean =>
    !ch ||
    ch === ">" ||
    ch === "/" ||
    ch === " " ||
    ch === "\t" ||
    ch === "\n" ||
    ch === "\r" ||
    ch === "=";

  // Index of the next exact opening tag at or after `from`, or -1.
  const findOpen = (from: number): number => {
    let cursor = from;
    while (true) {
      const candidate = xml.indexOf(openTag, cursor);
      if (candidate === -1) return -1;
      if (endsTagName(xml[candidate + openTag.length])) return candidate;
      cursor = candidate + openTag.length;
    }
  };

  let pos = 0;
  while (pos < xml.length) {
    const startIdx = findOpen(pos);
    if (startIdx === -1) break;

    const openEnd = xml.indexOf(">", startIdx);
    if (openEnd === -1) break;

    // Self-closing element: the whole element is the opening tag itself.
    if (xml[openEnd - 1] === "/") {
      elements.push(xml.substring(startIdx, openEnd + 1));
      pos = openEnd + 1;
      continue;
    }

    // Walk forward pairing nested opening tags with closing tags.
    let depth = 1;
    let searchPos = openEnd + 1;

    while (depth > 0 && searchPos < xml.length) {
      const nextClose = xml.indexOf(closeTag, searchPos);
      if (nextClose === -1) break;

      const nextOpen = findOpen(searchPos);
      if (nextOpen !== -1 && nextOpen < nextClose) {
        // A nested self-closing tag has no closing tag of its own, so it
        // must not increase the depth.
        const nestedEnd = xml.indexOf(">", nextOpen);
        const nestedSelfClosing =
          nestedEnd !== -1 &&
          nestedEnd < nextClose &&
          xml[nestedEnd - 1] === "/";
        if (!nestedSelfClosing) {
          depth++;
        }
        searchPos = nextOpen + openTag.length;
      } else {
        depth--;
        if (depth === 0) {
          elements.push(xml.substring(startIdx, nextClose + closeTag.length));
          pos = nextClose + closeTag.length;
        } else {
          searchPos = nextClose + closeTag.length;
        }
      }
    }

    if (depth > 0) {
      // Unclosed tag - skip past its name and keep scanning.
      pos = startIdx + openTag.length;
    }
  }

  return elements;
}
211
+
212
/**
 * Reads the value of the first `name="..."` occurrence in an XML string.
 *
 * The lookup is a plain substring search for `attributeName="`, so only
 * double-quoted values are recognised and the first textual match wins.
 *
 * @param xml - XML content
 * @param attributeName - Attribute name (e.g., 'w:val')
 * @returns The value passed through XMLBuilder.unescapeXml, or undefined
 *          when the attribute or its closing quote is not found
 */
static extractAttribute(
  xml: string,
  attributeName: string
): string | undefined {
  const marker = `${attributeName}="`;
  const markerAt = xml.indexOf(marker);
  if (markerAt < 0) {
    return undefined;
  }

  const from = markerAt + marker.length;
  const to = xml.indexOf('"', from);

  // Entities are unescaped so the value is not double-escaped when it is
  // serialized again later.
  return to < 0 ? undefined : XMLBuilder.unescapeXml(xml.substring(from, to));
}
236
+
237
/**
 * Reports whether the XML contains the given tag either as a bare
 * self-closing tag (`<tag/>`) or followed by a space (`<tag `).
 *
 * NOTE(review): the second form also matches a non-self-closing tag that
 * has attributes (e.g. `<tag a="1">...</tag>`); confirm callers expect that.
 *
 * @param xml - XML content
 * @param tagName - Tag name to check
 * @returns True when either textual form occurs in `xml`
 */
static hasSelfClosingTag(xml: string, tagName: string): boolean {
  const needles = [`<${tagName}/>`, `<${tagName} `];
  return needles.some((needle) => xml.includes(needle));
}
246
+
247
/**
 * Checks whether a boolean (on/off) property tag is enabled.
 *
 * A property counts as enabled when the XML contains either:
 * - the tag with w:val="1", w:val="true" or w:val="on" as its first
 *   attribute, or
 * - the bare self-closing tag, written `<tag/>` or `<tag />`
 *   (a tag without w:val means "true", e.g. `<w:b/>` is bold).
 *
 * A tag with w:val="0", "false" or "off", or an absent tag, is reported
 * as disabled.
 *
 * @param xml - XML content to search
 * @param tagName - Tag name (e.g., 'w:keepNext')
 * @returns True if an enabled form of the tag is present, false otherwise
 *
 * @example
 * hasBooleanProperty('<w:pPr><w:keepNext w:val="1"/></w:pPr>', 'w:keepNext'); // true
 * hasBooleanProperty('<w:pPr><w:keepNext w:val="on"/></w:pPr>', 'w:keepNext'); // true
 * hasBooleanProperty('<w:pPr><w:keepNext w:val="0"/></w:pPr>', 'w:keepNext'); // false
 * hasBooleanProperty('<w:pPr><w:spacing/></w:pPr>', 'w:keepNext'); // false
 */
static hasBooleanProperty(xml: string, tagName: string): boolean {
  // Lexical values that mean "enabled" for an on/off property.
  const enabledValues = ["1", "true", "on"];
  const explicitlyEnabled = enabledValues.some((value) =>
    xml.includes(`<${tagName} w:val="${value}"`)
  );
  if (explicitlyEnabled) {
    return true;
  }

  // Bare self-closing tag (with or without a space before the slash)
  // carries no w:val and therefore means true.
  return xml.includes(`<${tagName}/>`) || xml.includes(`<${tagName} />`);
}
280
+
281
/**
 * Concatenates the raw content of every `<w:t>...</w:t>` element.
 *
 * Only exact `w:t` tags are considered: tags that merely start with the
 * same characters (`<w:tab/>`, `<w:tbl>`, `<w:tc>`, `<w:tr>`, ...) are
 * skipped, and a self-closing `<w:t/>` contributes no text. The content is
 * returned as it appears in the XML; entities are not unescaped.
 *
 * @param xml - XML content
 * @returns Combined text content in document order ("" when none is found)
 */
static extractText(xml: string): string {
  const texts: string[] = [];
  const openTag = "<w:t";
  const closeTag = "</w:t>";

  let pos = 0;
  while (pos < xml.length) {
    const startIdx = xml.indexOf(openTag, pos);
    if (startIdx === -1) break;

    // The tag name must end right after "w:t"; otherwise this is a longer
    // name such as w:tab or w:tbl and must not be treated as a text run.
    const charAfterTag = xml[startIdx + openTag.length];
    const isExactTag =
      charAfterTag === ">" ||
      charAfterTag === "/" ||
      charAfterTag === " " ||
      charAfterTag === "\t" ||
      charAfterTag === "\n" ||
      charAfterTag === "\r";
    if (!isExactTag) {
      pos = startIdx + openTag.length;
      continue;
    }

    // Find the end of the opening tag
    const openEnd = xml.indexOf(">", startIdx);
    if (openEnd === -1) break;

    // Self-closing <w:t/> holds no text and has no matching close tag.
    if (xml[openEnd - 1] === "/") {
      pos = openEnd + 1;
      continue;
    }

    // Find the closing tag and take everything in between
    const closeIdx = xml.indexOf(closeTag, openEnd);
    if (closeIdx === -1) break;

    texts.push(xml.substring(openEnd + 1, closeIdx));
    pos = closeIdx + closeTag.length;
  }

  return texts.join("");
}
314
+
315
/**
 * Rejects XML input that is larger than the allowed size.
 *
 * The comparison uses `xml.length` (string length), which is compared
 * directly against `maxSize`.
 *
 * @param xml - XML content
 * @param maxSize - Maximum allowed size (default: 10 * 1024 * 1024)
 * @throws Error when `xml.length` is greater than `maxSize`; the message
 *         reports both sizes in MB
 */
static validateSize(xml: string, maxSize: number = 10 * 1024 * 1024): void {
  const tooLarge = xml.length > maxSize;
  if (!tooLarge) {
    return;
  }

  const actualMb = (xml.length / 1024 / 1024).toFixed(1);
  const limitMb = (maxSize / 1024 / 1024).toFixed(0);
  throw new Error(
    `XML content too large for parsing (${actualMb}MB). ` +
      `Maximum allowed: ${limitMb}MB`
  );
}
333
+
334
/**
 * Returns the text located between an opening tag and a closing tag.
 *
 * The first occurrence of `startTag` is located, the scan skips to the ">"
 * that ends that opening tag, and the content runs up to the first
 * `endTag` found after it. Uses plain index searches, no regex.
 *
 * @param xml - XML content
 * @param startTag - Opening tag prefix (e.g., '<w:pPr')
 * @param endTag - Closing tag (e.g., '</w:pPr>')
 * @returns The content between the tags, or undefined if the start tag,
 *          its terminating ">", or the end tag is missing
 */
static extractBetweenTags(
  xml: string,
  startTag: string,
  endTag: string
): string | undefined {
  const tagAt = xml.indexOf(startTag);
  // Each lookup is only attempted when the previous one succeeded.
  const contentStartMarker = tagAt === -1 ? -1 : xml.indexOf(">", tagAt);
  const contentEnd =
    contentStartMarker === -1 ? -1 : xml.indexOf(endTag, contentStartMarker);

  if (contentEnd === -1) {
    return undefined;
  }
  return xml.substring(contentStartMarker + 1, contentEnd);
}
360
+
361
/**
 * Extracts the attributes portion of the first tag with exactly this name.
 * Handles cases where multiple similar tags exist (e.g., <w:sz.../> and <w:szCs.../>):
 * a candidate only counts when the tag name is followed by whitespace, `/` or `>`.
 *
 * The end of the tag is located on the matched tag itself (the first `>`
 * outside a quoted attribute value), so a non-self-closing match such as
 * `<w:rPr>` never runs on into the `/>` of a later element.
 *
 * @param xml - XML string to search
 * @param tagName - Tag name to find (e.g., "w:color", "w:sz")
 * @returns The attributes portion of the tag (text between the tag name and
 *          `/>` or `>`), or undefined if the tag is not found or never closed
 *
 * @example
 * const xml = '<w:sz w:val="36"/><w:color w:val="FF0000"/>';
 * const colorTag = XMLParser.extractSelfClosingTag(xml, 'w:color');
 * // Returns: ' w:val="FF0000"'
 */
static extractSelfClosingTag(
  xml: string,
  tagName: string
): string | undefined {
  const startPattern = `<${tagName}`;
  let searchPos = 0;

  // Search for the exact tag (not tags that start with this pattern)
  while (true) {
    const startIdx = xml.indexOf(startPattern, searchPos);
    if (startIdx === -1) return undefined;

    const attrStart = startIdx + startPattern.length;
    const charAfterTag = xml[attrStart];
    const isExactTag =
      charAfterTag === " " ||
      charAfterTag === "/" ||
      charAfterTag === ">" ||
      charAfterTag === "\t" ||
      charAfterTag === "\n" ||
      charAfterTag === "\r";

    if (isExactTag) {
      // Walk to the '>' that ends THIS tag, ignoring any '>' or '/>' that
      // appears inside a quoted attribute value.
      let quote: string | undefined;
      for (let i = attrStart; i < xml.length; i++) {
        const ch = xml[i];
        if (quote !== undefined) {
          if (ch === quote) quote = undefined;
        } else if (ch === '"' || ch === "'") {
          quote = ch;
        } else if (ch === ">") {
          // Drop the '/' of a self-closing tag from the returned attributes
          const isSelfClosing = i - 1 >= attrStart && xml[i - 1] === "/";
          return xml.substring(attrStart, isSelfClosing ? i - 1 : i);
        }
      }
      // Tag is never closed
      return undefined;
    }

    // Not the exact tag (e.g., found "w:sz" when looking for "w:s")
    // Continue searching
    searchPos = startIdx + 1;
  }
}
411
+
412
/**
 * Converts an XML string into a plain JavaScript object.
 * Output shape is intended to be compatible with fast-xml-parser.
 *
 * Steps: resolve option defaults, enforce the size limit via
 * `validateSize`, strip any `<?xml ...?>` declaration and surrounding
 * whitespace, then parse the root element starting at nesting depth 0.
 *
 * @param xml - XML string to parse
 * @param options - Parsing options; every omitted option falls back to its default
 * @returns Parsed JavaScript object (`{}` when nothing remains after stripping)
 *
 * @example
 * const xml = '<Relationships><Relationship Id="rId1" Target="https://example.com"/></Relationships>';
 * const obj = XMLParser.parseToObject(xml);
 * // Returns: { Relationships: { Relationship: { '@_Id': 'rId1', '@_Target': 'https://example.com' } } }
 *
 * @example
 * // Multiple elements become arrays
 * const xml = '<Items><Item id="1"/><Item id="2"/></Items>';
 * const obj = XMLParser.parseToObject(xml);
 * // Returns: { Items: { Item: [{ '@_id': '1' }, { '@_id': '2' }] } }
 */
static parseToObject(
  xml: string,
  options?: ParseToObjectOptions
): ParsedXMLObject {
  const log = getLogger();
  log.debug('Parsing XML to object', { xmlSize: xml.length });

  // Fill in a default for every option the caller left out
  const resolved: Required<ParseToObjectOptions> = {
    ignoreAttributes: options?.ignoreAttributes ?? false,
    attributeNamePrefix: options?.attributeNamePrefix ?? "@_",
    textNodeName: options?.textNodeName ?? "#text",
    ignoreNamespace: options?.ignoreNamespace ?? false,
    parseAttributeValue: options?.parseAttributeValue ?? true,
    trimValues: options?.trimValues ?? true,
    alwaysArray: options?.alwaysArray ?? false,
    maxNestingDepth: options?.maxNestingDepth ?? DEFAULT_MAX_NESTING_DEPTH,
  };

  // The size limit applies to the input as received
  XMLParser.validateSize(xml);

  // Drop the XML declaration(s) and surrounding whitespace
  const declarationPattern = /<\?xml[^>]*\?>\s*/g;
  const documentBody = xml.replace(declarationPattern, "").trim();
  if (documentBody.length === 0) {
    return {};
  }

  // Root element is parsed at depth 0
  const parsedRoot = XMLParser.parseElementToObject(documentBody, 0, resolved, 0);
  log.debug('XML parsed to object');
  return parsedRoot.value as ParsedXMLObject;
}
465
+
466
/**
 * Parses the first element found at or after `startPos` and returns it as
 * `{ [elementName]: value }` together with the position just past it.
 *
 * Value rules, as implemented below:
 * - self-closing or unterminated element: an object holding its attributes;
 * - text only, no attributes: the text itself;
 * - text with attributes: attributes plus the text under `textNodeName`;
 * - child elements: merged in via `coalesceChildren`; when the element has
 *   no attributes, any text collected alongside the children is discarded.
 *
 * A comment at the parse position is skipped and parsing resumes after it.
 * When no element can be read, `value` is `{}`.
 *
 * @throws Error when `depth` exceeds `options.maxNestingDepth`
 * @private
 */
private static parseElementToObject(
  xml: string,
  startPos: number,
  options: Required<ParseToObjectOptions>,
  depth: number
): { value: ParsedXMLValue; endPos: number } {
  // Refuse pathological nesting before recursing any further
  if (depth > options.maxNestingDepth) {
    throw new Error(
      `XML nesting depth exceeds maximum of ${options.maxNestingDepth}. ` +
        `This may indicate malformed XML or an attack attempt. ` +
        `Use the maxNestingDepth option to increase the limit if needed.`
    );
  }

  const nothingParsed = (
    endPos: number
  ): { value: ParsedXMLValue; endPos: number } => ({ value: {}, endPos });

  // Locate the next tag
  const ltPos = xml.indexOf("<", startPos);
  if (ltPos === -1) {
    return nothingParsed(xml.length);
  }

  // Comments are skipped; an unterminated comment consumes the rest
  if (xml.startsWith("<!--", ltPos)) {
    const commentClose = xml.indexOf("-->", ltPos + 4);
    return commentClose === -1
      ? nothingParsed(xml.length)
      : XMLParser.parseElementToObject(xml, commentClose + 3, options, depth);
  }

  // Read the (possibly namespaced) element name
  const nameMatch = /^([a-zA-Z0-9:_-]+)/.exec(xml.substring(ltPos + 1));
  if (!nameMatch) {
    return nothingParsed(ltPos + 1);
  }
  const qualifiedName: string = nameMatch[1] || "";

  const gtPos = xml.indexOf(">", ltPos);
  if (gtPos === -1) {
    return nothingParsed(xml.length);
  }

  // The key used in the output may drop the namespace prefix; offsets and
  // closing-tag matching keep using the qualified name.
  const outputName: string =
    options.ignoreNamespace && qualifiedName.includes(":")
      ? (qualifiedName.split(":")[1] || qualifiedName)
      : qualifiedName;

  // Everything between the element name and '>' holds the attributes
  const header = xml.substring(ltPos + 1 + qualifiedName.length, gtPos);
  const attributes = XMLParser.extractAttributesFromTag(header, options);

  const attributesOnly = (): { value: ParsedXMLValue; endPos: number } => ({
    value: { [outputName]: { ...attributes } },
    endPos: gtPos + 1,
  });

  // Self-closing tag: attributes only
  if (xml[gtPos - 1] === "/" || header.trim().endsWith("/")) {
    return attributesOnly();
  }

  // Locate the matching closing tag; a missing one is treated like self-closing
  const bodyStart = gtPos + 1;
  const closePos = XMLParser.findClosingTag(xml, qualifiedName, bodyStart);
  if (closePos === -1) {
    return attributesOnly();
  }
  const closeTagLength = `</${qualifiedName}>`.length;

  // Walk the element body, gathering text runs and child elements
  const body = xml.substring(bodyStart, closePos);
  const children: ParsedElement[] = [];
  let textContent = "";
  let cursor = 0;

  while (cursor < body.length) {
    const tagAt = body.indexOf("<", cursor);

    // Text up to the next tag (or to the end of the body when no tag is left).
    // With trimValues on, whitespace-only runs are dropped; otherwise kept.
    const segment =
      tagAt === -1 ? body.substring(cursor) : body.substring(cursor, tagAt);
    if (segment.length > 0 && (!options.trimValues || segment.trim())) {
      // Unescape XML entities in text content (e.g., &lt; -> <)
      textContent += XMLBuilder.unescapeXml(segment);
    }
    if (tagAt === -1) {
      break;
    }

    // Children are parsed one nesting level deeper
    const parsedChild = XMLParser.parseElementToObject(
      body,
      tagAt,
      options,
      depth + 1
    );
    const childObject = parsedChild.value as ParsedXMLObject;
    const [childName] = Object.keys(childObject);
    if (childName) {
      children.push({ name: childName, value: childObject[childName] });
    }

    cursor = parsedChild.endPos;
  }

  // Assemble the value: attributes first, then text, then children
  const hasAttributes =
    !options.ignoreAttributes && Object.keys(attributes).length > 0;
  const attributeBag: ParsedXMLObject = hasAttributes ? { ...attributes } : {};
  let elementValue: ParsedXMLValue = attributeBag;

  const keepText =
    textContent.length > 0 && (!options.trimValues || textContent.trim());
  if (keepText) {
    const text = options.trimValues ? textContent.trim() : textContent;
    if (hasAttributes) {
      // Text with attributes
      attributeBag[options.textNodeName] = text;
    } else {
      // Only text, no attributes - the text is the value
      elementValue = text;
    }
  }

  if (children.length > 0) {
    const grouped = XMLParser.coalesceChildren(children, options);
    elementValue =
      typeof elementValue === "object" && !Array.isArray(elementValue)
        ? { ...elementValue, ...grouped }
        : grouped;
  }

  return {
    value: { [outputName]: elementValue },
    endPos: closePos + closeTagLength,
  };
}
660
+
661
/**
 * Reads `name="value"` / `name='value'` pairs out of a tag header (the text
 * between the element name and the closing `>`).
 *
 * Scanning stops at a `/`, at the end of the header, or at the first
 * position where no attribute name can be read. A name that is not followed
 * by a quoted value is recorded with an empty string. Keys are prefixed with
 * `options.attributeNamePrefix`; values go through `parseValue` when
 * `options.parseAttributeValue` is set.
 *
 * @returns Attribute map; empty when `options.ignoreAttributes` is set
 * @private
 */
private static extractAttributesFromTag(
  tagHeader: string,
  options: Required<ParseToObjectOptions>
): Record<string, string | number | boolean> {
  const attributes: Record<string, string | number | boolean> = {};
  if (options.ignoreAttributes) {
    return attributes;
  }

  const headerLength = tagHeader.length;
  const whitespace = /\s/;
  const nameChar = /[a-zA-Z0-9:_-]/;
  const whitespaceOrEquals = /[\s=]/;

  // Returns the first index at or after `from` whose character does not match
  const advanceWhile = (from: number, pattern: RegExp): number => {
    let index = from;
    while (index < headerLength && pattern.test(tagHeader.charAt(index))) {
      index++;
    }
    return index;
  };

  let cursor = 0;
  while (cursor < headerLength) {
    cursor = advanceWhile(cursor, whitespace);
    if (cursor >= headerLength || tagHeader[cursor] === "/") {
      break;
    }

    // Attribute name
    const nameStart = cursor;
    cursor = advanceWhile(cursor, nameChar);
    if (cursor === nameStart) {
      break; // not a name character - stop scanning
    }
    const rawName = tagHeader.substring(nameStart, cursor);

    // Whitespace and '=' between name and value
    cursor = advanceWhile(cursor, whitespaceOrEquals);

    // Quoted value; an unterminated quote runs to the end of the header
    let rawValue = "";
    const quote = tagHeader[cursor];
    if (cursor < headerLength && (quote === '"' || quote === "'")) {
      const valueStart = cursor + 1;
      const closingQuote = tagHeader.indexOf(quote, valueStart);
      const valueEnd = closingQuote === -1 ? headerLength : closingQuote;
      rawValue = tagHeader.substring(valueStart, valueEnd);
      cursor = valueEnd + 1; // step past the closing quote
    }

    // Optionally drop the namespace prefix, then apply the key prefix
    const localName =
      options.ignoreNamespace && rawName.includes(":")
        ? (rawName.split(":")[1] || rawName)
        : rawName;
    const key = options.attributeNamePrefix + localName;

    attributes[key] = options.parseAttributeValue
      ? XMLParser.parseValue(rawValue)
      : rawValue;
  }

  return attributes;
}
753
+
754
/**
 * Returns the index of the `</elementName>` that closes the element whose
 * content begins at `startPos`, accounting for nested elements of the same
 * name. Returns -1 when no matching closing tag exists.
 *
 * Between the scan position and each closing-tag candidate, the first
 * same-named opening tag is looked up. A candidate counts as an opening tag
 * only when the name is followed by `>`, space, `/`, tab, LF or CR (so
 * `<w:pPrChange` is ignored when looking for `<w:pPr`), and only when the
 * `>` that ends it is not preceded by `/` (self-closing tags do not nest).
 *
 * @private
 */
private static findClosingTag(
  xml: string,
  elementName: string,
  startPos: number
): number {
  const openTag = `<${elementName}`;
  const closeTag = `</${elementName}>`;
  const nameDelimiters = "> /\t\n\r";

  // First non-self-closing <elementName ...> in [from, limit), or -1
  const locateNestedOpen = (from: number, limit: number): number => {
    let scan = from;
    while (scan < limit) {
      const candidate = xml.indexOf(openTag, scan);
      if (candidate === -1 || candidate >= limit) {
        return -1; // No more candidates before the closing tag
      }

      const follower = xml[candidate + openTag.length];
      if (follower === undefined || !nameDelimiters.includes(follower)) {
        // False positive (e.g., <w:pPrChange when looking for <w:pPr)
        scan = candidate + openTag.length;
        continue;
      }

      const headerEnd = xml.indexOf(">", candidate);
      if (headerEnd === -1 || xml[headerEnd - 1] !== "/") {
        return candidate; // Real opening tag
      }

      // Self-closing tags like <w:rPr/> should NOT increase depth
      scan = headerEnd + 1;
    }
    return -1;
  };

  let openCount = 1;
  let cursor = startPos;

  while (openCount > 0 && cursor < xml.length) {
    const closeAt = xml.indexOf(closeTag, cursor);
    if (closeAt === -1) {
      return -1; // No closing tag found
    }

    const nestedAt = locateNestedOpen(cursor, closeAt);
    if (nestedAt !== -1) {
      // A nested element opens before this closing tag
      openCount += 1;
      cursor = nestedAt + openTag.length;
      continue;
    }

    // This closing tag ends the innermost open element
    openCount -= 1;
    if (openCount === 0) {
      return closeAt;
    }
    cursor = closeAt + closeTag.length;
  }

  return -1;
}
828
+
829
/**
 * Groups parsed children by name into a single object.
 *
 * A name that occurs more than once (or any name when `options.alwaysArray`
 * is set) maps to an array of values in document order; otherwise it maps to
 * the single value. When there is at least one child, `_orderedChildren`
 * records the original sequence as `{ type, index }` entries, where `index`
 * is the position of that child among children of the same name (used by
 * DocumentParser for runs).
 *
 * Element names come from the parsed document, so bookkeeping uses `Map`
 * rather than plain objects: names such as `constructor` or `toString` would
 * otherwise pick up inherited `Object.prototype` members and corrupt the
 * counts. Results are stored as own data properties so that a child named
 * `__proto__` is kept as data instead of replacing the result's prototype.
 *
 * @private
 */
private static coalesceChildren(
  children: ParsedElement[],
  options: Required<ParseToObjectOptions>
): ParsedXMLObject {
  const result: ParsedXMLObject = {};
  const nameCounts = new Map<string, number>();
  const nameIndices = new Map<string, number>();
  const arraysByName = new Map<string, ParsedXMLValue[]>();

  // Track element order for correct run content parsing (tabs, breaks, text)
  // This is critical for preserving the order of mixed content like: text -> tab -> text
  const orderedChildren: { type: string; index: number }[] = [];

  // Always creates an own, enumerable data property (never triggers a setter)
  const setOwn = (key: string, value: ParsedXMLValue): void => {
    Object.defineProperty(result, key, {
      value,
      enumerable: true,
      writable: true,
      configurable: true,
    });
  };

  // Count occurrences of each child name
  for (const child of children) {
    nameCounts.set(child.name, (nameCounts.get(child.name) ?? 0) + 1);
  }

  // Build result object while tracking order
  for (const child of children) {
    const shouldBeArray =
      options.alwaysArray || (nameCounts.get(child.name) ?? 0) > 1;

    // Track element order with its index in the array
    const currentIndex = nameIndices.get(child.name) ?? 0;
    orderedChildren.push({ type: child.name, index: currentIndex });
    nameIndices.set(child.name, currentIndex + 1);

    if (shouldBeArray) {
      let bucket = arraysByName.get(child.name);
      if (!bucket) {
        bucket = [];
        arraysByName.set(child.name, bucket);
        setOwn(child.name, bucket);
      }
      bucket.push(child.value);
    } else {
      setOwn(child.name, child.value);
    }
  }

  // Add _orderedChildren to track element order (used by DocumentParser for runs)
  if (orderedChildren.length > 0) {
    result._orderedChildren = orderedChildren;
  }

  return result;
}
877
+
878
/**
 * Converts an attribute string to a boolean or number where it clearly is
 * one, leaving everything else as a string.
 *
 * Rules, applied in this order:
 * 1. `"true"` / `"false"` become booleans.
 * 2. Exactly six hex digits (colour codes, including "000000") stay a
 *    string, upper-cased.
 * 3. Seven or more digits stay a string unchanged (e.g. cnfStyle bit
 *    strings like "100000000000"), so leading zeros are not lost.
 * 4. Integers and simple decimals (optional leading `-`) become numbers.
 * 5. Three hex digits containing at least one letter (like "F0A") stay a
 *    string, upper-cased.
 * 6. Anything else is returned unchanged.
 *
 * @private
 */
private static parseValue(value: string): string | number | boolean {
  switch (value) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      break;
  }

  // Six-digit hex colour: normalize to uppercase, never treat as a number
  const isSixDigitHex = /^[0-9A-Fa-f]{6}$/.test(value);
  if (isSixDigitHex) {
    return value.toUpperCase();
  }

  // Long digit-only strings are kept verbatim
  const isLongDigitString = /^\d{7,}$/.test(value);
  if (isLongDigitString) {
    return value;
  }

  // Whole numbers (3-character values like "240" end up here)
  if (/^-?\d+$/.test(value)) {
    const asInteger = Number.parseInt(value, 10);
    if (!Number.isNaN(asInteger)) {
      return asInteger;
    }
  }

  // Decimal numbers
  if (/^-?\d+\.\d+$/.test(value)) {
    const asFloat = Number.parseFloat(value);
    if (!Number.isNaN(asFloat)) {
      return asFloat;
    }
  }

  // Three-character hex with at least one letter; purely numeric ones were
  // already returned as numbers above
  const isShortHex = /^[0-9A-Fa-f]{3}$/.test(value) && /[A-Fa-f]/.test(value);
  return isShortHex ? value.toUpperCase() : value;
}
919
+ }