docxmlater 10.1.4 → 10.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372) hide show
  1. package/README.md +759 -754
  2. package/dist/constants/legacyCompatFlags.js +1 -1
  3. package/dist/constants/legacyCompatFlags.js.map +1 -1
  4. package/dist/constants/limits.js.map +1 -1
  5. package/dist/core/Document.d.ts +51 -50
  6. package/dist/core/Document.d.ts.map +1 -1
  7. package/dist/core/Document.js +486 -471
  8. package/dist/core/Document.js.map +1 -1
  9. package/dist/core/DocumentContent.d.ts +9 -9
  10. package/dist/core/DocumentContent.d.ts.map +1 -1
  11. package/dist/core/DocumentContent.js +1 -1
  12. package/dist/core/DocumentContent.js.map +1 -1
  13. package/dist/core/DocumentGenerator.d.ts +11 -11
  14. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  15. package/dist/core/DocumentGenerator.js +251 -251
  16. package/dist/core/DocumentGenerator.js.map +1 -1
  17. package/dist/core/DocumentIdManager.js.map +1 -1
  18. package/dist/core/DocumentParser.d.ts +15 -15
  19. package/dist/core/DocumentParser.d.ts.map +1 -1
  20. package/dist/core/DocumentParser.js +2123 -2155
  21. package/dist/core/DocumentParser.js.map +1 -1
  22. package/dist/core/DocumentValidator.d.ts.map +1 -1
  23. package/dist/core/DocumentValidator.js +2 -5
  24. package/dist/core/DocumentValidator.js.map +1 -1
  25. package/dist/core/Relationship.js.map +1 -1
  26. package/dist/core/RelationshipManager.d.ts.map +1 -1
  27. package/dist/core/RelationshipManager.js +3 -3
  28. package/dist/core/RelationshipManager.js.map +1 -1
  29. package/dist/elements/AlternateContent.js.map +1 -1
  30. package/dist/elements/Bookmark.d.ts.map +1 -1
  31. package/dist/elements/Bookmark.js +3 -1
  32. package/dist/elements/Bookmark.js.map +1 -1
  33. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  34. package/dist/elements/BookmarkManager.js.map +1 -1
  35. package/dist/elements/Comment.d.ts.map +1 -1
  36. package/dist/elements/Comment.js +9 -6
  37. package/dist/elements/Comment.js.map +1 -1
  38. package/dist/elements/CommentManager.d.ts.map +1 -1
  39. package/dist/elements/CommentManager.js +18 -17
  40. package/dist/elements/CommentManager.js.map +1 -1
  41. package/dist/elements/CommonTypes.d.ts +21 -21
  42. package/dist/elements/CommonTypes.d.ts.map +1 -1
  43. package/dist/elements/CommonTypes.js +56 -56
  44. package/dist/elements/CommonTypes.js.map +1 -1
  45. package/dist/elements/CustomXml.js.map +1 -1
  46. package/dist/elements/Endnote.d.ts.map +1 -1
  47. package/dist/elements/Endnote.js +6 -6
  48. package/dist/elements/Endnote.js.map +1 -1
  49. package/dist/elements/EndnoteManager.d.ts.map +1 -1
  50. package/dist/elements/EndnoteManager.js +6 -7
  51. package/dist/elements/EndnoteManager.js.map +1 -1
  52. package/dist/elements/Field.d.ts.map +1 -1
  53. package/dist/elements/Field.js +82 -25
  54. package/dist/elements/Field.js.map +1 -1
  55. package/dist/elements/FieldHelpers.d.ts.map +1 -1
  56. package/dist/elements/FieldHelpers.js.map +1 -1
  57. package/dist/elements/FontManager.d.ts.map +1 -1
  58. package/dist/elements/FontManager.js +1 -1
  59. package/dist/elements/FontManager.js.map +1 -1
  60. package/dist/elements/Footer.js +2 -2
  61. package/dist/elements/Footer.js.map +1 -1
  62. package/dist/elements/Footnote.d.ts.map +1 -1
  63. package/dist/elements/Footnote.js +6 -6
  64. package/dist/elements/Footnote.js.map +1 -1
  65. package/dist/elements/FootnoteManager.d.ts.map +1 -1
  66. package/dist/elements/FootnoteManager.js +6 -7
  67. package/dist/elements/FootnoteManager.js.map +1 -1
  68. package/dist/elements/Header.js +2 -2
  69. package/dist/elements/Header.js.map +1 -1
  70. package/dist/elements/HeaderFooterManager.js.map +1 -1
  71. package/dist/elements/Hyperlink.d.ts +5 -3
  72. package/dist/elements/Hyperlink.d.ts.map +1 -1
  73. package/dist/elements/Hyperlink.js +134 -76
  74. package/dist/elements/Hyperlink.js.map +1 -1
  75. package/dist/elements/Image.d.ts.map +1 -1
  76. package/dist/elements/Image.js +238 -106
  77. package/dist/elements/Image.js.map +1 -1
  78. package/dist/elements/ImageManager.d.ts.map +1 -1
  79. package/dist/elements/ImageManager.js +1 -1
  80. package/dist/elements/ImageManager.js.map +1 -1
  81. package/dist/elements/ImageRun.js +1 -1
  82. package/dist/elements/ImageRun.js.map +1 -1
  83. package/dist/elements/MathElement.js.map +1 -1
  84. package/dist/elements/Paragraph.d.ts +24 -24
  85. package/dist/elements/Paragraph.d.ts.map +1 -1
  86. package/dist/elements/Paragraph.js +181 -188
  87. package/dist/elements/Paragraph.js.map +1 -1
  88. package/dist/elements/PreservedElement.js.map +1 -1
  89. package/dist/elements/PropertyChangeTypes.d.ts.map +1 -1
  90. package/dist/elements/PropertyChangeTypes.js +6 -6
  91. package/dist/elements/PropertyChangeTypes.js.map +1 -1
  92. package/dist/elements/RangeMarker.d.ts.map +1 -1
  93. package/dist/elements/RangeMarker.js.map +1 -1
  94. package/dist/elements/Revision.d.ts.map +1 -1
  95. package/dist/elements/Revision.js +4 -5
  96. package/dist/elements/Revision.js.map +1 -1
  97. package/dist/elements/RevisionContent.js.map +1 -1
  98. package/dist/elements/RevisionManager.d.ts.map +1 -1
  99. package/dist/elements/RevisionManager.js +40 -48
  100. package/dist/elements/RevisionManager.js.map +1 -1
  101. package/dist/elements/Run.d.ts +16 -16
  102. package/dist/elements/Run.d.ts.map +1 -1
  103. package/dist/elements/Run.js +256 -238
  104. package/dist/elements/Run.js.map +1 -1
  105. package/dist/elements/Section.d.ts.map +1 -1
  106. package/dist/elements/Section.js +36 -11
  107. package/dist/elements/Section.js.map +1 -1
  108. package/dist/elements/Shape.d.ts.map +1 -1
  109. package/dist/elements/Shape.js.map +1 -1
  110. package/dist/elements/StructuredDocumentTag.d.ts +6 -6
  111. package/dist/elements/StructuredDocumentTag.d.ts.map +1 -1
  112. package/dist/elements/StructuredDocumentTag.js +99 -104
  113. package/dist/elements/StructuredDocumentTag.js.map +1 -1
  114. package/dist/elements/Table.d.ts +11 -11
  115. package/dist/elements/Table.d.ts.map +1 -1
  116. package/dist/elements/Table.js +102 -107
  117. package/dist/elements/Table.js.map +1 -1
  118. package/dist/elements/TableCell.d.ts +10 -10
  119. package/dist/elements/TableCell.d.ts.map +1 -1
  120. package/dist/elements/TableCell.js +105 -106
  121. package/dist/elements/TableCell.js.map +1 -1
  122. package/dist/elements/TableGridChange.d.ts.map +1 -1
  123. package/dist/elements/TableGridChange.js.map +1 -1
  124. package/dist/elements/TableOfContents.d.ts.map +1 -1
  125. package/dist/elements/TableOfContents.js +4 -4
  126. package/dist/elements/TableOfContents.js.map +1 -1
  127. package/dist/elements/TableOfContentsElement.js.map +1 -1
  128. package/dist/elements/TableRow.d.ts.map +1 -1
  129. package/dist/elements/TableRow.js +13 -6
  130. package/dist/elements/TableRow.js.map +1 -1
  131. package/dist/elements/TextBox.d.ts.map +1 -1
  132. package/dist/elements/TextBox.js +3 -5
  133. package/dist/elements/TextBox.js.map +1 -1
  134. package/dist/formatting/AbstractNumbering.d.ts +4 -4
  135. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  136. package/dist/formatting/AbstractNumbering.js +54 -49
  137. package/dist/formatting/AbstractNumbering.js.map +1 -1
  138. package/dist/formatting/NumberingInstance.d.ts.map +1 -1
  139. package/dist/formatting/NumberingInstance.js +1 -3
  140. package/dist/formatting/NumberingInstance.js.map +1 -1
  141. package/dist/formatting/NumberingLevel.d.ts +5 -5
  142. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  143. package/dist/formatting/NumberingLevel.js +119 -125
  144. package/dist/formatting/NumberingLevel.js.map +1 -1
  145. package/dist/formatting/NumberingManager.d.ts +1 -0
  146. package/dist/formatting/NumberingManager.d.ts.map +1 -1
  147. package/dist/formatting/NumberingManager.js +27 -9
  148. package/dist/formatting/NumberingManager.js.map +1 -1
  149. package/dist/formatting/Style.d.ts +11 -11
  150. package/dist/formatting/Style.d.ts.map +1 -1
  151. package/dist/formatting/Style.js +219 -247
  152. package/dist/formatting/Style.js.map +1 -1
  153. package/dist/formatting/StylesManager.d.ts +2 -2
  154. package/dist/formatting/StylesManager.d.ts.map +1 -1
  155. package/dist/formatting/StylesManager.js +96 -102
  156. package/dist/formatting/StylesManager.js.map +1 -1
  157. package/dist/helpers/CleanupHelper.d.ts +1 -1
  158. package/dist/helpers/CleanupHelper.d.ts.map +1 -1
  159. package/dist/helpers/CleanupHelper.js +6 -6
  160. package/dist/helpers/CleanupHelper.js.map +1 -1
  161. package/dist/images/ImageOptimizer.js +7 -7
  162. package/dist/images/ImageOptimizer.js.map +1 -1
  163. package/dist/index.d.ts +9 -9
  164. package/dist/index.d.ts.map +1 -1
  165. package/dist/index.js.map +1 -1
  166. package/dist/managers/DrawingManager.js.map +1 -1
  167. package/dist/tracking/DocumentTrackingContext.d.ts.map +1 -1
  168. package/dist/tracking/DocumentTrackingContext.js +23 -7
  169. package/dist/tracking/DocumentTrackingContext.js.map +1 -1
  170. package/dist/tracking/TrackingContext.d.ts.map +1 -1
  171. package/dist/tracking/TrackingContext.js.map +1 -1
  172. package/dist/types/compatibility-types.js.map +1 -1
  173. package/dist/types/formatting.js.map +1 -1
  174. package/dist/types/list-types.d.ts +6 -6
  175. package/dist/types/list-types.js.map +1 -1
  176. package/dist/types/settings-types.js.map +1 -1
  177. package/dist/types/styleConfig.d.ts +2 -2
  178. package/dist/types/styleConfig.js.map +1 -1
  179. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  180. package/dist/utils/ChangelogGenerator.js +97 -101
  181. package/dist/utils/ChangelogGenerator.js.map +1 -1
  182. package/dist/utils/CompatibilityUpgrader.d.ts.map +1 -1
  183. package/dist/utils/CompatibilityUpgrader.js +1 -1
  184. package/dist/utils/CompatibilityUpgrader.js.map +1 -1
  185. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  186. package/dist/utils/InMemoryRevisionAcceptor.js +1 -6
  187. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  188. package/dist/utils/MoveOperationHelper.d.ts.map +1 -1
  189. package/dist/utils/MoveOperationHelper.js +1 -1
  190. package/dist/utils/MoveOperationHelper.js.map +1 -1
  191. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  192. package/dist/utils/RevisionAwareProcessor.js +2 -4
  193. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  194. package/dist/utils/RevisionWalker.d.ts.map +1 -1
  195. package/dist/utils/RevisionWalker.js +4 -12
  196. package/dist/utils/RevisionWalker.js.map +1 -1
  197. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  198. package/dist/utils/SelectiveRevisionAcceptor.js +2 -6
  199. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  200. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  201. package/dist/utils/ShadingResolver.js +1 -1
  202. package/dist/utils/ShadingResolver.js.map +1 -1
  203. package/dist/utils/acceptRevisions.d.ts.map +1 -1
  204. package/dist/utils/acceptRevisions.js +23 -12
  205. package/dist/utils/acceptRevisions.js.map +1 -1
  206. package/dist/utils/cnfStyleDecoder.d.ts +1 -1
  207. package/dist/utils/cnfStyleDecoder.d.ts.map +1 -1
  208. package/dist/utils/cnfStyleDecoder.js +40 -40
  209. package/dist/utils/cnfStyleDecoder.js.map +1 -1
  210. package/dist/utils/corruptionDetection.d.ts.map +1 -1
  211. package/dist/utils/corruptionDetection.js.map +1 -1
  212. package/dist/utils/dateFormatting.js.map +1 -1
  213. package/dist/utils/deepClone.js +1 -1
  214. package/dist/utils/deepClone.js.map +1 -1
  215. package/dist/utils/diagnostics.d.ts.map +1 -1
  216. package/dist/utils/diagnostics.js +1 -1
  217. package/dist/utils/diagnostics.js.map +1 -1
  218. package/dist/utils/errorHandling.js.map +1 -1
  219. package/dist/utils/formatting.d.ts.map +1 -1
  220. package/dist/utils/formatting.js +10 -2
  221. package/dist/utils/formatting.js.map +1 -1
  222. package/dist/utils/list-detection.d.ts +2 -2
  223. package/dist/utils/list-detection.d.ts.map +1 -1
  224. package/dist/utils/list-detection.js +21 -23
  225. package/dist/utils/list-detection.js.map +1 -1
  226. package/dist/utils/logger.d.ts.map +1 -1
  227. package/dist/utils/logger.js +12 -7
  228. package/dist/utils/logger.js.map +1 -1
  229. package/dist/utils/parsingHelpers.js.map +1 -1
  230. package/dist/utils/stripTrackedChanges.d.ts.map +1 -1
  231. package/dist/utils/stripTrackedChanges.js +3 -3
  232. package/dist/utils/stripTrackedChanges.js.map +1 -1
  233. package/dist/utils/textDiff.d.ts +1 -1
  234. package/dist/utils/textDiff.js +8 -8
  235. package/dist/utils/textDiff.js.map +1 -1
  236. package/dist/utils/units.js.map +1 -1
  237. package/dist/utils/validation.d.ts.map +1 -1
  238. package/dist/utils/validation.js +24 -7
  239. package/dist/utils/validation.js.map +1 -1
  240. package/dist/utils/xmlSanitization.d.ts.map +1 -1
  241. package/dist/utils/xmlSanitization.js +3 -3
  242. package/dist/utils/xmlSanitization.js.map +1 -1
  243. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  244. package/dist/validation/RevisionAutoFixer.js +5 -5
  245. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  246. package/dist/validation/RevisionValidator.d.ts.map +1 -1
  247. package/dist/validation/RevisionValidator.js +7 -9
  248. package/dist/validation/RevisionValidator.js.map +1 -1
  249. package/dist/validation/ValidationRules.js +3 -3
  250. package/dist/validation/ValidationRules.js.map +1 -1
  251. package/dist/validation/index.js.map +1 -1
  252. package/dist/xml/XMLBuilder.d.ts +1 -1
  253. package/dist/xml/XMLBuilder.d.ts.map +1 -1
  254. package/dist/xml/XMLBuilder.js +98 -100
  255. package/dist/xml/XMLBuilder.js.map +1 -1
  256. package/dist/xml/XMLParser.d.ts.map +1 -1
  257. package/dist/xml/XMLParser.js +61 -66
  258. package/dist/xml/XMLParser.js.map +1 -1
  259. package/dist/zip/ZipHandler.d.ts.map +1 -1
  260. package/dist/zip/ZipHandler.js.map +1 -1
  261. package/dist/zip/ZipReader.d.ts.map +1 -1
  262. package/dist/zip/ZipReader.js +1 -3
  263. package/dist/zip/ZipReader.js.map +1 -1
  264. package/dist/zip/ZipWriter.d.ts +1 -1
  265. package/dist/zip/ZipWriter.d.ts.map +1 -1
  266. package/dist/zip/ZipWriter.js +28 -36
  267. package/dist/zip/ZipWriter.js.map +1 -1
  268. package/dist/zip/types.js +1 -1
  269. package/dist/zip/types.js.map +1 -1
  270. package/package.json +92 -92
  271. package/src/__tests__/helper-methods.test.ts +512 -512
  272. package/src/constants/legacyCompatFlags.ts +138 -138
  273. package/src/constants/limits.ts +50 -50
  274. package/src/core/Document.ts +1010 -1145
  275. package/src/core/DocumentContent.ts +461 -467
  276. package/src/core/DocumentGenerator.ts +1133 -1104
  277. package/src/core/DocumentIdManager.ts +158 -158
  278. package/src/core/DocumentParser.ts +2347 -2716
  279. package/src/core/DocumentValidator.ts +363 -372
  280. package/src/core/Relationship.ts +367 -367
  281. package/src/core/RelationshipManager.ts +429 -428
  282. package/src/elements/AlternateContent.ts +42 -42
  283. package/src/elements/Bookmark.ts +212 -210
  284. package/src/elements/BookmarkManager.ts +247 -250
  285. package/src/elements/Comment.ts +356 -359
  286. package/src/elements/CommentManager.ts +499 -502
  287. package/src/elements/CommonTypes.ts +524 -549
  288. package/src/elements/CustomXml.ts +36 -36
  289. package/src/elements/Endnote.ts +221 -217
  290. package/src/elements/EndnoteManager.ts +246 -249
  291. package/src/elements/Field.ts +1292 -1233
  292. package/src/elements/FieldHelpers.ts +329 -333
  293. package/src/elements/FontManager.ts +336 -339
  294. package/src/elements/Footer.ts +269 -269
  295. package/src/elements/Footnote.ts +221 -217
  296. package/src/elements/FootnoteManager.ts +246 -249
  297. package/src/elements/Header.ts +269 -269
  298. package/src/elements/HeaderFooterManager.ts +219 -219
  299. package/src/elements/Hyperlink.ts +1288 -1193
  300. package/src/elements/Image.ts +1982 -1756
  301. package/src/elements/ImageManager.ts +437 -432
  302. package/src/elements/ImageRun.ts +59 -59
  303. package/src/elements/MathElement.ts +65 -65
  304. package/src/elements/Paragraph.ts +4347 -4287
  305. package/src/elements/PreservedElement.ts +53 -53
  306. package/src/elements/PropertyChangeTypes.ts +458 -442
  307. package/src/elements/RangeMarker.ts +382 -400
  308. package/src/elements/Revision.ts +1198 -1217
  309. package/src/elements/RevisionContent.ts +73 -73
  310. package/src/elements/RevisionManager.ts +1070 -1070
  311. package/src/elements/Run.ts +3103 -3073
  312. package/src/elements/Section.ts +1521 -1421
  313. package/src/elements/Shape.ts +884 -873
  314. package/src/elements/StructuredDocumentTag.ts +1176 -1207
  315. package/src/elements/Table.ts +2468 -2524
  316. package/src/elements/TableCell.ts +1617 -1621
  317. package/src/elements/TableGridChange.ts +149 -151
  318. package/src/elements/TableOfContents.ts +701 -691
  319. package/src/elements/TableOfContentsElement.ts +89 -89
  320. package/src/elements/TableRow.ts +960 -929
  321. package/src/elements/TextBox.ts +766 -768
  322. package/src/formatting/AbstractNumbering.ts +580 -579
  323. package/src/formatting/NumberingInstance.ts +295 -299
  324. package/src/formatting/NumberingLevel.ts +981 -1040
  325. package/src/formatting/NumberingManager.ts +875 -827
  326. package/src/formatting/Style.ts +1785 -1879
  327. package/src/formatting/StylesManager.ts +1090 -1130
  328. package/src/helpers/CleanupHelper.ts +524 -524
  329. package/src/images/ImageOptimizer.ts +274 -274
  330. package/src/index.ts +559 -554
  331. package/src/managers/DrawingManager.ts +319 -319
  332. package/src/tracking/DocumentTrackingContext.ts +687 -674
  333. package/src/tracking/TrackingContext.ts +175 -173
  334. package/src/types/compatibility-types.ts +49 -49
  335. package/src/types/formatting.ts +210 -210
  336. package/src/types/list-types.ts +14 -14
  337. package/src/types/settings-types.ts +59 -59
  338. package/src/types/styleConfig.ts +189 -189
  339. package/src/utils/ChangelogGenerator.ts +1583 -1581
  340. package/src/utils/CompatibilityUpgrader.ts +235 -237
  341. package/src/utils/InMemoryRevisionAcceptor.ts +691 -696
  342. package/src/utils/MoveOperationHelper.ts +233 -238
  343. package/src/utils/RevisionAwareProcessor.ts +518 -526
  344. package/src/utils/RevisionWalker.ts +427 -457
  345. package/src/utils/SelectiveRevisionAcceptor.ts +662 -683
  346. package/src/utils/ShadingResolver.ts +105 -107
  347. package/src/utils/acceptRevisions.ts +723 -714
  348. package/src/utils/cnfStyleDecoder.ts +212 -217
  349. package/src/utils/corruptionDetection.ts +346 -345
  350. package/src/utils/dateFormatting.ts +20 -20
  351. package/src/utils/deepClone.ts +77 -78
  352. package/src/utils/diagnostics.ts +125 -129
  353. package/src/utils/errorHandling.ts +80 -80
  354. package/src/utils/formatting.ts +220 -213
  355. package/src/utils/list-detection.ts +32 -42
  356. package/src/utils/logger.ts +412 -404
  357. package/src/utils/parsingHelpers.ts +190 -190
  358. package/src/utils/stripTrackedChanges.ts +356 -353
  359. package/src/utils/textDiff.ts +100 -100
  360. package/src/utils/units.ts +421 -421
  361. package/src/utils/validation.ts +553 -542
  362. package/src/utils/xmlSanitization.ts +179 -182
  363. package/src/validation/RevisionAutoFixer.ts +541 -542
  364. package/src/validation/RevisionValidator.ts +470 -460
  365. package/src/validation/ValidationRules.ts +338 -338
  366. package/src/validation/index.ts +30 -30
  367. package/src/xml/XMLBuilder.ts +857 -871
  368. package/src/xml/XMLParser.ts +877 -919
  369. package/src/zip/ZipHandler.ts +629 -637
  370. package/src/zip/ZipReader.ts +295 -299
  371. package/src/zip/ZipWriter.ts +374 -390
  372. package/src/zip/types.ts +116 -116
@@ -1,182 +1,179 @@
1
- /**
2
- * XML Sanitization Utilities
3
- *
4
- * Provides functions for validating and sanitizing text content per XML 1.0 specification.
5
- * Per XML 1.0, certain control characters are invalid and must be removed before
6
- * including text in XML documents.
7
- *
8
- * Valid characters in XML 1.0:
9
- * - 0x09 (tab), 0x0A (newline), 0x0D (carriage return)
10
- * - 0x20-0xD7FF, 0xE000-0xFFFD, 0x10000-0x10FFFF
11
- *
12
- * Invalid characters (control characters that must be removed):
13
- * - 0x00-0x08 (NULL through BACKSPACE)
14
- * - 0x0B-0x0C (VERTICAL TAB and FORM FEED)
15
- * - 0x0E-0x1F (SHIFT OUT through UNIT SEPARATOR)
16
- * - 0x7F (DELETE)
17
- *
18
- * @module xmlSanitization
19
- */
20
-
21
- import { getGlobalLogger } from "./logger";
22
-
23
- /**
24
- * Regular expression matching invalid XML 1.0 control characters.
25
- * Matches: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F
26
- * Does NOT match valid chars: 0x09 (tab), 0x0A (newline), 0x0D (CR)
27
- */
28
- const INVALID_XML_CHAR_REGEX = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
29
-
30
- /**
31
- * Removes invalid XML 1.0 control characters from text.
32
- *
33
- * Per XML 1.0 spec, characters 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F are invalid
34
- * and cannot appear in XML documents. This function removes them.
35
- *
36
- * Valid control characters are preserved:
37
- * - Tab (0x09)
38
- * - Line Feed / Newline (0x0A)
39
- * - Carriage Return (0x0D)
40
- *
41
- * @param text - Input text to sanitize
42
- * @param logWarning - If true, logs a warning when invalid chars are found (default: true)
43
- * @returns Sanitized text with invalid control characters removed
44
- *
45
- * @example
46
- * ```typescript
47
- * // Remove NULL byte from text
48
- * const clean = removeInvalidXmlChars("Hello\x00World");
49
- * // Returns: "HelloWorld"
50
- *
51
- * // Tab and newline are preserved
52
- * const preserved = removeInvalidXmlChars("Hello\tWorld\n");
53
- * // Returns: "Hello\tWorld\n"
54
- * ```
55
- */
56
- export function removeInvalidXmlChars(
57
- text: string,
58
- logWarning = true
59
- ): string {
60
- // Reset regex lastIndex for global regex
61
- INVALID_XML_CHAR_REGEX.lastIndex = 0;
62
-
63
- if (logWarning && INVALID_XML_CHAR_REGEX.test(text)) {
64
- // Reset regex lastIndex after test
65
- INVALID_XML_CHAR_REGEX.lastIndex = 0;
66
-
67
- const invalidChars = findInvalidXmlChars(text);
68
- const hexCodes = invalidChars
69
- .map((c) => `0x${c.toString(16).toUpperCase().padStart(2, "0")}`)
70
- .join(", ");
71
- getGlobalLogger().warn(
72
- `[XMLSanitization] Removing invalid XML control characters: ${hexCodes}`
73
- );
74
- }
75
-
76
- // Reset regex lastIndex before replace
77
- INVALID_XML_CHAR_REGEX.lastIndex = 0;
78
- return text.replace(INVALID_XML_CHAR_REGEX, "");
79
- }
80
-
81
- /**
82
- * Finds all invalid XML 1.0 control characters in text.
83
- *
84
- * Returns an array of unique character codes that are invalid per XML 1.0 spec.
85
- * This is useful for diagnostics and error reporting.
86
- *
87
- * @param text - Text to scan for invalid characters
88
- * @returns Array of unique invalid character codes found, or empty array if text is valid
89
- *
90
- * @example
91
- * ```typescript
92
- * const invalid = findInvalidXmlChars("Hello\x00\x08World");
93
- * // Returns: [0, 8] - NULL and BACKSPACE codes
94
- *
95
- * const valid = findInvalidXmlChars("Hello\tWorld");
96
- * // Returns: [] - tab is valid
97
- * ```
98
- */
99
- export function findInvalidXmlChars(text: string): number[] {
100
- const invalid: number[] = [];
101
-
102
- for (let i = 0; i < text.length; i++) {
103
- const code = text.charCodeAt(i);
104
-
105
- // Check if character is in invalid ranges
106
- if (
107
- (code >= 0x00 && code <= 0x08) || // NULL through BACKSPACE
108
- (code >= 0x0b && code <= 0x0c) || // VERTICAL TAB and FORM FEED
109
- (code >= 0x0e && code <= 0x1f) || // SHIFT OUT through UNIT SEPARATOR
110
- code === 0x7f // DELETE
111
- ) {
112
- // Only add unique codes
113
- if (!invalid.includes(code)) {
114
- invalid.push(code);
115
- }
116
- }
117
- }
118
-
119
- return invalid;
120
- }
121
-
122
- /**
123
- * Checks if text contains any invalid XML 1.0 control characters.
124
- *
125
- * This is a fast check that returns true/false without identifying specific characters.
126
- * Use `findInvalidXmlChars()` if you need to know which characters are invalid.
127
- *
128
- * @param text - Text to check
129
- * @returns true if text contains invalid characters, false otherwise
130
- *
131
- * @example
132
- * ```typescript
133
- * hasInvalidXmlChars("Hello\x00World"); // true - NULL byte
134
- * hasInvalidXmlChars("Hello\tWorld"); // false - tab is valid
135
- * hasInvalidXmlChars("Normal text"); // false
136
- * ```
137
- */
138
- export function hasInvalidXmlChars(text: string): boolean {
139
- // Reset regex lastIndex for global regex
140
- INVALID_XML_CHAR_REGEX.lastIndex = 0;
141
- return INVALID_XML_CHAR_REGEX.test(text);
142
- }
143
-
144
- /**
145
- * Character code constants for documentation and testing.
146
- */
147
- export const XML_CONTROL_CHARS = {
148
- /** NULL (0x00) - Invalid */
149
- NULL: 0x00,
150
- /** Start of Heading (0x01) - Invalid */
151
- SOH: 0x01,
152
- /** Start of Text (0x02) - Invalid */
153
- STX: 0x02,
154
- /** End of Text (0x03) - Invalid */
155
- ETX: 0x03,
156
- /** End of Transmission (0x04) - Invalid */
157
- EOT: 0x04,
158
- /** Enquiry (0x05) - Invalid */
159
- ENQ: 0x05,
160
- /** Acknowledge (0x06) - Invalid */
161
- ACK: 0x06,
162
- /** Bell (0x07) - Invalid */
163
- BEL: 0x07,
164
- /** Backspace (0x08) - Invalid */
165
- BS: 0x08,
166
- /** Horizontal Tab (0x09) - VALID */
167
- TAB: 0x09,
168
- /** Line Feed / Newline (0x0A) - VALID */
169
- LF: 0x0a,
170
- /** Vertical Tab (0x0B) - Invalid */
171
- VT: 0x0b,
172
- /** Form Feed (0x0C) - Invalid */
173
- FF: 0x0c,
174
- /** Carriage Return (0x0D) - VALID */
175
- CR: 0x0d,
176
- /** Shift Out (0x0E) - Invalid */
177
- SO: 0x0e,
178
- /** Unit Separator (0x1F) - Invalid */
179
- US: 0x1f,
180
- /** Delete (0x7F) - Invalid */
181
- DEL: 0x7f,
182
- } as const;
1
+ /**
2
+ * XML Sanitization Utilities
3
+ *
4
+ * Provides functions for validating and sanitizing text content per XML 1.0 specification.
5
+ * Per XML 1.0, certain control characters are invalid and must be removed before
6
+ * including text in XML documents.
7
+ *
8
+ * Valid characters in XML 1.0:
9
+ * - 0x09 (tab), 0x0A (newline), 0x0D (carriage return)
10
+ * - 0x20-0xD7FF, 0xE000-0xFFFD, 0x10000-0x10FFFF
11
+ *
12
+ * Invalid characters (control characters that must be removed):
13
+ * - 0x00-0x08 (NULL through BACKSPACE)
14
+ * - 0x0B-0x0C (VERTICAL TAB and FORM FEED)
15
+ * - 0x0E-0x1F (SHIFT OUT through UNIT SEPARATOR)
16
+ * - 0x7F (DELETE)
17
+ *
18
+ * @module xmlSanitization
19
+ */
20
+
21
+ import { getGlobalLogger } from './logger';
22
+
23
+ /**
24
+ * Regular expression matching invalid XML 1.0 control characters.
25
+ * Matches: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F
26
+ * Does NOT match valid chars: 0x09 (tab), 0x0A (newline), 0x0D (CR)
27
+ */
28
+ const INVALID_XML_CHAR_REGEX = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
29
+
30
+ /**
31
+ * Removes invalid XML 1.0 control characters from text.
32
+ *
33
+ * Per XML 1.0 spec, characters 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F are invalid
34
+ * and cannot appear in XML documents. This function removes them.
35
+ *
36
+ * Valid control characters are preserved:
37
+ * - Tab (0x09)
38
+ * - Line Feed / Newline (0x0A)
39
+ * - Carriage Return (0x0D)
40
+ *
41
+ * @param text - Input text to sanitize
42
+ * @param logWarning - If true, logs a warning when invalid chars are found (default: true)
43
+ * @returns Sanitized text with invalid control characters removed
44
+ *
45
+ * @example
46
+ * ```typescript
47
+ * // Remove NULL byte from text
48
+ * const clean = removeInvalidXmlChars("Hello\x00World");
49
+ * // Returns: "HelloWorld"
50
+ *
51
+ * // Tab and newline are preserved
52
+ * const preserved = removeInvalidXmlChars("Hello\tWorld\n");
53
+ * // Returns: "Hello\tWorld\n"
54
+ * ```
55
+ */
56
+ export function removeInvalidXmlChars(text: string, logWarning = true): string {
57
+ // Reset regex lastIndex for global regex
58
+ INVALID_XML_CHAR_REGEX.lastIndex = 0;
59
+
60
+ if (logWarning && INVALID_XML_CHAR_REGEX.test(text)) {
61
+ // Reset regex lastIndex after test
62
+ INVALID_XML_CHAR_REGEX.lastIndex = 0;
63
+
64
+ const invalidChars = findInvalidXmlChars(text);
65
+ const hexCodes = invalidChars
66
+ .map((c) => `0x${c.toString(16).toUpperCase().padStart(2, '0')}`)
67
+ .join(', ');
68
+ getGlobalLogger().warn(
69
+ `[XMLSanitization] Removing invalid XML control characters: ${hexCodes}`
70
+ );
71
+ }
72
+
73
+ // Reset regex lastIndex before replace
74
+ INVALID_XML_CHAR_REGEX.lastIndex = 0;
75
+ return text.replace(INVALID_XML_CHAR_REGEX, '');
76
+ }
77
+
78
+ /**
79
+ * Finds all invalid XML 1.0 control characters in text.
80
+ *
81
+ * Returns an array of unique character codes that are invalid per XML 1.0 spec.
82
+ * This is useful for diagnostics and error reporting.
83
+ *
84
+ * @param text - Text to scan for invalid characters
85
+ * @returns Array of unique invalid character codes found, or empty array if text is valid
86
+ *
87
+ * @example
88
+ * ```typescript
89
+ * const invalid = findInvalidXmlChars("Hello\x00\x08World");
90
+ * // Returns: [0, 8] - NULL and BACKSPACE codes
91
+ *
92
+ * const valid = findInvalidXmlChars("Hello\tWorld");
93
+ * // Returns: [] - tab is valid
94
+ * ```
95
+ */
96
+ export function findInvalidXmlChars(text: string): number[] {
97
+ const invalid: number[] = [];
98
+
99
+ for (let i = 0; i < text.length; i++) {
100
+ const code = text.charCodeAt(i);
101
+
102
+ // Check if character is in invalid ranges
103
+ if (
104
+ (code >= 0x00 && code <= 0x08) || // NULL through BACKSPACE
105
+ (code >= 0x0b && code <= 0x0c) || // VERTICAL TAB and FORM FEED
106
+ (code >= 0x0e && code <= 0x1f) || // SHIFT OUT through UNIT SEPARATOR
107
+ code === 0x7f // DELETE
108
+ ) {
109
+ // Only add unique codes
110
+ if (!invalid.includes(code)) {
111
+ invalid.push(code);
112
+ }
113
+ }
114
+ }
115
+
116
+ return invalid;
117
+ }
118
+
119
+ /**
120
+ * Checks if text contains any invalid XML 1.0 control characters.
121
+ *
122
+ * This is a fast check that returns true/false without identifying specific characters.
123
+ * Use `findInvalidXmlChars()` if you need to know which characters are invalid.
124
+ *
125
+ * @param text - Text to check
126
+ * @returns true if text contains invalid characters, false otherwise
127
+ *
128
+ * @example
129
+ * ```typescript
130
+ * hasInvalidXmlChars("Hello\x00World"); // true - NULL byte
131
+ * hasInvalidXmlChars("Hello\tWorld"); // false - tab is valid
132
+ * hasInvalidXmlChars("Normal text"); // false
133
+ * ```
134
+ */
135
+ export function hasInvalidXmlChars(text: string): boolean {
136
+ // Reset regex lastIndex for global regex
137
+ INVALID_XML_CHAR_REGEX.lastIndex = 0;
138
+ return INVALID_XML_CHAR_REGEX.test(text);
139
+ }
140
+
141
+ /**
142
+ * Character code constants for documentation and testing.
143
+ */
144
+ export const XML_CONTROL_CHARS = {
145
+ /** NULL (0x00) - Invalid */
146
+ NULL: 0x00,
147
+ /** Start of Heading (0x01) - Invalid */
148
+ SOH: 0x01,
149
+ /** Start of Text (0x02) - Invalid */
150
+ STX: 0x02,
151
+ /** End of Text (0x03) - Invalid */
152
+ ETX: 0x03,
153
+ /** End of Transmission (0x04) - Invalid */
154
+ EOT: 0x04,
155
+ /** Enquiry (0x05) - Invalid */
156
+ ENQ: 0x05,
157
+ /** Acknowledge (0x06) - Invalid */
158
+ ACK: 0x06,
159
+ /** Bell (0x07) - Invalid */
160
+ BEL: 0x07,
161
+ /** Backspace (0x08) - Invalid */
162
+ BS: 0x08,
163
+ /** Horizontal Tab (0x09) - VALID */
164
+ TAB: 0x09,
165
+ /** Line Feed / Newline (0x0A) - VALID */
166
+ LF: 0x0a,
167
+ /** Vertical Tab (0x0B) - Invalid */
168
+ VT: 0x0b,
169
+ /** Form Feed (0x0C) - Invalid */
170
+ FF: 0x0c,
171
+ /** Carriage Return (0x0D) - VALID */
172
+ CR: 0x0d,
173
+ /** Shift Out (0x0E) - Invalid */
174
+ SO: 0x0e,
175
+ /** Unit Separator (0x1F) - Invalid */
176
+ US: 0x1f,
177
+ /** Delete (0x7F) - Invalid */
178
+ DEL: 0x7f,
179
+ } as const;