docxmlater 10.1.4 → 10.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372)
  1. package/README.md +759 -754
  2. package/dist/constants/legacyCompatFlags.js +1 -1
  3. package/dist/constants/legacyCompatFlags.js.map +1 -1
  4. package/dist/constants/limits.js.map +1 -1
  5. package/dist/core/Document.d.ts +51 -50
  6. package/dist/core/Document.d.ts.map +1 -1
  7. package/dist/core/Document.js +486 -471
  8. package/dist/core/Document.js.map +1 -1
  9. package/dist/core/DocumentContent.d.ts +9 -9
  10. package/dist/core/DocumentContent.d.ts.map +1 -1
  11. package/dist/core/DocumentContent.js +1 -1
  12. package/dist/core/DocumentContent.js.map +1 -1
  13. package/dist/core/DocumentGenerator.d.ts +11 -11
  14. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  15. package/dist/core/DocumentGenerator.js +251 -251
  16. package/dist/core/DocumentGenerator.js.map +1 -1
  17. package/dist/core/DocumentIdManager.js.map +1 -1
  18. package/dist/core/DocumentParser.d.ts +15 -15
  19. package/dist/core/DocumentParser.d.ts.map +1 -1
  20. package/dist/core/DocumentParser.js +2123 -2155
  21. package/dist/core/DocumentParser.js.map +1 -1
  22. package/dist/core/DocumentValidator.d.ts.map +1 -1
  23. package/dist/core/DocumentValidator.js +2 -5
  24. package/dist/core/DocumentValidator.js.map +1 -1
  25. package/dist/core/Relationship.js.map +1 -1
  26. package/dist/core/RelationshipManager.d.ts.map +1 -1
  27. package/dist/core/RelationshipManager.js +3 -3
  28. package/dist/core/RelationshipManager.js.map +1 -1
  29. package/dist/elements/AlternateContent.js.map +1 -1
  30. package/dist/elements/Bookmark.d.ts.map +1 -1
  31. package/dist/elements/Bookmark.js +3 -1
  32. package/dist/elements/Bookmark.js.map +1 -1
  33. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  34. package/dist/elements/BookmarkManager.js.map +1 -1
  35. package/dist/elements/Comment.d.ts.map +1 -1
  36. package/dist/elements/Comment.js +9 -6
  37. package/dist/elements/Comment.js.map +1 -1
  38. package/dist/elements/CommentManager.d.ts.map +1 -1
  39. package/dist/elements/CommentManager.js +18 -17
  40. package/dist/elements/CommentManager.js.map +1 -1
  41. package/dist/elements/CommonTypes.d.ts +21 -21
  42. package/dist/elements/CommonTypes.d.ts.map +1 -1
  43. package/dist/elements/CommonTypes.js +56 -56
  44. package/dist/elements/CommonTypes.js.map +1 -1
  45. package/dist/elements/CustomXml.js.map +1 -1
  46. package/dist/elements/Endnote.d.ts.map +1 -1
  47. package/dist/elements/Endnote.js +6 -6
  48. package/dist/elements/Endnote.js.map +1 -1
  49. package/dist/elements/EndnoteManager.d.ts.map +1 -1
  50. package/dist/elements/EndnoteManager.js +6 -7
  51. package/dist/elements/EndnoteManager.js.map +1 -1
  52. package/dist/elements/Field.d.ts.map +1 -1
  53. package/dist/elements/Field.js +82 -25
  54. package/dist/elements/Field.js.map +1 -1
  55. package/dist/elements/FieldHelpers.d.ts.map +1 -1
  56. package/dist/elements/FieldHelpers.js.map +1 -1
  57. package/dist/elements/FontManager.d.ts.map +1 -1
  58. package/dist/elements/FontManager.js +1 -1
  59. package/dist/elements/FontManager.js.map +1 -1
  60. package/dist/elements/Footer.js +2 -2
  61. package/dist/elements/Footer.js.map +1 -1
  62. package/dist/elements/Footnote.d.ts.map +1 -1
  63. package/dist/elements/Footnote.js +6 -6
  64. package/dist/elements/Footnote.js.map +1 -1
  65. package/dist/elements/FootnoteManager.d.ts.map +1 -1
  66. package/dist/elements/FootnoteManager.js +6 -7
  67. package/dist/elements/FootnoteManager.js.map +1 -1
  68. package/dist/elements/Header.js +2 -2
  69. package/dist/elements/Header.js.map +1 -1
  70. package/dist/elements/HeaderFooterManager.js.map +1 -1
  71. package/dist/elements/Hyperlink.d.ts +5 -3
  72. package/dist/elements/Hyperlink.d.ts.map +1 -1
  73. package/dist/elements/Hyperlink.js +134 -76
  74. package/dist/elements/Hyperlink.js.map +1 -1
  75. package/dist/elements/Image.d.ts.map +1 -1
  76. package/dist/elements/Image.js +238 -106
  77. package/dist/elements/Image.js.map +1 -1
  78. package/dist/elements/ImageManager.d.ts.map +1 -1
  79. package/dist/elements/ImageManager.js +1 -1
  80. package/dist/elements/ImageManager.js.map +1 -1
  81. package/dist/elements/ImageRun.js +1 -1
  82. package/dist/elements/ImageRun.js.map +1 -1
  83. package/dist/elements/MathElement.js.map +1 -1
  84. package/dist/elements/Paragraph.d.ts +24 -24
  85. package/dist/elements/Paragraph.d.ts.map +1 -1
  86. package/dist/elements/Paragraph.js +181 -188
  87. package/dist/elements/Paragraph.js.map +1 -1
  88. package/dist/elements/PreservedElement.js.map +1 -1
  89. package/dist/elements/PropertyChangeTypes.d.ts.map +1 -1
  90. package/dist/elements/PropertyChangeTypes.js +6 -6
  91. package/dist/elements/PropertyChangeTypes.js.map +1 -1
  92. package/dist/elements/RangeMarker.d.ts.map +1 -1
  93. package/dist/elements/RangeMarker.js.map +1 -1
  94. package/dist/elements/Revision.d.ts.map +1 -1
  95. package/dist/elements/Revision.js +4 -5
  96. package/dist/elements/Revision.js.map +1 -1
  97. package/dist/elements/RevisionContent.js.map +1 -1
  98. package/dist/elements/RevisionManager.d.ts.map +1 -1
  99. package/dist/elements/RevisionManager.js +40 -48
  100. package/dist/elements/RevisionManager.js.map +1 -1
  101. package/dist/elements/Run.d.ts +16 -16
  102. package/dist/elements/Run.d.ts.map +1 -1
  103. package/dist/elements/Run.js +256 -238
  104. package/dist/elements/Run.js.map +1 -1
  105. package/dist/elements/Section.d.ts.map +1 -1
  106. package/dist/elements/Section.js +36 -11
  107. package/dist/elements/Section.js.map +1 -1
  108. package/dist/elements/Shape.d.ts.map +1 -1
  109. package/dist/elements/Shape.js.map +1 -1
  110. package/dist/elements/StructuredDocumentTag.d.ts +6 -6
  111. package/dist/elements/StructuredDocumentTag.d.ts.map +1 -1
  112. package/dist/elements/StructuredDocumentTag.js +99 -104
  113. package/dist/elements/StructuredDocumentTag.js.map +1 -1
  114. package/dist/elements/Table.d.ts +11 -11
  115. package/dist/elements/Table.d.ts.map +1 -1
  116. package/dist/elements/Table.js +102 -107
  117. package/dist/elements/Table.js.map +1 -1
  118. package/dist/elements/TableCell.d.ts +10 -10
  119. package/dist/elements/TableCell.d.ts.map +1 -1
  120. package/dist/elements/TableCell.js +105 -106
  121. package/dist/elements/TableCell.js.map +1 -1
  122. package/dist/elements/TableGridChange.d.ts.map +1 -1
  123. package/dist/elements/TableGridChange.js.map +1 -1
  124. package/dist/elements/TableOfContents.d.ts.map +1 -1
  125. package/dist/elements/TableOfContents.js +4 -4
  126. package/dist/elements/TableOfContents.js.map +1 -1
  127. package/dist/elements/TableOfContentsElement.js.map +1 -1
  128. package/dist/elements/TableRow.d.ts.map +1 -1
  129. package/dist/elements/TableRow.js +13 -6
  130. package/dist/elements/TableRow.js.map +1 -1
  131. package/dist/elements/TextBox.d.ts.map +1 -1
  132. package/dist/elements/TextBox.js +3 -5
  133. package/dist/elements/TextBox.js.map +1 -1
  134. package/dist/formatting/AbstractNumbering.d.ts +4 -4
  135. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  136. package/dist/formatting/AbstractNumbering.js +54 -49
  137. package/dist/formatting/AbstractNumbering.js.map +1 -1
  138. package/dist/formatting/NumberingInstance.d.ts.map +1 -1
  139. package/dist/formatting/NumberingInstance.js +1 -3
  140. package/dist/formatting/NumberingInstance.js.map +1 -1
  141. package/dist/formatting/NumberingLevel.d.ts +5 -5
  142. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  143. package/dist/formatting/NumberingLevel.js +119 -125
  144. package/dist/formatting/NumberingLevel.js.map +1 -1
  145. package/dist/formatting/NumberingManager.d.ts +1 -0
  146. package/dist/formatting/NumberingManager.d.ts.map +1 -1
  147. package/dist/formatting/NumberingManager.js +27 -9
  148. package/dist/formatting/NumberingManager.js.map +1 -1
  149. package/dist/formatting/Style.d.ts +11 -11
  150. package/dist/formatting/Style.d.ts.map +1 -1
  151. package/dist/formatting/Style.js +219 -247
  152. package/dist/formatting/Style.js.map +1 -1
  153. package/dist/formatting/StylesManager.d.ts +2 -2
  154. package/dist/formatting/StylesManager.d.ts.map +1 -1
  155. package/dist/formatting/StylesManager.js +96 -102
  156. package/dist/formatting/StylesManager.js.map +1 -1
  157. package/dist/helpers/CleanupHelper.d.ts +1 -1
  158. package/dist/helpers/CleanupHelper.d.ts.map +1 -1
  159. package/dist/helpers/CleanupHelper.js +6 -6
  160. package/dist/helpers/CleanupHelper.js.map +1 -1
  161. package/dist/images/ImageOptimizer.js +7 -7
  162. package/dist/images/ImageOptimizer.js.map +1 -1
  163. package/dist/index.d.ts +9 -9
  164. package/dist/index.d.ts.map +1 -1
  165. package/dist/index.js.map +1 -1
  166. package/dist/managers/DrawingManager.js.map +1 -1
  167. package/dist/tracking/DocumentTrackingContext.d.ts.map +1 -1
  168. package/dist/tracking/DocumentTrackingContext.js +23 -7
  169. package/dist/tracking/DocumentTrackingContext.js.map +1 -1
  170. package/dist/tracking/TrackingContext.d.ts.map +1 -1
  171. package/dist/tracking/TrackingContext.js.map +1 -1
  172. package/dist/types/compatibility-types.js.map +1 -1
  173. package/dist/types/formatting.js.map +1 -1
  174. package/dist/types/list-types.d.ts +6 -6
  175. package/dist/types/list-types.js.map +1 -1
  176. package/dist/types/settings-types.js.map +1 -1
  177. package/dist/types/styleConfig.d.ts +2 -2
  178. package/dist/types/styleConfig.js.map +1 -1
  179. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  180. package/dist/utils/ChangelogGenerator.js +97 -101
  181. package/dist/utils/ChangelogGenerator.js.map +1 -1
  182. package/dist/utils/CompatibilityUpgrader.d.ts.map +1 -1
  183. package/dist/utils/CompatibilityUpgrader.js +1 -1
  184. package/dist/utils/CompatibilityUpgrader.js.map +1 -1
  185. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  186. package/dist/utils/InMemoryRevisionAcceptor.js +1 -6
  187. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  188. package/dist/utils/MoveOperationHelper.d.ts.map +1 -1
  189. package/dist/utils/MoveOperationHelper.js +1 -1
  190. package/dist/utils/MoveOperationHelper.js.map +1 -1
  191. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  192. package/dist/utils/RevisionAwareProcessor.js +2 -4
  193. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  194. package/dist/utils/RevisionWalker.d.ts.map +1 -1
  195. package/dist/utils/RevisionWalker.js +4 -12
  196. package/dist/utils/RevisionWalker.js.map +1 -1
  197. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  198. package/dist/utils/SelectiveRevisionAcceptor.js +2 -6
  199. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  200. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  201. package/dist/utils/ShadingResolver.js +1 -1
  202. package/dist/utils/ShadingResolver.js.map +1 -1
  203. package/dist/utils/acceptRevisions.d.ts.map +1 -1
  204. package/dist/utils/acceptRevisions.js +23 -12
  205. package/dist/utils/acceptRevisions.js.map +1 -1
  206. package/dist/utils/cnfStyleDecoder.d.ts +1 -1
  207. package/dist/utils/cnfStyleDecoder.d.ts.map +1 -1
  208. package/dist/utils/cnfStyleDecoder.js +40 -40
  209. package/dist/utils/cnfStyleDecoder.js.map +1 -1
  210. package/dist/utils/corruptionDetection.d.ts.map +1 -1
  211. package/dist/utils/corruptionDetection.js.map +1 -1
  212. package/dist/utils/dateFormatting.js.map +1 -1
  213. package/dist/utils/deepClone.js +1 -1
  214. package/dist/utils/deepClone.js.map +1 -1
  215. package/dist/utils/diagnostics.d.ts.map +1 -1
  216. package/dist/utils/diagnostics.js +1 -1
  217. package/dist/utils/diagnostics.js.map +1 -1
  218. package/dist/utils/errorHandling.js.map +1 -1
  219. package/dist/utils/formatting.d.ts.map +1 -1
  220. package/dist/utils/formatting.js +10 -2
  221. package/dist/utils/formatting.js.map +1 -1
  222. package/dist/utils/list-detection.d.ts +2 -2
  223. package/dist/utils/list-detection.d.ts.map +1 -1
  224. package/dist/utils/list-detection.js +21 -23
  225. package/dist/utils/list-detection.js.map +1 -1
  226. package/dist/utils/logger.d.ts.map +1 -1
  227. package/dist/utils/logger.js +12 -7
  228. package/dist/utils/logger.js.map +1 -1
  229. package/dist/utils/parsingHelpers.js.map +1 -1
  230. package/dist/utils/stripTrackedChanges.d.ts.map +1 -1
  231. package/dist/utils/stripTrackedChanges.js +3 -3
  232. package/dist/utils/stripTrackedChanges.js.map +1 -1
  233. package/dist/utils/textDiff.d.ts +1 -1
  234. package/dist/utils/textDiff.js +8 -8
  235. package/dist/utils/textDiff.js.map +1 -1
  236. package/dist/utils/units.js.map +1 -1
  237. package/dist/utils/validation.d.ts.map +1 -1
  238. package/dist/utils/validation.js +24 -7
  239. package/dist/utils/validation.js.map +1 -1
  240. package/dist/utils/xmlSanitization.d.ts.map +1 -1
  241. package/dist/utils/xmlSanitization.js +3 -3
  242. package/dist/utils/xmlSanitization.js.map +1 -1
  243. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  244. package/dist/validation/RevisionAutoFixer.js +5 -5
  245. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  246. package/dist/validation/RevisionValidator.d.ts.map +1 -1
  247. package/dist/validation/RevisionValidator.js +7 -9
  248. package/dist/validation/RevisionValidator.js.map +1 -1
  249. package/dist/validation/ValidationRules.js +3 -3
  250. package/dist/validation/ValidationRules.js.map +1 -1
  251. package/dist/validation/index.js.map +1 -1
  252. package/dist/xml/XMLBuilder.d.ts +1 -1
  253. package/dist/xml/XMLBuilder.d.ts.map +1 -1
  254. package/dist/xml/XMLBuilder.js +98 -100
  255. package/dist/xml/XMLBuilder.js.map +1 -1
  256. package/dist/xml/XMLParser.d.ts.map +1 -1
  257. package/dist/xml/XMLParser.js +61 -66
  258. package/dist/xml/XMLParser.js.map +1 -1
  259. package/dist/zip/ZipHandler.d.ts.map +1 -1
  260. package/dist/zip/ZipHandler.js.map +1 -1
  261. package/dist/zip/ZipReader.d.ts.map +1 -1
  262. package/dist/zip/ZipReader.js +1 -3
  263. package/dist/zip/ZipReader.js.map +1 -1
  264. package/dist/zip/ZipWriter.d.ts +1 -1
  265. package/dist/zip/ZipWriter.d.ts.map +1 -1
  266. package/dist/zip/ZipWriter.js +28 -36
  267. package/dist/zip/ZipWriter.js.map +1 -1
  268. package/dist/zip/types.js +1 -1
  269. package/dist/zip/types.js.map +1 -1
  270. package/package.json +92 -92
  271. package/src/__tests__/helper-methods.test.ts +512 -512
  272. package/src/constants/legacyCompatFlags.ts +138 -138
  273. package/src/constants/limits.ts +50 -50
  274. package/src/core/Document.ts +1010 -1145
  275. package/src/core/DocumentContent.ts +461 -467
  276. package/src/core/DocumentGenerator.ts +1133 -1104
  277. package/src/core/DocumentIdManager.ts +158 -158
  278. package/src/core/DocumentParser.ts +2347 -2716
  279. package/src/core/DocumentValidator.ts +363 -372
  280. package/src/core/Relationship.ts +367 -367
  281. package/src/core/RelationshipManager.ts +429 -428
  282. package/src/elements/AlternateContent.ts +42 -42
  283. package/src/elements/Bookmark.ts +212 -210
  284. package/src/elements/BookmarkManager.ts +247 -250
  285. package/src/elements/Comment.ts +356 -359
  286. package/src/elements/CommentManager.ts +499 -502
  287. package/src/elements/CommonTypes.ts +524 -549
  288. package/src/elements/CustomXml.ts +36 -36
  289. package/src/elements/Endnote.ts +221 -217
  290. package/src/elements/EndnoteManager.ts +246 -249
  291. package/src/elements/Field.ts +1292 -1233
  292. package/src/elements/FieldHelpers.ts +329 -333
  293. package/src/elements/FontManager.ts +336 -339
  294. package/src/elements/Footer.ts +269 -269
  295. package/src/elements/Footnote.ts +221 -217
  296. package/src/elements/FootnoteManager.ts +246 -249
  297. package/src/elements/Header.ts +269 -269
  298. package/src/elements/HeaderFooterManager.ts +219 -219
  299. package/src/elements/Hyperlink.ts +1288 -1193
  300. package/src/elements/Image.ts +1982 -1756
  301. package/src/elements/ImageManager.ts +437 -432
  302. package/src/elements/ImageRun.ts +59 -59
  303. package/src/elements/MathElement.ts +65 -65
  304. package/src/elements/Paragraph.ts +4347 -4287
  305. package/src/elements/PreservedElement.ts +53 -53
  306. package/src/elements/PropertyChangeTypes.ts +458 -442
  307. package/src/elements/RangeMarker.ts +382 -400
  308. package/src/elements/Revision.ts +1198 -1217
  309. package/src/elements/RevisionContent.ts +73 -73
  310. package/src/elements/RevisionManager.ts +1070 -1070
  311. package/src/elements/Run.ts +3103 -3073
  312. package/src/elements/Section.ts +1521 -1421
  313. package/src/elements/Shape.ts +884 -873
  314. package/src/elements/StructuredDocumentTag.ts +1176 -1207
  315. package/src/elements/Table.ts +2468 -2524
  316. package/src/elements/TableCell.ts +1617 -1621
  317. package/src/elements/TableGridChange.ts +149 -151
  318. package/src/elements/TableOfContents.ts +701 -691
  319. package/src/elements/TableOfContentsElement.ts +89 -89
  320. package/src/elements/TableRow.ts +960 -929
  321. package/src/elements/TextBox.ts +766 -768
  322. package/src/formatting/AbstractNumbering.ts +580 -579
  323. package/src/formatting/NumberingInstance.ts +295 -299
  324. package/src/formatting/NumberingLevel.ts +981 -1040
  325. package/src/formatting/NumberingManager.ts +875 -827
  326. package/src/formatting/Style.ts +1785 -1879
  327. package/src/formatting/StylesManager.ts +1090 -1130
  328. package/src/helpers/CleanupHelper.ts +524 -524
  329. package/src/images/ImageOptimizer.ts +274 -274
  330. package/src/index.ts +559 -554
  331. package/src/managers/DrawingManager.ts +319 -319
  332. package/src/tracking/DocumentTrackingContext.ts +687 -674
  333. package/src/tracking/TrackingContext.ts +175 -173
  334. package/src/types/compatibility-types.ts +49 -49
  335. package/src/types/formatting.ts +210 -210
  336. package/src/types/list-types.ts +14 -14
  337. package/src/types/settings-types.ts +59 -59
  338. package/src/types/styleConfig.ts +189 -189
  339. package/src/utils/ChangelogGenerator.ts +1583 -1581
  340. package/src/utils/CompatibilityUpgrader.ts +235 -237
  341. package/src/utils/InMemoryRevisionAcceptor.ts +691 -696
  342. package/src/utils/MoveOperationHelper.ts +233 -238
  343. package/src/utils/RevisionAwareProcessor.ts +518 -526
  344. package/src/utils/RevisionWalker.ts +427 -457
  345. package/src/utils/SelectiveRevisionAcceptor.ts +662 -683
  346. package/src/utils/ShadingResolver.ts +105 -107
  347. package/src/utils/acceptRevisions.ts +723 -714
  348. package/src/utils/cnfStyleDecoder.ts +212 -217
  349. package/src/utils/corruptionDetection.ts +346 -345
  350. package/src/utils/dateFormatting.ts +20 -20
  351. package/src/utils/deepClone.ts +77 -78
  352. package/src/utils/diagnostics.ts +125 -129
  353. package/src/utils/errorHandling.ts +80 -80
  354. package/src/utils/formatting.ts +220 -213
  355. package/src/utils/list-detection.ts +32 -42
  356. package/src/utils/logger.ts +412 -404
  357. package/src/utils/parsingHelpers.ts +190 -190
  358. package/src/utils/stripTrackedChanges.ts +356 -353
  359. package/src/utils/textDiff.ts +100 -100
  360. package/src/utils/units.ts +421 -421
  361. package/src/utils/validation.ts +553 -542
  362. package/src/utils/xmlSanitization.ts +179 -182
  363. package/src/validation/RevisionAutoFixer.ts +541 -542
  364. package/src/validation/RevisionValidator.ts +470 -460
  365. package/src/validation/ValidationRules.ts +338 -338
  366. package/src/validation/index.ts +30 -30
  367. package/src/xml/XMLBuilder.ts +857 -871
  368. package/src/xml/XMLParser.ts +877 -919
  369. package/src/zip/ZipHandler.ts +629 -637
  370. package/src/zip/ZipReader.ts +295 -299
  371. package/src/zip/ZipWriter.ts +374 -390
  372. package/src/zip/types.ts +116 -116
@@ -1,524 +1,524 @@
1
- /**
2
- * CleanupHelper - Comprehensive document cleanup utilities
3
- *
4
- * Provides methods to clean up common issues in DOCX documents, including:
5
- * - Unlocking and removing SDTs
6
- * - Clearing preserve flags
7
- * - Defragmenting hyperlinks
8
- * - Cleaning unused elements
9
- * - Removing customXML
10
- * - Unlocking fields and frames
11
- * - Sanitizing tables
12
- *
13
- * Usage:
14
- * const cleanup = new CleanupHelper(doc);
15
- * cleanup.all(); // Run all cleanups
16
- */
17
-
18
- import type { Document } from "../core/Document";
19
- import { Field, ComplexField } from "../elements/Field";
20
- import { Hyperlink } from "../elements/Hyperlink";
21
- import { Paragraph } from "../elements/Paragraph";
22
- import { Table } from "../elements/Table";
23
- import { StructuredDocumentTag } from "../elements/StructuredDocumentTag";
24
-
25
- export interface CleanupOptions {
26
- /** Unlock all SDTs to enable editing */
27
- unlockSDTs?: boolean;
28
- /** Remove all SDTs (unwrap content) */
29
- removeSDTs?: boolean;
30
- /** Clear paragraph preserve flags */
31
- clearPreserveFlags?: boolean;
32
- /** Merge fragmented hyperlinks */
33
- defragmentHyperlinks?: boolean;
34
- /** Reset hyperlink formatting to standard */
35
- resetHyperlinkFormatting?: boolean;
36
- /** Remove unused numbering definitions */
37
- cleanupNumbering?: boolean;
38
- /** Remove unused styles */
39
- cleanupStyles?: boolean;
40
- /** Remove orphaned relationships */
41
- cleanupRelationships?: boolean;
42
- /** Remove customXML elements */
43
- removeCustomXML?: boolean;
44
- /** Unlock field locks (enable field updates) */
45
- unlockFields?: boolean;
46
- /** Remove frame/text box locks */
47
- unlockFrames?: boolean;
48
- /** Sanitize table property exceptions (tblPrEx) */
49
- sanitizeTables?: boolean;
50
- /** Format internal anchor hyperlinks with standard styling (Verdana 12pt blue underlined) */
51
- formatInternalHyperlinks?: boolean;
52
- /** Format ALL hyperlinks (internal, external, and HYPERLINK fields) with standard styling (Verdana 12pt #0000FF underlined) */
53
- formatAllHyperlinks?: boolean;
54
- }
55
-
56
- export interface CleanupReport {
57
- sdtsUnlocked: number;
58
- sdtsRemoved: number;
59
- preserveFlagsCleared: number;
60
- hyperlinksDefragmented: number;
61
- numberingRemoved: number;
62
- stylesRemoved: number;
63
- relationshipsRemoved: number;
64
- customXMLRemoved: number;
65
- fieldsUnlocked: number;
66
- framesUnlocked: number;
67
- tablesProcessed: number;
68
- internalHyperlinksFormatted: number;
69
- allHyperlinksFormatted: number;
70
- warnings: string[];
71
- }
72
-
73
- export class CleanupHelper {
74
- private doc: Document;
75
-
76
- constructor(doc: Document) {
77
- this.doc = doc;
78
- }
79
-
80
- /**
81
- * Run all cleanup operations with default settings
82
- * @returns Cleanup report
83
- */
84
- all(): CleanupReport {
85
- return this.run({
86
- unlockSDTs: true,
87
- removeSDTs: true,
88
- clearPreserveFlags: true,
89
- defragmentHyperlinks: true,
90
- resetHyperlinkFormatting: true,
91
- cleanupNumbering: true,
92
- cleanupStyles: true,
93
- cleanupRelationships: true,
94
- removeCustomXML: true,
95
- unlockFields: true,
96
- unlockFrames: true,
97
- sanitizeTables: true,
98
- formatAllHyperlinks: true,
99
- });
100
- }
101
-
102
- /**
103
- * Run selective cleanup operations
104
- * @param options Cleanup options
105
- * @returns Cleanup report
106
- */
107
- run(options: CleanupOptions): CleanupReport {
108
- const report: CleanupReport = {
109
- sdtsUnlocked: 0,
110
- sdtsRemoved: 0,
111
- preserveFlagsCleared: 0,
112
- hyperlinksDefragmented: 0,
113
- numberingRemoved: 0,
114
- stylesRemoved: 0,
115
- relationshipsRemoved: 0,
116
- customXMLRemoved: 0,
117
- fieldsUnlocked: 0,
118
- framesUnlocked: 0,
119
- tablesProcessed: 0,
120
- internalHyperlinksFormatted: 0,
121
- allHyperlinksFormatted: 0,
122
- warnings: [],
123
- };
124
-
125
- if (options.unlockSDTs) {
126
- report.sdtsUnlocked = this.unlockSDTs();
127
- }
128
-
129
- if (options.removeSDTs) {
130
- report.sdtsRemoved = this.removeSDTs();
131
- }
132
-
133
- if (options.clearPreserveFlags) {
134
- report.preserveFlagsCleared = this.clearPreserveFlags();
135
- }
136
-
137
- if (options.defragmentHyperlinks) {
138
- report.hyperlinksDefragmented = this.defragmentHyperlinks(
139
- options.resetHyperlinkFormatting ?? false
140
- );
141
- }
142
-
143
- if (options.cleanupNumbering) {
144
- report.numberingRemoved = this.cleanupNumbering();
145
- }
146
-
147
- if (options.cleanupStyles) {
148
- report.stylesRemoved = this.cleanupStyles();
149
- }
150
-
151
- if (options.cleanupRelationships) {
152
- report.relationshipsRemoved = this.cleanupRelationships();
153
- }
154
-
155
- if (options.removeCustomXML) {
156
- report.customXMLRemoved = this.removeCustomXML();
157
- }
158
-
159
- if (options.unlockFields) {
160
- report.fieldsUnlocked = this.unlockFields();
161
- }
162
-
163
- if (options.unlockFrames) {
164
- report.framesUnlocked = this.unlockFrames();
165
- }
166
-
167
- if (options.sanitizeTables) {
168
- report.tablesProcessed = this.sanitizeTables();
169
- }
170
-
171
- if (options.formatInternalHyperlinks) {
172
- report.internalHyperlinksFormatted = this.formatInternalHyperlinks();
173
- }
174
-
175
- if (options.formatAllHyperlinks) {
176
- report.allHyperlinksFormatted = this.formatAllHyperlinks();
177
- }
178
-
179
- return report;
180
- }
181
-
182
- private unlockSDTs(): number {
183
- let count = 0;
184
- const bodyElements = this.doc.getBodyElements();
185
-
186
- for (const element of bodyElements) {
187
- if (element instanceof StructuredDocumentTag && element.isLocked()) {
188
- element.unlock();
189
- count++;
190
- }
191
- }
192
-
193
- // Also unlock in tables
194
- for (const table of this.doc.getAllTables()) {
195
- for (const row of table.getRows()) {
196
- for (const cell of row.getCells()) {
197
- for (const para of cell.getParagraphs()) {
198
- // SDTs can wrap paragraphs in cells
199
- const content = para.getContent();
200
- for (const item of content) {
201
- if (item instanceof StructuredDocumentTag && item.isLocked()) {
202
- item.unlock();
203
- count++;
204
- }
205
- }
206
- }
207
- }
208
- }
209
- }
210
-
211
- return count;
212
- }
213
-
214
- private removeSDTs(): number {
215
- // Unwrap SDT wrappers, preserving their content
216
- const bodyElements = this.doc.getBodyElements();
217
- type BodyElement = Paragraph | Table | StructuredDocumentTag;
218
- const unwrapped: BodyElement[] = [];
219
- let sdtCount = 0;
220
-
221
- const unwrapSDT = (sdt: StructuredDocumentTag, target: BodyElement[]) => {
222
- sdtCount++;
223
- for (const item of sdt.getContent()) {
224
- if (item instanceof Paragraph || item instanceof Table) {
225
- target.push(item);
226
- } else if (item instanceof StructuredDocumentTag) {
227
- unwrapSDT(item, target);
228
- }
229
- }
230
- };
231
-
232
- for (const element of bodyElements) {
233
- if (element instanceof StructuredDocumentTag) {
234
- unwrapSDT(element, unwrapped);
235
- } else {
236
- unwrapped.push(element as BodyElement);
237
- }
238
- }
239
-
240
- this.doc.setBodyElements(unwrapped);
241
- return sdtCount;
242
- }
243
-
244
- private clearPreserveFlags(): number {
245
- let cleared = 0;
246
- for (const para of this.doc.getAllParagraphs()) {
247
- if (para.isPreserved()) {
248
- para.setPreserved(false);
249
- cleared++;
250
- }
251
- }
252
- return cleared;
253
- }
254
-
255
- private defragmentHyperlinks(resetFormatting: boolean): number {
256
- return this.doc.defragmentHyperlinks({ resetFormatting, cleanupRelationships: true });
257
- }
258
-
259
- private cleanupNumbering(): number {
260
- const before = this.doc.getNumberingManager().getAllInstances().length;
261
- this.doc.cleanupUnusedNumbering();
262
- const after = this.doc.getNumberingManager().getAllInstances().length;
263
- return before - after;
264
- }
265
-
266
- private cleanupStyles(): number {
267
- // Implementation for unused styles removal
268
- // Scan all paragraphs and runs for used styles
269
- const usedStyles = new Set<string>();
270
- for (const para of this.doc.getAllParagraphs()) {
271
- const paraStyle = para.getFormatting().style;
272
- if (paraStyle) usedStyles.add(paraStyle);
273
- for (const run of para.getRuns()) {
274
- const runStyle = run.getFormatting().characterStyle;
275
- if (runStyle) usedStyles.add(runStyle);
276
- }
277
- }
278
-
279
- // Remove unused styles
280
- let removed = 0;
281
- const allStyles = this.doc.getStylesManager().getAllStyles();
282
- for (const style of allStyles) {
283
- if (!usedStyles.has(style.getStyleId())) {
284
- this.doc.getStylesManager().removeStyle(style.getStyleId());
285
- removed++;
286
- }
287
- }
288
-
289
- return removed;
290
- }
291
-
292
- private cleanupRelationships(): number {
293
- // Use comprehensive scanning that includes raw nested content (nested tables),
294
- // headers/footers, footnotes, and endnotes — not just in-memory hyperlinks
295
- const referencedIds = this.doc.collectAllReferencedHyperlinkIds();
296
-
297
- // Remove orphaned hyperlink relationships
298
- return this.doc.getRelationshipManager().removeOrphanedHyperlinks(referencedIds);
299
- }
300
-
301
- private removeCustomXML(): number {
302
- const zipHandler = this.doc.getZipHandler();
303
- let removed = 0;
304
-
305
- // Remove customXML files
306
- const files = zipHandler.getFilePaths();
307
- for (const file of files) {
308
- if (file.startsWith('customXml/') || file.startsWith('customXML/')) {
309
- zipHandler.removeFile(file);
310
- removed++;
311
- }
312
- }
313
-
314
- // Remove customXML relationships
315
- const relManager = this.doc.getRelationshipManager();
316
- const customRels = relManager.getRelationshipsByType(
317
- 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml'
318
- );
319
- for (const rel of customRels) {
320
- relManager.removeRelationship(rel.getId());
321
- removed++;
322
- }
323
-
324
- // Remove custom.xml if present (docProps/custom.xml)
325
- if (zipHandler.hasFile('docProps/custom.xml')) {
326
- zipHandler.removeFile('docProps/custom.xml');
327
- removed++;
328
- }
329
-
330
- return removed;
331
- }
332
-
333
- private unlockFields(): number {
334
- const zipHandler = this.doc.getZipHandler();
335
- const docXml = zipHandler.getFileAsString('word/document.xml');
336
- if (!docXml) return 0;
337
-
338
- // Count matches first, then replace (avoid regex re-execution)
339
- const pattern = /w:fldLock="(1|true)"/g;
340
- const matches = docXml.match(pattern) || [];
341
- if (matches.length === 0) return 0;
342
-
343
- // Remove w:fldLock="1" or w:fldLock="true"
344
- const updatedXml = docXml.replace(pattern, '');
345
- zipHandler.updateFile('word/document.xml', updatedXml);
346
-
347
- return matches.length;
348
- }
349
-
350
- private unlockFrames(): number {
351
- const zipHandler = this.doc.getZipHandler();
352
- const docXml = zipHandler.getFileAsString('word/document.xml');
353
- if (!docXml) return 0;
354
-
355
- // Count matches first, then replace (avoid regex re-execution)
356
- const pattern = /w:anchorLock="(1|true)"/g;
357
- const matches = docXml.match(pattern) || [];
358
- if (matches.length === 0) return 0;
359
-
360
- // Remove w:anchorLock="1" or w:anchorLock="true"
361
- const updatedXml = docXml.replace(pattern, '');
362
- zipHandler.updateFile('word/document.xml', updatedXml);
363
-
364
- return matches.length;
365
- }
366
-
367
- private sanitizeTables(): number {
368
- const tables = this.doc.getAllTables();
369
- let processed = 0;
370
- for (const table of tables) {
371
- for (const row of table.getRows()) {
372
- const exceptions = row.getTablePropertyExceptions();
373
- if (exceptions && Object.keys(exceptions).length > 0) {
374
- row.setTablePropertyExceptions(undefined as any);
375
- }
376
- }
377
- processed++;
378
- }
379
- return processed;
380
- }
381
-
382
- private formatInternalHyperlinks(): number {
383
- let count = 0;
384
- const formatting = {
385
- font: "Verdana",
386
- size: 12,
387
- color: "0000FF",
388
- underline: "single" as const,
389
- };
390
-
391
- // Process body paragraphs
392
- for (const paragraph of this.doc.getAllParagraphs()) {
393
- for (const item of paragraph.getContent()) {
394
- if (item instanceof Hyperlink && item.isInternal()) {
395
- item.setFormatting(formatting, { replace: true });
396
- count++;
397
- }
398
- }
399
- }
400
-
401
- // Process table paragraphs
402
- for (const table of this.doc.getAllTables()) {
403
- for (const row of table.getRows()) {
404
- for (const cell of row.getCells()) {
405
- for (const para of cell.getParagraphs()) {
406
- for (const item of para.getContent()) {
407
- if (item instanceof Hyperlink && item.isInternal()) {
408
- item.setFormatting(formatting, { replace: true });
409
- count++;
410
- }
411
- }
412
- }
413
- }
414
- }
415
- }
416
-
417
- return count;
418
- }
419
-
420
- /**
421
- * Formats ALL hyperlinks in the document with standard styling
422
- * This includes:
423
- * - Internal w:hyperlink elements (bookmarks)
424
- * - External w:hyperlink elements (URLs)
425
- * - HYPERLINK fields (both simple w:fldSimple and complex fields)
426
- *
427
- * Standard formatting: Verdana 12pt, #0000FF blue, single underline
428
- * @returns Number of hyperlinks formatted
429
- */
430
- private formatAllHyperlinks(): number {
431
- let count = 0;
432
- const formatting = {
433
- font: "Verdana",
434
- size: 12,
435
- color: "0000FF",
436
- underline: "single" as const,
437
- };
438
-
439
- // Helper to process paragraph content
440
- const processParagraph = (paragraph: any): void => {
441
- for (const item of paragraph.getContent()) {
442
- // Process all Hyperlink instances (both internal AND external)
443
- if (item instanceof Hyperlink) {
444
- item.setFormatting(formatting, { replace: true });
445
- count++;
446
- }
447
- // Process simple HYPERLINK fields
448
- if (item instanceof Field && item.isHyperlinkField()) {
449
- item.setFormatting(formatting);
450
- count++;
451
- }
452
- // Process complex HYPERLINK fields
453
- if (item instanceof ComplexField && item.isHyperlinkField()) {
454
- item.setResultFormatting(formatting);
455
- count++;
456
- }
457
- }
458
- };
459
-
460
- // Process body paragraphs
461
- for (const paragraph of this.doc.getAllParagraphs()) {
462
- processParagraph(paragraph);
463
- }
464
-
465
- // Process table paragraphs
466
- for (const table of this.doc.getAllTables()) {
467
- for (const row of table.getRows()) {
468
- for (const cell of row.getCells()) {
469
- for (const para of cell.getParagraphs()) {
470
- processParagraph(para);
471
- }
472
- }
473
- }
474
- }
475
-
476
- return count;
477
- }
478
-
479
- /**
480
- * Preset: Google Docs cleanup
481
- */
482
- static googleDocsPreset(): CleanupOptions {
483
- return {
484
- unlockSDTs: true,
485
- removeSDTs: true,
486
- defragmentHyperlinks: true,
487
- resetHyperlinkFormatting: true,
488
- cleanupRelationships: true,
489
- removeCustomXML: true,
490
- sanitizeTables: true,
491
- };
492
- }
493
-
494
- /**
495
- * Preset: Full cleanup
496
- */
497
- static fullCleanupPreset(): CleanupOptions {
498
- return {
499
- unlockSDTs: true,
500
- removeSDTs: true,
501
- clearPreserveFlags: true,
502
- defragmentHyperlinks: true,
503
- resetHyperlinkFormatting: true,
504
- cleanupNumbering: true,
505
- cleanupStyles: true,
506
- cleanupRelationships: true,
507
- removeCustomXML: true,
508
- unlockFields: true,
509
- unlockFrames: true,
510
- sanitizeTables: true,
511
- formatAllHyperlinks: true,
512
- };
513
- }
514
-
515
- /**
516
- * Preset: Minimal cleanup
517
- */
518
- static minimalPreset(): CleanupOptions {
519
- return {
520
- cleanupRelationships: true,
521
- removeCustomXML: true,
522
- };
523
- }
524
- }
1
+ /**
2
+ * CleanupHelper - Comprehensive document cleanup utilities
3
+ *
4
+ * Provides methods to clean up common issues in DOCX documents, including:
5
+ * - Unlocking and removing SDTs
6
+ * - Clearing preserve flags
7
+ * - Defragmenting hyperlinks
8
+ * - Cleaning unused elements
9
+ * - Removing customXML
10
+ * - Unlocking fields and frames
11
+ * - Sanitizing tables
12
+ *
13
+ * Usage:
14
+ * const cleanup = new CleanupHelper(doc);
15
+ * cleanup.all(); // Run all cleanups
16
+ */
17
+
18
+ import type { Document } from '../core/Document';
19
+ import { Field, ComplexField } from '../elements/Field';
20
+ import { Hyperlink } from '../elements/Hyperlink';
21
+ import { Paragraph } from '../elements/Paragraph';
22
+ import { Table } from '../elements/Table';
23
+ import { StructuredDocumentTag } from '../elements/StructuredDocumentTag';
24
+
25
/**
 * Switches selecting which cleanup passes `CleanupHelper.run()` executes.
 *
 * Every flag is optional; a pass runs only when its flag is truthy.
 */
export interface CleanupOptions {
  /** Unlock locked SDTs (structured document tags) so their content can be edited. */
  unlockSDTs?: boolean;
  /** Remove SDT wrappers from the document body, keeping the content they wrap. */
  removeSDTs?: boolean;
  /** Clear the "preserved" flag on every paragraph that has it set. */
  clearPreserveFlags?: boolean;
  /** Merge fragmented hyperlinks. */
  defragmentHyperlinks?: boolean;
  /**
   * Reset hyperlink formatting to standard while defragmenting.
   *
   * Only read when `defragmentHyperlinks` is enabled: it is forwarded as the
   * `resetFormatting` flag of that pass and has no effect on its own.
   */
  resetHyperlinkFormatting?: boolean;
  /** Remove unused numbering definitions. */
  cleanupNumbering?: boolean;
  /** Remove styles that no paragraph or run references. */
  cleanupStyles?: boolean;
  /** Remove orphaned hyperlink relationships. */
  cleanupRelationships?: boolean;
  /** Remove customXML parts, their relationships, and `docProps/custom.xml`. */
  removeCustomXML?: boolean;
  /** Remove field locks (`w:fldLock`) so fields can be updated. */
  unlockFields?: boolean;
  /** Remove frame/text box anchor locks (`w:anchorLock`). */
  unlockFrames?: boolean;
  /** Clear table property exceptions (tblPrEx) on table rows. */
  sanitizeTables?: boolean;
  /** Format internal anchor hyperlinks with standard styling (Verdana 12pt blue underlined). */
  formatInternalHyperlinks?: boolean;
  /** Format ALL hyperlinks (internal, external, and HYPERLINK fields) with standard styling (Verdana 12pt #0000FF underlined). */
  formatAllHyperlinks?: boolean;
}
55
+
56
/**
 * Per-pass counters returned by `CleanupHelper.run()`.
 *
 * A counter stays at 0 when its pass was not requested or found nothing to do.
 */
export interface CleanupReport {
  /** Number of locked SDTs that were unlocked. */
  sdtsUnlocked: number;
  /** Number of SDT wrappers unwrapped from the body (nested wrappers are counted too). */
  sdtsRemoved: number;
  /** Number of paragraphs whose "preserved" flag was cleared. */
  preserveFlagsCleared: number;
  /** Value returned by the document's hyperlink defragmentation pass. */
  hyperlinksDefragmented: number;
  /** Decrease in the number of numbering instances after unused numbering was cleaned up. */
  numberingRemoved: number;
  /** Number of styles removed because nothing referenced them. */
  stylesRemoved: number;
  /** Number of orphaned hyperlink relationships removed. */
  relationshipsRemoved: number;
  /**
   * Combined count of removed customXML package files, removed customXml
   * relationships, and `docProps/custom.xml` (counted once when present).
   */
  customXMLRemoved: number;
  /** Number of `w:fldLock` attributes removed from `word/document.xml`. */
  fieldsUnlocked: number;
  /** Number of `w:anchorLock` attributes removed from `word/document.xml`. */
  framesUnlocked: number;
  /** Number of tables visited by the table sanitizer, whether or not any row was changed. */
  tablesProcessed: number;
  /** Number of internal hyperlinks given the standard formatting. */
  internalHyperlinksFormatted: number;
  /** Number of hyperlinks and HYPERLINK fields given the standard formatting. */
  allHyperlinksFormatted: number;
  /** Human-readable notes about non-fatal issues noticed during the run. */
  warnings: string[];
}
72
+
73
/**
 * Runs a configurable set of cleanup passes over a loaded DOCX `Document`.
 *
 * Each pass is a private method selected by a flag in `CleanupOptions`; `run()`
 * executes the requested passes in a fixed order and returns per-pass counters.
 *
 * Usage:
 *   const cleanup = new CleanupHelper(doc);
 *   cleanup.all();                                   // every pass of the full preset
 *   cleanup.run(CleanupHelper.minimalPreset());      // a selected subset
 */
export class CleanupHelper {
  /**
   * Style IDs Word uses for its four built-in default styles (paragraph,
   * character, table, numbering). Content normally relies on these implicitly,
   * without an explicit reference, so they are never treated as unused.
   */
  private static readonly PROTECTED_STYLE_IDS: ReadonlySet<string> = new Set([
    'Normal',
    'DefaultParagraphFont',
    'TableNormal',
    'NoList',
  ]);

  /** Relationship type that links a part to a customXML data part. */
  private static readonly CUSTOM_XML_RELATIONSHIP_TYPE =
    'http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml';

  /** Package path of the main document part edited by the raw-XML passes. */
  private static readonly MAIN_DOCUMENT_PART = 'word/document.xml';

  private readonly doc: Document;

  /**
   * @param doc Document the cleanup passes read from and modify in place.
   */
  constructor(doc: Document) {
    this.doc = doc;
  }

  /**
   * Run every pass enabled by `fullCleanupPreset()`.
   * @returns Cleanup report
   */
  all(): CleanupReport {
    return this.run(CleanupHelper.fullCleanupPreset());
  }

  /**
   * Run the passes selected in `options`, in a fixed order: SDT unlock, SDT
   * removal, preserve flags, hyperlink defragmentation, numbering, styles,
   * relationships, customXML, field locks, frame locks, tables, internal
   * hyperlink formatting, all-hyperlink formatting.
   *
   * `options.resetHyperlinkFormatting` is only consulted when
   * `options.defragmentHyperlinks` is set.
   *
   * @param options Cleanup options
   * @returns Cleanup report; counters of passes that did not run stay at 0
   */
  run(options: CleanupOptions): CleanupReport {
    const report: CleanupReport = {
      sdtsUnlocked: 0,
      sdtsRemoved: 0,
      preserveFlagsCleared: 0,
      hyperlinksDefragmented: 0,
      numberingRemoved: 0,
      stylesRemoved: 0,
      relationshipsRemoved: 0,
      customXMLRemoved: 0,
      fieldsUnlocked: 0,
      framesUnlocked: 0,
      tablesProcessed: 0,
      internalHyperlinksFormatted: 0,
      allHyperlinksFormatted: 0,
      warnings: [],
    };

    if (options.unlockSDTs) {
      report.sdtsUnlocked = this.unlockSDTs();
    }

    if (options.removeSDTs) {
      report.sdtsRemoved = this.removeSDTs(report.warnings);
    }

    if (options.clearPreserveFlags) {
      report.preserveFlagsCleared = this.clearPreserveFlags();
    }

    if (options.defragmentHyperlinks) {
      report.hyperlinksDefragmented = this.defragmentHyperlinks(
        options.resetHyperlinkFormatting ?? false
      );
    }

    if (options.cleanupNumbering) {
      report.numberingRemoved = this.cleanupNumbering();
    }

    if (options.cleanupStyles) {
      report.stylesRemoved = this.cleanupStyles();
    }

    if (options.cleanupRelationships) {
      report.relationshipsRemoved = this.cleanupRelationships();
    }

    if (options.removeCustomXML) {
      report.customXMLRemoved = this.removeCustomXML();
    }

    if (options.unlockFields) {
      report.fieldsUnlocked = this.unlockFields();
    }

    if (options.unlockFrames) {
      report.framesUnlocked = this.unlockFrames();
    }

    if (options.sanitizeTables) {
      report.tablesProcessed = this.sanitizeTables();
    }

    if (options.formatInternalHyperlinks) {
      report.internalHyperlinksFormatted = this.formatInternalHyperlinks();
    }

    if (options.formatAllHyperlinks) {
      report.allHyperlinksFormatted = this.formatAllHyperlinks();
    }

    return report;
  }

  /**
   * Unlock every locked SDT found at body level (including SDTs nested inside
   * other SDTs) and every locked SDT found in the content of table-cell
   * paragraphs.
   * @returns Number of SDTs that were locked and have been unlocked
   */
  private unlockSDTs(): number {
    let count = 0;

    // Unlocks the tag itself, then descends so nested tags are not left locked.
    const unlockTree = (sdt: StructuredDocumentTag): void => {
      if (sdt.isLocked()) {
        sdt.unlock();
        count++;
      }
      for (const child of sdt.getContent()) {
        if (child instanceof StructuredDocumentTag) {
          unlockTree(child);
        }
      }
    };

    for (const element of this.doc.getBodyElements()) {
      if (element instanceof StructuredDocumentTag) {
        unlockTree(element);
      }
    }

    // SDTs can also appear in the content of paragraphs inside table cells.
    for (const table of this.doc.getAllTables()) {
      for (const row of table.getRows()) {
        for (const cell of row.getCells()) {
          for (const para of cell.getParagraphs()) {
            for (const item of para.getContent()) {
              if (item instanceof StructuredDocumentTag) {
                unlockTree(item);
              }
            }
          }
        }
      }
    }

    return count;
  }

  /**
   * Replace every body-level SDT with the paragraphs and tables it wraps,
   * flattening nested SDTs recursively. Non-SDT body elements keep their
   * position. The body is only rewritten when at least one SDT was found.
   *
   * SDT content that is neither a Paragraph, a Table, nor a nested SDT is not
   * carried over; when that happens a single summary message is appended to
   * `warnings`.
   *
   * @param warnings Collector for non-fatal notes (typically the report's list)
   * @returns Number of SDT wrappers unwrapped, nested wrappers included
   */
  private removeSDTs(warnings: string[] = []): number {
    type BodyElement = Paragraph | Table | StructuredDocumentTag;
    const unwrapped: BodyElement[] = [];
    let sdtCount = 0;
    let dropped = 0;

    const unwrapSDT = (sdt: StructuredDocumentTag): void => {
      sdtCount++;
      for (const item of sdt.getContent()) {
        if (item instanceof Paragraph || item instanceof Table) {
          unwrapped.push(item);
        } else if (item instanceof StructuredDocumentTag) {
          unwrapSDT(item);
        } else {
          dropped++;
        }
      }
    };

    for (const element of this.doc.getBodyElements()) {
      if (element instanceof StructuredDocumentTag) {
        unwrapSDT(element);
      } else {
        unwrapped.push(element as BodyElement);
      }
    }

    // Nothing was wrapped: leave the body untouched instead of re-setting it.
    if (sdtCount === 0) {
      return 0;
    }

    if (dropped > 0) {
      warnings.push(
        `removeSDTs: dropped ${dropped} SDT content item(s) that are not a Paragraph, Table, or nested SDT`
      );
    }

    this.doc.setBodyElements(unwrapped);
    return sdtCount;
  }

  /**
   * Clear the preserved flag on every paragraph that currently has it set.
   * @returns Number of paragraphs changed
   */
  private clearPreserveFlags(): number {
    let cleared = 0;
    for (const para of this.doc.getAllParagraphs()) {
      if (para.isPreserved()) {
        para.setPreserved(false);
        cleared++;
      }
    }
    return cleared;
  }

  /**
   * Delegate to the document's hyperlink defragmentation, always asking it to
   * clean up relationships as well.
   * @param resetFormatting Forwarded as the `resetFormatting` option
   * @returns Whatever the document reports for the pass
   */
  private defragmentHyperlinks(resetFormatting: boolean): number {
    return this.doc.defragmentHyperlinks({ resetFormatting, cleanupRelationships: true });
  }

  /**
   * Let the document drop unused numbering and measure the effect.
   * @returns Decrease in the number of numbering instances
   */
  private cleanupNumbering(): number {
    const before = this.doc.getNumberingManager().getAllInstances().length;
    this.doc.cleanupUnusedNumbering();
    const after = this.doc.getNumberingManager().getAllInstances().length;
    return before - after;
  }

  /**
   * Remove styles that are neither referenced by a paragraph (paragraph style)
   * or run (character style) nor one of Word's built-in defaults.
   *
   * NOTE(review): only styles referenced directly from `getAllParagraphs()` are
   * treated as used. Styles reachable only through style inheritance, tables,
   * numbering, or headers/footers are not detected here — confirm before
   * enabling this pass on documents that rely on them.
   *
   * @returns Number of styles removed
   */
  private cleanupStyles(): number {
    const keep = new Set<string>(CleanupHelper.PROTECTED_STYLE_IDS);
    for (const para of this.doc.getAllParagraphs()) {
      const paraStyle = para.getFormatting().style;
      if (paraStyle) keep.add(paraStyle);
      for (const run of para.getRuns()) {
        const runStyle = run.getFormatting().characterStyle;
        if (runStyle) keep.add(runStyle);
      }
    }

    const stylesManager = this.doc.getStylesManager();
    let removed = 0;
    // Iterate over a snapshot: removing from a live list mid-iteration skips entries.
    for (const style of [...stylesManager.getAllStyles()]) {
      const styleId = style.getStyleId();
      if (!keep.has(styleId)) {
        stylesManager.removeStyle(styleId);
        removed++;
      }
    }

    return removed;
  }

  /**
   * Remove hyperlink relationships that nothing in the document references.
   * @returns Number of relationships removed
   */
  private cleanupRelationships(): number {
    // Use comprehensive scanning that includes raw nested content (nested tables),
    // headers/footers, footnotes, and endnotes — not just in-memory hyperlinks
    const referencedIds = this.doc.collectAllReferencedHyperlinkIds();

    return this.doc.getRelationshipManager().removeOrphanedHyperlinks(referencedIds);
  }

  /**
   * Remove customXML package files (folder name matched case-insensitively),
   * customXml relationships, and `docProps/custom.xml` when present.
   *
   * NOTE(review): no content-type entries or package-level relationships are
   * updated here; whether anything still points at the removed parts afterwards
   * is not visible from this class — confirm.
   *
   * @returns Files removed + relationships removed (+1 for docProps/custom.xml)
   */
  private removeCustomXML(): number {
    const zipHandler = this.doc.getZipHandler();
    let removed = 0;

    // Snapshot the path list because files are removed while walking it.
    for (const file of [...zipHandler.getFilePaths()]) {
      if (file.toLowerCase().startsWith('customxml/')) {
        zipHandler.removeFile(file);
        removed++;
      }
    }

    const relManager = this.doc.getRelationshipManager();
    const customRels = relManager.getRelationshipsByType(
      CleanupHelper.CUSTOM_XML_RELATIONSHIP_TYPE
    );
    // Same reason: the manager may hand back its live list.
    for (const rel of [...customRels]) {
      relManager.removeRelationship(rel.getId());
      removed++;
    }

    if (zipHandler.hasFile('docProps/custom.xml')) {
      zipHandler.removeFile('docProps/custom.xml');
      removed++;
    }

    return removed;
  }

  /**
   * Remove `w:fldLock` attributes whose value is on ("1", "true" or "on").
   * @returns Number of attributes removed
   */
  private unlockFields(): number {
    return this.stripEnabledAttribute('w:fldLock');
  }

  /**
   * Remove `w:anchorLock` attributes whose value is on ("1", "true" or "on").
   * @returns Number of attributes removed
   */
  private unlockFrames(): number {
    return this.stripEnabledAttribute('w:anchorLock');
  }

  /**
   * Delete every occurrence of `attributeName` set to an "on" value from the
   * text of `word/document.xml`, together with the whitespace in front of it,
   * and store the result back in the package. Both quote styles are accepted.
   * The part is only written when something matched.
   *
   * NOTE(review): this edits the stored part text directly; how that interacts
   * with the in-memory document model on save is not visible here — confirm.
   *
   * @param attributeName Qualified attribute name; must not contain regex metacharacters
   * @returns Number of attributes removed (0 when the part is missing or empty)
   */
  private stripEnabledAttribute(attributeName: string): number {
    const zipHandler = this.doc.getZipHandler();
    const docXml = zipHandler.getFileAsString(CleanupHelper.MAIN_DOCUMENT_PART);
    if (!docXml) return 0;

    const pattern = new RegExp(`\\s*${attributeName}=(["'])(?:1|true|on)\\1`, 'g');
    let removed = 0;
    // Count inside the replacer so the text is scanned only once.
    const updatedXml = docXml.replace(pattern, () => {
      removed++;
      return '';
    });
    if (removed === 0) return 0;

    zipHandler.updateFile(CleanupHelper.MAIN_DOCUMENT_PART, updatedXml);
    return removed;
  }

  /**
   * Clear non-empty table property exceptions on every row of every table.
   * @returns Number of tables visited (not the number of rows changed)
   */
  private sanitizeTables(): number {
    let processed = 0;
    for (const table of this.doc.getAllTables()) {
      for (const row of table.getRows()) {
        const exceptions = row.getTablePropertyExceptions();
        if (exceptions && Object.keys(exceptions).length > 0) {
          // The cast is kept from the original call: the setter's declared
          // parameter type is not visible from this class.
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          row.setTablePropertyExceptions(undefined as any);
        }
      }
      processed++;
    }
    return processed;
  }

  /**
   * Apply the standard hyperlink formatting to every internal hyperlink.
   * Each paragraph is visited once, so a link is never counted twice.
   * @returns Number of internal hyperlinks formatted
   */
  private formatInternalHyperlinks(): number {
    let count = 0;
    const formatting = CleanupHelper.standardHyperlinkFormatting();

    this.forEachUniqueParagraph((paragraph) => {
      for (const item of paragraph.getContent()) {
        if (item instanceof Hyperlink && item.isInternal()) {
          item.setFormatting(formatting, { replace: true });
          count++;
        }
      }
    });

    return count;
  }

  /**
   * Formats ALL hyperlinks in the document with standard styling
   * This includes:
   * - Internal w:hyperlink elements (bookmarks)
   * - External w:hyperlink elements (URLs)
   * - HYPERLINK fields (both simple w:fldSimple and complex fields)
   *
   * Standard formatting: Verdana 12pt, #0000FF blue, single underline.
   * Each paragraph is visited once, so an item is never counted twice.
   * @returns Number of hyperlinks formatted
   */
  private formatAllHyperlinks(): number {
    let count = 0;
    const formatting = CleanupHelper.standardHyperlinkFormatting();

    this.forEachUniqueParagraph((paragraph) => {
      for (const item of paragraph.getContent()) {
        // Hyperlink elements, internal and external alike
        if (item instanceof Hyperlink) {
          item.setFormatting(formatting, { replace: true });
          count++;
        }
        // Simple HYPERLINK fields
        if (item instanceof Field && item.isHyperlinkField()) {
          item.setFormatting(formatting);
          count++;
        }
        // Complex HYPERLINK fields: only the result is formatted
        if (item instanceof ComplexField && item.isHyperlinkField()) {
          item.setResultFormatting(formatting);
          count++;
        }
      }
    });

    return count;
  }

  /**
   * Call `visit` once per distinct paragraph: first those returned by
   * `getAllParagraphs()`, then any table-cell paragraph not already seen.
   * Identity-based de-duplication keeps counts correct whether or not
   * `getAllParagraphs()` already includes table paragraphs.
   */
  private forEachUniqueParagraph(
    visit: (paragraph: { getContent(): Iterable<unknown> }) => void
  ): void {
    const seen = new Set<unknown>();
    const visitOnce = (paragraph: { getContent(): Iterable<unknown> }): void => {
      if (seen.has(paragraph)) return;
      seen.add(paragraph);
      visit(paragraph);
    };

    for (const paragraph of this.doc.getAllParagraphs()) {
      visitOnce(paragraph);
    }

    for (const table of this.doc.getAllTables()) {
      for (const row of table.getRows()) {
        for (const cell of row.getCells()) {
          for (const para of cell.getParagraphs()) {
            visitOnce(para);
          }
        }
      }
    }
  }

  /** Fresh copy of the standard hyperlink look: Verdana 12pt, #0000FF, single underline. */
  private static standardHyperlinkFormatting() {
    return {
      font: 'Verdana',
      size: 12,
      color: '0000FF',
      underline: 'single' as const,
    };
  }

  /**
   * Preset: Google Docs cleanup
   */
  static googleDocsPreset(): CleanupOptions {
    return {
      unlockSDTs: true,
      removeSDTs: true,
      defragmentHyperlinks: true,
      resetHyperlinkFormatting: true,
      cleanupRelationships: true,
      removeCustomXML: true,
      sanitizeTables: true,
    };
  }

  /**
   * Preset: Full cleanup (the set of passes `all()` runs)
   */
  static fullCleanupPreset(): CleanupOptions {
    return {
      unlockSDTs: true,
      removeSDTs: true,
      clearPreserveFlags: true,
      defragmentHyperlinks: true,
      resetHyperlinkFormatting: true,
      cleanupNumbering: true,
      cleanupStyles: true,
      cleanupRelationships: true,
      removeCustomXML: true,
      unlockFields: true,
      unlockFrames: true,
      sanitizeTables: true,
      formatAllHyperlinks: true,
    };
  }

  /**
   * Preset: Minimal cleanup
   */
  static minimalPreset(): CleanupOptions {
    return {
      cleanupRelationships: true,
      removeCustomXML: true,
    };
  }
}