docxmlater 10.1.3 → 10.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (371) hide show
  1. package/README.md +759 -754
  2. package/dist/constants/legacyCompatFlags.js +1 -1
  3. package/dist/constants/legacyCompatFlags.js.map +1 -1
  4. package/dist/constants/limits.js.map +1 -1
  5. package/dist/core/Document.d.ts +50 -50
  6. package/dist/core/Document.d.ts.map +1 -1
  7. package/dist/core/Document.js +483 -471
  8. package/dist/core/Document.js.map +1 -1
  9. package/dist/core/DocumentContent.d.ts +9 -9
  10. package/dist/core/DocumentContent.d.ts.map +1 -1
  11. package/dist/core/DocumentContent.js +1 -1
  12. package/dist/core/DocumentContent.js.map +1 -1
  13. package/dist/core/DocumentGenerator.d.ts +11 -11
  14. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  15. package/dist/core/DocumentGenerator.js +251 -251
  16. package/dist/core/DocumentGenerator.js.map +1 -1
  17. package/dist/core/DocumentIdManager.js.map +1 -1
  18. package/dist/core/DocumentParser.d.ts +15 -15
  19. package/dist/core/DocumentParser.d.ts.map +1 -1
  20. package/dist/core/DocumentParser.js +2123 -2155
  21. package/dist/core/DocumentParser.js.map +1 -1
  22. package/dist/core/DocumentValidator.d.ts.map +1 -1
  23. package/dist/core/DocumentValidator.js +2 -5
  24. package/dist/core/DocumentValidator.js.map +1 -1
  25. package/dist/core/Relationship.js.map +1 -1
  26. package/dist/core/RelationshipManager.d.ts.map +1 -1
  27. package/dist/core/RelationshipManager.js +3 -3
  28. package/dist/core/RelationshipManager.js.map +1 -1
  29. package/dist/elements/AlternateContent.js.map +1 -1
  30. package/dist/elements/Bookmark.d.ts.map +1 -1
  31. package/dist/elements/Bookmark.js +3 -1
  32. package/dist/elements/Bookmark.js.map +1 -1
  33. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  34. package/dist/elements/BookmarkManager.js.map +1 -1
  35. package/dist/elements/Comment.d.ts.map +1 -1
  36. package/dist/elements/Comment.js +9 -6
  37. package/dist/elements/Comment.js.map +1 -1
  38. package/dist/elements/CommentManager.d.ts.map +1 -1
  39. package/dist/elements/CommentManager.js +18 -17
  40. package/dist/elements/CommentManager.js.map +1 -1
  41. package/dist/elements/CommonTypes.d.ts +21 -21
  42. package/dist/elements/CommonTypes.d.ts.map +1 -1
  43. package/dist/elements/CommonTypes.js +56 -56
  44. package/dist/elements/CommonTypes.js.map +1 -1
  45. package/dist/elements/CustomXml.js.map +1 -1
  46. package/dist/elements/Endnote.d.ts.map +1 -1
  47. package/dist/elements/Endnote.js +6 -6
  48. package/dist/elements/Endnote.js.map +1 -1
  49. package/dist/elements/EndnoteManager.d.ts.map +1 -1
  50. package/dist/elements/EndnoteManager.js +6 -7
  51. package/dist/elements/EndnoteManager.js.map +1 -1
  52. package/dist/elements/Field.d.ts.map +1 -1
  53. package/dist/elements/Field.js +82 -25
  54. package/dist/elements/Field.js.map +1 -1
  55. package/dist/elements/FieldHelpers.d.ts.map +1 -1
  56. package/dist/elements/FieldHelpers.js.map +1 -1
  57. package/dist/elements/FontManager.d.ts.map +1 -1
  58. package/dist/elements/FontManager.js +1 -1
  59. package/dist/elements/FontManager.js.map +1 -1
  60. package/dist/elements/Footer.js +2 -2
  61. package/dist/elements/Footer.js.map +1 -1
  62. package/dist/elements/Footnote.d.ts.map +1 -1
  63. package/dist/elements/Footnote.js +6 -6
  64. package/dist/elements/Footnote.js.map +1 -1
  65. package/dist/elements/FootnoteManager.d.ts.map +1 -1
  66. package/dist/elements/FootnoteManager.js +6 -7
  67. package/dist/elements/FootnoteManager.js.map +1 -1
  68. package/dist/elements/Header.js +2 -2
  69. package/dist/elements/Header.js.map +1 -1
  70. package/dist/elements/HeaderFooterManager.js.map +1 -1
  71. package/dist/elements/Hyperlink.d.ts +5 -3
  72. package/dist/elements/Hyperlink.d.ts.map +1 -1
  73. package/dist/elements/Hyperlink.js +134 -76
  74. package/dist/elements/Hyperlink.js.map +1 -1
  75. package/dist/elements/Image.d.ts.map +1 -1
  76. package/dist/elements/Image.js +238 -106
  77. package/dist/elements/Image.js.map +1 -1
  78. package/dist/elements/ImageManager.d.ts.map +1 -1
  79. package/dist/elements/ImageManager.js +1 -1
  80. package/dist/elements/ImageManager.js.map +1 -1
  81. package/dist/elements/ImageRun.js +1 -1
  82. package/dist/elements/ImageRun.js.map +1 -1
  83. package/dist/elements/MathElement.js.map +1 -1
  84. package/dist/elements/Paragraph.d.ts +24 -24
  85. package/dist/elements/Paragraph.d.ts.map +1 -1
  86. package/dist/elements/Paragraph.js +181 -188
  87. package/dist/elements/Paragraph.js.map +1 -1
  88. package/dist/elements/PreservedElement.js.map +1 -1
  89. package/dist/elements/PropertyChangeTypes.d.ts.map +1 -1
  90. package/dist/elements/PropertyChangeTypes.js +6 -6
  91. package/dist/elements/PropertyChangeTypes.js.map +1 -1
  92. package/dist/elements/RangeMarker.d.ts.map +1 -1
  93. package/dist/elements/RangeMarker.js.map +1 -1
  94. package/dist/elements/Revision.d.ts.map +1 -1
  95. package/dist/elements/Revision.js +4 -5
  96. package/dist/elements/Revision.js.map +1 -1
  97. package/dist/elements/RevisionContent.js.map +1 -1
  98. package/dist/elements/RevisionManager.d.ts.map +1 -1
  99. package/dist/elements/RevisionManager.js +40 -48
  100. package/dist/elements/RevisionManager.js.map +1 -1
  101. package/dist/elements/Run.d.ts +16 -16
  102. package/dist/elements/Run.d.ts.map +1 -1
  103. package/dist/elements/Run.js +256 -238
  104. package/dist/elements/Run.js.map +1 -1
  105. package/dist/elements/Section.d.ts.map +1 -1
  106. package/dist/elements/Section.js +36 -11
  107. package/dist/elements/Section.js.map +1 -1
  108. package/dist/elements/Shape.d.ts.map +1 -1
  109. package/dist/elements/Shape.js.map +1 -1
  110. package/dist/elements/StructuredDocumentTag.d.ts +6 -6
  111. package/dist/elements/StructuredDocumentTag.d.ts.map +1 -1
  112. package/dist/elements/StructuredDocumentTag.js +99 -104
  113. package/dist/elements/StructuredDocumentTag.js.map +1 -1
  114. package/dist/elements/Table.d.ts +11 -11
  115. package/dist/elements/Table.d.ts.map +1 -1
  116. package/dist/elements/Table.js +102 -107
  117. package/dist/elements/Table.js.map +1 -1
  118. package/dist/elements/TableCell.d.ts +10 -10
  119. package/dist/elements/TableCell.d.ts.map +1 -1
  120. package/dist/elements/TableCell.js +105 -106
  121. package/dist/elements/TableCell.js.map +1 -1
  122. package/dist/elements/TableGridChange.d.ts.map +1 -1
  123. package/dist/elements/TableGridChange.js.map +1 -1
  124. package/dist/elements/TableOfContents.d.ts.map +1 -1
  125. package/dist/elements/TableOfContents.js +4 -4
  126. package/dist/elements/TableOfContents.js.map +1 -1
  127. package/dist/elements/TableOfContentsElement.js.map +1 -1
  128. package/dist/elements/TableRow.d.ts.map +1 -1
  129. package/dist/elements/TableRow.js +13 -6
  130. package/dist/elements/TableRow.js.map +1 -1
  131. package/dist/elements/TextBox.d.ts.map +1 -1
  132. package/dist/elements/TextBox.js +3 -5
  133. package/dist/elements/TextBox.js.map +1 -1
  134. package/dist/formatting/AbstractNumbering.d.ts +4 -4
  135. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  136. package/dist/formatting/AbstractNumbering.js +54 -49
  137. package/dist/formatting/AbstractNumbering.js.map +1 -1
  138. package/dist/formatting/NumberingInstance.d.ts.map +1 -1
  139. package/dist/formatting/NumberingInstance.js +1 -3
  140. package/dist/formatting/NumberingInstance.js.map +1 -1
  141. package/dist/formatting/NumberingLevel.d.ts +5 -5
  142. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  143. package/dist/formatting/NumberingLevel.js +119 -125
  144. package/dist/formatting/NumberingLevel.js.map +1 -1
  145. package/dist/formatting/NumberingManager.d.ts.map +1 -1
  146. package/dist/formatting/NumberingManager.js +9 -9
  147. package/dist/formatting/NumberingManager.js.map +1 -1
  148. package/dist/formatting/Style.d.ts +11 -11
  149. package/dist/formatting/Style.d.ts.map +1 -1
  150. package/dist/formatting/Style.js +219 -247
  151. package/dist/formatting/Style.js.map +1 -1
  152. package/dist/formatting/StylesManager.d.ts +2 -2
  153. package/dist/formatting/StylesManager.d.ts.map +1 -1
  154. package/dist/formatting/StylesManager.js +96 -102
  155. package/dist/formatting/StylesManager.js.map +1 -1
  156. package/dist/helpers/CleanupHelper.d.ts +1 -1
  157. package/dist/helpers/CleanupHelper.d.ts.map +1 -1
  158. package/dist/helpers/CleanupHelper.js +6 -6
  159. package/dist/helpers/CleanupHelper.js.map +1 -1
  160. package/dist/images/ImageOptimizer.js +7 -7
  161. package/dist/images/ImageOptimizer.js.map +1 -1
  162. package/dist/index.d.ts +9 -9
  163. package/dist/index.d.ts.map +1 -1
  164. package/dist/index.js.map +1 -1
  165. package/dist/managers/DrawingManager.js.map +1 -1
  166. package/dist/tracking/DocumentTrackingContext.d.ts.map +1 -1
  167. package/dist/tracking/DocumentTrackingContext.js +23 -7
  168. package/dist/tracking/DocumentTrackingContext.js.map +1 -1
  169. package/dist/tracking/TrackingContext.d.ts.map +1 -1
  170. package/dist/tracking/TrackingContext.js.map +1 -1
  171. package/dist/types/compatibility-types.js.map +1 -1
  172. package/dist/types/formatting.js.map +1 -1
  173. package/dist/types/list-types.d.ts +6 -6
  174. package/dist/types/list-types.js.map +1 -1
  175. package/dist/types/settings-types.js.map +1 -1
  176. package/dist/types/styleConfig.d.ts +2 -2
  177. package/dist/types/styleConfig.js.map +1 -1
  178. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  179. package/dist/utils/ChangelogGenerator.js +97 -101
  180. package/dist/utils/ChangelogGenerator.js.map +1 -1
  181. package/dist/utils/CompatibilityUpgrader.d.ts.map +1 -1
  182. package/dist/utils/CompatibilityUpgrader.js +1 -1
  183. package/dist/utils/CompatibilityUpgrader.js.map +1 -1
  184. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  185. package/dist/utils/InMemoryRevisionAcceptor.js +1 -6
  186. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  187. package/dist/utils/MoveOperationHelper.d.ts.map +1 -1
  188. package/dist/utils/MoveOperationHelper.js +1 -1
  189. package/dist/utils/MoveOperationHelper.js.map +1 -1
  190. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  191. package/dist/utils/RevisionAwareProcessor.js +2 -4
  192. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  193. package/dist/utils/RevisionWalker.d.ts.map +1 -1
  194. package/dist/utils/RevisionWalker.js +4 -12
  195. package/dist/utils/RevisionWalker.js.map +1 -1
  196. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  197. package/dist/utils/SelectiveRevisionAcceptor.js +2 -6
  198. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  199. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  200. package/dist/utils/ShadingResolver.js +1 -1
  201. package/dist/utils/ShadingResolver.js.map +1 -1
  202. package/dist/utils/acceptRevisions.d.ts.map +1 -1
  203. package/dist/utils/acceptRevisions.js +23 -12
  204. package/dist/utils/acceptRevisions.js.map +1 -1
  205. package/dist/utils/cnfStyleDecoder.d.ts +1 -1
  206. package/dist/utils/cnfStyleDecoder.d.ts.map +1 -1
  207. package/dist/utils/cnfStyleDecoder.js +40 -40
  208. package/dist/utils/cnfStyleDecoder.js.map +1 -1
  209. package/dist/utils/corruptionDetection.d.ts.map +1 -1
  210. package/dist/utils/corruptionDetection.js.map +1 -1
  211. package/dist/utils/dateFormatting.js.map +1 -1
  212. package/dist/utils/deepClone.js +1 -1
  213. package/dist/utils/deepClone.js.map +1 -1
  214. package/dist/utils/diagnostics.d.ts.map +1 -1
  215. package/dist/utils/diagnostics.js +1 -1
  216. package/dist/utils/diagnostics.js.map +1 -1
  217. package/dist/utils/errorHandling.js.map +1 -1
  218. package/dist/utils/formatting.d.ts.map +1 -1
  219. package/dist/utils/formatting.js +10 -2
  220. package/dist/utils/formatting.js.map +1 -1
  221. package/dist/utils/list-detection.d.ts +2 -2
  222. package/dist/utils/list-detection.d.ts.map +1 -1
  223. package/dist/utils/list-detection.js +21 -23
  224. package/dist/utils/list-detection.js.map +1 -1
  225. package/dist/utils/logger.d.ts.map +1 -1
  226. package/dist/utils/logger.js +12 -7
  227. package/dist/utils/logger.js.map +1 -1
  228. package/dist/utils/parsingHelpers.js.map +1 -1
  229. package/dist/utils/stripTrackedChanges.d.ts.map +1 -1
  230. package/dist/utils/stripTrackedChanges.js +3 -3
  231. package/dist/utils/stripTrackedChanges.js.map +1 -1
  232. package/dist/utils/textDiff.d.ts +1 -1
  233. package/dist/utils/textDiff.js +8 -8
  234. package/dist/utils/textDiff.js.map +1 -1
  235. package/dist/utils/units.js.map +1 -1
  236. package/dist/utils/validation.d.ts.map +1 -1
  237. package/dist/utils/validation.js +24 -7
  238. package/dist/utils/validation.js.map +1 -1
  239. package/dist/utils/xmlSanitization.d.ts.map +1 -1
  240. package/dist/utils/xmlSanitization.js +3 -3
  241. package/dist/utils/xmlSanitization.js.map +1 -1
  242. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  243. package/dist/validation/RevisionAutoFixer.js +5 -5
  244. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  245. package/dist/validation/RevisionValidator.d.ts.map +1 -1
  246. package/dist/validation/RevisionValidator.js +7 -9
  247. package/dist/validation/RevisionValidator.js.map +1 -1
  248. package/dist/validation/ValidationRules.js +3 -3
  249. package/dist/validation/ValidationRules.js.map +1 -1
  250. package/dist/validation/index.js.map +1 -1
  251. package/dist/xml/XMLBuilder.d.ts +1 -1
  252. package/dist/xml/XMLBuilder.d.ts.map +1 -1
  253. package/dist/xml/XMLBuilder.js +98 -100
  254. package/dist/xml/XMLBuilder.js.map +1 -1
  255. package/dist/xml/XMLParser.d.ts.map +1 -1
  256. package/dist/xml/XMLParser.js +61 -66
  257. package/dist/xml/XMLParser.js.map +1 -1
  258. package/dist/zip/ZipHandler.d.ts.map +1 -1
  259. package/dist/zip/ZipHandler.js.map +1 -1
  260. package/dist/zip/ZipReader.d.ts.map +1 -1
  261. package/dist/zip/ZipReader.js +1 -3
  262. package/dist/zip/ZipReader.js.map +1 -1
  263. package/dist/zip/ZipWriter.d.ts +1 -1
  264. package/dist/zip/ZipWriter.d.ts.map +1 -1
  265. package/dist/zip/ZipWriter.js +28 -36
  266. package/dist/zip/ZipWriter.js.map +1 -1
  267. package/dist/zip/types.js +1 -1
  268. package/dist/zip/types.js.map +1 -1
  269. package/package.json +92 -92
  270. package/src/__tests__/helper-methods.test.ts +512 -512
  271. package/src/constants/legacyCompatFlags.ts +138 -138
  272. package/src/constants/limits.ts +50 -50
  273. package/src/core/Document.ts +985 -1145
  274. package/src/core/DocumentContent.ts +461 -467
  275. package/src/core/DocumentGenerator.ts +1133 -1104
  276. package/src/core/DocumentIdManager.ts +158 -158
  277. package/src/core/DocumentParser.ts +2347 -2716
  278. package/src/core/DocumentValidator.ts +363 -372
  279. package/src/core/Relationship.ts +367 -367
  280. package/src/core/RelationshipManager.ts +429 -428
  281. package/src/elements/AlternateContent.ts +42 -42
  282. package/src/elements/Bookmark.ts +212 -210
  283. package/src/elements/BookmarkManager.ts +247 -250
  284. package/src/elements/Comment.ts +356 -359
  285. package/src/elements/CommentManager.ts +499 -502
  286. package/src/elements/CommonTypes.ts +524 -549
  287. package/src/elements/CustomXml.ts +36 -36
  288. package/src/elements/Endnote.ts +221 -217
  289. package/src/elements/EndnoteManager.ts +246 -249
  290. package/src/elements/Field.ts +1292 -1233
  291. package/src/elements/FieldHelpers.ts +329 -333
  292. package/src/elements/FontManager.ts +336 -339
  293. package/src/elements/Footer.ts +269 -269
  294. package/src/elements/Footnote.ts +221 -217
  295. package/src/elements/FootnoteManager.ts +246 -249
  296. package/src/elements/Header.ts +269 -269
  297. package/src/elements/HeaderFooterManager.ts +219 -219
  298. package/src/elements/Hyperlink.ts +1288 -1193
  299. package/src/elements/Image.ts +1982 -1756
  300. package/src/elements/ImageManager.ts +437 -432
  301. package/src/elements/ImageRun.ts +59 -59
  302. package/src/elements/MathElement.ts +65 -65
  303. package/src/elements/Paragraph.ts +4347 -4287
  304. package/src/elements/PreservedElement.ts +53 -53
  305. package/src/elements/PropertyChangeTypes.ts +458 -442
  306. package/src/elements/RangeMarker.ts +382 -400
  307. package/src/elements/Revision.ts +1198 -1217
  308. package/src/elements/RevisionContent.ts +73 -73
  309. package/src/elements/RevisionManager.ts +1070 -1070
  310. package/src/elements/Run.ts +3103 -3073
  311. package/src/elements/Section.ts +1521 -1421
  312. package/src/elements/Shape.ts +884 -873
  313. package/src/elements/StructuredDocumentTag.ts +1176 -1207
  314. package/src/elements/Table.ts +2468 -2524
  315. package/src/elements/TableCell.ts +1617 -1621
  316. package/src/elements/TableGridChange.ts +149 -151
  317. package/src/elements/TableOfContents.ts +701 -691
  318. package/src/elements/TableOfContentsElement.ts +89 -89
  319. package/src/elements/TableRow.ts +960 -929
  320. package/src/elements/TextBox.ts +766 -768
  321. package/src/formatting/AbstractNumbering.ts +580 -579
  322. package/src/formatting/NumberingInstance.ts +295 -299
  323. package/src/formatting/NumberingLevel.ts +981 -1040
  324. package/src/formatting/NumberingManager.ts +833 -827
  325. package/src/formatting/Style.ts +1785 -1879
  326. package/src/formatting/StylesManager.ts +1090 -1130
  327. package/src/helpers/CleanupHelper.ts +524 -524
  328. package/src/images/ImageOptimizer.ts +274 -274
  329. package/src/index.ts +559 -554
  330. package/src/managers/DrawingManager.ts +319 -319
  331. package/src/tracking/DocumentTrackingContext.ts +687 -674
  332. package/src/tracking/TrackingContext.ts +175 -173
  333. package/src/types/compatibility-types.ts +49 -49
  334. package/src/types/formatting.ts +210 -210
  335. package/src/types/list-types.ts +14 -14
  336. package/src/types/settings-types.ts +59 -59
  337. package/src/types/styleConfig.ts +189 -189
  338. package/src/utils/ChangelogGenerator.ts +1583 -1581
  339. package/src/utils/CompatibilityUpgrader.ts +235 -237
  340. package/src/utils/InMemoryRevisionAcceptor.ts +691 -696
  341. package/src/utils/MoveOperationHelper.ts +233 -238
  342. package/src/utils/RevisionAwareProcessor.ts +518 -526
  343. package/src/utils/RevisionWalker.ts +427 -457
  344. package/src/utils/SelectiveRevisionAcceptor.ts +662 -683
  345. package/src/utils/ShadingResolver.ts +105 -107
  346. package/src/utils/acceptRevisions.ts +723 -714
  347. package/src/utils/cnfStyleDecoder.ts +212 -217
  348. package/src/utils/corruptionDetection.ts +346 -345
  349. package/src/utils/dateFormatting.ts +20 -20
  350. package/src/utils/deepClone.ts +77 -78
  351. package/src/utils/diagnostics.ts +125 -129
  352. package/src/utils/errorHandling.ts +80 -80
  353. package/src/utils/formatting.ts +220 -213
  354. package/src/utils/list-detection.ts +32 -42
  355. package/src/utils/logger.ts +412 -404
  356. package/src/utils/parsingHelpers.ts +190 -190
  357. package/src/utils/stripTrackedChanges.ts +356 -353
  358. package/src/utils/textDiff.ts +100 -100
  359. package/src/utils/units.ts +421 -421
  360. package/src/utils/validation.ts +553 -542
  361. package/src/utils/xmlSanitization.ts +179 -182
  362. package/src/validation/RevisionAutoFixer.ts +541 -542
  363. package/src/validation/RevisionValidator.ts +470 -460
  364. package/src/validation/ValidationRules.ts +338 -338
  365. package/src/validation/index.ts +30 -30
  366. package/src/xml/XMLBuilder.ts +857 -871
  367. package/src/xml/XMLParser.ts +877 -919
  368. package/src/zip/ZipHandler.ts +629 -637
  369. package/src/zip/ZipReader.ts +295 -299
  370. package/src/zip/ZipWriter.ts +374 -390
  371. package/src/zip/types.ts +116 -116
@@ -1,524 +1,524 @@
1
- /**
2
- * CleanupHelper - Comprehensive document cleanup utilities
3
- *
4
- * Provides methods to clean up common issues in DOCX documents, including:
5
- * - Unlocking and removing SDTs
6
- * - Clearing preserve flags
7
- * - Defragmenting hyperlinks
8
- * - Cleaning unused elements
9
- * - Removing customXML
10
- * - Unlocking fields and frames
11
- * - Sanitizing tables
12
- *
13
- * Usage:
14
- * const cleanup = new CleanupHelper(doc);
15
- * cleanup.all(); // Run all cleanups
16
- */
17
-
18
- import type { Document } from "../core/Document";
19
- import { Field, ComplexField } from "../elements/Field";
20
- import { Hyperlink } from "../elements/Hyperlink";
21
- import { Paragraph } from "../elements/Paragraph";
22
- import { Table } from "../elements/Table";
23
- import { StructuredDocumentTag } from "../elements/StructuredDocumentTag";
24
-
25
- export interface CleanupOptions {
26
- /** Unlock all SDTs to enable editing */
27
- unlockSDTs?: boolean;
28
- /** Remove all SDTs (unwrap content) */
29
- removeSDTs?: boolean;
30
- /** Clear paragraph preserve flags */
31
- clearPreserveFlags?: boolean;
32
- /** Merge fragmented hyperlinks */
33
- defragmentHyperlinks?: boolean;
34
- /** Reset hyperlink formatting to standard */
35
- resetHyperlinkFormatting?: boolean;
36
- /** Remove unused numbering definitions */
37
- cleanupNumbering?: boolean;
38
- /** Remove unused styles */
39
- cleanupStyles?: boolean;
40
- /** Remove orphaned relationships */
41
- cleanupRelationships?: boolean;
42
- /** Remove customXML elements */
43
- removeCustomXML?: boolean;
44
- /** Unlock field locks (enable field updates) */
45
- unlockFields?: boolean;
46
- /** Remove frame/text box locks */
47
- unlockFrames?: boolean;
48
- /** Sanitize table property exceptions (tblPrEx) */
49
- sanitizeTables?: boolean;
50
- /** Format internal anchor hyperlinks with standard styling (Verdana 12pt blue underlined) */
51
- formatInternalHyperlinks?: boolean;
52
- /** Format ALL hyperlinks (internal, external, and HYPERLINK fields) with standard styling (Verdana 12pt #0000FF underlined) */
53
- formatAllHyperlinks?: boolean;
54
- }
55
-
56
- export interface CleanupReport {
57
- sdtsUnlocked: number;
58
- sdtsRemoved: number;
59
- preserveFlagsCleared: number;
60
- hyperlinksDefragmented: number;
61
- numberingRemoved: number;
62
- stylesRemoved: number;
63
- relationshipsRemoved: number;
64
- customXMLRemoved: number;
65
- fieldsUnlocked: number;
66
- framesUnlocked: number;
67
- tablesProcessed: number;
68
- internalHyperlinksFormatted: number;
69
- allHyperlinksFormatted: number;
70
- warnings: string[];
71
- }
72
-
73
- export class CleanupHelper {
74
- private doc: Document;
75
-
76
- constructor(doc: Document) {
77
- this.doc = doc;
78
- }
79
-
80
- /**
81
- * Run all cleanup operations with default settings
82
- * @returns Cleanup report
83
- */
84
- all(): CleanupReport {
85
- return this.run({
86
- unlockSDTs: true,
87
- removeSDTs: true,
88
- clearPreserveFlags: true,
89
- defragmentHyperlinks: true,
90
- resetHyperlinkFormatting: true,
91
- cleanupNumbering: true,
92
- cleanupStyles: true,
93
- cleanupRelationships: true,
94
- removeCustomXML: true,
95
- unlockFields: true,
96
- unlockFrames: true,
97
- sanitizeTables: true,
98
- formatAllHyperlinks: true,
99
- });
100
- }
101
-
102
- /**
103
- * Run selective cleanup operations
104
- * @param options Cleanup options
105
- * @returns Cleanup report
106
- */
107
- run(options: CleanupOptions): CleanupReport {
108
- const report: CleanupReport = {
109
- sdtsUnlocked: 0,
110
- sdtsRemoved: 0,
111
- preserveFlagsCleared: 0,
112
- hyperlinksDefragmented: 0,
113
- numberingRemoved: 0,
114
- stylesRemoved: 0,
115
- relationshipsRemoved: 0,
116
- customXMLRemoved: 0,
117
- fieldsUnlocked: 0,
118
- framesUnlocked: 0,
119
- tablesProcessed: 0,
120
- internalHyperlinksFormatted: 0,
121
- allHyperlinksFormatted: 0,
122
- warnings: [],
123
- };
124
-
125
- if (options.unlockSDTs) {
126
- report.sdtsUnlocked = this.unlockSDTs();
127
- }
128
-
129
- if (options.removeSDTs) {
130
- report.sdtsRemoved = this.removeSDTs();
131
- }
132
-
133
- if (options.clearPreserveFlags) {
134
- report.preserveFlagsCleared = this.clearPreserveFlags();
135
- }
136
-
137
- if (options.defragmentHyperlinks) {
138
- report.hyperlinksDefragmented = this.defragmentHyperlinks(
139
- options.resetHyperlinkFormatting ?? false
140
- );
141
- }
142
-
143
- if (options.cleanupNumbering) {
144
- report.numberingRemoved = this.cleanupNumbering();
145
- }
146
-
147
- if (options.cleanupStyles) {
148
- report.stylesRemoved = this.cleanupStyles();
149
- }
150
-
151
- if (options.cleanupRelationships) {
152
- report.relationshipsRemoved = this.cleanupRelationships();
153
- }
154
-
155
- if (options.removeCustomXML) {
156
- report.customXMLRemoved = this.removeCustomXML();
157
- }
158
-
159
- if (options.unlockFields) {
160
- report.fieldsUnlocked = this.unlockFields();
161
- }
162
-
163
- if (options.unlockFrames) {
164
- report.framesUnlocked = this.unlockFrames();
165
- }
166
-
167
- if (options.sanitizeTables) {
168
- report.tablesProcessed = this.sanitizeTables();
169
- }
170
-
171
- if (options.formatInternalHyperlinks) {
172
- report.internalHyperlinksFormatted = this.formatInternalHyperlinks();
173
- }
174
-
175
- if (options.formatAllHyperlinks) {
176
- report.allHyperlinksFormatted = this.formatAllHyperlinks();
177
- }
178
-
179
- return report;
180
- }
181
-
182
- private unlockSDTs(): number {
183
- let count = 0;
184
- const bodyElements = this.doc.getBodyElements();
185
-
186
- for (const element of bodyElements) {
187
- if (element instanceof StructuredDocumentTag && element.isLocked()) {
188
- element.unlock();
189
- count++;
190
- }
191
- }
192
-
193
- // Also unlock in tables
194
- for (const table of this.doc.getAllTables()) {
195
- for (const row of table.getRows()) {
196
- for (const cell of row.getCells()) {
197
- for (const para of cell.getParagraphs()) {
198
- // SDTs can wrap paragraphs in cells
199
- const content = para.getContent();
200
- for (const item of content) {
201
- if (item instanceof StructuredDocumentTag && item.isLocked()) {
202
- item.unlock();
203
- count++;
204
- }
205
- }
206
- }
207
- }
208
- }
209
- }
210
-
211
- return count;
212
- }
213
-
214
- private removeSDTs(): number {
215
- // Unwrap SDT wrappers, preserving their content
216
- const bodyElements = this.doc.getBodyElements();
217
- type BodyElement = Paragraph | Table | StructuredDocumentTag;
218
- const unwrapped: BodyElement[] = [];
219
- let sdtCount = 0;
220
-
221
- const unwrapSDT = (sdt: StructuredDocumentTag, target: BodyElement[]) => {
222
- sdtCount++;
223
- for (const item of sdt.getContent()) {
224
- if (item instanceof Paragraph || item instanceof Table) {
225
- target.push(item);
226
- } else if (item instanceof StructuredDocumentTag) {
227
- unwrapSDT(item, target);
228
- }
229
- }
230
- };
231
-
232
- for (const element of bodyElements) {
233
- if (element instanceof StructuredDocumentTag) {
234
- unwrapSDT(element, unwrapped);
235
- } else {
236
- unwrapped.push(element as BodyElement);
237
- }
238
- }
239
-
240
- this.doc.setBodyElements(unwrapped);
241
- return sdtCount;
242
- }
243
-
244
- private clearPreserveFlags(): number {
245
- let cleared = 0;
246
- for (const para of this.doc.getAllParagraphs()) {
247
- if (para.isPreserved()) {
248
- para.setPreserved(false);
249
- cleared++;
250
- }
251
- }
252
- return cleared;
253
- }
254
-
255
- private defragmentHyperlinks(resetFormatting: boolean): number {
256
- return this.doc.defragmentHyperlinks({ resetFormatting, cleanupRelationships: true });
257
- }
258
-
259
- private cleanupNumbering(): number {
260
- const before = this.doc.getNumberingManager().getAllInstances().length;
261
- this.doc.cleanupUnusedNumbering();
262
- const after = this.doc.getNumberingManager().getAllInstances().length;
263
- return before - after;
264
- }
265
-
266
- private cleanupStyles(): number {
267
- // Implementation for unused styles removal
268
- // Scan all paragraphs and runs for used styles
269
- const usedStyles = new Set<string>();
270
- for (const para of this.doc.getAllParagraphs()) {
271
- const paraStyle = para.getFormatting().style;
272
- if (paraStyle) usedStyles.add(paraStyle);
273
- for (const run of para.getRuns()) {
274
- const runStyle = run.getFormatting().characterStyle;
275
- if (runStyle) usedStyles.add(runStyle);
276
- }
277
- }
278
-
279
- // Remove unused styles
280
- let removed = 0;
281
- const allStyles = this.doc.getStylesManager().getAllStyles();
282
- for (const style of allStyles) {
283
- if (!usedStyles.has(style.getStyleId())) {
284
- this.doc.getStylesManager().removeStyle(style.getStyleId());
285
- removed++;
286
- }
287
- }
288
-
289
- return removed;
290
- }
291
-
292
- private cleanupRelationships(): number {
293
- // Use comprehensive scanning that includes raw nested content (nested tables),
294
- // headers/footers, footnotes, and endnotes — not just in-memory hyperlinks
295
- const referencedIds = this.doc.collectAllReferencedHyperlinkIds();
296
-
297
- // Remove orphaned hyperlink relationships
298
- return this.doc.getRelationshipManager().removeOrphanedHyperlinks(referencedIds);
299
- }
300
-
301
- private removeCustomXML(): number {
302
- const zipHandler = this.doc.getZipHandler();
303
- let removed = 0;
304
-
305
- // Remove customXML files
306
- const files = zipHandler.getFilePaths();
307
- for (const file of files) {
308
- if (file.startsWith('customXml/') || file.startsWith('customXML/')) {
309
- zipHandler.removeFile(file);
310
- removed++;
311
- }
312
- }
313
-
314
- // Remove customXML relationships
315
- const relManager = this.doc.getRelationshipManager();
316
- const customRels = relManager.getRelationshipsByType(
317
- 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml'
318
- );
319
- for (const rel of customRels) {
320
- relManager.removeRelationship(rel.getId());
321
- removed++;
322
- }
323
-
324
- // Remove custom.xml if present (docProps/custom.xml)
325
- if (zipHandler.hasFile('docProps/custom.xml')) {
326
- zipHandler.removeFile('docProps/custom.xml');
327
- removed++;
328
- }
329
-
330
- return removed;
331
- }
332
-
333
- private unlockFields(): number {
334
- const zipHandler = this.doc.getZipHandler();
335
- const docXml = zipHandler.getFileAsString('word/document.xml');
336
- if (!docXml) return 0;
337
-
338
- // Count matches first, then replace (avoid regex re-execution)
339
- const pattern = /w:fldLock="(1|true)"/g;
340
- const matches = docXml.match(pattern) || [];
341
- if (matches.length === 0) return 0;
342
-
343
- // Remove w:fldLock="1" or w:fldLock="true"
344
- const updatedXml = docXml.replace(pattern, '');
345
- zipHandler.updateFile('word/document.xml', updatedXml);
346
-
347
- return matches.length;
348
- }
349
-
350
- private unlockFrames(): number {
351
- const zipHandler = this.doc.getZipHandler();
352
- const docXml = zipHandler.getFileAsString('word/document.xml');
353
- if (!docXml) return 0;
354
-
355
- // Count matches first, then replace (avoid regex re-execution)
356
- const pattern = /w:anchorLock="(1|true)"/g;
357
- const matches = docXml.match(pattern) || [];
358
- if (matches.length === 0) return 0;
359
-
360
- // Remove w:anchorLock="1" or w:anchorLock="true"
361
- const updatedXml = docXml.replace(pattern, '');
362
- zipHandler.updateFile('word/document.xml', updatedXml);
363
-
364
- return matches.length;
365
- }
366
-
367
/**
 * Clears table property exceptions from every row that carries any.
 *
 * @returns Number of tables visited, whether or not one of their rows changed
 */
private sanitizeTables(): number {
  let visited = 0;

  for (const tbl of this.doc.getAllTables()) {
    for (const tableRow of tbl.getRows()) {
      const tblPrEx = tableRow.getTablePropertyExceptions();
      const nothingToClear = !tblPrEx || Object.keys(tblPrEx).length === 0;
      if (nothingToClear) continue;

      tableRow.setTablePropertyExceptions(undefined as any);
    }
    visited += 1;
  }

  return visited;
}
381
-
382
/**
 * Applies the standard hyperlink look (Verdana, size 12, colour 0000FF,
 * single underline) to every internal hyperlink.
 *
 * Paragraphs are gathered from `getAllParagraphs()` and from every table cell.
 * A hyperlink reachable through both routes is formatted and counted once.
 *
 * @returns Number of distinct internal hyperlinks formatted
 */
private formatInternalHyperlinks(): number {
  const formatting = {
    font: 'Verdana',
    size: 12,
    color: '0000FF',
    underline: 'single' as const,
  };

  // Identity set: guards against visiting the same hyperlink twice.
  const formatted = new Set<Hyperlink>();
  const formatContent = (items: Iterable<unknown>): void => {
    for (const item of items) {
      if (item instanceof Hyperlink && item.isInternal() && !formatted.has(item)) {
        formatted.add(item);
        item.setFormatting(formatting, { replace: true });
      }
    }
  };

  // Process body paragraphs
  for (const paragraph of this.doc.getAllParagraphs()) {
    formatContent(paragraph.getContent());
  }

  // Process table paragraphs
  for (const table of this.doc.getAllTables()) {
    for (const row of table.getRows()) {
      for (const cell of row.getCells()) {
        for (const para of cell.getParagraphs()) {
          formatContent(para.getContent());
        }
      }
    }
  }

  return formatted.size;
}
419
-
420
/**
 * Formats ALL hyperlinks in the document with standard styling.
 * This includes:
 * - Internal w:hyperlink elements (bookmarks)
 * - External w:hyperlink elements (URLs)
 * - HYPERLINK fields (both simple w:fldSimple and complex fields)
 *
 * Standard formatting: Verdana 12pt, #0000FF blue, single underline.
 *
 * Paragraphs are gathered from `getAllParagraphs()` and from every table cell.
 * An item reachable through both routes is formatted and counted once.
 *
 * @returns Number of distinct hyperlink items formatted
 */
private formatAllHyperlinks(): number {
  const formatting = {
    font: 'Verdana',
    size: 12,
    color: '0000FF',
    underline: 'single' as const,
  };

  // Identity set: guards against visiting the same item twice.
  const formatted = new Set<unknown>();

  // Helper to process paragraph content
  const processParagraph = (paragraph: { getContent(): Iterable<unknown> }): void => {
    for (const item of paragraph.getContent()) {
      if (formatted.has(item)) continue;

      let touched = false;
      // Process all Hyperlink instances (both internal AND external)
      if (item instanceof Hyperlink) {
        item.setFormatting(formatting, { replace: true });
        touched = true;
      }
      // Process simple HYPERLINK fields
      if (item instanceof Field && item.isHyperlinkField()) {
        item.setFormatting(formatting);
        touched = true;
      }
      // Process complex HYPERLINK fields
      if (item instanceof ComplexField && item.isHyperlinkField()) {
        item.setResultFormatting(formatting);
        touched = true;
      }

      if (touched) formatted.add(item);
    }
  };

  // Process body paragraphs
  for (const paragraph of this.doc.getAllParagraphs()) {
    processParagraph(paragraph);
  }

  // Process table paragraphs
  for (const table of this.doc.getAllTables()) {
    for (const row of table.getRows()) {
      for (const cell of row.getCells()) {
        for (const para of cell.getParagraphs()) {
          processParagraph(para);
        }
      }
    }
  }

  return formatted.size;
}
478
-
479
/**
 * Preset: Google Docs cleanup.
 *
 * @returns A fresh options object enabling SDT unlock/removal, hyperlink
 *   defragmentation with formatting reset, relationship cleanup, customXML
 *   removal and table sanitizing
 */
static googleDocsPreset(): CleanupOptions {
  const preset: CleanupOptions = {
    unlockSDTs: true,
    removeSDTs: true,
    defragmentHyperlinks: true,
    resetHyperlinkFormatting: true,
    cleanupRelationships: true,
    removeCustomXML: true,
    sanitizeTables: true,
  };
  return preset;
}
493
-
494
/**
 * Preset: Full cleanup.
 *
 * @returns A fresh options object enabling every pass except
 *   `formatInternalHyperlinks`
 */
static fullCleanupPreset(): CleanupOptions {
  const preset: CleanupOptions = {
    unlockSDTs: true,
    removeSDTs: true,
    clearPreserveFlags: true,
    defragmentHyperlinks: true,
    resetHyperlinkFormatting: true,
    cleanupNumbering: true,
    cleanupStyles: true,
    cleanupRelationships: true,
    removeCustomXML: true,
    unlockFields: true,
    unlockFrames: true,
    sanitizeTables: true,
    formatAllHyperlinks: true,
  };
  return preset;
}
514
-
515
/**
 * Preset: Minimal cleanup.
 *
 * @returns A fresh options object enabling only relationship cleanup and
 *   customXML removal
 */
static minimalPreset(): CleanupOptions {
  const preset: CleanupOptions = {
    cleanupRelationships: true,
    removeCustomXML: true,
  };
  return preset;
}
524
- }
1
+ /**
2
+ * CleanupHelper - Comprehensive document cleanup utilities
3
+ *
4
+ * Provides methods to clean up common issues in DOCX documents, including:
5
+ * - Unlocking and removing SDTs
6
+ * - Clearing preserve flags
7
+ * - Defragmenting hyperlinks
8
+ * - Cleaning unused elements
9
+ * - Removing customXML
10
+ * - Unlocking fields and frames
11
+ * - Sanitizing tables
12
+ *
13
+ * Usage:
14
+ * const cleanup = new CleanupHelper(doc);
15
+ * cleanup.all(); // Run all cleanups
16
+ */
17
+
18
+ import type { Document } from '../core/Document';
19
+ import { Field, ComplexField } from '../elements/Field';
20
+ import { Hyperlink } from '../elements/Hyperlink';
21
+ import { Paragraph } from '../elements/Paragraph';
22
+ import { Table } from '../elements/Table';
23
+ import { StructuredDocumentTag } from '../elements/StructuredDocumentTag';
24
+
25
/**
 * Switches selecting which cleanup passes `CleanupHelper.run` executes.
 * Every flag is optional; a pass runs only when its flag is truthy.
 */
export interface CleanupOptions {
  /** Unlock all SDTs to enable editing */
  unlockSDTs?: boolean;
  /** Remove all SDTs (unwrap content) */
  removeSDTs?: boolean;
  /** Clear paragraph preserve flags */
  clearPreserveFlags?: boolean;
  /** Merge fragmented hyperlinks */
  defragmentHyperlinks?: boolean;
  /**
   * Reset hyperlink formatting to standard.
   * Only read as an argument of the defragment pass, so it has no effect
   * unless `defragmentHyperlinks` is also enabled.
   */
  resetHyperlinkFormatting?: boolean;
  /** Remove unused numbering definitions */
  cleanupNumbering?: boolean;
  /** Remove unused styles */
  cleanupStyles?: boolean;
  /** Remove orphaned relationships */
  cleanupRelationships?: boolean;
  /** Remove customXML elements */
  removeCustomXML?: boolean;
  /** Unlock field locks (enable field updates) */
  unlockFields?: boolean;
  /** Remove frame/text box locks */
  unlockFrames?: boolean;
  /** Sanitize table property exceptions (tblPrEx) */
  sanitizeTables?: boolean;
  /** Format internal anchor hyperlinks with standard styling (Verdana 12pt blue underlined) */
  formatInternalHyperlinks?: boolean;
  /** Format ALL hyperlinks (internal, external, and HYPERLINK fields) with standard styling (Verdana 12pt #0000FF underlined) */
  formatAllHyperlinks?: boolean;
}
55
+
56
/**
 * Per-pass counters returned by `CleanupHelper.run`.
 * A counter stays at 0 when its pass was not requested.
 */
export interface CleanupReport {
  /** Locked SDTs that were unlocked */
  sdtsUnlocked: number;
  /** SDT wrappers that were unwrapped */
  sdtsRemoved: number;
  /** Paragraphs whose preserve flag was cleared */
  preserveFlagsCleared: number;
  /** Value returned by the document's hyperlink defragmentation */
  hyperlinksDefragmented: number;
  /** Drop in the number of numbering instances after cleanup */
  numberingRemoved: number;
  /** Styles removed as unused */
  stylesRemoved: number;
  /** Orphaned hyperlink relationships removed */
  relationshipsRemoved: number;
  /** customXML files plus customXml relationships removed, plus one if docProps/custom.xml was removed */
  customXMLRemoved: number;
  /** `w:fldLock` attributes removed */
  fieldsUnlocked: number;
  /** `w:anchorLock` attributes removed */
  framesUnlocked: number;
  /** Tables visited by the table sanitizing pass */
  tablesProcessed: number;
  /** Internal hyperlinks given the standard formatting */
  internalHyperlinksFormatted: number;
  /** Hyperlinks and HYPERLINK fields given the standard formatting */
  allHyperlinksFormatted: number;
  /** Starts empty; none of the cleanup passes visible in this file append to it */
  warnings: string[];
}
72
+
73
/**
 * Applies a selectable set of cleanup passes to one document and reports how
 * much each pass changed.
 *
 * Use `all()` for every pass in the full preset, `run(options)` for a custom
 * selection, or one of the static preset factories as a starting point.
 */
export class CleanupHelper {
  private readonly doc: Document;

  /**
   * @param doc Document that every cleanup pass reads and modifies
   */
  constructor(doc: Document) {
    this.doc = doc;
  }

  /**
   * Run all cleanup operations with default settings.
   * Equivalent to `run(CleanupHelper.fullCleanupPreset())`.
   * @returns Cleanup report
   */
  all(): CleanupReport {
    return this.run(CleanupHelper.fullCleanupPreset());
  }

  /**
   * Run selective cleanup operations.
   *
   * Passes execute in a fixed order (SDTs, preserve flags, hyperlink
   * defragmentation, numbering, styles, relationships, customXML, field and
   * frame locks, tables, hyperlink formatting) regardless of key order in
   * `options`.
   *
   * @param options Cleanup options; a pass runs only when its flag is truthy
   * @returns Cleanup report with one counter per pass (0 for skipped passes)
   */
  run(options: CleanupOptions): CleanupReport {
    const report: CleanupReport = {
      sdtsUnlocked: 0,
      sdtsRemoved: 0,
      preserveFlagsCleared: 0,
      hyperlinksDefragmented: 0,
      numberingRemoved: 0,
      stylesRemoved: 0,
      relationshipsRemoved: 0,
      customXMLRemoved: 0,
      fieldsUnlocked: 0,
      framesUnlocked: 0,
      tablesProcessed: 0,
      internalHyperlinksFormatted: 0,
      allHyperlinksFormatted: 0,
      warnings: [],
    };

    if (options.unlockSDTs) {
      report.sdtsUnlocked = this.unlockSDTs();
    }

    if (options.removeSDTs) {
      report.sdtsRemoved = this.removeSDTs();
    }

    if (options.clearPreserveFlags) {
      report.preserveFlagsCleared = this.clearPreserveFlags();
    }

    if (options.defragmentHyperlinks) {
      // resetHyperlinkFormatting is only meaningful as part of this pass.
      report.hyperlinksDefragmented = this.defragmentHyperlinks(
        options.resetHyperlinkFormatting ?? false
      );
    }

    if (options.cleanupNumbering) {
      report.numberingRemoved = this.cleanupNumbering();
    }

    if (options.cleanupStyles) {
      report.stylesRemoved = this.cleanupStyles();
    }

    if (options.cleanupRelationships) {
      report.relationshipsRemoved = this.cleanupRelationships();
    }

    if (options.removeCustomXML) {
      report.customXMLRemoved = this.removeCustomXML();
    }

    // NOTE(review): the two lock passes edit the stored word/document.xml text
    // rather than the in-memory model — confirm the edit survives a later save.
    if (options.unlockFields) {
      report.fieldsUnlocked = this.unlockFields();
    }

    if (options.unlockFrames) {
      report.framesUnlocked = this.unlockFrames();
    }

    if (options.sanitizeTables) {
      report.tablesProcessed = this.sanitizeTables();
    }

    if (options.formatInternalHyperlinks) {
      report.internalHyperlinksFormatted = this.formatInternalHyperlinks();
    }

    if (options.formatAllHyperlinks) {
      report.allHyperlinksFormatted = this.formatAllHyperlinks();
    }

    return report;
  }

  /**
   * Gathers every paragraph the helper can reach: those returned by
   * `getAllParagraphs()` followed by the paragraphs of every table cell.
   * A paragraph reachable through both routes appears once, so callers never
   * process or count it twice.
   */
  private collectParagraphs(): Paragraph[] {
    const unique = new Set<Paragraph>();

    for (const paragraph of this.doc.getAllParagraphs()) {
      unique.add(paragraph);
    }

    for (const table of this.doc.getAllTables()) {
      for (const row of table.getRows()) {
        for (const cell of row.getCells()) {
          for (const paragraph of cell.getParagraphs()) {
            unique.add(paragraph);
          }
        }
      }
    }

    return [...unique];
  }

  /** Builds a fresh copy of the standard hyperlink formatting (Verdana, size 12, 0000FF, single underline). */
  private static standardHyperlinkFormatting() {
    return {
      font: 'Verdana',
      size: 12,
      color: '0000FF',
      underline: 'single' as const,
    };
  }

  /**
   * Unlocks every locked SDT found at body level, inside paragraph content,
   * and nested inside other SDTs.
   * @returns Number of SDTs that were locked and are now unlocked
   */
  private unlockSDTs(): number {
    const visited = new Set<StructuredDocumentTag>();
    let count = 0;

    // Unlocks the candidate if it is an SDT, then descends into its content so
    // SDTs wrapped by other SDTs are reached as well.
    const unlockDeep = (candidate: unknown): void => {
      if (!(candidate instanceof StructuredDocumentTag) || visited.has(candidate)) return;
      visited.add(candidate);

      if (candidate.isLocked()) {
        candidate.unlock();
        count++;
      }
      for (const child of candidate.getContent()) {
        unlockDeep(child);
      }
    };

    for (const element of this.doc.getBodyElements()) {
      unlockDeep(element);
    }

    // SDTs can also sit inside paragraph content (including table cells)
    for (const para of this.collectParagraphs()) {
      for (const item of para.getContent()) {
        unlockDeep(item);
      }
    }

    return count;
  }

  /**
   * Unwraps body-level SDT wrappers, preserving their content in place.
   * Nested SDTs are unwrapped recursively. Content items that are neither a
   * paragraph, a table nor an SDT are not carried over.
   * @returns Number of SDT wrappers removed
   */
  private removeSDTs(): number {
    const bodyElements = this.doc.getBodyElements();
    type BodyElement = Paragraph | Table | StructuredDocumentTag;
    const unwrapped: BodyElement[] = [];
    let sdtCount = 0;

    const unwrapSDT = (sdt: StructuredDocumentTag, target: BodyElement[]) => {
      sdtCount++;
      for (const item of sdt.getContent()) {
        if (item instanceof Paragraph || item instanceof Table) {
          target.push(item);
        } else if (item instanceof StructuredDocumentTag) {
          unwrapSDT(item, target);
        }
      }
    };

    for (const element of bodyElements) {
      if (element instanceof StructuredDocumentTag) {
        unwrapSDT(element, unwrapped);
      } else {
        unwrapped.push(element as BodyElement);
      }
    }

    this.doc.setBodyElements(unwrapped);
    return sdtCount;
  }

  /**
   * Clears the preserve flag on every reachable paragraph that has it set.
   * @returns Number of paragraphs changed
   */
  private clearPreserveFlags(): number {
    let cleared = 0;
    for (const para of this.collectParagraphs()) {
      if (para.isPreserved()) {
        para.setPreserved(false);
        cleared++;
      }
    }
    return cleared;
  }

  /**
   * Delegates to the document's hyperlink defragmentation, always asking it to
   * clean up relationships as well.
   * @param resetFormatting Forwarded as the `resetFormatting` option
   * @returns Whatever count the document reports
   */
  private defragmentHyperlinks(resetFormatting: boolean): number {
    return this.doc.defragmentHyperlinks({ resetFormatting, cleanupRelationships: true });
  }

  /**
   * Removes unused numbering through the document.
   * @returns How many numbering instances disappeared (count before minus count after)
   */
  private cleanupNumbering(): number {
    const before = this.doc.getNumberingManager().getAllInstances().length;
    this.doc.cleanupUnusedNumbering();
    const after = this.doc.getNumberingManager().getAllInstances().length;
    return before - after;
  }

  /**
   * Removes every style that no reachable paragraph or run refers to directly.
   *
   * NOTE(review): usage is detected only through paragraph `style` and run
   * `characterStyle`. Styles needed solely through inheritance, defaults,
   * tables or numbering are not recognised here — confirm `removeStyle`
   * protects them.
   *
   * @returns Number of styles passed to `removeStyle`
   */
  private cleanupStyles(): number {
    // Scan all paragraphs (body and table cells) and their runs for used styles
    const usedStyles = new Set<string>();
    for (const para of this.collectParagraphs()) {
      const paraStyle = para.getFormatting().style;
      if (paraStyle) usedStyles.add(paraStyle);
      for (const run of para.getRuns()) {
        const runStyle = run.getFormatting().characterStyle;
        if (runStyle) usedStyles.add(runStyle);
      }
    }

    // Iterate over a snapshot: removing from a live list while looping over it
    // would skip entries.
    const stylesManager = this.doc.getStylesManager();
    let removed = 0;
    for (const style of [...stylesManager.getAllStyles()]) {
      const styleId = style.getStyleId();
      if (!usedStyles.has(styleId)) {
        stylesManager.removeStyle(styleId);
        removed++;
      }
    }

    return removed;
  }

  /**
   * Removes hyperlink relationships that nothing in the document references.
   * @returns Number of relationships removed, as reported by the relationship manager
   */
  private cleanupRelationships(): number {
    // Use comprehensive scanning that includes raw nested content (nested tables),
    // headers/footers, footnotes, and endnotes — not just in-memory hyperlinks
    const referencedIds = this.doc.collectAllReferencedHyperlinkIds();

    // Remove orphaned hyperlink relationships
    return this.doc.getRelationshipManager().removeOrphanedHyperlinks(referencedIds);
  }

  /**
   * Removes customXML parts, their relationships and `docProps/custom.xml`.
   *
   * The customXML folder is matched case-insensitively. Both the file list and
   * the relationship list are snapshotted before removal so nothing is skipped
   * if the underlying collections are live.
   *
   * NOTE(review): content-type entries and any package-level relationship for
   * the removed parts are not touched here — confirm they are handled on save.
   *
   * @returns Files removed + relationships removed (+1 for docProps/custom.xml)
   */
  private removeCustomXML(): number {
    const zipHandler = this.doc.getZipHandler();
    let removed = 0;

    // Remove customXML files
    for (const file of [...zipHandler.getFilePaths()]) {
      if (file.toLowerCase().startsWith('customxml/')) {
        zipHandler.removeFile(file);
        removed++;
      }
    }

    // Remove customXML relationships
    const relManager = this.doc.getRelationshipManager();
    const customRels = relManager.getRelationshipsByType(
      'http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml'
    );
    for (const rel of [...customRels]) {
      relManager.removeRelationship(rel.getId());
      removed++;
    }

    // Remove custom.xml if present (docProps/custom.xml)
    if (zipHandler.hasFile('docProps/custom.xml')) {
      zipHandler.removeFile('docProps/custom.xml');
      removed++;
    }

    return removed;
  }

  /**
   * Deletes every occurrence of `attributeName` whose value is an enabled
   * on/off value (`1`, `true` or `on`, double- or single-quoted), together
   * with the whitespace in front of it.
   *
   * @param xml Raw XML text
   * @param attributeName Qualified attribute name, e.g. `w:fldLock`
   * @returns The rewritten XML and the number of attributes deleted
   */
  private static stripEnabledOnOffAttribute(
    xml: string,
    attributeName: string
  ): { xml: string; count: number } {
    const escapedName = attributeName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const pattern = new RegExp(
      `\\s*${escapedName}\\s*=\\s*(?:"(?:1|true|on)"|'(?:1|true|on)')`,
      'g'
    );

    // Count inside the replacer so the XML is scanned only once.
    let count = 0;
    const stripped = xml.replace(pattern, () => {
      count++;
      return '';
    });
    return { xml: stripped, count };
  }

  /**
   * Strips an enabled lock attribute from `word/document.xml`.
   * The part is rewritten only when at least one attribute was removed.
   * @returns Number of attributes removed (0 when the part is missing)
   */
  private removeLockAttribute(attributeName: string): number {
    const zipHandler = this.doc.getZipHandler();
    const docXml = zipHandler.getFileAsString('word/document.xml');
    if (!docXml) return 0;

    const result = CleanupHelper.stripEnabledOnOffAttribute(docXml, attributeName);
    if (result.count === 0) return 0;

    zipHandler.updateFile('word/document.xml', result.xml);
    return result.count;
  }

  /**
   * Removes enabled `w:fldLock` attributes so fields can be updated.
   * @returns Number of field locks removed
   */
  private unlockFields(): number {
    return this.removeLockAttribute('w:fldLock');
  }

  /**
   * Removes enabled `w:anchorLock` attributes.
   * @returns Number of anchor locks removed
   */
  private unlockFrames(): number {
    return this.removeLockAttribute('w:anchorLock');
  }

  /**
   * Clears table property exceptions from every row that carries any.
   * @returns Number of tables visited, whether or not one of their rows changed
   */
  private sanitizeTables(): number {
    const tables = this.doc.getAllTables();
    let processed = 0;
    for (const table of tables) {
      for (const row of table.getRows()) {
        const exceptions = row.getTablePropertyExceptions();
        if (exceptions && Object.keys(exceptions).length > 0) {
          // NOTE(review): the cast suggests the setter's declared parameter
          // does not accept undefined — consider widening it instead.
          row.setTablePropertyExceptions(undefined as any);
        }
      }
      processed++;
    }
    return processed;
  }

  /**
   * Applies the standard hyperlink formatting to every internal hyperlink in
   * body and table-cell paragraphs.
   * @returns Number of internal hyperlinks formatted (each counted once)
   */
  private formatInternalHyperlinks(): number {
    let count = 0;
    const formatting = CleanupHelper.standardHyperlinkFormatting();

    for (const paragraph of this.collectParagraphs()) {
      for (const item of paragraph.getContent()) {
        if (item instanceof Hyperlink && item.isInternal()) {
          item.setFormatting(formatting, { replace: true });
          count++;
        }
      }
    }

    return count;
  }

  /**
   * Formats ALL hyperlinks in the document with standard styling.
   * This includes:
   * - Internal w:hyperlink elements (bookmarks)
   * - External w:hyperlink elements (URLs)
   * - HYPERLINK fields (both simple w:fldSimple and complex fields)
   *
   * Standard formatting: Verdana 12pt, #0000FF blue, single underline.
   * Body and table-cell paragraphs are covered; each item is counted once.
   * @returns Number of hyperlinks formatted
   */
  private formatAllHyperlinks(): number {
    let count = 0;
    const formatting = CleanupHelper.standardHyperlinkFormatting();

    for (const paragraph of this.collectParagraphs()) {
      // Treat items as unknown so each instanceof check narrows safely.
      const items: Iterable<unknown> = paragraph.getContent();
      for (const item of items) {
        let touched = false;

        // Process all Hyperlink instances (both internal AND external)
        if (item instanceof Hyperlink) {
          item.setFormatting(formatting, { replace: true });
          touched = true;
        }
        // Process simple HYPERLINK fields
        if (item instanceof Field && item.isHyperlinkField()) {
          item.setFormatting(formatting);
          touched = true;
        }
        // Process complex HYPERLINK fields
        if (item instanceof ComplexField && item.isHyperlinkField()) {
          item.setResultFormatting(formatting);
          touched = true;
        }

        if (touched) count++;
      }
    }

    return count;
  }

  /**
   * Preset: Google Docs cleanup
   */
  static googleDocsPreset(): CleanupOptions {
    return {
      unlockSDTs: true,
      removeSDTs: true,
      defragmentHyperlinks: true,
      resetHyperlinkFormatting: true,
      cleanupRelationships: true,
      removeCustomXML: true,
      sanitizeTables: true,
    };
  }

  /**
   * Preset: Full cleanup (the option set used by `all()`)
   */
  static fullCleanupPreset(): CleanupOptions {
    return {
      unlockSDTs: true,
      removeSDTs: true,
      clearPreserveFlags: true,
      defragmentHyperlinks: true,
      resetHyperlinkFormatting: true,
      cleanupNumbering: true,
      cleanupStyles: true,
      cleanupRelationships: true,
      removeCustomXML: true,
      unlockFields: true,
      unlockFrames: true,
      sanitizeTables: true,
      formatAllHyperlinks: true,
    };
  }

  /**
   * Preset: Minimal cleanup
   */
  static minimalPreset(): CleanupOptions {
    return {
      cleanupRelationships: true,
      removeCustomXML: true,
    };
  }
}