@cj-tech-master/excelts 9.5.4 → 9.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (767)
  1. package/dist/browser/modules/archive/compression/streaming-compress.browser.js +29 -0
  2. package/dist/browser/modules/archive/compression/streaming-compress.js +9 -0
  3. package/dist/browser/modules/archive/compression/worker-pool/pool.browser.js +26 -1
  4. package/dist/browser/modules/archive/fs/archive-file.d.ts +8 -5
  5. package/dist/browser/modules/archive/fs/archive-file.js +78 -16
  6. package/dist/browser/modules/archive/unzip/stream.browser.js +43 -2
  7. package/dist/browser/modules/excel/chart/chart-ex-builder.js +7 -2
  8. package/dist/browser/modules/excel/chart/chart-ex-renderer.js +4 -9
  9. package/dist/browser/modules/excel/chart/chart-ex-types.d.ts +0 -12
  10. package/dist/browser/modules/excel/chart/chart.d.ts +1 -5
  11. package/dist/browser/modules/excel/chart/chart.js +1 -7
  12. package/dist/browser/modules/excel/chart/types.d.ts +0 -6
  13. package/dist/browser/modules/excel/stream/workbook-reader.browser.js +25 -1
  14. package/dist/browser/modules/excel/stream/workbook-reader.js +9 -0
  15. package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +40 -0
  16. package/dist/browser/modules/excel/stream/workbook-writer.browser.js +228 -13
  17. package/dist/browser/modules/excel/utils/string-buf.d.ts +5 -26
  18. package/dist/browser/modules/excel/utils/string-buf.js +4 -81
  19. package/dist/browser/modules/excel/workbook.browser.js +135 -25
  20. package/dist/browser/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
  21. package/dist/browser/modules/excel/xlsx/xlsx.browser.d.ts +19 -9
  22. package/dist/browser/modules/excel/xlsx/xlsx.browser.js +32 -8
  23. package/dist/browser/modules/excel/xlsx/xlsx.d.ts +10 -2
  24. package/dist/browser/modules/excel/xlsx/xlsx.js +9 -1
  25. package/dist/browser/modules/pdf/excel-bridge.d.ts +30 -1
  26. package/dist/browser/modules/pdf/excel-bridge.js +32 -0
  27. package/dist/browser/modules/pdf/font/metrics.d.ts +3 -52
  28. package/dist/browser/modules/pdf/font/metrics.js +3 -237
  29. package/dist/browser/modules/pdf/index.d.ts +1 -1
  30. package/dist/browser/modules/pdf/index.js +1 -1
  31. package/dist/browser/modules/pdf/render-layout-to-pdf.d.ts +66 -0
  32. package/dist/browser/modules/pdf/render-layout-to-pdf.js +647 -0
  33. package/dist/browser/modules/pdf/word-bridge.d.ts +80 -12
  34. package/dist/browser/modules/pdf/word-bridge.js +122 -274
  35. package/dist/browser/modules/stream/index.base.d.ts +2 -0
  36. package/dist/browser/modules/stream/index.base.js +2 -1
  37. package/dist/browser/modules/stream/internal/sink-adapter.d.ts +65 -0
  38. package/dist/browser/modules/stream/internal/sink-adapter.js +198 -0
  39. package/dist/browser/modules/stream/pull-stream.d.ts +19 -2
  40. package/dist/browser/modules/stream/pull-stream.js +51 -5
  41. package/dist/browser/modules/stream/types.d.ts +13 -1
  42. package/dist/browser/modules/word/advanced/diff.d.ts +61 -0
  43. package/dist/browser/modules/word/advanced/diff.js +167 -0
  44. package/dist/browser/modules/word/advanced/drawing-shapes.d.ts +269 -0
  45. package/dist/browser/modules/word/advanced/drawing-shapes.js +268 -0
  46. package/dist/browser/modules/word/advanced/field-engine.d.ts +43 -0
  47. package/dist/browser/modules/word/advanced/field-engine.js +1225 -0
  48. package/dist/browser/modules/word/advanced/glossary.d.ts +86 -0
  49. package/dist/browser/modules/word/advanced/glossary.js +79 -0
  50. package/dist/browser/modules/word/advanced/math-convert.d.ts +30 -0
  51. package/dist/browser/modules/word/advanced/math-convert.js +595 -0
  52. package/dist/browser/modules/word/advanced/ole-objects.d.ts +115 -0
  53. package/dist/browser/modules/word/advanced/ole-objects.js +271 -0
  54. package/dist/browser/modules/word/advanced/style-map.d.ts +105 -0
  55. package/dist/browser/modules/word/advanced/style-map.js +322 -0
  56. package/dist/browser/modules/word/advanced/validation.d.ts +56 -0
  57. package/dist/browser/modules/word/advanced/validation.js +1065 -0
  58. package/dist/browser/modules/word/advanced/vba-project.d.ts +91 -0
  59. package/dist/browser/modules/word/advanced/vba-project.js +265 -0
  60. package/dist/browser/modules/word/bridge/excel-bridge.d.ts +127 -0
  61. package/dist/browser/modules/word/bridge/excel-bridge.js +980 -0
  62. package/dist/browser/modules/word/builder/document-handle.d.ts +151 -0
  63. package/dist/browser/modules/word/builder/document-handle.js +664 -0
  64. package/dist/browser/modules/word/builder/paragraph-builders.d.ts +61 -0
  65. package/dist/browser/modules/word/builder/paragraph-builders.js +90 -0
  66. package/dist/browser/modules/word/builder/run-builders.d.ts +374 -0
  67. package/dist/browser/modules/word/builder/run-builders.js +600 -0
  68. package/dist/browser/modules/word/builder/table-builders.d.ts +23 -0
  69. package/dist/browser/modules/word/builder/table-builders.js +45 -0
  70. package/dist/browser/modules/word/constants.d.ts +39 -1
  71. package/dist/browser/modules/word/constants.js +109 -1
  72. package/dist/browser/modules/word/convert/conversion-ir.d.ts +210 -0
  73. package/dist/browser/modules/word/convert/conversion-ir.js +31 -0
  74. package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +39 -0
  75. package/dist/browser/modules/word/convert/docx-to-semantic.js +499 -0
  76. package/dist/browser/modules/word/convert/flat-opc.d.ts +44 -0
  77. package/dist/browser/modules/word/convert/flat-opc.js +385 -0
  78. package/dist/browser/modules/word/convert/html/html-import.d.ts +50 -0
  79. package/dist/browser/modules/word/convert/html/html-import.js +1907 -0
  80. package/dist/{types/modules/word → browser/modules/word/convert/html}/html-renderer.d.ts +14 -1
  81. package/dist/{esm/modules/word → browser/modules/word/convert/html}/html-renderer.js +420 -69
  82. package/dist/browser/modules/word/convert/html/html.d.ts +15 -0
  83. package/dist/browser/modules/word/convert/html/html.js +15 -0
  84. package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +68 -0
  85. package/dist/browser/modules/word/convert/markdown/markdown-import.js +1325 -0
  86. package/dist/browser/modules/word/convert/markdown/markdown-renderer.d.ts +25 -0
  87. package/dist/browser/modules/word/convert/markdown/markdown-renderer.js +634 -0
  88. package/dist/browser/modules/word/convert/markdown/markdown.d.ts +15 -0
  89. package/dist/browser/modules/word/convert/markdown/markdown.js +15 -0
  90. package/dist/browser/modules/word/convert/odt/odt.d.ts +41 -0
  91. package/dist/browser/modules/word/convert/odt/odt.js +1932 -0
  92. package/dist/browser/modules/word/{color-utils.d.ts → core/color-utils.d.ts} +8 -1
  93. package/dist/browser/modules/word/core/color-utils.js +43 -0
  94. package/dist/browser/modules/word/core/internal-utils.d.ts +90 -0
  95. package/dist/browser/modules/word/core/internal-utils.js +209 -0
  96. package/dist/browser/modules/word/core/mapper.d.ts +44 -0
  97. package/dist/browser/modules/word/core/mapper.js +427 -0
  98. package/dist/browser/modules/word/core/opc-paths.d.ts +33 -0
  99. package/dist/browser/modules/word/core/opc-paths.js +48 -0
  100. package/dist/browser/modules/word/core/text-utils.d.ts +38 -0
  101. package/dist/browser/modules/word/core/text-utils.js +202 -0
  102. package/dist/browser/modules/word/core/walker.d.ts +119 -0
  103. package/dist/browser/modules/word/core/walker.js +570 -0
  104. package/dist/browser/modules/word/crypto.d.ts +14 -9
  105. package/dist/browser/modules/word/crypto.js +13 -7
  106. package/dist/browser/modules/word/document-io.d.ts +59 -27
  107. package/dist/browser/modules/word/document-io.js +80 -197
  108. package/dist/browser/modules/word/errors.d.ts +44 -1
  109. package/dist/browser/modules/word/errors.js +54 -2
  110. package/dist/browser/modules/word/excel.d.ts +14 -0
  111. package/dist/browser/modules/word/excel.js +13 -0
  112. package/dist/browser/modules/word/font/font-embed.d.ts +112 -0
  113. package/dist/browser/modules/word/font/font-embed.js +646 -0
  114. package/dist/{esm/modules/word → browser/modules/word/font}/font-obfuscation.js +4 -9
  115. package/dist/browser/modules/word/font/hyphenation.d.ts +65 -0
  116. package/dist/browser/modules/word/font/hyphenation.js +4210 -0
  117. package/dist/browser/modules/word/font/text-shaping.d.ts +58 -0
  118. package/dist/browser/modules/word/font/text-shaping.js +635 -0
  119. package/dist/browser/modules/word/html.d.ts +7 -6
  120. package/dist/browser/modules/word/html.js +6 -5
  121. package/dist/browser/modules/word/incremental-edit.d.ts +123 -0
  122. package/dist/browser/modules/word/incremental-edit.js +361 -0
  123. package/dist/browser/modules/word/index.base.d.ts +194 -10
  124. package/dist/browser/modules/word/index.base.js +138 -29
  125. package/dist/browser/modules/word/layout/layout-constants.d.ts +17 -0
  126. package/dist/browser/modules/word/layout/layout-constants.js +17 -0
  127. package/dist/browser/modules/word/layout/layout-full.d.ts +53 -0
  128. package/dist/browser/modules/word/layout/layout-full.js +1696 -0
  129. package/dist/browser/modules/word/layout/layout-model.d.ts +344 -0
  130. package/dist/browser/modules/word/layout/layout-model.js +16 -0
  131. package/dist/browser/modules/word/layout/layout.d.ts +63 -0
  132. package/dist/browser/modules/word/layout/layout.js +1167 -0
  133. package/dist/browser/modules/word/layout/render-page.d.ts +57 -0
  134. package/dist/browser/modules/word/layout/render-page.js +1238 -0
  135. package/dist/browser/modules/word/markdown.d.ts +14 -0
  136. package/dist/browser/modules/word/markdown.js +13 -0
  137. package/dist/browser/modules/word/patcher.d.ts +62 -0
  138. package/dist/browser/modules/word/patcher.js +537 -0
  139. package/dist/browser/modules/word/query/compat.d.ts +25 -0
  140. package/dist/browser/modules/word/query/compat.js +58 -0
  141. package/dist/browser/modules/word/query/data-binding.d.ts +22 -0
  142. package/dist/browser/modules/word/query/data-binding.js +392 -0
  143. package/dist/browser/modules/word/query/form-fields.d.ts +41 -0
  144. package/dist/browser/modules/word/query/form-fields.js +268 -0
  145. package/dist/browser/modules/word/query/format-search.d.ts +99 -0
  146. package/dist/browser/modules/word/query/format-search.js +329 -0
  147. package/dist/browser/modules/word/query/mail-merge.d.ts +25 -0
  148. package/dist/browser/modules/word/query/mail-merge.js +111 -0
  149. package/dist/browser/modules/word/query/merge.d.ts +50 -0
  150. package/dist/browser/modules/word/query/merge.js +617 -0
  151. package/dist/browser/modules/word/query/replace.d.ts +47 -0
  152. package/dist/browser/modules/word/query/replace.js +301 -0
  153. package/dist/browser/modules/word/query/revisions.d.ts +67 -0
  154. package/dist/browser/modules/word/query/revisions.js +879 -0
  155. package/dist/browser/modules/word/query/search.d.ts +129 -0
  156. package/dist/browser/modules/word/query/search.js +346 -0
  157. package/dist/browser/modules/word/query/split.d.ts +44 -0
  158. package/dist/browser/modules/word/query/split.js +135 -0
  159. package/dist/browser/modules/word/query/style-resolve.d.ts +104 -0
  160. package/dist/browser/modules/word/query/style-resolve.js +368 -0
  161. package/dist/browser/modules/word/reader/chart-parser.d.ts +20 -0
  162. package/dist/browser/modules/word/reader/chart-parser.js +810 -0
  163. package/dist/browser/modules/word/reader/comments-parser.d.ts +26 -0
  164. package/dist/browser/modules/word/reader/comments-parser.js +92 -0
  165. package/dist/browser/modules/word/reader/doc-props-parsers.d.ts +15 -0
  166. package/dist/browser/modules/word/reader/doc-props-parsers.js +190 -0
  167. package/dist/browser/modules/word/reader/docx-reader.d.ts +27 -0
  168. package/dist/browser/modules/word/reader/docx-reader.js +2557 -0
  169. package/dist/browser/modules/word/reader/drawing-helpers.d.ts +27 -0
  170. package/dist/browser/modules/word/reader/drawing-helpers.js +84 -0
  171. package/dist/browser/modules/word/reader/form-field-parser.d.ts +21 -0
  172. package/dist/browser/modules/word/reader/form-field-parser.js +82 -0
  173. package/dist/browser/modules/word/reader/image-parsers.d.ts +11 -0
  174. package/dist/browser/modules/word/reader/image-parsers.js +291 -0
  175. package/dist/browser/modules/word/reader/math-parser.d.ts +12 -0
  176. package/dist/browser/modules/word/reader/math-parser.js +422 -0
  177. package/dist/browser/modules/word/reader/metadata-parsers.d.ts +17 -0
  178. package/dist/browser/modules/word/reader/metadata-parsers.js +87 -0
  179. package/dist/browser/modules/word/reader/numbering-parser.d.ts +13 -0
  180. package/dist/browser/modules/word/reader/numbering-parser.js +166 -0
  181. package/dist/browser/modules/word/reader/paragraph-section-parsers.d.ts +12 -0
  182. package/dist/browser/modules/word/reader/paragraph-section-parsers.js +503 -0
  183. package/dist/browser/modules/word/reader/parse-utils.d.ts +91 -0
  184. package/dist/browser/modules/word/reader/parse-utils.js +249 -0
  185. package/dist/browser/modules/word/reader/properties-parsers.d.ts +21 -0
  186. package/dist/browser/modules/word/reader/properties-parsers.js +332 -0
  187. package/dist/browser/modules/word/reader/reader-context.d.ts +69 -0
  188. package/dist/browser/modules/word/reader/reader-context.js +61 -0
  189. package/dist/browser/modules/word/reader/sdt-helpers.d.ts +29 -0
  190. package/dist/browser/modules/word/reader/sdt-helpers.js +111 -0
  191. package/dist/browser/modules/word/reader/settings-parser.d.ts +8 -0
  192. package/dist/browser/modules/word/reader/settings-parser.js +263 -0
  193. package/dist/browser/modules/word/reader/styles-parser.d.ts +12 -0
  194. package/dist/browser/modules/word/reader/styles-parser.js +147 -0
  195. package/dist/browser/modules/word/reader/table-properties-parsers.d.ts +12 -0
  196. package/dist/browser/modules/word/reader/table-properties-parsers.js +234 -0
  197. package/dist/browser/modules/word/reader/theme-parser.d.ts +8 -0
  198. package/dist/browser/modules/word/reader/theme-parser.js +167 -0
  199. package/dist/browser/modules/word/reader/watermark-parser.d.ts +15 -0
  200. package/dist/browser/modules/word/reader/watermark-parser.js +110 -0
  201. package/dist/browser/modules/word/security/cfb-reader.d.ts +37 -0
  202. package/dist/browser/modules/word/security/cfb-reader.js +410 -0
  203. package/dist/browser/modules/word/{digital-signatures.d.ts → security/digital-signatures.d.ts} +19 -11
  204. package/dist/browser/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
  205. package/dist/browser/modules/word/security/document-protection.d.ts +93 -0
  206. package/dist/browser/modules/word/security/document-protection.js +201 -0
  207. package/dist/{types/modules/word → browser/modules/word/security}/encryption.d.ts +51 -4
  208. package/dist/browser/modules/word/security/encryption.js +602 -0
  209. package/dist/browser/modules/word/security/policy.d.ts +80 -0
  210. package/dist/browser/modules/word/security/policy.js +102 -0
  211. package/dist/browser/modules/word/template/template-chart.d.ts +56 -0
  212. package/dist/browser/modules/word/template/template-chart.js +167 -0
  213. package/dist/browser/modules/word/template/template-datasource.d.ts +154 -0
  214. package/dist/browser/modules/word/template/template-datasource.js +541 -0
  215. package/dist/browser/modules/word/template/template-engine.d.ts +121 -0
  216. package/dist/browser/modules/word/template/template-engine.js +1435 -0
  217. package/dist/browser/modules/word/types.d.ts +224 -25
  218. package/dist/browser/modules/word/units.d.ts +26 -0
  219. package/dist/browser/modules/word/units.js +43 -14
  220. package/dist/browser/modules/word/{writers → writer}/chart-writer.js +164 -23
  221. package/dist/browser/modules/word/writer/checkbox-writer.d.ts +17 -0
  222. package/dist/browser/modules/word/writer/checkbox-writer.js +79 -0
  223. package/dist/{types/modules/word/writers → browser/modules/word/writer}/comment-writer.d.ts +2 -1
  224. package/dist/browser/modules/word/{writers → writer}/comment-writer.js +8 -6
  225. package/dist/browser/modules/word/writer/common-parts.d.ts +57 -0
  226. package/dist/browser/modules/word/writer/common-parts.js +101 -0
  227. package/dist/{types/modules/word → browser/modules/word/writer}/content-types.d.ts +2 -2
  228. package/dist/{esm/modules/word → browser/modules/word/writer}/content-types.js +14 -6
  229. package/dist/browser/modules/word/writer/document-writer.d.ts +24 -0
  230. package/dist/browser/modules/word/writer/document-writer.js +473 -0
  231. package/dist/browser/modules/word/writer/docx-packager.d.ts +35 -0
  232. package/dist/browser/modules/word/writer/docx-packager.js +1515 -0
  233. package/dist/{types/modules/word/writers → browser/modules/word/writer}/footnote-writer.d.ts +3 -2
  234. package/dist/{esm/modules/word/writers → browser/modules/word/writer}/footnote-writer.js +13 -10
  235. package/dist/{types/modules/word/writers → browser/modules/word/writer}/header-footer-writer.d.ts +3 -2
  236. package/dist/{esm/modules/word/writers → browser/modules/word/writer}/header-footer-writer.js +39 -21
  237. package/dist/{types/modules/word/writers → browser/modules/word/writer}/image-writer.d.ts +1 -1
  238. package/dist/browser/modules/word/{writers → writer}/image-writer.js +11 -7
  239. package/dist/browser/modules/word/writer/math-writer.d.ts +20 -0
  240. package/dist/{esm/modules/word/writers → browser/modules/word/writer}/math-writer.js +21 -1
  241. package/dist/browser/modules/word/{writers → writer}/numbering-writer.d.ts +1 -1
  242. package/dist/{esm/modules/word/writers → browser/modules/word/writer}/numbering-writer.js +11 -4
  243. package/dist/browser/modules/word/{writers → writer}/paragraph-writer.d.ts +2 -1
  244. package/dist/browser/modules/word/{writers → writer}/paragraph-writer.js +73 -38
  245. package/dist/browser/modules/word/{writers → writer}/parts-writer.d.ts +3 -3
  246. package/dist/{esm/modules/word/writers → browser/modules/word/writer}/parts-writer.js +91 -12
  247. package/dist/browser/modules/word/writer/reference-scanners.d.ts +42 -0
  248. package/dist/browser/modules/word/writer/reference-scanners.js +111 -0
  249. package/dist/browser/modules/word/writer/relationships.d.ts +52 -0
  250. package/dist/browser/modules/word/writer/relationships.js +117 -0
  251. package/dist/browser/modules/word/writer/render-context.d.ts +124 -0
  252. package/dist/browser/modules/word/writer/render-context.js +46 -0
  253. package/dist/browser/modules/word/{writers → writer}/run-writer.d.ts +10 -1
  254. package/dist/{esm/modules/word/writers → browser/modules/word/writer}/run-writer.js +126 -24
  255. package/dist/browser/modules/word/writer/sdt-writer.d.ts +25 -0
  256. package/dist/browser/modules/word/writer/sdt-writer.js +189 -0
  257. package/dist/browser/modules/word/writer/stream-buf.d.ts +37 -0
  258. package/dist/browser/modules/word/writer/stream-buf.js +73 -0
  259. package/dist/browser/modules/word/writer/streaming-writer.d.ts +344 -0
  260. package/dist/browser/modules/word/writer/streaming-writer.js +1382 -0
  261. package/dist/browser/modules/word/writer/string-buf.d.ts +8 -0
  262. package/dist/browser/modules/word/writer/string-buf.js +7 -0
  263. package/dist/browser/modules/word/{writers → writer}/styles-writer.js +32 -1
  264. package/dist/browser/modules/word/{writers → writer}/table-writer.d.ts +2 -1
  265. package/dist/browser/modules/word/{writers → writer}/table-writer.js +94 -11
  266. package/dist/browser/modules/xml/types.d.ts +22 -0
  267. package/dist/browser/utils/crypto.browser.d.ts +3 -1
  268. package/dist/browser/utils/crypto.browser.js +3 -1
  269. package/dist/browser/utils/crypto.d.ts +4 -1
  270. package/dist/browser/utils/crypto.js +4 -1
  271. package/dist/browser/utils/font-metrics.d.ts +63 -0
  272. package/dist/browser/utils/font-metrics.js +293 -0
  273. package/dist/browser/utils/string-buf.d.ts +42 -0
  274. package/dist/browser/utils/string-buf.js +89 -0
  275. package/dist/browser/utils/theme-colors.d.ts +55 -0
  276. package/dist/browser/utils/theme-colors.js +120 -0
  277. package/dist/cjs/modules/archive/compression/streaming-compress.browser.js +29 -0
  278. package/dist/cjs/modules/archive/compression/streaming-compress.js +9 -0
  279. package/dist/cjs/modules/archive/compression/worker-pool/pool.browser.js +26 -1
  280. package/dist/cjs/modules/archive/fs/archive-file.js +78 -16
  281. package/dist/cjs/modules/archive/unzip/stream.browser.js +43 -2
  282. package/dist/cjs/modules/excel/chart/chart-ex-builder.js +7 -2
  283. package/dist/cjs/modules/excel/chart/chart-ex-renderer.js +4 -9
  284. package/dist/cjs/modules/excel/chart/chart.js +1 -7
  285. package/dist/cjs/modules/excel/stream/workbook-reader.browser.js +25 -1
  286. package/dist/cjs/modules/excel/stream/workbook-reader.js +9 -0
  287. package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +228 -13
  288. package/dist/cjs/modules/excel/utils/string-buf.js +5 -81
  289. package/dist/cjs/modules/excel/workbook.browser.js +135 -25
  290. package/dist/cjs/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
  291. package/dist/cjs/modules/excel/xlsx/xlsx.browser.js +32 -8
  292. package/dist/cjs/modules/excel/xlsx/xlsx.js +9 -1
  293. package/dist/cjs/modules/pdf/excel-bridge.js +33 -0
  294. package/dist/cjs/modules/pdf/font/metrics.js +11 -244
  295. package/dist/cjs/modules/pdf/index.js +2 -1
  296. package/dist/cjs/modules/pdf/render-layout-to-pdf.js +651 -0
  297. package/dist/cjs/modules/pdf/word-bridge.js +155 -274
  298. package/dist/cjs/modules/stream/index.base.js +4 -2
  299. package/dist/cjs/modules/stream/internal/sink-adapter.js +202 -0
  300. package/dist/cjs/modules/stream/pull-stream.js +51 -5
  301. package/dist/cjs/modules/word/advanced/diff.js +170 -0
  302. package/dist/cjs/modules/word/advanced/drawing-shapes.js +279 -0
  303. package/dist/cjs/modules/word/advanced/field-engine.js +1229 -0
  304. package/dist/cjs/modules/word/advanced/glossary.js +87 -0
  305. package/dist/cjs/modules/word/advanced/math-convert.js +599 -0
  306. package/dist/cjs/modules/word/advanced/ole-objects.js +277 -0
  307. package/dist/cjs/modules/word/advanced/style-map.js +329 -0
  308. package/dist/cjs/modules/word/advanced/validation.js +1068 -0
  309. package/dist/cjs/modules/word/advanced/vba-project.js +274 -0
  310. package/dist/cjs/modules/word/bridge/excel-bridge.js +1020 -0
  311. package/dist/cjs/modules/word/builder/document-handle.js +667 -0
  312. package/dist/cjs/modules/word/builder/paragraph-builders.js +109 -0
  313. package/dist/cjs/modules/word/builder/run-builders.js +676 -0
  314. package/dist/cjs/modules/word/builder/table-builders.js +53 -0
  315. package/dist/cjs/modules/word/constants.js +111 -2
  316. package/dist/cjs/modules/word/convert/conversion-ir.js +34 -0
  317. package/dist/cjs/modules/word/convert/docx-to-semantic.js +502 -0
  318. package/dist/cjs/modules/word/convert/flat-opc.js +390 -0
  319. package/dist/cjs/modules/word/convert/html/html-import.js +1910 -0
  320. package/dist/cjs/modules/word/{html-renderer.js → convert/html/html-renderer.js} +420 -69
  321. package/dist/cjs/modules/word/convert/html/html.js +20 -0
  322. package/dist/cjs/modules/word/convert/markdown/markdown-import.js +1329 -0
  323. package/dist/cjs/modules/word/convert/markdown/markdown-renderer.js +637 -0
  324. package/dist/cjs/modules/word/convert/markdown/markdown.js +21 -0
  325. package/dist/cjs/modules/word/convert/odt/odt.js +1936 -0
  326. package/dist/cjs/modules/word/core/color-utils.js +47 -0
  327. package/dist/cjs/modules/word/core/internal-utils.js +219 -0
  328. package/dist/cjs/modules/word/core/mapper.js +430 -0
  329. package/dist/cjs/modules/word/core/opc-paths.js +53 -0
  330. package/dist/cjs/modules/word/core/text-utils.js +210 -0
  331. package/dist/cjs/modules/word/core/walker.js +577 -0
  332. package/dist/cjs/modules/word/crypto.js +19 -8
  333. package/dist/cjs/modules/word/document-io.js +117 -197
  334. package/dist/cjs/modules/word/errors.js +59 -13
  335. package/dist/cjs/modules/word/excel.js +22 -0
  336. package/dist/cjs/modules/word/font/font-embed.js +652 -0
  337. package/dist/cjs/modules/word/{font-obfuscation.js → font/font-obfuscation.js} +4 -9
  338. package/dist/cjs/modules/word/font/hyphenation.js +4216 -0
  339. package/dist/cjs/modules/word/font/text-shaping.js +640 -0
  340. package/dist/cjs/modules/word/html.js +9 -7
  341. package/dist/cjs/modules/word/incremental-edit.js +366 -0
  342. package/dist/cjs/modules/word/index.base.js +370 -137
  343. package/dist/cjs/modules/word/layout/layout-constants.js +20 -0
  344. package/dist/cjs/modules/word/layout/layout-full.js +1699 -0
  345. package/dist/cjs/modules/word/layout/layout-model.js +17 -0
  346. package/dist/cjs/modules/word/layout/layout.js +1170 -0
  347. package/dist/cjs/modules/word/layout/render-page.js +1243 -0
  348. package/dist/cjs/modules/word/markdown.js +19 -0
  349. package/dist/cjs/modules/word/patcher.js +539 -0
  350. package/dist/cjs/modules/word/query/compat.js +61 -0
  351. package/dist/cjs/modules/word/query/data-binding.js +395 -0
  352. package/dist/cjs/modules/word/query/form-fields.js +272 -0
  353. package/dist/cjs/modules/word/query/format-search.js +334 -0
  354. package/dist/cjs/modules/word/query/mail-merge.js +114 -0
  355. package/dist/cjs/modules/word/query/merge.js +620 -0
  356. package/dist/cjs/modules/word/query/replace.js +304 -0
  357. package/dist/cjs/modules/word/query/revisions.js +885 -0
  358. package/dist/cjs/modules/word/query/search.js +361 -0
  359. package/dist/cjs/modules/word/query/split.js +138 -0
  360. package/dist/cjs/modules/word/query/style-resolve.js +374 -0
  361. package/dist/cjs/modules/word/reader/chart-parser.js +814 -0
  362. package/dist/cjs/modules/word/reader/comments-parser.js +96 -0
  363. package/dist/cjs/modules/word/reader/doc-props-parsers.js +194 -0
  364. package/dist/cjs/modules/word/reader/docx-reader.js +2560 -0
  365. package/dist/cjs/modules/word/reader/drawing-helpers.js +90 -0
  366. package/dist/cjs/modules/word/reader/form-field-parser.js +85 -0
  367. package/dist/cjs/modules/word/reader/image-parsers.js +293 -0
  368. package/dist/cjs/modules/word/reader/math-parser.js +424 -0
  369. package/dist/cjs/modules/word/reader/metadata-parsers.js +93 -0
  370. package/dist/cjs/modules/word/reader/numbering-parser.js +168 -0
  371. package/dist/cjs/modules/word/reader/paragraph-section-parsers.js +505 -0
  372. package/dist/cjs/modules/word/reader/parse-utils.js +271 -0
  373. package/dist/cjs/modules/word/reader/properties-parsers.js +338 -0
  374. package/dist/cjs/modules/word/reader/reader-context.js +66 -0
  375. package/dist/cjs/modules/word/reader/sdt-helpers.js +114 -0
  376. package/dist/cjs/modules/word/reader/settings-parser.js +265 -0
  377. package/dist/cjs/modules/word/reader/styles-parser.js +149 -0
  378. package/dist/cjs/modules/word/reader/table-properties-parsers.js +237 -0
  379. package/dist/cjs/modules/word/reader/theme-parser.js +169 -0
  380. package/dist/cjs/modules/word/reader/watermark-parser.js +113 -0
  381. package/dist/cjs/modules/word/security/cfb-reader.js +414 -0
  382. package/dist/cjs/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
  383. package/dist/cjs/modules/word/security/document-protection.js +208 -0
  384. package/dist/cjs/modules/word/security/encryption.js +612 -0
  385. package/dist/cjs/modules/word/security/policy.js +106 -0
  386. package/dist/cjs/modules/word/template/template-chart.js +170 -0
  387. package/dist/cjs/modules/word/template/template-datasource.js +549 -0
  388. package/dist/cjs/modules/word/template/template-engine.js +1430 -0
  389. package/dist/cjs/modules/word/units.js +44 -14
  390. package/dist/cjs/modules/word/{writers → writer}/chart-writer.js +163 -22
  391. package/dist/cjs/modules/word/writer/checkbox-writer.js +82 -0
  392. package/dist/cjs/modules/word/{writers → writer}/comment-writer.js +8 -6
  393. package/dist/cjs/modules/word/writer/common-parts.js +104 -0
  394. package/dist/cjs/modules/word/{content-types.js → writer/content-types.js} +14 -6
  395. package/dist/cjs/modules/word/writer/document-writer.js +478 -0
  396. package/dist/cjs/modules/word/writer/docx-packager.js +1551 -0
  397. package/dist/cjs/modules/word/{writers → writer}/footnote-writer.js +13 -10
  398. package/dist/cjs/modules/word/{writers → writer}/header-footer-writer.js +38 -20
  399. package/dist/cjs/modules/word/{writers → writer}/image-writer.js +11 -7
  400. package/dist/cjs/modules/word/{writers → writer}/math-writer.js +21 -1
  401. package/dist/cjs/modules/word/{writers → writer}/numbering-writer.js +11 -4
  402. package/dist/cjs/modules/word/{writers → writer}/paragraph-writer.js +72 -37
  403. package/dist/cjs/modules/word/{writers → writer}/parts-writer.js +91 -12
  404. package/dist/cjs/modules/word/writer/reference-scanners.js +120 -0
  405. package/dist/cjs/modules/word/writer/relationships.js +124 -0
  406. package/dist/cjs/modules/word/writer/render-context.js +51 -0
  407. package/dist/cjs/modules/word/{writers → writer}/run-writer.js +127 -24
  408. package/dist/cjs/modules/word/writer/sdt-writer.js +192 -0
  409. package/dist/cjs/modules/word/writer/stream-buf.js +76 -0
  410. package/dist/cjs/modules/word/writer/streaming-writer.js +1387 -0
  411. package/dist/cjs/modules/word/writer/string-buf.js +11 -0
  412. package/dist/cjs/modules/word/{writers → writer}/styles-writer.js +32 -1
  413. package/dist/cjs/modules/word/{writers → writer}/table-writer.js +94 -11
  414. package/dist/cjs/utils/crypto.browser.js +3 -1
  415. package/dist/cjs/utils/crypto.js +4 -1
  416. package/dist/cjs/utils/font-metrics.js +303 -0
  417. package/dist/cjs/utils/string-buf.js +92 -0
  418. package/dist/cjs/utils/theme-colors.js +126 -0
  419. package/dist/esm/modules/archive/compression/streaming-compress.browser.js +29 -0
  420. package/dist/esm/modules/archive/compression/streaming-compress.js +9 -0
  421. package/dist/esm/modules/archive/compression/worker-pool/pool.browser.js +26 -1
  422. package/dist/esm/modules/archive/fs/archive-file.js +78 -16
  423. package/dist/esm/modules/archive/unzip/stream.browser.js +43 -2
  424. package/dist/esm/modules/excel/chart/chart-ex-builder.js +7 -2
  425. package/dist/esm/modules/excel/chart/chart-ex-renderer.js +4 -9
  426. package/dist/esm/modules/excel/chart/chart.js +1 -7
  427. package/dist/esm/modules/excel/stream/workbook-reader.browser.js +25 -1
  428. package/dist/esm/modules/excel/stream/workbook-reader.js +9 -0
  429. package/dist/esm/modules/excel/stream/workbook-writer.browser.js +228 -13
  430. package/dist/esm/modules/excel/utils/string-buf.js +4 -81
  431. package/dist/esm/modules/excel/workbook.browser.js +135 -25
  432. package/dist/esm/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
  433. package/dist/esm/modules/excel/xlsx/xlsx.browser.js +32 -8
  434. package/dist/esm/modules/excel/xlsx/xlsx.js +9 -1
  435. package/dist/esm/modules/pdf/excel-bridge.js +32 -0
  436. package/dist/esm/modules/pdf/font/metrics.js +3 -237
  437. package/dist/esm/modules/pdf/index.js +1 -1
  438. package/dist/esm/modules/pdf/render-layout-to-pdf.js +647 -0
  439. package/dist/esm/modules/pdf/word-bridge.js +122 -274
  440. package/dist/esm/modules/stream/index.base.js +2 -1
  441. package/dist/esm/modules/stream/internal/sink-adapter.js +198 -0
  442. package/dist/esm/modules/stream/pull-stream.js +51 -5
  443. package/dist/esm/modules/word/advanced/diff.js +167 -0
  444. package/dist/esm/modules/word/advanced/drawing-shapes.js +268 -0
  445. package/dist/esm/modules/word/advanced/field-engine.js +1225 -0
  446. package/dist/esm/modules/word/advanced/glossary.js +79 -0
  447. package/dist/esm/modules/word/advanced/math-convert.js +595 -0
  448. package/dist/esm/modules/word/advanced/ole-objects.js +271 -0
  449. package/dist/esm/modules/word/advanced/style-map.js +322 -0
  450. package/dist/esm/modules/word/advanced/validation.js +1065 -0
  451. package/dist/esm/modules/word/advanced/vba-project.js +265 -0
  452. package/dist/esm/modules/word/bridge/excel-bridge.js +980 -0
  453. package/dist/esm/modules/word/builder/document-handle.js +664 -0
  454. package/dist/esm/modules/word/builder/paragraph-builders.js +90 -0
  455. package/dist/esm/modules/word/builder/run-builders.js +600 -0
  456. package/dist/esm/modules/word/builder/table-builders.js +45 -0
  457. package/dist/esm/modules/word/constants.js +109 -1
  458. package/dist/esm/modules/word/convert/conversion-ir.js +31 -0
  459. package/dist/esm/modules/word/convert/docx-to-semantic.js +499 -0
  460. package/dist/esm/modules/word/convert/flat-opc.js +385 -0
  461. package/dist/esm/modules/word/convert/html/html-import.js +1907 -0
  462. package/dist/{browser/modules/word → esm/modules/word/convert/html}/html-renderer.js +420 -69
  463. package/dist/esm/modules/word/convert/html/html.js +15 -0
  464. package/dist/esm/modules/word/convert/markdown/markdown-import.js +1325 -0
  465. package/dist/esm/modules/word/convert/markdown/markdown-renderer.js +634 -0
  466. package/dist/esm/modules/word/convert/markdown/markdown.js +15 -0
  467. package/dist/esm/modules/word/convert/odt/odt.js +1932 -0
  468. package/dist/esm/modules/word/core/color-utils.js +43 -0
  469. package/dist/esm/modules/word/core/internal-utils.js +209 -0
  470. package/dist/esm/modules/word/core/mapper.js +427 -0
  471. package/dist/esm/modules/word/core/opc-paths.js +48 -0
  472. package/dist/esm/modules/word/core/text-utils.js +202 -0
  473. package/dist/esm/modules/word/core/walker.js +570 -0
  474. package/dist/esm/modules/word/crypto.js +13 -7
  475. package/dist/esm/modules/word/document-io.js +80 -197
  476. package/dist/esm/modules/word/errors.js +54 -2
  477. package/dist/esm/modules/word/excel.js +13 -0
  478. package/dist/esm/modules/word/font/font-embed.js +646 -0
  479. package/dist/{browser/modules/word → esm/modules/word/font}/font-obfuscation.js +4 -9
  480. package/dist/esm/modules/word/font/hyphenation.js +4210 -0
  481. package/dist/esm/modules/word/font/text-shaping.js +635 -0
  482. package/dist/esm/modules/word/html.js +6 -5
  483. package/dist/esm/modules/word/incremental-edit.js +361 -0
  484. package/dist/esm/modules/word/index.base.js +138 -29
  485. package/dist/esm/modules/word/layout/layout-constants.js +17 -0
  486. package/dist/esm/modules/word/layout/layout-full.js +1696 -0
  487. package/dist/esm/modules/word/layout/layout-model.js +16 -0
  488. package/dist/esm/modules/word/layout/layout.js +1167 -0
  489. package/dist/esm/modules/word/layout/render-page.js +1238 -0
  490. package/dist/esm/modules/word/markdown.js +13 -0
  491. package/dist/esm/modules/word/patcher.js +537 -0
  492. package/dist/esm/modules/word/query/compat.js +58 -0
  493. package/dist/esm/modules/word/query/data-binding.js +392 -0
  494. package/dist/esm/modules/word/query/form-fields.js +268 -0
  495. package/dist/esm/modules/word/query/format-search.js +329 -0
  496. package/dist/esm/modules/word/query/mail-merge.js +111 -0
  497. package/dist/esm/modules/word/query/merge.js +617 -0
  498. package/dist/esm/modules/word/query/replace.js +301 -0
  499. package/dist/esm/modules/word/query/revisions.js +879 -0
  500. package/dist/esm/modules/word/query/search.js +346 -0
  501. package/dist/esm/modules/word/query/split.js +135 -0
  502. package/dist/esm/modules/word/query/style-resolve.js +368 -0
  503. package/dist/esm/modules/word/reader/chart-parser.js +810 -0
  504. package/dist/esm/modules/word/reader/comments-parser.js +92 -0
  505. package/dist/esm/modules/word/reader/doc-props-parsers.js +190 -0
  506. package/dist/esm/modules/word/reader/docx-reader.js +2557 -0
  507. package/dist/esm/modules/word/reader/drawing-helpers.js +84 -0
  508. package/dist/esm/modules/word/reader/form-field-parser.js +82 -0
  509. package/dist/esm/modules/word/reader/image-parsers.js +291 -0
  510. package/dist/esm/modules/word/reader/math-parser.js +422 -0
  511. package/dist/esm/modules/word/reader/metadata-parsers.js +87 -0
  512. package/dist/esm/modules/word/reader/numbering-parser.js +166 -0
  513. package/dist/esm/modules/word/reader/paragraph-section-parsers.js +503 -0
  514. package/dist/esm/modules/word/reader/parse-utils.js +249 -0
  515. package/dist/esm/modules/word/reader/properties-parsers.js +332 -0
  516. package/dist/esm/modules/word/reader/reader-context.js +61 -0
  517. package/dist/esm/modules/word/reader/sdt-helpers.js +111 -0
  518. package/dist/esm/modules/word/reader/settings-parser.js +263 -0
  519. package/dist/esm/modules/word/reader/styles-parser.js +147 -0
  520. package/dist/esm/modules/word/reader/table-properties-parsers.js +234 -0
  521. package/dist/esm/modules/word/reader/theme-parser.js +167 -0
  522. package/dist/esm/modules/word/reader/watermark-parser.js +110 -0
  523. package/dist/esm/modules/word/security/cfb-reader.js +410 -0
  524. package/dist/esm/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
  525. package/dist/esm/modules/word/security/document-protection.js +201 -0
  526. package/dist/esm/modules/word/security/encryption.js +602 -0
  527. package/dist/esm/modules/word/security/policy.js +102 -0
  528. package/dist/esm/modules/word/template/template-chart.js +167 -0
  529. package/dist/esm/modules/word/template/template-datasource.js +541 -0
  530. package/dist/esm/modules/word/template/template-engine.js +1435 -0
  531. package/dist/esm/modules/word/units.js +43 -14
  532. package/dist/esm/modules/word/{writers → writer}/chart-writer.js +164 -23
  533. package/dist/esm/modules/word/writer/checkbox-writer.js +79 -0
  534. package/dist/esm/modules/word/{writers → writer}/comment-writer.js +8 -6
  535. package/dist/esm/modules/word/writer/common-parts.js +101 -0
  536. package/dist/{browser/modules/word → esm/modules/word/writer}/content-types.js +14 -6
  537. package/dist/esm/modules/word/writer/document-writer.js +473 -0
  538. package/dist/esm/modules/word/writer/docx-packager.js +1515 -0
  539. package/dist/{browser/modules/word/writers → esm/modules/word/writer}/footnote-writer.js +13 -10
  540. package/dist/{browser/modules/word/writers → esm/modules/word/writer}/header-footer-writer.js +39 -21
  541. package/dist/esm/modules/word/{writers → writer}/image-writer.js +11 -7
  542. package/dist/{browser/modules/word/writers → esm/modules/word/writer}/math-writer.js +21 -1
  543. package/dist/{browser/modules/word/writers → esm/modules/word/writer}/numbering-writer.js +11 -4
  544. package/dist/esm/modules/word/{writers → writer}/paragraph-writer.js +73 -38
  545. package/dist/{browser/modules/word/writers → esm/modules/word/writer}/parts-writer.js +91 -12
  546. package/dist/esm/modules/word/writer/reference-scanners.js +111 -0
  547. package/dist/esm/modules/word/writer/relationships.js +117 -0
  548. package/dist/esm/modules/word/writer/render-context.js +46 -0
  549. package/dist/{browser/modules/word/writers → esm/modules/word/writer}/run-writer.js +126 -24
  550. package/dist/esm/modules/word/writer/sdt-writer.js +189 -0
  551. package/dist/esm/modules/word/writer/stream-buf.js +73 -0
  552. package/dist/esm/modules/word/writer/streaming-writer.js +1382 -0
  553. package/dist/esm/modules/word/writer/string-buf.js +7 -0
  554. package/dist/esm/modules/word/{writers → writer}/styles-writer.js +32 -1
  555. package/dist/esm/modules/word/{writers → writer}/table-writer.js +94 -11
  556. package/dist/esm/utils/crypto.browser.js +3 -1
  557. package/dist/esm/utils/crypto.js +4 -1
  558. package/dist/esm/utils/font-metrics.js +293 -0
  559. package/dist/esm/utils/string-buf.js +89 -0
  560. package/dist/esm/utils/theme-colors.js +120 -0
  561. package/dist/iife/excelts.iife.js +70692 -70337
  562. package/dist/iife/excelts.iife.js.map +1 -1
  563. package/dist/iife/excelts.iife.min.js +57 -57
  564. package/dist/types/modules/archive/fs/archive-file.d.ts +8 -5
  565. package/dist/types/modules/excel/chart/chart-ex-types.d.ts +0 -12
  566. package/dist/types/modules/excel/chart/chart.d.ts +1 -5
  567. package/dist/types/modules/excel/chart/types.d.ts +0 -6
  568. package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +40 -0
  569. package/dist/types/modules/excel/utils/string-buf.d.ts +5 -26
  570. package/dist/types/modules/excel/xlsx/xlsx.browser.d.ts +19 -9
  571. package/dist/types/modules/excel/xlsx/xlsx.d.ts +10 -2
  572. package/dist/types/modules/pdf/excel-bridge.d.ts +30 -1
  573. package/dist/types/modules/pdf/font/metrics.d.ts +3 -52
  574. package/dist/types/modules/pdf/index.d.ts +1 -1
  575. package/dist/types/modules/pdf/render-layout-to-pdf.d.ts +66 -0
  576. package/dist/types/modules/pdf/word-bridge.d.ts +80 -12
  577. package/dist/types/modules/stream/index.base.d.ts +2 -0
  578. package/dist/types/modules/stream/internal/sink-adapter.d.ts +65 -0
  579. package/dist/types/modules/stream/pull-stream.d.ts +19 -2
  580. package/dist/types/modules/stream/types.d.ts +13 -1
  581. package/dist/types/modules/word/advanced/diff.d.ts +61 -0
  582. package/dist/types/modules/word/advanced/drawing-shapes.d.ts +269 -0
  583. package/dist/types/modules/word/advanced/field-engine.d.ts +43 -0
  584. package/dist/types/modules/word/advanced/glossary.d.ts +86 -0
  585. package/dist/types/modules/word/advanced/math-convert.d.ts +30 -0
  586. package/dist/types/modules/word/advanced/ole-objects.d.ts +115 -0
  587. package/dist/types/modules/word/advanced/style-map.d.ts +105 -0
  588. package/dist/types/modules/word/advanced/validation.d.ts +56 -0
  589. package/dist/types/modules/word/advanced/vba-project.d.ts +91 -0
  590. package/dist/types/modules/word/bridge/excel-bridge.d.ts +127 -0
  591. package/dist/types/modules/word/builder/document-handle.d.ts +151 -0
  592. package/dist/types/modules/word/builder/paragraph-builders.d.ts +61 -0
  593. package/dist/types/modules/word/builder/run-builders.d.ts +374 -0
  594. package/dist/types/modules/word/builder/table-builders.d.ts +23 -0
  595. package/dist/types/modules/word/constants.d.ts +39 -1
  596. package/dist/types/modules/word/convert/conversion-ir.d.ts +210 -0
  597. package/dist/types/modules/word/convert/docx-to-semantic.d.ts +39 -0
  598. package/dist/types/modules/word/convert/flat-opc.d.ts +44 -0
  599. package/dist/types/modules/word/convert/html/html-import.d.ts +50 -0
  600. package/dist/{browser/modules/word → types/modules/word/convert/html}/html-renderer.d.ts +14 -1
  601. package/dist/types/modules/word/convert/html/html.d.ts +15 -0
  602. package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +68 -0
  603. package/dist/types/modules/word/convert/markdown/markdown-renderer.d.ts +25 -0
  604. package/dist/types/modules/word/convert/markdown/markdown.d.ts +15 -0
  605. package/dist/types/modules/word/convert/odt/odt.d.ts +41 -0
  606. package/dist/types/modules/word/{color-utils.d.ts → core/color-utils.d.ts} +8 -1
  607. package/dist/types/modules/word/core/internal-utils.d.ts +90 -0
  608. package/dist/types/modules/word/core/mapper.d.ts +44 -0
  609. package/dist/types/modules/word/core/opc-paths.d.ts +33 -0
  610. package/dist/types/modules/word/core/text-utils.d.ts +38 -0
  611. package/dist/types/modules/word/core/walker.d.ts +119 -0
  612. package/dist/types/modules/word/crypto.d.ts +14 -9
  613. package/dist/types/modules/word/document-io.d.ts +59 -27
  614. package/dist/types/modules/word/errors.d.ts +44 -1
  615. package/dist/types/modules/word/excel.d.ts +14 -0
  616. package/dist/types/modules/word/font/font-embed.d.ts +112 -0
  617. package/dist/types/modules/word/font/hyphenation.d.ts +65 -0
  618. package/dist/types/modules/word/font/text-shaping.d.ts +58 -0
  619. package/dist/types/modules/word/html.d.ts +7 -6
  620. package/dist/types/modules/word/incremental-edit.d.ts +123 -0
  621. package/dist/types/modules/word/index.base.d.ts +194 -10
  622. package/dist/types/modules/word/layout/layout-constants.d.ts +17 -0
  623. package/dist/types/modules/word/layout/layout-full.d.ts +53 -0
  624. package/dist/types/modules/word/layout/layout-model.d.ts +344 -0
  625. package/dist/types/modules/word/layout/layout.d.ts +63 -0
  626. package/dist/types/modules/word/layout/render-page.d.ts +57 -0
  627. package/dist/types/modules/word/markdown.d.ts +14 -0
  628. package/dist/types/modules/word/patcher.d.ts +62 -0
  629. package/dist/types/modules/word/query/compat.d.ts +25 -0
  630. package/dist/types/modules/word/query/data-binding.d.ts +22 -0
  631. package/dist/types/modules/word/query/form-fields.d.ts +41 -0
  632. package/dist/types/modules/word/query/format-search.d.ts +99 -0
  633. package/dist/types/modules/word/query/mail-merge.d.ts +25 -0
  634. package/dist/types/modules/word/query/merge.d.ts +50 -0
  635. package/dist/types/modules/word/query/replace.d.ts +47 -0
  636. package/dist/types/modules/word/query/revisions.d.ts +67 -0
  637. package/dist/types/modules/word/query/search.d.ts +129 -0
  638. package/dist/types/modules/word/query/split.d.ts +44 -0
  639. package/dist/types/modules/word/query/style-resolve.d.ts +104 -0
  640. package/dist/types/modules/word/reader/chart-parser.d.ts +20 -0
  641. package/dist/types/modules/word/reader/comments-parser.d.ts +26 -0
  642. package/dist/types/modules/word/reader/doc-props-parsers.d.ts +15 -0
  643. package/dist/types/modules/word/reader/docx-reader.d.ts +27 -0
  644. package/dist/types/modules/word/reader/drawing-helpers.d.ts +27 -0
  645. package/dist/types/modules/word/reader/form-field-parser.d.ts +21 -0
  646. package/dist/types/modules/word/reader/image-parsers.d.ts +11 -0
  647. package/dist/types/modules/word/reader/math-parser.d.ts +12 -0
  648. package/dist/types/modules/word/reader/metadata-parsers.d.ts +17 -0
  649. package/dist/types/modules/word/reader/numbering-parser.d.ts +13 -0
  650. package/dist/types/modules/word/reader/paragraph-section-parsers.d.ts +12 -0
  651. package/dist/types/modules/word/reader/parse-utils.d.ts +91 -0
  652. package/dist/types/modules/word/reader/properties-parsers.d.ts +21 -0
  653. package/dist/types/modules/word/reader/reader-context.d.ts +69 -0
  654. package/dist/types/modules/word/reader/sdt-helpers.d.ts +29 -0
  655. package/dist/types/modules/word/reader/settings-parser.d.ts +8 -0
  656. package/dist/types/modules/word/reader/styles-parser.d.ts +12 -0
  657. package/dist/types/modules/word/reader/table-properties-parsers.d.ts +12 -0
  658. package/dist/types/modules/word/reader/theme-parser.d.ts +8 -0
  659. package/dist/types/modules/word/reader/watermark-parser.d.ts +15 -0
  660. package/dist/types/modules/word/security/cfb-reader.d.ts +37 -0
  661. package/dist/types/modules/word/{digital-signatures.d.ts → security/digital-signatures.d.ts} +19 -11
  662. package/dist/types/modules/word/security/document-protection.d.ts +93 -0
  663. package/dist/{browser/modules/word → types/modules/word/security}/encryption.d.ts +51 -4
  664. package/dist/types/modules/word/security/policy.d.ts +80 -0
  665. package/dist/types/modules/word/template/template-chart.d.ts +56 -0
  666. package/dist/types/modules/word/template/template-datasource.d.ts +154 -0
  667. package/dist/types/modules/word/template/template-engine.d.ts +121 -0
  668. package/dist/types/modules/word/types.d.ts +224 -25
  669. package/dist/types/modules/word/units.d.ts +26 -0
  670. package/dist/types/modules/word/writer/checkbox-writer.d.ts +17 -0
  671. package/dist/{browser/modules/word/writers → types/modules/word/writer}/comment-writer.d.ts +2 -1
  672. package/dist/types/modules/word/writer/common-parts.d.ts +57 -0
  673. package/dist/{browser/modules/word → types/modules/word/writer}/content-types.d.ts +2 -2
  674. package/dist/types/modules/word/writer/document-writer.d.ts +24 -0
  675. package/dist/types/modules/word/writer/docx-packager.d.ts +35 -0
  676. package/dist/{browser/modules/word/writers → types/modules/word/writer}/footnote-writer.d.ts +3 -2
  677. package/dist/{browser/modules/word/writers → types/modules/word/writer}/header-footer-writer.d.ts +3 -2
  678. package/dist/{browser/modules/word/writers → types/modules/word/writer}/image-writer.d.ts +1 -1
  679. package/dist/types/modules/word/writer/math-writer.d.ts +20 -0
  680. package/dist/types/modules/word/{writers → writer}/numbering-writer.d.ts +1 -1
  681. package/dist/types/modules/word/{writers → writer}/paragraph-writer.d.ts +2 -1
  682. package/dist/types/modules/word/{writers → writer}/parts-writer.d.ts +3 -3
  683. package/dist/types/modules/word/writer/reference-scanners.d.ts +42 -0
  684. package/dist/types/modules/word/writer/relationships.d.ts +52 -0
  685. package/dist/types/modules/word/writer/render-context.d.ts +124 -0
  686. package/dist/types/modules/word/{writers → writer}/run-writer.d.ts +10 -1
  687. package/dist/types/modules/word/writer/sdt-writer.d.ts +25 -0
  688. package/dist/types/modules/word/writer/stream-buf.d.ts +37 -0
  689. package/dist/types/modules/word/writer/streaming-writer.d.ts +344 -0
  690. package/dist/types/modules/word/writer/string-buf.d.ts +8 -0
  691. package/dist/types/modules/word/{writers → writer}/table-writer.d.ts +2 -1
  692. package/dist/types/modules/xml/types.d.ts +22 -0
  693. package/dist/types/utils/crypto.browser.d.ts +3 -1
  694. package/dist/types/utils/crypto.d.ts +4 -1
  695. package/dist/types/utils/font-metrics.d.ts +63 -0
  696. package/dist/types/utils/string-buf.d.ts +42 -0
  697. package/dist/types/utils/theme-colors.d.ts +55 -0
  698. package/package.json +121 -39
  699. package/dist/browser/modules/word/color-utils.js +0 -94
  700. package/dist/browser/modules/word/document.d.ts +0 -657
  701. package/dist/browser/modules/word/document.js +0 -1533
  702. package/dist/browser/modules/word/docx-packager.d.ts +0 -14
  703. package/dist/browser/modules/word/docx-packager.js +0 -822
  704. package/dist/browser/modules/word/docx-reader.d.ts +0 -11
  705. package/dist/browser/modules/word/docx-reader.js +0 -4929
  706. package/dist/browser/modules/word/encryption.js +0 -274
  707. package/dist/browser/modules/word/internal-utils.d.ts +0 -23
  708. package/dist/browser/modules/word/internal-utils.js +0 -54
  709. package/dist/browser/modules/word/namespaces.d.ts +0 -159
  710. package/dist/browser/modules/word/namespaces.js +0 -189
  711. package/dist/browser/modules/word/relationships.d.ts +0 -30
  712. package/dist/browser/modules/word/relationships.js +0 -48
  713. package/dist/browser/modules/word/writers/checkbox-writer.d.ts +0 -9
  714. package/dist/browser/modules/word/writers/checkbox-writer.js +0 -42
  715. package/dist/browser/modules/word/writers/document-writer.d.ts +0 -16
  716. package/dist/browser/modules/word/writers/document-writer.js +0 -461
  717. package/dist/browser/modules/word/writers/math-writer.d.ts +0 -9
  718. package/dist/cjs/modules/word/color-utils.js +0 -97
  719. package/dist/cjs/modules/word/document.js +0 -1645
  720. package/dist/cjs/modules/word/docx-packager.js +0 -825
  721. package/dist/cjs/modules/word/docx-reader.js +0 -4932
  722. package/dist/cjs/modules/word/encryption.js +0 -282
  723. package/dist/cjs/modules/word/internal-utils.js +0 -59
  724. package/dist/cjs/modules/word/namespaces.js +0 -192
  725. package/dist/cjs/modules/word/relationships.js +0 -55
  726. package/dist/cjs/modules/word/writers/checkbox-writer.js +0 -45
  727. package/dist/cjs/modules/word/writers/document-writer.js +0 -465
  728. package/dist/esm/modules/word/color-utils.js +0 -94
  729. package/dist/esm/modules/word/document.js +0 -1533
  730. package/dist/esm/modules/word/docx-packager.js +0 -822
  731. package/dist/esm/modules/word/docx-reader.js +0 -4929
  732. package/dist/esm/modules/word/encryption.js +0 -274
  733. package/dist/esm/modules/word/internal-utils.js +0 -54
  734. package/dist/esm/modules/word/namespaces.js +0 -189
  735. package/dist/esm/modules/word/relationships.js +0 -48
  736. package/dist/esm/modules/word/writers/checkbox-writer.js +0 -42
  737. package/dist/esm/modules/word/writers/document-writer.js +0 -461
  738. package/dist/types/modules/word/document.d.ts +0 -657
  739. package/dist/types/modules/word/docx-packager.d.ts +0 -14
  740. package/dist/types/modules/word/docx-reader.d.ts +0 -11
  741. package/dist/types/modules/word/internal-utils.d.ts +0 -23
  742. package/dist/types/modules/word/namespaces.d.ts +0 -159
  743. package/dist/types/modules/word/relationships.d.ts +0 -30
  744. package/dist/types/modules/word/writers/checkbox-writer.d.ts +0 -9
  745. package/dist/types/modules/word/writers/document-writer.d.ts +0 -16
  746. package/dist/types/modules/word/writers/math-writer.d.ts +0 -9
  747. /package/dist/browser/modules/word/{font-obfuscation.d.ts → font/font-obfuscation.d.ts} +0 -0
  748. /package/dist/browser/modules/word/{writers → writer}/chart-writer.d.ts +0 -0
  749. /package/dist/browser/modules/word/{writers → writer}/section-writer.d.ts +0 -0
  750. /package/dist/browser/modules/word/{writers → writer}/section-writer.js +0 -0
  751. /package/dist/browser/modules/word/{writers → writer}/styles-writer.d.ts +0 -0
  752. /package/dist/browser/modules/word/{writers → writer}/textbox-writer.d.ts +0 -0
  753. /package/dist/browser/modules/word/{writers → writer}/textbox-writer.js +0 -0
  754. /package/dist/browser/modules/word/{writers → writer}/toc-writer.d.ts +0 -0
  755. /package/dist/browser/modules/word/{writers → writer}/toc-writer.js +0 -0
  756. /package/dist/cjs/modules/word/{writers → writer}/section-writer.js +0 -0
  757. /package/dist/cjs/modules/word/{writers → writer}/textbox-writer.js +0 -0
  758. /package/dist/cjs/modules/word/{writers → writer}/toc-writer.js +0 -0
  759. /package/dist/esm/modules/word/{writers → writer}/section-writer.js +0 -0
  760. /package/dist/esm/modules/word/{writers → writer}/textbox-writer.js +0 -0
  761. /package/dist/esm/modules/word/{writers → writer}/toc-writer.js +0 -0
  762. /package/dist/types/modules/word/{font-obfuscation.d.ts → font/font-obfuscation.d.ts} +0 -0
  763. /package/dist/types/modules/word/{writers → writer}/chart-writer.d.ts +0 -0
  764. /package/dist/types/modules/word/{writers → writer}/section-writer.d.ts +0 -0
  765. /package/dist/types/modules/word/{writers → writer}/styles-writer.d.ts +0 -0
  766. /package/dist/types/modules/word/{writers → writer}/textbox-writer.d.ts +0 -0
  767. /package/dist/types/modules/word/{writers → writer}/toc-writer.d.ts +0 -0
@@ -0,0 +1,2557 @@
1
+ /**
2
+ * DOCX Module - Reader / Parser
3
+ *
4
+ * Reads a DOCX ZIP file and parses it into a DocxDocument model.
5
+ * Uses the archive module for ZIP reading and XML module for parsing.
6
+ */
7
+ import { unzip } from "../../archive/read-archive.js";
8
+ import { parseXml, findChild, textContent } from "../../xml/dom.js";
9
+ import { RelType } from "../constants.js";
10
+ import { utf8Decoder } from "../core/internal-utils.js";
11
+ import { isRun } from "../core/text-utils.js";
12
+ import { DocxError, DocxParseError, DocxMissingPartError, DocxEncryptedError, DocxLimitExceededError } from "../errors.js";
13
+ import { decryptDocx } from "../security/encryption.js";
14
+ import { resolveSecurityPolicy } from "../security/policy.js";
15
+ import { replaceOpaqueCharts, replaceOpaqueChartExDrawings, parseChartXml, parseChartExXml } from "./chart-parser.js";
16
+ import { parseCommentsXml as parseCommentsXmlExternal, parseCommentsExtendedXml } from "./comments-parser.js";
17
+ import { parseCoreProps, parseAppProps, parseCustomPropsXml, parseFontTableXml } from "./doc-props-parsers.js";
18
+ import { parseFfData } from "./form-field-parser.js";
19
+ import { parseDrawingContent, parseFloatingImage } from "./image-parsers.js";
20
+ import { parseMathContent, parseMathBlock } from "./math-parser.js";
21
+ import { parseThemeXml, parseWebSettings, parsePeople, parseSettingsXml } from "./metadata-parsers.js";
22
+ import { parseNumberingXml } from "./numbering-parser.js";
23
+ import { parseParagraphProperties, parseSectionProperties } from "./paragraph-section-parsers.js";
24
+ import { attrVal, attrInt, findChildNs, findChildrenNs, boolToggle, serializeElement, collectRIds, getPartRelsPath, getFileName, getFileExt, resolvePartPath, resolveRelTarget } from "./parse-utils.js";
25
+ import { parseRunProperties, parseShading, parseTableWidth, parseRevisionInfo } from "./properties-parsers.js";
26
+ import { createFieldState, createReaderContext, parseRelationships } from "./reader-context.js";
27
+ import { parseCheckBox, parseTocInstruction } from "./sdt-helpers.js";
28
+ import { parseStyles } from "./styles-parser.js";
29
+ import { parseTableBorders, parseTableCellMargins, parseTableProperties } from "./table-properties-parsers.js";
30
+ import { detectWatermarkFromRoot } from "./watermark-parser.js";
31
+ // =============================================================================
32
+ // Run Content Parser
33
+ // =============================================================================
34
/**
 * Translate the child elements of a run element into run-content items.
 *
 * Non-element children are ignored. Each element is matched on its name with
 * a leading `w:` prefix removed. Elements without a dedicated mapping are
 * kept as `opaqueRun` items carrying their serialized XML so they are not
 * lost on round-trip.
 *
 * @param {object} el - Parsed XML element whose `children` are inspected.
 * @returns {Array<object>} Content items, in document order.
 */
function parseRunContent(el) {
    // Elements that become a bare `{ type }` marker item.
    const markerKinds = new Map([
        ["tab", "tab"],
        ["cr", "carriageReturn"],
        ["noBreakHyphen", "noBreakHyphen"],
        ["softHyphen", "softHyphen"],
        ["lastRenderedPageBreak", "lastRenderedPageBreak"]
    ]);
    // Known structural elements handled elsewhere — skipped silently.
    const handledElsewhere = new Set(["rPr", "fldChar", "instrText", "delText"]);
    // Ruby property name, source element under w:rubyPr, and the reader used
    // for its `val` attribute. Order matters: it fixes the key order of the
    // resulting properties object.
    const rubyPropertySources = [
        ["align", "rubyAlign", attrVal],
        ["fontSize", "hps", attrInt],
        ["raise", "hpsRaise", attrInt],
        ["baseFontSize", "hpsBaseText", attrInt],
        ["language", "lid", attrVal]
    ];
    const localName = (node) => node.name.replace(/^w:/, "");
    // Parse every run element directly under `container` and append it to `target`.
    const appendRuns = (target, container) => {
        if (!container) {
            return;
        }
        for (const node of container.children) {
            if (node.type === "element" && localName(node) === "r") {
                target.push(parseRun(node));
            }
        }
    };
    // Shared shape of footnote and endnote references.
    const noteReference = (type, node) => {
        const ref = { type, id: attrInt(node, "id") ?? 0 };
        const flag = attrVal(node, "customMarkFollows");
        if (flag === "1" || flag === "true") {
            ref.customMarkFollows = true;
        }
        return ref;
    };
    const annotationReference = (node) => ({
        type: "annotationReference",
        id: attrInt(node, "id") ?? 0
    });
    const positionalTab = (node) => {
        const item = {
            type: "ptab",
            alignment: attrVal(node, "alignment") ?? "left",
            relativeTo: attrVal(node, "relativeTo") ?? "margin"
        };
        const leader = attrVal(node, "leader");
        if (leader) {
            item.leader = leader;
        }
        return item;
    };
    const rubyItem = (node) => {
        const item = { type: "ruby", rubyText: [], baseText: [] };
        const prEl = findChildNs(node, "rubyPr");
        if (prEl) {
            const props = {};
            for (const [key, tag, read] of rubyPropertySources) {
                const source = findChildNs(prEl, tag);
                if (source) {
                    props[key] = read(source, "val");
                }
            }
            // Attach properties only when at least one source element was present.
            if (Object.keys(props).length > 0) {
                item.properties = props;
            }
        }
        // w:rt holds the ruby text runs, w:rubyBase the base text runs.
        appendRuns(item.rubyText, findChildNs(node, "rt"));
        appendRuns(item.baseText, findChildNs(node, "rubyBase"));
        return item;
    };

    const items = [];
    for (const node of el.children) {
        if (node.type !== "element") {
            continue;
        }
        const tag = localName(node);
        if (markerKinds.has(tag)) {
            items.push({ type: markerKinds.get(tag) });
            continue;
        }
        if (handledElsewhere.has(tag)) {
            continue;
        }
        if (tag === "t") {
            items.push({ type: "text", text: textContent(node) });
        }
        else if (tag === "br") {
            items.push({ type: "break", breakType: attrVal(node, "type") });
        }
        else if (tag === "ptab") {
            items.push(positionalTab(node));
        }
        else if (tag === "ruby") {
            items.push(rubyItem(node));
        }
        else if (tag === "sym") {
            items.push({
                type: "symbol",
                font: attrVal(node, "font") ?? "",
                char: attrVal(node, "char") ?? ""
            });
        }
        else if (tag === "footnoteReference") {
            items.push(noteReference("footnoteRef", node));
        }
        else if (tag === "endnoteReference") {
            items.push(noteReference("endnoteRef", node));
        }
        else if (tag === "drawing") {
            // The drawing parser receives the output array and appends to it itself.
            parseDrawingContent(node, items);
        }
        else if (tag === "annotationRef" || tag === "commentReference") {
            // Both element names map to the same annotation reference item.
            items.push(annotationReference(node));
        }
        else {
            // Unknown run child: preserve as opaque for round-trip fidelity.
            items.push({ type: "opaqueRun", rawXml: serializeElement(node) });
        }
    }
    return items;
}
195
+ // =============================================================================
196
+ // DrawingML Shape Parser
197
+ // =============================================================================
198
/** Parse a base-10 integer; yields `undefined` (never NaN) for missing or malformed input. */
function drawingShapeInt(raw) {
    if (raw === undefined || raw === null) {
        return undefined;
    }
    const n = parseInt(raw, 10);
    return Number.isNaN(n) ? undefined : n;
}
/** Locate the `a:srgbClr` element nested in `parentEl`'s `a:solidFill` child, if both exist. */
function drawingShapeSrgbOf(parentEl) {
    const fillEl = findChild(parentEl, "a:solidFill") ?? findChildNs(parentEl, "solidFill");
    if (!fillEl) {
        return undefined;
    }
    return findChild(fillEl, "a:srgbClr") ?? findChildNs(fillEl, "srgbClr");
}
/**
 * Read a `wp:positionH` / `wp:positionV` child of the anchor.
 * Returns `{ relativeTo, offset?, align? }`, or `undefined` when the element is absent.
 */
function drawingShapePosition(anchorEl, tagName) {
    const posEl = findChild(anchorEl, tagName);
    if (!posEl) {
        return undefined;
    }
    const pos = {
        relativeTo: posEl.attributes["relativeFrom"]
    };
    const offsetEl = findChild(posEl, "wp:posOffset");
    if (offsetEl) {
        // A non-numeric offset is dropped instead of being stored as NaN.
        const offset = drawingShapeInt(textContent(offsetEl));
        if (offset !== undefined) {
            pos.offset = offset;
        }
    }
    const alignEl = findChild(posEl, "wp:align");
    if (alignEl) {
        pos.align = textContent(alignEl);
    }
    return pos;
}
/**
 * Build a `drawingShape` model object from a DrawingML anchor and its `wps:wsp` shape.
 *
 * @param anchorEl Element holding `wp:docPr`, `wp:extent`, `wp:positionH/V` and the wrap children.
 * @param wspEl    Shape element holding `wps:spPr` and optionally `wps:txbx`.
 * @param ctx      Parse context, forwarded to `parseParagraph` for text-box paragraphs.
 * @returns The shape object. Optional properties (fill, outline, text, position, wrap,
 *          behindDoc, rotation) are only set when the corresponding XML is present and valid.
 */
function parseDrawingShape(anchorEl, wspEl, ctx) {
    const docPrEl = findChild(anchorEl, "wp:docPr");
    const extentEl = findChild(anchorEl, "wp:extent");
    // Preset shape type lives in wps:spPr > a:prstGeom
    const spPrEl = findChild(wspEl, "wps:spPr") ?? findChildNs(wspEl, "spPr");
    const prstGeomEl = spPrEl
        ? (findChild(spPrEl, "a:prstGeom") ?? findChildNs(spPrEl, "prstGeom"))
        : undefined;
    const shape = {
        type: "drawingShape",
        shapeType: prstGeomEl?.attributes["prst"] ?? "rect",
        // Missing or malformed extents fall back to 0 rather than NaN.
        width: drawingShapeInt(extentEl?.attributes["cx"]) ?? 0,
        height: drawingShapeInt(extentEl?.attributes["cy"]) ?? 0,
        altText: docPrEl?.attributes["descr"],
        name: docPrEl?.attributes["name"]
    };
    if (spPrEl) {
        // Fill
        const fillSrgb = drawingShapeSrgbOf(spPrEl);
        if (fillSrgb) {
            shape.fillColor = fillSrgb.attributes["val"];
        }
        if (findChild(spPrEl, "a:noFill") ?? findChildNs(spPrEl, "noFill")) {
            shape.noFill = true;
        }
        // Outline
        const lnEl = findChild(spPrEl, "a:ln") ?? findChildNs(spPrEl, "ln");
        if (lnEl) {
            const outlineWidth = drawingShapeInt(lnEl.attributes["w"]);
            if (outlineWidth !== undefined) {
                shape.outlineWidth = outlineWidth;
            }
            const lineSrgb = drawingShapeSrgbOf(lnEl);
            if (lineSrgb) {
                shape.outlineColor = lineSrgb.attributes["val"];
            }
            if (findChild(lnEl, "a:noFill") ?? findChildNs(lnEl, "noFill")) {
                shape.noOutline = true;
            }
        }
    }
    // Text content (wps:txbx > w:txbxContent)
    const txbxEl = findChild(wspEl, "wps:txbx") ?? findChildNs(wspEl, "txbx");
    const txbxContentEl = txbxEl
        ? (findChild(txbxEl, "w:txbxContent") ?? findChildNs(txbxEl, "txbxContent"))
        : undefined;
    if (txbxContentEl) {
        const paras = [];
        for (const child of txbxContentEl.children) {
            if (child.type === "element" && child.name.replace(/^w:/, "") === "p") {
                paras.push(parseParagraph(child, ctx));
            }
        }
        if (paras.length > 0) {
            shape.textContent = paras;
        }
    }
    // Positioning
    const horizontal = drawingShapePosition(anchorEl, "wp:positionH");
    if (horizontal) {
        shape.horizontalPosition = horizontal;
    }
    const vertical = drawingShapePosition(anchorEl, "wp:positionV");
    if (vertical) {
        shape.verticalPosition = vertical;
    }
    // Wrap: when several wrap elements are present the last one wins.
    for (const wrapChild of anchorEl.children) {
        if (wrapChild.type !== "element") {
            continue;
        }
        switch (wrapChild.name) {
            case "wp:wrapSquare":
                shape.wrap = { style: "square", side: wrapChild.attributes["wrapText"] };
                break;
            case "wp:wrapTight":
                shape.wrap = { style: "tight", side: wrapChild.attributes["wrapText"] };
                break;
            case "wp:wrapTopAndBottom":
                shape.wrap = { style: "topAndBottom" };
                break;
            case "wp:wrapNone":
                shape.wrap = { style: "none" };
                break;
            default:
                break;
        }
    }
    // Behind doc: the attribute is an XML boolean, so accept both true spellings.
    const behindDoc = anchorEl.attributes["behindDoc"];
    if (behindDoc === "1" || behindDoc === "true") {
        shape.behindDoc = true;
    }
    // Rotation
    if (spPrEl) {
        const xfrmEl = findChild(spPrEl, "a:xfrm") ?? findChildNs(spPrEl, "xfrm");
        const rotation = drawingShapeInt(xfrmEl?.attributes["rot"]);
        if (rotation !== undefined) {
            shape.rotation = rotation;
        }
    }
    return shape;
}
335
+ // =============================================================================
336
+ // TextBox Parser
337
+ // =============================================================================
338
/**
 * Parse a VML text box (`v:shape` / `v:rect` > `v:textbox` > `w:txbxContent`) found
 * inside a `w:pict` element.
 *
 * @param pictEl The `w:pict` element whose children are scanned.
 * @param ctx    Parse context, forwarded to `parseParagraph`.
 * @returns `{ type: "textBox", content, style?, strokeColor?, fillColor?, stroke?, fill? }`,
 *          or `undefined` when no text-box content is present.
 */
function parseTextBox(pictEl, ctx) {
    let txbxContentEl;
    let shapeEl;
    for (const child of pictEl.children) {
        if (child.type !== "element" || (child.name !== "v:shape" && child.name !== "v:rect")) {
            continue;
        }
        for (const sc of child.children) {
            if (sc.type !== "element" || sc.name !== "v:textbox") {
                continue;
            }
            for (const tc of sc.children) {
                if (tc.type === "element" &&
                    (tc.name === "w:txbxContent" || tc.name === "txbxContent")) {
                    txbxContentEl = tc;
                    // Remember the shape that actually owns this text so the style
                    // attributes below are never taken from an unrelated sibling shape.
                    shapeEl = child;
                }
            }
        }
    }
    if (!txbxContentEl) {
        return undefined;
    }
    const paragraphs = [];
    for (const c of txbxContentEl.children) {
        if (c.type === "element" && c.name.replace(/^w:/, "") === "p") {
            paragraphs.push(parseParagraph(c, ctx));
        }
    }
    const tb = { type: "textBox", content: paragraphs };
    // VML booleans may be written in short ("f") or long ("false") form.
    const isVmlFalse = (value) => value === "f" || value === "false";
    const style = shapeEl.attributes["style"];
    if (style) {
        tb.style = style;
    }
    const strokeColor = shapeEl.attributes["strokecolor"];
    if (strokeColor) {
        tb.strokeColor = strokeColor;
    }
    const fillColor = shapeEl.attributes["fillcolor"];
    if (fillColor) {
        tb.fillColor = fillColor;
    }
    if (isVmlFalse(shapeEl.attributes["stroked"])) {
        tb.stroke = false;
    }
    if (isVmlFalse(shapeEl.attributes["filled"])) {
        tb.fill = false;
    }
    return tb;
}
389
+ // =============================================================================
390
+ // SDT / CheckBox / TOC Parser
391
+ // =============================================================================
392
/** Collect the `listItem` children of a dropDownList / comboBox element as `{ value, displayText? }`. */
function sdtListItems(listEl) {
    const items = [];
    for (const li of findChildrenNs(listEl, "listItem")) {
        const item = { value: attrVal(li, "value") ?? "" };
        const displayText = attrVal(li, "displayText");
        if (displayText) {
            item.displayText = displayText;
        }
        items.push(item);
    }
    return items;
}
/** Read a `w15:repeatingSection` element (child-element form first, attribute form as fallback). */
function sdtRepeatingSection(rsEl) {
    const rs = {};
    // Child elements are the schema-correct form.
    const titleEl = findChild(rsEl, "w15:sectionTitle");
    if (titleEl) {
        const v = titleEl.attributes["w15:val"] ?? titleEl.attributes["val"];
        if (v !== undefined) {
            rs.sectionTitle = v;
        }
    }
    if (findChild(rsEl, "w15:doNotAllowInsertDeleteSection")) {
        rs.allowInsertDelete = false;
    }
    // Attribute form is still accepted for backwards compatibility.
    const titleAttr = rsEl.attributes["w15:sectionTitle"];
    if (titleAttr !== undefined && rs.sectionTitle === undefined) {
        rs.sectionTitle = titleAttr;
    }
    const noInsDelAttr = rsEl.attributes["w15:doNotAllowInsertDeleteSection"];
    if (noInsDelAttr !== undefined && rs.allowInsertDelete === undefined) {
        // "doNotAllow" is off for every false spelling of an on/off value, not just "0".
        rs.allowInsertDelete =
            noInsDelAttr === "0" || noInsDelAttr === "false" || noInsDelAttr === "off";
    }
    return rs;
}
/** Translate the children of a `w:sdtPr` element into the generic SDT properties object. */
function sdtGenericProps(sdtPrEl) {
    const props = {};
    const tagEl = findChildNs(sdtPrEl, "tag");
    if (tagEl) {
        props.tag = attrVal(tagEl, "val");
    }
    const aliasEl = findChildNs(sdtPrEl, "alias");
    if (aliasEl) {
        props.alias = attrVal(aliasEl, "val");
    }
    const lockEl = findChildNs(sdtPrEl, "lock");
    if (lockEl) {
        const v = attrVal(lockEl, "val");
        if (v === "contentLocked" || v === "sdtContentLocked") {
            props.lockContent = true;
        }
        if (v === "sdtLocked" || v === "sdtContentLocked") {
            props.lockSdt = true;
        }
    }
    // Plain text
    if (findChildNs(sdtPrEl, "text")) {
        props.plainText = true;
    }
    // showingPlcHdr is a toggle, not a property with a val
    if (findChildNs(sdtPrEl, "showingPlcHdr")) {
        if (boolToggle(sdtPrEl, "showingPlcHdr") !== false) {
            props.showingPlaceholder = true;
        }
    }
    // w15:appearance
    const appearanceEl = findChild(sdtPrEl, "w15:appearance");
    if (appearanceEl) {
        const v = appearanceEl.attributes["w15:val"] ?? appearanceEl.attributes["val"];
        if (v === "boundingBox" || v === "tags" || v === "hidden") {
            props.appearance = v;
        }
    }
    // Dropdown list / combo box share the same listItem layout.
    const ddlEl = findChildNs(sdtPrEl, "dropDownList");
    if (ddlEl) {
        props.dropdownList = sdtListItems(ddlEl);
    }
    const cbEl = findChildNs(sdtPrEl, "comboBox");
    if (cbEl) {
        props.comboBox = sdtListItems(cbEl);
    }
    // Date picker
    const dateEl = findChildNs(sdtPrEl, "date");
    if (dateEl) {
        const dateProp = {};
        const fullDate = attrVal(dateEl, "fullDate");
        if (fullDate) {
            dateProp.fullDate = fullDate;
        }
        const dfEl = findChildNs(dateEl, "dateFormat");
        if (dfEl) {
            dateProp.dateFormat = attrVal(dfEl, "val");
        }
        const lidEl = findChildNs(dateEl, "lid");
        if (lidEl) {
            dateProp.lid = attrVal(lidEl, "val");
        }
        const storeEl = findChildNs(dateEl, "storeMappedDataAs");
        if (storeEl) {
            dateProp.storeMappedDataAs = attrVal(storeEl, "val");
        }
        props.date = dateProp;
    }
    // ID
    const idEl = findChildNs(sdtPrEl, "id");
    if (idEl) {
        const v = attrInt(idEl, "val");
        if (v !== undefined) {
            props.id = v;
        }
    }
    // Data binding: only recorded when both xpath and storeItemID are present.
    const dbEl = findChildNs(sdtPrEl, "dataBinding");
    if (dbEl) {
        const xpath = attrVal(dbEl, "xpath");
        const storeItemId = attrVal(dbEl, "storeItemID");
        if (xpath && storeItemId) {
            const binding = { xpath, storeItemId };
            const prefixMappings = attrVal(dbEl, "prefixMappings");
            if (prefixMappings) {
                binding.prefixMappings = prefixMappings;
            }
            props.dataBinding = binding;
        }
    }
    // Placeholder
    const phEl = findChildNs(sdtPrEl, "placeholder");
    if (phEl) {
        const docPartEl = findChildNs(phEl, "docPart");
        if (docPartEl) {
            props.placeholder = attrVal(docPartEl, "val");
        }
    }
    // Boolean marker elements: presence alone sets the flag.
    for (const flag of ["richText", "picture", "group", "equation", "citation", "bibliography", "temporary"]) {
        if (findChildNs(sdtPrEl, flag)) {
            props[flag] = true;
        }
    }
    // w15: repeating section
    const rsEl = findChild(sdtPrEl, "w15:repeatingSection");
    if (rsEl) {
        props.repeatingSection = sdtRepeatingSection(rsEl);
    }
    if (findChild(sdtPrEl, "w15:repeatingSectionItem")) {
        props.repeatingSectionItem = true;
    }
    return props;
}
/**
 * Parse a `w:sdt` (structured document tag).
 *
 * Dispatches to `parseCheckBox` when `w14:checkbox` is present and to `parseTocFromSdt`
 * when the SDT's docPartGallery is "Table of Contents"; otherwise returns a generic SDT.
 *
 * @param sdtEl The `w:sdt` element.
 * @param ctx   Parse context, forwarded to the paragraph/table parsers.
 * @returns The checkbox / TOC result, or `{ type: "sdt", properties, content }`.
 */
function parseSdt(sdtEl, ctx) {
    const sdtPrEl = findChildNs(sdtEl, "sdtPr");
    const sdtContentEl = findChildNs(sdtEl, "sdtContent");
    if (sdtPrEl) {
        // Checkbox (w14:checkbox)
        const checkBoxEl = findChild(sdtPrEl, "w14:checkbox");
        if (checkBoxEl) {
            return parseCheckBox(checkBoxEl);
        }
        // TOC (docPartObj with docPartGallery "Table of Contents")
        const docPartObjEl = findChildNs(sdtPrEl, "docPartObj");
        if (docPartObjEl) {
            const galleryEl = findChildNs(docPartObjEl, "docPartGallery");
            const galleryVal = galleryEl ? attrVal(galleryEl, "val") : undefined;
            if (galleryVal === "Table of Contents") {
                return parseTocFromSdt(sdtContentEl, ctx);
            }
        }
    }
    // Generic SDT
    const props = sdtPrEl ? sdtGenericProps(sdtPrEl) : {};
    const content = [];
    if (sdtContentEl) {
        for (const child of sdtContentEl.children) {
            if (child.type !== "element") {
                continue;
            }
            const n = child.name.replace(/^w:/, "");
            if (n === "p") {
                content.push(parseParagraph(child, ctx));
            }
            else if (n === "tbl") {
                content.push(parseTable(child, ctx));
            }
            else if (n === "r") {
                content.push(parseRun(child));
            }
            else if (n === "sdt") {
                // Nested SDT (e.g. repeating section item SDTs): keep the inner SDT,
                // including its own properties. Only results typed "sdt" are kept.
                const inner = parseSdt(child, ctx);
                if (inner && inner.type === "sdt") {
                    content.push(inner);
                }
            }
        }
    }
    return { type: "sdt", properties: props, content };
}
617
/**
 * Build a `tableOfContents` object from the content element of a TOC SDT.
 *
 * All `instrText` fragments below `sdtContentEl` are concatenated in document order and
 * handed to `parseTocInstruction`; the direct paragraph children are kept as
 * `cachedParagraphs` when there is at least one.
 *
 * @param sdtContentEl The `w:sdtContent` element, or a falsy value when the SDT has none.
 * @param ctx          Parse context, forwarded to `parseParagraph`.
 */
function parseTocFromSdt(sdtContentEl, ctx) {
    const toc = { type: "tableOfContents" };
    if (!sdtContentEl) {
        return toc;
    }
    const isElement = (node) => node.type === "element";
    const localName = (node) => node.name.replace(/^w:/, "");
    // Pre-order walk with an explicit stack; children are pushed in reverse so that
    // they are popped (visited) in document order.
    let instruction = "";
    const pending = sdtContentEl.children.filter(isElement).reverse();
    while (pending.length > 0) {
        const node = pending.pop();
        if (localName(node) === "instrText") {
            instruction += textContent(node);
            continue;
        }
        const nested = node.children.filter(isElement);
        for (let i = nested.length - 1; i >= 0; i--) {
            pending.push(nested[i]);
        }
    }
    if (instruction.trim()) {
        parseTocInstruction(instruction, toc);
    }
    const cached = sdtContentEl.children
        .filter((node) => isElement(node) && localName(node) === "p")
        .map((node) => parseParagraph(node, ctx));
    if (cached.length > 0) {
        toc.cachedParagraphs = cached;
    }
    return toc;
}
656
+ /** Parse a TOC field instruction string (e.g. `TOC \o "1-3" \h \t "Style,1" \c "Figure"`). */
657
+ // =============================================================================
658
+ // Paragraph Parser
659
+ // =============================================================================
660
/**
 * Parse a run element into `{ properties, content }`.
 * `properties` is `undefined` when the run has no `rPr` child.
 */
function parseRun(el) {
    const runPropsEl = findChildNs(el, "rPr");
    let properties;
    if (runPropsEl) {
        properties = parseRunProperties(runPropsEl);
    }
    const content = parseRunContent(el);
    return { properties, content };
}
667
/** Resolve `mc:AlternateContent`: first element child of `mc:Choice`, else of `mc:Fallback`. */
function paragraphAlternateContent(altEl) {
    for (const branchName of ["mc:Choice", "mc:Fallback"]) {
        const branchEl = findChild(altEl, branchName);
        const inner = branchEl?.children.find(c => c.type === "element");
        if (inner) {
            return inner;
        }
    }
    return undefined;
}
/** Read the `w:id` / `id` attribute as an integer; `undefined` when missing or not numeric. */
function paragraphMarkerId(el) {
    const raw = el.attributes["w:id"] ?? el.attributes["id"];
    if (raw === undefined) {
        return undefined;
    }
    const id = parseInt(raw, 10);
    return Number.isNaN(id) ? undefined : id;
}
/**
 * Feed the `fldChar` / `instrText` children of a run into the complex-field state machine.
 * A completed field is pushed onto `out` when its `end` marker is reached.
 * Returns true when the run carried field markup that was consumed.
 */
function paragraphScanFieldChars(runEl, field, out) {
    let sawFieldMarkup = false;
    for (const rc of runEl.children) {
        if (rc.type !== "element") {
            continue;
        }
        const rcName = rc.name.replace(/^w:/, "");
        if (rcName === "instrText") {
            if (field.state === "instrText") {
                sawFieldMarkup = true;
                field.instr += textContent(rc);
            }
            continue;
        }
        if (rcName !== "fldChar") {
            continue;
        }
        sawFieldMarkup = true;
        const fldCharType = attrVal(rc, "fldCharType");
        if (fldCharType === "begin") {
            field.state = "instrText";
            field.instr = "";
            field.cached = "";
            // Capture run properties from this run for the field
            const rPrEl = findChildNs(runEl, "rPr");
            field.runProps = rPrEl ? parseRunProperties(rPrEl) : undefined;
            // Parse ffData for legacy form fields
            const ffDataEl = findChildNs(rc, "ffData");
            field.formField = ffDataEl ? parseFfData(ffDataEl) : undefined;
        }
        else if (fldCharType === "separate") {
            field.state = "cached";
        }
        else if (fldCharType === "end") {
            // Only emit when a field is actually open. An unmatched `end` (for example
            // the outer end of a nested field whose state was already consumed by the
            // inner one) would otherwise produce a field with an empty instruction.
            if (field.state !== "none") {
                out.push({
                    properties: field.runProps,
                    content: [
                        {
                            type: "field",
                            instruction: field.instr.trim(),
                            cachedValue: field.cached || undefined,
                            formField: field.formField
                        }
                    ]
                });
            }
            field.state = "none";
            field.instr = "";
            field.cached = "";
            field.runProps = undefined;
            field.formField = undefined;
        }
    }
    return sawFieldMarkup;
}
/**
 * If the run's only non-`rPr` element child is a single `commentReference` with a
 * numeric id, return that id; otherwise `undefined`.
 */
function paragraphSoleCommentRefId(runEl) {
    let refId;
    let seen = false;
    for (const rc of runEl.children) {
        if (rc.type !== "element") {
            continue;
        }
        const rcName = rc.name.replace(/^w:/, "");
        if (rcName === "rPr") {
            continue;
        }
        if (rcName !== "commentReference" || seen) {
            // Any other child, or a second commentReference: use the generic run parser.
            return undefined;
        }
        seen = true;
        refId = paragraphMarkerId(rc);
    }
    return refId;
}
/** Push one `{ type, revision, run }` entry per run child of a track-change wrapper element. */
function paragraphPushRevisionRuns(out, wrapperEl, type, runParser) {
    const revision = parseRevisionInfo(wrapperEl);
    if (!revision) {
        return;
    }
    for (const c of wrapperEl.children) {
        if (c.type === "element" && c.name.replace(/^w:/, "") === "r") {
            out.push({ type, revision, run: runParser(c) });
        }
    }
}
/** Build a range marker `{ type, id, author?, date?, name? }`; `undefined` without a usable id. */
function paragraphRangeMarker(el, type, includeName) {
    const id = attrInt(el, "id");
    if (id === undefined) {
        return undefined;
    }
    const marker = { type, id };
    const author = attrVal(el, "author");
    if (author) {
        marker.author = author;
    }
    const date = attrVal(el, "date");
    if (date) {
        marker.date = date;
    }
    if (includeName) {
        const markerName = attrVal(el, "name");
        if (markerName) {
            marker.name = markerName;
        }
    }
    return marker;
}
/** Parse a `w:hyperlink` element, resolving external URLs through `ctx.relMap`. */
function paragraphHyperlink(el, ctx) {
    const attr = (n) => el.attributes[`w:${n}`] ?? el.attributes[n];
    const rId = el.attributes["r:id"];
    const runs = [];
    for (const hChild of el.children) {
        if (hChild.type === "element" && hChild.name.replace(/^w:/, "") === "r") {
            runs.push(parseRun(hChild));
        }
    }
    // When the security policy disallows external targets the URL is not resolved at
    // all, so the hyperlink only carries its anchor. Anchor-only links are unaffected.
    let url;
    if (rId && ctx.securityPolicy.allowExternalTargets) {
        const rel = ctx.relMap.get(rId);
        if (rel && rel.targetMode === "External") {
            url = rel.target;
        }
    }
    const hyperlink = {
        type: "hyperlink",
        rId,
        anchor: attr("anchor"),
        url,
        tooltip: attr("tooltip"),
        children: runs
    };
    const history = attr("history");
    if (history === "1" || history === "true") {
        hyperlink.history = true;
    }
    const tgtFrame = attr("tgtFrame");
    if (tgtFrame) {
        hyperlink.tgtFrame = tgtFrame;
    }
    const docLocation = attr("docLocation");
    if (docLocation) {
        hyperlink.docLocation = docLocation;
    }
    return hyperlink;
}
/**
 * Parse a paragraph element into `{ type: "paragraph", properties, children, paraId?, textId? }`.
 *
 * The complex-field state machine lives on `ctx.field` rather than in a local, so a field
 * whose `begin` and `end` markers sit in different paragraphs is still assembled.
 *
 * @param pEl Paragraph element (or a synthetic wrapper when flattening smartTag/customXml/dir).
 * @param ctx Parse context; reads `ctx.field`, `ctx.relMap` and `ctx.securityPolicy`.
 */
function parseParagraph(pEl, ctx) {
    const pPrEl = findChildNs(pEl, "pPr");
    const children = [];
    const field = ctx.field;
    for (const child of pEl.children) {
        if (child.type !== "element") {
            continue;
        }
        // mc:AlternateContent: pick mc:Choice, fall back to mc:Fallback
        const resolved = child.name === "mc:AlternateContent" ? paragraphAlternateContent(child) : child;
        if (!resolved) {
            continue;
        }
        const name = resolved.name.replace(/^w:/, "");
        switch (name) {
            case "r": {
                const hasFldChar = paragraphScanFieldChars(resolved, field, children);
                if (field.state === "cached") {
                    // Inside the cached-result part of a field: collect the text.
                    for (const rc of resolved.children) {
                        if (rc.type === "element" && rc.name.replace(/^w:/, "") === "t") {
                            field.cached += textContent(rc);
                        }
                    }
                }
                // A run becomes ordinary paragraph content only when it lies completely
                // outside any field and carried no field markup itself.
                if (field.state === "none" && !hasFldChar) {
                    // A run whose only meaningful child is a commentReference is hoisted
                    // to a paragraph-level `commentReference` child.
                    const commentRefId = paragraphSoleCommentRefId(resolved);
                    if (commentRefId !== undefined) {
                        children.push({ type: "commentReference", id: commentRefId });
                    }
                    else {
                        children.push(parseRun(resolved));
                    }
                }
                break;
            }
            case "fldSimple": {
                // Simple field: <w:fldSimple w:instr=" PAGE "><w:r>...</w:r></w:fldSimple>
                const instr = attrVal(resolved, "instr") ?? "";
                let cached = "";
                for (const runEl of resolved.children) {
                    if (runEl.type !== "element" || runEl.name.replace(/^w:/, "") !== "r") {
                        continue;
                    }
                    for (const rc of runEl.children) {
                        if (rc.type === "element" && rc.name.replace(/^w:/, "") === "t") {
                            cached += textContent(rc);
                        }
                    }
                }
                children.push({
                    properties: undefined,
                    content: [
                        {
                            type: "field",
                            instruction: instr.trim(),
                            cachedValue: cached || undefined
                        }
                    ]
                });
                break;
            }
            case "hyperlink":
                children.push(paragraphHyperlink(resolved, ctx));
                break;
            case "bookmarkStart": {
                const id = paragraphMarkerId(resolved);
                if (id === undefined) {
                    // Without a valid id the bookmark cannot be paired with its end;
                    // drop it rather than fabricate an id that could collide.
                    break;
                }
                const attr = (n) => resolved.attributes[`w:${n}`] ?? resolved.attributes[n];
                const bm = {
                    type: "bookmarkStart",
                    id,
                    name: attr("name") ?? ""
                };
                const colFirst = attr("colFirst");
                if (colFirst !== undefined) {
                    bm.colFirst = parseInt(colFirst, 10);
                }
                const colLast = attr("colLast");
                if (colLast !== undefined) {
                    bm.colLast = parseInt(colLast, 10);
                }
                const dcx = attr("displacedByCustomXml");
                if (dcx === "next" || dcx === "prev") {
                    bm.displacedByCustomXml = dcx;
                }
                children.push(bm);
                break;
            }
            case "bookmarkEnd":
            case "commentRangeStart":
            case "commentRangeEnd":
            case "commentReference": {
                // Id-only markers; dropped when the id is missing or not numeric.
                const id = paragraphMarkerId(resolved);
                if (id !== undefined) {
                    children.push({ type: name, id });
                }
                break;
            }
            case "ins":
                paragraphPushRevisionRuns(children, resolved, "insertedRun", parseRun);
                break;
            case "del":
                // Deleted runs carry w:delText instead of w:t.
                paragraphPushRevisionRuns(children, resolved, "deletedRun", parseDeletedRun);
                break;
            case "moveFrom":
                paragraphPushRevisionRuns(children, resolved, "movedFromRun", parseRun);
                break;
            case "moveTo":
                paragraphPushRevisionRuns(children, resolved, "movedToRun", parseRun);
                break;
            case "moveFromRangeStart":
            case "moveFromRangeEnd":
            case "moveToRangeStart":
            case "moveToRangeEnd": {
                const marker = paragraphRangeMarker(resolved, name, true);
                if (marker) {
                    children.push(marker);
                }
                break;
            }
            case "customXmlInsRangeStart":
            case "customXmlInsRangeEnd":
            case "customXmlDelRangeStart":
            case "customXmlDelRangeEnd":
            case "customXmlMoveFromRangeStart":
            case "customXmlMoveFromRangeEnd":
            case "customXmlMoveToRangeStart":
            case "customXmlMoveToRangeEnd": {
                const marker = paragraphRangeMarker(resolved, name, false);
                if (marker) {
                    children.push(marker);
                }
                break;
            }
            case "smartTag":
            case "customXml":
            case "dir": {
                // Semantic wrappers: flatten their children into this paragraph. The
                // wrapper's own properties element (smartTagPr / customXmlPr) is filtered
                // out first, otherwise it would reach the `default` branch of the
                // recursive call and be emitted as an opaque paragraph child.
                const surrogate = {
                    ...resolved,
                    children: resolved.children.filter(c => {
                        if (c.type !== "element") {
                            return true;
                        }
                        const ln = c.name.replace(/^w:/, "");
                        return ln !== "smartTagPr" && ln !== "customXmlPr";
                    })
                };
                for (const sub of parseParagraph(surrogate, ctx).children) {
                    children.push(sub);
                }
                break;
            }
            case "proofErr":
            case "permStart":
            case "permEnd":
            case "lastRenderedPageBreak":
                // Non-semantic markers; safely ignored
                break;
            default:
                // Unknown paragraph child: preserve as opaque for round-trip fidelity
                children.push({
                    type: "opaqueParagraphChild",
                    rawXml: serializeElement(resolved)
                });
                break;
        }
    }
    const paraId = pEl.attributes["w14:paraId"];
    const textId = pEl.attributes["w14:textId"];
    const result = {
        type: "paragraph",
        properties: pPrEl ? parseParagraphProperties(pPrEl) : undefined,
        children
    };
    if (paraId) {
        result.paraId = paraId;
    }
    if (textId) {
        result.textId = textId;
    }
    return result;
}
1123
/**
 * Parse a run that sits inside a deletion wrapper. Its text is taken from `w:delText`
 * (plain `w:t` is accepted as well); `w:br` and `w:tab` are kept, other children ignored.
 */
function parseDeletedRun(el) {
    const runPropsEl = findChildNs(el, "rPr");
    const pieces = [];
    for (const node of el.children) {
        if (node.type !== "element") {
            continue;
        }
        switch (node.name.replace(/^w:/, "")) {
            case "delText":
            case "t":
                pieces.push({ type: "text", text: textContent(node) });
                break;
            case "br":
                pieces.push({ type: "break", breakType: attrVal(node, "type") });
                break;
            case "tab":
                pieces.push({ type: "tab" });
                break;
            default:
                break;
        }
    }
    const properties = runPropsEl ? parseRunProperties(runPropsEl) : undefined;
    return { properties, content: pieces };
}
1153
+ // =============================================================================
1154
+ // Table Parser
1155
+ // =============================================================================
1156
/**
 * Parse a table cell element into `{ properties, content }`.
 *
 * `content` holds the cell's paragraphs and nested tables in document order.
 * `properties` is `undefined` when the cell has no `tcPr` child.
 *
 * @param el  The table cell element.
 * @param ctx Parse context, forwarded to the paragraph/table/SDT parsers.
 */
function parseTableCell(el, ctx) {
    const tcPrEl = findChildNs(el, "tcPr");
    const content = [];
    for (const child of el.children) {
        if (child.type !== "element") {
            continue;
        }
        const name = child.name.replace(/^w:/, "");
        if (name === "p") {
            content.push(parseParagraph(child, ctx));
        }
        else if (name === "tbl") {
            content.push(parseTable(child, ctx));
        }
        else if (name === "sdt") {
            // SDT inside a cell: the SDT's own paragraphs/tables are flattened into the
            // cell. SDT-level metadata is lost, and run-only or nested-SDT children are
            // dropped because they cannot be direct siblings of paragraphs/tables here.
            const sdt = parseSdt(child, ctx);
            if (sdt && sdt.type === "sdt") {
                for (const c of sdt.content) {
                    if (c.type === "paragraph" || c.type === "table") {
                        content.push(c);
                    }
                }
            }
        }
    }
    let props;
    if (tcPrEl) {
        const p = {};
        const wEl = findChildNs(tcPrEl, "tcW");
        if (wEl) {
            p.width = parseTableWidth(wEl);
        }
        const gsEl = findChildNs(tcPrEl, "gridSpan");
        if (gsEl) {
            p.gridSpan = attrInt(gsEl, "val");
        }
        const vmEl = findChildNs(tcPrEl, "vMerge");
        if (vmEl) {
            // A vMerge element without a value means "continue".
            p.verticalMerge = attrVal(vmEl, "val") ?? "continue";
        }
        const bordersEl = findChildNs(tcPrEl, "tcBorders");
        if (bordersEl) {
            p.borders = parseTableBorders(bordersEl);
        }
        const shdEl = findChildNs(tcPrEl, "shd");
        if (shdEl) {
            p.shading = parseShading(shdEl);
        }
        const vAlignEl = findChildNs(tcPrEl, "vAlign");
        if (vAlignEl) {
            p.verticalAlign = attrVal(vAlignEl, "val");
        }
        if (findChildNs(tcPrEl, "noWrap")) {
            p.noWrap = true;
        }
        const textDirEl = findChildNs(tcPrEl, "textDirection");
        if (textDirEl) {
            p.textDirection = attrVal(textDirEl, "val");
        }
        const marginsEl = findChildNs(tcPrEl, "tcMar");
        if (marginsEl) {
            p.margins = parseTableCellMargins(marginsEl);
        }
        // Conditional formatting
        const cnfEl = findChildNs(tcPrEl, "cnfStyle");
        if (cnfEl) {
            p.cnfStyle = attrVal(cnfEl, "val");
        }
        // Hide cell end-of-cell marker
        if (findChildNs(tcPrEl, "hideMark")) {
            p.hideMark = true;
        }
        // Fit text
        if (findChildNs(tcPrEl, "tcFitText")) {
            p.fitText = true;
        }
        // Cell-level revisions
        const cellInsEl = findChildNs(tcPrEl, "cellIns");
        if (cellInsEl) {
            const rev = parseRevisionInfo(cellInsEl);
            if (rev) {
                p.inserted = { revision: rev };
            }
        }
        const cellDelEl = findChildNs(tcPrEl, "cellDel");
        if (cellDelEl) {
            const rev = parseRevisionInfo(cellDelEl);
            if (rev) {
                p.deleted = { revision: rev };
            }
        }
        const cellMergeEl = findChildNs(tcPrEl, "cellMerge");
        if (cellMergeEl) {
            const vMerge = attrVal(cellMergeEl, "vMerge");
            const rev = parseRevisionInfo(cellMergeEl);
            if (rev && (vMerge === "cont" || vMerge === "rest")) {
                p.cellMerge = { vMerge, revision: rev };
            }
        }
        // tcPrChange: only the revision marker is recorded; the previous properties
        // nested inside the change element are not parsed.
        const tcPrChangeEl = findChildNs(tcPrEl, "tcPrChange");
        if (tcPrChangeEl) {
            const rev = parseRevisionInfo(tcPrChangeEl);
            if (rev) {
                p.propertyChange = { revision: rev };
            }
        }
        props = p;
    }
    return { properties: props, content };
}
1283
/**
 * Parse a `<w:tr>` element into a table-row model.
 *
 * Every `<w:tc>` child becomes a cell (via parseTableCell). Row properties
 * are only built when the row has a `<w:trPr>` and/or `<w:tblPrEx>` child.
 *
 * @param el  - Parsed `<w:tr>` element.
 * @param ctx - Reader context, forwarded to parseTableCell.
 * @returns `{ properties, cells }`; `properties` is `undefined` when the row
 *   carries neither `<w:trPr>` nor `<w:tblPrEx>`.
 */
function parseTableRow(el, ctx) {
    const trPrEl = findChildNs(el, "trPr");
    const tblPrExEl = findChildNs(el, "tblPrEx");
    const cells = [];
    for (const child of el.children) {
        if (child.type === "element" && child.name.replace(/^w:/, "") === "tc") {
            cells.push(parseTableCell(child, ctx));
        }
    }
    let props;
    if (trPrEl || tblPrExEl) {
        const p = {};
        if (tblPrExEl) {
            p.tblPrEx = parseTableProperties(tblPrExEl);
        }
        if (trPrEl) {
            // On/off toggles: a present element means "on" unless its val
            // attribute explicitly switches it off ("0" / "false" / "off").
            // Treating mere presence as true would flip an explicit "off".
            const isOn = (name) => {
                const toggleEl = findChildNs(trPrEl, name);
                if (!toggleEl) {
                    return false;
                }
                const val = attrVal(toggleEl, "val");
                return val !== "0" && val !== "false" && val !== "off";
            };
            const heightEl = findChildNs(trPrEl, "trHeight");
            if (heightEl) {
                p.height = {
                    // A height element without a usable val is recorded as 0.
                    value: attrInt(heightEl, "val") ?? 0,
                    rule: attrVal(heightEl, "hRule")
                };
            }
            if (isOn("tblHeader")) {
                p.tableHeader = true;
            }
            if (isOn("cantSplit")) {
                p.cantSplit = true;
            }
            if (isOn("hidden")) {
                p.hidden = true;
            }
            const csEl = findChildNs(trPrEl, "tblCellSpacing");
            if (csEl) {
                p.cellSpacing = parseTableWidth(csEl);
            }
            // Row-level tracked insertion / deletion markers. They are only
            // recorded when parseRevisionInfo yields revision data.
            const insEl = findChildNs(trPrEl, "ins");
            if (insEl) {
                const rev = parseRevisionInfo(insEl);
                if (rev) {
                    p.inserted = { revision: rev };
                }
            }
            const delEl = findChildNs(trPrEl, "del");
            if (delEl) {
                const rev = parseRevisionInfo(delEl);
                if (rev) {
                    p.deleted = { revision: rev };
                }
            }
            const gbEl = findChildNs(trPrEl, "gridBefore");
            if (gbEl) {
                p.gridBefore = attrInt(gbEl, "val");
            }
            const gaEl = findChildNs(trPrEl, "gridAfter");
            if (gaEl) {
                p.gridAfter = attrInt(gaEl, "val");
            }
            const wbEl = findChildNs(trPrEl, "wBefore");
            if (wbEl) {
                p.widthBefore = parseTableWidth(wbEl);
            }
            const waEl = findChildNs(trPrEl, "wAfter");
            if (waEl) {
                p.widthAfter = parseTableWidth(waEl);
            }
            const cnfEl = findChildNs(trPrEl, "cnfStyle");
            if (cnfEl) {
                p.cnfStyle = attrVal(cnfEl, "val");
            }
            // <w:trPrChange>: the nested <w:trPr> holds the properties that
            // applied before the tracked change.
            const trPrChangeEl = findChildNs(trPrEl, "trPrChange");
            if (trPrChangeEl) {
                const rev = parseRevisionInfo(trPrChangeEl);
                if (rev) {
                    const prevTrPr = findChildNs(trPrChangeEl, "trPr");
                    p.propertyChange = {
                        revision: rev,
                        previousProperties: prevTrPr ? parseRowPrInner(prevTrPr) : undefined
                    };
                }
            }
        }
        props = p;
    }
    return { properties: props, cells };
}
1369
/**
 * Parse the subset of row properties recorded for a tracked property change:
 * row height, header-row flag and cant-split flag.
 *
 * Used by parseTableRow for the `<w:trPr>` nested inside `<w:trPrChange>`.
 *
 * @param trPrEl - Parsed `<w:trPr>` element.
 * @returns An object with `height`, `tableHeader` and/or `cantSplit`; keys
 *   are only present when the corresponding element is found (and, for the
 *   two flags, not explicitly switched off).
 */
function parseRowPrInner(trPrEl) {
    // On/off toggles: a present element means "on" unless its val attribute
    // explicitly switches it off ("0" / "false" / "off").
    const isOn = (name) => {
        const toggleEl = findChildNs(trPrEl, name);
        if (!toggleEl) {
            return false;
        }
        const val = attrVal(toggleEl, "val");
        return val !== "0" && val !== "false" && val !== "off";
    };
    const p = {};
    const heightEl = findChildNs(trPrEl, "trHeight");
    if (heightEl) {
        p.height = {
            // A height element without a usable val is recorded as 0.
            value: attrInt(heightEl, "val") ?? 0,
            rule: attrVal(heightEl, "hRule")
        };
    }
    if (isOn("tblHeader")) {
        p.tableHeader = true;
    }
    if (isOn("cantSplit")) {
        p.cantSplit = true;
    }
    return p;
}
1387
/**
 * Parse a `<w:tbl>` element into a table model.
 *
 * @param tblEl - Parsed `<w:tbl>` element.
 * @param ctx   - Reader context, forwarded to parseTableRow.
 * @returns `{ type: "table", properties, columnWidths, rows }`.
 *   `properties` is `undefined` without a `<w:tblPr>` child and
 *   `columnWidths` is `undefined` without a `<w:tblGrid>` child.
 */
function parseTable(tblEl, ctx) {
    const tblPrEl = findChildNs(tblEl, "tblPr");
    const gridEl = findChildNs(tblEl, "tblGrid");
    const rows = [];
    for (const child of tblEl.children) {
        if (child.type === "element" && child.name.replace(/^w:/, "") === "tr") {
            rows.push(parseTableRow(child, ctx));
        }
    }
    let columnWidths;
    if (gridEl) {
        columnWidths = [];
        for (const col of findChildrenNs(gridEl, "gridCol")) {
            const width = Number.parseInt(col.attributes["w:w"] ?? col.attributes["w"] ?? "0", 10);
            // A malformed width must not leak NaN into the model; use the
            // same 0 that a missing attribute already produces.
            columnWidths.push(Number.isNaN(width) ? 0 : width);
        }
    }
    return {
        type: "table",
        properties: tblPrEl ? parseTableProperties(tblPrEl) : undefined,
        columnWidths,
        rows
    };
}
1410
+ // =============================================================================
1411
+ // Footnotes/Endnotes Parser
1412
+ // =============================================================================
1413
/**
 * Parse a footnotes/endnotes part into a list of `{ id, content, type? }`.
 *
 * @param xmlStr      - XML text of the notes part.
 * @param elementName - Local name of the note element to collect (the
 *   callers in this file pass "footnote" or "endnote").
 * @param ctx         - Reader context; `ctx.field` is swapped for a fresh
 *   field state while parsing and restored afterwards, even on error.
 * @returns The parsed notes. Separator notes and notes without an id are
 *   left out.
 */
function parseNotesXml(xmlStr, elementName, ctx) {
    // A notes part stands alone: start from a clean field state so that an
    // unterminated complex field in the document body cannot bleed into a
    // footnote/endnote and swallow its runs.
    const outerFieldState = ctx.field;
    ctx.field = createFieldState();
    try {
        const { root } = parseXml(xmlStr);
        const notes = [];
        for (const noteEl of findChildrenNs(root, elementName)) {
            const id = attrInt(noteEl, "id");
            const type = attrVal(noteEl, "type");
            // Separator / continuation-separator entries are regenerated by
            // the writer, and a note without an id cannot be referenced.
            const isSeparator = type === "separator" || type === "continuationSeparator";
            if (isSeparator || id === undefined) {
                continue;
            }
            const content = [];
            for (const child of noteEl.children) {
                if (child.type !== "element") {
                    continue;
                }
                switch (child.name.replace(/^w:/, "")) {
                    case "p":
                        content.push(parseParagraph(child, ctx));
                        break;
                    case "sdt": {
                        // Note content is a list of paragraphs, so an SDT is
                        // flattened to its paragraph children. The SDT's own
                        // metadata is dropped on round-trip, which beats
                        // losing the visible text altogether.
                        const sdt = parseSdt(child, ctx);
                        if (sdt && sdt.type === "sdt") {
                            for (const inner of sdt.content) {
                                if (inner.type !== "paragraph") {
                                    continue;
                                }
                                content.push(inner);
                            }
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
            // Only these two note types are carried over onto the model.
            const keepsType = type === "continuationNotice" || type === "normal";
            notes.push(keepsType ? { id, content, type } : { id, content });
        }
        return notes;
    }
    finally {
        ctx.field = outerFieldState;
    }
}
1471
+ // =============================================================================
1472
+ // Header/Footer Parser
1473
+ // =============================================================================
1474
/**
 * Parse a header/footer part from its XML text.
 *
 * @param xmlStr - XML text of the header or footer part.
 * @param ctx    - Reader context, forwarded to parseHeaderFooterRoot.
 * @returns Whatever parseHeaderFooterRoot returns for the parsed root.
 */
function parseHeaderFooterXml(xmlStr, ctx) {
    const { root } = parseXml(xmlStr);
    return parseHeaderFooterRoot(root, ctx);
}
1477
/**
 * Build header/footer content from an already-parsed root element.
 *
 * @param root - Root element of the header/footer part.
 * @param ctx  - Reader context; `ctx.field` is swapped for a fresh field
 *   state while parsing and restored afterwards, even on error.
 * @returns `{ children }` holding the paragraphs and tables found directly
 *   under the root, plus those hoisted out of direct SDT children.
 */
function parseHeaderFooterRoot(root, ctx) {
    // A header/footer part stands alone: begin with a clean field state so an
    // unterminated complex field in the body does not consume its runs.
    const outerFieldState = ctx.field;
    ctx.field = createFieldState();
    try {
        const children = [];
        for (const node of root.children) {
            if (node.type !== "element") {
                continue;
            }
            switch (node.name.replace(/^w:/, "")) {
                case "p":
                    children.push(parseParagraph(node, ctx));
                    break;
                case "tbl":
                    children.push(parseTable(node, ctx));
                    break;
                case "sdt": {
                    // The children list only holds paragraphs and tables, so
                    // an SDT is flattened: its inner paragraphs/tables are
                    // hoisted and the SDT-level metadata is dropped on
                    // round-trip. Visible content is preserved.
                    const sdt = parseSdt(node, ctx);
                    if (sdt && sdt.type === "sdt") {
                        for (const inner of sdt.content) {
                            if (inner.type === "paragraph" || inner.type === "table") {
                                children.push(inner);
                            }
                        }
                    }
                    break;
                }
                default:
                    break;
            }
        }
        return { children };
    }
    finally {
        ctx.field = outerFieldState;
    }
}
1519
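+ // NOTE(review): orphaned doc comment, "Detect watermark from a header's parsed XML root element." No function follows it here; it presumably described detectWatermarkFromRoot, which _readDocxInner calls on each parsed header root. Kept as a plain comment so doc tooling does not attach it to the next declaration.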
1520
+ // =============================================================================
1521
+ // Comments Parser
1522
+ // =============================================================================
1523
/**
 * Parse a comments part by delegating to parseCommentsXmlExternal, handing
 * it this module's parseParagraph as the paragraph parser.
 *
 * @param xmlStr - XML text of the comments part.
 * @param ctx    - Reader context, passed through unchanged.
 * @returns Whatever parseCommentsXmlExternal returns.
 */
function parseCommentsXmlFromCtx(xmlStr, ctx) {
    const paragraphParser = parseParagraph;
    return parseCommentsXmlExternal(xmlStr, ctx, paragraphParser);
}
1526
+ // =============================================================================
1527
+ // Main Document Parser
1528
+ // =============================================================================
1529
/**
 * Walk an element tree and sort the drawings found in it into the
 * caller-supplied arrays (all three are appended to in place).
 *
 * - `<wp:anchor>` containing a wsp shape -> `shapes` (via parseDrawingShape)
 * - `<wp:anchor>` that parseFloatingImage accepts -> `images`
 * - any other `<wp:anchor>` -> `opaqueDrawings` (raw XML plus referenced rIds)
 * - `<wp:inline>` whose graphic data holds no picture -> `opaqueDrawings`
 * - every other element is descended into recursively
 *
 * @param el             - Element whose children are scanned.
 * @param images         - Receives floating images.
 * @param shapes         - Receives drawing shapes.
 * @param opaqueDrawings - Receives `{ type: "opaqueDrawing", rawXml, referencedRIds }`.
 * @param ctx            - Reader context, forwarded to parseDrawingShape.
 */
function extractFloatingContent(el, images, shapes, opaqueDrawings, ctx) {
    // Resolve <a:graphic>/<a:graphicData> directly beneath a drawing container.
    const graphicDataOf = (container) => {
        const graphic = findChild(container, "a:graphic");
        return graphic ? findChild(graphic, "a:graphicData") : undefined;
    };
    for (const node of el.children) {
        if (node.type !== "element") {
            continue;
        }
        switch (node.name) {
            case "wp:anchor": {
                // Decide whether the anchor carries a wsp shape or a picture.
                const graphicData = graphicDataOf(node);
                const wsp = graphicData
                    ? (findChild(graphicData, "wps:wsp") ?? findChildNs(graphicData, "wsp"))
                    : undefined;
                if (wsp) {
                    const shape = parseDrawingShape(node, wsp, ctx);
                    if (shape) {
                        shapes.push(shape);
                    }
                    break;
                }
                const image = parseFloatingImage(node);
                if (image) {
                    images.push(image);
                    break;
                }
                // Unknown anchor content (chart, diagram, etc.): keep it as an
                // opaque drawing, serialized inside a <w:drawing> wrapper.
                const wrapper = findDrawingParent(node);
                if (wrapper) {
                    const rids = new Set();
                    collectRIds(wrapper, rids);
                    const rawXml = serializeElement(wrapper);
                    opaqueDrawings.push({
                        type: "opaqueDrawing",
                        rawXml,
                        referencedRIds: [...rids]
                    });
                }
                break;
            }
            case "wp:inline": {
                const graphicData = graphicDataOf(node);
                if (!graphicData) {
                    break;
                }
                const picture = findChild(graphicData, "pic:pic") ?? findChildNs(graphicData, "pic");
                if (picture) {
                    // Inline pictures are not collected by this function.
                    break;
                }
                // Not an image, so this is an opaque inline drawing. This path
                // is kept on purpose even though parseDrawingContent also emits
                // an `opaqueRun` for the same drawing: the body-level pass
                // removes the duplicate opaqueRun once the OpaqueDrawing has
                // been captured. Inside table cells / headers / footers / SDTs
                // (where this extractor is not invoked) the opaqueRun is the
                // only representation, so the drawing still round-trips.
                const rids = new Set();
                collectRIds(node, rids);
                // Serialize the wp:inline element wrapped in w:drawing.
                const rawXml = `<w:drawing>${serializeElement(node)}</w:drawing>`;
                opaqueDrawings.push({
                    type: "opaqueDrawing",
                    rawXml,
                    referencedRIds: [...rids]
                });
                break;
            }
            default:
                extractFloatingContent(node, images, shapes, opaqueDrawings, ctx);
                break;
        }
    }
}
1599
/**
 * Produce a `w:drawing` element to serialize an anchor under.
 *
 * Elements carry no parent references, so the real ancestor cannot be looked
 * up; a synthetic wrapper holding the anchor as its only child is built
 * instead.
 *
 * @param anchorEl - The anchor element to wrap (stored by reference).
 * @returns A new `w:drawing` element with no attributes.
 */
function findDrawingParent(anchorEl) {
    const wrapper = {
        type: "element",
        name: "w:drawing",
        attributes: {},
        children: []
    };
    wrapper.children.push(anchorEl);
    return wrapper;
}
1609
/**
 * A paragraph is considered "empty" for the purposes of synthetic-anchor
 * detection if it has no children, or if every child is a run whose content
 * is either absent or contains only zero-length text segments. Inline images,
 * fields, hyperlinks etc. all count as non-empty content. Properties (style
 * id, alignment, etc.) are intentionally ignored: a single floating drawing
 * that the writer wrapped in its own paragraph would never have meaningful
 * paragraph properties.
 *
 * @param para - Parsed paragraph model.
 * @returns `true` when the paragraph carries no visible content.
 */
function isEmptyParagraph(para) {
    if (!para.children || para.children.length === 0) {
        return true;
    }
    for (const child of para.children) {
        if (!isRun(child)) {
            // Anything that is not a run (hyperlink, bookmark, insertedRun,
            // etc.) is considered meaningful content.
            return false;
        }
        // A run with no content list at all is empty by definition; iterating
        // `undefined` would throw instead of reporting that.
        for (const item of child.content ?? []) {
            // Only zero-length text keeps the paragraph empty. Non-empty text
            // or any other run content (image, field, break, tab, ruby, etc.)
            // makes it non-empty.
            if (item.type !== "text" || item.text.length > 0) {
                return false;
            }
        }
    }
    return true;
}
1644
/**
 * Remove `opaqueRun` entries that wrap a non-picture `<wp:inline>` drawing.
 *
 * These are emitted by parseDrawingContent so the drawing survives a
 * round-trip when its containing paragraph lives inside a table cell, header,
 * footer or SDT (places where the body-level extractor never runs). At the
 * body level, however, the same drawings are also captured as `OpaqueDrawing`
 * entries by extractFloatingContent, so keeping both would duplicate the
 * drawing in the produced document.
 *
 * Mutates run content in place. Paragraphs without a children list and runs
 * without a content list are left untouched.
 *
 * @param para - Parsed paragraph model whose runs are cleaned up.
 */
function stripInlineDrawingOpaqueRuns(para) {
    // isEmptyParagraph tolerates a paragraph without children; this function
    // is called on the same paragraph and must tolerate it too.
    if (!para.children) {
        return;
    }
    for (const child of para.children) {
        if (!isRun(child) || !child.content) {
            continue;
        }
        const content = child.content;
        // Walk backwards so splicing never shifts an index still to be visited.
        for (let i = content.length - 1; i >= 0; i--) {
            const item = content[i];
            const wrapsInlineNonPicture = item.type === "opaqueRun" &&
                item.rawXml.includes("<wp:inline") &&
                !item.rawXml.includes("<pic:pic");
            if (wrapsInlineNonPicture) {
                content.splice(i, 1);
            }
        }
    }
}
1675
/**
 * Parse the main document part.
 *
 * @param xmlStr - XML text of the document part.
 * @param ctx    - Reader context, forwarded to the paragraph / table / SDT /
 *   text-box parsers.
 * @returns `{ body, sectionProperties, background }`. `body` lists the
 *   block-level items in document order, `sectionProperties` comes from a
 *   body-level `sectPr`, and `background` is only set when the root has a
 *   background element.
 * @throws {DocxParseError} When the root has no body element.
 */
function parseDocumentXml(xmlStr, ctx) {
    const { root } = parseXml(xmlStr);
    // Page background: copy across whichever colour attributes are non-empty.
    let background;
    const bgEl = findChildNs(root, "background");
    if (bgEl) {
        background = {};
        for (const key of ["color", "themeColor", "themeShade", "themeTint"]) {
            const value = attrVal(bgEl, key);
            if (value) {
                background[key] = value;
            }
        }
    }
    const bodyEl = findChildNs(root, "body") ?? findChild(root, "w:body");
    if (!bodyEl) {
        throw new DocxParseError("Missing w:body element in document.xml");
    }
    const body = [];
    let sectionProperties;
    // Floating content is extracted paragraph by paragraph and inserted right
    // after the paragraph it belongs to. Collecting it from the whole body
    // and appending it at the end would lose positional information.
    for (const child of bodyEl.children) {
        if (child.type !== "element") {
            continue;
        }
        const name = child.name.replace(/^w:/, "");
        if (name === "p") {
            // Per the OOXML schema a body-level math block is encoded as a
            // paragraph whose only content is m:oMathPara (pPr is allowed
            // alongside it). Surface that shape as top-level math blocks so
            // the document model stays flat; the writer reverses this by
            // re-wrapping math blocks in <w:p>.
            const elementKids = child.children.filter(c => c.type === "element");
            const mathParas = elementKids.filter(c => c.name === "m:oMathPara");
            const hasOtherContent = elementKids.some(c => c.name !== "w:pPr" && c.name !== "m:oMathPara");
            if (mathParas.length > 0 && !hasOtherContent) {
                for (const mathPara of mathParas) {
                    body.push(parseMathBlock(mathPara));
                }
                continue;
            }
            const para = parseParagraph(child, ctx);
            const floatingImages = [];
            const drawingShapes = [];
            const opaqueDrawings = [];
            extractFloatingContent(child, floatingImages, drawingShapes, opaqueDrawings, ctx);
            // parseDrawingContent (called from parseRunContent) already kept
            // every non-picture inline drawing as an `opaqueRun`, so that it
            // survives inside cells/headers/footers/SDTs where this
            // body-level extractor is not invoked. Here the same drawings
            // were just captured as `OpaqueDrawing` entries, the form the
            // chart parser looks for when promoting them to `ChartContent`.
            // Strip the opaqueRun copies to avoid duplicate output.
            if (opaqueDrawings.length > 0) {
                stripInlineDrawingOpaqueRuns(para);
            }
            // An otherwise empty paragraph that yielded anchored content is a
            // synthetic carrier for the drawing(s) and is dropped. Keeping it
            // would add a phantom empty paragraph on every round-trip (the
            // writer wraps floating images in their own <w:p>, the reader
            // pulls the anchor out, leaving an empty <w:p> behind).
            const extracted = [...floatingImages, ...drawingShapes, ...opaqueDrawings];
            const paragraphIsEmpty = isEmptyParagraph(para);
            const isSyntheticCarrier = extracted.length > 0 && paragraphIsEmpty;
            if (!isSyntheticCarrier) {
                body.push(para);
            }
            for (const item of extracted) {
                body.push(item);
            }
        }
        else if (name === "tbl") {
            body.push(parseTable(child, ctx));
        }
        else if (name === "sectPr") {
            // Final section properties at the body level.
            sectionProperties = parseSectionProperties(child);
        }
        else if (name === "sdt") {
            const sdtResult = parseSdt(child, ctx);
            if (sdtResult) {
                body.push(sdtResult);
            }
        }
        else if (name === "altChunk") {
            const rId = child.attributes["r:id"] ?? child.attributes["id"];
            if (rId) {
                body.push({ type: "altChunk", rId });
            }
        }
        else if (child.name === "m:oMathPara") {
            // Math namespace, directly under the body.
            body.push(parseMathBlock(child));
        }
        else if (child.name === "m:oMath") {
            body.push({ type: "math", content: parseMathContent(child) });
        }
        else if (name === "pict") {
            // VML pict (textbox). `name` already has any "w:" prefix removed,
            // so this covers both "pict" and "w:pict".
            const textBox = parseTextBox(child, ctx);
            if (textBox) {
                body.push(textBox);
            }
        }
    }
    return { body, sectionProperties, background };
}
1823
/**
 * Read a DOCX file from a Uint8Array buffer and parse it into a DocxDocument
 * model.
 *
 * If the file is encrypted (CFB format), supply a password through `options`
 * to have it decrypted automatically; the decrypted package is then read by
 * calling this function again.
 *
 * @param buffer  - Raw bytes of the file.
 * @param options - Optional settings; `securityPolicy` and `password` are
 *   read here.
 * @returns The parsed document.
 * @throws {DocxLimitExceededError} When the input is larger than the
 *   policy's `maxPackageSize`.
 * @throws {DocxEncryptedError} When the input is a CFB container and no
 *   password was given.
 * @throws {DocxParseError} Wrapping any failure from the inner reader that
 *   is not already a DocxError (the original is kept as `cause`).
 */
export async function readDocx(buffer, options) {
    const policy = resolveSecurityPolicy(options?.securityPolicy);
    // Defense-in-depth: refuse obviously oversized packages up-front. The
    // same limit is enforced again, incrementally, while entries are
    // decompressed, so a pathological deflate stream cannot slip past it.
    if (buffer.length > policy.maxPackageSize) {
        throw new DocxLimitExceededError("packageSize", policy.maxPackageSize, buffer.length, "compressed input larger than maxPackageSize");
    }
    // Encrypted DOCX files are CFB containers rather than ZIPs; detect them
    // before attempting a ZIP parse.
    // CFB signature: D0 CF 11 E0 A1 B1 1A E1
    const cfbSignature = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
    const isCfb = buffer.length >= cfbSignature.length &&
        cfbSignature.every((byte, index) => buffer[index] === byte);
    if (isCfb) {
        if (options?.password == null) {
            throw new DocxEncryptedError();
        }
        // Hand over the policy's package-size cap so a hostile CFB cannot
        // claim a multi-GiB decrypted size and force a huge allocation
        // before the unzip stage even runs.
        const decryptedZip = await decryptDocx(buffer, options.password, policy.maxPackageSize);
        return readDocx(decryptedZip, options);
    }
    try {
        return await _readDocxInner(buffer, policy);
    }
    catch (err) {
        // Library errors pass through untouched; anything else is wrapped.
        if (err instanceof DocxError) {
            throw err;
        }
        const detail = err instanceof Error ? err.message : String(err);
        throw new DocxParseError(`Failed to read DOCX: ${detail}`, { cause: err });
    }
}
1868
+ async function _readDocxInner(buffer, policy) {
1869
+ const reader = unzip(buffer);
1870
+ const entries = new Map();
1871
+ let totalUncompressed = 0;
1872
+ let entryCount = 0;
1873
+ for await (const entry of reader.entries()) {
1874
+ entryCount++;
1875
+ if (entryCount > policy.maxPartCount) {
1876
+ throw new DocxLimitExceededError("partCount", policy.maxPartCount, entryCount, "ZIP contains more entries than maxPartCount");
1877
+ }
1878
+ const data = await entry.bytes();
1879
+ if (data.length > policy.maxPartSize) {
1880
+ throw new DocxLimitExceededError("partSize", policy.maxPartSize, data.length, `entry "${entry.path}" exceeds maxPartSize`);
1881
+ }
1882
+ totalUncompressed += data.length;
1883
+ if (totalUncompressed > policy.maxPackageSize) {
1884
+ throw new DocxLimitExceededError("packageSize", policy.maxPackageSize, totalUncompressed, "cumulative uncompressed entry size exceeds maxPackageSize");
1885
+ }
1886
+ // Normalize path: remove leading slash, normalize separators
1887
+ const path = entry.path.replace(/^\//, "").replace(/\\/g, "/");
1888
+ entries.set(path, data);
1889
+ }
1890
+ const decoder = utf8Decoder;
1891
+ const consumedPaths = new Set(["[Content_Types].xml"]);
1892
+ // Best-effort parse for non-critical parts (settings, numbering, styles,
1893
+ // theme, fontTable, comments, charts, headers, footers, notes, …). A
1894
+ // malformed auxiliary part should not prevent us from returning the main
1895
+ // document body. Only parse failures on document.xml itself are fatal.
1896
+ const tryParse = (fn) => {
1897
+ try {
1898
+ return fn();
1899
+ }
1900
+ catch {
1901
+ return undefined;
1902
+ }
1903
+ };
1904
+ // Parse [Content_Types].xml for accurate opaque part content types
1905
+ const contentTypesXml = entries.get("[Content_Types].xml");
1906
+ const contentTypeOverrides = new Map();
1907
+ const contentTypeDefaults = new Map();
1908
+ if (contentTypesXml) {
1909
+ const ctDoc = parseXml(decoder.decode(contentTypesXml));
1910
+ for (const child of ctDoc.root.children) {
1911
+ if (child.type !== "element") {
1912
+ continue;
1913
+ }
1914
+ if (child.name === "Override") {
1915
+ const partName = child.attributes["PartName"] ?? "";
1916
+ const ct = child.attributes["ContentType"] ?? "";
1917
+ if (partName && ct) {
1918
+ // Normalize: remove leading slash
1919
+ contentTypeOverrides.set(partName.replace(/^\//, ""), ct);
1920
+ }
1921
+ }
1922
+ else if (child.name === "Default") {
1923
+ const ext = child.attributes["Extension"] ?? "";
1924
+ const ct = child.attributes["ContentType"] ?? "";
1925
+ if (ext && ct) {
1926
+ contentTypeDefaults.set(ext.toLowerCase(), ct);
1927
+ }
1928
+ }
1929
+ }
1930
+ }
1931
+ const getText = (path) => {
1932
+ const data = entries.get(path);
1933
+ if (data) {
1934
+ consumedPaths.add(path);
1935
+ }
1936
+ return data ? decoder.decode(data) : undefined;
1937
+ };
1938
+ // Parse document relationships (must be before parseDocumentXml for hyperlink resolution)
1939
+ // First, try to discover document path via package rels (supports Strict conformance)
1940
+ let documentPartPath = "word/document.xml";
1941
+ const packageRelsXmlEarly = getText("_rels/.rels");
1942
+ if (packageRelsXmlEarly) {
1943
+ const pkgRelsEarly = parseRelationships(packageRelsXmlEarly);
1944
+ for (const rel of pkgRelsEarly) {
1945
+ if (rel.type === RelType.OfficeDocument) {
1946
+ let target = rel.target;
1947
+ if (target.startsWith("/")) {
1948
+ target = target.substring(1);
1949
+ }
1950
+ documentPartPath = target;
1951
+ break;
1952
+ }
1953
+ }
1954
+ }
1955
+ const docRelsPath = getPartRelsPath(documentPartPath);
1956
+ const docRelsXml = getText(docRelsPath);
1957
+ const docRels = docRelsXml ? parseRelationships(docRelsXml) : [];
1958
+ const _relMap = new Map(docRels.map(r => [r.id, r]));
1959
+ // Create reader context for this parse session (replaces module-level _session)
1960
+ const ctx = createReaderContext(policy);
1961
+ ctx.relMap = _relMap;
1962
+ // Parse document.xml (required)
1963
+ const documentXml = getText(documentPartPath);
1964
+ if (!documentXml) {
1965
+ throw new DocxMissingPartError(documentPartPath);
1966
+ }
1967
+ const { body, sectionProperties, background } = parseDocumentXml(documentXml, ctx);
1968
+ // Parse styles (resolve path via relationship, fallback to hardcoded)
1969
+ const stylesPath = resolveRelTarget(docRels, RelType.Styles, documentPartPath) ?? "word/styles.xml";
1970
+ const stylesXml = getText(stylesPath);
1971
+ const stylesResult = stylesXml ? tryParse(() => parseStyles(stylesXml)) : undefined;
1972
+ // Parse numbering
1973
+ const numberingPath = resolveRelTarget(docRels, RelType.Numbering, documentPartPath) ?? "word/numbering.xml";
1974
+ const numberingXml = getText(numberingPath);
1975
+ const numberingResult = numberingXml
1976
+ ? tryParse(() => parseNumberingXml(numberingXml))
1977
+ : undefined;
1978
+ // Parse footnotes/endnotes — swap ctx.relMap to the notes part's own
1979
+ // .rels (footnotes.xml.rels / endnotes.xml.rels) so hyperlinks and images
1980
+ // inside notes resolve against the correct relationship map. Without this,
1981
+ // any rId used in a footnote silently resolves to undefined.
1982
+ const footnotesPath = resolveRelTarget(docRels, RelType.Footnotes, documentPartPath) ?? "word/footnotes.xml";
1983
+ const footnotesXml = getText(footnotesPath);
1984
+ let footnotes;
1985
+ if (footnotesXml) {
1986
+ const footnotesRelsPath = getPartRelsPath(footnotesPath);
1987
+ const footnotesRelsXml = getText(footnotesRelsPath);
1988
+ const savedRelMap = ctx.relMap;
1989
+ if (footnotesRelsXml) {
1990
+ const footnotesRels = parseRelationships(footnotesRelsXml);
1991
+ ctx.relMap = new Map(footnotesRels.map(r => [r.id, r]));
1992
+ consumedPaths.add(footnotesRelsPath);
1993
+ }
1994
+ else {
1995
+ ctx.relMap = new Map();
1996
+ }
1997
+ footnotes = tryParse(() => parseNotesXml(footnotesXml, "footnote", ctx));
1998
+ ctx.relMap = savedRelMap;
1999
+ }
2000
+ const endnotesPath = resolveRelTarget(docRels, RelType.Endnotes, documentPartPath) ?? "word/endnotes.xml";
2001
+ const endnotesXml = getText(endnotesPath);
2002
+ let endnotes;
2003
+ if (endnotesXml) {
2004
+ const endnotesRelsPath = getPartRelsPath(endnotesPath);
2005
+ const endnotesRelsXml = getText(endnotesRelsPath);
2006
+ const savedRelMap = ctx.relMap;
2007
+ if (endnotesRelsXml) {
2008
+ const endnotesRels = parseRelationships(endnotesRelsXml);
2009
+ ctx.relMap = new Map(endnotesRels.map(r => [r.id, r]));
2010
+ consumedPaths.add(endnotesRelsPath);
2011
+ }
2012
+ else {
2013
+ ctx.relMap = new Map();
2014
+ }
2015
+ endnotes = tryParse(() => parseNotesXml(endnotesXml, "endnote", ctx));
2016
+ ctx.relMap = savedRelMap;
2017
+ }
2018
+ // Parse headers/footers + detect watermarks
2019
+ const headers = new Map();
2020
+ const footers = new Map();
2021
+ let watermark;
2022
+ for (const rel of docRels) {
2023
+ if (rel.type === RelType.Header) {
2024
+ const headerPartPath = resolvePartPath(documentPartPath, rel.target);
2025
+ const xml = getText(headerPartPath);
2026
+ if (xml) {
2027
+ // Parse header's own rels and switch ctx.relMap so hyperlinks/images
2028
+ // referenced inside the header resolve against its own relationship map.
2029
+ const headerRelsPath = getPartRelsPath(headerPartPath);
2030
+ const headerRelsXml = getText(headerRelsPath);
2031
+ const savedRelMap = ctx.relMap;
2032
+ if (headerRelsXml) {
2033
+ const headerRels = parseRelationships(headerRelsXml);
2034
+ const headerRelMap = new Map(headerRels.map(r => [r.id, r]));
2035
+ ctx.relMap = headerRelMap;
2036
+ consumedPaths.add(headerRelsPath);
2037
+ }
2038
+ else {
2039
+ ctx.relMap = new Map();
2040
+ }
2041
+ try {
2042
+ // Parse XML once, re-use for both header content and watermark detection
2043
+ const headerRoot = parseXml(xml).root;
2044
+ headers.set(rel.id, { content: parseHeaderFooterRoot(headerRoot, ctx), rId: rel.id });
2045
+ if (!watermark) {
2046
+ watermark = detectWatermarkFromRoot(headerRoot);
2047
+ }
2048
+ }
2049
+ catch {
2050
+ // Skip a malformed header; preserve other headers and the document.
2051
+ }
2052
+ ctx.relMap = savedRelMap;
2053
+ }
2054
+ }
2055
+ else if (rel.type === RelType.Footer) {
2056
+ const footerPartPath = resolvePartPath(documentPartPath, rel.target);
2057
+ const xml = getText(footerPartPath);
2058
+ if (xml) {
2059
+ // Parse footer's own rels and switch ctx.relMap so hyperlinks/images
2060
+ // referenced inside the footer resolve against its own relationship map.
2061
+ const footerRelsPath = getPartRelsPath(footerPartPath);
2062
+ const footerRelsXml = getText(footerRelsPath);
2063
+ const savedRelMap = ctx.relMap;
2064
+ if (footerRelsXml) {
2065
+ const footerRels = parseRelationships(footerRelsXml);
2066
+ const footerRelMap = new Map(footerRels.map(r => [r.id, r]));
2067
+ ctx.relMap = footerRelMap;
2068
+ consumedPaths.add(footerRelsPath);
2069
+ }
2070
+ else {
2071
+ ctx.relMap = new Map();
2072
+ }
2073
+ try {
2074
+ footers.set(rel.id, { content: parseHeaderFooterXml(xml, ctx), rId: rel.id });
2075
+ }
2076
+ catch {
2077
+ // Skip a malformed footer; preserve other footers and the document.
2078
+ }
2079
+ ctx.relMap = savedRelMap;
2080
+ }
2081
+ }
2082
+ }
2083
+ // Parse settings
2084
+ const settingsPath = resolveRelTarget(docRels, RelType.Settings, documentPartPath) ?? "word/settings.xml";
2085
+ const settingsXml = getText(settingsPath);
2086
+ const settings = settingsXml ? tryParse(() => parseSettingsXml(settingsXml)) : undefined;
2087
+ // Parse web settings
2088
+ const webSettingsPath = resolveRelTarget(docRels, RelType.WebSettings, documentPartPath) ?? "word/webSettings.xml";
2089
+ const webSettingsXml = getText(webSettingsPath);
2090
+ const webSettings = webSettingsXml ? tryParse(() => parseWebSettings(webSettingsXml)) : undefined;
2091
+ // Parse people
2092
+ const peoplePath = resolveRelTarget(docRels, RelType.People, documentPartPath) ?? "word/people.xml";
2093
+ const peopleXml = getText(peoplePath);
2094
+ const people = peopleXml ? tryParse(() => parsePeople(peopleXml)) : undefined;
2095
+ // Parse thumbnail (from package rels — the previously loaded package rels XML is parsed again here)
2096
+ let thumbnail;
2097
+ if (packageRelsXmlEarly) {
2098
+ const pkgRels = parseRelationships(packageRelsXmlEarly);
2099
+ for (const rel of pkgRels) {
2100
+ if (rel.type.endsWith("/thumbnail")) {
2101
+ // Target in package rels is relative to package root; may include or exclude leading slash
2102
+ let target = rel.target;
2103
+ if (target.startsWith("/")) {
2104
+ target = target.substring(1);
2105
+ }
2106
+ // If the target doesn't include docProps/ prefix, add it (some writers emit bare filenames)
2107
+ const normalized = target.includes("/") ? target : `docProps/${target}`;
2108
+ consumedPaths.add(normalized);
2109
+ const thumbData = entries.get(normalized);
2110
+ if (thumbData) {
2111
+ const ext = getFileExt(normalized);
2112
+ const ct = ext === "jpeg" || ext === "jpg"
2113
+ ? "image/jpeg"
2114
+ : ext === "png"
2115
+ ? "image/png"
2116
+ : "image/x-wmf";
2117
+ thumbnail = {
2118
+ contentType: ct,
2119
+ data: thumbData
2120
+ };
2121
+ }
2122
+ break;
2123
+ }
2124
+ }
2125
+ }
2126
+ // Parse font table
2127
+ const fontTablePath = resolveRelTarget(docRels, RelType.FontTable, documentPartPath) ?? "word/fontTable.xml";
2128
+ const fontTableXml = getText(fontTablePath);
2129
+ const fonts = fontTableXml ? tryParse(() => parseFontTableXml(fontTableXml)) : undefined;
2130
+ // Parse embedded fonts
2131
+ let embeddedFonts;
2132
+ const fontTableRelsXml = getText("word/_rels/fontTable.xml.rels");
2133
+ if (fontTableRelsXml && fonts) {
2134
+ const fontRels = parseRelationships(fontTableRelsXml);
2135
+ const efs = [];
2136
+ // Build rId → font key map from the font table entries
2137
+ const rIdToKey = new Map();
2138
+ for (const f of fonts) {
2139
+ if (f.embedRegular && f.embedRegularKey) {
2140
+ rIdToKey.set(f.embedRegular, f.embedRegularKey);
2141
+ }
2142
+ if (f.embedBold && f.embedBoldKey) {
2143
+ rIdToKey.set(f.embedBold, f.embedBoldKey);
2144
+ }
2145
+ if (f.embedItalic && f.embedItalicKey) {
2146
+ rIdToKey.set(f.embedItalic, f.embedItalicKey);
2147
+ }
2148
+ if (f.embedBoldItalic && f.embedBoldItalicKey) {
2149
+ rIdToKey.set(f.embedBoldItalic, f.embedBoldItalicKey);
2150
+ }
2151
+ }
2152
+ for (const rel of fontRels) {
2153
+ if (rel.type === RelType.Font) {
2154
+ const fontPath = resolvePartPath("word/fontTable.xml", rel.target);
2155
+ consumedPaths.add(fontPath);
2156
+ const data = entries.get(fontPath);
2157
+ if (data) {
2158
+ const fileName = getFileName(rel.target);
2159
+ const fontKey = rIdToKey.get(rel.id);
2160
+ const ef = {
2161
+ rId: rel.id,
2162
+ data,
2163
+ fileName
2164
+ };
2165
+ if (fontKey) {
2166
+ ef.fontKey = fontKey;
2167
+ }
2168
+ efs.push(ef);
2169
+ }
2170
+ }
2171
+ }
2172
+ if (efs.length > 0) {
2173
+ embeddedFonts = efs;
2174
+ }
2175
+ }
2176
+ // Parse Custom XML parts (for SDT data binding)
2177
+ const customXmlParts = [];
2178
+ for (const rel of docRels) {
2179
+ if (rel.type === RelType.CustomXml) {
2180
+ const targetPath = resolvePartPath(documentPartPath, rel.target);
2181
+ consumedPaths.add(targetPath);
2182
+ const xmlContent = getText(targetPath);
2183
+ if (!xmlContent) {
2184
+ continue;
2185
+ }
2186
+ // Parse itemProps*.xml to get storeItemID
2187
+ const fileName = getFileName(targetPath);
2188
+ // itemProps file is typically at the same directory
2189
+ const dir = targetPath.substring(0, targetPath.lastIndexOf("/"));
2190
+ // Extract item number from fileName (e.g. "item1.xml" → "1")
2191
+ const match = fileName.match(/item(\d+)\.xml$/);
2192
+ let itemId = "";
2193
+ let schemaReferences;
2194
+ if (match) {
2195
+ const num = match[1];
2196
+ const propsPath = `${dir}/itemProps${num}.xml`;
2197
+ consumedPaths.add(propsPath);
2198
+ const propsXml = getText(propsPath);
2199
+ if (propsXml) {
2200
+ const propsDoc = parseXml(propsXml);
2201
+ const dsItemEl = propsDoc.root;
2202
+ const id = dsItemEl.attributes["ds:itemID"];
2203
+ if (id) {
2204
+ itemId = id.replace(/[{}]/g, "");
2205
+ }
2206
+ // Schema references
2207
+ const refs = [];
2208
+ const schemaRefsEl = findChild(dsItemEl, "ds:schemaRefs") ?? findChild(dsItemEl, "schemaRefs");
2209
+ if (schemaRefsEl) {
2210
+ for (const srChild of schemaRefsEl.children) {
2211
+ if (srChild.type === "element") {
2212
+ const uri = srChild.attributes["ds:uri"] ?? srChild.attributes["uri"];
2213
+ if (uri) {
2214
+ refs.push(uri);
2215
+ }
2216
+ }
2217
+ }
2218
+ }
2219
+ if (refs.length > 0) {
2220
+ schemaReferences = refs;
2221
+ }
2222
+ }
2223
+ }
2224
+ customXmlParts.push({
2225
+ itemId,
2226
+ xmlContent,
2227
+ fileName,
2228
+ schemaReferences
2229
+ });
2230
+ }
2231
+ }
2232
+ // Parse core properties
2233
+ const corePropsXml = getText("docProps/core.xml");
2234
+ const coreProperties = corePropsXml ? tryParse(() => parseCoreProps(corePropsXml)) : undefined;
2235
+ // Parse app properties
2236
+ const appPropsXml = getText("docProps/app.xml");
2237
+ const appProperties = appPropsXml ? tryParse(() => parseAppProps(appPropsXml)) : undefined;
2238
+ // Parse comments — switch ctx.relMap to comments.xml.rels so any
2239
+ // hyperlinks/images referenced from inside comment paragraphs resolve
2240
+ // against the comment part's own relationships rather than document.xml.rels.
2241
+ const commentsXml = getText("word/comments.xml");
2242
+ let comments;
2243
+ if (commentsXml) {
2244
+ const commentsRelsPath = "word/_rels/comments.xml.rels";
2245
+ const commentsRelsXml = getText(commentsRelsPath);
2246
+ const savedRelMap = ctx.relMap;
2247
+ if (commentsRelsXml) {
2248
+ const commentsRels = parseRelationships(commentsRelsXml);
2249
+ ctx.relMap = new Map(commentsRels.map(r => [r.id, r]));
2250
+ consumedPaths.add(commentsRelsPath);
2251
+ }
2252
+ else {
2253
+ ctx.relMap = new Map();
2254
+ }
2255
+ comments = tryParse(() => parseCommentsXmlFromCtx(commentsXml, ctx));
2256
+ ctx.relMap = savedRelMap;
2257
+ }
2258
+ // Merge in commentsExtended.xml data if present
2259
+ const commentsExtXml = getText("word/commentsExtended.xml");
2260
+ if (commentsExtXml && comments) {
2261
+ const extMap = tryParse(() => parseCommentsExtendedXml(commentsExtXml));
2262
+ if (extMap) {
2263
+ comments = comments.map(c => {
2264
+ const firstPara = c.content[0];
2265
+ if (!firstPara?.paraId) {
2266
+ return c;
2267
+ }
2268
+ const ext = extMap.get(firstPara.paraId);
2269
+ if (!ext) {
2270
+ return c;
2271
+ }
2272
+ return {
2273
+ ...c,
2274
+ ...(ext.done !== undefined ? { done: ext.done } : {}),
2275
+ ...(ext.parentId !== undefined ? { parentId: ext.parentId } : {})
2276
+ };
2277
+ });
2278
+ }
2279
+ }
2280
+ // Parse custom properties
2281
+ const customPropsXml = getText("docProps/custom.xml");
2282
+ const customProperties = customPropsXml
2283
+ ? tryParse(() => parseCustomPropsXml(customPropsXml))
2284
+ : undefined;
2285
+ // Parse theme
2286
+ const themePath = resolveRelTarget(docRels, RelType.Theme, documentPartPath) ?? "word/theme/theme1.xml";
2287
+ const themeXml = getText(themePath);
2288
+ const theme = themeXml ? tryParse(() => parseThemeXml(themeXml)) : undefined;
2289
+ // Collect images from main document relationships
2290
+ const images = [];
2291
+ for (const rel of docRels) {
2292
+ if (rel.type === RelType.Image) {
2293
+ const imgPath = resolvePartPath(documentPartPath, rel.target);
2294
+ consumedPaths.add(imgPath);
2295
+ const data = entries.get(imgPath);
2296
+ if (data) {
2297
+ const fileName = getFileName(rel.target);
2298
+ const ext = getFileExt(fileName) || "png";
2299
+ images.push({
2300
+ data,
2301
+ mediaType: ext,
2302
+ fileName,
2303
+ rId: rel.id
2304
+ });
2305
+ }
2306
+ }
2307
+ }
2308
+ // Also collect images from header/footer relationships to ensure full round-trip.
2309
+ // Headers and footers have their own .rels files which may reference unique
2310
+ // images, OR they may share a media file with the main document. In the
2311
+ // latter case we keep the existing ImageDef but record the local rId as an
2312
+ // alias so the packager can rebuild header1.xml.rels with the original
2313
+ // (header-local) id intact.
2314
+ const collectedImagePaths = new Map();
2315
+ for (const img of images) {
2316
+ collectedImagePaths.set(img.fileName, img);
2317
+ }
2318
+ for (const rel of docRels) {
2319
+ if (rel.type !== RelType.Header && rel.type !== RelType.Footer) {
2320
+ continue;
2321
+ }
2322
+ const partPath = resolvePartPath(documentPartPath, rel.target);
2323
+ const partRelsPath = getPartRelsPath(partPath);
2324
+ const partRelsXml = entries.get(partRelsPath);
2325
+ if (!partRelsXml) {
2326
+ continue;
2327
+ }
2328
+ const partRels = parseRelationships(decoder.decode(partRelsXml));
2329
+ for (const pRel of partRels) {
2330
+ if (pRel.type !== RelType.Image) {
2331
+ continue;
2332
+ }
2333
+ const imgPath = resolvePartPath(partPath, pRel.target);
2334
+ consumedPaths.add(imgPath);
2335
+ const data = entries.get(imgPath);
2336
+ if (!data) {
2337
+ continue;
2338
+ }
2339
+ const fileName = getFileName(pRel.target);
2340
+ const existing = collectedImagePaths.get(fileName);
2341
+ if (existing) {
2342
+ // Same file name as an image we already collected (lookup is keyed by file name, not by resolved path) — keep one ImageDef and
2343
+ // append this part-local rId to its aliases (if it differs from the
2344
+ // primary rId and isn't already recorded).
2345
+ if (pRel.id && pRel.id !== existing.rId) {
2346
+ const aliases = existing.aliasRIds ? [...existing.aliasRIds] : [];
2347
+ if (!aliases.includes(pRel.id)) {
2348
+ aliases.push(pRel.id);
2349
+ existing.aliasRIds = aliases;
2350
+ }
2351
+ }
2352
+ continue;
2353
+ }
2354
+ const ext = getFileExt(fileName) || "png";
2355
+ const newImg = {
2356
+ data,
2357
+ mediaType: ext,
2358
+ fileName,
2359
+ rId: pRel.id
2360
+ };
2361
+ images.push(newImg);
2362
+ collectedImagePaths.set(fileName, newImg);
2363
+ }
2364
+ }
2365
+ // Parse chart parts and replace opaque drawings with typed ChartContent
2366
+ const chartRIdToChart = new Map();
2367
+ for (const rel of docRels) {
2368
+ if (rel.type === RelType.Chart) {
2369
+ const chartPath = resolvePartPath(documentPartPath, rel.target);
2370
+ consumedPaths.add(chartPath);
2371
+ const chartXml = getText(chartPath);
2372
+ if (chartXml) {
2373
+ const chart = tryParse(() => parseChartXml(chartXml));
2374
+ if (chart) {
2375
+ chartRIdToChart.set(rel.id, chart);
2376
+ }
2377
+ }
2378
+ }
2379
+ }
2380
+ // Replace OpaqueDrawing items that reference chart rIds with proper ChartContent
2381
+ if (chartRIdToChart.size > 0) {
2382
+ replaceOpaqueCharts(body, chartRIdToChart);
2383
+ }
2384
+ // Parse ChartEx parts and replace opaque drawings with typed ChartExContent
2385
+ const chartExRIdToContent = new Map();
2386
+ for (const rel of docRels) {
2387
+ if (rel.type === RelType.ChartEx) {
2388
+ const chartExPath = resolvePartPath(documentPartPath, rel.target);
2389
+ consumedPaths.add(chartExPath);
2390
+ const chartExXml = getText(chartExPath);
2391
+ if (chartExXml) {
2392
+ const data = tryParse(() => parseChartExXml(chartExXml));
2393
+ const content = {
2394
+ type: "chartEx",
2395
+ chartExXml,
2396
+ ...(data !== undefined && { data })
2397
+ };
2398
+ chartExRIdToContent.set(rel.id, content);
2399
+ }
2400
+ }
2401
+ }
2402
+ // Replace OpaqueDrawing items that reference ChartEx rIds with proper ChartExContent
2403
+ if (chartExRIdToContent.size > 0) {
2404
+ replaceOpaqueChartExDrawings(body, chartExRIdToContent);
2405
+ }
2406
+ // Detect document type from main document part content type
2407
+ let docType;
2408
+ const mainDocCT = contentTypeOverrides.get(documentPartPath) ?? contentTypeOverrides.get(`/${documentPartPath}`);
2409
+ if (mainDocCT) {
2410
+ if (mainDocCT.includes("template.main") && mainDocCT.includes("macroEnabled")) {
2411
+ docType = "macroEnabledTemplate";
2412
+ }
2413
+ else if (mainDocCT.includes("template.main")) {
2414
+ docType = "template";
2415
+ }
2416
+ else if (mainDocCT.includes("macroEnabled")) {
2417
+ docType = "macroEnabledDocument";
2418
+ }
2419
+ // "document" is the default — only set if non-standard
2420
+ }
2421
+ // Extract VBA project binary for .docm/.dotm round-trip.
2422
+ // Honour `preserveVbaProject`: if disabled, mark the relationship's
2423
+ // target consumed (so opaqueParts won't retain it either) but leave
2424
+ // `vbaProject` undefined so the produced model does not surface macro
2425
+ // payloads to downstream consumers.
2426
+ let vbaProject;
2427
+ for (const rel of docRels) {
2428
+ if (rel.type === RelType.VbaProject) {
2429
+ const vbaPath = resolvePartPath(documentPartPath, rel.target);
2430
+ consumedPaths.add(vbaPath);
2431
+ if (policy.preserveVbaProject) {
2432
+ vbaProject = entries.get(vbaPath);
2433
+ }
2434
+ break;
2435
+ }
2436
+ }
2437
+ // Resolve altChunk data: body elements of type "altChunk" reference a rId.
2438
+ // The target file is stored in docRels + entries. We populate the altChunk
2439
+ // body item with its data here AND mark the target path as consumed so the
2440
+ // opaqueParts collector below does not retain a duplicate copy that would
2441
+ // later be written back to the ZIP twice.
2442
+ //
2443
+ // Honour `preserveAltChunks`: when disabled, we still consume the target
2444
+ // path (so it doesn't leak into opaqueParts) but skip data attachment
2445
+ // and remove altChunk entries from the body before the document is
2446
+ // returned. Embedded HTML/RTF in altChunks is a common attack vector
2447
+ // for downstream renderers, so strict mode strips them entirely.
2448
+ for (const item of body) {
2449
+ if (item.type === "altChunk" && item.rId) {
2450
+ const rel = _relMap.get(item.rId);
2451
+ if (rel) {
2452
+ const target = resolvePartPath(documentPartPath, rel.target);
2453
+ const targetData = entries.get(target);
2454
+ if (targetData) {
2455
+ consumedPaths.add(target);
2456
+ if (policy.preserveAltChunks) {
2457
+ const fileName = getFileName(target);
2458
+ const mItem = item;
2459
+ mItem.data = targetData;
2460
+ mItem.fileName = fileName;
2461
+ // Infer content type from extension
2462
+ const ext = fileName ? getFileExt(fileName) : "";
2463
+ if (ext === "html" || ext === "htm") {
2464
+ mItem.contentType = "text/html";
2465
+ }
2466
+ else if (ext === "rtf") {
2467
+ mItem.contentType = "text/rtf";
2468
+ }
2469
+ else if (ext === "txt") {
2470
+ mItem.contentType = "text/plain";
2471
+ }
2472
+ }
2473
+ }
2474
+ }
2475
+ }
2476
+ }
2477
+ // Remove altChunk body entries entirely when not preserving them.
2478
+ if (!policy.preserveAltChunks) {
2479
+ for (let i = body.length - 1; i >= 0; i--) {
2480
+ if (body[i].type === "altChunk") {
2481
+ body.splice(i, 1);
2482
+ }
2483
+ }
2484
+ }
2485
+ // Collect opaque (unrecognized) parts for round-trip preservation
2486
+ const opaqueParts = [];
2487
+ for (const [path, data] of entries) {
2488
+ // Skip consumed paths and all .rels files (structural)
2489
+ if (consumedPaths.has(path) || path.includes("_rels/")) {
2490
+ continue;
2491
+ }
2492
+ // Honour `preserveOleObjects`: when disabled, drop OLE embedding
2493
+ // binaries (word/embeddings/*.bin and similar) before they reach the
2494
+ // returned model. The relationship targets remain in their parent
2495
+ // part's .rels, so the caller is responsible for stripping or
2496
+ // ignoring those if they need a fully-clean document.
2497
+ if (!policy.preserveOleObjects &&
2498
+ (path.startsWith("word/embeddings/") || (path.endsWith(".bin") && path.includes("embed")))) {
2499
+ continue;
2500
+ }
2501
+ // Parse rels for this part if they exist
2502
+ const partRelsPath = getPartRelsPath(path);
2503
+ const partRelsData = entries.get(partRelsPath);
2504
+ let relationships;
2505
+ if (partRelsData) {
2506
+ const rels = parseRelationships(decoder.decode(partRelsData));
2507
+ relationships = rels.map(r => ({
2508
+ id: r.id,
2509
+ type: r.type,
2510
+ target: r.target,
2511
+ // Preserve the source string verbatim ("External", "Internal", or
2512
+ // any non-standard value) so opaque round-trip is byte-faithful.
2513
+ targetMode: r.targetMode
2514
+ }));
2515
+ }
2516
+ // Resolve content type from [Content_Types].xml (override > default by extension)
2517
+ let contentType = contentTypeOverrides.get(path);
2518
+ if (!contentType) {
2519
+ const ext = getFileExt(path);
2520
+ contentType = contentTypeDefaults.get(ext);
2521
+ }
2522
+ opaqueParts.push({ path, data, contentType, relationships });
2523
+ }
2524
+ return {
2525
+ ...(docType ? { docType } : {}),
2526
+ body,
2527
+ sectionProperties,
2528
+ styles: stylesResult?.styles,
2529
+ docDefaults: stylesResult?.docDefaults,
2530
+ abstractNumberings: numberingResult?.abstractNums,
2531
+ numberingInstances: numberingResult?.instances,
2532
+ numPicBullets: numberingResult?.numPicBullets && numberingResult.numPicBullets.length > 0
2533
+ ? numberingResult.numPicBullets
2534
+ : undefined,
2535
+ headers: headers.size > 0 ? headers : undefined,
2536
+ footers: footers.size > 0 ? footers : undefined,
2537
+ footnotes: footnotes && footnotes.length > 0 ? footnotes : undefined,
2538
+ endnotes: endnotes && endnotes.length > 0 ? endnotes : undefined,
2539
+ images: images.length > 0 ? images : undefined,
2540
+ fonts: fonts && fonts.length > 0 ? fonts : undefined,
2541
+ embeddedFonts: embeddedFonts && embeddedFonts.length > 0 ? embeddedFonts : undefined,
2542
+ customXmlParts: customXmlParts.length > 0 ? customXmlParts : undefined,
2543
+ webSettings,
2544
+ thumbnail,
2545
+ people: people && people.length > 0 ? people : undefined,
2546
+ settings,
2547
+ coreProperties,
2548
+ appProperties,
2549
+ comments: comments && comments.length > 0 ? comments : undefined,
2550
+ background,
2551
+ customProperties: customProperties && customProperties.length > 0 ? customProperties : undefined,
2552
+ theme,
2553
+ watermark,
2554
+ opaqueParts: opaqueParts.length > 0 ? opaqueParts : undefined,
2555
+ vbaProject
2556
+ };
2557
+ }