@cj-tech-master/excelts 9.5.4 → 9.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/modules/archive/compression/streaming-compress.browser.js +29 -0
- package/dist/browser/modules/archive/compression/streaming-compress.js +9 -0
- package/dist/browser/modules/archive/compression/worker-pool/pool.browser.js +26 -1
- package/dist/browser/modules/archive/fs/archive-file.d.ts +8 -5
- package/dist/browser/modules/archive/fs/archive-file.js +78 -16
- package/dist/browser/modules/archive/unzip/stream.browser.js +43 -2
- package/dist/browser/modules/excel/chart/chart-ex-builder.js +7 -2
- package/dist/browser/modules/excel/chart/chart-ex-renderer.js +4 -9
- package/dist/browser/modules/excel/chart/chart-ex-types.d.ts +0 -12
- package/dist/browser/modules/excel/chart/chart.d.ts +1 -5
- package/dist/browser/modules/excel/chart/chart.js +1 -7
- package/dist/browser/modules/excel/chart/types.d.ts +0 -6
- package/dist/browser/modules/excel/stream/workbook-reader.browser.js +25 -1
- package/dist/browser/modules/excel/stream/workbook-reader.js +9 -0
- package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +40 -0
- package/dist/browser/modules/excel/stream/workbook-writer.browser.js +228 -13
- package/dist/browser/modules/excel/utils/string-buf.d.ts +5 -26
- package/dist/browser/modules/excel/utils/string-buf.js +4 -81
- package/dist/browser/modules/excel/workbook.browser.js +135 -25
- package/dist/browser/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
- package/dist/browser/modules/excel/xlsx/xlsx.browser.d.ts +19 -9
- package/dist/browser/modules/excel/xlsx/xlsx.browser.js +32 -8
- package/dist/browser/modules/excel/xlsx/xlsx.d.ts +10 -2
- package/dist/browser/modules/excel/xlsx/xlsx.js +9 -1
- package/dist/browser/modules/pdf/excel-bridge.d.ts +30 -1
- package/dist/browser/modules/pdf/excel-bridge.js +32 -0
- package/dist/browser/modules/pdf/font/metrics.d.ts +3 -52
- package/dist/browser/modules/pdf/font/metrics.js +3 -237
- package/dist/browser/modules/pdf/index.d.ts +1 -1
- package/dist/browser/modules/pdf/index.js +1 -1
- package/dist/browser/modules/pdf/render-layout-to-pdf.d.ts +66 -0
- package/dist/browser/modules/pdf/render-layout-to-pdf.js +647 -0
- package/dist/browser/modules/pdf/word-bridge.d.ts +80 -12
- package/dist/browser/modules/pdf/word-bridge.js +122 -274
- package/dist/browser/modules/stream/index.base.d.ts +2 -0
- package/dist/browser/modules/stream/index.base.js +2 -1
- package/dist/browser/modules/stream/internal/sink-adapter.d.ts +65 -0
- package/dist/browser/modules/stream/internal/sink-adapter.js +198 -0
- package/dist/browser/modules/stream/pull-stream.d.ts +19 -2
- package/dist/browser/modules/stream/pull-stream.js +51 -5
- package/dist/browser/modules/stream/types.d.ts +13 -1
- package/dist/browser/modules/word/advanced/diff.d.ts +61 -0
- package/dist/browser/modules/word/advanced/diff.js +167 -0
- package/dist/browser/modules/word/advanced/drawing-shapes.d.ts +269 -0
- package/dist/browser/modules/word/advanced/drawing-shapes.js +268 -0
- package/dist/browser/modules/word/advanced/field-engine.d.ts +43 -0
- package/dist/browser/modules/word/advanced/field-engine.js +1225 -0
- package/dist/browser/modules/word/advanced/glossary.d.ts +86 -0
- package/dist/browser/modules/word/advanced/glossary.js +79 -0
- package/dist/browser/modules/word/advanced/math-convert.d.ts +30 -0
- package/dist/browser/modules/word/advanced/math-convert.js +595 -0
- package/dist/browser/modules/word/advanced/ole-objects.d.ts +115 -0
- package/dist/browser/modules/word/advanced/ole-objects.js +271 -0
- package/dist/browser/modules/word/advanced/style-map.d.ts +105 -0
- package/dist/browser/modules/word/advanced/style-map.js +322 -0
- package/dist/browser/modules/word/advanced/validation.d.ts +56 -0
- package/dist/browser/modules/word/advanced/validation.js +1065 -0
- package/dist/browser/modules/word/advanced/vba-project.d.ts +91 -0
- package/dist/browser/modules/word/advanced/vba-project.js +265 -0
- package/dist/browser/modules/word/bridge/excel-bridge.d.ts +127 -0
- package/dist/browser/modules/word/bridge/excel-bridge.js +980 -0
- package/dist/browser/modules/word/builder/document-handle.d.ts +151 -0
- package/dist/browser/modules/word/builder/document-handle.js +664 -0
- package/dist/browser/modules/word/builder/paragraph-builders.d.ts +61 -0
- package/dist/browser/modules/word/builder/paragraph-builders.js +90 -0
- package/dist/browser/modules/word/builder/run-builders.d.ts +374 -0
- package/dist/browser/modules/word/builder/run-builders.js +600 -0
- package/dist/browser/modules/word/builder/table-builders.d.ts +23 -0
- package/dist/browser/modules/word/builder/table-builders.js +45 -0
- package/dist/browser/modules/word/constants.d.ts +39 -1
- package/dist/browser/modules/word/constants.js +109 -1
- package/dist/browser/modules/word/convert/conversion-ir.d.ts +210 -0
- package/dist/browser/modules/word/convert/conversion-ir.js +31 -0
- package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +39 -0
- package/dist/browser/modules/word/convert/docx-to-semantic.js +499 -0
- package/dist/browser/modules/word/convert/flat-opc.d.ts +44 -0
- package/dist/browser/modules/word/convert/flat-opc.js +385 -0
- package/dist/browser/modules/word/convert/html/html-import.d.ts +50 -0
- package/dist/browser/modules/word/convert/html/html-import.js +1907 -0
- package/dist/{types/modules/word → browser/modules/word/convert/html}/html-renderer.d.ts +14 -1
- package/dist/{esm/modules/word → browser/modules/word/convert/html}/html-renderer.js +420 -69
- package/dist/browser/modules/word/convert/html/html.d.ts +15 -0
- package/dist/browser/modules/word/convert/html/html.js +15 -0
- package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +68 -0
- package/dist/browser/modules/word/convert/markdown/markdown-import.js +1325 -0
- package/dist/browser/modules/word/convert/markdown/markdown-renderer.d.ts +25 -0
- package/dist/browser/modules/word/convert/markdown/markdown-renderer.js +634 -0
- package/dist/browser/modules/word/convert/markdown/markdown.d.ts +15 -0
- package/dist/browser/modules/word/convert/markdown/markdown.js +15 -0
- package/dist/browser/modules/word/convert/odt/odt.d.ts +41 -0
- package/dist/browser/modules/word/convert/odt/odt.js +1932 -0
- package/dist/browser/modules/word/{color-utils.d.ts → core/color-utils.d.ts} +8 -1
- package/dist/browser/modules/word/core/color-utils.js +43 -0
- package/dist/browser/modules/word/core/internal-utils.d.ts +90 -0
- package/dist/browser/modules/word/core/internal-utils.js +209 -0
- package/dist/browser/modules/word/core/mapper.d.ts +44 -0
- package/dist/browser/modules/word/core/mapper.js +427 -0
- package/dist/browser/modules/word/core/opc-paths.d.ts +33 -0
- package/dist/browser/modules/word/core/opc-paths.js +48 -0
- package/dist/browser/modules/word/core/text-utils.d.ts +38 -0
- package/dist/browser/modules/word/core/text-utils.js +202 -0
- package/dist/browser/modules/word/core/walker.d.ts +119 -0
- package/dist/browser/modules/word/core/walker.js +570 -0
- package/dist/browser/modules/word/crypto.d.ts +14 -9
- package/dist/browser/modules/word/crypto.js +13 -7
- package/dist/browser/modules/word/document-io.d.ts +59 -27
- package/dist/browser/modules/word/document-io.js +80 -197
- package/dist/browser/modules/word/errors.d.ts +44 -1
- package/dist/browser/modules/word/errors.js +54 -2
- package/dist/browser/modules/word/excel.d.ts +14 -0
- package/dist/browser/modules/word/excel.js +13 -0
- package/dist/browser/modules/word/font/font-embed.d.ts +112 -0
- package/dist/browser/modules/word/font/font-embed.js +646 -0
- package/dist/{esm/modules/word → browser/modules/word/font}/font-obfuscation.js +4 -9
- package/dist/browser/modules/word/font/hyphenation.d.ts +65 -0
- package/dist/browser/modules/word/font/hyphenation.js +4210 -0
- package/dist/browser/modules/word/font/text-shaping.d.ts +58 -0
- package/dist/browser/modules/word/font/text-shaping.js +635 -0
- package/dist/browser/modules/word/html.d.ts +7 -6
- package/dist/browser/modules/word/html.js +6 -5
- package/dist/browser/modules/word/incremental-edit.d.ts +123 -0
- package/dist/browser/modules/word/incremental-edit.js +361 -0
- package/dist/browser/modules/word/index.base.d.ts +194 -10
- package/dist/browser/modules/word/index.base.js +138 -29
- package/dist/browser/modules/word/layout/layout-constants.d.ts +17 -0
- package/dist/browser/modules/word/layout/layout-constants.js +17 -0
- package/dist/browser/modules/word/layout/layout-full.d.ts +53 -0
- package/dist/browser/modules/word/layout/layout-full.js +1696 -0
- package/dist/browser/modules/word/layout/layout-model.d.ts +344 -0
- package/dist/browser/modules/word/layout/layout-model.js +16 -0
- package/dist/browser/modules/word/layout/layout.d.ts +63 -0
- package/dist/browser/modules/word/layout/layout.js +1167 -0
- package/dist/browser/modules/word/layout/render-page.d.ts +57 -0
- package/dist/browser/modules/word/layout/render-page.js +1238 -0
- package/dist/browser/modules/word/markdown.d.ts +14 -0
- package/dist/browser/modules/word/markdown.js +13 -0
- package/dist/browser/modules/word/patcher.d.ts +62 -0
- package/dist/browser/modules/word/patcher.js +537 -0
- package/dist/browser/modules/word/query/compat.d.ts +25 -0
- package/dist/browser/modules/word/query/compat.js +58 -0
- package/dist/browser/modules/word/query/data-binding.d.ts +22 -0
- package/dist/browser/modules/word/query/data-binding.js +392 -0
- package/dist/browser/modules/word/query/form-fields.d.ts +41 -0
- package/dist/browser/modules/word/query/form-fields.js +268 -0
- package/dist/browser/modules/word/query/format-search.d.ts +99 -0
- package/dist/browser/modules/word/query/format-search.js +329 -0
- package/dist/browser/modules/word/query/mail-merge.d.ts +25 -0
- package/dist/browser/modules/word/query/mail-merge.js +111 -0
- package/dist/browser/modules/word/query/merge.d.ts +50 -0
- package/dist/browser/modules/word/query/merge.js +617 -0
- package/dist/browser/modules/word/query/replace.d.ts +47 -0
- package/dist/browser/modules/word/query/replace.js +301 -0
- package/dist/browser/modules/word/query/revisions.d.ts +67 -0
- package/dist/browser/modules/word/query/revisions.js +879 -0
- package/dist/browser/modules/word/query/search.d.ts +129 -0
- package/dist/browser/modules/word/query/search.js +346 -0
- package/dist/browser/modules/word/query/split.d.ts +44 -0
- package/dist/browser/modules/word/query/split.js +135 -0
- package/dist/browser/modules/word/query/style-resolve.d.ts +104 -0
- package/dist/browser/modules/word/query/style-resolve.js +368 -0
- package/dist/browser/modules/word/reader/chart-parser.d.ts +20 -0
- package/dist/browser/modules/word/reader/chart-parser.js +810 -0
- package/dist/browser/modules/word/reader/comments-parser.d.ts +26 -0
- package/dist/browser/modules/word/reader/comments-parser.js +92 -0
- package/dist/browser/modules/word/reader/doc-props-parsers.d.ts +15 -0
- package/dist/browser/modules/word/reader/doc-props-parsers.js +190 -0
- package/dist/browser/modules/word/reader/docx-reader.d.ts +27 -0
- package/dist/browser/modules/word/reader/docx-reader.js +2557 -0
- package/dist/browser/modules/word/reader/drawing-helpers.d.ts +27 -0
- package/dist/browser/modules/word/reader/drawing-helpers.js +84 -0
- package/dist/browser/modules/word/reader/form-field-parser.d.ts +21 -0
- package/dist/browser/modules/word/reader/form-field-parser.js +82 -0
- package/dist/browser/modules/word/reader/image-parsers.d.ts +11 -0
- package/dist/browser/modules/word/reader/image-parsers.js +291 -0
- package/dist/browser/modules/word/reader/math-parser.d.ts +12 -0
- package/dist/browser/modules/word/reader/math-parser.js +422 -0
- package/dist/browser/modules/word/reader/metadata-parsers.d.ts +17 -0
- package/dist/browser/modules/word/reader/metadata-parsers.js +87 -0
- package/dist/browser/modules/word/reader/numbering-parser.d.ts +13 -0
- package/dist/browser/modules/word/reader/numbering-parser.js +166 -0
- package/dist/browser/modules/word/reader/paragraph-section-parsers.d.ts +12 -0
- package/dist/browser/modules/word/reader/paragraph-section-parsers.js +503 -0
- package/dist/browser/modules/word/reader/parse-utils.d.ts +91 -0
- package/dist/browser/modules/word/reader/parse-utils.js +249 -0
- package/dist/browser/modules/word/reader/properties-parsers.d.ts +21 -0
- package/dist/browser/modules/word/reader/properties-parsers.js +332 -0
- package/dist/browser/modules/word/reader/reader-context.d.ts +69 -0
- package/dist/browser/modules/word/reader/reader-context.js +61 -0
- package/dist/browser/modules/word/reader/sdt-helpers.d.ts +29 -0
- package/dist/browser/modules/word/reader/sdt-helpers.js +111 -0
- package/dist/browser/modules/word/reader/settings-parser.d.ts +8 -0
- package/dist/browser/modules/word/reader/settings-parser.js +263 -0
- package/dist/browser/modules/word/reader/styles-parser.d.ts +12 -0
- package/dist/browser/modules/word/reader/styles-parser.js +147 -0
- package/dist/browser/modules/word/reader/table-properties-parsers.d.ts +12 -0
- package/dist/browser/modules/word/reader/table-properties-parsers.js +234 -0
- package/dist/browser/modules/word/reader/theme-parser.d.ts +8 -0
- package/dist/browser/modules/word/reader/theme-parser.js +167 -0
- package/dist/browser/modules/word/reader/watermark-parser.d.ts +15 -0
- package/dist/browser/modules/word/reader/watermark-parser.js +110 -0
- package/dist/browser/modules/word/security/cfb-reader.d.ts +37 -0
- package/dist/browser/modules/word/security/cfb-reader.js +410 -0
- package/dist/browser/modules/word/{digital-signatures.d.ts → security/digital-signatures.d.ts} +19 -11
- package/dist/browser/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
- package/dist/browser/modules/word/security/document-protection.d.ts +93 -0
- package/dist/browser/modules/word/security/document-protection.js +201 -0
- package/dist/{types/modules/word → browser/modules/word/security}/encryption.d.ts +51 -4
- package/dist/browser/modules/word/security/encryption.js +602 -0
- package/dist/browser/modules/word/security/policy.d.ts +80 -0
- package/dist/browser/modules/word/security/policy.js +102 -0
- package/dist/browser/modules/word/template/template-chart.d.ts +56 -0
- package/dist/browser/modules/word/template/template-chart.js +167 -0
- package/dist/browser/modules/word/template/template-datasource.d.ts +154 -0
- package/dist/browser/modules/word/template/template-datasource.js +541 -0
- package/dist/browser/modules/word/template/template-engine.d.ts +121 -0
- package/dist/browser/modules/word/template/template-engine.js +1435 -0
- package/dist/browser/modules/word/types.d.ts +224 -25
- package/dist/browser/modules/word/units.d.ts +26 -0
- package/dist/browser/modules/word/units.js +43 -14
- package/dist/browser/modules/word/{writers → writer}/chart-writer.js +164 -23
- package/dist/browser/modules/word/writer/checkbox-writer.d.ts +17 -0
- package/dist/browser/modules/word/writer/checkbox-writer.js +79 -0
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/comment-writer.d.ts +2 -1
- package/dist/browser/modules/word/{writers → writer}/comment-writer.js +8 -6
- package/dist/browser/modules/word/writer/common-parts.d.ts +57 -0
- package/dist/browser/modules/word/writer/common-parts.js +101 -0
- package/dist/{types/modules/word → browser/modules/word/writer}/content-types.d.ts +2 -2
- package/dist/{esm/modules/word → browser/modules/word/writer}/content-types.js +14 -6
- package/dist/browser/modules/word/writer/document-writer.d.ts +24 -0
- package/dist/browser/modules/word/writer/document-writer.js +473 -0
- package/dist/browser/modules/word/writer/docx-packager.d.ts +35 -0
- package/dist/browser/modules/word/writer/docx-packager.js +1515 -0
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/footnote-writer.d.ts +3 -2
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/footnote-writer.js +13 -10
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/header-footer-writer.d.ts +3 -2
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/header-footer-writer.js +39 -21
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/image-writer.d.ts +1 -1
- package/dist/browser/modules/word/{writers → writer}/image-writer.js +11 -7
- package/dist/browser/modules/word/writer/math-writer.d.ts +20 -0
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/math-writer.js +21 -1
- package/dist/browser/modules/word/{writers → writer}/numbering-writer.d.ts +1 -1
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/numbering-writer.js +11 -4
- package/dist/browser/modules/word/{writers → writer}/paragraph-writer.d.ts +2 -1
- package/dist/browser/modules/word/{writers → writer}/paragraph-writer.js +73 -38
- package/dist/browser/modules/word/{writers → writer}/parts-writer.d.ts +3 -3
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/parts-writer.js +91 -12
- package/dist/browser/modules/word/writer/reference-scanners.d.ts +42 -0
- package/dist/browser/modules/word/writer/reference-scanners.js +111 -0
- package/dist/browser/modules/word/writer/relationships.d.ts +52 -0
- package/dist/browser/modules/word/writer/relationships.js +117 -0
- package/dist/browser/modules/word/writer/render-context.d.ts +124 -0
- package/dist/browser/modules/word/writer/render-context.js +46 -0
- package/dist/browser/modules/word/{writers → writer}/run-writer.d.ts +10 -1
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/run-writer.js +126 -24
- package/dist/browser/modules/word/writer/sdt-writer.d.ts +25 -0
- package/dist/browser/modules/word/writer/sdt-writer.js +189 -0
- package/dist/browser/modules/word/writer/stream-buf.d.ts +37 -0
- package/dist/browser/modules/word/writer/stream-buf.js +73 -0
- package/dist/browser/modules/word/writer/streaming-writer.d.ts +344 -0
- package/dist/browser/modules/word/writer/streaming-writer.js +1382 -0
- package/dist/browser/modules/word/writer/string-buf.d.ts +8 -0
- package/dist/browser/modules/word/writer/string-buf.js +7 -0
- package/dist/browser/modules/word/{writers → writer}/styles-writer.js +32 -1
- package/dist/browser/modules/word/{writers → writer}/table-writer.d.ts +2 -1
- package/dist/browser/modules/word/{writers → writer}/table-writer.js +94 -11
- package/dist/browser/modules/xml/types.d.ts +22 -0
- package/dist/browser/utils/crypto.browser.d.ts +3 -1
- package/dist/browser/utils/crypto.browser.js +3 -1
- package/dist/browser/utils/crypto.d.ts +4 -1
- package/dist/browser/utils/crypto.js +4 -1
- package/dist/browser/utils/font-metrics.d.ts +63 -0
- package/dist/browser/utils/font-metrics.js +293 -0
- package/dist/browser/utils/string-buf.d.ts +42 -0
- package/dist/browser/utils/string-buf.js +89 -0
- package/dist/browser/utils/theme-colors.d.ts +55 -0
- package/dist/browser/utils/theme-colors.js +120 -0
- package/dist/cjs/modules/archive/compression/streaming-compress.browser.js +29 -0
- package/dist/cjs/modules/archive/compression/streaming-compress.js +9 -0
- package/dist/cjs/modules/archive/compression/worker-pool/pool.browser.js +26 -1
- package/dist/cjs/modules/archive/fs/archive-file.js +78 -16
- package/dist/cjs/modules/archive/unzip/stream.browser.js +43 -2
- package/dist/cjs/modules/excel/chart/chart-ex-builder.js +7 -2
- package/dist/cjs/modules/excel/chart/chart-ex-renderer.js +4 -9
- package/dist/cjs/modules/excel/chart/chart.js +1 -7
- package/dist/cjs/modules/excel/stream/workbook-reader.browser.js +25 -1
- package/dist/cjs/modules/excel/stream/workbook-reader.js +9 -0
- package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +228 -13
- package/dist/cjs/modules/excel/utils/string-buf.js +5 -81
- package/dist/cjs/modules/excel/workbook.browser.js +135 -25
- package/dist/cjs/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
- package/dist/cjs/modules/excel/xlsx/xlsx.browser.js +32 -8
- package/dist/cjs/modules/excel/xlsx/xlsx.js +9 -1
- package/dist/cjs/modules/pdf/excel-bridge.js +33 -0
- package/dist/cjs/modules/pdf/font/metrics.js +11 -244
- package/dist/cjs/modules/pdf/index.js +2 -1
- package/dist/cjs/modules/pdf/render-layout-to-pdf.js +651 -0
- package/dist/cjs/modules/pdf/word-bridge.js +155 -274
- package/dist/cjs/modules/stream/index.base.js +4 -2
- package/dist/cjs/modules/stream/internal/sink-adapter.js +202 -0
- package/dist/cjs/modules/stream/pull-stream.js +51 -5
- package/dist/cjs/modules/word/advanced/diff.js +170 -0
- package/dist/cjs/modules/word/advanced/drawing-shapes.js +279 -0
- package/dist/cjs/modules/word/advanced/field-engine.js +1229 -0
- package/dist/cjs/modules/word/advanced/glossary.js +87 -0
- package/dist/cjs/modules/word/advanced/math-convert.js +599 -0
- package/dist/cjs/modules/word/advanced/ole-objects.js +277 -0
- package/dist/cjs/modules/word/advanced/style-map.js +329 -0
- package/dist/cjs/modules/word/advanced/validation.js +1068 -0
- package/dist/cjs/modules/word/advanced/vba-project.js +274 -0
- package/dist/cjs/modules/word/bridge/excel-bridge.js +1020 -0
- package/dist/cjs/modules/word/builder/document-handle.js +667 -0
- package/dist/cjs/modules/word/builder/paragraph-builders.js +109 -0
- package/dist/cjs/modules/word/builder/run-builders.js +676 -0
- package/dist/cjs/modules/word/builder/table-builders.js +53 -0
- package/dist/cjs/modules/word/constants.js +111 -2
- package/dist/cjs/modules/word/convert/conversion-ir.js +34 -0
- package/dist/cjs/modules/word/convert/docx-to-semantic.js +502 -0
- package/dist/cjs/modules/word/convert/flat-opc.js +390 -0
- package/dist/cjs/modules/word/convert/html/html-import.js +1910 -0
- package/dist/cjs/modules/word/{html-renderer.js → convert/html/html-renderer.js} +420 -69
- package/dist/cjs/modules/word/convert/html/html.js +20 -0
- package/dist/cjs/modules/word/convert/markdown/markdown-import.js +1329 -0
- package/dist/cjs/modules/word/convert/markdown/markdown-renderer.js +637 -0
- package/dist/cjs/modules/word/convert/markdown/markdown.js +21 -0
- package/dist/cjs/modules/word/convert/odt/odt.js +1936 -0
- package/dist/cjs/modules/word/core/color-utils.js +47 -0
- package/dist/cjs/modules/word/core/internal-utils.js +219 -0
- package/dist/cjs/modules/word/core/mapper.js +430 -0
- package/dist/cjs/modules/word/core/opc-paths.js +53 -0
- package/dist/cjs/modules/word/core/text-utils.js +210 -0
- package/dist/cjs/modules/word/core/walker.js +577 -0
- package/dist/cjs/modules/word/crypto.js +19 -8
- package/dist/cjs/modules/word/document-io.js +117 -197
- package/dist/cjs/modules/word/errors.js +59 -13
- package/dist/cjs/modules/word/excel.js +22 -0
- package/dist/cjs/modules/word/font/font-embed.js +652 -0
- package/dist/cjs/modules/word/{font-obfuscation.js → font/font-obfuscation.js} +4 -9
- package/dist/cjs/modules/word/font/hyphenation.js +4216 -0
- package/dist/cjs/modules/word/font/text-shaping.js +640 -0
- package/dist/cjs/modules/word/html.js +9 -7
- package/dist/cjs/modules/word/incremental-edit.js +366 -0
- package/dist/cjs/modules/word/index.base.js +370 -137
- package/dist/cjs/modules/word/layout/layout-constants.js +20 -0
- package/dist/cjs/modules/word/layout/layout-full.js +1699 -0
- package/dist/cjs/modules/word/layout/layout-model.js +17 -0
- package/dist/cjs/modules/word/layout/layout.js +1170 -0
- package/dist/cjs/modules/word/layout/render-page.js +1243 -0
- package/dist/cjs/modules/word/markdown.js +19 -0
- package/dist/cjs/modules/word/patcher.js +539 -0
- package/dist/cjs/modules/word/query/compat.js +61 -0
- package/dist/cjs/modules/word/query/data-binding.js +395 -0
- package/dist/cjs/modules/word/query/form-fields.js +272 -0
- package/dist/cjs/modules/word/query/format-search.js +334 -0
- package/dist/cjs/modules/word/query/mail-merge.js +114 -0
- package/dist/cjs/modules/word/query/merge.js +620 -0
- package/dist/cjs/modules/word/query/replace.js +304 -0
- package/dist/cjs/modules/word/query/revisions.js +885 -0
- package/dist/cjs/modules/word/query/search.js +361 -0
- package/dist/cjs/modules/word/query/split.js +138 -0
- package/dist/cjs/modules/word/query/style-resolve.js +374 -0
- package/dist/cjs/modules/word/reader/chart-parser.js +814 -0
- package/dist/cjs/modules/word/reader/comments-parser.js +96 -0
- package/dist/cjs/modules/word/reader/doc-props-parsers.js +194 -0
- package/dist/cjs/modules/word/reader/docx-reader.js +2560 -0
- package/dist/cjs/modules/word/reader/drawing-helpers.js +90 -0
- package/dist/cjs/modules/word/reader/form-field-parser.js +85 -0
- package/dist/cjs/modules/word/reader/image-parsers.js +293 -0
- package/dist/cjs/modules/word/reader/math-parser.js +424 -0
- package/dist/cjs/modules/word/reader/metadata-parsers.js +93 -0
- package/dist/cjs/modules/word/reader/numbering-parser.js +168 -0
- package/dist/cjs/modules/word/reader/paragraph-section-parsers.js +505 -0
- package/dist/cjs/modules/word/reader/parse-utils.js +271 -0
- package/dist/cjs/modules/word/reader/properties-parsers.js +338 -0
- package/dist/cjs/modules/word/reader/reader-context.js +66 -0
- package/dist/cjs/modules/word/reader/sdt-helpers.js +114 -0
- package/dist/cjs/modules/word/reader/settings-parser.js +265 -0
- package/dist/cjs/modules/word/reader/styles-parser.js +149 -0
- package/dist/cjs/modules/word/reader/table-properties-parsers.js +237 -0
- package/dist/cjs/modules/word/reader/theme-parser.js +169 -0
- package/dist/cjs/modules/word/reader/watermark-parser.js +113 -0
- package/dist/cjs/modules/word/security/cfb-reader.js +414 -0
- package/dist/cjs/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
- package/dist/cjs/modules/word/security/document-protection.js +208 -0
- package/dist/cjs/modules/word/security/encryption.js +612 -0
- package/dist/cjs/modules/word/security/policy.js +106 -0
- package/dist/cjs/modules/word/template/template-chart.js +170 -0
- package/dist/cjs/modules/word/template/template-datasource.js +549 -0
- package/dist/cjs/modules/word/template/template-engine.js +1430 -0
- package/dist/cjs/modules/word/units.js +44 -14
- package/dist/cjs/modules/word/{writers → writer}/chart-writer.js +163 -22
- package/dist/cjs/modules/word/writer/checkbox-writer.js +82 -0
- package/dist/cjs/modules/word/{writers → writer}/comment-writer.js +8 -6
- package/dist/cjs/modules/word/writer/common-parts.js +104 -0
- package/dist/cjs/modules/word/{content-types.js → writer/content-types.js} +14 -6
- package/dist/cjs/modules/word/writer/document-writer.js +478 -0
- package/dist/cjs/modules/word/writer/docx-packager.js +1551 -0
- package/dist/cjs/modules/word/{writers → writer}/footnote-writer.js +13 -10
- package/dist/cjs/modules/word/{writers → writer}/header-footer-writer.js +38 -20
- package/dist/cjs/modules/word/{writers → writer}/image-writer.js +11 -7
- package/dist/cjs/modules/word/{writers → writer}/math-writer.js +21 -1
- package/dist/cjs/modules/word/{writers → writer}/numbering-writer.js +11 -4
- package/dist/cjs/modules/word/{writers → writer}/paragraph-writer.js +72 -37
- package/dist/cjs/modules/word/{writers → writer}/parts-writer.js +91 -12
- package/dist/cjs/modules/word/writer/reference-scanners.js +120 -0
- package/dist/cjs/modules/word/writer/relationships.js +124 -0
- package/dist/cjs/modules/word/writer/render-context.js +51 -0
- package/dist/cjs/modules/word/{writers → writer}/run-writer.js +127 -24
- package/dist/cjs/modules/word/writer/sdt-writer.js +192 -0
- package/dist/cjs/modules/word/writer/stream-buf.js +76 -0
- package/dist/cjs/modules/word/writer/streaming-writer.js +1387 -0
- package/dist/cjs/modules/word/writer/string-buf.js +11 -0
- package/dist/cjs/modules/word/{writers → writer}/styles-writer.js +32 -1
- package/dist/cjs/modules/word/{writers → writer}/table-writer.js +94 -11
- package/dist/cjs/utils/crypto.browser.js +3 -1
- package/dist/cjs/utils/crypto.js +4 -1
- package/dist/cjs/utils/font-metrics.js +303 -0
- package/dist/cjs/utils/string-buf.js +92 -0
- package/dist/cjs/utils/theme-colors.js +126 -0
- package/dist/esm/modules/archive/compression/streaming-compress.browser.js +29 -0
- package/dist/esm/modules/archive/compression/streaming-compress.js +9 -0
- package/dist/esm/modules/archive/compression/worker-pool/pool.browser.js +26 -1
- package/dist/esm/modules/archive/fs/archive-file.js +78 -16
- package/dist/esm/modules/archive/unzip/stream.browser.js +43 -2
- package/dist/esm/modules/excel/chart/chart-ex-builder.js +7 -2
- package/dist/esm/modules/excel/chart/chart-ex-renderer.js +4 -9
- package/dist/esm/modules/excel/chart/chart.js +1 -7
- package/dist/esm/modules/excel/stream/workbook-reader.browser.js +25 -1
- package/dist/esm/modules/excel/stream/workbook-reader.js +9 -0
- package/dist/esm/modules/excel/stream/workbook-writer.browser.js +228 -13
- package/dist/esm/modules/excel/utils/string-buf.js +4 -81
- package/dist/esm/modules/excel/workbook.browser.js +135 -25
- package/dist/esm/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
- package/dist/esm/modules/excel/xlsx/xlsx.browser.js +32 -8
- package/dist/esm/modules/excel/xlsx/xlsx.js +9 -1
- package/dist/esm/modules/pdf/excel-bridge.js +32 -0
- package/dist/esm/modules/pdf/font/metrics.js +3 -237
- package/dist/esm/modules/pdf/index.js +1 -1
- package/dist/esm/modules/pdf/render-layout-to-pdf.js +647 -0
- package/dist/esm/modules/pdf/word-bridge.js +122 -274
- package/dist/esm/modules/stream/index.base.js +2 -1
- package/dist/esm/modules/stream/internal/sink-adapter.js +198 -0
- package/dist/esm/modules/stream/pull-stream.js +51 -5
- package/dist/esm/modules/word/advanced/diff.js +167 -0
- package/dist/esm/modules/word/advanced/drawing-shapes.js +268 -0
- package/dist/esm/modules/word/advanced/field-engine.js +1225 -0
- package/dist/esm/modules/word/advanced/glossary.js +79 -0
- package/dist/esm/modules/word/advanced/math-convert.js +595 -0
- package/dist/esm/modules/word/advanced/ole-objects.js +271 -0
- package/dist/esm/modules/word/advanced/style-map.js +322 -0
- package/dist/esm/modules/word/advanced/validation.js +1065 -0
- package/dist/esm/modules/word/advanced/vba-project.js +265 -0
- package/dist/esm/modules/word/bridge/excel-bridge.js +980 -0
- package/dist/esm/modules/word/builder/document-handle.js +664 -0
- package/dist/esm/modules/word/builder/paragraph-builders.js +90 -0
- package/dist/esm/modules/word/builder/run-builders.js +600 -0
- package/dist/esm/modules/word/builder/table-builders.js +45 -0
- package/dist/esm/modules/word/constants.js +109 -1
- package/dist/esm/modules/word/convert/conversion-ir.js +31 -0
- package/dist/esm/modules/word/convert/docx-to-semantic.js +499 -0
- package/dist/esm/modules/word/convert/flat-opc.js +385 -0
- package/dist/esm/modules/word/convert/html/html-import.js +1907 -0
- package/dist/{browser/modules/word → esm/modules/word/convert/html}/html-renderer.js +420 -69
- package/dist/esm/modules/word/convert/html/html.js +15 -0
- package/dist/esm/modules/word/convert/markdown/markdown-import.js +1325 -0
- package/dist/esm/modules/word/convert/markdown/markdown-renderer.js +634 -0
- package/dist/esm/modules/word/convert/markdown/markdown.js +15 -0
- package/dist/esm/modules/word/convert/odt/odt.js +1932 -0
- package/dist/esm/modules/word/core/color-utils.js +43 -0
- package/dist/esm/modules/word/core/internal-utils.js +209 -0
- package/dist/esm/modules/word/core/mapper.js +427 -0
- package/dist/esm/modules/word/core/opc-paths.js +48 -0
- package/dist/esm/modules/word/core/text-utils.js +202 -0
- package/dist/esm/modules/word/core/walker.js +570 -0
- package/dist/esm/modules/word/crypto.js +13 -7
- package/dist/esm/modules/word/document-io.js +80 -197
- package/dist/esm/modules/word/errors.js +54 -2
- package/dist/esm/modules/word/excel.js +13 -0
- package/dist/esm/modules/word/font/font-embed.js +646 -0
- package/dist/{browser/modules/word → esm/modules/word/font}/font-obfuscation.js +4 -9
- package/dist/esm/modules/word/font/hyphenation.js +4210 -0
- package/dist/esm/modules/word/font/text-shaping.js +635 -0
- package/dist/esm/modules/word/html.js +6 -5
- package/dist/esm/modules/word/incremental-edit.js +361 -0
- package/dist/esm/modules/word/index.base.js +138 -29
- package/dist/esm/modules/word/layout/layout-constants.js +17 -0
- package/dist/esm/modules/word/layout/layout-full.js +1696 -0
- package/dist/esm/modules/word/layout/layout-model.js +16 -0
- package/dist/esm/modules/word/layout/layout.js +1167 -0
- package/dist/esm/modules/word/layout/render-page.js +1238 -0
- package/dist/esm/modules/word/markdown.js +13 -0
- package/dist/esm/modules/word/patcher.js +537 -0
- package/dist/esm/modules/word/query/compat.js +58 -0
- package/dist/esm/modules/word/query/data-binding.js +392 -0
- package/dist/esm/modules/word/query/form-fields.js +268 -0
- package/dist/esm/modules/word/query/format-search.js +329 -0
- package/dist/esm/modules/word/query/mail-merge.js +111 -0
- package/dist/esm/modules/word/query/merge.js +617 -0
- package/dist/esm/modules/word/query/replace.js +301 -0
- package/dist/esm/modules/word/query/revisions.js +879 -0
- package/dist/esm/modules/word/query/search.js +346 -0
- package/dist/esm/modules/word/query/split.js +135 -0
- package/dist/esm/modules/word/query/style-resolve.js +368 -0
- package/dist/esm/modules/word/reader/chart-parser.js +810 -0
- package/dist/esm/modules/word/reader/comments-parser.js +92 -0
- package/dist/esm/modules/word/reader/doc-props-parsers.js +190 -0
- package/dist/esm/modules/word/reader/docx-reader.js +2557 -0
- package/dist/esm/modules/word/reader/drawing-helpers.js +84 -0
- package/dist/esm/modules/word/reader/form-field-parser.js +82 -0
- package/dist/esm/modules/word/reader/image-parsers.js +291 -0
- package/dist/esm/modules/word/reader/math-parser.js +422 -0
- package/dist/esm/modules/word/reader/metadata-parsers.js +87 -0
- package/dist/esm/modules/word/reader/numbering-parser.js +166 -0
- package/dist/esm/modules/word/reader/paragraph-section-parsers.js +503 -0
- package/dist/esm/modules/word/reader/parse-utils.js +249 -0
- package/dist/esm/modules/word/reader/properties-parsers.js +332 -0
- package/dist/esm/modules/word/reader/reader-context.js +61 -0
- package/dist/esm/modules/word/reader/sdt-helpers.js +111 -0
- package/dist/esm/modules/word/reader/settings-parser.js +263 -0
- package/dist/esm/modules/word/reader/styles-parser.js +147 -0
- package/dist/esm/modules/word/reader/table-properties-parsers.js +234 -0
- package/dist/esm/modules/word/reader/theme-parser.js +167 -0
- package/dist/esm/modules/word/reader/watermark-parser.js +110 -0
- package/dist/esm/modules/word/security/cfb-reader.js +410 -0
- package/dist/esm/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
- package/dist/esm/modules/word/security/document-protection.js +201 -0
- package/dist/esm/modules/word/security/encryption.js +602 -0
- package/dist/esm/modules/word/security/policy.js +102 -0
- package/dist/esm/modules/word/template/template-chart.js +167 -0
- package/dist/esm/modules/word/template/template-datasource.js +541 -0
- package/dist/esm/modules/word/template/template-engine.js +1435 -0
- package/dist/esm/modules/word/units.js +43 -14
- package/dist/esm/modules/word/{writers → writer}/chart-writer.js +164 -23
- package/dist/esm/modules/word/writer/checkbox-writer.js +79 -0
- package/dist/esm/modules/word/{writers → writer}/comment-writer.js +8 -6
- package/dist/esm/modules/word/writer/common-parts.js +101 -0
- package/dist/{browser/modules/word → esm/modules/word/writer}/content-types.js +14 -6
- package/dist/esm/modules/word/writer/document-writer.js +473 -0
- package/dist/esm/modules/word/writer/docx-packager.js +1515 -0
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/footnote-writer.js +13 -10
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/header-footer-writer.js +39 -21
- package/dist/esm/modules/word/{writers → writer}/image-writer.js +11 -7
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/math-writer.js +21 -1
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/numbering-writer.js +11 -4
- package/dist/esm/modules/word/{writers → writer}/paragraph-writer.js +73 -38
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/parts-writer.js +91 -12
- package/dist/esm/modules/word/writer/reference-scanners.js +111 -0
- package/dist/esm/modules/word/writer/relationships.js +117 -0
- package/dist/esm/modules/word/writer/render-context.js +46 -0
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/run-writer.js +126 -24
- package/dist/esm/modules/word/writer/sdt-writer.js +189 -0
- package/dist/esm/modules/word/writer/stream-buf.js +73 -0
- package/dist/esm/modules/word/writer/streaming-writer.js +1382 -0
- package/dist/esm/modules/word/writer/string-buf.js +7 -0
- package/dist/esm/modules/word/{writers → writer}/styles-writer.js +32 -1
- package/dist/esm/modules/word/{writers → writer}/table-writer.js +94 -11
- package/dist/esm/utils/crypto.browser.js +3 -1
- package/dist/esm/utils/crypto.js +4 -1
- package/dist/esm/utils/font-metrics.js +293 -0
- package/dist/esm/utils/string-buf.js +89 -0
- package/dist/esm/utils/theme-colors.js +120 -0
- package/dist/iife/excelts.iife.js +70692 -70337
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +57 -57
- package/dist/types/modules/archive/fs/archive-file.d.ts +8 -5
- package/dist/types/modules/excel/chart/chart-ex-types.d.ts +0 -12
- package/dist/types/modules/excel/chart/chart.d.ts +1 -5
- package/dist/types/modules/excel/chart/types.d.ts +0 -6
- package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +40 -0
- package/dist/types/modules/excel/utils/string-buf.d.ts +5 -26
- package/dist/types/modules/excel/xlsx/xlsx.browser.d.ts +19 -9
- package/dist/types/modules/excel/xlsx/xlsx.d.ts +10 -2
- package/dist/types/modules/pdf/excel-bridge.d.ts +30 -1
- package/dist/types/modules/pdf/font/metrics.d.ts +3 -52
- package/dist/types/modules/pdf/index.d.ts +1 -1
- package/dist/types/modules/pdf/render-layout-to-pdf.d.ts +66 -0
- package/dist/types/modules/pdf/word-bridge.d.ts +80 -12
- package/dist/types/modules/stream/index.base.d.ts +2 -0
- package/dist/types/modules/stream/internal/sink-adapter.d.ts +65 -0
- package/dist/types/modules/stream/pull-stream.d.ts +19 -2
- package/dist/types/modules/stream/types.d.ts +13 -1
- package/dist/types/modules/word/advanced/diff.d.ts +61 -0
- package/dist/types/modules/word/advanced/drawing-shapes.d.ts +269 -0
- package/dist/types/modules/word/advanced/field-engine.d.ts +43 -0
- package/dist/types/modules/word/advanced/glossary.d.ts +86 -0
- package/dist/types/modules/word/advanced/math-convert.d.ts +30 -0
- package/dist/types/modules/word/advanced/ole-objects.d.ts +115 -0
- package/dist/types/modules/word/advanced/style-map.d.ts +105 -0
- package/dist/types/modules/word/advanced/validation.d.ts +56 -0
- package/dist/types/modules/word/advanced/vba-project.d.ts +91 -0
- package/dist/types/modules/word/bridge/excel-bridge.d.ts +127 -0
- package/dist/types/modules/word/builder/document-handle.d.ts +151 -0
- package/dist/types/modules/word/builder/paragraph-builders.d.ts +61 -0
- package/dist/types/modules/word/builder/run-builders.d.ts +374 -0
- package/dist/types/modules/word/builder/table-builders.d.ts +23 -0
- package/dist/types/modules/word/constants.d.ts +39 -1
- package/dist/types/modules/word/convert/conversion-ir.d.ts +210 -0
- package/dist/types/modules/word/convert/docx-to-semantic.d.ts +39 -0
- package/dist/types/modules/word/convert/flat-opc.d.ts +44 -0
- package/dist/types/modules/word/convert/html/html-import.d.ts +50 -0
- package/dist/{browser/modules/word → types/modules/word/convert/html}/html-renderer.d.ts +14 -1
- package/dist/types/modules/word/convert/html/html.d.ts +15 -0
- package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +68 -0
- package/dist/types/modules/word/convert/markdown/markdown-renderer.d.ts +25 -0
- package/dist/types/modules/word/convert/markdown/markdown.d.ts +15 -0
- package/dist/types/modules/word/convert/odt/odt.d.ts +41 -0
- package/dist/types/modules/word/{color-utils.d.ts → core/color-utils.d.ts} +8 -1
- package/dist/types/modules/word/core/internal-utils.d.ts +90 -0
- package/dist/types/modules/word/core/mapper.d.ts +44 -0
- package/dist/types/modules/word/core/opc-paths.d.ts +33 -0
- package/dist/types/modules/word/core/text-utils.d.ts +38 -0
- package/dist/types/modules/word/core/walker.d.ts +119 -0
- package/dist/types/modules/word/crypto.d.ts +14 -9
- package/dist/types/modules/word/document-io.d.ts +59 -27
- package/dist/types/modules/word/errors.d.ts +44 -1
- package/dist/types/modules/word/excel.d.ts +14 -0
- package/dist/types/modules/word/font/font-embed.d.ts +112 -0
- package/dist/types/modules/word/font/hyphenation.d.ts +65 -0
- package/dist/types/modules/word/font/text-shaping.d.ts +58 -0
- package/dist/types/modules/word/html.d.ts +7 -6
- package/dist/types/modules/word/incremental-edit.d.ts +123 -0
- package/dist/types/modules/word/index.base.d.ts +194 -10
- package/dist/types/modules/word/layout/layout-constants.d.ts +17 -0
- package/dist/types/modules/word/layout/layout-full.d.ts +53 -0
- package/dist/types/modules/word/layout/layout-model.d.ts +344 -0
- package/dist/types/modules/word/layout/layout.d.ts +63 -0
- package/dist/types/modules/word/layout/render-page.d.ts +57 -0
- package/dist/types/modules/word/markdown.d.ts +14 -0
- package/dist/types/modules/word/patcher.d.ts +62 -0
- package/dist/types/modules/word/query/compat.d.ts +25 -0
- package/dist/types/modules/word/query/data-binding.d.ts +22 -0
- package/dist/types/modules/word/query/form-fields.d.ts +41 -0
- package/dist/types/modules/word/query/format-search.d.ts +99 -0
- package/dist/types/modules/word/query/mail-merge.d.ts +25 -0
- package/dist/types/modules/word/query/merge.d.ts +50 -0
- package/dist/types/modules/word/query/replace.d.ts +47 -0
- package/dist/types/modules/word/query/revisions.d.ts +67 -0
- package/dist/types/modules/word/query/search.d.ts +129 -0
- package/dist/types/modules/word/query/split.d.ts +44 -0
- package/dist/types/modules/word/query/style-resolve.d.ts +104 -0
- package/dist/types/modules/word/reader/chart-parser.d.ts +20 -0
- package/dist/types/modules/word/reader/comments-parser.d.ts +26 -0
- package/dist/types/modules/word/reader/doc-props-parsers.d.ts +15 -0
- package/dist/types/modules/word/reader/docx-reader.d.ts +27 -0
- package/dist/types/modules/word/reader/drawing-helpers.d.ts +27 -0
- package/dist/types/modules/word/reader/form-field-parser.d.ts +21 -0
- package/dist/types/modules/word/reader/image-parsers.d.ts +11 -0
- package/dist/types/modules/word/reader/math-parser.d.ts +12 -0
- package/dist/types/modules/word/reader/metadata-parsers.d.ts +17 -0
- package/dist/types/modules/word/reader/numbering-parser.d.ts +13 -0
- package/dist/types/modules/word/reader/paragraph-section-parsers.d.ts +12 -0
- package/dist/types/modules/word/reader/parse-utils.d.ts +91 -0
- package/dist/types/modules/word/reader/properties-parsers.d.ts +21 -0
- package/dist/types/modules/word/reader/reader-context.d.ts +69 -0
- package/dist/types/modules/word/reader/sdt-helpers.d.ts +29 -0
- package/dist/types/modules/word/reader/settings-parser.d.ts +8 -0
- package/dist/types/modules/word/reader/styles-parser.d.ts +12 -0
- package/dist/types/modules/word/reader/table-properties-parsers.d.ts +12 -0
- package/dist/types/modules/word/reader/theme-parser.d.ts +8 -0
- package/dist/types/modules/word/reader/watermark-parser.d.ts +15 -0
- package/dist/types/modules/word/security/cfb-reader.d.ts +37 -0
- package/dist/types/modules/word/{digital-signatures.d.ts → security/digital-signatures.d.ts} +19 -11
- package/dist/types/modules/word/security/document-protection.d.ts +93 -0
- package/dist/{browser/modules/word → types/modules/word/security}/encryption.d.ts +51 -4
- package/dist/types/modules/word/security/policy.d.ts +80 -0
- package/dist/types/modules/word/template/template-chart.d.ts +56 -0
- package/dist/types/modules/word/template/template-datasource.d.ts +154 -0
- package/dist/types/modules/word/template/template-engine.d.ts +121 -0
- package/dist/types/modules/word/types.d.ts +224 -25
- package/dist/types/modules/word/units.d.ts +26 -0
- package/dist/types/modules/word/writer/checkbox-writer.d.ts +17 -0
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/comment-writer.d.ts +2 -1
- package/dist/types/modules/word/writer/common-parts.d.ts +57 -0
- package/dist/{browser/modules/word → types/modules/word/writer}/content-types.d.ts +2 -2
- package/dist/types/modules/word/writer/document-writer.d.ts +24 -0
- package/dist/types/modules/word/writer/docx-packager.d.ts +35 -0
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/footnote-writer.d.ts +3 -2
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/header-footer-writer.d.ts +3 -2
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/image-writer.d.ts +1 -1
- package/dist/types/modules/word/writer/math-writer.d.ts +20 -0
- package/dist/types/modules/word/{writers → writer}/numbering-writer.d.ts +1 -1
- package/dist/types/modules/word/{writers → writer}/paragraph-writer.d.ts +2 -1
- package/dist/types/modules/word/{writers → writer}/parts-writer.d.ts +3 -3
- package/dist/types/modules/word/writer/reference-scanners.d.ts +42 -0
- package/dist/types/modules/word/writer/relationships.d.ts +52 -0
- package/dist/types/modules/word/writer/render-context.d.ts +124 -0
- package/dist/types/modules/word/{writers → writer}/run-writer.d.ts +10 -1
- package/dist/types/modules/word/writer/sdt-writer.d.ts +25 -0
- package/dist/types/modules/word/writer/stream-buf.d.ts +37 -0
- package/dist/types/modules/word/writer/streaming-writer.d.ts +344 -0
- package/dist/types/modules/word/writer/string-buf.d.ts +8 -0
- package/dist/types/modules/word/{writers → writer}/table-writer.d.ts +2 -1
- package/dist/types/modules/xml/types.d.ts +22 -0
- package/dist/types/utils/crypto.browser.d.ts +3 -1
- package/dist/types/utils/crypto.d.ts +4 -1
- package/dist/types/utils/font-metrics.d.ts +63 -0
- package/dist/types/utils/string-buf.d.ts +42 -0
- package/dist/types/utils/theme-colors.d.ts +55 -0
- package/package.json +121 -39
- package/dist/browser/modules/word/color-utils.js +0 -94
- package/dist/browser/modules/word/document.d.ts +0 -657
- package/dist/browser/modules/word/document.js +0 -1533
- package/dist/browser/modules/word/docx-packager.d.ts +0 -14
- package/dist/browser/modules/word/docx-packager.js +0 -822
- package/dist/browser/modules/word/docx-reader.d.ts +0 -11
- package/dist/browser/modules/word/docx-reader.js +0 -4929
- package/dist/browser/modules/word/encryption.js +0 -274
- package/dist/browser/modules/word/internal-utils.d.ts +0 -23
- package/dist/browser/modules/word/internal-utils.js +0 -54
- package/dist/browser/modules/word/namespaces.d.ts +0 -159
- package/dist/browser/modules/word/namespaces.js +0 -189
- package/dist/browser/modules/word/relationships.d.ts +0 -30
- package/dist/browser/modules/word/relationships.js +0 -48
- package/dist/browser/modules/word/writers/checkbox-writer.d.ts +0 -9
- package/dist/browser/modules/word/writers/checkbox-writer.js +0 -42
- package/dist/browser/modules/word/writers/document-writer.d.ts +0 -16
- package/dist/browser/modules/word/writers/document-writer.js +0 -461
- package/dist/browser/modules/word/writers/math-writer.d.ts +0 -9
- package/dist/cjs/modules/word/color-utils.js +0 -97
- package/dist/cjs/modules/word/document.js +0 -1645
- package/dist/cjs/modules/word/docx-packager.js +0 -825
- package/dist/cjs/modules/word/docx-reader.js +0 -4932
- package/dist/cjs/modules/word/encryption.js +0 -282
- package/dist/cjs/modules/word/internal-utils.js +0 -59
- package/dist/cjs/modules/word/namespaces.js +0 -192
- package/dist/cjs/modules/word/relationships.js +0 -55
- package/dist/cjs/modules/word/writers/checkbox-writer.js +0 -45
- package/dist/cjs/modules/word/writers/document-writer.js +0 -465
- package/dist/esm/modules/word/color-utils.js +0 -94
- package/dist/esm/modules/word/document.js +0 -1533
- package/dist/esm/modules/word/docx-packager.js +0 -822
- package/dist/esm/modules/word/docx-reader.js +0 -4929
- package/dist/esm/modules/word/encryption.js +0 -274
- package/dist/esm/modules/word/internal-utils.js +0 -54
- package/dist/esm/modules/word/namespaces.js +0 -189
- package/dist/esm/modules/word/relationships.js +0 -48
- package/dist/esm/modules/word/writers/checkbox-writer.js +0 -42
- package/dist/esm/modules/word/writers/document-writer.js +0 -461
- package/dist/types/modules/word/document.d.ts +0 -657
- package/dist/types/modules/word/docx-packager.d.ts +0 -14
- package/dist/types/modules/word/docx-reader.d.ts +0 -11
- package/dist/types/modules/word/internal-utils.d.ts +0 -23
- package/dist/types/modules/word/namespaces.d.ts +0 -159
- package/dist/types/modules/word/relationships.d.ts +0 -30
- package/dist/types/modules/word/writers/checkbox-writer.d.ts +0 -9
- package/dist/types/modules/word/writers/document-writer.d.ts +0 -16
- package/dist/types/modules/word/writers/math-writer.d.ts +0 -9
- /package/dist/browser/modules/word/{font-obfuscation.d.ts → font/font-obfuscation.d.ts} +0 -0
- /package/dist/browser/modules/word/{writers → writer}/chart-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/section-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/section-writer.js +0 -0
- /package/dist/browser/modules/word/{writers → writer}/styles-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/textbox-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/textbox-writer.js +0 -0
- /package/dist/browser/modules/word/{writers → writer}/toc-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/toc-writer.js +0 -0
- /package/dist/cjs/modules/word/{writers → writer}/section-writer.js +0 -0
- /package/dist/cjs/modules/word/{writers → writer}/textbox-writer.js +0 -0
- /package/dist/cjs/modules/word/{writers → writer}/toc-writer.js +0 -0
- /package/dist/esm/modules/word/{writers → writer}/section-writer.js +0 -0
- /package/dist/esm/modules/word/{writers → writer}/textbox-writer.js +0 -0
- /package/dist/esm/modules/word/{writers → writer}/toc-writer.js +0 -0
- /package/dist/types/modules/word/{font-obfuscation.d.ts → font/font-obfuscation.d.ts} +0 -0
- /package/dist/types/modules/word/{writers → writer}/chart-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/section-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/styles-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/textbox-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/toc-writer.d.ts +0 -0
|
@@ -0,0 +1,2560 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* DOCX Module - Reader / Parser
|
|
4
|
+
*
|
|
5
|
+
* Reads a DOCX ZIP file and parses it into a DocxDocument model.
|
|
6
|
+
* Uses the archive module for ZIP reading and XML module for parsing.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.readDocx = readDocx;
|
|
10
|
+
const read_archive_1 = require("../../archive/read-archive.js");
|
|
11
|
+
const dom_1 = require("../../xml/dom.js");
|
|
12
|
+
const constants_1 = require("../constants");
|
|
13
|
+
const internal_utils_1 = require("../core/internal-utils");
|
|
14
|
+
const text_utils_1 = require("../core/text-utils");
|
|
15
|
+
const errors_1 = require("../errors");
|
|
16
|
+
const encryption_1 = require("../security/encryption");
|
|
17
|
+
const policy_1 = require("../security/policy");
|
|
18
|
+
const chart_parser_1 = require("./chart-parser");
|
|
19
|
+
const comments_parser_1 = require("./comments-parser");
|
|
20
|
+
const doc_props_parsers_1 = require("./doc-props-parsers");
|
|
21
|
+
const form_field_parser_1 = require("./form-field-parser");
|
|
22
|
+
const image_parsers_1 = require("./image-parsers");
|
|
23
|
+
const math_parser_1 = require("./math-parser");
|
|
24
|
+
const metadata_parsers_1 = require("./metadata-parsers");
|
|
25
|
+
const numbering_parser_1 = require("./numbering-parser");
|
|
26
|
+
const paragraph_section_parsers_1 = require("./paragraph-section-parsers");
|
|
27
|
+
const parse_utils_1 = require("./parse-utils");
|
|
28
|
+
const properties_parsers_1 = require("./properties-parsers");
|
|
29
|
+
const reader_context_1 = require("./reader-context");
|
|
30
|
+
const sdt_helpers_1 = require("./sdt-helpers");
|
|
31
|
+
const styles_parser_1 = require("./styles-parser");
|
|
32
|
+
const table_properties_parsers_1 = require("./table-properties-parsers");
|
|
33
|
+
const watermark_parser_1 = require("./watermark-parser");
|
|
34
|
+
// =============================================================================
|
|
35
|
+
// Run Content Parser
|
|
36
|
+
// =============================================================================
|
|
37
|
+
/**
 * Translate the element children of a run-like element into an ordered list
 * of run-content items.
 *
 * Non-element children are ignored, and a leading "w:" prefix is stripped
 * from each child name before dispatch. `rPr`, `fldChar`, `instrText` and
 * `delText` contribute nothing here (the original note says they are handled
 * elsewhere). Any other unrecognised element is kept as an `opaqueRun` item
 * holding its serialized XML so it can be written back unchanged.
 *
 * @param el Parsed XML element whose `children` are inspected.
 * @returns Content items in the order their source elements appear.
 */
function parseRunContent(el) {
    const { textContent } = dom_1;
    const { attrVal, attrInt, findChildNs, serializeElement } = parse_utils_1;
    const { parseDrawingContent } = image_parsers_1;
    const localName = (node) => node.name.replace(/^w:/, "");
    // Parse every <r> element child of `container` and append it to `target`.
    const appendRuns = (container, target) => {
        for (const node of container.children) {
            if (node.type === "element" && localName(node) === "r") {
                target.push(parseRun(node));
            }
        }
    };
    // Footnote and endnote references share one shape; only `type` differs.
    const noteReference = (node, type) => {
        const ref = { type, id: attrInt(node, "id") ?? 0 };
        const flag = attrVal(node, "customMarkFollows");
        if (flag === "1" || flag === "true") {
            ref.customMarkFollows = true;
        }
        return ref;
    };
    // Read the optional children of <rubyPr>; each row is
    // [child element, target key, attribute reader], applied in this order.
    const readRubyProperties = (rubyPrEl) => {
        const fields = [
            ["rubyAlign", "align", attrVal],
            ["hps", "fontSize", attrInt],
            ["hpsRaise", "raise", attrInt],
            ["hpsBaseText", "baseFontSize", attrInt],
            ["lid", "language", attrVal]
        ];
        const props = {};
        for (const [tag, key, read] of fields) {
            const node = findChildNs(rubyPrEl, tag);
            if (node) {
                props[key] = read(node, "val");
            }
        }
        return props;
    };
    const items = [];
    for (const child of el.children) {
        if (child.type !== "element") {
            continue;
        }
        const tagName = localName(child);
        switch (tagName) {
            case "t":
                items.push({ type: "text", text: textContent(child) });
                break;
            case "br":
                items.push({ type: "break", breakType: attrVal(child, "type") });
                break;
            case "tab":
            case "noBreakHyphen":
            case "softHyphen":
            case "lastRenderedPageBreak":
                // Marker items whose type is exactly the element's local name.
                items.push({ type: tagName });
                break;
            case "cr":
                items.push({ type: "carriageReturn" });
                break;
            case "ptab": {
                const ptab = {
                    type: "ptab",
                    alignment: attrVal(child, "alignment") ?? "left",
                    relativeTo: attrVal(child, "relativeTo") ?? "margin"
                };
                const leader = attrVal(child, "leader");
                if (leader) {
                    ptab.leader = leader;
                }
                items.push(ptab);
                break;
            }
            case "ruby": {
                const ruby = { type: "ruby", rubyText: [], baseText: [] };
                const rubyPrEl = findChildNs(child, "rubyPr");
                if (rubyPrEl) {
                    const props = readRubyProperties(rubyPrEl);
                    if (Object.keys(props).length > 0) {
                        ruby.properties = props;
                    }
                }
                // w:rt holds the annotation runs, w:rubyBase the annotated runs.
                const rtEl = findChildNs(child, "rt");
                if (rtEl) {
                    appendRuns(rtEl, ruby.rubyText);
                }
                const baseEl = findChildNs(child, "rubyBase");
                if (baseEl) {
                    appendRuns(baseEl, ruby.baseText);
                }
                items.push(ruby);
                break;
            }
            case "sym":
                items.push({
                    type: "symbol",
                    font: attrVal(child, "font") ?? "",
                    char: attrVal(child, "char") ?? ""
                });
                break;
            case "footnoteReference":
                items.push(noteReference(child, "footnoteRef"));
                break;
            case "endnoteReference":
                items.push(noteReference(child, "endnoteRef"));
                break;
            case "drawing":
                // The drawing parser appends its own items to the list.
                parseDrawingContent(child, items);
                break;
            case "annotationRef":
            case "commentReference":
                // Both spellings map to the same annotation-reference item.
                items.push({ type: "annotationReference", id: attrInt(child, "id") ?? 0 });
                break;
            case "rPr":
            case "fldChar":
            case "instrText":
            case "delText":
                // Known structural elements handled elsewhere — skip silently
                break;
            default:
                // Unknown run child: keep the raw XML for round-trip fidelity.
                items.push({ type: "opaqueRun", rawXml: serializeElement(child) });
                break;
        }
    }
    return items;
}
|
|
198
|
+
// =============================================================================
|
|
199
|
+
// DrawingML Shape Parser
|
|
200
|
+
// =============================================================================
|
|
201
|
+
/**
 * Build a `drawingShape` item from a drawing anchor element and the shape
 * element (`wspEl`) it contains.
 *
 * Reads, in this order: size (`wp:extent`), preset geometry, solid/no fill,
 * outline, text-box paragraphs, horizontal and vertical position, wrap mode,
 * the `behindDoc` flag and rotation. Shape-level children are looked up by
 * their conventional prefixed name first and then via `findChildNs`
 * (presumably a prefix-independent local-name lookup — confirm in
 * parse-utils).
 *
 * Differences from the previous implementation:
 *  - `behindDoc` is treated as an XML Schema boolean, so "true" is accepted
 *    as well as "1" (matching how `customMarkFollows` is read in this file).
 *  - Numeric text that cannot be parsed no longer leaks `NaN` into the
 *    model: `width`/`height` fall back to 0 (the same value used when the
 *    extent is missing) and the optional `offset`, `outlineWidth` and
 *    `rotation` fields are simply omitted.
 *
 * @param anchorEl Element carrying `wp:docPr`, `wp:extent`, positioning and
 *   wrap children plus the `behindDoc` attribute.
 * @param wspEl Shape element carrying `spPr` and, optionally, `txbx`.
 * @param ctx Reader context, passed through unchanged to `parseParagraph`.
 * @returns The populated shape object (`type: "drawingShape"`).
 */
function parseDrawingShape(anchorEl, wspEl, ctx) {
    const { findChild, textContent } = dom_1;
    const { findChildNs } = parse_utils_1;
    // Prefixed lookup first; the local-name lookup only runs when that misses.
    const findEither = (parent, prefix, local) => findChild(parent, `${prefix}:${local}`) ?? findChildNs(parent, local);
    // Base-10 integer parse that reports failure as `undefined` instead of NaN.
    const toInt = (raw) => {
        const parsed = Number.parseInt(raw, 10);
        return Number.isNaN(parsed) ? undefined : parsed;
    };
    // xsd:boolean permits both "1" and "true" for a set flag.
    const isTrue = (raw) => raw === "1" || raw === "true";
    // Shared reader for wp:positionH / wp:positionV.
    const readPosition = (tag) => {
        const posEl = findChild(anchorEl, tag);
        if (!posEl) {
            return undefined;
        }
        const position = { relativeTo: posEl.attributes["relativeFrom"] };
        const offsetEl = findChild(posEl, "wp:posOffset");
        if (offsetEl) {
            const offset = toInt(textContent(offsetEl));
            if (offset !== undefined) {
                position.offset = offset;
            }
        }
        const alignEl = findChild(posEl, "wp:align");
        if (alignEl) {
            position.align = textContent(alignEl);
        }
        return position;
    };
    const docPrEl = findChild(anchorEl, "wp:docPr");
    const extentEl = findChild(anchorEl, "wp:extent");
    const spPrEl = findEither(wspEl, "wps", "spPr");
    // Preset geometry names the shape; "rect" is the default when absent.
    const prstGeomEl = spPrEl ? findEither(spPrEl, "a", "prstGeom") : undefined;
    const shape = {
        type: "drawingShape",
        shapeType: prstGeomEl?.attributes["prst"] ?? "rect",
        width: toInt(extentEl?.attributes["cx"]) ?? 0,
        height: toInt(extentEl?.attributes["cy"]) ?? 0,
        altText: docPrEl?.attributes["descr"],
        name: docPrEl?.attributes["name"]
    };
    if (spPrEl) {
        // Fill: only an explicit sRGB solid fill or an explicit "no fill" is read.
        const solidFill = findEither(spPrEl, "a", "solidFill");
        if (solidFill) {
            const srgb = findEither(solidFill, "a", "srgbClr");
            if (srgb) {
                shape.fillColor = srgb.attributes["val"];
            }
        }
        if (findEither(spPrEl, "a", "noFill")) {
            shape.noFill = true;
        }
        // Outline
        const lnEl = findEither(spPrEl, "a", "ln");
        if (lnEl) {
            const outlineWidth = toInt(lnEl.attributes["w"]);
            if (outlineWidth !== undefined) {
                shape.outlineWidth = outlineWidth;
            }
            const lnFill = findEither(lnEl, "a", "solidFill");
            if (lnFill) {
                const srgb = findEither(lnFill, "a", "srgbClr");
                if (srgb) {
                    shape.outlineColor = srgb.attributes["val"];
                }
            }
            if (findEither(lnEl, "a", "noFill")) {
                shape.noOutline = true;
            }
        }
    }
    // Text content (txbx > txbxContent): only paragraph children are parsed.
    const txbxEl = findEither(wspEl, "wps", "txbx");
    const txbxContentEl = txbxEl ? findEither(txbxEl, "w", "txbxContent") : undefined;
    if (txbxContentEl) {
        const paras = [];
        for (const child of txbxContentEl.children) {
            if (child.type === "element" && child.name.replace(/^w:/, "") === "p") {
                paras.push(parseParagraph(child, ctx));
            }
        }
        if (paras.length > 0) {
            shape.textContent = paras;
        }
    }
    // Positioning
    const horizontal = readPosition("wp:positionH");
    if (horizontal) {
        shape.horizontalPosition = horizontal;
    }
    const vertical = readPosition("wp:positionV");
    if (vertical) {
        shape.verticalPosition = vertical;
    }
    // Wrap: every child is scanned, so the last recognised wrap element wins.
    for (const wrapChild of anchorEl.children) {
        if (wrapChild.type !== "element") {
            continue;
        }
        switch (wrapChild.name) {
            case "wp:wrapSquare":
                shape.wrap = { style: "square", side: wrapChild.attributes["wrapText"] };
                break;
            case "wp:wrapTight":
                shape.wrap = { style: "tight", side: wrapChild.attributes["wrapText"] };
                break;
            case "wp:wrapTopAndBottom":
                shape.wrap = { style: "topAndBottom" };
                break;
            case "wp:wrapNone":
                shape.wrap = { style: "none" };
                break;
            default:
                break;
        }
    }
    // Behind doc
    if (isTrue(anchorEl.attributes["behindDoc"])) {
        shape.behindDoc = true;
    }
    // Rotation
    if (spPrEl) {
        const xfrmEl = findEither(spPrEl, "a", "xfrm");
        const rotation = toInt(xfrmEl?.attributes["rot"]);
        if (rotation !== undefined) {
            shape.rotation = rotation;
        }
    }
    return shape;
}
|
|
338
|
+
// =============================================================================
|
|
339
|
+
// TextBox Parser
|
|
340
|
+
// =============================================================================
|
|
341
|
+
/**
 * Parse a legacy VML text box: `v:shape | v:rect > v:textbox > w:txbxContent`.
 *
 * The direct children of `pictEl` are scanned for `v:shape` / `v:rect`
 * elements. When several text boxes are present the last one wins; the
 * shape attributes are always read from the shape that owns that text box.
 *
 * @param pictEl - Element whose children are scanned for VML shapes.
 * @param ctx - Parse context, forwarded unchanged to `parseParagraph`.
 * @returns A `{ type: "textBox", content }` model (plus `style`,
 *   `strokeColor`, `fillColor`, `stroke`, `fill` when the owning shape
 *   declares them), or `undefined` when no text box content exists.
 */
function parseTextBox(pictEl, ctx) {
    let txbxContentEl;
    let shapeEl;
    for (const child of pictEl.children) {
        if (child.type !== "element" || (child.name !== "v:shape" && child.name !== "v:rect")) {
            continue;
        }
        for (const sc of child.children) {
            if (sc.type !== "element" || sc.name !== "v:textbox") {
                continue;
            }
            for (const tc of sc.children) {
                if (tc.type === "element" &&
                    (tc.name === "w:txbxContent" || tc.name === "txbxContent")) {
                    // Record the content together with its owning shape so the
                    // attributes below can never come from an unrelated sibling
                    // shape that merely appears later in the picture.
                    txbxContentEl = tc;
                    shapeEl = child;
                }
            }
        }
    }
    if (!txbxContentEl) {
        return undefined;
    }
    const paragraphs = [];
    for (const c of txbxContentEl.children) {
        if (c.type === "element" && c.name.replace(/^w:/, "") === "p") {
            paragraphs.push(parseParagraph(c, ctx));
        }
    }
    const tb = { type: "textBox", content: paragraphs };
    // shapeEl is always set here: it is assigned alongside txbxContentEl.
    const style = shapeEl.attributes["style"];
    if (style) {
        tb.style = style;
    }
    const strokeColor = shapeEl.attributes["strokecolor"];
    if (strokeColor) {
        tb.strokeColor = strokeColor;
    }
    const fillColor = shapeEl.attributes["fillcolor"];
    if (fillColor) {
        tb.fillColor = fillColor;
    }
    // Only an explicit "f" switches stroke / fill off; anything else is left unset.
    if (shapeEl.attributes["stroked"] === "f") {
        tb.stroke = false;
    }
    if (shapeEl.attributes["filled"] === "f") {
        tb.fill = false;
    }
    return tb;
}
|
|
392
|
+
// =============================================================================
|
|
393
|
+
// SDT / CheckBox / TOC Parser
|
|
394
|
+
// =============================================================================
|
|
395
|
+
/**
 * Convert a structured document tag element into its model.
 *
 * Dispatch order:
 *  1. `sdtPr` contains `w14:checkbox`  -> result of `parseCheckBox`.
 *  2. `sdtPr > docPartObj > docPartGallery` has val "Table of Contents"
 *     -> result of `parseTocFromSdt`.
 *  3. Otherwise a generic `{ type: "sdt", properties, content }` model.
 *
 * @param sdtEl - The SDT element.
 * @param ctx - Parse context, forwarded to the paragraph / table / nested SDT parsers.
 * @returns A check box model, a table-of-contents model, or a generic SDT model.
 */
function parseSdt(sdtEl, ctx) {
    const { findChildNs, findChildrenNs, attrVal, attrInt, boolToggle } = parse_utils_1;
    const { findChild } = dom_1;
    const { parseCheckBox } = sdt_helpers_1;
    const prEl = findChildNs(sdtEl, "sdtPr");
    const contentEl = findChildNs(sdtEl, "sdtContent");
    // Shared by dropDownList and comboBox: one entry per `listItem` child.
    const readListItems = (listEl) => {
        const entries = [];
        for (const itemEl of findChildrenNs(listEl, "listItem")) {
            const entry = { value: attrVal(itemEl, "value") ?? "" };
            const label = attrVal(itemEl, "displayText");
            if (label) {
                entry.displayText = label;
            }
            entries.push(entry);
        }
        return entries;
    };
    const properties = {};
    if (prEl) {
        // Specialised SDT kinds short-circuit before any generic property is read.
        const checkBoxEl = findChild(prEl, "w14:checkbox");
        if (checkBoxEl) {
            return parseCheckBox(checkBoxEl);
        }
        const docPartObjEl = findChildNs(prEl, "docPartObj");
        const galleryEl = docPartObjEl ? findChildNs(docPartObjEl, "docPartGallery") : undefined;
        if (galleryEl && attrVal(galleryEl, "val") === "Table of Contents") {
            return parseTocFromSdt(contentEl, ctx);
        }
        const tagEl = findChildNs(prEl, "tag");
        if (tagEl) {
            properties.tag = attrVal(tagEl, "val");
        }
        const aliasEl = findChildNs(prEl, "alias");
        if (aliasEl) {
            properties.alias = attrVal(aliasEl, "val");
        }
        const lockEl = findChildNs(prEl, "lock");
        if (lockEl) {
            const mode = attrVal(lockEl, "val");
            // "sdtContentLocked" locks both the content and the SDT itself.
            const locksBoth = mode === "sdtContentLocked";
            if (locksBoth || mode === "contentLocked") {
                properties.lockContent = true;
            }
            if (locksBoth || mode === "sdtLocked") {
                properties.lockSdt = true;
            }
        }
        if (findChildNs(prEl, "text")) {
            properties.plainText = true;
        }
        // showingPlcHdr is a toggle: present means on unless boolToggle reports false.
        if (findChildNs(prEl, "showingPlcHdr") && boolToggle(prEl, "showingPlcHdr") !== false) {
            properties.showingPlaceholder = true;
        }
        const appearanceEl = findChild(prEl, "w15:appearance");
        if (appearanceEl) {
            const look = appearanceEl.attributes["w15:val"] ?? appearanceEl.attributes["val"];
            if (["boundingBox", "tags", "hidden"].includes(look)) {
                properties.appearance = look;
            }
        }
        const dropDownEl = findChildNs(prEl, "dropDownList");
        if (dropDownEl) {
            properties.dropdownList = readListItems(dropDownEl);
        }
        const comboEl = findChildNs(prEl, "comboBox");
        if (comboEl) {
            properties.comboBox = readListItems(comboEl);
        }
        const dateEl = findChildNs(prEl, "date");
        if (dateEl) {
            const dateInfo = {};
            const fullDate = attrVal(dateEl, "fullDate");
            if (fullDate) {
                dateInfo.fullDate = fullDate;
            }
            const formatEl = findChildNs(dateEl, "dateFormat");
            if (formatEl) {
                dateInfo.dateFormat = attrVal(formatEl, "val");
            }
            const localeEl = findChildNs(dateEl, "lid");
            if (localeEl) {
                dateInfo.lid = attrVal(localeEl, "val");
            }
            const storeEl = findChildNs(dateEl, "storeMappedDataAs");
            if (storeEl) {
                dateInfo.storeMappedDataAs = attrVal(storeEl, "val");
            }
            properties.date = dateInfo;
        }
        const idEl = findChildNs(prEl, "id");
        if (idEl) {
            const sdtId = attrInt(idEl, "val");
            if (sdtId !== undefined) {
                properties.id = sdtId;
            }
        }
        const bindingEl = findChildNs(prEl, "dataBinding");
        if (bindingEl) {
            const xpath = attrVal(bindingEl, "xpath");
            const storeItemId = attrVal(bindingEl, "storeItemID");
            // A binding is only recorded when both xpath and storeItemID are present.
            if (xpath && storeItemId) {
                const binding = { xpath, storeItemId };
                const prefixMappings = attrVal(bindingEl, "prefixMappings");
                if (prefixMappings) {
                    binding.prefixMappings = prefixMappings;
                }
                properties.dataBinding = binding;
            }
        }
        const placeholderEl = findChildNs(prEl, "placeholder");
        if (placeholderEl) {
            const docPartEl = findChildNs(placeholderEl, "docPart");
            if (docPartEl) {
                properties.placeholder = attrVal(docPartEl, "val");
            }
        }
        // Marker elements whose mere presence sets the same-named flag.
        const markerNames = [
            "richText",
            "picture",
            "group",
            "equation",
            "citation",
            "bibliography",
            "temporary"
        ];
        for (const marker of markerNames) {
            if (findChildNs(prEl, marker)) {
                properties[marker] = true;
            }
        }
        const repeatEl = findChild(prEl, "w15:repeatingSection");
        if (repeatEl) {
            const repeat = {};
            // Child-element form is read first ...
            const titleEl = findChild(repeatEl, "w15:sectionTitle");
            if (titleEl) {
                const title = titleEl.attributes["w15:val"] ?? titleEl.attributes["val"];
                if (title !== undefined) {
                    repeat.sectionTitle = title;
                }
            }
            if (findChild(repeatEl, "w15:doNotAllowInsertDeleteSection")) {
                repeat.allowInsertDelete = false;
            }
            // ... and the attribute form only fills in what is still missing.
            const titleAttr = repeatEl.attributes["w15:sectionTitle"];
            if (titleAttr !== undefined && repeat.sectionTitle === undefined) {
                repeat.sectionTitle = titleAttr;
            }
            const lockAttr = repeatEl.attributes["w15:doNotAllowInsertDeleteSection"];
            if (lockAttr !== undefined && repeat.allowInsertDelete === undefined) {
                repeat.allowInsertDelete = lockAttr === "0";
            }
            properties.repeatingSection = repeat;
        }
        if (findChild(prEl, "w15:repeatingSectionItem")) {
            properties.repeatingSectionItem = true;
        }
    }
    const content = [];
    for (const node of contentEl ? contentEl.children : []) {
        if (node.type !== "element") {
            continue;
        }
        switch (node.name.replace(/^w:/, "")) {
            case "p":
                content.push(parseParagraph(node, ctx));
                break;
            case "tbl":
                content.push(parseTable(node, ctx));
                break;
            case "r":
                content.push(parseRun(node));
                break;
            case "sdt": {
                // Nested SDTs keep their own properties; only results whose
                // type is "sdt" are retained.
                const nested = parseSdt(node, ctx);
                if (nested && nested.type === "sdt") {
                    content.push(nested);
                }
                break;
            }
        }
    }
    return { type: "sdt", properties, content };
}
|
|
620
|
+
/**
 * Build a table-of-contents model from the content element of a TOC SDT.
 *
 * The TOC field instruction is assembled from the `instrText` elements found
 * anywhere below `sdtContentEl` and handed to `parseTocInstruction`, which
 * fills in `toc`. Direct `p` children are parsed and kept as cached paragraphs.
 *
 * Instruction text belonging to fields nested inside the TOC field (tracked
 * through `fldChar` begin/end markers) is ignored, so switches of inner
 * fields are not appended to the TOC instruction.
 *
 * @param sdtContentEl - The SDT content element; may be absent.
 * @param ctx - Parse context, forwarded to `parseParagraph`.
 * @returns `{ type: "tableOfContents" }`, extended by `parseTocInstruction`
 *   and with `cachedParagraphs` when at least one paragraph was found.
 */
function parseTocFromSdt(sdtContentEl, ctx) {
    const toc = { type: "tableOfContents" };
    if (!sdtContentEl) {
        return toc;
    }
    let instrText = "";
    // Number of complex fields currently open while walking in document order.
    let fieldDepth = 0;
    const collectInstr = (el) => {
        for (const child of el.children) {
            if (child.type !== "element") {
                continue;
            }
            const name = child.name.replace(/^w:/, "");
            if (name === "fldChar") {
                const fldCharType = (0, parse_utils_1.attrVal)(child, "fldCharType");
                if (fldCharType === "begin") {
                    fieldDepth++;
                }
                else if (fldCharType === "end" && fieldDepth > 0) {
                    fieldDepth--;
                }
            }
            else if (name === "instrText") {
                // Depth 0 covers content without field markers; depth 1 is the
                // outermost field. Anything deeper is a nested field.
                if (fieldDepth <= 1) {
                    instrText += (0, dom_1.textContent)(child);
                }
            }
            else {
                collectInstr(child);
            }
        }
    };
    collectInstr(sdtContentEl);
    if (instrText.trim()) {
        (0, sdt_helpers_1.parseTocInstruction)(instrText, toc);
    }
    const cachedParagraphs = [];
    for (const child of sdtContentEl.children) {
        if (child.type === "element" && child.name.replace(/^w:/, "") === "p") {
            cachedParagraphs.push(parseParagraph(child, ctx));
        }
    }
    if (cachedParagraphs.length > 0) {
        toc.cachedParagraphs = cachedParagraphs;
    }
    return toc;
}
|
|
659
|
+
// TOC field instruction strings (e.g. `TOC \o "1-3" \h \t "Style,1" \c "Figure"`) are parsed by `parseTocInstruction` from the SDT helpers module.
|
|
660
|
+
// =============================================================================
|
|
661
|
+
// Paragraph Parser
|
|
662
|
+
// =============================================================================
|
|
663
|
+
/**
 * Parse a run element into `{ properties, content }`.
 *
 * @param el - The run element.
 * @returns `properties` is the result of `parseRunProperties` on the `rPr`
 *   child (or `undefined` when there is none); `content` is whatever
 *   `parseRunContent` produces for the run.
 */
function parseRun(el) {
    const { parseRunProperties } = properties_parsers_1;
    const runPropsEl = (0, parse_utils_1.findChildNs)(el, "rPr");
    let properties;
    if (runPropsEl) {
        properties = parseRunProperties(runPropsEl);
    }
    const content = parseRunContent(el);
    return { properties, content };
}
|
|
670
|
+
/**
 * Parse a paragraph element (or a paragraph-like wrapper handed in by the
 * `smartTag` / `customXml` / `dir` branch) into a `paragraph` model.
 *
 * Children are visited in document order:
 *  - `mc:AlternateContent` is replaced by the first element inside its
 *    `mc:Choice` (or `mc:Fallback` when there is no Choice);
 *  - runs drive the complex-field state machine stored on `ctx.field`;
 *    outside a field they become plain runs, or a hoisted `commentReference`
 *    when that is the run's only non-`rPr` child;
 *  - bookmarks, comment ranges, tracked-change wrappers and range markers
 *    become their dedicated child models; markers without a parseable id
 *    are dropped;
 *  - unknown elements are preserved as `opaqueParagraphChild`.
 *
 * Malformed `colFirst` / `colLast` values on a bookmark are omitted instead
 * of being stored as `NaN`, matching how ids are treated.
 *
 * @param pEl - The paragraph element.
 * @param ctx - Parse context; reads `ctx.field`, and for hyperlinks
 *   `ctx.securityPolicy.allowExternalTargets` and `ctx.relMap`.
 * @returns `{ type: "paragraph", properties, children }`, plus `paraId` /
 *   `textId` when the `w14:` attributes are present.
 */
function parseParagraph(pEl, ctx) {
    const pPrEl = (0, parse_utils_1.findChildNs)(pEl, "pPr");
    const children = [];
    // Field state machine lives on ctx so that complex fields (TOC, INDEX,
    // long REF/SEQ chains) can span paragraph boundaries — the matching
    // `<w:fldChar fldCharType="end">` may occur in a later paragraph than the
    // `begin`. Storing state on ctx is also safe because part-scoped parsers
    // (header/footer/footnote/endnote/comment) save and reset it on entry.
    const field = ctx.field;
    // Read `w:<attr>` (or unprefixed `<attr>`) as a base-10 integer;
    // `undefined` when the attribute is absent or not a number.
    const readIntAttr = (el, attr) => {
        const raw = el.attributes[`w:${attr}`] ?? el.attributes[attr];
        const value = raw !== undefined ? parseInt(raw, 10) : NaN;
        return Number.isNaN(value) ? undefined : value;
    };
    // Emit one `type` child per run inside a tracked-change wrapper
    // (ins / del / moveFrom / moveTo). Nothing is emitted without revision info.
    const pushRevisionRuns = (wrapperEl, type, parseInner) => {
        const revision = (0, properties_parsers_1.parseRevisionInfo)(wrapperEl);
        if (!revision) {
            return;
        }
        for (const inner of wrapperEl.children) {
            if (inner.type === "element" && inner.name.replace(/^w:/, "") === "r") {
                children.push({ type, revision, run: parseInner(inner) });
            }
        }
    };
    // Emit a range marker carrying id plus optional author / date (and name
    // for the move-range markers). Markers without an id are dropped.
    const pushRangeMarker = (el, type, withName) => {
        const id = (0, parse_utils_1.attrInt)(el, "id");
        if (id === undefined) {
            return;
        }
        const marker = { type, id };
        const author = (0, parse_utils_1.attrVal)(el, "author");
        if (author) {
            marker.author = author;
        }
        const date = (0, parse_utils_1.attrVal)(el, "date");
        if (date) {
            marker.date = date;
        }
        if (withName) {
            const markerName = (0, parse_utils_1.attrVal)(el, "name");
            if (markerName) {
                marker.name = markerName;
            }
        }
        children.push(marker);
    };
    for (const child of pEl.children) {
        if (child.type !== "element") {
            continue;
        }
        // Handle mc:AlternateContent — pick mc:Choice, fall back to mc:Fallback
        let resolved = child;
        if (child.name === "mc:AlternateContent") {
            const chosen = (0, dom_1.findChild)(child, "mc:Choice") ?? (0, dom_1.findChild)(child, "mc:Fallback");
            // The first element child inside Choice/Fallback is the real element
            const inner = chosen ? chosen.children.find(c => c.type === "element") : undefined;
            if (!inner) {
                continue;
            }
            resolved = inner;
        }
        const name = resolved.name.replace(/^w:/, "");
        switch (name) {
            case "r": {
                // Check for fldChar and instrText inside the run
                let hasFldChar = false;
                for (const rc of resolved.children) {
                    if (rc.type !== "element") {
                        continue;
                    }
                    const rcName = rc.name.replace(/^w:/, "");
                    if (rcName === "fldChar") {
                        hasFldChar = true;
                        const fldCharType = (0, parse_utils_1.attrVal)(rc, "fldCharType");
                        if (fldCharType === "begin") {
                            field.state = "instrText";
                            field.instr = "";
                            field.cached = "";
                            // Capture run properties from this run for the field
                            const rPrEl = (0, parse_utils_1.findChildNs)(resolved, "rPr");
                            field.runProps = rPrEl ? (0, properties_parsers_1.parseRunProperties)(rPrEl) : undefined;
                            // Parse ffData for legacy form fields
                            const ffDataEl = (0, parse_utils_1.findChildNs)(rc, "ffData");
                            field.formField = ffDataEl ? (0, form_field_parser_1.parseFfData)(ffDataEl) : undefined;
                        }
                        else if (fldCharType === "separate") {
                            field.state = "cached";
                        }
                        else if (fldCharType === "end") {
                            // Emit the assembled field as a Run with FieldContent
                            const fc = {
                                type: "field",
                                instruction: field.instr.trim(),
                                cachedValue: field.cached || undefined,
                                formField: field.formField
                            };
                            children.push({
                                properties: field.runProps,
                                content: [fc]
                            });
                            field.state = "none";
                            field.instr = "";
                            field.cached = "";
                            field.runProps = undefined;
                            field.formField = undefined;
                        }
                    }
                    else if (rcName === "instrText" && field.state === "instrText") {
                        hasFldChar = true;
                        field.instr += (0, dom_1.textContent)(rc);
                    }
                }
                if (field.state === "cached") {
                    // Collect cached text from this run
                    for (const rc of resolved.children) {
                        if (rc.type === "element" && rc.name.replace(/^w:/, "") === "t") {
                            field.cached += (0, dom_1.textContent)(rc);
                        }
                    }
                    if (!hasFldChar) {
                        continue; // Skip adding this run normally
                    }
                }
                if (field.state === "instrText" && hasFldChar) {
                    continue; // Don't add begin/instrText runs as normal content
                }
                if (field.state === "none" && !hasFldChar) {
                    // Detect a degenerate `<w:r>` whose only meaningful child is
                    // `<w:commentReference>`. The OOXML schema requires the leaf
                    // to live inside a w:r, but at the model level we represent
                    // it as a paragraph-child `commentReference`. Hoisting here
                    // means a round-trip preserves the model shape instead of
                    // collapsing to `annotationReference`.
                    let onlyCommentRefId;
                    let onlyCommentRefSeen = false;
                    let hasOtherMeaningfulChild = false;
                    for (const rcc of resolved.children) {
                        if (rcc.type !== "element") {
                            continue;
                        }
                        const rccName = rcc.name.replace(/^w:/, "");
                        if (rccName === "rPr") {
                            continue;
                        }
                        if (rccName !== "commentReference" || onlyCommentRefSeen) {
                            // Any other child, or a second commentReference
                            // (malformed), sends the run to the generic parser.
                            hasOtherMeaningfulChild = true;
                            break;
                        }
                        onlyCommentRefSeen = true;
                        onlyCommentRefId = readIntAttr(rcc, "id");
                    }
                    if (onlyCommentRefSeen && !hasOtherMeaningfulChild && onlyCommentRefId !== undefined) {
                        children.push({ type: "commentReference", id: onlyCommentRefId });
                    }
                    else {
                        children.push(parseRun(resolved));
                    }
                }
                break;
            }
            case "fldSimple": {
                // Simple field: <w:fldSimple w:instr=" PAGE "><w:r>...</w:r></w:fldSimple>
                const instr = (0, parse_utils_1.attrVal)(resolved, "instr") ?? "";
                let cached = "";
                for (const runEl of resolved.children) {
                    if (runEl.type === "element" && runEl.name.replace(/^w:/, "") === "r") {
                        for (const rc of runEl.children) {
                            if (rc.type === "element" && rc.name.replace(/^w:/, "") === "t") {
                                cached += (0, dom_1.textContent)(rc);
                            }
                        }
                    }
                }
                const fc = {
                    type: "field",
                    instruction: instr.trim(),
                    cachedValue: cached || undefined
                };
                children.push({
                    properties: undefined,
                    content: [fc]
                });
                break;
            }
            case "hyperlink": {
                const rId = resolved.attributes["r:id"];
                const anchor = resolved.attributes["w:anchor"] ?? resolved.attributes["anchor"];
                const tooltip = resolved.attributes["w:tooltip"] ?? resolved.attributes["tooltip"];
                const historyAttr = resolved.attributes["w:history"] ?? resolved.attributes["history"];
                const tgtFrame = resolved.attributes["w:tgtFrame"] ?? resolved.attributes["tgtFrame"];
                const docLocation = resolved.attributes["w:docLocation"] ?? resolved.attributes["docLocation"];
                const hRuns = [];
                for (const hChild of resolved.children) {
                    if (hChild.type === "element" && hChild.name.replace(/^w:/, "") === "r") {
                        hRuns.push(parseRun(hChild));
                    }
                }
                // Resolve URL from relMap. If the security policy disallows
                // external targets, skip URL resolution entirely so the resulting
                // Hyperlink only carries an anchor (or becomes a plain non-link
                // wrapper). Internal anchor-only hyperlinks are unaffected.
                let url;
                if (rId && ctx.securityPolicy.allowExternalTargets) {
                    const rel = ctx.relMap.get(rId);
                    if (rel && rel.targetMode === "External") {
                        url = rel.target;
                    }
                }
                const hyperlink = {
                    type: "hyperlink",
                    rId,
                    anchor,
                    url,
                    tooltip,
                    children: hRuns
                };
                if (historyAttr === "1" || historyAttr === "true") {
                    hyperlink.history = true;
                }
                if (tgtFrame) {
                    hyperlink.tgtFrame = tgtFrame;
                }
                if (docLocation) {
                    hyperlink.docLocation = docLocation;
                }
                children.push(hyperlink);
                break;
            }
            case "bookmarkStart": {
                const id = readIntAttr(resolved, "id");
                if (id === undefined) {
                    // Without a valid id we can't pair this with a bookmarkEnd; drop it
                    // rather than fabricate id=0 (which would collide with every other
                    // bookmark missing an id and corrupt cross-references on round-trip).
                    break;
                }
                const bm = {
                    type: "bookmarkStart",
                    id,
                    name: resolved.attributes["w:name"] ?? resolved.attributes["name"] ?? ""
                };
                // Column bounds are optional; a malformed value is skipped so
                // NaN never reaches the model.
                const colFirst = readIntAttr(resolved, "colFirst");
                if (colFirst !== undefined) {
                    bm.colFirst = colFirst;
                }
                const colLast = readIntAttr(resolved, "colLast");
                if (colLast !== undefined) {
                    bm.colLast = colLast;
                }
                const dcx = resolved.attributes["w:displacedByCustomXml"] ??
                    resolved.attributes["displacedByCustomXml"];
                if (dcx === "next" || dcx === "prev") {
                    bm.displacedByCustomXml = dcx;
                }
                children.push(bm);
                break;
            }
            case "bookmarkEnd":
            case "commentRangeStart":
            case "commentRangeEnd":
            case "commentReference": {
                // Id-only markers: the model type equals the element's local name.
                const id = readIntAttr(resolved, "id");
                if (id !== undefined) {
                    children.push({ type: name, id });
                }
                break;
            }
            case "ins":
                // Inserted run (track changes)
                pushRevisionRuns(resolved, "insertedRun", parseRun);
                break;
            case "del":
                // Deleted run (track changes)
                pushRevisionRuns(resolved, "deletedRun", parseDeletedRun);
                break;
            case "moveFrom":
                pushRevisionRuns(resolved, "movedFromRun", parseRun);
                break;
            case "moveTo":
                pushRevisionRuns(resolved, "movedToRun", parseRun);
                break;
            case "moveFromRangeStart":
            case "moveFromRangeEnd":
            case "moveToRangeStart":
            case "moveToRangeEnd":
                pushRangeMarker(resolved, name, true);
                break;
            case "customXmlInsRangeStart":
            case "customXmlInsRangeEnd":
            case "customXmlDelRangeStart":
            case "customXmlDelRangeEnd":
            case "customXmlMoveFromRangeStart":
            case "customXmlMoveFromRangeEnd":
            case "customXmlMoveToRangeStart":
            case "customXmlMoveToRangeEnd":
                pushRangeMarker(resolved, name, false);
                break;
            case "smartTag":
            case "customXml":
            case "dir": {
                // Semantic wrappers: flatten their children into the current
                // paragraph. The wrapper's own properties element (smartTagPr,
                // customXmlPr, …) is not a paragraph child and would otherwise
                // fall through to the `default` branch below and be emitted as a
                // bogus `opaqueParagraphChild` containing the properties XML —
                // poisoning the paragraph on round-trip. Build a synthetic element
                // that excludes those `*Pr` siblings before recursing.
                const filteredChildren = resolved.children.filter(c => {
                    if (c.type !== "element") {
                        return true;
                    }
                    const ln = c.name.replace(/^w:/, "");
                    return ln !== "smartTagPr" && ln !== "customXmlPr";
                });
                const surrogate = {
                    ...resolved,
                    children: filteredChildren
                };
                const subPara = parseParagraph(surrogate, ctx);
                for (const sub of subPara.children) {
                    children.push(sub);
                }
                break;
            }
            case "proofErr":
            case "permStart":
            case "permEnd":
            case "lastRenderedPageBreak":
                // Non-semantic markers; safely ignored
                break;
            default:
                // Unknown paragraph child: preserve as opaque for round-trip fidelity
                children.push({
                    type: "opaqueParagraphChild",
                    rawXml: (0, parse_utils_1.serializeElement)(resolved)
                });
                break;
        }
    }
    const paraId = pEl.attributes["w14:paraId"];
    const textId = pEl.attributes["w14:textId"];
    const result = {
        type: "paragraph",
        properties: pPrEl ? (0, paragraph_section_parsers_1.parseParagraphProperties)(pPrEl) : undefined,
        children
    };
    if (paraId) {
        result.paraId = paraId;
    }
    if (textId) {
        result.textId = textId;
    }
    return result;
}
|
|
1126
|
+
/**
 * Parse a run that belongs to a deletion revision.
 *
 * Deleted runs carry their text in `w:delText`; plain `w:t` is accepted as
 * well and handled identically. Besides text, only `br` and `tab` children
 * are turned into content items — every other child element is ignored.
 *
 * @param el The run element node.
 * @returns `{ properties, content }`; `properties` is `undefined` when the
 *          run has no `rPr` child.
 */
function parseDeletedRun(el) {
    const propsEl = (0, parse_utils_1.findChildNs)(el, "rPr");
    const pieces = [];
    for (const node of el.children) {
        if (node.type !== "element") {
            continue;
        }
        switch (node.name.replace(/^w:/, "")) {
            case "delText":
            case "t":
                pieces.push({ type: "text", text: (0, dom_1.textContent)(node) });
                break;
            case "br":
                pieces.push({
                    type: "break",
                    breakType: (0, parse_utils_1.attrVal)(node, "type")
                });
                break;
            case "tab":
                pieces.push({ type: "tab" });
                break;
            default:
                break;
        }
    }
    const properties = propsEl ? (0, properties_parsers_1.parseRunProperties)(propsEl) : undefined;
    return { properties, content: pieces };
}
|
|
1156
|
+
// =============================================================================
|
|
1157
|
+
// Table Parser
|
|
1158
|
+
// =============================================================================
|
|
1159
|
+
/**
 * Parse a `<w:tc>` table cell into `{ properties, content }`.
 *
 * Content: direct `p` children are parsed with `parseParagraph`, direct
 * `tbl` children with `parseTable`. A direct `sdt` child is parsed with
 * `parseSdt` and only its paragraph/table items are hoisted into the cell;
 * other SDT items and the SDT's own metadata are dropped, because the cell
 * content list only holds paragraphs and tables.
 *
 * Properties: built from the `tcPr` child when there is one, otherwise
 * `undefined`. On/off flags (`noWrap`, `hideMark`, `tcFitText`) honour an
 * explicit `val` of "0", "false" or "off" and are then left unset instead of
 * being reported as `true`.
 *
 * @param el  The `<w:tc>` element node.
 * @param ctx Reader context, forwarded unchanged to the nested parsers.
 * @returns   `{ properties, content }`.
 */
function parseTableCell(el, ctx) {
    const tcPrEl = (0, parse_utils_1.findChildNs)(el, "tcPr");
    const content = [];
    for (const child of el.children) {
        if (child.type !== "element") {
            continue;
        }
        const name = child.name.replace(/^w:/, "");
        if (name === "p") {
            content.push(parseParagraph(child, ctx));
        }
        else if (name === "tbl") {
            content.push(parseTable(child, ctx));
        }
        else if (name === "sdt") {
            const sdt = parseSdt(child, ctx);
            if (sdt && sdt.type === "sdt") {
                for (const c of sdt.content) {
                    // Run-only and nested-SDT items cannot be direct siblings
                    // of <w:p>/<w:tbl> inside a <w:tc>, so they are skipped.
                    if (c.type === "paragraph" || c.type === "table") {
                        content.push(c);
                    }
                }
            }
        }
    }
    if (!tcPrEl) {
        return { properties: undefined, content };
    }
    // An on/off element is "on" when present unless its val explicitly says otherwise.
    const isFlagOn = (localName) => {
        const flagEl = (0, parse_utils_1.findChildNs)(tcPrEl, localName);
        if (!flagEl) {
            return false;
        }
        const raw = (0, parse_utils_1.attrVal)(flagEl, "val");
        return raw !== "0" && raw !== "false" && raw !== "off";
    };
    // Revision info of a tcPr child, or undefined when the child or its info is missing.
    const revisionOf = (localName) => {
        const revEl = (0, parse_utils_1.findChildNs)(tcPrEl, localName);
        return revEl ? (0, properties_parsers_1.parseRevisionInfo)(revEl) : undefined;
    };
    const p = {};
    const wEl = (0, parse_utils_1.findChildNs)(tcPrEl, "tcW");
    if (wEl) {
        p.width = (0, properties_parsers_1.parseTableWidth)(wEl);
    }
    const gsEl = (0, parse_utils_1.findChildNs)(tcPrEl, "gridSpan");
    if (gsEl) {
        p.gridSpan = (0, parse_utils_1.attrInt)(gsEl, "val");
    }
    const vmEl = (0, parse_utils_1.findChildNs)(tcPrEl, "vMerge");
    if (vmEl) {
        // A vMerge element without a val continues the merge started above.
        p.verticalMerge = ((0, parse_utils_1.attrVal)(vmEl, "val") ??
            "continue");
    }
    const bordersEl = (0, parse_utils_1.findChildNs)(tcPrEl, "tcBorders");
    if (bordersEl) {
        p.borders = (0, table_properties_parsers_1.parseTableBorders)(bordersEl);
    }
    const shdEl = (0, parse_utils_1.findChildNs)(tcPrEl, "shd");
    if (shdEl) {
        p.shading = (0, properties_parsers_1.parseShading)(shdEl);
    }
    const vAlignEl = (0, parse_utils_1.findChildNs)(tcPrEl, "vAlign");
    if (vAlignEl) {
        p.verticalAlign = (0, parse_utils_1.attrVal)(vAlignEl, "val");
    }
    if (isFlagOn("noWrap")) {
        p.noWrap = true;
    }
    const textDirEl = (0, parse_utils_1.findChildNs)(tcPrEl, "textDirection");
    if (textDirEl) {
        p.textDirection = (0, parse_utils_1.attrVal)(textDirEl, "val");
    }
    const marginsEl = (0, parse_utils_1.findChildNs)(tcPrEl, "tcMar");
    if (marginsEl) {
        p.margins = (0, table_properties_parsers_1.parseTableCellMargins)(marginsEl);
    }
    // Conditional formatting
    const cnfEl = (0, parse_utils_1.findChildNs)(tcPrEl, "cnfStyle");
    if (cnfEl) {
        p.cnfStyle = (0, parse_utils_1.attrVal)(cnfEl, "val");
    }
    // Hide cell end-of-cell marker
    if (isFlagOn("hideMark")) {
        p.hideMark = true;
    }
    // Fit text
    if (isFlagOn("tcFitText")) {
        p.fitText = true;
    }
    // Cell-level revisions
    const insertedRev = revisionOf("cellIns");
    if (insertedRev) {
        p.inserted = { revision: insertedRev };
    }
    const deletedRev = revisionOf("cellDel");
    if (deletedRev) {
        p.deleted = { revision: deletedRev };
    }
    const cellMergeEl = (0, parse_utils_1.findChildNs)(tcPrEl, "cellMerge");
    if (cellMergeEl) {
        const vMerge = (0, parse_utils_1.attrVal)(cellMergeEl, "vMerge");
        const rev = (0, properties_parsers_1.parseRevisionInfo)(cellMergeEl);
        if (rev && (vMerge === "cont" || vMerge === "rest")) {
            p.cellMerge = { vMerge, revision: rev };
        }
    }
    // tcPrChange: only the presence of the change marker is captured. The
    // previous <w:tcPr> nested inside it is deliberately not parsed.
    const changeRev = revisionOf("tcPrChange");
    if (changeRev) {
        p.propertyChange = { revision: changeRev };
    }
    return { properties: p, content };
}
|
|
1286
|
+
/**
 * Parse a `<w:tr>` table row into `{ properties, cells }`.
 *
 * Cells: every direct `tc` child is parsed with `parseTableCell`; other
 * children are ignored.
 *
 * Properties: built when the row has a `trPr` and/or a `tblPrEx` child,
 * otherwise `undefined`. On/off flags (`tblHeader`, `cantSplit`, `hidden`)
 * honour an explicit `val` of "0", "false" or "off" and are then left unset
 * instead of being reported as `true`.
 *
 * @param el  The `<w:tr>` element node.
 * @param ctx Reader context, forwarded unchanged to `parseTableCell`.
 * @returns   `{ properties, cells }`.
 */
function parseTableRow(el, ctx) {
    const trPrEl = (0, parse_utils_1.findChildNs)(el, "trPr");
    const tblPrExEl = (0, parse_utils_1.findChildNs)(el, "tblPrEx");
    const cells = [];
    for (const child of el.children) {
        if (child.type === "element" && child.name.replace(/^w:/, "") === "tc") {
            cells.push(parseTableCell(child, ctx));
        }
    }
    if (!trPrEl && !tblPrExEl) {
        return { properties: undefined, cells };
    }
    const p = {};
    if (tblPrExEl) {
        p.tblPrEx = (0, table_properties_parsers_1.parseTableProperties)(tblPrExEl);
    }
    if (trPrEl) {
        // An on/off element is "on" when present unless its val explicitly says otherwise.
        const isFlagOn = (localName) => {
            const flagEl = (0, parse_utils_1.findChildNs)(trPrEl, localName);
            if (!flagEl) {
                return false;
            }
            const raw = (0, parse_utils_1.attrVal)(flagEl, "val");
            return raw !== "0" && raw !== "false" && raw !== "off";
        };
        const heightEl = (0, parse_utils_1.findChildNs)(trPrEl, "trHeight");
        if (heightEl) {
            p.height = {
                value: (0, parse_utils_1.attrInt)(heightEl, "val") ?? 0,
                rule: (0, parse_utils_1.attrVal)(heightEl, "hRule")
            };
        }
        if (isFlagOn("tblHeader")) {
            p.tableHeader = true;
        }
        if (isFlagOn("cantSplit")) {
            p.cantSplit = true;
        }
        if (isFlagOn("hidden")) {
            p.hidden = true;
        }
        const csEl = (0, parse_utils_1.findChildNs)(trPrEl, "tblCellSpacing");
        if (csEl) {
            p.cellSpacing = (0, properties_parsers_1.parseTableWidth)(csEl);
        }
        // Row-level insert/delete revisions.
        const insEl = (0, parse_utils_1.findChildNs)(trPrEl, "ins");
        if (insEl) {
            const rev = (0, properties_parsers_1.parseRevisionInfo)(insEl);
            if (rev) {
                p.inserted = { revision: rev };
            }
        }
        const delEl = (0, parse_utils_1.findChildNs)(trPrEl, "del");
        if (delEl) {
            const rev = (0, properties_parsers_1.parseRevisionInfo)(delEl);
            if (rev) {
                p.deleted = { revision: rev };
            }
        }
        const gbEl = (0, parse_utils_1.findChildNs)(trPrEl, "gridBefore");
        if (gbEl) {
            p.gridBefore = (0, parse_utils_1.attrInt)(gbEl, "val");
        }
        const gaEl = (0, parse_utils_1.findChildNs)(trPrEl, "gridAfter");
        if (gaEl) {
            p.gridAfter = (0, parse_utils_1.attrInt)(gaEl, "val");
        }
        const wbEl = (0, parse_utils_1.findChildNs)(trPrEl, "wBefore");
        if (wbEl) {
            p.widthBefore = (0, properties_parsers_1.parseTableWidth)(wbEl);
        }
        const waEl = (0, parse_utils_1.findChildNs)(trPrEl, "wAfter");
        if (waEl) {
            p.widthAfter = (0, properties_parsers_1.parseTableWidth)(waEl);
        }
        const cnfEl = (0, parse_utils_1.findChildNs)(trPrEl, "cnfStyle");
        if (cnfEl) {
            p.cnfStyle = (0, parse_utils_1.attrVal)(cnfEl, "val");
        }
        const trPrChangeEl = (0, parse_utils_1.findChildNs)(trPrEl, "trPrChange");
        if (trPrChangeEl) {
            const rev = (0, properties_parsers_1.parseRevisionInfo)(trPrChangeEl);
            if (rev) {
                // The previous row properties go through the reduced parser,
                // which does not look at nested change markers.
                const prevTrPr = (0, parse_utils_1.findChildNs)(trPrChangeEl, "trPr");
                p.propertyChange = {
                    revision: rev,
                    previousProperties: prevTrPr ? parseRowPrInner(prevTrPr) : undefined
                };
            }
        }
    }
    return { properties: p, cells };
}
|
|
1372
|
+
/**
 * Reduced row-properties parser, used for the previous `trPr` nested inside
 * a `trPrChange`. Only height, the header flag and the can't-split flag are
 * read; nested change markers are not followed.
 *
 * On/off flags honour an explicit `val` of "0", "false" or "off" and are
 * then left unset instead of being reported as `true`.
 *
 * @param trPrEl The `<w:trPr>` element node.
 * @returns Object with the optional keys `height`, `tableHeader`, `cantSplit`.
 */
function parseRowPrInner(trPrEl) {
    // An on/off element is "on" when present unless its val explicitly says otherwise.
    const isFlagOn = (localName) => {
        const flagEl = (0, parse_utils_1.findChildNs)(trPrEl, localName);
        if (!flagEl) {
            return false;
        }
        const raw = (0, parse_utils_1.attrVal)(flagEl, "val");
        return raw !== "0" && raw !== "false" && raw !== "off";
    };
    const p = {};
    const heightEl = (0, parse_utils_1.findChildNs)(trPrEl, "trHeight");
    if (heightEl) {
        p.height = {
            value: (0, parse_utils_1.attrInt)(heightEl, "val") ?? 0,
            rule: (0, parse_utils_1.attrVal)(heightEl, "hRule")
        };
    }
    if (isFlagOn("tblHeader")) {
        p.tableHeader = true;
    }
    if (isFlagOn("cantSplit")) {
        p.cantSplit = true;
    }
    return p;
}
|
|
1390
|
+
/**
 * Parse a `<w:tbl>` element into a table model.
 *
 * Rows come from the direct `tr` children (parsed with `parseTableRow`).
 * Column widths come from the `gridCol` children of `tblGrid`, read from the
 * `w:w` (or unprefixed `w`) attribute. A missing or non-numeric width is
 * recorded as 0 so the list never contains `NaN`.
 *
 * @param tblEl The `<w:tbl>` element node.
 * @param ctx   Reader context, forwarded unchanged to `parseTableRow`.
 * @returns `{ type: "table", properties, columnWidths, rows }`; `properties`
 *          and `columnWidths` are `undefined` when `tblPr` / `tblGrid` are
 *          absent.
 */
function parseTable(tblEl, ctx) {
    const tblPrEl = (0, parse_utils_1.findChildNs)(tblEl, "tblPr");
    const gridEl = (0, parse_utils_1.findChildNs)(tblEl, "tblGrid");
    const rows = [];
    for (const child of tblEl.children) {
        if (child.type === "element" && child.name.replace(/^w:/, "") === "tr") {
            rows.push(parseTableRow(child, ctx));
        }
    }
    let columnWidths;
    if (gridEl) {
        columnWidths = [];
        for (const col of (0, parse_utils_1.findChildrenNs)(gridEl, "gridCol")) {
            const width = Number.parseInt(col.attributes["w:w"] ?? col.attributes["w"] ?? "0", 10);
            // Same fallback as for a missing attribute: never store NaN.
            columnWidths.push(Number.isNaN(width) ? 0 : width);
        }
    }
    return {
        type: "table",
        properties: tblPrEl ? (0, table_properties_parsers_1.parseTableProperties)(tblPrEl) : undefined,
        columnWidths,
        rows
    };
}
|
|
1413
|
+
// =============================================================================
|
|
1414
|
+
// Footnotes/Endnotes Parser
|
|
1415
|
+
// =============================================================================
|
|
1416
|
+
/**
 * Parse a footnotes/endnotes part into a list of `{ id, content, type? }`.
 *
 * The context's field state is swapped for a fresh one while the part is
 * parsed and restored afterwards (also on error), so an unterminated complex
 * field elsewhere cannot swallow runs of a note.
 *
 * Skipped entries: notes typed `separator` / `continuationSeparator`, and
 * notes without an id. Note content is the note's direct paragraphs plus the
 * paragraphs hoisted out of direct `sdt` children; SDT metadata and
 * non-paragraph SDT items are dropped.
 *
 * @param xmlStr      XML text of the notes part.
 * @param elementName Local name of the note element to collect.
 * @param ctx         Reader context; `ctx.field` is temporarily replaced.
 * @returns Array of parsed notes.
 */
function parseNotesXml(xmlStr, elementName, ctx) {
    const outerField = ctx.field;
    ctx.field = (0, reader_context_1.createFieldState)();
    try {
        const { root } = (0, dom_1.parseXml)(xmlStr);
        const parsedNotes = [];
        for (const noteEl of (0, parse_utils_1.findChildrenNs)(root, elementName)) {
            const id = (0, parse_utils_1.attrInt)(noteEl, "id");
            const type = (0, parse_utils_1.attrVal)(noteEl, "type");
            // Separator entries are not carried in the model, and a note
            // without an id cannot be referenced.
            const isSeparator = type === "separator" || type === "continuationSeparator";
            if (isSeparator || id === undefined) {
                continue;
            }
            const paragraphs = [];
            for (const node of noteEl.children) {
                if (node.type !== "element") {
                    continue;
                }
                switch (node.name.replace(/^w:/, "")) {
                    case "p":
                        paragraphs.push(parseParagraph(node, ctx));
                        break;
                    case "sdt": {
                        const sdt = parseSdt(node, ctx);
                        if (!sdt || sdt.type !== "sdt") {
                            break;
                        }
                        for (const inner of sdt.content) {
                            if (inner.type === "paragraph") {
                                paragraphs.push(inner);
                            }
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
            const note = { id, content: paragraphs };
            if (type === "continuationNotice" || type === "normal") {
                note.type = type;
            }
            parsedNotes.push(note);
        }
        return parsedNotes;
    }
    finally {
        ctx.field = outerField;
    }
}
|
|
1474
|
+
// =============================================================================
|
|
1475
|
+
// Header/Footer Parser
|
|
1476
|
+
// =============================================================================
|
|
1477
|
+
/**
 * Parse the XML text of a header/footer part.
 *
 * @param xmlStr XML text of the part.
 * @param ctx    Reader context, forwarded to `parseHeaderFooterRoot`.
 * @returns Whatever `parseHeaderFooterRoot` returns for the parsed root.
 */
function parseHeaderFooterXml(xmlStr, ctx) {
    const parsedDoc = (0, dom_1.parseXml)(xmlStr);
    return parseHeaderFooterRoot(parsedDoc.root, ctx);
}
|
|
1480
|
+
/**
 * Parse an already-parsed header/footer root element into `{ children }`.
 *
 * The context's field state is swapped for a fresh one while the part is
 * parsed and restored afterwards (also on error), so an unterminated complex
 * field elsewhere cannot consume header/footer runs.
 *
 * Direct `p` and `tbl` children are parsed as such. Direct `sdt` children
 * are flattened: their paragraph/table items are hoisted into the result and
 * the SDT-level metadata is dropped.
 *
 * @param root Root element node of the header/footer part.
 * @param ctx  Reader context; `ctx.field` is temporarily replaced.
 * @returns `{ children }` with the parsed paragraphs and tables in order.
 */
function parseHeaderFooterRoot(root, ctx) {
    const outerField = ctx.field;
    ctx.field = (0, reader_context_1.createFieldState)();
    try {
        const blocks = [];
        for (const node of root.children) {
            if (node.type !== "element") {
                continue;
            }
            switch (node.name.replace(/^w:/, "")) {
                case "p":
                    blocks.push(parseParagraph(node, ctx));
                    break;
                case "tbl":
                    blocks.push(parseTable(node, ctx));
                    break;
                case "sdt": {
                    const sdt = parseSdt(node, ctx);
                    if (!sdt || sdt.type !== "sdt") {
                        break;
                    }
                    for (const inner of sdt.content) {
                        if (inner.type === "paragraph" || inner.type === "table") {
                            blocks.push(inner);
                        }
                    }
                    break;
                }
                default:
                    break;
            }
        }
        return { children: blocks };
    }
    finally {
        ctx.field = outerField;
    }
}
|
|
1522
|
+
/** Detect watermark from a header's parsed XML root element. */
|
|
1523
|
+
// =============================================================================
|
|
1524
|
+
// Comments Parser
|
|
1525
|
+
// =============================================================================
|
|
1526
|
+
/**
 * Parse the comments part by delegating to the comments parser, handing it
 * this module's `parseParagraph` as the paragraph callback.
 *
 * @param xmlStr XML text of the comments part.
 * @param ctx    Reader context, forwarded unchanged.
 * @returns The comments parser's result.
 */
function parseCommentsXmlFromCtx(xmlStr, ctx) {
    const paragraphParser = parseParagraph;
    return (0, comments_parser_1.parseCommentsXml)(xmlStr, ctx, paragraphParser);
}
|
|
1529
|
+
// =============================================================================
|
|
1530
|
+
// Main Document Parser
|
|
1531
|
+
// =============================================================================
|
|
1532
|
+
/**
 * Walk an element tree and collect drawings into the three output arrays.
 *
 * - `wp:anchor` holding a `wsp` shape   → `shapes` (via `parseDrawingShape`).
 * - any other `wp:anchor`               → `images` when `parseFloatingImage`
 *   recognises it, otherwise an `opaqueDrawing` wrapped in a synthetic
 *   `w:drawing` element.
 * - `wp:inline` with graphic data but no picture → `opaqueDrawings`, with
 *   the raw XML wrapped in `<w:drawing>…</w:drawing>`.
 * - every other element is searched recursively; anchors and inlines
 *   themselves are not descended into.
 *
 * @param el             Element whose children are scanned.
 * @param images         Output array for floating images (appended to).
 * @param shapes         Output array for drawing shapes (appended to).
 * @param opaqueDrawings Output array for opaque drawings (appended to).
 * @param ctx            Reader context, forwarded to `parseDrawingShape`.
 */
function extractFloatingContent(el, images, shapes, opaqueDrawings, ctx) {
    for (const node of el.children) {
        if (node.type !== "element") {
            continue;
        }
        switch (node.name) {
            case "wp:anchor": {
                // Decide between shape (wsp), picture, and anything else.
                const graphic = (0, dom_1.findChild)(node, "a:graphic");
                const graphicData = graphic ? (0, dom_1.findChild)(graphic, "a:graphicData") : undefined;
                const wsp = graphicData
                    ? ((0, dom_1.findChild)(graphicData, "wps:wsp") ?? (0, parse_utils_1.findChildNs)(graphicData, "wsp"))
                    : undefined;
                if (wsp) {
                    const shape = parseDrawingShape(node, wsp, ctx);
                    if (shape) {
                        shapes.push(shape);
                    }
                    break;
                }
                const floatingImage = (0, image_parsers_1.parseFloatingImage)(node);
                if (floatingImage) {
                    images.push(floatingImage);
                    break;
                }
                // Unknown anchor content (chart, diagram, etc.): keep the raw XML.
                const wrapper = findDrawingParent(node);
                if (wrapper) {
                    const relIds = new Set();
                    (0, parse_utils_1.collectRIds)(wrapper, relIds);
                    opaqueDrawings.push({
                        type: "opaqueDrawing",
                        rawXml: (0, parse_utils_1.serializeElement)(wrapper),
                        referencedRIds: [...relIds]
                    });
                }
                break;
            }
            case "wp:inline": {
                const graphic = (0, dom_1.findChild)(node, "a:graphic");
                const graphicData = graphic ? (0, dom_1.findChild)(graphic, "a:graphicData") : undefined;
                if (!graphicData) {
                    break;
                }
                const picture = (0, dom_1.findChild)(graphicData, "pic:pic") ?? (0, parse_utils_1.findChildNs)(graphicData, "pic");
                if (picture) {
                    break;
                }
                // Non-picture inline drawing. The run parser also keeps such a
                // drawing as an `opaqueRun`; the body-level caller strips that
                // duplicate once this entry has been captured.
                const relIds = new Set();
                (0, parse_utils_1.collectRIds)(node, relIds);
                const wrappedXml = `<w:drawing>${(0, parse_utils_1.serializeElement)(node)}</w:drawing>`;
                opaqueDrawings.push({
                    type: "opaqueDrawing",
                    rawXml: wrappedXml,
                    referencedRIds: [...relIds]
                });
                break;
            }
            default:
                extractFloatingContent(node, images, shapes, opaqueDrawings, ctx);
                break;
        }
    }
}
|
|
1602
|
+
/**
 * Build a stand-in `w:drawing` element around an anchor so it can be
 * serialized. Parsed nodes carry no parent references, so the real ancestor
 * cannot be looked up; a new attribute-less wrapper is created on every call.
 *
 * @param anchorEl The anchor element to wrap (kept by reference).
 * @returns A synthetic `w:drawing` element whose only child is `anchorEl`.
 */
function findDrawingParent(anchorEl) {
    const syntheticDrawing = {
        type: "element",
        name: "w:drawing",
        attributes: {},
        children: []
    };
    syntheticDrawing.children.push(anchorEl);
    return syntheticDrawing;
}
|
|
1612
|
+
/**
 * Decide whether a parsed paragraph is "empty" for synthetic-anchor
 * detection.
 *
 * A paragraph is empty when it has no children, or when every child is a run
 * whose content is absent or consists solely of zero-length text segments.
 * Any non-run child (hyperlink, bookmark, insertedRun, …) and any non-text
 * run item (image, field, break, tab, …) makes it non-empty. Paragraph
 * properties are intentionally ignored.
 *
 * A run without a `content` array is treated as contributing nothing rather
 * than raising.
 *
 * @param para Parsed paragraph model.
 * @returns `true` when the paragraph carries no meaningful content.
 */
function isEmptyParagraph(para) {
    if (!para.children || para.children.length === 0) {
        return true;
    }
    for (const child of para.children) {
        if (!(0, text_utils_1.isRun)(child)) {
            // Anything that is not a plain run counts as meaningful content.
            return false;
        }
        // `content` may be absent on a run; treat that as an empty run.
        for (const item of child.content ?? []) {
            if (item.type !== "text" || item.text.length > 0) {
                return false;
            }
        }
    }
    return true;
}
|
|
1647
|
+
/**
 * Remove `opaqueRun` items that wrap a non-picture `<wp:inline>` drawing
 * from the runs of a paragraph.
 *
 * Such items are kept by the run parser so the drawing survives in places
 * where the body-level extractor never runs. At the body level the same
 * drawing is also captured as an `OpaqueDrawing`, so keeping both would
 * duplicate it.
 *
 * Each run's `content` array is compacted in place (same array object, one
 * pass). Non-run children and runs without a content array are skipped.
 *
 * @param para Parsed paragraph model; its runs' content arrays are mutated.
 */
function stripInlineDrawingOpaqueRuns(para) {
    for (const child of para.children ?? []) {
        if (!(0, text_utils_1.isRun)(child) || !Array.isArray(child.content)) {
            continue;
        }
        const items = child.content;
        let kept = 0;
        // Writes only ever land at or before the index being read, so the
        // iteration is not disturbed.
        for (const item of items) {
            const isInlineNonPicture = item.type === "opaqueRun" &&
                item.rawXml.includes("<wp:inline") &&
                !item.rawXml.includes("<pic:pic");
            if (!isInlineNonPicture) {
                items[kept] = item;
                kept++;
            }
        }
        items.length = kept;
    }
}
|
|
1678
|
+
/**
 * Parse the main document part into `{ body, sectionProperties, background }`.
 *
 * - `background` is built from the root's `background` child (color and
 *   theme attributes that are non-empty), or left `undefined`.
 * - `body` receives the body's children in document order: paragraphs,
 *   tables, SDTs, alt-chunks, math and VML text boxes. Drawings pulled out of
 *   a paragraph are inserted directly after that paragraph.
 * - `sectionProperties` comes from a body-level `sectPr`.
 *
 * @param xmlStr XML text of the document part.
 * @param ctx    Reader context, forwarded to the nested parsers.
 * @returns `{ body, sectionProperties, background }`.
 * @throws DocxParseError when the document has no body element.
 */
function parseDocumentXml(xmlStr, ctx) {
    const { root } = (0, dom_1.parseXml)(xmlStr);
    // Page background.
    let background;
    const backgroundEl = (0, parse_utils_1.findChildNs)(root, "background");
    if (backgroundEl) {
        background = {};
        for (const attrName of ["color", "themeColor", "themeShade", "themeTint"]) {
            const value = (0, parse_utils_1.attrVal)(backgroundEl, attrName);
            if (value) {
                background[attrName] = value;
            }
        }
    }
    const bodyEl = (0, parse_utils_1.findChildNs)(root, "body") ?? (0, dom_1.findChild)(root, "w:body");
    if (!bodyEl) {
        throw new errors_1.DocxParseError("Missing w:body element in document.xml");
    }
    const body = [];
    let sectionProperties;
    // Handles one body-level <w:p>: either unwraps a math-only paragraph or
    // appends the paragraph followed by the drawings extracted from it.
    const appendParagraph = (pEl) => {
        // A paragraph whose only element children are m:oMathPara (plus an
        // optional w:pPr) is a carrier for body-level math blocks; surface
        // the blocks directly and drop the carrier.
        const mathParas = [];
        let hasOtherElement = false;
        for (const node of pEl.children) {
            if (node.type !== "element") {
                continue;
            }
            if (node.name === "m:oMathPara") {
                mathParas.push(node);
            }
            else if (node.name !== "w:pPr") {
                hasOtherElement = true;
            }
        }
        if (mathParas.length > 0 && !hasOtherElement) {
            for (const mathPara of mathParas) {
                body.push((0, math_parser_1.parseMathBlock)(mathPara));
            }
            return;
        }
        const para = parseParagraph(pEl, ctx);
        // Drawings are collected per paragraph so they keep their position.
        const floatingImages = [];
        const drawingShapes = [];
        const opaqueDrawings = [];
        extractFloatingContent(pEl, floatingImages, drawingShapes, opaqueDrawings, ctx);
        // The run parser already kept non-picture inline drawings as
        // opaqueRuns; now that they are captured as opaque drawings too,
        // remove the run-level copies to avoid duplicate output.
        if (opaqueDrawings.length > 0) {
            stripInlineDrawingOpaqueRuns(para);
        }
        // An otherwise empty paragraph that only carried extracted drawings
        // is a synthetic carrier; keeping it would add a phantom empty
        // paragraph on every round-trip.
        const extractedCount = floatingImages.length + drawingShapes.length + opaqueDrawings.length;
        const paragraphIsEmpty = isEmptyParagraph(para);
        if (extractedCount === 0 || !paragraphIsEmpty) {
            body.push(para);
        }
        for (const extracted of [...floatingImages, ...drawingShapes, ...opaqueDrawings]) {
            body.push(extracted);
        }
    };
    for (const child of bodyEl.children) {
        if (child.type !== "element") {
            continue;
        }
        const localName = child.name.replace(/^w:/, "");
        if (localName === "p") {
            appendParagraph(child);
        }
        else if (localName === "tbl") {
            body.push(parseTable(child, ctx));
        }
        else if (localName === "sectPr") {
            // Final section properties at the body level.
            sectionProperties = (0, paragraph_section_parsers_1.parseSectionProperties)(child);
        }
        else if (localName === "sdt") {
            const sdtResult = parseSdt(child, ctx);
            if (sdtResult) {
                body.push(sdtResult);
            }
        }
        else if (localName === "altChunk") {
            const rId = child.attributes["r:id"] ?? child.attributes["id"];
            if (rId) {
                body.push({ type: "altChunk", rId });
            }
        }
        else {
            // Math namespace elements placed directly in the body.
            if (child.name === "m:oMathPara") {
                body.push((0, math_parser_1.parseMathBlock)(child));
            }
            else if (child.name === "m:oMath") {
                body.push({ type: "math", content: (0, math_parser_1.parseMathContent)(child) });
            }
            // VML pict (textbox).
            if (localName === "pict" || child.name === "w:pict") {
                const textBox = parseTextBox(child, ctx);
                if (textBox) {
                    body.push(textBox);
                }
            }
        }
    }
    return { body, sectionProperties, background };
}
|
|
1826
|
+
/**
 * Parse an in-memory DOCX package into a DocxDocument model.
 *
 * Steps, in order:
 *  1. Resolve the effective security policy from `options.securityPolicy`.
 *  2. Reject input whose byte length is greater than `maxPackageSize`.
 *  3. If the input begins with the CFB signature (an encrypted DOCX), decrypt
 *     it with `options.password` and read the decrypted ZIP instead. When no
 *     password was supplied the call fails with a DocxEncryptedError.
 *  4. Otherwise pass the bytes and the policy to `_readDocxInner`.
 *
 * @param {Uint8Array} buffer - Raw bytes of the DOCX file.
 * @param {object} [options] - Optional settings; this function reads
 *   `securityPolicy` and `password` from it.
 * @returns {Promise<object>} The parsed document model.
 * @throws {DocxLimitExceededError} When the input is larger than `maxPackageSize`.
 * @throws {DocxEncryptedError} When the input is CFB-wrapped and no password was given.
 * @throws {DocxParseError} When reading fails with anything that is not already a
 *   DocxError; the original failure is attached as `cause`.
 */
async function readDocx(buffer, options) {
    const limits = (0, policy_1.resolveSecurityPolicy)(options?.securityPolicy);
    // Defense-in-depth: refuse clearly oversized input before doing any work.
    // The same cap is applied again while entries are decompressed, so a
    // pathological deflate stream cannot bypass it.
    const inputSize = buffer.length;
    if (inputSize > limits.maxPackageSize) {
        throw new errors_1.DocxLimitExceededError("packageSize", limits.maxPackageSize, inputSize, "compressed input larger than maxPackageSize");
    }
    // An encrypted DOCX is a CFB container rather than a ZIP, so it must be
    // recognised before the ZIP parser sees it.
    // CFB signature: D0 CF 11 E0 A1 B1 1A E1
    const cfbSignature = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
    const looksEncrypted = buffer.length >= cfbSignature.length &&
        cfbSignature.every((expectedByte, offset) => buffer[offset] === expectedByte);
    if (looksEncrypted) {
        if (options?.password == null) {
            throw new errors_1.DocxEncryptedError();
        }
        // Forward the package-size cap so a hostile CFB cannot announce a
        // multi-GiB decrypted size and force a huge allocation before the
        // unzip stage even starts.
        const plainZip = await (0, encryption_1.decryptDocx)(buffer, options.password, limits.maxPackageSize);
        return readDocx(plainZip, options);
    }
    try {
        return await _readDocxInner(buffer, limits);
    }
    catch (failure) {
        // Errors that are already DocxError instances pass through untouched;
        // everything else is wrapped so callers see one consistent error type.
        if (!(failure instanceof errors_1.DocxError)) {
            const detail = failure instanceof Error ? failure.message : String(failure);
            throw new errors_1.DocxParseError(`Failed to read DOCX: ${detail}`, { cause: failure });
        }
        throw failure;
    }
}
|
|
1871
|
+
async function _readDocxInner(buffer, policy) {
|
|
1872
|
+
const reader = (0, read_archive_1.unzip)(buffer);
|
|
1873
|
+
const entries = new Map();
|
|
1874
|
+
let totalUncompressed = 0;
|
|
1875
|
+
let entryCount = 0;
|
|
1876
|
+
for await (const entry of reader.entries()) {
|
|
1877
|
+
entryCount++;
|
|
1878
|
+
if (entryCount > policy.maxPartCount) {
|
|
1879
|
+
throw new errors_1.DocxLimitExceededError("partCount", policy.maxPartCount, entryCount, "ZIP contains more entries than maxPartCount");
|
|
1880
|
+
}
|
|
1881
|
+
const data = await entry.bytes();
|
|
1882
|
+
if (data.length > policy.maxPartSize) {
|
|
1883
|
+
throw new errors_1.DocxLimitExceededError("partSize", policy.maxPartSize, data.length, `entry "${entry.path}" exceeds maxPartSize`);
|
|
1884
|
+
}
|
|
1885
|
+
totalUncompressed += data.length;
|
|
1886
|
+
if (totalUncompressed > policy.maxPackageSize) {
|
|
1887
|
+
throw new errors_1.DocxLimitExceededError("packageSize", policy.maxPackageSize, totalUncompressed, "cumulative uncompressed entry size exceeds maxPackageSize");
|
|
1888
|
+
}
|
|
1889
|
+
// Normalize path: remove leading slash, normalize separators
|
|
1890
|
+
const path = entry.path.replace(/^\//, "").replace(/\\/g, "/");
|
|
1891
|
+
entries.set(path, data);
|
|
1892
|
+
}
|
|
1893
|
+
const decoder = internal_utils_1.utf8Decoder;
|
|
1894
|
+
const consumedPaths = new Set(["[Content_Types].xml"]);
|
|
1895
|
+
// Best-effort parse for non-critical parts (settings, numbering, styles,
|
|
1896
|
+
// theme, fontTable, comments, charts, headers, footers, notes, …). A
|
|
1897
|
+
// malformed auxiliary part should not prevent us from returning the main
|
|
1898
|
+
// document body. Only parse failures on document.xml itself are fatal.
|
|
1899
|
+
const tryParse = (fn) => {
|
|
1900
|
+
try {
|
|
1901
|
+
return fn();
|
|
1902
|
+
}
|
|
1903
|
+
catch {
|
|
1904
|
+
return undefined;
|
|
1905
|
+
}
|
|
1906
|
+
};
|
|
1907
|
+
// Parse [Content_Types].xml for accurate opaque part content types
|
|
1908
|
+
const contentTypesXml = entries.get("[Content_Types].xml");
|
|
1909
|
+
const contentTypeOverrides = new Map();
|
|
1910
|
+
const contentTypeDefaults = new Map();
|
|
1911
|
+
if (contentTypesXml) {
|
|
1912
|
+
const ctDoc = (0, dom_1.parseXml)(decoder.decode(contentTypesXml));
|
|
1913
|
+
for (const child of ctDoc.root.children) {
|
|
1914
|
+
if (child.type !== "element") {
|
|
1915
|
+
continue;
|
|
1916
|
+
}
|
|
1917
|
+
if (child.name === "Override") {
|
|
1918
|
+
const partName = child.attributes["PartName"] ?? "";
|
|
1919
|
+
const ct = child.attributes["ContentType"] ?? "";
|
|
1920
|
+
if (partName && ct) {
|
|
1921
|
+
// Normalize: remove leading slash
|
|
1922
|
+
contentTypeOverrides.set(partName.replace(/^\//, ""), ct);
|
|
1923
|
+
}
|
|
1924
|
+
}
|
|
1925
|
+
else if (child.name === "Default") {
|
|
1926
|
+
const ext = child.attributes["Extension"] ?? "";
|
|
1927
|
+
const ct = child.attributes["ContentType"] ?? "";
|
|
1928
|
+
if (ext && ct) {
|
|
1929
|
+
contentTypeDefaults.set(ext.toLowerCase(), ct);
|
|
1930
|
+
}
|
|
1931
|
+
}
|
|
1932
|
+
}
|
|
1933
|
+
}
|
|
1934
|
+
const getText = (path) => {
|
|
1935
|
+
const data = entries.get(path);
|
|
1936
|
+
if (data) {
|
|
1937
|
+
consumedPaths.add(path);
|
|
1938
|
+
}
|
|
1939
|
+
return data ? decoder.decode(data) : undefined;
|
|
1940
|
+
};
|
|
1941
|
+
// Parse document relationships (must be before parseDocumentXml for hyperlink resolution)
|
|
1942
|
+
// First, try to discover document path via package rels (supports Strict conformance)
|
|
1943
|
+
let documentPartPath = "word/document.xml";
|
|
1944
|
+
const packageRelsXmlEarly = getText("_rels/.rels");
|
|
1945
|
+
if (packageRelsXmlEarly) {
|
|
1946
|
+
const pkgRelsEarly = (0, reader_context_1.parseRelationships)(packageRelsXmlEarly);
|
|
1947
|
+
for (const rel of pkgRelsEarly) {
|
|
1948
|
+
if (rel.type === constants_1.RelType.OfficeDocument) {
|
|
1949
|
+
let target = rel.target;
|
|
1950
|
+
if (target.startsWith("/")) {
|
|
1951
|
+
target = target.substring(1);
|
|
1952
|
+
}
|
|
1953
|
+
documentPartPath = target;
|
|
1954
|
+
break;
|
|
1955
|
+
}
|
|
1956
|
+
}
|
|
1957
|
+
}
|
|
1958
|
+
const docRelsPath = (0, parse_utils_1.getPartRelsPath)(documentPartPath);
|
|
1959
|
+
const docRelsXml = getText(docRelsPath);
|
|
1960
|
+
const docRels = docRelsXml ? (0, reader_context_1.parseRelationships)(docRelsXml) : [];
|
|
1961
|
+
const _relMap = new Map(docRels.map(r => [r.id, r]));
|
|
1962
|
+
// Create reader context for this parse session (replaces module-level _session)
|
|
1963
|
+
const ctx = (0, reader_context_1.createReaderContext)(policy);
|
|
1964
|
+
ctx.relMap = _relMap;
|
|
1965
|
+
// Parse document.xml (required)
|
|
1966
|
+
const documentXml = getText(documentPartPath);
|
|
1967
|
+
if (!documentXml) {
|
|
1968
|
+
throw new errors_1.DocxMissingPartError(documentPartPath);
|
|
1969
|
+
}
|
|
1970
|
+
const { body, sectionProperties, background } = parseDocumentXml(documentXml, ctx);
|
|
1971
|
+
// Parse styles (resolve path via relationship, fallback to hardcoded)
|
|
1972
|
+
const stylesPath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.Styles, documentPartPath) ?? "word/styles.xml";
|
|
1973
|
+
const stylesXml = getText(stylesPath);
|
|
1974
|
+
const stylesResult = stylesXml ? tryParse(() => (0, styles_parser_1.parseStyles)(stylesXml)) : undefined;
|
|
1975
|
+
// Parse numbering
|
|
1976
|
+
const numberingPath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.Numbering, documentPartPath) ?? "word/numbering.xml";
|
|
1977
|
+
const numberingXml = getText(numberingPath);
|
|
1978
|
+
const numberingResult = numberingXml
|
|
1979
|
+
? tryParse(() => (0, numbering_parser_1.parseNumberingXml)(numberingXml))
|
|
1980
|
+
: undefined;
|
|
1981
|
+
// Parse footnotes/endnotes — swap ctx.relMap to the notes part's own
|
|
1982
|
+
// .rels (footnotes.xml.rels / endnotes.xml.rels) so hyperlinks and images
|
|
1983
|
+
// inside notes resolve against the correct relationship map. Without this,
|
|
1984
|
+
// any rId used in a footnote silently resolves to undefined.
|
|
1985
|
+
const footnotesPath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.Footnotes, documentPartPath) ?? "word/footnotes.xml";
|
|
1986
|
+
const footnotesXml = getText(footnotesPath);
|
|
1987
|
+
let footnotes;
|
|
1988
|
+
if (footnotesXml) {
|
|
1989
|
+
const footnotesRelsPath = (0, parse_utils_1.getPartRelsPath)(footnotesPath);
|
|
1990
|
+
const footnotesRelsXml = getText(footnotesRelsPath);
|
|
1991
|
+
const savedRelMap = ctx.relMap;
|
|
1992
|
+
if (footnotesRelsXml) {
|
|
1993
|
+
const footnotesRels = (0, reader_context_1.parseRelationships)(footnotesRelsXml);
|
|
1994
|
+
ctx.relMap = new Map(footnotesRels.map(r => [r.id, r]));
|
|
1995
|
+
consumedPaths.add(footnotesRelsPath);
|
|
1996
|
+
}
|
|
1997
|
+
else {
|
|
1998
|
+
ctx.relMap = new Map();
|
|
1999
|
+
}
|
|
2000
|
+
footnotes = tryParse(() => parseNotesXml(footnotesXml, "footnote", ctx));
|
|
2001
|
+
ctx.relMap = savedRelMap;
|
|
2002
|
+
}
|
|
2003
|
+
const endnotesPath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.Endnotes, documentPartPath) ?? "word/endnotes.xml";
|
|
2004
|
+
const endnotesXml = getText(endnotesPath);
|
|
2005
|
+
let endnotes;
|
|
2006
|
+
if (endnotesXml) {
|
|
2007
|
+
const endnotesRelsPath = (0, parse_utils_1.getPartRelsPath)(endnotesPath);
|
|
2008
|
+
const endnotesRelsXml = getText(endnotesRelsPath);
|
|
2009
|
+
const savedRelMap = ctx.relMap;
|
|
2010
|
+
if (endnotesRelsXml) {
|
|
2011
|
+
const endnotesRels = (0, reader_context_1.parseRelationships)(endnotesRelsXml);
|
|
2012
|
+
ctx.relMap = new Map(endnotesRels.map(r => [r.id, r]));
|
|
2013
|
+
consumedPaths.add(endnotesRelsPath);
|
|
2014
|
+
}
|
|
2015
|
+
else {
|
|
2016
|
+
ctx.relMap = new Map();
|
|
2017
|
+
}
|
|
2018
|
+
endnotes = tryParse(() => parseNotesXml(endnotesXml, "endnote", ctx));
|
|
2019
|
+
ctx.relMap = savedRelMap;
|
|
2020
|
+
}
|
|
2021
|
+
// Parse headers/footers + detect watermarks
|
|
2022
|
+
const headers = new Map();
|
|
2023
|
+
const footers = new Map();
|
|
2024
|
+
let watermark;
|
|
2025
|
+
for (const rel of docRels) {
|
|
2026
|
+
if (rel.type === constants_1.RelType.Header) {
|
|
2027
|
+
const headerPartPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2028
|
+
const xml = getText(headerPartPath);
|
|
2029
|
+
if (xml) {
|
|
2030
|
+
// Parse header's own rels and switch ctx.relMap so hyperlinks/images
|
|
2031
|
+
// referenced inside the header resolve against its own relationship map.
|
|
2032
|
+
const headerRelsPath = (0, parse_utils_1.getPartRelsPath)(headerPartPath);
|
|
2033
|
+
const headerRelsXml = getText(headerRelsPath);
|
|
2034
|
+
const savedRelMap = ctx.relMap;
|
|
2035
|
+
if (headerRelsXml) {
|
|
2036
|
+
const headerRels = (0, reader_context_1.parseRelationships)(headerRelsXml);
|
|
2037
|
+
const headerRelMap = new Map(headerRels.map(r => [r.id, r]));
|
|
2038
|
+
ctx.relMap = headerRelMap;
|
|
2039
|
+
consumedPaths.add(headerRelsPath);
|
|
2040
|
+
}
|
|
2041
|
+
else {
|
|
2042
|
+
ctx.relMap = new Map();
|
|
2043
|
+
}
|
|
2044
|
+
try {
|
|
2045
|
+
// Parse XML once, re-use for both header content and watermark detection
|
|
2046
|
+
const headerRoot = (0, dom_1.parseXml)(xml).root;
|
|
2047
|
+
headers.set(rel.id, { content: parseHeaderFooterRoot(headerRoot, ctx), rId: rel.id });
|
|
2048
|
+
if (!watermark) {
|
|
2049
|
+
watermark = (0, watermark_parser_1.detectWatermarkFromRoot)(headerRoot);
|
|
2050
|
+
}
|
|
2051
|
+
}
|
|
2052
|
+
catch {
|
|
2053
|
+
// Skip a malformed header; preserve other headers and the document.
|
|
2054
|
+
}
|
|
2055
|
+
ctx.relMap = savedRelMap;
|
|
2056
|
+
}
|
|
2057
|
+
}
|
|
2058
|
+
else if (rel.type === constants_1.RelType.Footer) {
|
|
2059
|
+
const footerPartPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2060
|
+
const xml = getText(footerPartPath);
|
|
2061
|
+
if (xml) {
|
|
2062
|
+
// Parse footer's own rels and switch ctx.relMap so hyperlinks/images
|
|
2063
|
+
// referenced inside the footer resolve against its own relationship map.
|
|
2064
|
+
const footerRelsPath = (0, parse_utils_1.getPartRelsPath)(footerPartPath);
|
|
2065
|
+
const footerRelsXml = getText(footerRelsPath);
|
|
2066
|
+
const savedRelMap = ctx.relMap;
|
|
2067
|
+
if (footerRelsXml) {
|
|
2068
|
+
const footerRels = (0, reader_context_1.parseRelationships)(footerRelsXml);
|
|
2069
|
+
const footerRelMap = new Map(footerRels.map(r => [r.id, r]));
|
|
2070
|
+
ctx.relMap = footerRelMap;
|
|
2071
|
+
consumedPaths.add(footerRelsPath);
|
|
2072
|
+
}
|
|
2073
|
+
else {
|
|
2074
|
+
ctx.relMap = new Map();
|
|
2075
|
+
}
|
|
2076
|
+
try {
|
|
2077
|
+
footers.set(rel.id, { content: parseHeaderFooterXml(xml, ctx), rId: rel.id });
|
|
2078
|
+
}
|
|
2079
|
+
catch {
|
|
2080
|
+
// Skip a malformed footer; preserve other footers and the document.
|
|
2081
|
+
}
|
|
2082
|
+
ctx.relMap = savedRelMap;
|
|
2083
|
+
}
|
|
2084
|
+
}
|
|
2085
|
+
}
|
|
2086
|
+
// Parse settings
|
|
2087
|
+
const settingsPath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.Settings, documentPartPath) ?? "word/settings.xml";
|
|
2088
|
+
const settingsXml = getText(settingsPath);
|
|
2089
|
+
const settings = settingsXml ? tryParse(() => (0, metadata_parsers_1.parseSettingsXml)(settingsXml)) : undefined;
|
|
2090
|
+
// Parse web settings
|
|
2091
|
+
const webSettingsPath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.WebSettings, documentPartPath) ?? "word/webSettings.xml";
|
|
2092
|
+
const webSettingsXml = getText(webSettingsPath);
|
|
2093
|
+
const webSettings = webSettingsXml ? tryParse(() => (0, metadata_parsers_1.parseWebSettings)(webSettingsXml)) : undefined;
|
|
2094
|
+
// Parse people
|
|
2095
|
+
const peoplePath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.People, documentPartPath) ?? "word/people.xml";
|
|
2096
|
+
const peopleXml = getText(peoplePath);
|
|
2097
|
+
const people = peopleXml ? tryParse(() => (0, metadata_parsers_1.parsePeople)(peopleXml)) : undefined;
|
|
2098
|
+
// Parse thumbnail (from package rels — reuse already-parsed rels)
|
|
2099
|
+
let thumbnail;
|
|
2100
|
+
if (packageRelsXmlEarly) {
|
|
2101
|
+
const pkgRels = (0, reader_context_1.parseRelationships)(packageRelsXmlEarly);
|
|
2102
|
+
for (const rel of pkgRels) {
|
|
2103
|
+
if (rel.type.endsWith("/thumbnail")) {
|
|
2104
|
+
// Target in package rels is relative to package root; may include or exclude leading slash
|
|
2105
|
+
let target = rel.target;
|
|
2106
|
+
if (target.startsWith("/")) {
|
|
2107
|
+
target = target.substring(1);
|
|
2108
|
+
}
|
|
2109
|
+
// If the target doesn't include docProps/ prefix, add it (some writers emit bare filenames)
|
|
2110
|
+
const normalized = target.includes("/") ? target : `docProps/${target}`;
|
|
2111
|
+
consumedPaths.add(normalized);
|
|
2112
|
+
const thumbData = entries.get(normalized);
|
|
2113
|
+
if (thumbData) {
|
|
2114
|
+
const ext = (0, parse_utils_1.getFileExt)(normalized);
|
|
2115
|
+
const ct = ext === "jpeg" || ext === "jpg"
|
|
2116
|
+
? "image/jpeg"
|
|
2117
|
+
: ext === "png"
|
|
2118
|
+
? "image/png"
|
|
2119
|
+
: "image/x-wmf";
|
|
2120
|
+
thumbnail = {
|
|
2121
|
+
contentType: ct,
|
|
2122
|
+
data: thumbData
|
|
2123
|
+
};
|
|
2124
|
+
}
|
|
2125
|
+
break;
|
|
2126
|
+
}
|
|
2127
|
+
}
|
|
2128
|
+
}
|
|
2129
|
+
// Parse font table
|
|
2130
|
+
const fontTablePath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.FontTable, documentPartPath) ?? "word/fontTable.xml";
|
|
2131
|
+
const fontTableXml = getText(fontTablePath);
|
|
2132
|
+
const fonts = fontTableXml ? tryParse(() => (0, doc_props_parsers_1.parseFontTableXml)(fontTableXml)) : undefined;
|
|
2133
|
+
// Parse embedded fonts
|
|
2134
|
+
let embeddedFonts;
|
|
2135
|
+
const fontTableRelsXml = getText("word/_rels/fontTable.xml.rels");
|
|
2136
|
+
if (fontTableRelsXml && fonts) {
|
|
2137
|
+
const fontRels = (0, reader_context_1.parseRelationships)(fontTableRelsXml);
|
|
2138
|
+
const efs = [];
|
|
2139
|
+
// Build rId → { key } map from font table
|
|
2140
|
+
const rIdToKey = new Map();
|
|
2141
|
+
for (const f of fonts) {
|
|
2142
|
+
if (f.embedRegular && f.embedRegularKey) {
|
|
2143
|
+
rIdToKey.set(f.embedRegular, f.embedRegularKey);
|
|
2144
|
+
}
|
|
2145
|
+
if (f.embedBold && f.embedBoldKey) {
|
|
2146
|
+
rIdToKey.set(f.embedBold, f.embedBoldKey);
|
|
2147
|
+
}
|
|
2148
|
+
if (f.embedItalic && f.embedItalicKey) {
|
|
2149
|
+
rIdToKey.set(f.embedItalic, f.embedItalicKey);
|
|
2150
|
+
}
|
|
2151
|
+
if (f.embedBoldItalic && f.embedBoldItalicKey) {
|
|
2152
|
+
rIdToKey.set(f.embedBoldItalic, f.embedBoldItalicKey);
|
|
2153
|
+
}
|
|
2154
|
+
}
|
|
2155
|
+
for (const rel of fontRels) {
|
|
2156
|
+
if (rel.type === constants_1.RelType.Font) {
|
|
2157
|
+
const fontPath = (0, parse_utils_1.resolvePartPath)("word/fontTable.xml", rel.target);
|
|
2158
|
+
consumedPaths.add(fontPath);
|
|
2159
|
+
const data = entries.get(fontPath);
|
|
2160
|
+
if (data) {
|
|
2161
|
+
const fileName = (0, parse_utils_1.getFileName)(rel.target);
|
|
2162
|
+
const fontKey = rIdToKey.get(rel.id);
|
|
2163
|
+
const ef = {
|
|
2164
|
+
rId: rel.id,
|
|
2165
|
+
data,
|
|
2166
|
+
fileName
|
|
2167
|
+
};
|
|
2168
|
+
if (fontKey) {
|
|
2169
|
+
ef.fontKey = fontKey;
|
|
2170
|
+
}
|
|
2171
|
+
efs.push(ef);
|
|
2172
|
+
}
|
|
2173
|
+
}
|
|
2174
|
+
}
|
|
2175
|
+
if (efs.length > 0) {
|
|
2176
|
+
embeddedFonts = efs;
|
|
2177
|
+
}
|
|
2178
|
+
}
|
|
2179
|
+
// Parse Custom XML parts (for SDT data binding)
|
|
2180
|
+
const customXmlParts = [];
|
|
2181
|
+
for (const rel of docRels) {
|
|
2182
|
+
if (rel.type === constants_1.RelType.CustomXml) {
|
|
2183
|
+
const targetPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2184
|
+
consumedPaths.add(targetPath);
|
|
2185
|
+
const xmlContent = getText(targetPath);
|
|
2186
|
+
if (!xmlContent) {
|
|
2187
|
+
continue;
|
|
2188
|
+
}
|
|
2189
|
+
// Parse itemProps*.xml to get storeItemID
|
|
2190
|
+
const fileName = (0, parse_utils_1.getFileName)(targetPath);
|
|
2191
|
+
// itemProps file is typically at the same directory
|
|
2192
|
+
const dir = targetPath.substring(0, targetPath.lastIndexOf("/"));
|
|
2193
|
+
// Extract item number from fileName (e.g. "item1.xml" → "1")
|
|
2194
|
+
const match = fileName.match(/item(\d+)\.xml$/);
|
|
2195
|
+
let itemId = "";
|
|
2196
|
+
let schemaReferences;
|
|
2197
|
+
if (match) {
|
|
2198
|
+
const num = match[1];
|
|
2199
|
+
const propsPath = `${dir}/itemProps${num}.xml`;
|
|
2200
|
+
consumedPaths.add(propsPath);
|
|
2201
|
+
const propsXml = getText(propsPath);
|
|
2202
|
+
if (propsXml) {
|
|
2203
|
+
const propsDoc = (0, dom_1.parseXml)(propsXml);
|
|
2204
|
+
const dsItemEl = propsDoc.root;
|
|
2205
|
+
const id = dsItemEl.attributes["ds:itemID"];
|
|
2206
|
+
if (id) {
|
|
2207
|
+
itemId = id.replace(/[{}]/g, "");
|
|
2208
|
+
}
|
|
2209
|
+
// Schema references
|
|
2210
|
+
const refs = [];
|
|
2211
|
+
const schemaRefsEl = (0, dom_1.findChild)(dsItemEl, "ds:schemaRefs") ?? (0, dom_1.findChild)(dsItemEl, "schemaRefs");
|
|
2212
|
+
if (schemaRefsEl) {
|
|
2213
|
+
for (const srChild of schemaRefsEl.children) {
|
|
2214
|
+
if (srChild.type === "element") {
|
|
2215
|
+
const uri = srChild.attributes["ds:uri"] ?? srChild.attributes["uri"];
|
|
2216
|
+
if (uri) {
|
|
2217
|
+
refs.push(uri);
|
|
2218
|
+
}
|
|
2219
|
+
}
|
|
2220
|
+
}
|
|
2221
|
+
}
|
|
2222
|
+
if (refs.length > 0) {
|
|
2223
|
+
schemaReferences = refs;
|
|
2224
|
+
}
|
|
2225
|
+
}
|
|
2226
|
+
}
|
|
2227
|
+
customXmlParts.push({
|
|
2228
|
+
itemId,
|
|
2229
|
+
xmlContent,
|
|
2230
|
+
fileName,
|
|
2231
|
+
schemaReferences
|
|
2232
|
+
});
|
|
2233
|
+
}
|
|
2234
|
+
}
|
|
2235
|
+
// Parse core properties
|
|
2236
|
+
const corePropsXml = getText("docProps/core.xml");
|
|
2237
|
+
const coreProperties = corePropsXml ? tryParse(() => (0, doc_props_parsers_1.parseCoreProps)(corePropsXml)) : undefined;
|
|
2238
|
+
// Parse app properties
|
|
2239
|
+
const appPropsXml = getText("docProps/app.xml");
|
|
2240
|
+
const appProperties = appPropsXml ? tryParse(() => (0, doc_props_parsers_1.parseAppProps)(appPropsXml)) : undefined;
|
|
2241
|
+
// Parse comments — switch ctx.relMap to comments.xml.rels so any
|
|
2242
|
+
// hyperlinks/images referenced from inside comment paragraphs resolve
|
|
2243
|
+
// against the comment part's own relationships rather than document.xml.rels.
|
|
2244
|
+
const commentsXml = getText("word/comments.xml");
|
|
2245
|
+
let comments;
|
|
2246
|
+
if (commentsXml) {
|
|
2247
|
+
const commentsRelsPath = "word/_rels/comments.xml.rels";
|
|
2248
|
+
const commentsRelsXml = getText(commentsRelsPath);
|
|
2249
|
+
const savedRelMap = ctx.relMap;
|
|
2250
|
+
if (commentsRelsXml) {
|
|
2251
|
+
const commentsRels = (0, reader_context_1.parseRelationships)(commentsRelsXml);
|
|
2252
|
+
ctx.relMap = new Map(commentsRels.map(r => [r.id, r]));
|
|
2253
|
+
consumedPaths.add(commentsRelsPath);
|
|
2254
|
+
}
|
|
2255
|
+
else {
|
|
2256
|
+
ctx.relMap = new Map();
|
|
2257
|
+
}
|
|
2258
|
+
comments = tryParse(() => parseCommentsXmlFromCtx(commentsXml, ctx));
|
|
2259
|
+
ctx.relMap = savedRelMap;
|
|
2260
|
+
}
|
|
2261
|
+
// Merge in commentsExtended.xml data if present
|
|
2262
|
+
const commentsExtXml = getText("word/commentsExtended.xml");
|
|
2263
|
+
if (commentsExtXml && comments) {
|
|
2264
|
+
const extMap = tryParse(() => (0, comments_parser_1.parseCommentsExtendedXml)(commentsExtXml));
|
|
2265
|
+
if (extMap) {
|
|
2266
|
+
comments = comments.map(c => {
|
|
2267
|
+
const firstPara = c.content[0];
|
|
2268
|
+
if (!firstPara?.paraId) {
|
|
2269
|
+
return c;
|
|
2270
|
+
}
|
|
2271
|
+
const ext = extMap.get(firstPara.paraId);
|
|
2272
|
+
if (!ext) {
|
|
2273
|
+
return c;
|
|
2274
|
+
}
|
|
2275
|
+
return {
|
|
2276
|
+
...c,
|
|
2277
|
+
...(ext.done !== undefined ? { done: ext.done } : {}),
|
|
2278
|
+
...(ext.parentId !== undefined ? { parentId: ext.parentId } : {})
|
|
2279
|
+
};
|
|
2280
|
+
});
|
|
2281
|
+
}
|
|
2282
|
+
}
|
|
2283
|
+
// Parse custom properties
|
|
2284
|
+
const customPropsXml = getText("docProps/custom.xml");
|
|
2285
|
+
const customProperties = customPropsXml
|
|
2286
|
+
? tryParse(() => (0, doc_props_parsers_1.parseCustomPropsXml)(customPropsXml))
|
|
2287
|
+
: undefined;
|
|
2288
|
+
// Parse theme
|
|
2289
|
+
const themePath = (0, parse_utils_1.resolveRelTarget)(docRels, constants_1.RelType.Theme, documentPartPath) ?? "word/theme/theme1.xml";
|
|
2290
|
+
const themeXml = getText(themePath);
|
|
2291
|
+
const theme = themeXml ? tryParse(() => (0, metadata_parsers_1.parseThemeXml)(themeXml)) : undefined;
|
|
2292
|
+
// Collect images from main document relationships
|
|
2293
|
+
const images = [];
|
|
2294
|
+
for (const rel of docRels) {
|
|
2295
|
+
if (rel.type === constants_1.RelType.Image) {
|
|
2296
|
+
const imgPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2297
|
+
consumedPaths.add(imgPath);
|
|
2298
|
+
const data = entries.get(imgPath);
|
|
2299
|
+
if (data) {
|
|
2300
|
+
const fileName = (0, parse_utils_1.getFileName)(rel.target);
|
|
2301
|
+
const ext = (0, parse_utils_1.getFileExt)(fileName) || "png";
|
|
2302
|
+
images.push({
|
|
2303
|
+
data,
|
|
2304
|
+
mediaType: ext,
|
|
2305
|
+
fileName,
|
|
2306
|
+
rId: rel.id
|
|
2307
|
+
});
|
|
2308
|
+
}
|
|
2309
|
+
}
|
|
2310
|
+
}
|
|
2311
|
+
// Also collect images from header/footer relationships to ensure full round-trip.
|
|
2312
|
+
// Headers and footers have their own .rels files which may reference unique
|
|
2313
|
+
// images, OR they may share a media file with the main document. In the
|
|
2314
|
+
// latter case we keep the existing ImageDef but record the local rId as an
|
|
2315
|
+
// alias so the packager can rebuild header1.xml.rels with the original
|
|
2316
|
+
// (header-local) id intact.
|
|
2317
|
+
const collectedImagePaths = new Map();
|
|
2318
|
+
for (const img of images) {
|
|
2319
|
+
collectedImagePaths.set(img.fileName, img);
|
|
2320
|
+
}
|
|
2321
|
+
for (const rel of docRels) {
|
|
2322
|
+
if (rel.type !== constants_1.RelType.Header && rel.type !== constants_1.RelType.Footer) {
|
|
2323
|
+
continue;
|
|
2324
|
+
}
|
|
2325
|
+
const partPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2326
|
+
const partRelsPath = (0, parse_utils_1.getPartRelsPath)(partPath);
|
|
2327
|
+
const partRelsXml = entries.get(partRelsPath);
|
|
2328
|
+
if (!partRelsXml) {
|
|
2329
|
+
continue;
|
|
2330
|
+
}
|
|
2331
|
+
const partRels = (0, reader_context_1.parseRelationships)(decoder.decode(partRelsXml));
|
|
2332
|
+
for (const pRel of partRels) {
|
|
2333
|
+
if (pRel.type !== constants_1.RelType.Image) {
|
|
2334
|
+
continue;
|
|
2335
|
+
}
|
|
2336
|
+
const imgPath = (0, parse_utils_1.resolvePartPath)(partPath, pRel.target);
|
|
2337
|
+
consumedPaths.add(imgPath);
|
|
2338
|
+
const data = entries.get(imgPath);
|
|
2339
|
+
if (!data) {
|
|
2340
|
+
continue;
|
|
2341
|
+
}
|
|
2342
|
+
const fileName = (0, parse_utils_1.getFileName)(pRel.target);
|
|
2343
|
+
const existing = collectedImagePaths.get(fileName);
|
|
2344
|
+
if (existing) {
|
|
2345
|
+
// Same physical file as one we already know — keep one ImageDef and
|
|
2346
|
+
// append this part-local rId to its aliases (if it differs from the
|
|
2347
|
+
// primary rId and isn't already recorded).
|
|
2348
|
+
if (pRel.id && pRel.id !== existing.rId) {
|
|
2349
|
+
const aliases = existing.aliasRIds ? [...existing.aliasRIds] : [];
|
|
2350
|
+
if (!aliases.includes(pRel.id)) {
|
|
2351
|
+
aliases.push(pRel.id);
|
|
2352
|
+
existing.aliasRIds = aliases;
|
|
2353
|
+
}
|
|
2354
|
+
}
|
|
2355
|
+
continue;
|
|
2356
|
+
}
|
|
2357
|
+
const ext = (0, parse_utils_1.getFileExt)(fileName) || "png";
|
|
2358
|
+
const newImg = {
|
|
2359
|
+
data,
|
|
2360
|
+
mediaType: ext,
|
|
2361
|
+
fileName,
|
|
2362
|
+
rId: pRel.id
|
|
2363
|
+
};
|
|
2364
|
+
images.push(newImg);
|
|
2365
|
+
collectedImagePaths.set(fileName, newImg);
|
|
2366
|
+
}
|
|
2367
|
+
}
|
|
2368
|
+
// Parse chart parts and replace opaque drawings with typed ChartContent
|
|
2369
|
+
const chartRIdToChart = new Map();
|
|
2370
|
+
for (const rel of docRels) {
|
|
2371
|
+
if (rel.type === constants_1.RelType.Chart) {
|
|
2372
|
+
const chartPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2373
|
+
consumedPaths.add(chartPath);
|
|
2374
|
+
const chartXml = getText(chartPath);
|
|
2375
|
+
if (chartXml) {
|
|
2376
|
+
const chart = tryParse(() => (0, chart_parser_1.parseChartXml)(chartXml));
|
|
2377
|
+
if (chart) {
|
|
2378
|
+
chartRIdToChart.set(rel.id, chart);
|
|
2379
|
+
}
|
|
2380
|
+
}
|
|
2381
|
+
}
|
|
2382
|
+
}
|
|
2383
|
+
// Replace OpaqueDrawing items that reference chart rIds with proper ChartContent
|
|
2384
|
+
if (chartRIdToChart.size > 0) {
|
|
2385
|
+
(0, chart_parser_1.replaceOpaqueCharts)(body, chartRIdToChart);
|
|
2386
|
+
}
|
|
2387
|
+
// Parse ChartEx parts and replace opaque drawings with typed ChartExContent
|
|
2388
|
+
const chartExRIdToContent = new Map();
|
|
2389
|
+
for (const rel of docRels) {
|
|
2390
|
+
if (rel.type === constants_1.RelType.ChartEx) {
|
|
2391
|
+
const chartExPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2392
|
+
consumedPaths.add(chartExPath);
|
|
2393
|
+
const chartExXml = getText(chartExPath);
|
|
2394
|
+
if (chartExXml) {
|
|
2395
|
+
const data = tryParse(() => (0, chart_parser_1.parseChartExXml)(chartExXml));
|
|
2396
|
+
const content = {
|
|
2397
|
+
type: "chartEx",
|
|
2398
|
+
chartExXml,
|
|
2399
|
+
...(data !== undefined && { data })
|
|
2400
|
+
};
|
|
2401
|
+
chartExRIdToContent.set(rel.id, content);
|
|
2402
|
+
}
|
|
2403
|
+
}
|
|
2404
|
+
}
|
|
2405
|
+
// Replace OpaqueDrawing items that reference ChartEx rIds with proper ChartExContent
|
|
2406
|
+
if (chartExRIdToContent.size > 0) {
|
|
2407
|
+
(0, chart_parser_1.replaceOpaqueChartExDrawings)(body, chartExRIdToContent);
|
|
2408
|
+
}
|
|
2409
|
+
// Detect document type from main document part content type
|
|
2410
|
+
let docType;
|
|
2411
|
+
const mainDocCT = contentTypeOverrides.get(documentPartPath) ?? contentTypeOverrides.get(`/${documentPartPath}`);
|
|
2412
|
+
if (mainDocCT) {
|
|
2413
|
+
if (mainDocCT.includes("template.main") && mainDocCT.includes("macroEnabled")) {
|
|
2414
|
+
docType = "macroEnabledTemplate";
|
|
2415
|
+
}
|
|
2416
|
+
else if (mainDocCT.includes("template.main")) {
|
|
2417
|
+
docType = "template";
|
|
2418
|
+
}
|
|
2419
|
+
else if (mainDocCT.includes("macroEnabled")) {
|
|
2420
|
+
docType = "macroEnabledDocument";
|
|
2421
|
+
}
|
|
2422
|
+
// "document" is the default — only set if non-standard
|
|
2423
|
+
}
|
|
2424
|
+
// Extract VBA project binary for .docm/.dotm round-trip.
|
|
2425
|
+
// Honour `preserveVbaProject`: if disabled, mark the relationship's
|
|
2426
|
+
// target consumed (so opaqueParts won't retain it either) but leave
|
|
2427
|
+
// `vbaProject` undefined so the produced model does not surface macro
|
|
2428
|
+
// payloads to downstream consumers.
|
|
2429
|
+
let vbaProject;
|
|
2430
|
+
for (const rel of docRels) {
|
|
2431
|
+
if (rel.type === constants_1.RelType.VbaProject) {
|
|
2432
|
+
const vbaPath = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2433
|
+
consumedPaths.add(vbaPath);
|
|
2434
|
+
if (policy.preserveVbaProject) {
|
|
2435
|
+
vbaProject = entries.get(vbaPath);
|
|
2436
|
+
}
|
|
2437
|
+
break;
|
|
2438
|
+
}
|
|
2439
|
+
}
|
|
2440
|
+
// Resolve altChunk data: body elements of type "altChunk" reference a rId.
|
|
2441
|
+
// The target file is stored in docRels + entries. We populate the altChunk
|
|
2442
|
+
// body item with its data here AND mark the target path as consumed so the
|
|
2443
|
+
// opaqueParts collector below does not retain a duplicate copy that would
|
|
2444
|
+
// later be written back to the ZIP twice.
|
|
2445
|
+
//
|
|
2446
|
+
// Honour `preserveAltChunks`: when disabled, we still consume the target
|
|
2447
|
+
// path (so it doesn't leak into opaqueParts) but skip data attachment
|
|
2448
|
+
// and remove altChunk entries from the body before the document is
|
|
2449
|
+
// returned. Embedded HTML/RTF in altChunks is a common attack vector
|
|
2450
|
+
// for downstream renderers, so strict mode strips them entirely.
|
|
2451
|
+
for (const item of body) {
|
|
2452
|
+
if (item.type === "altChunk" && item.rId) {
|
|
2453
|
+
const rel = _relMap.get(item.rId);
|
|
2454
|
+
if (rel) {
|
|
2455
|
+
const target = (0, parse_utils_1.resolvePartPath)(documentPartPath, rel.target);
|
|
2456
|
+
const targetData = entries.get(target);
|
|
2457
|
+
if (targetData) {
|
|
2458
|
+
consumedPaths.add(target);
|
|
2459
|
+
if (policy.preserveAltChunks) {
|
|
2460
|
+
const fileName = (0, parse_utils_1.getFileName)(target);
|
|
2461
|
+
const mItem = item;
|
|
2462
|
+
mItem.data = targetData;
|
|
2463
|
+
mItem.fileName = fileName;
|
|
2464
|
+
// Infer content type from extension
|
|
2465
|
+
const ext = fileName ? (0, parse_utils_1.getFileExt)(fileName) : "";
|
|
2466
|
+
if (ext === "html" || ext === "htm") {
|
|
2467
|
+
mItem.contentType = "text/html";
|
|
2468
|
+
}
|
|
2469
|
+
else if (ext === "rtf") {
|
|
2470
|
+
mItem.contentType = "text/rtf";
|
|
2471
|
+
}
|
|
2472
|
+
else if (ext === "txt") {
|
|
2473
|
+
mItem.contentType = "text/plain";
|
|
2474
|
+
}
|
|
2475
|
+
}
|
|
2476
|
+
}
|
|
2477
|
+
}
|
|
2478
|
+
}
|
|
2479
|
+
}
|
|
2480
|
+
// Remove altChunk body entries entirely when not preserving them.
|
|
2481
|
+
if (!policy.preserveAltChunks) {
|
|
2482
|
+
for (let i = body.length - 1; i >= 0; i--) {
|
|
2483
|
+
if (body[i].type === "altChunk") {
|
|
2484
|
+
body.splice(i, 1);
|
|
2485
|
+
}
|
|
2486
|
+
}
|
|
2487
|
+
}
|
|
2488
|
+
// Collect opaque (unrecognized) parts for round-trip preservation
|
|
2489
|
+
const opaqueParts = [];
|
|
2490
|
+
for (const [path, data] of entries) {
|
|
2491
|
+
// Skip consumed paths and all .rels files (structural)
|
|
2492
|
+
if (consumedPaths.has(path) || path.includes("_rels/")) {
|
|
2493
|
+
continue;
|
|
2494
|
+
}
|
|
2495
|
+
// Honour `preserveOleObjects`: when disabled, drop OLE embedding
|
|
2496
|
+
// binaries (word/embeddings/*.bin and similar) before they reach the
|
|
2497
|
+
// returned model. The relationship targets remain in their parent
|
|
2498
|
+
// part's .rels, so the caller is responsible for stripping or
|
|
2499
|
+
// ignoring those if they need a fully-clean document.
|
|
2500
|
+
if (!policy.preserveOleObjects &&
|
|
2501
|
+
(path.startsWith("word/embeddings/") || (path.endsWith(".bin") && path.includes("embed")))) {
|
|
2502
|
+
continue;
|
|
2503
|
+
}
|
|
2504
|
+
// Parse rels for this part if they exist
|
|
2505
|
+
const partRelsPath = (0, parse_utils_1.getPartRelsPath)(path);
|
|
2506
|
+
const partRelsData = entries.get(partRelsPath);
|
|
2507
|
+
let relationships;
|
|
2508
|
+
if (partRelsData) {
|
|
2509
|
+
const rels = (0, reader_context_1.parseRelationships)(decoder.decode(partRelsData));
|
|
2510
|
+
relationships = rels.map(r => ({
|
|
2511
|
+
id: r.id,
|
|
2512
|
+
type: r.type,
|
|
2513
|
+
target: r.target,
|
|
2514
|
+
// Preserve the source string verbatim ("External", "Internal", or
|
|
2515
|
+
// any non-standard value) so opaque round-trip is byte-faithful.
|
|
2516
|
+
targetMode: r.targetMode
|
|
2517
|
+
}));
|
|
2518
|
+
}
|
|
2519
|
+
// Resolve content type from [Content_Types].xml (override > default by extension)
|
|
2520
|
+
let contentType = contentTypeOverrides.get(path);
|
|
2521
|
+
if (!contentType) {
|
|
2522
|
+
const ext = (0, parse_utils_1.getFileExt)(path);
|
|
2523
|
+
contentType = contentTypeDefaults.get(ext);
|
|
2524
|
+
}
|
|
2525
|
+
opaqueParts.push({ path, data, contentType, relationships });
|
|
2526
|
+
}
|
|
2527
|
+
return {
|
|
2528
|
+
...(docType ? { docType } : {}),
|
|
2529
|
+
body,
|
|
2530
|
+
sectionProperties,
|
|
2531
|
+
styles: stylesResult?.styles,
|
|
2532
|
+
docDefaults: stylesResult?.docDefaults,
|
|
2533
|
+
abstractNumberings: numberingResult?.abstractNums,
|
|
2534
|
+
numberingInstances: numberingResult?.instances,
|
|
2535
|
+
numPicBullets: numberingResult?.numPicBullets && numberingResult.numPicBullets.length > 0
|
|
2536
|
+
? numberingResult.numPicBullets
|
|
2537
|
+
: undefined,
|
|
2538
|
+
headers: headers.size > 0 ? headers : undefined,
|
|
2539
|
+
footers: footers.size > 0 ? footers : undefined,
|
|
2540
|
+
footnotes: footnotes && footnotes.length > 0 ? footnotes : undefined,
|
|
2541
|
+
endnotes: endnotes && endnotes.length > 0 ? endnotes : undefined,
|
|
2542
|
+
images: images.length > 0 ? images : undefined,
|
|
2543
|
+
fonts: fonts && fonts.length > 0 ? fonts : undefined,
|
|
2544
|
+
embeddedFonts: embeddedFonts && embeddedFonts.length > 0 ? embeddedFonts : undefined,
|
|
2545
|
+
customXmlParts: customXmlParts.length > 0 ? customXmlParts : undefined,
|
|
2546
|
+
webSettings,
|
|
2547
|
+
thumbnail,
|
|
2548
|
+
people: people && people.length > 0 ? people : undefined,
|
|
2549
|
+
settings,
|
|
2550
|
+
coreProperties,
|
|
2551
|
+
appProperties,
|
|
2552
|
+
comments: comments && comments.length > 0 ? comments : undefined,
|
|
2553
|
+
background,
|
|
2554
|
+
customProperties: customProperties && customProperties.length > 0 ? customProperties : undefined,
|
|
2555
|
+
theme,
|
|
2556
|
+
watermark,
|
|
2557
|
+
opaqueParts: opaqueParts.length > 0 ? opaqueParts : undefined,
|
|
2558
|
+
vbaProject
|
|
2559
|
+
};
|
|
2560
|
+
}
|