@cj-tech-master/excelts 9.5.4 → 9.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/modules/archive/compression/streaming-compress.browser.js +29 -0
- package/dist/browser/modules/archive/compression/streaming-compress.js +9 -0
- package/dist/browser/modules/archive/compression/worker-pool/pool.browser.js +26 -1
- package/dist/browser/modules/archive/fs/archive-file.d.ts +8 -5
- package/dist/browser/modules/archive/fs/archive-file.js +78 -16
- package/dist/browser/modules/archive/unzip/stream.browser.js +43 -2
- package/dist/browser/modules/excel/chart/chart-ex-builder.js +7 -2
- package/dist/browser/modules/excel/chart/chart-ex-renderer.js +4 -9
- package/dist/browser/modules/excel/chart/chart-ex-types.d.ts +0 -12
- package/dist/browser/modules/excel/chart/chart.d.ts +1 -5
- package/dist/browser/modules/excel/chart/chart.js +1 -7
- package/dist/browser/modules/excel/chart/types.d.ts +0 -6
- package/dist/browser/modules/excel/stream/workbook-reader.browser.js +25 -1
- package/dist/browser/modules/excel/stream/workbook-reader.js +9 -0
- package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +40 -0
- package/dist/browser/modules/excel/stream/workbook-writer.browser.js +228 -13
- package/dist/browser/modules/excel/utils/string-buf.d.ts +5 -26
- package/dist/browser/modules/excel/utils/string-buf.js +4 -81
- package/dist/browser/modules/excel/workbook.browser.js +135 -25
- package/dist/browser/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
- package/dist/browser/modules/excel/xlsx/xlsx.browser.d.ts +19 -9
- package/dist/browser/modules/excel/xlsx/xlsx.browser.js +32 -8
- package/dist/browser/modules/excel/xlsx/xlsx.d.ts +10 -2
- package/dist/browser/modules/excel/xlsx/xlsx.js +9 -1
- package/dist/browser/modules/pdf/excel-bridge.d.ts +30 -1
- package/dist/browser/modules/pdf/excel-bridge.js +32 -0
- package/dist/browser/modules/pdf/font/metrics.d.ts +3 -52
- package/dist/browser/modules/pdf/font/metrics.js +3 -237
- package/dist/browser/modules/pdf/index.d.ts +1 -1
- package/dist/browser/modules/pdf/index.js +1 -1
- package/dist/browser/modules/pdf/render-layout-to-pdf.d.ts +66 -0
- package/dist/browser/modules/pdf/render-layout-to-pdf.js +647 -0
- package/dist/browser/modules/pdf/word-bridge.d.ts +80 -12
- package/dist/browser/modules/pdf/word-bridge.js +122 -274
- package/dist/browser/modules/stream/index.base.d.ts +2 -0
- package/dist/browser/modules/stream/index.base.js +2 -1
- package/dist/browser/modules/stream/internal/sink-adapter.d.ts +65 -0
- package/dist/browser/modules/stream/internal/sink-adapter.js +198 -0
- package/dist/browser/modules/stream/pull-stream.d.ts +19 -2
- package/dist/browser/modules/stream/pull-stream.js +51 -5
- package/dist/browser/modules/stream/types.d.ts +13 -1
- package/dist/browser/modules/word/advanced/diff.d.ts +61 -0
- package/dist/browser/modules/word/advanced/diff.js +167 -0
- package/dist/browser/modules/word/advanced/drawing-shapes.d.ts +269 -0
- package/dist/browser/modules/word/advanced/drawing-shapes.js +268 -0
- package/dist/browser/modules/word/advanced/field-engine.d.ts +43 -0
- package/dist/browser/modules/word/advanced/field-engine.js +1225 -0
- package/dist/browser/modules/word/advanced/glossary.d.ts +86 -0
- package/dist/browser/modules/word/advanced/glossary.js +79 -0
- package/dist/browser/modules/word/advanced/math-convert.d.ts +30 -0
- package/dist/browser/modules/word/advanced/math-convert.js +595 -0
- package/dist/browser/modules/word/advanced/ole-objects.d.ts +115 -0
- package/dist/browser/modules/word/advanced/ole-objects.js +271 -0
- package/dist/browser/modules/word/advanced/style-map.d.ts +105 -0
- package/dist/browser/modules/word/advanced/style-map.js +322 -0
- package/dist/browser/modules/word/advanced/validation.d.ts +56 -0
- package/dist/browser/modules/word/advanced/validation.js +1065 -0
- package/dist/browser/modules/word/advanced/vba-project.d.ts +91 -0
- package/dist/browser/modules/word/advanced/vba-project.js +265 -0
- package/dist/browser/modules/word/bridge/excel-bridge.d.ts +127 -0
- package/dist/browser/modules/word/bridge/excel-bridge.js +980 -0
- package/dist/browser/modules/word/builder/document-handle.d.ts +151 -0
- package/dist/browser/modules/word/builder/document-handle.js +664 -0
- package/dist/browser/modules/word/builder/paragraph-builders.d.ts +61 -0
- package/dist/browser/modules/word/builder/paragraph-builders.js +90 -0
- package/dist/browser/modules/word/builder/run-builders.d.ts +374 -0
- package/dist/browser/modules/word/builder/run-builders.js +600 -0
- package/dist/browser/modules/word/builder/table-builders.d.ts +23 -0
- package/dist/browser/modules/word/builder/table-builders.js +45 -0
- package/dist/browser/modules/word/constants.d.ts +39 -1
- package/dist/browser/modules/word/constants.js +109 -1
- package/dist/browser/modules/word/convert/conversion-ir.d.ts +210 -0
- package/dist/browser/modules/word/convert/conversion-ir.js +31 -0
- package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +39 -0
- package/dist/browser/modules/word/convert/docx-to-semantic.js +499 -0
- package/dist/browser/modules/word/convert/flat-opc.d.ts +44 -0
- package/dist/browser/modules/word/convert/flat-opc.js +385 -0
- package/dist/browser/modules/word/convert/html/html-import.d.ts +50 -0
- package/dist/browser/modules/word/convert/html/html-import.js +1907 -0
- package/dist/{types/modules/word → browser/modules/word/convert/html}/html-renderer.d.ts +14 -1
- package/dist/{esm/modules/word → browser/modules/word/convert/html}/html-renderer.js +420 -69
- package/dist/browser/modules/word/convert/html/html.d.ts +15 -0
- package/dist/browser/modules/word/convert/html/html.js +15 -0
- package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +68 -0
- package/dist/browser/modules/word/convert/markdown/markdown-import.js +1325 -0
- package/dist/browser/modules/word/convert/markdown/markdown-renderer.d.ts +25 -0
- package/dist/browser/modules/word/convert/markdown/markdown-renderer.js +634 -0
- package/dist/browser/modules/word/convert/markdown/markdown.d.ts +15 -0
- package/dist/browser/modules/word/convert/markdown/markdown.js +15 -0
- package/dist/browser/modules/word/convert/odt/odt.d.ts +41 -0
- package/dist/browser/modules/word/convert/odt/odt.js +1932 -0
- package/dist/browser/modules/word/{color-utils.d.ts → core/color-utils.d.ts} +8 -1
- package/dist/browser/modules/word/core/color-utils.js +43 -0
- package/dist/browser/modules/word/core/internal-utils.d.ts +90 -0
- package/dist/browser/modules/word/core/internal-utils.js +209 -0
- package/dist/browser/modules/word/core/mapper.d.ts +44 -0
- package/dist/browser/modules/word/core/mapper.js +427 -0
- package/dist/browser/modules/word/core/opc-paths.d.ts +33 -0
- package/dist/browser/modules/word/core/opc-paths.js +48 -0
- package/dist/browser/modules/word/core/text-utils.d.ts +38 -0
- package/dist/browser/modules/word/core/text-utils.js +202 -0
- package/dist/browser/modules/word/core/walker.d.ts +119 -0
- package/dist/browser/modules/word/core/walker.js +570 -0
- package/dist/browser/modules/word/crypto.d.ts +14 -9
- package/dist/browser/modules/word/crypto.js +13 -7
- package/dist/browser/modules/word/document-io.d.ts +59 -27
- package/dist/browser/modules/word/document-io.js +80 -197
- package/dist/browser/modules/word/errors.d.ts +44 -1
- package/dist/browser/modules/word/errors.js +54 -2
- package/dist/browser/modules/word/excel.d.ts +14 -0
- package/dist/browser/modules/word/excel.js +13 -0
- package/dist/browser/modules/word/font/font-embed.d.ts +112 -0
- package/dist/browser/modules/word/font/font-embed.js +646 -0
- package/dist/{esm/modules/word → browser/modules/word/font}/font-obfuscation.js +4 -9
- package/dist/browser/modules/word/font/hyphenation.d.ts +65 -0
- package/dist/browser/modules/word/font/hyphenation.js +4210 -0
- package/dist/browser/modules/word/font/text-shaping.d.ts +58 -0
- package/dist/browser/modules/word/font/text-shaping.js +635 -0
- package/dist/browser/modules/word/html.d.ts +7 -6
- package/dist/browser/modules/word/html.js +6 -5
- package/dist/browser/modules/word/incremental-edit.d.ts +123 -0
- package/dist/browser/modules/word/incremental-edit.js +361 -0
- package/dist/browser/modules/word/index.base.d.ts +194 -10
- package/dist/browser/modules/word/index.base.js +138 -29
- package/dist/browser/modules/word/layout/layout-constants.d.ts +17 -0
- package/dist/browser/modules/word/layout/layout-constants.js +17 -0
- package/dist/browser/modules/word/layout/layout-full.d.ts +53 -0
- package/dist/browser/modules/word/layout/layout-full.js +1696 -0
- package/dist/browser/modules/word/layout/layout-model.d.ts +344 -0
- package/dist/browser/modules/word/layout/layout-model.js +16 -0
- package/dist/browser/modules/word/layout/layout.d.ts +63 -0
- package/dist/browser/modules/word/layout/layout.js +1167 -0
- package/dist/browser/modules/word/layout/render-page.d.ts +57 -0
- package/dist/browser/modules/word/layout/render-page.js +1238 -0
- package/dist/browser/modules/word/markdown.d.ts +14 -0
- package/dist/browser/modules/word/markdown.js +13 -0
- package/dist/browser/modules/word/patcher.d.ts +62 -0
- package/dist/browser/modules/word/patcher.js +537 -0
- package/dist/browser/modules/word/query/compat.d.ts +25 -0
- package/dist/browser/modules/word/query/compat.js +58 -0
- package/dist/browser/modules/word/query/data-binding.d.ts +22 -0
- package/dist/browser/modules/word/query/data-binding.js +392 -0
- package/dist/browser/modules/word/query/form-fields.d.ts +41 -0
- package/dist/browser/modules/word/query/form-fields.js +268 -0
- package/dist/browser/modules/word/query/format-search.d.ts +99 -0
- package/dist/browser/modules/word/query/format-search.js +329 -0
- package/dist/browser/modules/word/query/mail-merge.d.ts +25 -0
- package/dist/browser/modules/word/query/mail-merge.js +111 -0
- package/dist/browser/modules/word/query/merge.d.ts +50 -0
- package/dist/browser/modules/word/query/merge.js +617 -0
- package/dist/browser/modules/word/query/replace.d.ts +47 -0
- package/dist/browser/modules/word/query/replace.js +301 -0
- package/dist/browser/modules/word/query/revisions.d.ts +67 -0
- package/dist/browser/modules/word/query/revisions.js +879 -0
- package/dist/browser/modules/word/query/search.d.ts +129 -0
- package/dist/browser/modules/word/query/search.js +346 -0
- package/dist/browser/modules/word/query/split.d.ts +44 -0
- package/dist/browser/modules/word/query/split.js +135 -0
- package/dist/browser/modules/word/query/style-resolve.d.ts +104 -0
- package/dist/browser/modules/word/query/style-resolve.js +368 -0
- package/dist/browser/modules/word/reader/chart-parser.d.ts +20 -0
- package/dist/browser/modules/word/reader/chart-parser.js +810 -0
- package/dist/browser/modules/word/reader/comments-parser.d.ts +26 -0
- package/dist/browser/modules/word/reader/comments-parser.js +92 -0
- package/dist/browser/modules/word/reader/doc-props-parsers.d.ts +15 -0
- package/dist/browser/modules/word/reader/doc-props-parsers.js +190 -0
- package/dist/browser/modules/word/reader/docx-reader.d.ts +27 -0
- package/dist/browser/modules/word/reader/docx-reader.js +2557 -0
- package/dist/browser/modules/word/reader/drawing-helpers.d.ts +27 -0
- package/dist/browser/modules/word/reader/drawing-helpers.js +84 -0
- package/dist/browser/modules/word/reader/form-field-parser.d.ts +21 -0
- package/dist/browser/modules/word/reader/form-field-parser.js +82 -0
- package/dist/browser/modules/word/reader/image-parsers.d.ts +11 -0
- package/dist/browser/modules/word/reader/image-parsers.js +291 -0
- package/dist/browser/modules/word/reader/math-parser.d.ts +12 -0
- package/dist/browser/modules/word/reader/math-parser.js +422 -0
- package/dist/browser/modules/word/reader/metadata-parsers.d.ts +17 -0
- package/dist/browser/modules/word/reader/metadata-parsers.js +87 -0
- package/dist/browser/modules/word/reader/numbering-parser.d.ts +13 -0
- package/dist/browser/modules/word/reader/numbering-parser.js +166 -0
- package/dist/browser/modules/word/reader/paragraph-section-parsers.d.ts +12 -0
- package/dist/browser/modules/word/reader/paragraph-section-parsers.js +503 -0
- package/dist/browser/modules/word/reader/parse-utils.d.ts +91 -0
- package/dist/browser/modules/word/reader/parse-utils.js +249 -0
- package/dist/browser/modules/word/reader/properties-parsers.d.ts +21 -0
- package/dist/browser/modules/word/reader/properties-parsers.js +332 -0
- package/dist/browser/modules/word/reader/reader-context.d.ts +69 -0
- package/dist/browser/modules/word/reader/reader-context.js +61 -0
- package/dist/browser/modules/word/reader/sdt-helpers.d.ts +29 -0
- package/dist/browser/modules/word/reader/sdt-helpers.js +111 -0
- package/dist/browser/modules/word/reader/settings-parser.d.ts +8 -0
- package/dist/browser/modules/word/reader/settings-parser.js +263 -0
- package/dist/browser/modules/word/reader/styles-parser.d.ts +12 -0
- package/dist/browser/modules/word/reader/styles-parser.js +147 -0
- package/dist/browser/modules/word/reader/table-properties-parsers.d.ts +12 -0
- package/dist/browser/modules/word/reader/table-properties-parsers.js +234 -0
- package/dist/browser/modules/word/reader/theme-parser.d.ts +8 -0
- package/dist/browser/modules/word/reader/theme-parser.js +167 -0
- package/dist/browser/modules/word/reader/watermark-parser.d.ts +15 -0
- package/dist/browser/modules/word/reader/watermark-parser.js +110 -0
- package/dist/browser/modules/word/security/cfb-reader.d.ts +37 -0
- package/dist/browser/modules/word/security/cfb-reader.js +410 -0
- package/dist/browser/modules/word/{digital-signatures.d.ts → security/digital-signatures.d.ts} +19 -11
- package/dist/browser/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
- package/dist/browser/modules/word/security/document-protection.d.ts +93 -0
- package/dist/browser/modules/word/security/document-protection.js +201 -0
- package/dist/{types/modules/word → browser/modules/word/security}/encryption.d.ts +51 -4
- package/dist/browser/modules/word/security/encryption.js +602 -0
- package/dist/browser/modules/word/security/policy.d.ts +80 -0
- package/dist/browser/modules/word/security/policy.js +102 -0
- package/dist/browser/modules/word/template/template-chart.d.ts +56 -0
- package/dist/browser/modules/word/template/template-chart.js +167 -0
- package/dist/browser/modules/word/template/template-datasource.d.ts +154 -0
- package/dist/browser/modules/word/template/template-datasource.js +541 -0
- package/dist/browser/modules/word/template/template-engine.d.ts +121 -0
- package/dist/browser/modules/word/template/template-engine.js +1435 -0
- package/dist/browser/modules/word/types.d.ts +224 -25
- package/dist/browser/modules/word/units.d.ts +26 -0
- package/dist/browser/modules/word/units.js +43 -14
- package/dist/browser/modules/word/{writers → writer}/chart-writer.js +164 -23
- package/dist/browser/modules/word/writer/checkbox-writer.d.ts +17 -0
- package/dist/browser/modules/word/writer/checkbox-writer.js +79 -0
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/comment-writer.d.ts +2 -1
- package/dist/browser/modules/word/{writers → writer}/comment-writer.js +8 -6
- package/dist/browser/modules/word/writer/common-parts.d.ts +57 -0
- package/dist/browser/modules/word/writer/common-parts.js +101 -0
- package/dist/{types/modules/word → browser/modules/word/writer}/content-types.d.ts +2 -2
- package/dist/{esm/modules/word → browser/modules/word/writer}/content-types.js +14 -6
- package/dist/browser/modules/word/writer/document-writer.d.ts +24 -0
- package/dist/browser/modules/word/writer/document-writer.js +473 -0
- package/dist/browser/modules/word/writer/docx-packager.d.ts +35 -0
- package/dist/browser/modules/word/writer/docx-packager.js +1515 -0
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/footnote-writer.d.ts +3 -2
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/footnote-writer.js +13 -10
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/header-footer-writer.d.ts +3 -2
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/header-footer-writer.js +39 -21
- package/dist/{types/modules/word/writers → browser/modules/word/writer}/image-writer.d.ts +1 -1
- package/dist/browser/modules/word/{writers → writer}/image-writer.js +11 -7
- package/dist/browser/modules/word/writer/math-writer.d.ts +20 -0
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/math-writer.js +21 -1
- package/dist/browser/modules/word/{writers → writer}/numbering-writer.d.ts +1 -1
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/numbering-writer.js +11 -4
- package/dist/browser/modules/word/{writers → writer}/paragraph-writer.d.ts +2 -1
- package/dist/browser/modules/word/{writers → writer}/paragraph-writer.js +73 -38
- package/dist/browser/modules/word/{writers → writer}/parts-writer.d.ts +3 -3
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/parts-writer.js +91 -12
- package/dist/browser/modules/word/writer/reference-scanners.d.ts +42 -0
- package/dist/browser/modules/word/writer/reference-scanners.js +111 -0
- package/dist/browser/modules/word/writer/relationships.d.ts +52 -0
- package/dist/browser/modules/word/writer/relationships.js +117 -0
- package/dist/browser/modules/word/writer/render-context.d.ts +124 -0
- package/dist/browser/modules/word/writer/render-context.js +46 -0
- package/dist/browser/modules/word/{writers → writer}/run-writer.d.ts +10 -1
- package/dist/{esm/modules/word/writers → browser/modules/word/writer}/run-writer.js +126 -24
- package/dist/browser/modules/word/writer/sdt-writer.d.ts +25 -0
- package/dist/browser/modules/word/writer/sdt-writer.js +189 -0
- package/dist/browser/modules/word/writer/stream-buf.d.ts +37 -0
- package/dist/browser/modules/word/writer/stream-buf.js +73 -0
- package/dist/browser/modules/word/writer/streaming-writer.d.ts +344 -0
- package/dist/browser/modules/word/writer/streaming-writer.js +1382 -0
- package/dist/browser/modules/word/writer/string-buf.d.ts +8 -0
- package/dist/browser/modules/word/writer/string-buf.js +7 -0
- package/dist/browser/modules/word/{writers → writer}/styles-writer.js +32 -1
- package/dist/browser/modules/word/{writers → writer}/table-writer.d.ts +2 -1
- package/dist/browser/modules/word/{writers → writer}/table-writer.js +94 -11
- package/dist/browser/modules/xml/types.d.ts +22 -0
- package/dist/browser/utils/crypto.browser.d.ts +3 -1
- package/dist/browser/utils/crypto.browser.js +3 -1
- package/dist/browser/utils/crypto.d.ts +4 -1
- package/dist/browser/utils/crypto.js +4 -1
- package/dist/browser/utils/font-metrics.d.ts +63 -0
- package/dist/browser/utils/font-metrics.js +293 -0
- package/dist/browser/utils/string-buf.d.ts +42 -0
- package/dist/browser/utils/string-buf.js +89 -0
- package/dist/browser/utils/theme-colors.d.ts +55 -0
- package/dist/browser/utils/theme-colors.js +120 -0
- package/dist/cjs/modules/archive/compression/streaming-compress.browser.js +29 -0
- package/dist/cjs/modules/archive/compression/streaming-compress.js +9 -0
- package/dist/cjs/modules/archive/compression/worker-pool/pool.browser.js +26 -1
- package/dist/cjs/modules/archive/fs/archive-file.js +78 -16
- package/dist/cjs/modules/archive/unzip/stream.browser.js +43 -2
- package/dist/cjs/modules/excel/chart/chart-ex-builder.js +7 -2
- package/dist/cjs/modules/excel/chart/chart-ex-renderer.js +4 -9
- package/dist/cjs/modules/excel/chart/chart.js +1 -7
- package/dist/cjs/modules/excel/stream/workbook-reader.browser.js +25 -1
- package/dist/cjs/modules/excel/stream/workbook-reader.js +9 -0
- package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +228 -13
- package/dist/cjs/modules/excel/utils/string-buf.js +5 -81
- package/dist/cjs/modules/excel/workbook.browser.js +135 -25
- package/dist/cjs/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
- package/dist/cjs/modules/excel/xlsx/xlsx.browser.js +32 -8
- package/dist/cjs/modules/excel/xlsx/xlsx.js +9 -1
- package/dist/cjs/modules/pdf/excel-bridge.js +33 -0
- package/dist/cjs/modules/pdf/font/metrics.js +11 -244
- package/dist/cjs/modules/pdf/index.js +2 -1
- package/dist/cjs/modules/pdf/render-layout-to-pdf.js +651 -0
- package/dist/cjs/modules/pdf/word-bridge.js +155 -274
- package/dist/cjs/modules/stream/index.base.js +4 -2
- package/dist/cjs/modules/stream/internal/sink-adapter.js +202 -0
- package/dist/cjs/modules/stream/pull-stream.js +51 -5
- package/dist/cjs/modules/word/advanced/diff.js +170 -0
- package/dist/cjs/modules/word/advanced/drawing-shapes.js +279 -0
- package/dist/cjs/modules/word/advanced/field-engine.js +1229 -0
- package/dist/cjs/modules/word/advanced/glossary.js +87 -0
- package/dist/cjs/modules/word/advanced/math-convert.js +599 -0
- package/dist/cjs/modules/word/advanced/ole-objects.js +277 -0
- package/dist/cjs/modules/word/advanced/style-map.js +329 -0
- package/dist/cjs/modules/word/advanced/validation.js +1068 -0
- package/dist/cjs/modules/word/advanced/vba-project.js +274 -0
- package/dist/cjs/modules/word/bridge/excel-bridge.js +1020 -0
- package/dist/cjs/modules/word/builder/document-handle.js +667 -0
- package/dist/cjs/modules/word/builder/paragraph-builders.js +109 -0
- package/dist/cjs/modules/word/builder/run-builders.js +676 -0
- package/dist/cjs/modules/word/builder/table-builders.js +53 -0
- package/dist/cjs/modules/word/constants.js +111 -2
- package/dist/cjs/modules/word/convert/conversion-ir.js +34 -0
- package/dist/cjs/modules/word/convert/docx-to-semantic.js +502 -0
- package/dist/cjs/modules/word/convert/flat-opc.js +390 -0
- package/dist/cjs/modules/word/convert/html/html-import.js +1910 -0
- package/dist/cjs/modules/word/{html-renderer.js → convert/html/html-renderer.js} +420 -69
- package/dist/cjs/modules/word/convert/html/html.js +20 -0
- package/dist/cjs/modules/word/convert/markdown/markdown-import.js +1329 -0
- package/dist/cjs/modules/word/convert/markdown/markdown-renderer.js +637 -0
- package/dist/cjs/modules/word/convert/markdown/markdown.js +21 -0
- package/dist/cjs/modules/word/convert/odt/odt.js +1936 -0
- package/dist/cjs/modules/word/core/color-utils.js +47 -0
- package/dist/cjs/modules/word/core/internal-utils.js +219 -0
- package/dist/cjs/modules/word/core/mapper.js +430 -0
- package/dist/cjs/modules/word/core/opc-paths.js +53 -0
- package/dist/cjs/modules/word/core/text-utils.js +210 -0
- package/dist/cjs/modules/word/core/walker.js +577 -0
- package/dist/cjs/modules/word/crypto.js +19 -8
- package/dist/cjs/modules/word/document-io.js +117 -197
- package/dist/cjs/modules/word/errors.js +59 -13
- package/dist/cjs/modules/word/excel.js +22 -0
- package/dist/cjs/modules/word/font/font-embed.js +652 -0
- package/dist/cjs/modules/word/{font-obfuscation.js → font/font-obfuscation.js} +4 -9
- package/dist/cjs/modules/word/font/hyphenation.js +4216 -0
- package/dist/cjs/modules/word/font/text-shaping.js +640 -0
- package/dist/cjs/modules/word/html.js +9 -7
- package/dist/cjs/modules/word/incremental-edit.js +366 -0
- package/dist/cjs/modules/word/index.base.js +370 -137
- package/dist/cjs/modules/word/layout/layout-constants.js +20 -0
- package/dist/cjs/modules/word/layout/layout-full.js +1699 -0
- package/dist/cjs/modules/word/layout/layout-model.js +17 -0
- package/dist/cjs/modules/word/layout/layout.js +1170 -0
- package/dist/cjs/modules/word/layout/render-page.js +1243 -0
- package/dist/cjs/modules/word/markdown.js +19 -0
- package/dist/cjs/modules/word/patcher.js +539 -0
- package/dist/cjs/modules/word/query/compat.js +61 -0
- package/dist/cjs/modules/word/query/data-binding.js +395 -0
- package/dist/cjs/modules/word/query/form-fields.js +272 -0
- package/dist/cjs/modules/word/query/format-search.js +334 -0
- package/dist/cjs/modules/word/query/mail-merge.js +114 -0
- package/dist/cjs/modules/word/query/merge.js +620 -0
- package/dist/cjs/modules/word/query/replace.js +304 -0
- package/dist/cjs/modules/word/query/revisions.js +885 -0
- package/dist/cjs/modules/word/query/search.js +361 -0
- package/dist/cjs/modules/word/query/split.js +138 -0
- package/dist/cjs/modules/word/query/style-resolve.js +374 -0
- package/dist/cjs/modules/word/reader/chart-parser.js +814 -0
- package/dist/cjs/modules/word/reader/comments-parser.js +96 -0
- package/dist/cjs/modules/word/reader/doc-props-parsers.js +194 -0
- package/dist/cjs/modules/word/reader/docx-reader.js +2560 -0
- package/dist/cjs/modules/word/reader/drawing-helpers.js +90 -0
- package/dist/cjs/modules/word/reader/form-field-parser.js +85 -0
- package/dist/cjs/modules/word/reader/image-parsers.js +293 -0
- package/dist/cjs/modules/word/reader/math-parser.js +424 -0
- package/dist/cjs/modules/word/reader/metadata-parsers.js +93 -0
- package/dist/cjs/modules/word/reader/numbering-parser.js +168 -0
- package/dist/cjs/modules/word/reader/paragraph-section-parsers.js +505 -0
- package/dist/cjs/modules/word/reader/parse-utils.js +271 -0
- package/dist/cjs/modules/word/reader/properties-parsers.js +338 -0
- package/dist/cjs/modules/word/reader/reader-context.js +66 -0
- package/dist/cjs/modules/word/reader/sdt-helpers.js +114 -0
- package/dist/cjs/modules/word/reader/settings-parser.js +265 -0
- package/dist/cjs/modules/word/reader/styles-parser.js +149 -0
- package/dist/cjs/modules/word/reader/table-properties-parsers.js +237 -0
- package/dist/cjs/modules/word/reader/theme-parser.js +169 -0
- package/dist/cjs/modules/word/reader/watermark-parser.js +113 -0
- package/dist/cjs/modules/word/security/cfb-reader.js +414 -0
- package/dist/cjs/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
- package/dist/cjs/modules/word/security/document-protection.js +208 -0
- package/dist/cjs/modules/word/security/encryption.js +612 -0
- package/dist/cjs/modules/word/security/policy.js +106 -0
- package/dist/cjs/modules/word/template/template-chart.js +170 -0
- package/dist/cjs/modules/word/template/template-datasource.js +549 -0
- package/dist/cjs/modules/word/template/template-engine.js +1430 -0
- package/dist/cjs/modules/word/units.js +44 -14
- package/dist/cjs/modules/word/{writers → writer}/chart-writer.js +163 -22
- package/dist/cjs/modules/word/writer/checkbox-writer.js +82 -0
- package/dist/cjs/modules/word/{writers → writer}/comment-writer.js +8 -6
- package/dist/cjs/modules/word/writer/common-parts.js +104 -0
- package/dist/cjs/modules/word/{content-types.js → writer/content-types.js} +14 -6
- package/dist/cjs/modules/word/writer/document-writer.js +478 -0
- package/dist/cjs/modules/word/writer/docx-packager.js +1551 -0
- package/dist/cjs/modules/word/{writers → writer}/footnote-writer.js +13 -10
- package/dist/cjs/modules/word/{writers → writer}/header-footer-writer.js +38 -20
- package/dist/cjs/modules/word/{writers → writer}/image-writer.js +11 -7
- package/dist/cjs/modules/word/{writers → writer}/math-writer.js +21 -1
- package/dist/cjs/modules/word/{writers → writer}/numbering-writer.js +11 -4
- package/dist/cjs/modules/word/{writers → writer}/paragraph-writer.js +72 -37
- package/dist/cjs/modules/word/{writers → writer}/parts-writer.js +91 -12
- package/dist/cjs/modules/word/writer/reference-scanners.js +120 -0
- package/dist/cjs/modules/word/writer/relationships.js +124 -0
- package/dist/cjs/modules/word/writer/render-context.js +51 -0
- package/dist/cjs/modules/word/{writers → writer}/run-writer.js +127 -24
- package/dist/cjs/modules/word/writer/sdt-writer.js +192 -0
- package/dist/cjs/modules/word/writer/stream-buf.js +76 -0
- package/dist/cjs/modules/word/writer/streaming-writer.js +1387 -0
- package/dist/cjs/modules/word/writer/string-buf.js +11 -0
- package/dist/cjs/modules/word/{writers → writer}/styles-writer.js +32 -1
- package/dist/cjs/modules/word/{writers → writer}/table-writer.js +94 -11
- package/dist/cjs/utils/crypto.browser.js +3 -1
- package/dist/cjs/utils/crypto.js +4 -1
- package/dist/cjs/utils/font-metrics.js +303 -0
- package/dist/cjs/utils/string-buf.js +92 -0
- package/dist/cjs/utils/theme-colors.js +126 -0
- package/dist/esm/modules/archive/compression/streaming-compress.browser.js +29 -0
- package/dist/esm/modules/archive/compression/streaming-compress.js +9 -0
- package/dist/esm/modules/archive/compression/worker-pool/pool.browser.js +26 -1
- package/dist/esm/modules/archive/fs/archive-file.js +78 -16
- package/dist/esm/modules/archive/unzip/stream.browser.js +43 -2
- package/dist/esm/modules/excel/chart/chart-ex-builder.js +7 -2
- package/dist/esm/modules/excel/chart/chart-ex-renderer.js +4 -9
- package/dist/esm/modules/excel/chart/chart.js +1 -7
- package/dist/esm/modules/excel/stream/workbook-reader.browser.js +25 -1
- package/dist/esm/modules/excel/stream/workbook-reader.js +9 -0
- package/dist/esm/modules/excel/stream/workbook-writer.browser.js +228 -13
- package/dist/esm/modules/excel/utils/string-buf.js +4 -81
- package/dist/esm/modules/excel/workbook.browser.js +135 -25
- package/dist/esm/modules/excel/xlsx/xform/chart/chart-space-xform.js +6 -20
- package/dist/esm/modules/excel/xlsx/xlsx.browser.js +32 -8
- package/dist/esm/modules/excel/xlsx/xlsx.js +9 -1
- package/dist/esm/modules/pdf/excel-bridge.js +32 -0
- package/dist/esm/modules/pdf/font/metrics.js +3 -237
- package/dist/esm/modules/pdf/index.js +1 -1
- package/dist/esm/modules/pdf/render-layout-to-pdf.js +647 -0
- package/dist/esm/modules/pdf/word-bridge.js +122 -274
- package/dist/esm/modules/stream/index.base.js +2 -1
- package/dist/esm/modules/stream/internal/sink-adapter.js +198 -0
- package/dist/esm/modules/stream/pull-stream.js +51 -5
- package/dist/esm/modules/word/advanced/diff.js +167 -0
- package/dist/esm/modules/word/advanced/drawing-shapes.js +268 -0
- package/dist/esm/modules/word/advanced/field-engine.js +1225 -0
- package/dist/esm/modules/word/advanced/glossary.js +79 -0
- package/dist/esm/modules/word/advanced/math-convert.js +595 -0
- package/dist/esm/modules/word/advanced/ole-objects.js +271 -0
- package/dist/esm/modules/word/advanced/style-map.js +322 -0
- package/dist/esm/modules/word/advanced/validation.js +1065 -0
- package/dist/esm/modules/word/advanced/vba-project.js +265 -0
- package/dist/esm/modules/word/bridge/excel-bridge.js +980 -0
- package/dist/esm/modules/word/builder/document-handle.js +664 -0
- package/dist/esm/modules/word/builder/paragraph-builders.js +90 -0
- package/dist/esm/modules/word/builder/run-builders.js +600 -0
- package/dist/esm/modules/word/builder/table-builders.js +45 -0
- package/dist/esm/modules/word/constants.js +109 -1
- package/dist/esm/modules/word/convert/conversion-ir.js +31 -0
- package/dist/esm/modules/word/convert/docx-to-semantic.js +499 -0
- package/dist/esm/modules/word/convert/flat-opc.js +385 -0
- package/dist/esm/modules/word/convert/html/html-import.js +1907 -0
- package/dist/{browser/modules/word → esm/modules/word/convert/html}/html-renderer.js +420 -69
- package/dist/esm/modules/word/convert/html/html.js +15 -0
- package/dist/esm/modules/word/convert/markdown/markdown-import.js +1325 -0
- package/dist/esm/modules/word/convert/markdown/markdown-renderer.js +634 -0
- package/dist/esm/modules/word/convert/markdown/markdown.js +15 -0
- package/dist/esm/modules/word/convert/odt/odt.js +1932 -0
- package/dist/esm/modules/word/core/color-utils.js +43 -0
- package/dist/esm/modules/word/core/internal-utils.js +209 -0
- package/dist/esm/modules/word/core/mapper.js +427 -0
- package/dist/esm/modules/word/core/opc-paths.js +48 -0
- package/dist/esm/modules/word/core/text-utils.js +202 -0
- package/dist/esm/modules/word/core/walker.js +570 -0
- package/dist/esm/modules/word/crypto.js +13 -7
- package/dist/esm/modules/word/document-io.js +80 -197
- package/dist/esm/modules/word/errors.js +54 -2
- package/dist/esm/modules/word/excel.js +13 -0
- package/dist/esm/modules/word/font/font-embed.js +646 -0
- package/dist/{browser/modules/word → esm/modules/word/font}/font-obfuscation.js +4 -9
- package/dist/esm/modules/word/font/hyphenation.js +4210 -0
- package/dist/esm/modules/word/font/text-shaping.js +635 -0
- package/dist/esm/modules/word/html.js +6 -5
- package/dist/esm/modules/word/incremental-edit.js +361 -0
- package/dist/esm/modules/word/index.base.js +138 -29
- package/dist/esm/modules/word/layout/layout-constants.js +17 -0
- package/dist/esm/modules/word/layout/layout-full.js +1696 -0
- package/dist/esm/modules/word/layout/layout-model.js +16 -0
- package/dist/esm/modules/word/layout/layout.js +1167 -0
- package/dist/esm/modules/word/layout/render-page.js +1238 -0
- package/dist/esm/modules/word/markdown.js +13 -0
- package/dist/esm/modules/word/patcher.js +537 -0
- package/dist/esm/modules/word/query/compat.js +58 -0
- package/dist/esm/modules/word/query/data-binding.js +392 -0
- package/dist/esm/modules/word/query/form-fields.js +268 -0
- package/dist/esm/modules/word/query/format-search.js +329 -0
- package/dist/esm/modules/word/query/mail-merge.js +111 -0
- package/dist/esm/modules/word/query/merge.js +617 -0
- package/dist/esm/modules/word/query/replace.js +301 -0
- package/dist/esm/modules/word/query/revisions.js +879 -0
- package/dist/esm/modules/word/query/search.js +346 -0
- package/dist/esm/modules/word/query/split.js +135 -0
- package/dist/esm/modules/word/query/style-resolve.js +368 -0
- package/dist/esm/modules/word/reader/chart-parser.js +810 -0
- package/dist/esm/modules/word/reader/comments-parser.js +92 -0
- package/dist/esm/modules/word/reader/doc-props-parsers.js +190 -0
- package/dist/esm/modules/word/reader/docx-reader.js +2557 -0
- package/dist/esm/modules/word/reader/drawing-helpers.js +84 -0
- package/dist/esm/modules/word/reader/form-field-parser.js +82 -0
- package/dist/esm/modules/word/reader/image-parsers.js +291 -0
- package/dist/esm/modules/word/reader/math-parser.js +422 -0
- package/dist/esm/modules/word/reader/metadata-parsers.js +87 -0
- package/dist/esm/modules/word/reader/numbering-parser.js +166 -0
- package/dist/esm/modules/word/reader/paragraph-section-parsers.js +503 -0
- package/dist/esm/modules/word/reader/parse-utils.js +249 -0
- package/dist/esm/modules/word/reader/properties-parsers.js +332 -0
- package/dist/esm/modules/word/reader/reader-context.js +61 -0
- package/dist/esm/modules/word/reader/sdt-helpers.js +111 -0
- package/dist/esm/modules/word/reader/settings-parser.js +263 -0
- package/dist/esm/modules/word/reader/styles-parser.js +147 -0
- package/dist/esm/modules/word/reader/table-properties-parsers.js +234 -0
- package/dist/esm/modules/word/reader/theme-parser.js +167 -0
- package/dist/esm/modules/word/reader/watermark-parser.js +110 -0
- package/dist/esm/modules/word/security/cfb-reader.js +410 -0
- package/dist/esm/modules/word/{digital-signatures.js → security/digital-signatures.js} +34 -34
- package/dist/esm/modules/word/security/document-protection.js +201 -0
- package/dist/esm/modules/word/security/encryption.js +602 -0
- package/dist/esm/modules/word/security/policy.js +102 -0
- package/dist/esm/modules/word/template/template-chart.js +167 -0
- package/dist/esm/modules/word/template/template-datasource.js +541 -0
- package/dist/esm/modules/word/template/template-engine.js +1435 -0
- package/dist/esm/modules/word/units.js +43 -14
- package/dist/esm/modules/word/{writers → writer}/chart-writer.js +164 -23
- package/dist/esm/modules/word/writer/checkbox-writer.js +79 -0
- package/dist/esm/modules/word/{writers → writer}/comment-writer.js +8 -6
- package/dist/esm/modules/word/writer/common-parts.js +101 -0
- package/dist/{browser/modules/word → esm/modules/word/writer}/content-types.js +14 -6
- package/dist/esm/modules/word/writer/document-writer.js +473 -0
- package/dist/esm/modules/word/writer/docx-packager.js +1515 -0
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/footnote-writer.js +13 -10
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/header-footer-writer.js +39 -21
- package/dist/esm/modules/word/{writers → writer}/image-writer.js +11 -7
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/math-writer.js +21 -1
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/numbering-writer.js +11 -4
- package/dist/esm/modules/word/{writers → writer}/paragraph-writer.js +73 -38
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/parts-writer.js +91 -12
- package/dist/esm/modules/word/writer/reference-scanners.js +111 -0
- package/dist/esm/modules/word/writer/relationships.js +117 -0
- package/dist/esm/modules/word/writer/render-context.js +46 -0
- package/dist/{browser/modules/word/writers → esm/modules/word/writer}/run-writer.js +126 -24
- package/dist/esm/modules/word/writer/sdt-writer.js +189 -0
- package/dist/esm/modules/word/writer/stream-buf.js +73 -0
- package/dist/esm/modules/word/writer/streaming-writer.js +1382 -0
- package/dist/esm/modules/word/writer/string-buf.js +7 -0
- package/dist/esm/modules/word/{writers → writer}/styles-writer.js +32 -1
- package/dist/esm/modules/word/{writers → writer}/table-writer.js +94 -11
- package/dist/esm/utils/crypto.browser.js +3 -1
- package/dist/esm/utils/crypto.js +4 -1
- package/dist/esm/utils/font-metrics.js +293 -0
- package/dist/esm/utils/string-buf.js +89 -0
- package/dist/esm/utils/theme-colors.js +120 -0
- package/dist/iife/excelts.iife.js +70692 -70337
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +57 -57
- package/dist/types/modules/archive/fs/archive-file.d.ts +8 -5
- package/dist/types/modules/excel/chart/chart-ex-types.d.ts +0 -12
- package/dist/types/modules/excel/chart/chart.d.ts +1 -5
- package/dist/types/modules/excel/chart/types.d.ts +0 -6
- package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +40 -0
- package/dist/types/modules/excel/utils/string-buf.d.ts +5 -26
- package/dist/types/modules/excel/xlsx/xlsx.browser.d.ts +19 -9
- package/dist/types/modules/excel/xlsx/xlsx.d.ts +10 -2
- package/dist/types/modules/pdf/excel-bridge.d.ts +30 -1
- package/dist/types/modules/pdf/font/metrics.d.ts +3 -52
- package/dist/types/modules/pdf/index.d.ts +1 -1
- package/dist/types/modules/pdf/render-layout-to-pdf.d.ts +66 -0
- package/dist/types/modules/pdf/word-bridge.d.ts +80 -12
- package/dist/types/modules/stream/index.base.d.ts +2 -0
- package/dist/types/modules/stream/internal/sink-adapter.d.ts +65 -0
- package/dist/types/modules/stream/pull-stream.d.ts +19 -2
- package/dist/types/modules/stream/types.d.ts +13 -1
- package/dist/types/modules/word/advanced/diff.d.ts +61 -0
- package/dist/types/modules/word/advanced/drawing-shapes.d.ts +269 -0
- package/dist/types/modules/word/advanced/field-engine.d.ts +43 -0
- package/dist/types/modules/word/advanced/glossary.d.ts +86 -0
- package/dist/types/modules/word/advanced/math-convert.d.ts +30 -0
- package/dist/types/modules/word/advanced/ole-objects.d.ts +115 -0
- package/dist/types/modules/word/advanced/style-map.d.ts +105 -0
- package/dist/types/modules/word/advanced/validation.d.ts +56 -0
- package/dist/types/modules/word/advanced/vba-project.d.ts +91 -0
- package/dist/types/modules/word/bridge/excel-bridge.d.ts +127 -0
- package/dist/types/modules/word/builder/document-handle.d.ts +151 -0
- package/dist/types/modules/word/builder/paragraph-builders.d.ts +61 -0
- package/dist/types/modules/word/builder/run-builders.d.ts +374 -0
- package/dist/types/modules/word/builder/table-builders.d.ts +23 -0
- package/dist/types/modules/word/constants.d.ts +39 -1
- package/dist/types/modules/word/convert/conversion-ir.d.ts +210 -0
- package/dist/types/modules/word/convert/docx-to-semantic.d.ts +39 -0
- package/dist/types/modules/word/convert/flat-opc.d.ts +44 -0
- package/dist/types/modules/word/convert/html/html-import.d.ts +50 -0
- package/dist/{browser/modules/word → types/modules/word/convert/html}/html-renderer.d.ts +14 -1
- package/dist/types/modules/word/convert/html/html.d.ts +15 -0
- package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +68 -0
- package/dist/types/modules/word/convert/markdown/markdown-renderer.d.ts +25 -0
- package/dist/types/modules/word/convert/markdown/markdown.d.ts +15 -0
- package/dist/types/modules/word/convert/odt/odt.d.ts +41 -0
- package/dist/types/modules/word/{color-utils.d.ts → core/color-utils.d.ts} +8 -1
- package/dist/types/modules/word/core/internal-utils.d.ts +90 -0
- package/dist/types/modules/word/core/mapper.d.ts +44 -0
- package/dist/types/modules/word/core/opc-paths.d.ts +33 -0
- package/dist/types/modules/word/core/text-utils.d.ts +38 -0
- package/dist/types/modules/word/core/walker.d.ts +119 -0
- package/dist/types/modules/word/crypto.d.ts +14 -9
- package/dist/types/modules/word/document-io.d.ts +59 -27
- package/dist/types/modules/word/errors.d.ts +44 -1
- package/dist/types/modules/word/excel.d.ts +14 -0
- package/dist/types/modules/word/font/font-embed.d.ts +112 -0
- package/dist/types/modules/word/font/hyphenation.d.ts +65 -0
- package/dist/types/modules/word/font/text-shaping.d.ts +58 -0
- package/dist/types/modules/word/html.d.ts +7 -6
- package/dist/types/modules/word/incremental-edit.d.ts +123 -0
- package/dist/types/modules/word/index.base.d.ts +194 -10
- package/dist/types/modules/word/layout/layout-constants.d.ts +17 -0
- package/dist/types/modules/word/layout/layout-full.d.ts +53 -0
- package/dist/types/modules/word/layout/layout-model.d.ts +344 -0
- package/dist/types/modules/word/layout/layout.d.ts +63 -0
- package/dist/types/modules/word/layout/render-page.d.ts +57 -0
- package/dist/types/modules/word/markdown.d.ts +14 -0
- package/dist/types/modules/word/patcher.d.ts +62 -0
- package/dist/types/modules/word/query/compat.d.ts +25 -0
- package/dist/types/modules/word/query/data-binding.d.ts +22 -0
- package/dist/types/modules/word/query/form-fields.d.ts +41 -0
- package/dist/types/modules/word/query/format-search.d.ts +99 -0
- package/dist/types/modules/word/query/mail-merge.d.ts +25 -0
- package/dist/types/modules/word/query/merge.d.ts +50 -0
- package/dist/types/modules/word/query/replace.d.ts +47 -0
- package/dist/types/modules/word/query/revisions.d.ts +67 -0
- package/dist/types/modules/word/query/search.d.ts +129 -0
- package/dist/types/modules/word/query/split.d.ts +44 -0
- package/dist/types/modules/word/query/style-resolve.d.ts +104 -0
- package/dist/types/modules/word/reader/chart-parser.d.ts +20 -0
- package/dist/types/modules/word/reader/comments-parser.d.ts +26 -0
- package/dist/types/modules/word/reader/doc-props-parsers.d.ts +15 -0
- package/dist/types/modules/word/reader/docx-reader.d.ts +27 -0
- package/dist/types/modules/word/reader/drawing-helpers.d.ts +27 -0
- package/dist/types/modules/word/reader/form-field-parser.d.ts +21 -0
- package/dist/types/modules/word/reader/image-parsers.d.ts +11 -0
- package/dist/types/modules/word/reader/math-parser.d.ts +12 -0
- package/dist/types/modules/word/reader/metadata-parsers.d.ts +17 -0
- package/dist/types/modules/word/reader/numbering-parser.d.ts +13 -0
- package/dist/types/modules/word/reader/paragraph-section-parsers.d.ts +12 -0
- package/dist/types/modules/word/reader/parse-utils.d.ts +91 -0
- package/dist/types/modules/word/reader/properties-parsers.d.ts +21 -0
- package/dist/types/modules/word/reader/reader-context.d.ts +69 -0
- package/dist/types/modules/word/reader/sdt-helpers.d.ts +29 -0
- package/dist/types/modules/word/reader/settings-parser.d.ts +8 -0
- package/dist/types/modules/word/reader/styles-parser.d.ts +12 -0
- package/dist/types/modules/word/reader/table-properties-parsers.d.ts +12 -0
- package/dist/types/modules/word/reader/theme-parser.d.ts +8 -0
- package/dist/types/modules/word/reader/watermark-parser.d.ts +15 -0
- package/dist/types/modules/word/security/cfb-reader.d.ts +37 -0
- package/dist/types/modules/word/{digital-signatures.d.ts → security/digital-signatures.d.ts} +19 -11
- package/dist/types/modules/word/security/document-protection.d.ts +93 -0
- package/dist/{browser/modules/word → types/modules/word/security}/encryption.d.ts +51 -4
- package/dist/types/modules/word/security/policy.d.ts +80 -0
- package/dist/types/modules/word/template/template-chart.d.ts +56 -0
- package/dist/types/modules/word/template/template-datasource.d.ts +154 -0
- package/dist/types/modules/word/template/template-engine.d.ts +121 -0
- package/dist/types/modules/word/types.d.ts +224 -25
- package/dist/types/modules/word/units.d.ts +26 -0
- package/dist/types/modules/word/writer/checkbox-writer.d.ts +17 -0
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/comment-writer.d.ts +2 -1
- package/dist/types/modules/word/writer/common-parts.d.ts +57 -0
- package/dist/{browser/modules/word → types/modules/word/writer}/content-types.d.ts +2 -2
- package/dist/types/modules/word/writer/document-writer.d.ts +24 -0
- package/dist/types/modules/word/writer/docx-packager.d.ts +35 -0
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/footnote-writer.d.ts +3 -2
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/header-footer-writer.d.ts +3 -2
- package/dist/{browser/modules/word/writers → types/modules/word/writer}/image-writer.d.ts +1 -1
- package/dist/types/modules/word/writer/math-writer.d.ts +20 -0
- package/dist/types/modules/word/{writers → writer}/numbering-writer.d.ts +1 -1
- package/dist/types/modules/word/{writers → writer}/paragraph-writer.d.ts +2 -1
- package/dist/types/modules/word/{writers → writer}/parts-writer.d.ts +3 -3
- package/dist/types/modules/word/writer/reference-scanners.d.ts +42 -0
- package/dist/types/modules/word/writer/relationships.d.ts +52 -0
- package/dist/types/modules/word/writer/render-context.d.ts +124 -0
- package/dist/types/modules/word/{writers → writer}/run-writer.d.ts +10 -1
- package/dist/types/modules/word/writer/sdt-writer.d.ts +25 -0
- package/dist/types/modules/word/writer/stream-buf.d.ts +37 -0
- package/dist/types/modules/word/writer/streaming-writer.d.ts +344 -0
- package/dist/types/modules/word/writer/string-buf.d.ts +8 -0
- package/dist/types/modules/word/{writers → writer}/table-writer.d.ts +2 -1
- package/dist/types/modules/xml/types.d.ts +22 -0
- package/dist/types/utils/crypto.browser.d.ts +3 -1
- package/dist/types/utils/crypto.d.ts +4 -1
- package/dist/types/utils/font-metrics.d.ts +63 -0
- package/dist/types/utils/string-buf.d.ts +42 -0
- package/dist/types/utils/theme-colors.d.ts +55 -0
- package/package.json +121 -39
- package/dist/browser/modules/word/color-utils.js +0 -94
- package/dist/browser/modules/word/document.d.ts +0 -657
- package/dist/browser/modules/word/document.js +0 -1533
- package/dist/browser/modules/word/docx-packager.d.ts +0 -14
- package/dist/browser/modules/word/docx-packager.js +0 -822
- package/dist/browser/modules/word/docx-reader.d.ts +0 -11
- package/dist/browser/modules/word/docx-reader.js +0 -4929
- package/dist/browser/modules/word/encryption.js +0 -274
- package/dist/browser/modules/word/internal-utils.d.ts +0 -23
- package/dist/browser/modules/word/internal-utils.js +0 -54
- package/dist/browser/modules/word/namespaces.d.ts +0 -159
- package/dist/browser/modules/word/namespaces.js +0 -189
- package/dist/browser/modules/word/relationships.d.ts +0 -30
- package/dist/browser/modules/word/relationships.js +0 -48
- package/dist/browser/modules/word/writers/checkbox-writer.d.ts +0 -9
- package/dist/browser/modules/word/writers/checkbox-writer.js +0 -42
- package/dist/browser/modules/word/writers/document-writer.d.ts +0 -16
- package/dist/browser/modules/word/writers/document-writer.js +0 -461
- package/dist/browser/modules/word/writers/math-writer.d.ts +0 -9
- package/dist/cjs/modules/word/color-utils.js +0 -97
- package/dist/cjs/modules/word/document.js +0 -1645
- package/dist/cjs/modules/word/docx-packager.js +0 -825
- package/dist/cjs/modules/word/docx-reader.js +0 -4932
- package/dist/cjs/modules/word/encryption.js +0 -282
- package/dist/cjs/modules/word/internal-utils.js +0 -59
- package/dist/cjs/modules/word/namespaces.js +0 -192
- package/dist/cjs/modules/word/relationships.js +0 -55
- package/dist/cjs/modules/word/writers/checkbox-writer.js +0 -45
- package/dist/cjs/modules/word/writers/document-writer.js +0 -465
- package/dist/esm/modules/word/color-utils.js +0 -94
- package/dist/esm/modules/word/document.js +0 -1533
- package/dist/esm/modules/word/docx-packager.js +0 -822
- package/dist/esm/modules/word/docx-reader.js +0 -4929
- package/dist/esm/modules/word/encryption.js +0 -274
- package/dist/esm/modules/word/internal-utils.js +0 -54
- package/dist/esm/modules/word/namespaces.js +0 -189
- package/dist/esm/modules/word/relationships.js +0 -48
- package/dist/esm/modules/word/writers/checkbox-writer.js +0 -42
- package/dist/esm/modules/word/writers/document-writer.js +0 -461
- package/dist/types/modules/word/document.d.ts +0 -657
- package/dist/types/modules/word/docx-packager.d.ts +0 -14
- package/dist/types/modules/word/docx-reader.d.ts +0 -11
- package/dist/types/modules/word/internal-utils.d.ts +0 -23
- package/dist/types/modules/word/namespaces.d.ts +0 -159
- package/dist/types/modules/word/relationships.d.ts +0 -30
- package/dist/types/modules/word/writers/checkbox-writer.d.ts +0 -9
- package/dist/types/modules/word/writers/document-writer.d.ts +0 -16
- package/dist/types/modules/word/writers/math-writer.d.ts +0 -9
- /package/dist/browser/modules/word/{font-obfuscation.d.ts → font/font-obfuscation.d.ts} +0 -0
- /package/dist/browser/modules/word/{writers → writer}/chart-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/section-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/section-writer.js +0 -0
- /package/dist/browser/modules/word/{writers → writer}/styles-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/textbox-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/textbox-writer.js +0 -0
- /package/dist/browser/modules/word/{writers → writer}/toc-writer.d.ts +0 -0
- /package/dist/browser/modules/word/{writers → writer}/toc-writer.js +0 -0
- /package/dist/cjs/modules/word/{writers → writer}/section-writer.js +0 -0
- /package/dist/cjs/modules/word/{writers → writer}/textbox-writer.js +0 -0
- /package/dist/cjs/modules/word/{writers → writer}/toc-writer.js +0 -0
- /package/dist/esm/modules/word/{writers → writer}/section-writer.js +0 -0
- /package/dist/esm/modules/word/{writers → writer}/textbox-writer.js +0 -0
- /package/dist/esm/modules/word/{writers → writer}/toc-writer.js +0 -0
- /package/dist/types/modules/word/{font-obfuscation.d.ts → font/font-obfuscation.d.ts} +0 -0
- /package/dist/types/modules/word/{writers → writer}/chart-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/section-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/styles-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/textbox-writer.d.ts +0 -0
- /package/dist/types/modules/word/{writers → writer}/toc-writer.d.ts +0 -0
|
@@ -0,0 +1,1910 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* DOCX Module - HTML to DOCX Converter
|
|
4
|
+
*
|
|
5
|
+
* Converts an HTML string into DOCX document body content (paragraphs, tables, etc.).
|
|
6
|
+
* Handles common HTML elements: p, h1-h6, strong, em, a, ul, ol, li, table, img, br, span.
|
|
7
|
+
*
|
|
8
|
+
* This is NOT a full HTML rendering engine — it covers the structural elements
|
|
9
|
+
* that map cleanly to WordprocessingML concepts.
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```ts
|
|
13
|
+
* import { htmlToDocxBody } from "excelts/word/html";
|
|
14
|
+
* import { Document, toBuffer } from "excelts/word";
|
|
15
|
+
*
|
|
16
|
+
* const body = htmlToDocxBody("<h1>Hello</h1><p>World</p>");
|
|
17
|
+
* const h = Document.create();
|
|
18
|
+
* for (const block of body) {
|
|
19
|
+
* Document.addBodyContent(h, block);
|
|
20
|
+
* }
|
|
21
|
+
* const buffer = await toBuffer(Document.build(h));
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
|
+
exports.htmlToDocxBody = htmlToDocxBody;
|
|
26
|
+
const internal_utils_1 = require("../../core/internal-utils");
|
|
27
|
+
const units_1 = require("../../units");
|
|
28
|
+
/**
 * Turn an HTML string into a list of DOCX body content blocks.
 *
 * Elements the converter advertises support for:
 * - Block: p, div, h1-h6, blockquote, pre, hr
 * - List: ul, ol, li
 * - Table: table, thead, tbody, tr, th, td (colspan, rowspan, border styles)
 * - Inline: strong/b, em/i, u, s/strike/del, a, br, span, sub, sup, code
 * - Images: img (base64 data URLs as InlineImageContent, http(s) as placeholder)
 * - Page break: div with style="page-break-before: always" or class="page-break"
 * - CSS inline styles: font-family, font-size, color, background-color,
 *   font-weight, font-style, text-decoration, text-align
 *
 * Class rules found in `<style>` elements are merged with `options.classStyles`;
 * on a name clash the caller-supplied entry replaces the extracted one.
 *
 * @param html - The HTML string to convert.
 * @param options - Optional settings; this function reads `classStyles`,
 *   `defaultFont` and `defaultFontSize`.
 * @returns Array of BodyContent blocks, filled in by `parseBlocks`.
 */
function htmlToDocxBody(html, options) {
    const tokenStream = tokenize(html);
    // Later spread wins, so user-provided class styles override <style> rules.
    const mergedClassStyles = {
        ...extractStyleRules(tokenStream),
        ...(options?.classStyles ?? {})
    };
    // The starting inline context carries the caller's defaults so that plain
    // text runs pick up the requested font and size.
    const { defaultFont, defaultFontSize } = options ?? {};
    const baseContext = {};
    if (defaultFont) {
        baseContext.fontFamily = defaultFont;
    }
    if (defaultFontSize !== undefined) {
        baseContext.fontSize = defaultFontSize;
    }
    const output = [];
    parseBlocks(tokenStream, 0, output, baseContext, mergedClassStyles);
    return output;
}
|
|
67
|
+
/**
 * Split an HTML string into a flat list of tokens.
 *
 * Token shapes produced:
 * - `{ type: "text", value }` for character data (entity-decoded),
 * - `{ type: "open" | "selfclose", tag, attrs }` for start tags,
 * - `{ type: "close", tag, attrs: {} }` for end tags.
 * Tag names are lower-cased. Void elements are reported as "selfclose" even
 * without a trailing slash.
 *
 * @param html - Raw HTML input.
 * @returns The token list in document order.
 */
function tokenize(html) {
    const tokens = [];
    // Comments, doctype declarations, CDATA sections and processing
    // instructions never belong in the document body, so drop them up front.
    const stripped = html
        .replace(/<!--[\s\S]*?-->/g, "")
        .replace(/<!doctype[^>]*>/gi, "")
        .replace(/<!\[CDATA\[[\s\S]*?\]\]>/g, "")
        .replace(/<\?[\s\S]*?\?>/g, "");
    // Either a tag, or a run of text. A `<` that is not followed by a
    // tag-like character counts as literal text so "1 < 2" survives.
    const re = /<\/?([a-zA-Z][a-zA-Z0-9]*)((?:\s+[^>]*?)?)\/?\s*>|((?:[^<]|<(?![/a-zA-Z]))+)/g;
    const tagRe = /^<(\/?)([a-zA-Z][a-zA-Z0-9]*)((?:\s+[^>]*?)?)(\/?)\s*>$/;
    let m;
    while ((m = re.exec(stripped)) !== null) {
        if (m[3] !== undefined) {
            // Text node
            const text = decodeHtmlEntities(m[3]);
            if (text) {
                tokens.push({ type: "text", value: text });
            }
            continue;
        }
        const tagMatch = tagRe.exec(m[0]);
        if (!tagMatch) {
            continue;
        }
        const tag = tagMatch[2].toLowerCase();
        if (tagMatch[1] === "/") {
            tokens.push({ type: "close", tag, attrs: {} });
            continue;
        }
        const attrs = parseHtmlAttrs(tagMatch[3]);
        if (tagMatch[4] === "/" || VOID_ELEMENTS.has(tag)) {
            tokens.push({ type: "selfclose", tag, attrs });
            continue;
        }
        tokens.push({ type: "open", tag, attrs });
        if (!RAW_TEXT_ELEMENTS.has(tag)) {
            continue;
        }
        // Raw-text elements: their body must not be parsed as markup. Jump to
        // the matching close tag and either keep the body as one text token
        // (e.g. <style>, consumed by extractStyleRules) or drop it (<script>).
        const bodyStart = re.lastIndex;
        // The `g` flag is essential: without it exec() ignores lastIndex and
        // searches from index 0, so a second <script>/<style> would find the
        // FIRST close tag, rewind re.lastIndex and loop forever.
        const closeRe = new RegExp(`</${tag}\\s*>`, "gi");
        closeRe.lastIndex = bodyStart;
        const closeMatch = closeRe.exec(stripped);
        if (closeMatch === null) {
            // No closing tag: discard the rest of the input rather than emit
            // the raw-text body as document text.
            re.lastIndex = stripped.length;
            continue;
        }
        if (RAW_TEXT_PRESERVE_BODY.has(tag)) {
            tokens.push({ type: "text", value: stripped.slice(bodyStart, closeMatch.index) });
        }
        tokens.push({ type: "close", tag, attrs: {} });
        re.lastIndex = closeMatch.index + closeMatch[0].length;
    }
    return tokens;
}
|
|
140
|
+
/**
 * Tag names whose content the tokenizer does not interpret as markup. The
 * content is either kept for later processing (see RAW_TEXT_PRESERVE_BODY)
 * or thrown away.
 */
const RAW_TEXT_ELEMENTS = new Set("script style noscript iframe noframes textarea title".split(" "));
/** Members of RAW_TEXT_ELEMENTS whose content is retained as one text token. */
const RAW_TEXT_PRESERVE_BODY = new Set().add("style");
|
|
155
|
+
/**
 * HTML void elements: tags that never have content or an end tag. The
 * tokenizer reports them as "selfclose" even when written without `/>`.
 * Includes `embed` and `track` from the HTML standard's void-element list and
 * the legacy `param`, so none of them is tokenised as an unclosed "open" tag.
 */
const VOID_ELEMENTS = new Set([
    "br",
    "hr",
    "img",
    "input",
    "col",
    "area",
    "base",
    "link",
    "meta",
    "source",
    "wbr",
    "embed",
    "track",
    "param"
]);
|
|
168
|
+
/**
 * Collect simple class rules from every `<style>` element in a token stream.
 *
 * Only `.className { property: value; ... }` is recognised; nested rules,
 * media queries, pseudo-classes and combinators are not interpreted. When a
 * class name occurs more than once, the first non-empty declaration block is
 * kept and later ones are ignored.
 *
 * @param tokens - Tokens as produced by the tokenizer.
 * @returns Map of className → declaration text (usable as an inline style).
 */
function extractStyleRules(tokens) {
    const rules = {};
    let pos = 0;
    while (pos < tokens.length) {
        const current = tokens[pos++];
        if (current.type !== "open" || current.tag !== "style") {
            continue;
        }
        // Gather all text up to the matching </style> (or the end of input).
        let css = "";
        for (; pos < tokens.length; pos++) {
            const inner = tokens[pos];
            if (inner.type === "close" && inner.tag === "style") {
                pos++;
                break;
            }
            if (inner.type === "text") {
                css += inner.value;
            }
        }
        // Pick out `.className { ... }` rules from the collected CSS.
        for (const [, className, rawBody] of css.matchAll(/\.([a-zA-Z_][\w-]*)\s*\{([^}]*)\}/g)) {
            const declarations = rawBody.trim();
            if (declarations && !rules[className]) {
                rules[className] = declarations;
            }
        }
    }
    return rules;
}
|
|
210
|
+
/**
 * Parse the attribute portion of a start tag into a name → value map.
 *
 * Accepts double-quoted, single-quoted and unquoted values. Attribute names
 * are lower-cased, and an attribute written without a value maps to "".
 *
 * @param str - The text between the tag name and the closing `>`.
 * @returns Plain object of attribute values keyed by lower-cased name.
 */
function parseHtmlAttrs(str) {
    const attrPattern = /([a-zA-Z_][\w-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
    const parsed = {};
    for (let hit = attrPattern.exec(str); hit !== null; hit = attrPattern.exec(str)) {
        const [, name, doubleQuoted, singleQuoted, bare] = hit;
        parsed[name.toLowerCase()] = doubleQuoted ?? singleQuoted ?? bare ?? "";
    }
    return parsed;
}
|
|
219
|
+
/**
 * Decode HTML character references in a text run.
 *
 * Handles the core named entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&apos;`, `&nbsp;`), decimal (`&#65;`) and hexadecimal (`&#x41;`) numeric
 * references, and the names listed in HTML_ENTITIES. Unknown names are left
 * untouched.
 *
 * Decoding is a single pass over the input, so the output of one replacement
 * is never decoded again: `&amp;lt;` yields the literal text `&lt;`, not `<`.
 *
 * @param text - Raw character data taken from the HTML source.
 * @returns The text with all recognised references replaced.
 */
function decodeHtmlEntities(text) {
    const coreEntities = {
        amp: "&",
        lt: "<",
        gt: ">",
        quot: '"',
        apos: "'",
        nbsp: "\u00A0"
    };
    // Own-property check so names such as "constructor" or "toString" are not
    // resolved through Object.prototype.
    const owns = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
    // Named references may contain digits after the first letter (frac12,
    // there4, ...), hence [a-zA-Z0-9]* rather than letters only.
    const reference = /&(?:#(\d+)|#[xX]([a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));/g;
    return text.replace(reference, (match, decimal, hex, name) => {
        if (decimal !== undefined) {
            return safeFromCodePoint(parseInt(decimal, 10));
        }
        if (hex !== undefined) {
            return safeFromCodePoint(parseInt(hex, 16));
        }
        if (owns(coreEntities, name)) {
            return coreEntities[name];
        }
        if (owns(HTML_ENTITIES, name)) {
            return HTML_ENTITIES[name];
        }
        return match;
    });
}
|
|
231
|
+
/**
 * Convert the value of a numeric character reference to a string.
 *
 * Uses String.fromCodePoint so astral characters (e.g. the emoji written as
 * `&#128512;`) become a proper surrogate pair instead of a single invalid
 * UTF-16 unit. Any value that is not a usable Unicode scalar value yields the
 * replacement character U+FFFD:
 * - NaN, infinities and non-integers (fromCodePoint would throw on those),
 * - negative numbers and values above 0x10FFFF,
 * - zero, which the HTML standard maps to U+FFFD,
 * - surrogate halves 0xD800-0xDFFF.
 *
 * @param cp - Candidate code point, typically the result of parseInt.
 * @returns A one-code-point string, or "\uFFFD" when `cp` is not acceptable.
 */
function safeFromCodePoint(cp) {
    const REPLACEMENT = "\uFFFD";
    // Number.isInteger also rejects NaN and ±Infinity.
    if (!Number.isInteger(cp) || cp <= 0 || cp > 0x10ffff) {
        return REPLACEMENT;
    }
    // Surrogate halves are not valid scalar values.
    if (cp >= 0xd800 && cp <= 0xdfff) {
        return REPLACEMENT;
    }
    return String.fromCodePoint(cp);
}
|
|
247
|
+
/** Common HTML named entities mapped to their Unicode characters. */
|
|
248
|
+
const HTML_ENTITIES = {
|
|
249
|
+
// Punctuation & Typography
|
|
250
|
+
mdash: "\u2014",
|
|
251
|
+
ndash: "\u2013",
|
|
252
|
+
hellip: "\u2026",
|
|
253
|
+
laquo: "\u00AB",
|
|
254
|
+
raquo: "\u00BB",
|
|
255
|
+
lsquo: "\u2018",
|
|
256
|
+
rsquo: "\u2019",
|
|
257
|
+
ldquo: "\u201C",
|
|
258
|
+
rdquo: "\u201D",
|
|
259
|
+
sbquo: "\u201A",
|
|
260
|
+
bdquo: "\u201E",
|
|
261
|
+
bull: "\u2022",
|
|
262
|
+
middot: "\u00B7",
|
|
263
|
+
prime: "\u2032",
|
|
264
|
+
Prime: "\u2033",
|
|
265
|
+
oline: "\u203E",
|
|
266
|
+
iquest: "\u00BF",
|
|
267
|
+
iexcl: "\u00A1",
|
|
268
|
+
sect: "\u00A7",
|
|
269
|
+
para: "\u00B6",
|
|
270
|
+
dagger: "\u2020",
|
|
271
|
+
Dagger: "\u2021",
|
|
272
|
+
permil: "\u2030",
|
|
273
|
+
// Symbols & Legal
|
|
274
|
+
copy: "\u00A9",
|
|
275
|
+
reg: "\u00AE",
|
|
276
|
+
trade: "\u2122",
|
|
277
|
+
// Math & Science
|
|
278
|
+
deg: "\u00B0",
|
|
279
|
+
plusmn: "\u00B1",
|
|
280
|
+
times: "\u00D7",
|
|
281
|
+
divide: "\u00F7",
|
|
282
|
+
minus: "\u2212",
|
|
283
|
+
lowast: "\u2217",
|
|
284
|
+
radic: "\u221A",
|
|
285
|
+
infin: "\u221E",
|
|
286
|
+
sum: "\u2211",
|
|
287
|
+
prod: "\u220F",
|
|
288
|
+
int: "\u222B",
|
|
289
|
+
part: "\u2202",
|
|
290
|
+
nabla: "\u2207",
|
|
291
|
+
ne: "\u2260",
|
|
292
|
+
equiv: "\u2261",
|
|
293
|
+
asymp: "\u2248",
|
|
294
|
+
le: "\u2264",
|
|
295
|
+
ge: "\u2265",
|
|
296
|
+
sub: "\u2282",
|
|
297
|
+
sup: "\u2283",
|
|
298
|
+
nsub: "\u2284",
|
|
299
|
+
sube: "\u2286",
|
|
300
|
+
supe: "\u2287",
|
|
301
|
+
oplus: "\u2295",
|
|
302
|
+
otimes: "\u2297",
|
|
303
|
+
perp: "\u22A5",
|
|
304
|
+
and: "\u2227",
|
|
305
|
+
or: "\u2228",
|
|
306
|
+
not: "\u00AC",
|
|
307
|
+
exist: "\u2203",
|
|
308
|
+
forall: "\u2200",
|
|
309
|
+
empty: "\u2205",
|
|
310
|
+
isin: "\u2208",
|
|
311
|
+
notin: "\u2209",
|
|
312
|
+
ni: "\u220B",
|
|
313
|
+
there4: "\u2234",
|
|
314
|
+
sim: "\u223C",
|
|
315
|
+
cong: "\u2245",
|
|
316
|
+
prop: "\u221D",
|
|
317
|
+
// Currency
|
|
318
|
+
euro: "\u20AC",
|
|
319
|
+
pound: "\u00A3",
|
|
320
|
+
yen: "\u00A5",
|
|
321
|
+
cent: "\u00A2",
|
|
322
|
+
curren: "\u00A4",
|
|
323
|
+
fnof: "\u0192",
|
|
324
|
+
// Greek letters (lowercase)
|
|
325
|
+
alpha: "\u03B1",
|
|
326
|
+
beta: "\u03B2",
|
|
327
|
+
gamma: "\u03B3",
|
|
328
|
+
delta: "\u03B4",
|
|
329
|
+
epsilon: "\u03B5",
|
|
330
|
+
zeta: "\u03B6",
|
|
331
|
+
eta: "\u03B7",
|
|
332
|
+
theta: "\u03B8",
|
|
333
|
+
iota: "\u03B9",
|
|
334
|
+
kappa: "\u03BA",
|
|
335
|
+
lambda: "\u03BB",
|
|
336
|
+
mu: "\u03BC",
|
|
337
|
+
nu: "\u03BD",
|
|
338
|
+
xi: "\u03BE",
|
|
339
|
+
omicron: "\u03BF",
|
|
340
|
+
pi: "\u03C0",
|
|
341
|
+
rho: "\u03C1",
|
|
342
|
+
sigma: "\u03C3",
|
|
343
|
+
tau: "\u03C4",
|
|
344
|
+
upsilon: "\u03C5",
|
|
345
|
+
phi: "\u03C6",
|
|
346
|
+
chi: "\u03C7",
|
|
347
|
+
psi: "\u03C8",
|
|
348
|
+
omega: "\u03C9",
|
|
349
|
+
// Greek letters (uppercase)
|
|
350
|
+
Alpha: "\u0391",
|
|
351
|
+
Beta: "\u0392",
|
|
352
|
+
Gamma: "\u0393",
|
|
353
|
+
Delta: "\u0394",
|
|
354
|
+
Epsilon: "\u0395",
|
|
355
|
+
Zeta: "\u0396",
|
|
356
|
+
Eta: "\u0397",
|
|
357
|
+
Theta: "\u0398",
|
|
358
|
+
Iota: "\u0399",
|
|
359
|
+
Kappa: "\u039A",
|
|
360
|
+
Lambda: "\u039B",
|
|
361
|
+
Mu: "\u039C",
|
|
362
|
+
Nu: "\u039D",
|
|
363
|
+
Xi: "\u039E",
|
|
364
|
+
Omicron: "\u039F",
|
|
365
|
+
Pi: "\u03A0",
|
|
366
|
+
Rho: "\u03A1",
|
|
367
|
+
Sigma: "\u03A3",
|
|
368
|
+
Tau: "\u03A4",
|
|
369
|
+
Upsilon: "\u03A5",
|
|
370
|
+
Phi: "\u03A6",
|
|
371
|
+
Chi: "\u03A7",
|
|
372
|
+
Psi: "\u03A8",
|
|
373
|
+
Omega: "\u03A9",
|
|
374
|
+
// Arrows
|
|
375
|
+
larr: "\u2190",
|
|
376
|
+
uarr: "\u2191",
|
|
377
|
+
rarr: "\u2192",
|
|
378
|
+
darr: "\u2193",
|
|
379
|
+
harr: "\u2194",
|
|
380
|
+
lArr: "\u21D0",
|
|
381
|
+
uArr: "\u21D1",
|
|
382
|
+
rArr: "\u21D2",
|
|
383
|
+
dArr: "\u21D3",
|
|
384
|
+
hArr: "\u21D4",
|
|
385
|
+
crarr: "\u21B5",
|
|
386
|
+
// Fractions
|
|
387
|
+
frac12: "\u00BD",
|
|
388
|
+
frac14: "\u00BC",
|
|
389
|
+
frac34: "\u00BE",
|
|
390
|
+
frac13: "\u2153",
|
|
391
|
+
frac23: "\u2154",
|
|
392
|
+
frac15: "\u2155",
|
|
393
|
+
frac18: "\u215B",
|
|
394
|
+
frac38: "\u215C",
|
|
395
|
+
frac58: "\u215D",
|
|
396
|
+
frac78: "\u215E",
|
|
397
|
+
// Spaces
|
|
398
|
+
ensp: "\u2002",
|
|
399
|
+
emsp: "\u2003",
|
|
400
|
+
thinsp: "\u2009",
|
|
401
|
+
zwnj: "\u200C",
|
|
402
|
+
zwj: "\u200D",
|
|
403
|
+
lrm: "\u200E",
|
|
404
|
+
rlm: "\u200F",
|
|
405
|
+
// Misc Symbols
|
|
406
|
+
spades: "\u2660",
|
|
407
|
+
clubs: "\u2663",
|
|
408
|
+
hearts: "\u2665",
|
|
409
|
+
diams: "\u2666",
|
|
410
|
+
loz: "\u25CA",
|
|
411
|
+
circ: "\u02C6",
|
|
412
|
+
tilde: "\u02DC",
|
|
413
|
+
shy: "\u00AD",
|
|
414
|
+
macr: "\u00AF",
|
|
415
|
+
acute: "\u00B4",
|
|
416
|
+
cedil: "\u00B8",
|
|
417
|
+
micro: "\u00B5",
|
|
418
|
+
sup1: "\u00B9",
|
|
419
|
+
sup2: "\u00B2",
|
|
420
|
+
sup3: "\u00B3",
|
|
421
|
+
ordf: "\u00AA",
|
|
422
|
+
ordm: "\u00BA"
|
|
423
|
+
};
|
|
424
|
+
/**
 * Parse a CSS inline style string into structured values.
 *
 * The string is split on ";" into declarations and each declaration on its
 * first ":". Property names are matched case-insensitively. Only the
 * properties handled in the switch below are recognised; everything else is
 * ignored. A trailing `!important` flag is removed before the value is
 * interpreted.
 *
 * @param {string} styleStr Raw content of a `style` attribute (may be empty).
 * @returns {object} Object carrying only the keys that were recognised:
 *   fontFamily, fontSize, color, backgroundColor, bold, italic, underline,
 *   lineThrough, textAlign, pageBreakBefore, marginLeft, lineHeight,
 *   borderWidth, borderStyle, borderColor, width.
 */
function parseCssStyle(styleStr) {
    const result = {};
    if (!styleStr) {
        return result;
    }
    // Split on whitespace that is not inside parentheses, so a functional
    // value such as "rgb(0, 0, 0)" stays a single token.
    const splitParts = (text) => text.split(/\s+(?![^(]*\))/);
    const declarations = styleStr.split(";");
    for (const decl of declarations) {
        const colonIdx = decl.indexOf(":");
        if (colonIdx < 0) {
            continue;
        }
        const prop = decl.slice(0, colonIdx).trim().toLowerCase();
        // `!important` is a priority flag, not part of the value.
        const rawValue = decl
            .slice(colonIdx + 1)
            .trim()
            .replace(/!\s*important$/i, "")
            .trim();
        const value = rawValue.toLowerCase();
        switch (prop) {
            case "font-family":
                // Font names are passed with their original casing.
                result.fontFamily = parseFontFamily(rawValue);
                break;
            case "font-size":
                result.fontSize = parseFontSize(value);
                break;
            case "color":
                result.color = parseCssColor(value);
                break;
            case "background-color":
                result.backgroundColor = parseCssColor(value);
                break;
            case "font-weight":
                if (value === "bold" || value === "bolder" || parseInt(value, 10) >= 700) {
                    result.bold = true;
                }
                break;
            case "font-style":
                if (value === "italic" || value === "oblique") {
                    result.italic = true;
                }
                break;
            case "text-decoration":
            case "text-decoration-line":
                if (value.includes("underline")) {
                    result.underline = true;
                }
                if (value.includes("line-through")) {
                    result.lineThrough = true;
                }
                break;
            case "text-align":
                if (value === "left" || value === "start") {
                    result.textAlign = "left";
                }
                else if (value === "center") {
                    result.textAlign = "center";
                }
                else if (value === "right" || value === "end") {
                    result.textAlign = "right";
                }
                else if (value === "justify") {
                    result.textAlign = "both";
                }
                break;
            case "page-break-before":
                if (value === "always") {
                    result.pageBreakBefore = true;
                }
                break;
            case "margin-left": {
                const twips = parseLengthToTwips(value);
                if (twips !== undefined) {
                    result.marginLeft = twips;
                }
                break;
            }
            case "line-height": {
                const spacing = parseLineHeight(value);
                if (spacing !== undefined) {
                    result.lineHeight = spacing;
                }
                break;
            }
            case "border": {
                // Shorthand: border: 1px solid black
                for (const part of splitParts(value)) {
                    if (/^\d/.test(part)) {
                        result.borderWidth = parseBorderWidth(part);
                    }
                    else if (isBorderStyleKeyword(part)) {
                        result.borderStyle = part;
                    }
                    else {
                        const c = parseCssColor(part);
                        if (c) {
                            result.borderColor = c;
                        }
                    }
                }
                break;
            }
            case "border-style":
                // Only the first of up to four side values is used.
                result.borderStyle = splitParts(value)[0];
                break;
            case "border-width":
                result.borderWidth = parseBorderWidth(splitParts(value)[0]);
                break;
            case "border-color": {
                const c = parseCssColor(splitParts(value)[0]);
                if (c) {
                    result.borderColor = c;
                }
                break;
            }
            case "width": {
                const twips = parseLengthToTwips(value);
                if (twips !== undefined) {
                    result.width = twips;
                }
                break;
            }
        }
    }
    return result;
}
|
|
549
|
+
/**
 * Extract the first font-family name from a CSS font-family value.
 *
 * Only the first entry of the comma-separated list is used. Quote characters
 * are stripped, and the first character of every whitespace-separated word is
 * upper-cased; the remaining characters are returned exactly as received.
 *
 * @param {string} value CSS `font-family` value.
 * @returns {string} Cleaned-up name of the first listed family.
 */
function parseFontFamily(value) {
    const [primary] = value.split(",");
    const unquoted = primary.trim().replace(/["']/g, "").trim();
    const words = [];
    for (const word of unquoted.split(/\s+/)) {
        words.push(word.charAt(0).toUpperCase() + word.slice(1));
    }
    return words.join(" ");
}
|
|
565
|
+
/**
 * Parse CSS font-size into half-points.
 *
 * Accepts a non-negative decimal number followed by an optional unit
 * (px, pt, em, rem). A missing unit is treated as px.
 *
 * @param {string} value CSS `font-size` value.
 * @returns {number|undefined} Size in half-points rounded to an integer, or
 *   undefined when the value is not a well-formed number with a supported unit.
 */
function parseFontSize(value) {
    // The number part must be a real decimal ("12", "1.5", ".5"); inputs such
    // as "." or "1.2.3" are rejected instead of yielding NaN or a truncated
    // number.
    const match = /^(\d+\.?\d*|\.\d+)\s*(px|pt|em|rem)?$/.exec(value);
    if (!match) {
        return undefined;
    }
    const num = parseFloat(match[1]);
    const unit = match[2] || "px";
    switch (unit) {
        case "pt":
            return Math.round(num * 2); // half-points
        case "px":
            // 1px ≈ 0.75pt
            return Math.round(num * 0.75 * 2);
        case "em":
        case "rem":
            // Assume 1em = 12pt = 24 half-points
            return Math.round(num * 24);
        default:
            return undefined;
    }
}
|
|
588
|
+
/**
 * Parse a CSS color value into a 6-digit hex string (without #).
 *
 * Supported forms: `#RGB`, `#RRGGBB`, `rgb(r, g, b)` / `rgba(r, g, b, a)` with
 * comma-separated integer channels (alpha is ignored, channels are capped at
 * 255), and the names listed in CSS_NAMED_COLORS.
 *
 * @param {string} value Color value; callers in this file pass it lowercased.
 * @returns {string|undefined} Upper-case `RRGGBB`, or undefined when the value
 *   is not recognised.
 */
function parseCssColor(value) {
    // #RGB or #RRGGBB
    if (value.startsWith("#")) {
        const hex = value.slice(1);
        // Reject anything that is not made of hex digits (e.g. "#xyz").
        if (!/^[0-9a-f]+$/i.test(hex)) {
            return undefined;
        }
        if (hex.length === 3) {
            return hex
                .split("")
                .map(c => c + c)
                .join("")
                .toUpperCase();
        }
        if (hex.length === 6) {
            return hex.toUpperCase();
        }
        return undefined;
    }
    // rgb(r, g, b) or rgba(r, g, b, a)
    const rgbMatch = /^rgba?\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)/.exec(value);
    if (rgbMatch) {
        const toHexPair = (digits) => Math.min(255, parseInt(digits, 10)).toString(16).padStart(2, "0");
        return (toHexPair(rgbMatch[1]) + toHexPair(rgbMatch[2]) + toHexPair(rgbMatch[3])).toUpperCase();
    }
    // Named colors (common subset). Own-property check so that inherited
    // members such as "constructor" or "toString" are never returned.
    if (Object.prototype.hasOwnProperty.call(CSS_NAMED_COLORS, value)) {
        return CSS_NAMED_COLORS[value];
    }
    return undefined;
}
|
|
622
|
+
/**
 * Common CSS named colors mapped to hex.
 *
 * Keys are lower-case color keywords; values are upper-case `RRGGBB` strings
 * without a leading "#". This is a subset of the CSS keyword list.
 */
const CSS_NAMED_COLORS = {
    // Basic keywords
    black: "000000", white: "FFFFFF", red: "FF0000", green: "008000",
    blue: "0000FF", yellow: "FFFF00", cyan: "00FFFF", magenta: "FF00FF",
    gray: "808080", grey: "808080", silver: "C0C0C0", maroon: "800000",
    olive: "808000", lime: "00FF00", aqua: "00FFFF", teal: "008080",
    navy: "000080", fuchsia: "FF00FF", purple: "800080",
    // Extended keywords
    orange: "FFA500", pink: "FFC0CB", brown: "A52A2A", coral: "FF7F50",
    crimson: "DC143C", darkblue: "00008B", darkgreen: "006400", darkred: "8B0000",
    gold: "FFD700", indigo: "4B0082", ivory: "FFFFF0", khaki: "F0E68C",
    lavender: "E6E6FA", lightblue: "ADD8E6", lightgray: "D3D3D3", lightgrey: "D3D3D3",
    lightgreen: "90EE90", lightyellow: "FFFFE0", darkgray: "A9A9A9", darkgrey: "A9A9A9",
    dimgray: "696969", dimgrey: "696969", tomato: "FF6347", violet: "EE82EE",
    wheat: "F5DEB3"
};
|
|
669
|
+
/**
 * Parse a CSS length value (px, pt, in, cm, mm, em, rem) into twips.
 * 1 inch = 1440 twips. A value without a unit is treated as px.
 *
 * @param {string} value CSS length such as "12pt", "1.5in" or "20".
 * @returns {number|undefined} Length in twips rounded to an integer, or
 *   undefined when the value is not a well-formed non-negative number with a
 *   supported unit.
 */
function parseLengthToTwips(value) {
    // The number part must be a real decimal ("12", "1.5", ".5"); inputs such
    // as "." or "1.2.3" are rejected instead of yielding NaN, which callers
    // would otherwise accept because they only test for `undefined`.
    const match = /^(\d+\.?\d*|\.\d+)\s*(px|pt|in|cm|mm|em|rem)?$/.exec(value);
    if (!match) {
        return undefined;
    }
    const num = parseFloat(match[1]);
    const unit = match[2] || "px";
    switch (unit) {
        case "pt":
            return Math.round(num * 20); // 1pt = 20 twips
        case "px":
            return Math.round(num * 15); // 1px ≈ 0.75pt ≈ 15 twips
        case "in":
            return Math.round(num * 1440); // 1in = 1440 twips
        case "cm":
            return Math.round(num * 567); // 1cm ≈ 567 twips
        case "mm":
            return Math.round(num * 56.7); // 1mm ≈ 56.7 twips
        case "em":
        case "rem":
            // Assume 1em = 12pt = 240 twips
            return Math.round(num * 240);
        default:
            return undefined;
    }
}
|
|
696
|
+
/**
 * Parse CSS line-height into 240ths of a line for WordprocessingML spacing.
 *
 * Accepted forms:
 *  - unitless number ("1.5" → 360),
 *  - percentage ("150%" → 360),
 *  - length in px, pt, em or rem, approximated against a 12pt single line.
 *
 * @param {string} value CSS `line-height` value.
 * @returns {number|undefined} Spacing in 240ths of a line rounded to an
 *   integer, or undefined for keywords and malformed numbers.
 */
function parseLineHeight(value) {
    // One strict pattern covers all three forms. The number part must be a
    // real decimal ("12", "1.5", ".5"), so "." or "1.2.3" no longer produce
    // NaN or a truncated number. Whitespace before the unit is allowed for
    // lengths but not for "%".
    const match = /^(\d+\.?\d*|\.\d+)(?:(%)|\s*(px|pt|em|rem))?$/.exec(value);
    if (!match) {
        return undefined;
    }
    const num = parseFloat(match[1]);
    // Percentage: e.g., "150%" means 1.5 lines → 360
    if (match[2]) {
        return Math.round((num / 100) * 240);
    }
    // With units the "auto" rule still counts 240ths of a line, so lengths are
    // approximated through the unitless conversion.
    switch (match[3]) {
        case "pt":
            // Convert pt to 240ths of a line: 12pt = 240 (single line)
            return Math.round((num / 12) * 240);
        case "px":
            // 1px ≈ 0.75pt; 16px ≈ 12pt = single
            return Math.round(((num * 0.75) / 12) * 240);
        case "em":
        case "rem":
            return Math.round(num * 240);
        default:
            // Unitless number: e.g., "1.5" means 1.5 lines → 360 (240 * 1.5)
            return Math.round(num * 240);
    }
}
|
|
730
|
+
/** Lower-case names of the HTML elements this module lists as block-level. */
const _BLOCK_TAGS = new Set([
    // Paragraph-like and headings
    "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "pre",
    // List and definition-list parts
    "li", "dt", "dd", "dl",
    // Sectioning and grouping
    "section", "article", "main", "aside", "header", "footer",
    "figure", "figcaption", "details", "summary", "address"
]);
|
|
757
|
+
/**
 * Convert tokens into block-level nodes, appending them to `blocks`.
 *
 * Parsing starts at `start` and stops at the first close tag found at this
 * nesting level (which is consumed) or at the end of the token list. Loose
 * text and inline elements met at block level are gathered into an implicit
 * paragraph that is flushed whenever a block element begins or parsing ends.
 *
 * @param {Array<object>} tokens Token list; each token has a `type` of
 *   "open", "close", "selfclose" or "text" plus `tag`/`attrs` or `value`.
 * @param {number} start Index of the first token to process.
 * @param {Array<object>} blocks Output array; mutated in place.
 * @param {object} parentCtx Inline formatting context inherited by children.
 * @param {object} classStyles Class-to-style data forwarded to
 *   resolveEffectiveStyle.
 * @returns {number} Index of the first token not consumed.
 */
function parseBlocks(tokens, start, blocks, parentCtx, classStyles) {
    let i = start;
    let pendingInline;
    // Emit the implicit paragraph (if it holds any runs) and reset it.
    const flushPending = () => {
        if (pendingInline && pendingInline.runs.length > 0) {
            blocks.push({
                type: "paragraph",
                children: pendingInline.runs
            });
        }
        pendingInline = undefined;
    };
    // Runs array of the implicit paragraph, created on first use.
    const pendingRuns = () => {
        if (!pendingInline) {
            pendingInline = { runs: [], ctx: parentCtx };
        }
        return pendingInline.runs;
    };
    // Index just past the close tag matching an already-consumed `<name>`
    // opener, honouring nested openers of the same name; tokens.length when
    // no matching close tag exists.
    const skipPastClose = (from, name) => {
        let depth = 1;
        let j = from;
        while (j < tokens.length && depth > 0) {
            const t = tokens[j];
            if (t.type === "open" && t.tag === name) {
                depth++;
            }
            else if (t.type === "close" && t.tag === name) {
                depth--;
            }
            j++;
        }
        return j;
    };
    const styleOf = (attrs) => parseCssStyle(resolveEffectiveStyle(attrs, classStyles));
    // Parse the inline content of the element opened at `i`, advancing `i`
    // past its close tag, and return the collected runs.
    const readInlines = (ctx, closer) => {
        const children = [];
        i = parseInlines(tokens, i + 1, children, ctx, closer, classStyles);
        return children;
    };
    while (i < tokens.length) {
        const tok = tokens[i];
        if (tok.type === "close") {
            flushPending();
            return i + 1; // consumed the close tag
        }
        if (tok.type === "text") {
            pendingRuns().push(makeRun(tok.value, parentCtx));
            i++;
            continue;
        }
        // Open or self-close tag
        const tag = tok.type === "open" || tok.type === "selfclose" ? tok.tag : "";
        // Document scaffolding (<html>, <body>) is transparent — descend into
        // its children.
        if (tag === "html" || tag === "body") {
            if (tok.type === "open") {
                flushPending();
                i = parseBlocks(tokens, i + 1, blocks, parentCtx, classStyles);
                continue;
            }
            i++;
            continue;
        }
        // <head> carries no body text: fast-forward to </head> so titles,
        // meta, link, etc. and their whitespace do not leak into the output
        // as paragraphs.
        if (tag === "head") {
            i = tok.type === "open" ? skipPastClose(i + 1, "head") : i + 1;
            continue;
        }
        // Metadata elements appearing at body level (malformed HTML or a
        // fragment without <head>).
        if (tag === "title" || tag === "meta" || tag === "link" || tag === "base") {
            if (tok.type !== "open") {
                i++;
            }
            else if (tag === "title") {
                i = skipPastClose(i + 1, tag);
            }
            else {
                // <meta>, <link> and <base> are void elements: they have no
                // closing tag, so searching for one would swallow the rest of
                // the document. Skip only the opener, plus an immediately
                // following XHTML-style closer if one is present.
                // NOTE(review): whether the tokenizer ever reports these as
                // "open" rather than "selfclose" is not visible here.
                const next = tokens[i + 1];
                i += next && next.type === "close" && next.tag === tag ? 2 : 1;
            }
            continue;
        }
        if (tag === "br") {
            pendingRuns().push({ content: [{ type: "break" }] });
            i++;
            continue;
        }
        if (tag === "hr") {
            flushPending();
            // Rendered as an empty paragraph with a bottom border.
            blocks.push({
                type: "paragraph",
                properties: {
                    borders: {
                        bottom: { style: "single", size: 6, space: 1, color: "auto" }
                    }
                },
                children: []
            });
            i++;
            continue;
        }
        // Headings
        if (/^h[1-6]$/.test(tag)) {
            flushPending();
            const level = parseInt(tag[1], 10);
            const style = styleOf(tok.attrs);
            const headingCtx = { ...parentCtx, bold: true };
            applyCssToInlineContext(headingCtx, style);
            const children = readInlines(headingCtx, tag);
            const props = {
                style: `Heading${level}`,
                ...(style.textAlign ? { alignment: style.textAlign } : {})
            };
            if (style.marginLeft !== undefined) {
                props.indent = { left: style.marginLeft };
            }
            if (style.lineHeight !== undefined) {
                props.spacing = { line: style.lineHeight, lineRule: "auto" };
            }
            blocks.push({
                type: "paragraph",
                properties: props,
                children
            });
            continue;
        }
        // Page-break detection: <div style="page-break-before: always"> or <div class="page-break">
        if (tag === "div" && tok.type === "open") {
            const attrs = tok.attrs;
            const style = styleOf(attrs);
            const hasPageBreakClass = (attrs["class"] || "").split(/\s+/).includes("page-break");
            if (style.pageBreakBefore || hasPageBreakClass) {
                flushPending();
                // Emit a page break paragraph
                blocks.push({
                    type: "paragraph",
                    children: [{ content: [{ type: "break", breakType: "page" }] }]
                });
                // Continue parsing the div's children as normal content
                i = parseBlocks(tokens, i + 1, blocks, parentCtx, classStyles);
                continue;
            }
        }
        // Paragraph-like blocks
        if (tag === "p" || tag === "div" || tag === "blockquote" || tag === "pre" || tag === "aside") {
            flushPending();
            const style = styleOf(tok.attrs);
            const ctx = tag === "pre" ? { ...parentCtx, code: true } : { ...parentCtx };
            applyCssToInlineContext(ctx, style);
            const children = readInlines(ctx, tag);
            const props = {};
            if (tag === "blockquote" || tag === "aside") {
                props.indent = { left: 720 }; // 0.5 inch indent
            }
            if (style.marginLeft !== undefined) {
                // margin-left → paragraph indentation (merges with blockquote indent)
                props.indent = { ...(props.indent || {}), left: style.marginLeft };
            }
            if (style.textAlign) {
                props.alignment = style.textAlign;
            }
            if (style.lineHeight !== undefined) {
                props.spacing = { line: style.lineHeight, lineRule: "auto" };
            }
            blocks.push({
                type: "paragraph",
                ...(Object.keys(props).length > 0 ? { properties: props } : {}),
                children
            });
            continue;
        }
        // Pure containers (figure, details, definition list) — recurse into children
        if (tag === "figure" || tag === "details" || tag === "dl") {
            flushPending();
            i = parseBlocks(tokens, i + 1, blocks, parentCtx, classStyles);
            continue;
        }
        // figcaption — paragraph with Caption style
        if (tag === "figcaption") {
            flushPending();
            const style = styleOf(tok.attrs);
            const ctx = { ...parentCtx };
            applyCssToInlineContext(ctx, style);
            const children = readInlines(ctx, tag);
            blocks.push({
                type: "paragraph",
                properties: { style: "Caption" },
                children
            });
            continue;
        }
        // summary and dt — bold paragraph
        if (tag === "summary" || tag === "dt") {
            flushPending();
            const children = readInlines({ ...parentCtx, bold: true }, tag);
            blocks.push({ type: "paragraph", children });
            continue;
        }
        // dd — indented paragraph
        if (tag === "dd") {
            flushPending();
            const children = readInlines({ ...parentCtx }, tag);
            blocks.push({
                type: "paragraph",
                properties: { indent: { left: 720 } },
                children
            });
            continue;
        }
        // address — italic paragraph
        if (tag === "address") {
            flushPending();
            const children = readInlines({ ...parentCtx, italic: true }, tag);
            blocks.push({ type: "paragraph", children });
            continue;
        }
        // Lists
        if (tag === "ul" || tag === "ol") {
            flushPending();
            i = parseList(tokens, i + 1, blocks, parentCtx, tag === "ol", 0, tag, classStyles);
            continue;
        }
        // Tables
        if (tag === "table") {
            flushPending();
            const table = parseTable(tokens, i + 1, tok.attrs, classStyles);
            blocks.push(table.table);
            i = table.endIdx;
            continue;
        }
        // Inline elements treated at block level (wrap in paragraph)
        if (INLINE_TAGS.has(tag) || tok.type === "selfclose") {
            i = parseInlineTag(tokens, i, pendingRuns(), parentCtx, classStyles);
            continue;
        }
        // Unknown block: recurse
        if (tok.type === "open") {
            flushPending();
            i = parseBlocks(tokens, i + 1, blocks, parentCtx, classStyles);
            continue;
        }
        i++;
    }
    flushPending();
    return i;
}
|
|
1047
|
+
/**
 * Lower-case names of the elements parseBlocks hands to parseInlineTag when
 * they appear at block level, so their content joins the implicit paragraph.
 */
const INLINE_TAGS = new Set([
    // Emphasis and decoration
    "strong", "b", "em", "i", "u", "s", "strike", "del",
    // Links, generic spans and code
    "a", "span", "code",
    // Sub/superscript, highlight, size
    "sub", "sup", "mark", "small",
    // Semantic text
    "abbr", "q", "cite", "time", "kbd", "var", "samp",
    // Images
    "img"
]);
|
|
1072
|
+
// =============================================================================
|
|
1073
|
+
// Inline parser
|
|
1074
|
+
// =============================================================================
|
|
1075
|
+
/**
 * Collect inline runs from `tokens` into `runs` until a close tag is met.
 *
 * Text tokens become runs built with `ctx`; open and self-closing tags are
 * delegated to parseInlineTag. The first close tag encountered ends the scan
 * and is consumed — both when it is the expected `untilClose` closer and when
 * it is a stray, mismatched one.
 *
 * @param {Array<object>} tokens Token list.
 * @param {number} start Index of the first token to process.
 * @param {Array<object>} runs Output array; mutated in place.
 * @param {object} ctx Inline formatting context applied to text runs.
 * @param {string} untilClose Tag name of the element whose content is parsed.
 * @param {object} classStyles Class-to-style data forwarded to parseInlineTag.
 * @returns {number} Index of the first token not consumed.
 */
function parseInlines(tokens, start, runs, ctx, untilClose, classStyles) {
    let cursor = start;
    while (cursor < tokens.length) {
        const token = tokens[cursor];
        switch (token.type) {
            case "close":
                // Expected closer (`untilClose`) or a mismatched one: either
                // way the scan stops just past it.
                return cursor + 1;
            case "text":
                runs.push(makeRun(token.value, ctx));
                cursor += 1;
                break;
            default:
                cursor = parseInlineTag(tokens, cursor, runs, ctx, classStyles);
                break;
        }
    }
    return cursor;
}
|
|
1096
|
+
/**
 * Parse one inline element starting at `tokens[idx]` and append its runs.
 *
 * - `<br>` (any token type) yields a break run.
 * - A self-closing `<img>` yields an image run, or a "[Image: alt]" text run
 *   when buildImageContent returns nothing; other self-closing tags yield
 *   nothing.
 * - `<a>` collects its content into one hyperlink node whose url is the
 *   sanitized href ("" when sanitizeUrl returns null/undefined).
 * - Every other open tag derives a new formatting context from `ctx`, its CSS
 *   and the tag name, then emits its content as runs.
 *
 * The first close tag met inside the element ends it, whether or not the
 * name matches.
 *
 * @param {Array<object>} tokens Token list.
 * @param {number} idx Index of the open/self-closing tag token.
 * @param {Array<object>} runs Output array; mutated in place.
 * @param {object} ctx Inherited inline formatting context (not mutated).
 * @param {object} classStyles Class-to-style data for resolveEffectiveStyle.
 * @returns {number} Index of the first token not consumed.
 */
function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
    const tok = tokens[idx];
    const tag = tok.tag;
    if (tag === "br") {
        runs.push({ content: [{ type: "break" }] });
        return idx + 1;
    }
    if (tok.type === "selfclose") {
        if (tag === "img") {
            const imgContent = buildImageContent(tok.attrs);
            if (imgContent) {
                runs.push({ content: [imgContent] });
            }
            else {
                // Fallback placeholder text
                const alt = tok.attrs["alt"] || "image";
                runs.push(makeRun(`[Image: ${alt}]`, ctx));
            }
        }
        return idx + 1;
    }
    // Open tags: start from the inherited context plus the element's own CSS.
    const newCtx = { ...ctx };
    const style = parseCssStyle(resolveEffectiveStyle(tok.attrs, classStyles));
    applyCssToInlineContext(newCtx, style);
    switch (tag) {
        case "strong":
        case "b":
            newCtx.bold = true;
            break;
        case "em":
        case "i":
        case "cite":
            newCtx.italic = true;
            break;
        case "u":
            newCtx.underline = true;
            break;
        case "s":
        case "strike":
        case "del":
            newCtx.strikethrough = true;
            break;
        case "sub":
            newCtx.subscript = true;
            break;
        case "sup":
            newCtx.superscript = true;
            break;
        case "mark":
            if (!newCtx.backgroundColor) {
                newCtx.backgroundColor = "FFFF00"; // default highlight
            }
            break;
        case "small":
            // 80% of default font size (default 24 half-points = 12pt)
            newCtx.fontSize = Math.round((newCtx.fontSize || 24) * 0.8);
            break;
        case "code":
        case "kbd":
        case "samp":
            newCtx.code = true;
            break;
        case "a": {
            // Collect inner runs and wrap them in a Hyperlink. Inner runs are
            // built from the inherited `ctx`, not from `newCtx`.
            const innerRuns = [];
            // Unsafe schemes (javascript:/vbscript:/...) come back empty from
            // the sanitizer; the node is then emitted with url "".
            const safeHref = (0, internal_utils_1.sanitizeUrl)(tok.attrs["href"]);
            const emitHyperlink = (nextIdx) => {
                runs.push({
                    type: "hyperlink",
                    url: safeHref ?? "",
                    children: innerRuns
                });
                return nextIdx;
            };
            let pos = idx + 1;
            while (pos < tokens.length) {
                const inner = tokens[pos];
                if (inner.type === "close") {
                    // Matching </a> or any stray closer ends the link.
                    return emitHyperlink(pos + 1);
                }
                if (inner.type === "text") {
                    innerRuns.push(makeRun(inner.value, { ...ctx }));
                    pos++;
                    continue;
                }
                const childRuns = [];
                pos = parseInlineTag(tokens, pos, childRuns, { ...ctx }, classStyles);
                for (const r of childRuns) {
                    if ("content" in r && !("type" in r)) {
                        innerRuns.push(r);
                    }
                    else if ("type" in r && r.type === "hyperlink") {
                        // Flatten nested hyperlink children
                        for (const c of r.children) {
                            innerRuns.push(c);
                        }
                    }
                }
            }
            // EOF fallback: tokens ran out without a matching `</a>`. The
            // already-sanitized href is still used.
            return emitHyperlink(pos);
        }
    }
    // Parse inner content with the derived context.
    let pos = idx + 1;
    while (pos < tokens.length) {
        const inner = tokens[pos];
        if (inner.type === "close") {
            return pos + 1;
        }
        if (inner.type === "text") {
            runs.push(makeRun(inner.value, newCtx));
            pos++;
        }
        else {
            pos = parseInlineTag(tokens, pos, runs, newCtx, classStyles);
        }
    }
    return pos;
}
|
|
1232
|
+
// =============================================================================
|
|
1233
|
+
// List parser
|
|
1234
|
+
// =============================================================================
|
|
1235
|
+
/**
 * Walk the body of a `<ul>`/`<ol>` and append its items to `blocks`.
 *
 * @param tokens      Token stream being parsed.
 * @param start       Index of the first token after the list's open tag.
 * @param blocks      Output array that list-item paragraphs are pushed onto.
 * @param ctx         Inline formatting context handed on to each item.
 * @param ordered     Whether this list is an `<ol>`.
 * @param level       Nesting level recorded on every emitted paragraph.
 * @param untilClose  Tag name whose close token terminates this list.
 * @param classStyles Class-name → style lookup handed on to the item parser.
 * @returns Index of the first token after the list (or `tokens.length` at EOF).
 */
function parseList(tokens, start, blocks, ctx, ordered, level, untilClose, classStyles) {
    let pos = start;
    while (pos < tokens.length) {
        const { type, tag } = tokens[pos];
        if (type === "close" && tag === untilClose) {
            return pos + 1;
        }
        const opens = type === "open";
        if (opens && tag === "li") {
            pos = parseListItem(tokens, pos + 1, blocks, ctx, ordered, level, classStyles);
        }
        else if (opens && (tag === "ul" || tag === "ol")) {
            // A list placed directly under ul/ol (no <li> wrapper) is one level deeper.
            pos = parseList(tokens, pos + 1, blocks, ctx, tag === "ol", level + 1, tag, classStyles);
        }
        else {
            // Anything else at list level is skipped.
            pos += 1;
        }
    }
    return pos;
}
|
|
1255
|
+
/**
 * Parse the contents of a single `<li>`, handling nested `<ul>/<ol>` inside it.
 *
 * Inline content is collected into one numbered paragraph. When a nested list
 * starts, the content gathered so far is flushed as its own paragraph and the
 * nested list is parsed one level deeper.
 *
 * @param tokens      Token stream being parsed.
 * @param start       Index of the first token after the `<li>` open tag.
 * @param blocks      Output array that paragraphs are pushed onto.
 * @param ctx         Inline formatting context for the item's runs.
 * @param ordered     Whether the enclosing list is ordered (numId 2) or not (numId 1).
 * @param level       Nesting level recorded in the paragraph numbering.
 * @param classStyles Class-name → style lookup handed on to the inline parser.
 * @returns Index at which the caller should resume.
 */
function parseListItem(tokens, start, blocks, ctx, ordered, level, classStyles) {
    const children = [];
    let i = start;
    let hasEmittedContent = false;
    // Push one numbered list paragraph holding `runs`.
    const emitParagraph = (runs) => {
        blocks.push({
            type: "paragraph",
            properties: {
                numbering: {
                    numId: ordered ? 2 : 1,
                    level: level
                }
            },
            children: runs
        });
    };
    while (i < tokens.length) {
        const tok = tokens[i];
        // End of this <li>
        if (tok.type === "close" && tok.tag === "li") {
            // Only emit if there is content, or nothing was emitted for this item yet
            if (children.length > 0 || !hasEmittedContent) {
                emitParagraph(children);
            }
            return i + 1;
        }
        // Nested list inside <li>: flush inline content first, then recurse
        if (tok.type === "open" && (tok.tag === "ul" || tok.tag === "ol")) {
            if (children.length > 0 || !hasEmittedContent) {
                emitParagraph([...children]);
                children.length = 0;
                hasEmittedContent = true;
            }
            i = parseList(tokens, i + 1, blocks, ctx, tok.tag === "ol", level + 1, tok.tag, classStyles);
            continue;
        }
        // Text content
        if (tok.type === "text") {
            children.push(makeRun(tok.value, ctx));
            i++;
            continue;
        }
        // Inline tags (other block-level tags inside <li> are treated as inline too)
        if (tok.type === "open" || tok.type === "selfclose") {
            if (tok.type === "open" && tok.tag === "br" && !INLINE_TAGS.has(tok.tag)) {
                children.push({ content: [{ type: "break" }] });
                i++;
            }
            else {
                i = parseInlineTag(tokens, i, children, ctx, classStyles);
            }
            continue;
        }
        // Mismatched close tag: emit what we have and stop
        if (tok.type === "close") {
            emitParagraph(children);
            // A `</ul>`/`</ol>` here means the item's own end tag was omitted.
            // Leave that token for the enclosing list parser; consuming it would
            // hide the list terminator and swallow everything after the list.
            if (tok.tag === "ul" || tok.tag === "ol") {
                return i;
            }
            return i + 1;
        }
        i++;
    }
    // Ran out of tokens without seeing </li> — emit what we have
    if (children.length > 0) {
        emitParagraph(children);
    }
    return i;
}
|
|
1354
|
+
// =============================================================================
|
|
1355
|
+
// Table parser
|
|
1356
|
+
// =============================================================================
|
|
1357
|
+
/**
 * Parse the body of a `<table>` into a table block.
 *
 * @param tokens      Token stream being parsed.
 * @param start       Index of the first token after the `<table>` open tag.
 * @param tableAttrs  Attributes of the `<table>` tag (`border`, `style` are read).
 * @param classStyles Class-name → style lookup handed on to the row parser.
 * @returns `{ table, endIdx }` where `endIdx` is the index after `</table>`
 *          (or `tokens.length` when the table is never closed).
 */
function parseTable(tokens, start, tableAttrs, classStyles) {
    const rows = [];
    let pos = start;
    while (pos < tokens.length) {
        const tok = tokens[pos];
        if (tok.type === "close" && tok.tag === "table") {
            pos += 1;
            break;
        }
        if (tok.type === "open" && tok.tag === "tr") {
            const parsed = parseTableRow(tokens, pos + 1, classStyles);
            rows.push(parsed.row);
            pos = parsed.endIdx;
            continue;
        }
        // thead/tbody/tfoot wrappers and any other token at table level are skipped.
        pos += 1;
    }
    // Insert vMerge "continue" cells below every cell that carries a rowspan.
    applyRowSpan(rows);
    const properties = {};
    const borders = parseTableBorders(tableAttrs);
    if (borders) {
        properties.borders = borders;
    }
    const { width } = parseCssStyle(tableAttrs["style"]);
    if (width) {
        properties.width = { value: width, type: "dxa" };
    }
    // `properties` is only attached when at least one table property was found.
    const table = Object.keys(properties).length > 0
        ? { type: "table", properties, rows }
        : { type: "table", rows };
    return { table, endIdx: pos };
}
|
|
1406
|
+
/**
 * Parse the cells of one table row.
 *
 * The row ends at `</tr>`. Because the `</tr>` end tag may be omitted in HTML,
 * the row also ends — without consuming the token — at the next `<tr>` or at
 * the close of the enclosing `table`/`thead`/`tbody`/`tfoot`, so the table
 * parser still sees those tokens.
 *
 * @param tokens      Token stream being parsed.
 * @param start       Index of the first token after the `<tr>` open tag.
 * @param classStyles Class-name → style lookup used to resolve cell styles.
 * @returns `{ row, endIdx }` where `endIdx` is where the caller should resume.
 */
function parseTableRow(tokens, start, classStyles) {
    const cells = [];
    let i = start;
    while (i < tokens.length) {
        const tok = tokens[i];
        if (tok.type === "close" && tok.tag === "tr") {
            return { row: { cells }, endIdx: i + 1 };
        }
        // Implied end of row: hand the token back to parseTable untouched.
        const startsNextRow = tok.type === "open" && tok.tag === "tr";
        const closesParent = tok.type === "close" &&
            (tok.tag === "table" || tok.tag === "thead" || tok.tag === "tbody" || tok.tag === "tfoot");
        if (startsNextRow || closesParent) {
            return { row: { cells }, endIdx: i };
        }
        if (tok.type === "open" && (tok.tag === "td" || tok.tag === "th")) {
            const isHeader = tok.tag === "th";
            const attrs = tok.attrs;
            const children = [];
            // Header cells start out bold; CSS is applied on top of that.
            const cellCtx = isHeader ? { bold: true } : {};
            const style = parseCssStyle(resolveEffectiveStyle(attrs, classStyles));
            applyCssToInlineContext(cellCtx, style);
            i = parseInlines(tokens, i + 1, children, cellCtx, tok.tag, classStyles);
            // Build cell properties
            const cellProps = buildCellProperties(attrs, style);
            // Build paragraph properties for text-align
            const paraProps = {};
            if (style.textAlign) {
                paraProps.alignment = style.textAlign;
            }
            cells.push({
                ...(cellProps ? { properties: cellProps } : {}),
                content: [
                    {
                        type: "paragraph",
                        ...(Object.keys(paraProps).length > 0
                            ? { properties: paraProps }
                            : {}),
                        children
                    }
                ]
            });
            continue;
        }
        i++;
    }
    return { row: { cells }, endIdx: i };
}
|
|
1447
|
+
/**
 * Derive table-cell properties from a cell's HTML attributes and parsed style.
 *
 * Reads `colspan`, `rowspan`, `width` and `style` from `attrs`, and `width`
 * and `backgroundColor` from `style`.
 *
 * @param attrs Attributes of the `<td>`/`<th>` tag.
 * @param style Parsed CSS for the cell.
 * @returns The properties object, or `undefined` when nothing applies.
 */
function buildCellProperties(attrs, style) {
    const result = {};
    const readSpan = (name) => parseInt(attrs[name], 10);
    // colspan → gridSpan
    const columns = readSpan("colspan");
    if (columns > 1) {
        result.gridSpan = columns;
    }
    // rowspan → verticalMerge restart; rowSpan is kept so continuation cells can be added later
    const rowCount = readSpan("rowspan");
    if (rowCount > 1) {
        result.verticalMerge = "restart";
        result.rowSpan = rowCount;
    }
    // A width from the style wins over the width attribute
    if (style.width) {
        result.width = { value: style.width, type: "dxa" };
    }
    else if (attrs["width"]) {
        const fromAttr = parseCellWidthAttr(attrs["width"]);
        if (fromAttr) {
            result.width = fromAttr;
        }
    }
    if (style.backgroundColor) {
        result.shading = { pattern: "clear", fill: style.backgroundColor };
    }
    // Borders come from the inline style attribute only
    const borders = parseCellBordersFromStyle(attrs["style"]);
    if (borders) {
        result.borders = borders;
    }
    return Object.keys(result).length === 0 ? undefined : result;
}
|
|
1482
|
+
/**
 * Parse a cell `width` attribute value.
 *
 * - `"50%"`  → `{ type: "pct", value }` in fiftieths of a percent (50% = 2500).
 * - `"200"` / `"200px"` (suffix in any letter case) → `{ type: "dxa", value }`
 *   with pixels converted at 15 per pixel.
 *
 * @param value Raw attribute value.
 * @returns The parsed width, or `undefined` when the value is empty or malformed.
 */
function parseCellWidthAttr(value) {
    if (!value) {
        return undefined;
    }
    const text = value.trim();
    // Require a well-formed decimal so inputs such as ".%" cannot produce NaN.
    const pctMatch = /^(\d+(?:\.\d*)?|\.\d+)%$/.exec(text);
    if (pctMatch) {
        return { value: Math.round(parseFloat(pctMatch[1]) * 50), type: "pct" };
    }
    // Numeric (pixels): "200" or "200px"
    const pxMatch = /^(\d+)(?:px)?$/i.exec(text);
    if (pxMatch) {
        return { value: parseInt(pxMatch[1], 10) * 15, type: "dxa" };
    }
    return undefined;
}
|
|
1499
|
+
/**
 * Expand rowspans in place by adding vMerge "continue" cells.
 *
 * Rows are visited top to bottom. Each cell whose `properties.rowSpan` is
 * greater than 1 registers a pending span at its logical column; in the rows
 * below, a placeholder cell with `verticalMerge: "continue"` (and the same
 * `gridSpan` when wider than one column) is placed at that column. Every
 * row's `cells` array is replaced with the expanded list.
 *
 * @param rows Table rows; each row's `cells` property is reassigned.
 */
function applyRowSpan(rows) {
    const COLUMN_LIMIT = 1000; // safety limit on the logical column index
    const pending = new Map();
    const makeContinuation = (width) => ({
        properties: width > 1
            ? { verticalMerge: "continue", gridSpan: width }
            : { verticalMerge: "continue" },
        content: [{ type: "paragraph", children: [] }]
    });
    for (const row of rows) {
        const original = row.cells;
        const expanded = [];
        let taken = 0; // how many original cells have been placed
        let column = 0; // logical column position
        while (column < COLUMN_LIMIT) {
            const carried = pending.get(column);
            if (carried && carried.remaining > 0) {
                // A span from an earlier row covers this column.
                expanded.push(makeContinuation(carried.gridSpan));
                carried.remaining -= 1;
                if (carried.remaining === 0) {
                    pending.delete(column);
                }
                column += carried.gridSpan;
                continue;
            }
            if (taken >= original.length) {
                break;
            }
            const cell = original[taken];
            taken += 1;
            const width = cell.properties?.gridSpan || 1;
            const height = cell.properties?.rowSpan;
            if (height && height > 1) {
                pending.set(column, { remaining: height - 1, gridSpan: width });
            }
            expanded.push(cell);
            column += width;
        }
        row.cells = expanded;
    }
}
|
|
1552
|
+
/**
 * Parse table-level borders from `<table>` attributes.
 *
 * A positive `border="N"` attribute wins and yields black single borders of
 * `max(4, N * 4)` eighths of a point. Otherwise the `style` attribute is
 * checked for a border declaration. The same border applies to all outer
 * edges and to the inside horizontal/vertical rules.
 *
 * @param attrs Attributes of the `<table>` tag.
 * @returns The borders object, or `undefined` when no border is specified.
 */
function parseTableBorders(attrs) {
    const onAllEdges = (border) => ({
        top: border,
        left: border,
        bottom: border,
        right: border,
        insideH: border,
        insideV: border
    });
    // border="1" attribute (common HTML table pattern); a missing or
    // non-numeric value parses to NaN and fails the comparison.
    const attrWidth = parseInt(attrs["border"], 10);
    if (attrWidth > 0) {
        return onAllEdges({ style: "single", size: Math.max(4, attrWidth * 4), color: "000000" });
    }
    // Fall back to a border declared in the style attribute
    const fromStyle = parseBorderStyleFromCss(attrs["style"]);
    return fromStyle ? onAllEdges(fromStyle) : undefined;
}
|
|
1585
|
+
/**
 * Extract one border definition from an inline CSS string.
 *
 * Recognises the `border` shorthand plus `border-style`, `border-width` and
 * `border-color`; only the first value of the three longhands is used.
 * Later declarations overwrite earlier ones.
 *
 * @param styleStr Raw `style` attribute text.
 * @returns `{ style, size, color }`, or `undefined` when no border style is
 *          given or it is `none`/`hidden`.
 */
function parseBorderStyleFromCss(styleStr) {
    if (!styleStr) {
        return undefined;
    }
    let width;
    let keyword;
    let color;
    for (const declaration of styleStr.split(";")) {
        const sep = declaration.indexOf(":");
        if (sep < 0) {
            continue;
        }
        const name = declaration.slice(0, sep).trim().toLowerCase();
        const pieces = declaration.slice(sep + 1).trim().toLowerCase().split(/\s+/);
        switch (name) {
            case "border":
                // Shorthand, e.g. "1px solid black": classify each piece
                for (const piece of pieces) {
                    if (/^\d/.test(piece)) {
                        width = parseBorderWidth(piece);
                    }
                    else if (isBorderStyleKeyword(piece)) {
                        keyword = piece;
                    }
                    else {
                        color = parseCssColor(piece) || color;
                    }
                }
                break;
            case "border-style":
                keyword = pieces[0];
                break;
            case "border-width":
                width = parseBorderWidth(pieces[0]);
                break;
            case "border-color":
                color = parseCssColor(pieces[0]) || color;
                break;
            default:
                break;
        }
    }
    if (!keyword || keyword === "none" || keyword === "hidden") {
        return undefined;
    }
    return {
        style: mapCssBorderStyle(keyword),
        size: width || 4,
        color: color || "000000"
    };
}
|
|
1639
|
+
/**
 * Build cell borders from an inline style string.
 *
 * @param styleStr Raw `style` attribute text of the cell.
 * @returns The same border on `top`, `left`, `bottom` and `right`, or
 *          `undefined` when the style declares no usable border.
 */
function parseCellBordersFromStyle(styleStr) {
    const edge = styleStr ? parseBorderStyleFromCss(styleStr) : undefined;
    return edge
        ? { top: edge, left: edge, bottom: edge, right: edge }
        : undefined;
}
|
|
1655
|
+
/**
 * Convert a CSS border width to eighths of a point.
 *
 * Accepts a number with an optional `px` or `pt` unit (no unit means `px`).
 * Points are multiplied by 8; pixels by 6 (1px ≈ 0.75pt).
 *
 * @param value Lower-cased width token, e.g. `"1px"`.
 * @returns The rounded size, or 4 when the token does not match.
 */
function parseBorderWidth(value) {
    const parsed = /^([\d.]+)\s*(px|pt)?$/.exec(value);
    if (parsed === null) {
        return 4;
    }
    const [, digits, unit = "px"] = parsed;
    const eighthsPerUnit = unit === "pt" ? 8 : 6;
    return Math.round(parseFloat(digits) * eighthsPerUnit);
}
|
|
1668
|
+
/**
 * Tell whether a token is one of the CSS border-style keywords.
 *
 * @param value Token to test (expected lower-case).
 * @returns `true` for none, hidden, dotted, dashed, solid, double, groove,
 *          ridge, inset or outset; `false` otherwise.
 */
function isBorderStyleKeyword(value) {
    switch (value) {
        case "none":
        case "hidden":
        case "dotted":
        case "dashed":
        case "solid":
        case "double":
        case "groove":
        case "ridge":
        case "inset":
        case "outset":
            return true;
        default:
            return false;
    }
}
|
|
1682
|
+
/**
 * Map a CSS border-style keyword to the border style name used in the model.
 *
 * `double`, `dotted` and `dashed` map to themselves; `none` and `hidden`
 * map to `"none"`; every other value (solid, groove, ridge, inset, outset,
 * or anything unrecognised) maps to `"single"`.
 *
 * @param cssStyle CSS border-style keyword.
 * @returns The mapped style name.
 */
function mapCssBorderStyle(cssStyle) {
    if (cssStyle === "double" || cssStyle === "dotted" || cssStyle === "dashed") {
        return cssStyle;
    }
    if (cssStyle === "none" || cssStyle === "hidden") {
        return "none";
    }
    return "single";
}
|
|
1703
|
+
// =============================================================================
|
|
1704
|
+
// Image content builder
|
|
1705
|
+
// =============================================================================
|
|
1706
|
+
/**
 * Build placeholder image content from `<img>` attributes.
 *
 * Only `data:`, `http://` and `https://` sources are handled; the scheme is
 * matched case-insensitively and surrounding whitespace in `src` is ignored.
 * Width and height come from the attributes first, then from the inline
 * style, and default to 100 pixels each before conversion to EMU.
 *
 * @param attrs Attributes of the `<img>` tag.
 * @returns The image content, or `undefined` for any other kind of source.
 */
function buildImageContent(attrs) {
    const src = (attrs["src"] || "").trim();
    const alt = attrs["alt"] || "";
    // URL schemes are case-insensitive, so "HTTP://…" and "Data:…" qualify too.
    if (!/^(?:data:|https?:\/\/)/i.test(src)) {
        return undefined;
    }
    // Parse width/height from attributes first, then fall back to style
    let width = parseImageDimension(attrs["width"]);
    let height = parseImageDimension(attrs["height"]);
    if (!width || !height) {
        const styleDims = parseImageDimensionsFromStyle(attrs["style"]);
        if (!width && styleDims.width) {
            width = styleDims.width;
        }
        if (!height && styleDims.height) {
            height = styleDims.height;
        }
    }
    // Both data: and http(s) URLs become placeholders. The DOCX writer needs
    // a real ImageDef registered in `doc.images` plus a corresponding
    // relationship; htmlToDocxBody returns BodyContent[] only and cannot do
    // that registration. We surface the original src in the alt text so the
    // user can post-process if they need real embedded images.
    return {
        type: "image",
        rId: "", // empty rId → renderer treats this as a placeholder
        width: (width || 100) * units_1.EMU_PER_PX,
        height: (height || 100) * units_1.EMU_PER_PX,
        altText: alt || `[Image placeholder: ${src.slice(0, 64)}${src.length > 64 ? "…" : ""}]`,
        name: alt || "image"
    };
}
|
|
1743
|
+
/**
 * Read an image dimension from an HTML attribute value.
 *
 * @param value Attribute text: whole digits, optionally followed by `px`.
 * @returns The integer value, or `undefined` when the value is missing or
 *          has any other form (percentages, decimals, other units).
 */
function parseImageDimension(value) {
    const digits = value ? /^(\d+)(?:px)?$/.exec(value.trim())?.[1] : undefined;
    return digits === undefined ? undefined : parseInt(digits, 10);
}
|
|
1751
|
+
/**
 * Pull `width` and `height` out of an inline style string (for `<img>`).
 *
 * @param styleStr Raw `style` attribute text.
 * @returns An object holding `width` and/or `height` in pixels for each
 *          declaration that parsed; empty when none did. A later declaration
 *          of the same property overwrites an earlier one.
 */
function parseImageDimensionsFromStyle(styleStr) {
    const dims = {};
    if (!styleStr) {
        return dims;
    }
    for (const declaration of styleStr.split(";")) {
        const sep = declaration.indexOf(":");
        if (sep < 0) {
            continue;
        }
        const name = declaration.slice(0, sep).trim().toLowerCase();
        if (name !== "width" && name !== "height") {
            continue;
        }
        const px = parseImageCssDimension(declaration.slice(sep + 1).trim().toLowerCase());
        if (px !== undefined) {
            dims[name] = px;
        }
    }
    return dims;
}
|
|
1783
|
+
/**
 * Parse a CSS length for an image into whole pixels.
 *
 * Accepts a decimal number with an optional unit of `px`, `pt`, `in`, `cm`
 * or `mm` (no unit means `px`). Conversion uses the exact CSS ratios
 * (1in = 96px = 72pt = 2.54cm) rather than truncated factors, so large
 * values round correctly.
 *
 * @param value Lower-cased CSS value, e.g. `"2.5cm"`.
 * @returns Rounded pixel count, or `undefined` for any other form
 *          (percentages, other units, malformed numbers such as ".").
 */
function parseImageCssDimension(value) {
    // Require a well-formed decimal so the result can never be NaN.
    const match = /^(\d+(?:\.\d*)?|\.\d+)\s*(px|pt|in|cm|mm)?$/.exec(value);
    if (!match) {
        return undefined;
    }
    const PX_PER_UNIT = {
        px: 1,
        pt: 96 / 72,
        in: 96,
        cm: 96 / 2.54,
        mm: 96 / 25.4
    };
    const unit = match[2] || "px";
    return Math.round(parseFloat(match[1]) * PX_PER_UNIT[unit]);
}
|
|
1810
|
+
// =============================================================================
|
|
1811
|
+
// CSS → InlineContext helper
|
|
1812
|
+
// =============================================================================
|
|
1813
|
+
/**
 * Copy the formatting found in a parsed CSS style onto an inline context.
 *
 * Truthy `bold`, `italic`, `underline` and `lineThrough` switch on the
 * matching context flag (`lineThrough` → `strikethrough`); truthy `color`,
 * `backgroundColor`, `fontFamily` and `fontSize` are copied by value.
 * Falsy style entries leave the context untouched.
 *
 * @param ctx   Inline context, mutated in place.
 * @param style Parsed CSS style.
 */
function applyCssToInlineContext(ctx, style) {
    const flagMap = [
        ["bold", "bold"],
        ["italic", "italic"],
        ["underline", "underline"],
        ["lineThrough", "strikethrough"]
    ];
    for (const [styleKey, ctxKey] of flagMap) {
        if (style[styleKey]) {
            ctx[ctxKey] = true;
        }
    }
    for (const key of ["color", "backgroundColor", "fontFamily", "fontSize"]) {
        const setting = style[key];
        if (setting) {
            ctx[key] = setting;
        }
    }
}
|
|
1840
|
+
/**
 * Resolve the effective inline style string for an element by merging
 * class-based styles with its `style` attribute.
 *
 * Class styles are joined in the order the classes are listed, and the inline
 * style is appended last so that its declarations come later and win.
 *
 * Only own, non-empty string entries of `classStyles` are used, so a class
 * named like an inherited object member (`constructor`, `toString`, …)
 * contributes nothing. A missing `classStyles` is treated as empty.
 *
 * @param attrs       Element attributes (`class` and `style` are read).
 * @param classStyles Class-name → style-string lookup.
 * @returns The merged style string, or `undefined` when there is none.
 */
function resolveEffectiveStyle(attrs, classStyles) {
    const inlineStyle = attrs["style"];
    const classAttr = attrs["class"];
    const parts = [];
    if (classAttr && classStyles) {
        for (const name of classAttr.split(/\s+/)) {
            if (!name || !Object.prototype.hasOwnProperty.call(classStyles, name)) {
                continue;
            }
            const declared = classStyles[name];
            if (typeof declared === "string" && declared) {
                parts.push(declared);
            }
        }
    }
    const merged = parts.length > 0 ? parts.join("; ") : undefined;
    if (merged && inlineStyle) {
        // Inline style overrides: appended after class styles so later declarations win
        return `${merged}; ${inlineStyle}`;
    }
    return inlineStyle || merged;
}
|
|
1867
|
+
// =============================================================================
|
|
1868
|
+
// Run builder
|
|
1869
|
+
// =============================================================================
|
|
1870
|
+
/**
 * Build a text run from a string and the current inline context.
 *
 * Each truthy context flag becomes a run property: bold, italic, underline
 * ("single"), strike, vertAlign (subscript wins when both subscript and
 * superscript are set), font ("Courier New" for code, else the context font
 * family), size, color and shading. `properties` is omitted entirely when
 * no property applies.
 *
 * @param text Text for the run.
 * @param ctx  Inline formatting context.
 * @returns A run whose `content` is a single text item.
 */
function makeRun(text, ctx) {
    let vertAlign;
    if (ctx.subscript) {
        vertAlign = "subscript";
    }
    else if (ctx.superscript) {
        vertAlign = "superscript";
    }
    const typeface = ctx.code ? "Courier New" : ctx.fontFamily;
    const candidates = [
        ["bold", ctx.bold ? true : undefined],
        ["italic", ctx.italic ? true : undefined],
        ["underline", ctx.underline ? "single" : undefined],
        ["strike", ctx.strikethrough ? true : undefined],
        ["vertAlign", vertAlign],
        ["font", typeface ? { ascii: typeface, hAnsi: typeface } : undefined],
        ["size", ctx.fontSize || undefined],
        ["color", ctx.color || undefined],
        ["shading", ctx.backgroundColor ? { pattern: "clear", fill: ctx.backgroundColor } : undefined]
    ];
    const props = Object.fromEntries(candidates.filter(([, setting]) => setting !== undefined));
    const content = [{ type: "text", text }];
    return Object.keys(props).length > 0 ? { properties: props, content } : { content };
}
|