@cj-tech-master/excelts 9.6.1 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -3
- package/README_zh.md +18 -3
- package/dist/browser/modules/excel/cell.d.ts +4 -0
- package/dist/browser/modules/excel/note.js +5 -1
- package/dist/browser/modules/excel/row.js +35 -2
- package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
- package/dist/browser/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/browser/modules/excel/types.d.ts +81 -0
- package/dist/browser/modules/excel/utils/drawing-utils.d.ts +8 -0
- package/dist/browser/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/browser/modules/excel/workbook.browser.d.ts +16 -0
- package/dist/browser/modules/excel/workbook.browser.js +32 -2
- package/dist/browser/modules/excel/worksheet.d.ts +31 -1
- package/dist/browser/modules/excel/worksheet.js +83 -0
- package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
- package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/browser/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/browser/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/browser/modules/pdf/builder/document-builder.js +22 -49
- package/dist/browser/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/browser/modules/pdf/core/pdf-stream.d.ts +28 -1
- package/dist/browser/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/browser/modules/pdf/font/font-manager.d.ts +26 -0
- package/dist/browser/modules/pdf/font/font-manager.js +35 -18
- package/dist/browser/modules/pdf/render/page-renderer.d.ts +51 -3
- package/dist/browser/modules/pdf/render/page-renderer.js +111 -18
- package/dist/browser/modules/word/advanced/field-engine.js +45 -20
- package/dist/browser/modules/word/advanced/glossary.d.ts +10 -36
- package/dist/browser/modules/word/advanced/glossary.js +8 -9
- package/dist/browser/modules/word/advanced/math-convert.js +94 -12
- package/dist/browser/modules/word/advanced/ole-objects.d.ts +28 -0
- package/dist/browser/modules/word/advanced/ole-objects.js +122 -19
- package/dist/browser/modules/word/advanced/style-map.js +31 -10
- package/dist/browser/modules/word/builder/run-builders.d.ts +7 -1
- package/dist/browser/modules/word/builder/run-builders.js +7 -1
- package/dist/browser/modules/word/constants.d.ts +4 -0
- package/dist/browser/modules/word/constants.js +5 -1
- package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +2 -1
- package/dist/browser/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/browser/modules/word/convert/html/html-import.d.ts +32 -1
- package/dist/browser/modules/word/convert/html/html-import.js +167 -14
- package/dist/browser/modules/word/convert/html/html.d.ts +2 -2
- package/dist/browser/modules/word/convert/html/html.js +1 -1
- package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +48 -18
- package/dist/browser/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/browser/modules/word/convert/markdown/markdown.d.ts +1 -1
- package/dist/browser/modules/word/convert/odt/odt.js +407 -56
- package/dist/browser/modules/word/html.d.ts +2 -2
- package/dist/browser/modules/word/html.js +1 -1
- package/dist/browser/modules/word/index.base.d.ts +3 -3
- package/dist/browser/modules/word/index.base.js +1 -1
- package/dist/browser/modules/word/layout/layout-full.js +326 -19
- package/dist/browser/modules/word/layout/render-page.js +35 -8
- package/dist/browser/modules/word/markdown.d.ts +1 -1
- package/dist/browser/modules/word/query/compat.d.ts +10 -2
- package/dist/browser/modules/word/query/compat.js +29 -21
- package/dist/browser/modules/word/reader/docx-reader.js +105 -2
- package/dist/browser/modules/word/reader/math-parser.js +8 -2
- package/dist/browser/modules/word/security/cfb-reader.js +5 -5
- package/dist/browser/modules/word/types.d.ts +96 -1
- package/dist/browser/modules/word/writer/docx-packager.js +108 -2
- package/dist/browser/modules/word/writer/glossary-writer.d.ts +28 -0
- package/dist/browser/modules/word/writer/glossary-writer.js +121 -0
- package/dist/browser/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/browser/modules/word/writer/math-writer.js +7 -2
- package/dist/browser/utils/font-metrics.d.ts +8 -0
- package/dist/browser/utils/font-metrics.js +43 -0
- package/dist/browser/utils/theme-colors.js +4 -1
- package/dist/cjs/modules/excel/note.js +5 -1
- package/dist/cjs/modules/excel/row.js +35 -2
- package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/cjs/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/cjs/modules/excel/workbook.browser.js +31 -1
- package/dist/cjs/modules/excel/worksheet.js +83 -0
- package/dist/cjs/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/cjs/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/cjs/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/shape-xform.js +112 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/cjs/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/cjs/modules/pdf/builder/document-builder.js +21 -48
- package/dist/cjs/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/cjs/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/cjs/modules/pdf/font/font-manager.js +35 -18
- package/dist/cjs/modules/pdf/render/page-renderer.js +112 -18
- package/dist/cjs/modules/word/advanced/field-engine.js +45 -20
- package/dist/cjs/modules/word/advanced/glossary.js +8 -9
- package/dist/cjs/modules/word/advanced/math-convert.js +94 -12
- package/dist/cjs/modules/word/advanced/ole-objects.js +123 -19
- package/dist/cjs/modules/word/advanced/style-map.js +31 -10
- package/dist/cjs/modules/word/builder/run-builders.js +7 -1
- package/dist/cjs/modules/word/constants.js +5 -1
- package/dist/cjs/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/cjs/modules/word/convert/html/html-import.js +168 -14
- package/dist/cjs/modules/word/convert/html/html.js +2 -1
- package/dist/cjs/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/cjs/modules/word/convert/odt/odt.js +407 -56
- package/dist/cjs/modules/word/html.js +2 -1
- package/dist/cjs/modules/word/index.base.js +4 -3
- package/dist/cjs/modules/word/layout/layout-full.js +325 -18
- package/dist/cjs/modules/word/layout/render-page.js +35 -8
- package/dist/cjs/modules/word/query/compat.js +29 -21
- package/dist/cjs/modules/word/reader/docx-reader.js +104 -1
- package/dist/cjs/modules/word/reader/math-parser.js +8 -2
- package/dist/cjs/modules/word/security/cfb-reader.js +5 -5
- package/dist/cjs/modules/word/writer/docx-packager.js +108 -2
- package/dist/cjs/modules/word/writer/glossary-writer.js +124 -0
- package/dist/cjs/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/cjs/modules/word/writer/math-writer.js +7 -2
- package/dist/cjs/utils/font-metrics.js +44 -0
- package/dist/cjs/utils/theme-colors.js +4 -1
- package/dist/esm/modules/excel/note.js +5 -1
- package/dist/esm/modules/excel/row.js +35 -2
- package/dist/esm/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/esm/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/esm/modules/excel/workbook.browser.js +32 -2
- package/dist/esm/modules/excel/worksheet.js +83 -0
- package/dist/esm/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/esm/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/esm/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/esm/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/esm/modules/pdf/builder/document-builder.js +22 -49
- package/dist/esm/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/esm/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/esm/modules/pdf/font/font-manager.js +35 -18
- package/dist/esm/modules/pdf/render/page-renderer.js +111 -18
- package/dist/esm/modules/word/advanced/field-engine.js +45 -20
- package/dist/esm/modules/word/advanced/glossary.js +8 -9
- package/dist/esm/modules/word/advanced/math-convert.js +94 -12
- package/dist/esm/modules/word/advanced/ole-objects.js +122 -19
- package/dist/esm/modules/word/advanced/style-map.js +31 -10
- package/dist/esm/modules/word/builder/run-builders.js +7 -1
- package/dist/esm/modules/word/constants.js +5 -1
- package/dist/esm/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/esm/modules/word/convert/html/html-import.js +167 -14
- package/dist/esm/modules/word/convert/html/html.js +1 -1
- package/dist/esm/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/esm/modules/word/convert/odt/odt.js +407 -56
- package/dist/esm/modules/word/html.js +1 -1
- package/dist/esm/modules/word/index.base.js +1 -1
- package/dist/esm/modules/word/layout/layout-full.js +326 -19
- package/dist/esm/modules/word/layout/render-page.js +35 -8
- package/dist/esm/modules/word/query/compat.js +29 -21
- package/dist/esm/modules/word/reader/docx-reader.js +105 -2
- package/dist/esm/modules/word/reader/math-parser.js +8 -2
- package/dist/esm/modules/word/security/cfb-reader.js +5 -5
- package/dist/esm/modules/word/writer/docx-packager.js +108 -2
- package/dist/esm/modules/word/writer/glossary-writer.js +121 -0
- package/dist/esm/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/esm/modules/word/writer/math-writer.js +7 -2
- package/dist/esm/utils/font-metrics.js +43 -0
- package/dist/esm/utils/theme-colors.js +4 -1
- package/dist/iife/excelts.iife.js +496 -59
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +39 -39
- package/dist/types/modules/excel/cell.d.ts +4 -0
- package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
- package/dist/types/modules/excel/types.d.ts +81 -0
- package/dist/types/modules/excel/utils/drawing-utils.d.ts +8 -0
- package/dist/types/modules/excel/workbook.browser.d.ts +16 -0
- package/dist/types/modules/excel/worksheet.d.ts +31 -1
- package/dist/types/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
- package/dist/types/modules/pdf/core/pdf-stream.d.ts +28 -1
- package/dist/types/modules/pdf/font/font-manager.d.ts +26 -0
- package/dist/types/modules/pdf/render/page-renderer.d.ts +51 -3
- package/dist/types/modules/word/advanced/glossary.d.ts +10 -36
- package/dist/types/modules/word/advanced/ole-objects.d.ts +28 -0
- package/dist/types/modules/word/builder/run-builders.d.ts +7 -1
- package/dist/types/modules/word/constants.d.ts +4 -0
- package/dist/types/modules/word/convert/docx-to-semantic.d.ts +2 -1
- package/dist/types/modules/word/convert/html/html-import.d.ts +32 -1
- package/dist/types/modules/word/convert/html/html.d.ts +2 -2
- package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +48 -18
- package/dist/types/modules/word/convert/markdown/markdown.d.ts +1 -1
- package/dist/types/modules/word/html.d.ts +2 -2
- package/dist/types/modules/word/index.base.d.ts +3 -3
- package/dist/types/modules/word/markdown.d.ts +1 -1
- package/dist/types/modules/word/query/compat.d.ts +10 -2
- package/dist/types/modules/word/types.d.ts +96 -1
- package/dist/types/modules/word/writer/glossary-writer.d.ts +28 -0
- package/dist/types/utils/font-metrics.d.ts +8 -0
- package/package.json +3 -1
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
* const buffer = await toBuffer(Document.build(h));
|
|
21
21
|
* ```
|
|
22
22
|
*/
|
|
23
|
-
import type { BodyContent } from "../../types.js";
|
|
23
|
+
import type { BodyContent, ImageDef } from "../../types.js";
|
|
24
24
|
/** Options for HTML to DOCX conversion. */
|
|
25
25
|
export interface HtmlImportOptions {
|
|
26
26
|
/** Default font size in half-points (default: 24 = 12pt). */
|
|
@@ -48,3 +48,34 @@ export interface HtmlImportOptions {
|
|
|
48
48
|
* @returns Array of BodyContent blocks.
|
|
49
49
|
*/
|
|
50
50
|
export declare function htmlToDocxBody(html: string, options?: HtmlImportOptions): BodyContent[];
|
|
51
|
+
/** Result of {@link htmlToDocx}: body content plus the images it references. */
|
|
52
|
+
export interface HtmlToDocxResult {
|
|
53
|
+
/** Parsed body content blocks. */
|
|
54
|
+
readonly body: BodyContent[];
|
|
55
|
+
/**
|
|
56
|
+
* Images decoded from base64 `data:` URLs in the HTML, each with a unique
|
|
57
|
+
* rId already referenced by the matching image run in `body`. Merge these
|
|
58
|
+
* into the document model's `images` array so the pictures are embedded as
|
|
59
|
+
* real media in the package instead of dropped as placeholders.
|
|
60
|
+
*/
|
|
61
|
+
readonly images: ImageDef[];
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Convert an HTML string into DOCX body content **and** embedded images.
|
|
65
|
+
*
|
|
66
|
+
* Unlike {@link htmlToDocxBody}, this decodes base64 `data:` image URLs into
|
|
67
|
+
* real {@link ImageDef}s and assigns each a unique rId that the emitted image
|
|
68
|
+
* runs reference. Merge the returned `images` into your document model so the
|
|
69
|
+
* pictures are embedded rather than dropped as placeholders.
|
|
70
|
+
*
|
|
71
|
+
* @example
|
|
72
|
+
* ```ts
|
|
73
|
+
* const { body, images } = htmlToDocx(html);
|
|
74
|
+
* const doc = Document.create();
|
|
75
|
+
* for (const item of body) Document.addContent(doc, item);
|
|
76
|
+
* const built = Document.build(doc);
|
|
77
|
+
* const final = { ...built, images: [...(built.images ?? []), ...images] };
|
|
78
|
+
* const bytes = await toBuffer(final);
|
|
79
|
+
* ```
|
|
80
|
+
*/
|
|
81
|
+
export declare function htmlToDocx(html: string, options?: HtmlImportOptions): HtmlToDocxResult;
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
* const buffer = await toBuffer(Document.build(h));
|
|
21
21
|
* ```
|
|
22
22
|
*/
|
|
23
|
+
import { base64ToUint8Array } from "../../../../utils/utils.browser.js";
|
|
23
24
|
import { sanitizeUrl } from "../../core/internal-utils.js";
|
|
24
25
|
import { EMU_PER_PX } from "../../units.js";
|
|
25
26
|
/**
|
|
@@ -44,10 +45,7 @@ export function htmlToDocxBody(html, options) {
|
|
|
44
45
|
const tokens = tokenize(html);
|
|
45
46
|
// Extract <style> rules and merge with user-provided classStyles
|
|
46
47
|
const extractedStyles = extractStyleRules(tokens);
|
|
47
|
-
const classStyles = {
|
|
48
|
-
...extractedStyles,
|
|
49
|
-
...(options?.classStyles ?? {})
|
|
50
|
-
};
|
|
48
|
+
const classStyles = mergeClassStyles(extractedStyles, options?.classStyles ?? {});
|
|
51
49
|
// Seed the inline context with the caller-supplied defaults so plain text
|
|
52
50
|
// runs actually carry the requested font/size. Without this the options
|
|
53
51
|
// were effectively ignored.
|
|
@@ -61,6 +59,40 @@ export function htmlToDocxBody(html, options) {
|
|
|
61
59
|
parseBlocks(tokens, 0, blocks, initialCtx, classStyles);
|
|
62
60
|
return blocks;
|
|
63
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* Convert an HTML string into DOCX body content **and** embedded images.
|
|
64
|
+
*
|
|
65
|
+
* Unlike {@link htmlToDocxBody}, this decodes base64 `data:` image URLs into
|
|
66
|
+
* real {@link ImageDef}s and assigns each a unique rId that the emitted image
|
|
67
|
+
* runs reference. Merge the returned `images` into your document model so the
|
|
68
|
+
* pictures are embedded rather than dropped as placeholders.
|
|
69
|
+
*
|
|
70
|
+
* @example
|
|
71
|
+
* ```ts
|
|
72
|
+
* const { body, images } = htmlToDocx(html);
|
|
73
|
+
* const doc = Document.create();
|
|
74
|
+
* for (const item of body) Document.addContent(doc, item);
|
|
75
|
+
* const built = Document.build(doc);
|
|
76
|
+
* const final = { ...built, images: [...(built.images ?? []), ...images] };
|
|
77
|
+
* const bytes = await toBuffer(final);
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
export function htmlToDocx(html, options) {
|
|
81
|
+
const blocks = [];
|
|
82
|
+
const tokens = tokenize(html);
|
|
83
|
+
const extractedStyles = extractStyleRules(tokens);
|
|
84
|
+
const classStyles = mergeClassStyles(extractedStyles, options?.classStyles ?? {});
|
|
85
|
+
const images = [];
|
|
86
|
+
const initialCtx = { imageSink: images };
|
|
87
|
+
if (options?.defaultFont) {
|
|
88
|
+
initialCtx.fontFamily = options.defaultFont;
|
|
89
|
+
}
|
|
90
|
+
if (options?.defaultFontSize !== undefined) {
|
|
91
|
+
initialCtx.fontSize = options.defaultFontSize;
|
|
92
|
+
}
|
|
93
|
+
parseBlocks(tokens, 0, blocks, initialCtx, classStyles);
|
|
94
|
+
return { body: blocks, images };
|
|
95
|
+
}
|
|
64
96
|
function tokenize(html) {
|
|
65
97
|
const tokens = [];
|
|
66
98
|
// Strip HTML comments, doctype declarations and SGML processing
|
|
@@ -473,6 +505,22 @@ function extractStyleRules(tokens) {
|
|
|
473
505
|
}
|
|
474
506
|
return result;
|
|
475
507
|
}
|
|
508
|
+
/**
|
|
509
|
+
* Merge two class→style maps. For classes present in both, the declarations
|
|
510
|
+
* are concatenated (extracted `<style>` rules first, caller-supplied overrides
|
|
511
|
+
* last) so the later source wins per CSS cascade while still preserving
|
|
512
|
+
* properties only declared by the other source. A plain `{ ...a, ...b }`
|
|
513
|
+
* would discard the extracted rule entirely whenever the caller supplies the
|
|
514
|
+
* same class name, silently dropping e.g. `font-style`/`color` from `<style>`.
|
|
515
|
+
*/
|
|
516
|
+
function mergeClassStyles(extracted, overrides) {
|
|
517
|
+
const merged = { ...extracted };
|
|
518
|
+
for (const [name, style] of Object.entries(overrides)) {
|
|
519
|
+
const existing = merged[name];
|
|
520
|
+
merged[name] = existing ? `${existing}; ${style}` : style;
|
|
521
|
+
}
|
|
522
|
+
return merged;
|
|
523
|
+
}
|
|
476
524
|
/**
|
|
477
525
|
* Parse HTML-style attributes from the inside of a start tag, e.g.
|
|
478
526
|
* `class="x" id='y' disabled href=foo`.
|
|
@@ -1133,6 +1181,17 @@ function parseBlocks(tokens, start, blocks, parentCtx, classStyles) {
|
|
|
1133
1181
|
return i + 1; // consumed the close tag
|
|
1134
1182
|
}
|
|
1135
1183
|
if (tok.type === "text") {
|
|
1184
|
+
// In block context, text nodes that are pure inter-element whitespace
|
|
1185
|
+
// (the newlines/indentation between block tags in pretty-printed HTML)
|
|
1186
|
+
// carry no content and must be ignored — otherwise every gap between
|
|
1187
|
+
// <p>/<table>/<div> tags would emit a spurious empty paragraph (and
|
|
1188
|
+
// the contained newline would be rendered as a <w:br/> soft break).
|
|
1189
|
+
// Whitespace that sits between inline runs is preserved by the inline
|
|
1190
|
+
// parser, which handles it separately.
|
|
1191
|
+
if (tok.value.trim() === "") {
|
|
1192
|
+
i++;
|
|
1193
|
+
continue;
|
|
1194
|
+
}
|
|
1136
1195
|
if (!pendingInline) {
|
|
1137
1196
|
pendingInline = { runs: [], ctx: parentCtx };
|
|
1138
1197
|
}
|
|
@@ -1461,7 +1520,7 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
|
|
|
1461
1520
|
runs.push({ content: [{ type: "break" }] });
|
|
1462
1521
|
}
|
|
1463
1522
|
else if (tag === "img") {
|
|
1464
|
-
const imgContent = buildImageContent(tok.attrs);
|
|
1523
|
+
const imgContent = buildImageContent(tok.attrs, ctx);
|
|
1465
1524
|
if (imgContent) {
|
|
1466
1525
|
runs.push({ content: [imgContent] });
|
|
1467
1526
|
}
|
|
@@ -1534,13 +1593,15 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
|
|
|
1534
1593
|
i++;
|
|
1535
1594
|
}
|
|
1536
1595
|
else if (t.type === "close") {
|
|
1596
|
+
// Mismatched close tag — close the hyperlink here but do NOT
|
|
1597
|
+
// consume the token; let the caller handle the block boundary.
|
|
1537
1598
|
const hyperlink = {
|
|
1538
1599
|
type: "hyperlink",
|
|
1539
1600
|
url: safeHref ?? "",
|
|
1540
1601
|
children: innerRuns
|
|
1541
1602
|
};
|
|
1542
1603
|
runs.push(hyperlink);
|
|
1543
|
-
return i
|
|
1604
|
+
return i;
|
|
1544
1605
|
}
|
|
1545
1606
|
else {
|
|
1546
1607
|
const childRuns = [];
|
|
@@ -1581,7 +1642,12 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
|
|
|
1581
1642
|
i++;
|
|
1582
1643
|
}
|
|
1583
1644
|
else if (t.type === "close") {
|
|
1584
|
-
|
|
1645
|
+
// Mismatched close tag (e.g. </p> while inside an unclosed <strong>).
|
|
1646
|
+
// Do NOT consume it — return the current index so the caller can
|
|
1647
|
+
// handle it. Consuming a block-level close here would swallow the
|
|
1648
|
+
// parent paragraph boundary and pull all following block content
|
|
1649
|
+
// into this run, breaking page breaks, tables, etc.
|
|
1650
|
+
return i;
|
|
1585
1651
|
}
|
|
1586
1652
|
else {
|
|
1587
1653
|
i = parseInlineTag(tokens, i, runs, newCtx, classStyles);
|
|
@@ -1660,6 +1726,15 @@ function parseListItem(tokens, start, blocks, ctx, ordered, level, classStyles)
|
|
|
1660
1726
|
}
|
|
1661
1727
|
// Text content
|
|
1662
1728
|
if (tok.type === "text") {
|
|
1729
|
+
// Skip structural whitespace: the indentation/newlines that sit between
|
|
1730
|
+
// a nested <ul>/<ol> and the closing </li> (or at the very start of the
|
|
1731
|
+
// item) are not real content. Emitting them as runs would otherwise
|
|
1732
|
+
// produce a spurious empty list-item paragraph. Whitespace *between*
|
|
1733
|
+
// real inline content is preserved because `children` is non-empty then.
|
|
1734
|
+
if (tok.value.trim() === "" && children.length === 0) {
|
|
1735
|
+
i++;
|
|
1736
|
+
continue;
|
|
1737
|
+
}
|
|
1663
1738
|
children.push(makeRun(tok.value, ctx));
|
|
1664
1739
|
i++;
|
|
1665
1740
|
continue;
|
|
@@ -2064,7 +2139,7 @@ function mapCssBorderStyle(cssStyle) {
|
|
|
2064
2139
|
// Image content builder
|
|
2065
2140
|
// =============================================================================
|
|
2066
2141
|
/** Build InlineImageContent from img attributes or return undefined if not applicable. */
|
|
2067
|
-
function buildImageContent(attrs) {
|
|
2142
|
+
function buildImageContent(attrs, ctx) {
|
|
2068
2143
|
const src = attrs["src"] || "";
|
|
2069
2144
|
const alt = attrs["alt"] || "";
|
|
2070
2145
|
// Parse width/height from attributes first, then fall back to style
|
|
@@ -2083,11 +2158,36 @@ function buildImageContent(attrs) {
|
|
|
2083
2158
|
// Convert pixels to EMU
|
|
2084
2159
|
const widthEmu = (width || 100) * EMU_PER_PX;
|
|
2085
2160
|
const heightEmu = (height || 100) * EMU_PER_PX;
|
|
2086
|
-
//
|
|
2087
|
-
//
|
|
2088
|
-
//
|
|
2089
|
-
|
|
2090
|
-
|
|
2161
|
+
// base64 data: URLs can be decoded and embedded as a real media file when
|
|
2162
|
+
// an image sink is provided (htmlToDocx path). The decoded bytes are
|
|
2163
|
+
// registered as an ImageDef and the run references the assigned rId.
|
|
2164
|
+
if (src.startsWith("data:") && ctx?.imageSink) {
|
|
2165
|
+
const decoded = decodeDataUrlImage(src);
|
|
2166
|
+
if (decoded) {
|
|
2167
|
+
const sink = ctx.imageSink;
|
|
2168
|
+
const index = sink.length;
|
|
2169
|
+
const rId = `htmlImg${index}`;
|
|
2170
|
+
const ext = decoded.mediaType === "jpeg" ? "jpg" : decoded.mediaType;
|
|
2171
|
+
sink.push({
|
|
2172
|
+
data: decoded.data,
|
|
2173
|
+
mediaType: decoded.mediaType,
|
|
2174
|
+
fileName: `image_html_${index}.${ext}`,
|
|
2175
|
+
rId
|
|
2176
|
+
});
|
|
2177
|
+
return {
|
|
2178
|
+
type: "image",
|
|
2179
|
+
rId,
|
|
2180
|
+
width: widthEmu,
|
|
2181
|
+
height: heightEmu,
|
|
2182
|
+
altText: alt || undefined,
|
|
2183
|
+
name: alt || `image${index}`
|
|
2184
|
+
};
|
|
2185
|
+
}
|
|
2186
|
+
}
|
|
2187
|
+
// No sink (htmlToDocxBody only returns BodyContent[] and cannot register
|
|
2188
|
+
// media) or an unsupported/remote source: emit a placeholder with an empty
|
|
2189
|
+
// rId. The renderer treats an empty rId as a placeholder; the original src
|
|
2190
|
+
// is surfaced in the alt text so callers can post-process if needed.
|
|
2091
2191
|
if (src.startsWith("data:") || src.startsWith("http://") || src.startsWith("https://")) {
|
|
2092
2192
|
return {
|
|
2093
2193
|
type: "image",
|
|
@@ -2100,6 +2200,54 @@ function buildImageContent(attrs) {
|
|
|
2100
2200
|
}
|
|
2101
2201
|
return undefined;
|
|
2102
2202
|
}
|
|
2203
|
+
/** Decode a `data:image/...;base64,...` URL into bytes + media type. */
|
|
2204
|
+
function decodeDataUrlImage(src) {
|
|
2205
|
+
// data:image/png;base64,XXXX
|
|
2206
|
+
const match = /^data:image\/([a-z0-9.+-]+)\s*;\s*base64\s*,(.*)$/is.exec(src);
|
|
2207
|
+
if (!match) {
|
|
2208
|
+
return undefined;
|
|
2209
|
+
}
|
|
2210
|
+
const rawType = match[1].toLowerCase();
|
|
2211
|
+
const b64 = match[2].replace(/\s+/g, "");
|
|
2212
|
+
const mediaType = normalizeImageMediaType(rawType);
|
|
2213
|
+
if (!mediaType) {
|
|
2214
|
+
return undefined;
|
|
2215
|
+
}
|
|
2216
|
+
try {
|
|
2217
|
+
const data = base64ToUint8Array(b64);
|
|
2218
|
+
if (data.length === 0) {
|
|
2219
|
+
return undefined;
|
|
2220
|
+
}
|
|
2221
|
+
return { data, mediaType };
|
|
2222
|
+
}
|
|
2223
|
+
catch {
|
|
2224
|
+
return undefined;
|
|
2225
|
+
}
|
|
2226
|
+
}
|
|
2227
|
+
/** Map a data-URL image subtype to a supported ImageMediaType. */
|
|
2228
|
+
function normalizeImageMediaType(subtype) {
|
|
2229
|
+
switch (subtype) {
|
|
2230
|
+
case "png":
|
|
2231
|
+
return "png";
|
|
2232
|
+
case "jpeg":
|
|
2233
|
+
case "jpg":
|
|
2234
|
+
return "jpeg";
|
|
2235
|
+
case "gif":
|
|
2236
|
+
return "gif";
|
|
2237
|
+
case "bmp":
|
|
2238
|
+
return "bmp";
|
|
2239
|
+
case "tiff":
|
|
2240
|
+
case "tif":
|
|
2241
|
+
return "tiff";
|
|
2242
|
+
case "svg+xml":
|
|
2243
|
+
case "svg":
|
|
2244
|
+
return "svg";
|
|
2245
|
+
case "webp":
|
|
2246
|
+
return "webp";
|
|
2247
|
+
default:
|
|
2248
|
+
return undefined;
|
|
2249
|
+
}
|
|
2250
|
+
}
|
|
2103
2251
|
/** Parse an image dimension from HTML attribute value (number or "Npx"). */
|
|
2104
2252
|
function parseImageDimension(value) {
|
|
2105
2253
|
if (!value) {
|
|
@@ -2228,6 +2376,11 @@ function resolveEffectiveStyle(attrs, classStyles) {
|
|
|
2228
2376
|
// Run builder
|
|
2229
2377
|
// =============================================================================
|
|
2230
2378
|
function makeRun(text, ctx) {
|
|
2379
|
+
// HTML whitespace handling: outside <pre>/<code>, runs of whitespace
|
|
2380
|
+
// (including the newlines/indentation from source-code line wrapping)
|
|
2381
|
+
// collapse to a single space. Inside <pre>/<code> whitespace is
|
|
2382
|
+
// significant and preserved verbatim.
|
|
2383
|
+
const value = ctx.code ? text : text.replace(/\s+/g, " ");
|
|
2231
2384
|
const props = {};
|
|
2232
2385
|
if (ctx.bold) {
|
|
2233
2386
|
props.bold = true;
|
|
@@ -2264,7 +2417,7 @@ function makeRun(text, ctx) {
|
|
|
2264
2417
|
}
|
|
2265
2418
|
const run = {
|
|
2266
2419
|
...(Object.keys(props).length > 0 ? { properties: props } : {}),
|
|
2267
|
-
content: [{ type: "text", text }]
|
|
2420
|
+
content: [{ type: "text", text: value }]
|
|
2268
2421
|
};
|
|
2269
2422
|
return run;
|
|
2270
2423
|
}
|
|
@@ -11,5 +11,5 @@
|
|
|
11
11
|
*/
|
|
12
12
|
export { renderToHtml } from "./html-renderer.js";
|
|
13
13
|
export type { HtmlRenderOptions, HtmlRenderResult } from "./html-renderer.js";
|
|
14
|
-
export { htmlToDocxBody } from "./html-import.js";
|
|
15
|
-
export type { HtmlImportOptions } from "./html-import.js";
|
|
14
|
+
export { htmlToDocxBody, htmlToDocx } from "./html-import.js";
|
|
15
|
+
export type { HtmlImportOptions, HtmlToDocxResult } from "./html-import.js";
|
|
@@ -12,4 +12,4 @@
|
|
|
12
12
|
// HTML → render (DocxDocument → HTML output)
|
|
13
13
|
export { renderToHtml } from "./html-renderer.js";
|
|
14
14
|
// HTML → DOCX import (HTML string → BodyContent[])
|
|
15
|
-
export { htmlToDocxBody } from "./html-import.js";
|
|
15
|
+
export { htmlToDocxBody, htmlToDocx } from "./html-import.js";
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
*
|
|
17
17
|
* @stability experimental
|
|
18
18
|
*/
|
|
19
|
-
import type { BodyContent, DocxDocument } from "../../types.js";
|
|
19
|
+
import type { AbstractNumbering, BodyContent, DocxDocument, FootnoteDef, ImageDef, ImageMediaType, NumberingInstance } from "../../types.js";
|
|
20
20
|
/** Options for Markdown to DOCX conversion. */
|
|
21
21
|
export interface MarkdownImportOptions {
|
|
22
22
|
/** Default font family for body text. */
|
|
@@ -33,36 +33,66 @@ export interface MarkdownImportOptions {
|
|
|
33
33
|
/** Resolved image data for embedding. */
|
|
34
34
|
export interface MarkdownImageData {
|
|
35
35
|
readonly data: Uint8Array;
|
|
36
|
-
readonly mediaType:
|
|
36
|
+
readonly mediaType: ImageMediaType;
|
|
37
37
|
readonly width?: number;
|
|
38
38
|
readonly height?: number;
|
|
39
|
+
/**
|
|
40
|
+
* Raster (PNG) fallback for vector images. Required by Word for `svg`
|
|
41
|
+
* images so non-SVG-aware viewers have something to display. When the
|
|
42
|
+
* media type is `svg` and this is omitted, the packager synthesizes a
|
|
43
|
+
* transparent placeholder PNG automatically.
|
|
44
|
+
*/
|
|
45
|
+
readonly fallbackData?: Uint8Array;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Result of {@link markdownToDocxBody} — the parsed body content plus the
|
|
49
|
+
* supporting document-level definitions it references.
|
|
50
|
+
*
|
|
51
|
+
* Lists, footnotes and images are *not* self-contained: a list paragraph
|
|
52
|
+
* references a numbering id, a footnote reference run references a
|
|
53
|
+
* `FootnoteDef`, and an inline image references an `ImageDef`. Splicing the
|
|
54
|
+
* `body` alone into a host document that lacks these definitions yields
|
|
55
|
+
* invalid OOXML. Merge the relevant arrays into the host document (or its
|
|
56
|
+
* builder state) alongside the body.
|
|
57
|
+
*/
|
|
58
|
+
export interface MarkdownBodyResult {
|
|
59
|
+
readonly body: BodyContent[];
|
|
60
|
+
readonly abstractNumberings: AbstractNumbering[];
|
|
61
|
+
readonly numberingInstances: NumberingInstance[];
|
|
62
|
+
readonly footnotes: FootnoteDef[];
|
|
63
|
+
readonly images: ImageDef[];
|
|
39
64
|
}
|
|
40
65
|
/**
|
|
41
66
|
* Convert a Markdown string into a complete DocxDocument.
|
|
42
67
|
*
|
|
68
|
+
* Supports the full GFM feature set including inline images (embedded via the
|
|
69
|
+
* `resolveImage` callback) and footnotes (`[^id]` references with `[^id]: …`
|
|
70
|
+
* definitions). Because image resolution and document packaging are inherently
|
|
71
|
+
* asynchronous, this function is async.
|
|
72
|
+
*
|
|
43
73
|
* @param markdown - The GFM Markdown string.
|
|
44
74
|
* @param options - Optional conversion settings.
|
|
45
|
-
* @returns A DocxDocument ready to be packaged.
|
|
75
|
+
* @returns A Promise resolving to a DocxDocument ready to be packaged.
|
|
46
76
|
*/
|
|
47
|
-
export declare function markdownToDocx(markdown: string, options?: MarkdownImportOptions): DocxDocument
|
|
77
|
+
export declare function markdownToDocx(markdown: string, options?: MarkdownImportOptions): Promise<DocxDocument>;
|
|
48
78
|
/**
|
|
49
|
-
* Convert a Markdown string into
|
|
79
|
+
* Convert a Markdown string into DOCX body content plus the supporting
|
|
80
|
+
* document-level definitions it references.
|
|
50
81
|
*
|
|
51
|
-
* **Caveat — body content is not self-contained.**
|
|
52
|
-
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
55
|
-
* - **
|
|
56
|
-
* - **
|
|
82
|
+
* **Caveat — body content is not self-contained.** The returned `body` may
|
|
83
|
+
* reference:
|
|
84
|
+
* - **Numbering** (`abstractNumberings` / `numberingInstances`) — used by
|
|
85
|
+
* bullet / numbered / task lists.
|
|
86
|
+
* - **Footnotes** (`footnotes`) — referenced by footnote-reference runs.
|
|
87
|
+
* - **Images** (`images`) — referenced by inline image runs.
|
|
88
|
+
* - The named `Quote` / `CodeBlock` styles (for block quotes / code blocks).
|
|
57
89
|
*
|
|
58
|
-
*
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
* splicing, or use the higher-level {@link markdownToDocx} which returns a
|
|
62
|
-
* complete `DocxDocument` with the supporting definitions populated.
|
|
90
|
+
* Splice the relevant arrays into your host document alongside the body, or
|
|
91
|
+
* use the higher-level {@link markdownToDocx} which returns a complete
|
|
92
|
+
* `DocxDocument` with everything populated.
|
|
63
93
|
*
|
|
64
94
|
* @param markdown - The GFM Markdown string.
|
|
65
95
|
* @param options - Optional conversion settings.
|
|
66
|
-
* @returns
|
|
96
|
+
* @returns A Promise resolving to the body and its supporting definitions.
|
|
67
97
|
*/
|
|
68
|
-
export declare function markdownToDocxBody(markdown: string, options?: MarkdownImportOptions):
|
|
98
|
+
export declare function markdownToDocxBody(markdown: string, options?: MarkdownImportOptions): Promise<MarkdownBodyResult>;
|