@cj-tech-master/excelts 9.6.1 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. package/README.md +18 -3
  2. package/README_zh.md +18 -3
  3. package/dist/browser/modules/excel/cell.d.ts +4 -0
  4. package/dist/browser/modules/excel/note.js +5 -1
  5. package/dist/browser/modules/excel/row.js +35 -2
  6. package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
  7. package/dist/browser/modules/excel/stream/workbook-writer.browser.js +22 -2
  8. package/dist/browser/modules/excel/types.d.ts +81 -0
  9. package/dist/browser/modules/excel/utils/drawing-utils.d.ts +8 -0
  10. package/dist/browser/modules/excel/utils/drawing-utils.js +19 -2
  11. package/dist/browser/modules/excel/workbook.browser.d.ts +16 -0
  12. package/dist/browser/modules/excel/workbook.browser.js +32 -2
  13. package/dist/browser/modules/excel/worksheet.d.ts +31 -1
  14. package/dist/browser/modules/excel/worksheet.js +83 -0
  15. package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
  16. package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  17. package/dist/browser/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  18. package/dist/browser/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  19. package/dist/browser/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  20. package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
  21. package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  22. package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
  23. package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  24. package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
  25. package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  26. package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
  27. package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
  28. package/dist/browser/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  29. package/dist/browser/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  30. package/dist/browser/modules/pdf/builder/document-builder.js +22 -49
  31. package/dist/browser/modules/pdf/builder/pdf-editor.js +1 -1
  32. package/dist/browser/modules/pdf/core/pdf-stream.d.ts +28 -1
  33. package/dist/browser/modules/pdf/core/pdf-stream.js +38 -2
  34. package/dist/browser/modules/pdf/font/font-manager.d.ts +26 -0
  35. package/dist/browser/modules/pdf/font/font-manager.js +35 -18
  36. package/dist/browser/modules/pdf/render/page-renderer.d.ts +51 -3
  37. package/dist/browser/modules/pdf/render/page-renderer.js +111 -18
  38. package/dist/browser/modules/word/advanced/field-engine.js +45 -20
  39. package/dist/browser/modules/word/advanced/glossary.d.ts +10 -36
  40. package/dist/browser/modules/word/advanced/glossary.js +8 -9
  41. package/dist/browser/modules/word/advanced/math-convert.js +94 -12
  42. package/dist/browser/modules/word/advanced/ole-objects.d.ts +28 -0
  43. package/dist/browser/modules/word/advanced/ole-objects.js +122 -19
  44. package/dist/browser/modules/word/advanced/style-map.js +31 -10
  45. package/dist/browser/modules/word/builder/run-builders.d.ts +7 -1
  46. package/dist/browser/modules/word/builder/run-builders.js +7 -1
  47. package/dist/browser/modules/word/constants.d.ts +4 -0
  48. package/dist/browser/modules/word/constants.js +5 -1
  49. package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +2 -1
  50. package/dist/browser/modules/word/convert/docx-to-semantic.js +135 -1
  51. package/dist/browser/modules/word/convert/html/html-import.d.ts +32 -1
  52. package/dist/browser/modules/word/convert/html/html-import.js +167 -14
  53. package/dist/browser/modules/word/convert/html/html.d.ts +2 -2
  54. package/dist/browser/modules/word/convert/html/html.js +1 -1
  55. package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +48 -18
  56. package/dist/browser/modules/word/convert/markdown/markdown-import.js +279 -69
  57. package/dist/browser/modules/word/convert/markdown/markdown.d.ts +1 -1
  58. package/dist/browser/modules/word/convert/odt/odt.js +407 -56
  59. package/dist/browser/modules/word/html.d.ts +2 -2
  60. package/dist/browser/modules/word/html.js +1 -1
  61. package/dist/browser/modules/word/index.base.d.ts +3 -3
  62. package/dist/browser/modules/word/index.base.js +1 -1
  63. package/dist/browser/modules/word/layout/layout-full.js +326 -19
  64. package/dist/browser/modules/word/layout/render-page.js +35 -8
  65. package/dist/browser/modules/word/markdown.d.ts +1 -1
  66. package/dist/browser/modules/word/query/compat.d.ts +10 -2
  67. package/dist/browser/modules/word/query/compat.js +29 -21
  68. package/dist/browser/modules/word/reader/docx-reader.js +105 -2
  69. package/dist/browser/modules/word/reader/math-parser.js +8 -2
  70. package/dist/browser/modules/word/security/cfb-reader.js +5 -5
  71. package/dist/browser/modules/word/types.d.ts +96 -1
  72. package/dist/browser/modules/word/writer/docx-packager.js +108 -2
  73. package/dist/browser/modules/word/writer/glossary-writer.d.ts +28 -0
  74. package/dist/browser/modules/word/writer/glossary-writer.js +121 -0
  75. package/dist/browser/modules/word/writer/header-footer-writer.js +105 -20
  76. package/dist/browser/modules/word/writer/math-writer.js +7 -2
  77. package/dist/browser/utils/font-metrics.d.ts +8 -0
  78. package/dist/browser/utils/font-metrics.js +43 -0
  79. package/dist/browser/utils/theme-colors.js +4 -1
  80. package/dist/cjs/modules/excel/note.js +5 -1
  81. package/dist/cjs/modules/excel/row.js +35 -2
  82. package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +22 -2
  83. package/dist/cjs/modules/excel/utils/drawing-utils.js +19 -2
  84. package/dist/cjs/modules/excel/workbook.browser.js +31 -1
  85. package/dist/cjs/modules/excel/worksheet.js +83 -0
  86. package/dist/cjs/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  87. package/dist/cjs/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  88. package/dist/cjs/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  89. package/dist/cjs/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  90. package/dist/cjs/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  91. package/dist/cjs/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  92. package/dist/cjs/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  93. package/dist/cjs/modules/excel/xlsx/xform/drawing/shape-xform.js +112 -0
  94. package/dist/cjs/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  95. package/dist/cjs/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  96. package/dist/cjs/modules/pdf/builder/document-builder.js +21 -48
  97. package/dist/cjs/modules/pdf/builder/pdf-editor.js +1 -1
  98. package/dist/cjs/modules/pdf/core/pdf-stream.js +38 -2
  99. package/dist/cjs/modules/pdf/font/font-manager.js +35 -18
  100. package/dist/cjs/modules/pdf/render/page-renderer.js +112 -18
  101. package/dist/cjs/modules/word/advanced/field-engine.js +45 -20
  102. package/dist/cjs/modules/word/advanced/glossary.js +8 -9
  103. package/dist/cjs/modules/word/advanced/math-convert.js +94 -12
  104. package/dist/cjs/modules/word/advanced/ole-objects.js +123 -19
  105. package/dist/cjs/modules/word/advanced/style-map.js +31 -10
  106. package/dist/cjs/modules/word/builder/run-builders.js +7 -1
  107. package/dist/cjs/modules/word/constants.js +5 -1
  108. package/dist/cjs/modules/word/convert/docx-to-semantic.js +135 -1
  109. package/dist/cjs/modules/word/convert/html/html-import.js +168 -14
  110. package/dist/cjs/modules/word/convert/html/html.js +2 -1
  111. package/dist/cjs/modules/word/convert/markdown/markdown-import.js +279 -69
  112. package/dist/cjs/modules/word/convert/odt/odt.js +407 -56
  113. package/dist/cjs/modules/word/html.js +2 -1
  114. package/dist/cjs/modules/word/index.base.js +4 -3
  115. package/dist/cjs/modules/word/layout/layout-full.js +325 -18
  116. package/dist/cjs/modules/word/layout/render-page.js +35 -8
  117. package/dist/cjs/modules/word/query/compat.js +29 -21
  118. package/dist/cjs/modules/word/reader/docx-reader.js +104 -1
  119. package/dist/cjs/modules/word/reader/math-parser.js +8 -2
  120. package/dist/cjs/modules/word/security/cfb-reader.js +5 -5
  121. package/dist/cjs/modules/word/writer/docx-packager.js +108 -2
  122. package/dist/cjs/modules/word/writer/glossary-writer.js +124 -0
  123. package/dist/cjs/modules/word/writer/header-footer-writer.js +105 -20
  124. package/dist/cjs/modules/word/writer/math-writer.js +7 -2
  125. package/dist/cjs/utils/font-metrics.js +44 -0
  126. package/dist/cjs/utils/theme-colors.js +4 -1
  127. package/dist/esm/modules/excel/note.js +5 -1
  128. package/dist/esm/modules/excel/row.js +35 -2
  129. package/dist/esm/modules/excel/stream/workbook-writer.browser.js +22 -2
  130. package/dist/esm/modules/excel/utils/drawing-utils.js +19 -2
  131. package/dist/esm/modules/excel/workbook.browser.js +32 -2
  132. package/dist/esm/modules/excel/worksheet.js +83 -0
  133. package/dist/esm/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  134. package/dist/esm/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  135. package/dist/esm/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  136. package/dist/esm/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  137. package/dist/esm/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  138. package/dist/esm/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  139. package/dist/esm/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  140. package/dist/esm/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
  141. package/dist/esm/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  142. package/dist/esm/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  143. package/dist/esm/modules/pdf/builder/document-builder.js +22 -49
  144. package/dist/esm/modules/pdf/builder/pdf-editor.js +1 -1
  145. package/dist/esm/modules/pdf/core/pdf-stream.js +38 -2
  146. package/dist/esm/modules/pdf/font/font-manager.js +35 -18
  147. package/dist/esm/modules/pdf/render/page-renderer.js +111 -18
  148. package/dist/esm/modules/word/advanced/field-engine.js +45 -20
  149. package/dist/esm/modules/word/advanced/glossary.js +8 -9
  150. package/dist/esm/modules/word/advanced/math-convert.js +94 -12
  151. package/dist/esm/modules/word/advanced/ole-objects.js +122 -19
  152. package/dist/esm/modules/word/advanced/style-map.js +31 -10
  153. package/dist/esm/modules/word/builder/run-builders.js +7 -1
  154. package/dist/esm/modules/word/constants.js +5 -1
  155. package/dist/esm/modules/word/convert/docx-to-semantic.js +135 -1
  156. package/dist/esm/modules/word/convert/html/html-import.js +167 -14
  157. package/dist/esm/modules/word/convert/html/html.js +1 -1
  158. package/dist/esm/modules/word/convert/markdown/markdown-import.js +279 -69
  159. package/dist/esm/modules/word/convert/odt/odt.js +407 -56
  160. package/dist/esm/modules/word/html.js +1 -1
  161. package/dist/esm/modules/word/index.base.js +1 -1
  162. package/dist/esm/modules/word/layout/layout-full.js +326 -19
  163. package/dist/esm/modules/word/layout/render-page.js +35 -8
  164. package/dist/esm/modules/word/query/compat.js +29 -21
  165. package/dist/esm/modules/word/reader/docx-reader.js +105 -2
  166. package/dist/esm/modules/word/reader/math-parser.js +8 -2
  167. package/dist/esm/modules/word/security/cfb-reader.js +5 -5
  168. package/dist/esm/modules/word/writer/docx-packager.js +108 -2
  169. package/dist/esm/modules/word/writer/glossary-writer.js +121 -0
  170. package/dist/esm/modules/word/writer/header-footer-writer.js +105 -20
  171. package/dist/esm/modules/word/writer/math-writer.js +7 -2
  172. package/dist/esm/utils/font-metrics.js +43 -0
  173. package/dist/esm/utils/theme-colors.js +4 -1
  174. package/dist/iife/excelts.iife.js +496 -59
  175. package/dist/iife/excelts.iife.js.map +1 -1
  176. package/dist/iife/excelts.iife.min.js +39 -39
  177. package/dist/types/modules/excel/cell.d.ts +4 -0
  178. package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
  179. package/dist/types/modules/excel/types.d.ts +81 -0
  180. package/dist/types/modules/excel/utils/drawing-utils.d.ts +8 -0
  181. package/dist/types/modules/excel/workbook.browser.d.ts +16 -0
  182. package/dist/types/modules/excel/worksheet.d.ts +31 -1
  183. package/dist/types/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
  184. package/dist/types/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
  185. package/dist/types/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
  186. package/dist/types/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
  187. package/dist/types/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
  188. package/dist/types/modules/pdf/core/pdf-stream.d.ts +28 -1
  189. package/dist/types/modules/pdf/font/font-manager.d.ts +26 -0
  190. package/dist/types/modules/pdf/render/page-renderer.d.ts +51 -3
  191. package/dist/types/modules/word/advanced/glossary.d.ts +10 -36
  192. package/dist/types/modules/word/advanced/ole-objects.d.ts +28 -0
  193. package/dist/types/modules/word/builder/run-builders.d.ts +7 -1
  194. package/dist/types/modules/word/constants.d.ts +4 -0
  195. package/dist/types/modules/word/convert/docx-to-semantic.d.ts +2 -1
  196. package/dist/types/modules/word/convert/html/html-import.d.ts +32 -1
  197. package/dist/types/modules/word/convert/html/html.d.ts +2 -2
  198. package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +48 -18
  199. package/dist/types/modules/word/convert/markdown/markdown.d.ts +1 -1
  200. package/dist/types/modules/word/html.d.ts +2 -2
  201. package/dist/types/modules/word/index.base.d.ts +3 -3
  202. package/dist/types/modules/word/markdown.d.ts +1 -1
  203. package/dist/types/modules/word/query/compat.d.ts +10 -2
  204. package/dist/types/modules/word/types.d.ts +96 -1
  205. package/dist/types/modules/word/writer/glossary-writer.d.ts +28 -0
  206. package/dist/types/utils/font-metrics.d.ts +8 -0
  207. package/package.json +3 -1
@@ -20,7 +20,7 @@
20
20
  * const buffer = await toBuffer(Document.build(h));
21
21
  * ```
22
22
  */
23
- import type { BodyContent } from "../../types.js";
23
+ import type { BodyContent, ImageDef } from "../../types.js";
24
24
  /** Options for HTML to DOCX conversion. */
25
25
  export interface HtmlImportOptions {
26
26
  /** Default font size in half-points (default: 24 = 12pt). */
@@ -48,3 +48,34 @@ export interface HtmlImportOptions {
48
48
  * @returns Array of BodyContent blocks.
49
49
  */
50
50
  export declare function htmlToDocxBody(html: string, options?: HtmlImportOptions): BodyContent[];
51
+ /** Result of {@link htmlToDocx}: body content plus the images it references. */
52
+ export interface HtmlToDocxResult {
53
+ /** Parsed body content blocks. */
54
+ readonly body: BodyContent[];
55
+ /**
56
+ * Images decoded from base64 `data:` URLs in the HTML, each with a unique
57
+ * rId already referenced by the matching image run in `body`. Merge these
58
+ * into the document model's `images` array so the pictures are embedded as
59
+ * real media in the package instead of dropped as placeholders.
60
+ */
61
+ readonly images: ImageDef[];
62
+ }
63
+ /**
64
+ * Convert an HTML string into DOCX body content **and** embedded images.
65
+ *
66
+ * Unlike {@link htmlToDocxBody}, this decodes base64 `data:` image URLs into
67
+ * real {@link ImageDef}s and assigns each a unique rId that the emitted image
68
+ * runs reference. Merge the returned `images` into your document model so the
69
+ * pictures are embedded rather than dropped as placeholders.
70
+ *
71
+ * @example
72
+ * ```ts
73
+ * const { body, images } = htmlToDocx(html);
74
+ * const doc = Document.create();
75
+ * for (const item of body) Document.addContent(doc, item);
76
+ * const built = Document.build(doc);
77
+ * const final = { ...built, images: [...(built.images ?? []), ...images] };
78
+ * const bytes = await toBuffer(final);
79
+ * ```
80
+ */
81
+ export declare function htmlToDocx(html: string, options?: HtmlImportOptions): HtmlToDocxResult;
@@ -20,6 +20,7 @@
20
20
  * const buffer = await toBuffer(Document.build(h));
21
21
  * ```
22
22
  */
23
+ import { base64ToUint8Array } from "../../../../utils/utils.browser.js";
23
24
  import { sanitizeUrl } from "../../core/internal-utils.js";
24
25
  import { EMU_PER_PX } from "../../units.js";
25
26
  /**
@@ -44,10 +45,7 @@ export function htmlToDocxBody(html, options) {
44
45
  const tokens = tokenize(html);
45
46
  // Extract <style> rules and merge with user-provided classStyles
46
47
  const extractedStyles = extractStyleRules(tokens);
47
- const classStyles = {
48
- ...extractedStyles,
49
- ...(options?.classStyles ?? {})
50
- };
48
+ const classStyles = mergeClassStyles(extractedStyles, options?.classStyles ?? {});
51
49
  // Seed the inline context with the caller-supplied defaults so plain text
52
50
  // runs actually carry the requested font/size. Without this the options
53
51
  // were effectively ignored.
@@ -61,6 +59,40 @@ export function htmlToDocxBody(html, options) {
61
59
  parseBlocks(tokens, 0, blocks, initialCtx, classStyles);
62
60
  return blocks;
63
61
  }
62
+ /**
63
+ * Convert an HTML string into DOCX body content **and** embedded images.
64
+ *
65
+ * Unlike {@link htmlToDocxBody}, this decodes base64 `data:` image URLs into
66
+ * real {@link ImageDef}s and assigns each a unique rId that the emitted image
67
+ * runs reference. Merge the returned `images` into your document model so the
68
+ * pictures are embedded rather than dropped as placeholders.
69
+ *
70
+ * @example
71
+ * ```ts
72
+ * const { body, images } = htmlToDocx(html);
73
+ * const doc = Document.create();
74
+ * for (const item of body) Document.addContent(doc, item);
75
+ * const built = Document.build(doc);
76
+ * const final = { ...built, images: [...(built.images ?? []), ...images] };
77
+ * const bytes = await toBuffer(final);
78
+ * ```
79
+ */
80
+ export function htmlToDocx(html, options) {
81
+ const blocks = [];
82
+ const tokens = tokenize(html);
83
+ const extractedStyles = extractStyleRules(tokens);
84
+ const classStyles = mergeClassStyles(extractedStyles, options?.classStyles ?? {});
85
+ const images = [];
86
+ const initialCtx = { imageSink: images };
87
+ if (options?.defaultFont) {
88
+ initialCtx.fontFamily = options.defaultFont;
89
+ }
90
+ if (options?.defaultFontSize !== undefined) {
91
+ initialCtx.fontSize = options.defaultFontSize;
92
+ }
93
+ parseBlocks(tokens, 0, blocks, initialCtx, classStyles);
94
+ return { body: blocks, images };
95
+ }
64
96
  function tokenize(html) {
65
97
  const tokens = [];
66
98
  // Strip HTML comments, doctype declarations and SGML processing
@@ -473,6 +505,22 @@ function extractStyleRules(tokens) {
473
505
  }
474
506
  return result;
475
507
  }
508
+ /**
509
+ * Merge two class→style maps. For classes present in both, the declarations
510
+ * are concatenated (extracted `<style>` rules first, caller-supplied overrides
511
+ * last) so the later source wins per CSS cascade while still preserving
512
+ * properties only declared by the other source. A plain `{ ...a, ...b }`
513
+ * would discard the extracted rule entirely whenever the caller supplies the
514
+ * same class name, silently dropping e.g. `font-style`/`color` from `<style>`.
515
+ */
516
+ function mergeClassStyles(extracted, overrides) {
517
+ const merged = { ...extracted };
518
+ for (const [name, style] of Object.entries(overrides)) {
519
+ const existing = merged[name];
520
+ merged[name] = existing ? `${existing}; ${style}` : style;
521
+ }
522
+ return merged;
523
+ }
476
524
  /**
477
525
  * Parse HTML-style attributes from the inside of a start tag, e.g.
478
526
  * `class="x" id='y' disabled href=foo`.
@@ -1133,6 +1181,17 @@ function parseBlocks(tokens, start, blocks, parentCtx, classStyles) {
1133
1181
  return i + 1; // consumed the close tag
1134
1182
  }
1135
1183
  if (tok.type === "text") {
1184
+ // In block context, text nodes that are pure inter-element whitespace
1185
+ // (the newlines/indentation between block tags in pretty-printed HTML)
1186
+ // carry no content and must be ignored — otherwise every gap between
1187
+ // <p>/<table>/<div> tags would emit a spurious empty paragraph (and
1188
+ // the contained newline would be rendered as a <w:br/> soft break).
1189
+ // Whitespace that sits between inline runs is preserved by the inline
1190
+ // parser, which handles it separately.
1191
+ if (tok.value.trim() === "") {
1192
+ i++;
1193
+ continue;
1194
+ }
1136
1195
  if (!pendingInline) {
1137
1196
  pendingInline = { runs: [], ctx: parentCtx };
1138
1197
  }
@@ -1461,7 +1520,7 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
1461
1520
  runs.push({ content: [{ type: "break" }] });
1462
1521
  }
1463
1522
  else if (tag === "img") {
1464
- const imgContent = buildImageContent(tok.attrs);
1523
+ const imgContent = buildImageContent(tok.attrs, ctx);
1465
1524
  if (imgContent) {
1466
1525
  runs.push({ content: [imgContent] });
1467
1526
  }
@@ -1534,13 +1593,15 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
1534
1593
  i++;
1535
1594
  }
1536
1595
  else if (t.type === "close") {
1596
+ // Mismatched close tag — close the hyperlink here but do NOT
1597
+ // consume the token; let the caller handle the block boundary.
1537
1598
  const hyperlink = {
1538
1599
  type: "hyperlink",
1539
1600
  url: safeHref ?? "",
1540
1601
  children: innerRuns
1541
1602
  };
1542
1603
  runs.push(hyperlink);
1543
- return i + 1;
1604
+ return i;
1544
1605
  }
1545
1606
  else {
1546
1607
  const childRuns = [];
@@ -1581,7 +1642,12 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
1581
1642
  i++;
1582
1643
  }
1583
1644
  else if (t.type === "close") {
1584
- return i + 1;
1645
+ // Mismatched close tag (e.g. </p> while inside an unclosed <strong>).
1646
+ // Do NOT consume it — return the current index so the caller can
1647
+ // handle it. Consuming a block-level close here would swallow the
1648
+ // parent paragraph boundary and pull all following block content
1649
+ // into this run, breaking page breaks, tables, etc.
1650
+ return i;
1585
1651
  }
1586
1652
  else {
1587
1653
  i = parseInlineTag(tokens, i, runs, newCtx, classStyles);
@@ -1660,6 +1726,15 @@ function parseListItem(tokens, start, blocks, ctx, ordered, level, classStyles)
1660
1726
  }
1661
1727
  // Text content
1662
1728
  if (tok.type === "text") {
1729
+ // Skip structural whitespace: the indentation/newlines that sit between
1730
+ // a nested <ul>/<ol> and the closing </li> (or at the very start of the
1731
+ // item) are not real content. Emitting them as runs would otherwise
1732
+ // produce a spurious empty list-item paragraph. Whitespace *between*
1733
+ // real inline content is preserved because `children` is non-empty then.
1734
+ if (tok.value.trim() === "" && children.length === 0) {
1735
+ i++;
1736
+ continue;
1737
+ }
1663
1738
  children.push(makeRun(tok.value, ctx));
1664
1739
  i++;
1665
1740
  continue;
@@ -2064,7 +2139,7 @@ function mapCssBorderStyle(cssStyle) {
2064
2139
  // Image content builder
2065
2140
  // =============================================================================
2066
2141
  /** Build InlineImageContent from img attributes or return undefined if not applicable. */
2067
- function buildImageContent(attrs) {
2142
+ function buildImageContent(attrs, ctx) {
2068
2143
  const src = attrs["src"] || "";
2069
2144
  const alt = attrs["alt"] || "";
2070
2145
  // Parse width/height from attributes first, then fall back to style
@@ -2083,11 +2158,36 @@ function buildImageContent(attrs) {
2083
2158
  // Convert pixels to EMU
2084
2159
  const widthEmu = (width || 100) * EMU_PER_PX;
2085
2160
  const heightEmu = (height || 100) * EMU_PER_PX;
2086
- // Both data: and http(s) URLs become placeholders. The DOCX writer needs
2087
- // a real ImageDef registered in `doc.images` plus a corresponding
2088
- // relationship; htmlToDocxBody returns BodyContent[] only and cannot do
2089
- // that registration. We surface the original src in the alt text so the
2090
- // user can post-process if they need real embedded images.
2161
+ // base64 data: URLs can be decoded and embedded as a real media file when
2162
+ // an image sink is provided (htmlToDocx path). The decoded bytes are
2163
+ // registered as an ImageDef and the run references the assigned rId.
2164
+ if (src.startsWith("data:") && ctx?.imageSink) {
2165
+ const decoded = decodeDataUrlImage(src);
2166
+ if (decoded) {
2167
+ const sink = ctx.imageSink;
2168
+ const index = sink.length;
2169
+ const rId = `htmlImg${index}`;
2170
+ const ext = decoded.mediaType === "jpeg" ? "jpg" : decoded.mediaType;
2171
+ sink.push({
2172
+ data: decoded.data,
2173
+ mediaType: decoded.mediaType,
2174
+ fileName: `image_html_${index}.${ext}`,
2175
+ rId
2176
+ });
2177
+ return {
2178
+ type: "image",
2179
+ rId,
2180
+ width: widthEmu,
2181
+ height: heightEmu,
2182
+ altText: alt || undefined,
2183
+ name: alt || `image${index}`
2184
+ };
2185
+ }
2186
+ }
2187
+ // No sink (htmlToDocxBody only returns BodyContent[] and cannot register
2188
+ // media) or an unsupported/remote source: emit a placeholder with an empty
2189
+ // rId. The renderer treats an empty rId as a placeholder; the original src
2190
+ // is surfaced in the alt text so callers can post-process if needed.
2091
2191
  if (src.startsWith("data:") || src.startsWith("http://") || src.startsWith("https://")) {
2092
2192
  return {
2093
2193
  type: "image",
@@ -2100,6 +2200,54 @@ function buildImageContent(attrs) {
2100
2200
  }
2101
2201
  return undefined;
2102
2202
  }
2203
+ /** Decode a `data:image/...;base64,...` URL into bytes + media type. */
2204
+ function decodeDataUrlImage(src) {
2205
+ // data:image/png;base64,XXXX
2206
+ const match = /^data:image\/([a-z0-9.+-]+)\s*;\s*base64\s*,(.*)$/is.exec(src);
2207
+ if (!match) {
2208
+ return undefined;
2209
+ }
2210
+ const rawType = match[1].toLowerCase();
2211
+ const b64 = match[2].replace(/\s+/g, "");
2212
+ const mediaType = normalizeImageMediaType(rawType);
2213
+ if (!mediaType) {
2214
+ return undefined;
2215
+ }
2216
+ try {
2217
+ const data = base64ToUint8Array(b64);
2218
+ if (data.length === 0) {
2219
+ return undefined;
2220
+ }
2221
+ return { data, mediaType };
2222
+ }
2223
+ catch {
2224
+ return undefined;
2225
+ }
2226
+ }
2227
+ /** Map a data-URL image subtype to a supported ImageMediaType. */
2228
+ function normalizeImageMediaType(subtype) {
2229
+ switch (subtype) {
2230
+ case "png":
2231
+ return "png";
2232
+ case "jpeg":
2233
+ case "jpg":
2234
+ return "jpeg";
2235
+ case "gif":
2236
+ return "gif";
2237
+ case "bmp":
2238
+ return "bmp";
2239
+ case "tiff":
2240
+ case "tif":
2241
+ return "tiff";
2242
+ case "svg+xml":
2243
+ case "svg":
2244
+ return "svg";
2245
+ case "webp":
2246
+ return "webp";
2247
+ default:
2248
+ return undefined;
2249
+ }
2250
+ }
2103
2251
  /** Parse an image dimension from HTML attribute value (number or "Npx"). */
2104
2252
  function parseImageDimension(value) {
2105
2253
  if (!value) {
@@ -2228,6 +2376,11 @@ function resolveEffectiveStyle(attrs, classStyles) {
2228
2376
  // Run builder
2229
2377
  // =============================================================================
2230
2378
  function makeRun(text, ctx) {
2379
+ // HTML whitespace handling: outside <pre>/<code>, runs of whitespace
2380
+ // (including the newlines/indentation from source-code line wrapping)
2381
+ // collapse to a single space. Inside <pre>/<code> whitespace is
2382
+ // significant and preserved verbatim.
2383
+ const value = ctx.code ? text : text.replace(/\s+/g, " ");
2231
2384
  const props = {};
2232
2385
  if (ctx.bold) {
2233
2386
  props.bold = true;
@@ -2264,7 +2417,7 @@ function makeRun(text, ctx) {
2264
2417
  }
2265
2418
  const run = {
2266
2419
  ...(Object.keys(props).length > 0 ? { properties: props } : {}),
2267
- content: [{ type: "text", text }]
2420
+ content: [{ type: "text", text: value }]
2268
2421
  };
2269
2422
  return run;
2270
2423
  }
@@ -11,5 +11,5 @@
11
11
  */
12
12
  export { renderToHtml } from "./html-renderer.js";
13
13
  export type { HtmlRenderOptions, HtmlRenderResult } from "./html-renderer.js";
14
- export { htmlToDocxBody } from "./html-import.js";
15
- export type { HtmlImportOptions } from "./html-import.js";
14
+ export { htmlToDocxBody, htmlToDocx } from "./html-import.js";
15
+ export type { HtmlImportOptions, HtmlToDocxResult } from "./html-import.js";
@@ -12,4 +12,4 @@
12
12
  // HTML → render (DocxDocument → HTML output)
13
13
  export { renderToHtml } from "./html-renderer.js";
14
14
  // HTML → DOCX import (HTML string → BodyContent[])
15
- export { htmlToDocxBody } from "./html-import.js";
15
+ export { htmlToDocxBody, htmlToDocx } from "./html-import.js";
@@ -16,7 +16,7 @@
16
16
  *
17
17
  * @stability experimental
18
18
  */
19
- import type { BodyContent, DocxDocument } from "../../types.js";
19
+ import type { AbstractNumbering, BodyContent, DocxDocument, FootnoteDef, ImageDef, ImageMediaType, NumberingInstance } from "../../types.js";
20
20
  /** Options for Markdown to DOCX conversion. */
21
21
  export interface MarkdownImportOptions {
22
22
  /** Default font family for body text. */
@@ -33,36 +33,66 @@ export interface MarkdownImportOptions {
33
33
  /** Resolved image data for embedding. */
34
34
  export interface MarkdownImageData {
35
35
  readonly data: Uint8Array;
36
- readonly mediaType: "png" | "jpeg" | "gif" | "bmp" | "tiff" | "svg" | "webp";
36
+ readonly mediaType: ImageMediaType;
37
37
  readonly width?: number;
38
38
  readonly height?: number;
39
+ /**
40
+ * Raster (PNG) fallback for vector images. Required by Word for `svg`
41
+ * images so non-SVG-aware viewers have something to display. When the
42
+ * media type is `svg` and this is omitted, the packager synthesizes a
43
+ * transparent placeholder PNG automatically.
44
+ */
45
+ readonly fallbackData?: Uint8Array;
46
+ }
47
+ /**
48
+ * Result of {@link markdownToDocxBody} — the parsed body content plus the
49
+ * supporting document-level definitions it references.
50
+ *
51
+ * Lists, footnotes and images are *not* self-contained: a list paragraph
52
+ * references a numbering id, a footnote reference run references a
53
+ * `FootnoteDef`, and an inline image references an `ImageDef`. Splicing the
54
+ * `body` alone into a host document that lacks these definitions yields
55
+ * invalid OOXML. Merge the relevant arrays into the host document (or its
56
+ * builder state) alongside the body.
57
+ */
58
+ export interface MarkdownBodyResult {
59
+ readonly body: BodyContent[];
60
+ readonly abstractNumberings: AbstractNumbering[];
61
+ readonly numberingInstances: NumberingInstance[];
62
+ readonly footnotes: FootnoteDef[];
63
+ readonly images: ImageDef[];
39
64
  }
40
65
  /**
41
66
  * Convert a Markdown string into a complete DocxDocument.
42
67
  *
68
+ * Supports the full GFM feature set including inline images (embedded via the
69
+ * `resolveImage` callback) and footnotes (`[^id]` references with `[^id]: …`
70
+ * definitions). Because image resolution and document packaging are inherently
71
+ * asynchronous, this function is async.
72
+ *
43
73
  * @param markdown - The GFM Markdown string.
44
74
  * @param options - Optional conversion settings.
45
- * @returns A DocxDocument ready to be packaged.
75
+ * @returns A Promise resolving to a DocxDocument ready to be packaged.
46
76
  */
47
- export declare function markdownToDocx(markdown: string, options?: MarkdownImportOptions): DocxDocument;
77
+ export declare function markdownToDocx(markdown: string, options?: MarkdownImportOptions): Promise<DocxDocument>;
48
78
  /**
49
- * Convert a Markdown string into an array of DOCX body content blocks.
79
+ * Convert a Markdown string into DOCX body content plus the supporting
80
+ * document-level definitions it references.
50
81
  *
51
- * **Caveat — body content is not self-contained.**
52
- * - **Lists** (bullet / numbered / task) reference numbering ids that
53
- * live in document-level `abstractNumberings` + `numberingInstances`,
54
- * which this helper does NOT return.
55
- * - **Block quotes** reference the named `Quote` style.
56
- * - **Code blocks** reference the named code styles.
82
+ * **Caveat — body content is not self-contained.** The returned `body` may
83
+ * reference:
84
+ * - **Numbering** (`abstractNumberings` / `numberingInstances`) — used by
85
+ * bullet / numbered / task lists.
86
+ * - **Footnotes** (`footnotes`) referenced by footnote-reference runs.
87
+ * - **Images** (`images`) referenced by inline image runs.
88
+ * - The named `Quote` / `CodeBlock` styles (for block quotes / code blocks).
57
89
  *
58
- * Splicing markdown that uses any of these constructs into a document that
59
- * lacks the matching numbering / styles yields invalid OOXML. Either keep
60
- * the input flat (paragraphs + headings + inline formatting) before
61
- * splicing, or use the higher-level {@link markdownToDocx} which returns a
62
- * complete `DocxDocument` with the supporting definitions populated.
90
+ * Splice the relevant arrays into your host document alongside the body, or
91
+ * use the higher-level {@link markdownToDocx} which returns a complete
92
+ * `DocxDocument` with everything populated.
63
93
  *
64
94
  * @param markdown - The GFM Markdown string.
65
95
  * @param options - Optional conversion settings.
66
- * @returns Array of BodyContent blocks (no numbering / styles attached).
96
+ * @returns A Promise resolving to the body and its supporting definitions.
67
97
  */
68
- export declare function markdownToDocxBody(markdown: string, options?: MarkdownImportOptions): BodyContent[];
98
+ export declare function markdownToDocxBody(markdown: string, options?: MarkdownImportOptions): Promise<MarkdownBodyResult>;