@cj-tech-master/excelts 9.6.1 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/README.md +18 -3
  2. package/README_zh.md +18 -3
  3. package/dist/browser/modules/excel/cell.d.ts +4 -0
  4. package/dist/browser/modules/excel/note.js +5 -1
  5. package/dist/browser/modules/excel/row.js +35 -2
  6. package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
  7. package/dist/browser/modules/excel/stream/workbook-writer.browser.js +22 -2
  8. package/dist/browser/modules/excel/types.d.ts +81 -0
  9. package/dist/browser/modules/excel/utils/drawing-utils.d.ts +8 -0
  10. package/dist/browser/modules/excel/utils/drawing-utils.js +19 -2
  11. package/dist/browser/modules/excel/workbook.browser.d.ts +16 -0
  12. package/dist/browser/modules/excel/workbook.browser.js +32 -2
  13. package/dist/browser/modules/excel/worksheet.d.ts +31 -1
  14. package/dist/browser/modules/excel/worksheet.js +83 -0
  15. package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
  16. package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  17. package/dist/browser/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  18. package/dist/browser/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  19. package/dist/browser/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  20. package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
  21. package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  22. package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
  23. package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  24. package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
  25. package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  26. package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
  27. package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
  28. package/dist/browser/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  29. package/dist/browser/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  30. package/dist/browser/modules/pdf/builder/document-builder.js +22 -49
  31. package/dist/browser/modules/pdf/builder/pdf-editor.js +1 -1
  32. package/dist/browser/modules/pdf/core/pdf-stream.d.ts +28 -1
  33. package/dist/browser/modules/pdf/core/pdf-stream.js +38 -2
  34. package/dist/browser/modules/pdf/font/font-manager.d.ts +26 -0
  35. package/dist/browser/modules/pdf/font/font-manager.js +35 -18
  36. package/dist/browser/modules/pdf/render/page-renderer.d.ts +51 -3
  37. package/dist/browser/modules/pdf/render/page-renderer.js +111 -18
  38. package/dist/browser/modules/word/advanced/field-engine.js +45 -20
  39. package/dist/browser/modules/word/advanced/glossary.d.ts +10 -36
  40. package/dist/browser/modules/word/advanced/glossary.js +8 -9
  41. package/dist/browser/modules/word/advanced/math-convert.js +94 -12
  42. package/dist/browser/modules/word/advanced/ole-objects.d.ts +28 -0
  43. package/dist/browser/modules/word/advanced/ole-objects.js +122 -19
  44. package/dist/browser/modules/word/advanced/style-map.js +31 -10
  45. package/dist/browser/modules/word/builder/run-builders.d.ts +7 -1
  46. package/dist/browser/modules/word/builder/run-builders.js +7 -1
  47. package/dist/browser/modules/word/constants.d.ts +4 -0
  48. package/dist/browser/modules/word/constants.js +5 -1
  49. package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +2 -1
  50. package/dist/browser/modules/word/convert/docx-to-semantic.js +135 -1
  51. package/dist/browser/modules/word/convert/html/html-import.d.ts +32 -1
  52. package/dist/browser/modules/word/convert/html/html-import.js +167 -14
  53. package/dist/browser/modules/word/convert/html/html.d.ts +2 -2
  54. package/dist/browser/modules/word/convert/html/html.js +1 -1
  55. package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +48 -18
  56. package/dist/browser/modules/word/convert/markdown/markdown-import.js +279 -69
  57. package/dist/browser/modules/word/convert/markdown/markdown.d.ts +1 -1
  58. package/dist/browser/modules/word/convert/odt/odt.js +407 -56
  59. package/dist/browser/modules/word/html.d.ts +2 -2
  60. package/dist/browser/modules/word/html.js +1 -1
  61. package/dist/browser/modules/word/index.base.d.ts +3 -3
  62. package/dist/browser/modules/word/index.base.js +1 -1
  63. package/dist/browser/modules/word/layout/layout-full.js +326 -19
  64. package/dist/browser/modules/word/layout/render-page.js +35 -8
  65. package/dist/browser/modules/word/markdown.d.ts +1 -1
  66. package/dist/browser/modules/word/query/compat.d.ts +10 -2
  67. package/dist/browser/modules/word/query/compat.js +29 -21
  68. package/dist/browser/modules/word/reader/docx-reader.js +105 -2
  69. package/dist/browser/modules/word/reader/math-parser.js +8 -2
  70. package/dist/browser/modules/word/security/cfb-reader.js +5 -5
  71. package/dist/browser/modules/word/types.d.ts +96 -1
  72. package/dist/browser/modules/word/writer/docx-packager.js +108 -2
  73. package/dist/browser/modules/word/writer/glossary-writer.d.ts +28 -0
  74. package/dist/browser/modules/word/writer/glossary-writer.js +121 -0
  75. package/dist/browser/modules/word/writer/header-footer-writer.js +105 -20
  76. package/dist/browser/modules/word/writer/math-writer.js +7 -2
  77. package/dist/browser/utils/font-metrics.d.ts +8 -0
  78. package/dist/browser/utils/font-metrics.js +43 -0
  79. package/dist/browser/utils/theme-colors.js +4 -1
  80. package/dist/cjs/modules/excel/note.js +5 -1
  81. package/dist/cjs/modules/excel/row.js +35 -2
  82. package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +22 -2
  83. package/dist/cjs/modules/excel/utils/drawing-utils.js +19 -2
  84. package/dist/cjs/modules/excel/workbook.browser.js +31 -1
  85. package/dist/cjs/modules/excel/worksheet.js +83 -0
  86. package/dist/cjs/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  87. package/dist/cjs/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  88. package/dist/cjs/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  89. package/dist/cjs/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  90. package/dist/cjs/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  91. package/dist/cjs/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  92. package/dist/cjs/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  93. package/dist/cjs/modules/excel/xlsx/xform/drawing/shape-xform.js +112 -0
  94. package/dist/cjs/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  95. package/dist/cjs/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  96. package/dist/cjs/modules/pdf/builder/document-builder.js +21 -48
  97. package/dist/cjs/modules/pdf/builder/pdf-editor.js +1 -1
  98. package/dist/cjs/modules/pdf/core/pdf-stream.js +38 -2
  99. package/dist/cjs/modules/pdf/font/font-manager.js +35 -18
  100. package/dist/cjs/modules/pdf/render/page-renderer.js +112 -18
  101. package/dist/cjs/modules/word/advanced/field-engine.js +45 -20
  102. package/dist/cjs/modules/word/advanced/glossary.js +8 -9
  103. package/dist/cjs/modules/word/advanced/math-convert.js +94 -12
  104. package/dist/cjs/modules/word/advanced/ole-objects.js +123 -19
  105. package/dist/cjs/modules/word/advanced/style-map.js +31 -10
  106. package/dist/cjs/modules/word/builder/run-builders.js +7 -1
  107. package/dist/cjs/modules/word/constants.js +5 -1
  108. package/dist/cjs/modules/word/convert/docx-to-semantic.js +135 -1
  109. package/dist/cjs/modules/word/convert/html/html-import.js +168 -14
  110. package/dist/cjs/modules/word/convert/html/html.js +2 -1
  111. package/dist/cjs/modules/word/convert/markdown/markdown-import.js +279 -69
  112. package/dist/cjs/modules/word/convert/odt/odt.js +407 -56
  113. package/dist/cjs/modules/word/html.js +2 -1
  114. package/dist/cjs/modules/word/index.base.js +4 -3
  115. package/dist/cjs/modules/word/layout/layout-full.js +325 -18
  116. package/dist/cjs/modules/word/layout/render-page.js +35 -8
  117. package/dist/cjs/modules/word/query/compat.js +29 -21
  118. package/dist/cjs/modules/word/reader/docx-reader.js +104 -1
  119. package/dist/cjs/modules/word/reader/math-parser.js +8 -2
  120. package/dist/cjs/modules/word/security/cfb-reader.js +5 -5
  121. package/dist/cjs/modules/word/writer/docx-packager.js +108 -2
  122. package/dist/cjs/modules/word/writer/glossary-writer.js +124 -0
  123. package/dist/cjs/modules/word/writer/header-footer-writer.js +105 -20
  124. package/dist/cjs/modules/word/writer/math-writer.js +7 -2
  125. package/dist/cjs/utils/font-metrics.js +44 -0
  126. package/dist/cjs/utils/theme-colors.js +4 -1
  127. package/dist/esm/modules/excel/note.js +5 -1
  128. package/dist/esm/modules/excel/row.js +35 -2
  129. package/dist/esm/modules/excel/stream/workbook-writer.browser.js +22 -2
  130. package/dist/esm/modules/excel/utils/drawing-utils.js +19 -2
  131. package/dist/esm/modules/excel/workbook.browser.js +32 -2
  132. package/dist/esm/modules/excel/worksheet.js +83 -0
  133. package/dist/esm/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  134. package/dist/esm/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  135. package/dist/esm/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  136. package/dist/esm/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  137. package/dist/esm/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  138. package/dist/esm/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  139. package/dist/esm/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  140. package/dist/esm/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
  141. package/dist/esm/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  142. package/dist/esm/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  143. package/dist/esm/modules/pdf/builder/document-builder.js +22 -49
  144. package/dist/esm/modules/pdf/builder/pdf-editor.js +1 -1
  145. package/dist/esm/modules/pdf/core/pdf-stream.js +38 -2
  146. package/dist/esm/modules/pdf/font/font-manager.js +35 -18
  147. package/dist/esm/modules/pdf/render/page-renderer.js +111 -18
  148. package/dist/esm/modules/word/advanced/field-engine.js +45 -20
  149. package/dist/esm/modules/word/advanced/glossary.js +8 -9
  150. package/dist/esm/modules/word/advanced/math-convert.js +94 -12
  151. package/dist/esm/modules/word/advanced/ole-objects.js +122 -19
  152. package/dist/esm/modules/word/advanced/style-map.js +31 -10
  153. package/dist/esm/modules/word/builder/run-builders.js +7 -1
  154. package/dist/esm/modules/word/constants.js +5 -1
  155. package/dist/esm/modules/word/convert/docx-to-semantic.js +135 -1
  156. package/dist/esm/modules/word/convert/html/html-import.js +167 -14
  157. package/dist/esm/modules/word/convert/html/html.js +1 -1
  158. package/dist/esm/modules/word/convert/markdown/markdown-import.js +279 -69
  159. package/dist/esm/modules/word/convert/odt/odt.js +407 -56
  160. package/dist/esm/modules/word/html.js +1 -1
  161. package/dist/esm/modules/word/index.base.js +1 -1
  162. package/dist/esm/modules/word/layout/layout-full.js +326 -19
  163. package/dist/esm/modules/word/layout/render-page.js +35 -8
  164. package/dist/esm/modules/word/query/compat.js +29 -21
  165. package/dist/esm/modules/word/reader/docx-reader.js +105 -2
  166. package/dist/esm/modules/word/reader/math-parser.js +8 -2
  167. package/dist/esm/modules/word/security/cfb-reader.js +5 -5
  168. package/dist/esm/modules/word/writer/docx-packager.js +108 -2
  169. package/dist/esm/modules/word/writer/glossary-writer.js +121 -0
  170. package/dist/esm/modules/word/writer/header-footer-writer.js +105 -20
  171. package/dist/esm/modules/word/writer/math-writer.js +7 -2
  172. package/dist/esm/utils/font-metrics.js +43 -0
  173. package/dist/esm/utils/theme-colors.js +4 -1
  174. package/dist/iife/excelts.iife.js +496 -59
  175. package/dist/iife/excelts.iife.js.map +1 -1
  176. package/dist/iife/excelts.iife.min.js +39 -39
  177. package/dist/types/modules/excel/cell.d.ts +4 -0
  178. package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
  179. package/dist/types/modules/excel/types.d.ts +81 -0
  180. package/dist/types/modules/excel/utils/drawing-utils.d.ts +8 -0
  181. package/dist/types/modules/excel/workbook.browser.d.ts +16 -0
  182. package/dist/types/modules/excel/worksheet.d.ts +31 -1
  183. package/dist/types/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
  184. package/dist/types/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
  185. package/dist/types/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
  186. package/dist/types/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
  187. package/dist/types/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
  188. package/dist/types/modules/pdf/core/pdf-stream.d.ts +28 -1
  189. package/dist/types/modules/pdf/font/font-manager.d.ts +26 -0
  190. package/dist/types/modules/pdf/render/page-renderer.d.ts +51 -3
  191. package/dist/types/modules/word/advanced/glossary.d.ts +10 -36
  192. package/dist/types/modules/word/advanced/ole-objects.d.ts +28 -0
  193. package/dist/types/modules/word/builder/run-builders.d.ts +7 -1
  194. package/dist/types/modules/word/constants.d.ts +4 -0
  195. package/dist/types/modules/word/convert/docx-to-semantic.d.ts +2 -1
  196. package/dist/types/modules/word/convert/html/html-import.d.ts +32 -1
  197. package/dist/types/modules/word/convert/html/html.d.ts +2 -2
  198. package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +48 -18
  199. package/dist/types/modules/word/convert/markdown/markdown.d.ts +1 -1
  200. package/dist/types/modules/word/html.d.ts +2 -2
  201. package/dist/types/modules/word/index.base.d.ts +3 -3
  202. package/dist/types/modules/word/markdown.d.ts +1 -1
  203. package/dist/types/modules/word/query/compat.d.ts +10 -2
  204. package/dist/types/modules/word/types.d.ts +96 -1
  205. package/dist/types/modules/word/writer/glossary-writer.d.ts +28 -0
  206. package/dist/types/utils/font-metrics.d.ts +8 -0
  207. package/package.json +3 -1
@@ -79,20 +79,98 @@ function convertNodeToMathML(node) {
79
79
  }
80
80
  }
81
81
  function convertMathRunToMathML(node) {
82
- const text = xmlEncode(node.text);
83
- // Operators and special characters
84
- if (isOperator(node.text)) {
85
- return `<mo>${text}</mo>`;
82
+ const raw = node.text;
83
+ // A single OMML run can hold a mix of letters, digits, operators and
84
+ // whitespace (e.g. "a + b = "). MathML presentation markup expects each
85
+ // token to be wrapped in the element matching its semantic category:
86
+ // <mi> for identifiers, <mn> for numbers, <mo> for operators and <mtext>
87
+ // for runs of whitespace / other text. Tokenize the run and emit one
88
+ // element per token so a mixed run is no longer flattened into a single
89
+ // (incorrect) <mi>.
90
+ const normalVariant = node.properties?.italic === false;
91
+ const tokens = tokenizeMathRun(raw);
92
+ // Fast path / backwards-compatibility: a run that is a single token keeps
93
+ // producing exactly one element (e.g. "x" -> <mi>x</mi>, "42" -> <mn>42</mn>,
94
+ // "+" -> <mo>+</mo>), matching the historical output.
95
+ return tokens.map(tok => emitMathToken(tok, normalVariant)).join("");
96
+ }
97
+ function classifyMathChar(ch) {
98
+ if (/\s/.test(ch)) {
99
+ return "text";
100
+ }
101
+ if (isOperator(ch)) {
102
+ return "operator";
103
+ }
104
+ if (/[0-9.]/.test(ch)) {
105
+ return "number";
106
+ }
107
+ if (/[A-Za-z]/.test(ch)) {
108
+ return "identifier";
109
+ }
110
+ // Letters outside ASCII (Greek, CJK, etc.) and any other symbol fall back to
111
+ // identifier — this matches how OMML treats variable names.
112
+ return "identifier";
113
+ }
114
+ function tokenizeMathRun(text) {
115
+ const tokens = [];
116
+ // Use code points so astral / combined characters are not split mid-symbol.
117
+ const chars = Array.from(text);
118
+ for (const ch of chars) {
119
+ const kind = classifyMathChar(ch);
120
+ const last = tokens[tokens.length - 1];
121
+ // Operators are always emitted as their own token (each operator is a
122
+ // distinct <mo>). Numbers, identifiers and text coalesce with the
123
+ // previous token of the same kind so "42" stays a single <mn> and a run
124
+ // of spaces stays a single <mtext>.
125
+ if (last && last.kind === kind && kind !== "operator") {
126
+ last.value += ch;
127
+ }
128
+ else {
129
+ tokens.push({ kind, value: ch });
130
+ }
86
131
  }
87
- // Numbers
88
- if (/^\d+(\.\d+)?$/.test(node.text)) {
89
- return `<mn>${text}</mn>`;
132
+ return mergeDecimalPoints(tokens);
133
+ }
134
+ /**
135
+ * A "." between two digit groups is a decimal point, not an operator. The
136
+ * char classifier sees "." as an operator (it is a valid math operator on its
137
+ * own, e.g. function composition), so stitch `<number> "." <number>` back into
138
+ * a single number token here.
139
+ */
140
+ function mergeDecimalPoints(tokens) {
141
+ const out = [];
142
+ for (let i = 0; i < tokens.length; i++) {
143
+ const tok = tokens[i];
144
+ const prev = out[out.length - 1];
145
+ const next = tokens[i + 1];
146
+ if (tok.kind === "operator" &&
147
+ tok.value === "." &&
148
+ prev &&
149
+ prev.kind === "number" &&
150
+ next &&
151
+ next.kind === "number") {
152
+ prev.value += "." + next.value;
153
+ i++; // consume the following number token
154
+ continue;
155
+ }
156
+ out.push(tok);
90
157
  }
91
- // Identifiers
92
- if (node.properties?.italic === false) {
93
- return `<mi mathvariant="normal">${text}</mi>`;
158
+ return out;
159
+ }
160
+ function emitMathToken(tok, normalVariant) {
161
+ const text = xmlEncode(tok.value);
162
+ switch (tok.kind) {
163
+ case "operator":
164
+ return `<mo>${text}</mo>`;
165
+ case "number":
166
+ // A lone "." is not a number; treat it as an operator/text fallback.
167
+ return /\d/.test(tok.value) ? `<mn>${text}</mn>` : `<mo>${text}</mo>`;
168
+ case "text":
169
+ return `<mtext>${text}</mtext>`;
170
+ case "identifier":
171
+ default:
172
+ return normalVariant ? `<mi mathvariant="normal">${text}</mi>` : `<mi>${text}</mi>`;
94
173
  }
95
- return `<mi>${text}</mi>`;
96
174
  }
97
175
  function convertFractionToMathML(node) {
98
176
  const num = childrenToMathML(node.numerator);
@@ -254,7 +332,11 @@ function convertMMLElement(el) {
254
332
  }
255
333
  case "msqrt": {
256
334
  const content = convertMMLChildren(el.children);
257
- return { type: "mathRadical", content };
335
+ // A bare square root has no degree. OOXML still emits an (empty)
336
+ // <m:deg/>, so we must set hideDegree → <m:degHide m:val="1"/>;
337
+ // otherwise Word treats the empty degree as visible and draws an empty
338
+ // degree box (a small square) at the radical's upper-left.
339
+ return { type: "mathRadical", content, hideDegree: true };
258
340
  }
259
341
  case "mroot": {
260
342
  const children = getElementChildren(el);
@@ -81,6 +81,8 @@ export interface OleEmbeddingResult {
81
81
  readonly olePart: OpaquePart;
82
82
  /** Suggested rId to use for the OLE binary in the document model. */
83
83
  readonly oleRId: string;
84
+ /** OLE ProgId the embedding was created with (e.g. "Excel.Sheet.12"). */
85
+ readonly progId: string;
84
86
  /** Preview image media part (only when `options.previewImage` was supplied). */
85
87
  readonly previewPart?: OpaquePart;
86
88
  /** Suggested rId for the preview image. */
@@ -113,3 +115,29 @@ export declare function createOleEmbedding(data: Uint8Array, progId: string, opt
113
115
  /** Override preview file name. Defaults to `image<N>.<ext>` from previewContentType. */
114
116
  previewFileName?: string;
115
117
  }): OleEmbeddingResult;
118
+ /**
119
+ * Wire an {@link OleEmbeddingResult} into a document so the OLE object is
120
+ * actually rendered and resolvable, returning a new {@link DocxDocument}.
121
+ *
122
+ * Unlike just stuffing the part into `opaqueParts` (which leaves the binary
123
+ * dangling — no relationship, no body reference), this:
124
+ *
125
+ * - registers the OLE binary (and optional preview) on
126
+ * `doc.oleObjects` so the packager emits a `word/_rels/document.xml.rels`
127
+ * relationship with the exact rId and a `[Content_Types].xml` override;
128
+ * - appends a body paragraph carrying a `<w:object>` / `<o:OLEObject>`
129
+ * that references the same rId and embeds the ProgId, so the object is
130
+ * visible in Word and round-trips through `readDocx`.
131
+ *
132
+ * @param doc - The document to add the OLE object to.
133
+ * @param embedding - Result from {@link createOleEmbedding}.
134
+ * @param options - Display geometry (defaults to a 2"×2" icon box).
135
+ */
136
+ export declare function addOleObject(doc: DocxDocument, embedding: OleEmbeddingResult, options?: {
137
+ /** Display width in points (default 96 = 2 inches at 48pt/in icon). */
138
+ widthPt?: number;
139
+ /** Display height in points (default 96). */
140
+ heightPt?: number;
141
+ /** Display mode. Default "icon". */
142
+ displayAs?: OleDisplayAs;
143
+ }): DocxDocument;
@@ -13,6 +13,8 @@
13
13
  * This module focuses on preservation (round-trip) and metadata extraction,
14
14
  * not full OLE compound document manipulation.
15
15
  */
16
+ import { xmlEncodeAttr } from "../../xml/encode.js";
17
+ import { ContentType } from "../constants.js";
16
18
  import { getFileName } from "../core/opc-paths.js";
17
19
  // =============================================================================
18
20
  // OLE Object Extraction
@@ -28,14 +30,33 @@ import { getFileName } from "../core/opc-paths.js";
28
30
  export function extractOleObjects(doc) {
29
31
  const objects = [];
30
32
  const summary = {};
31
- // Scan opaque parts for OLE embeddings
33
+ const pushObject = (obj) => {
34
+ objects.push(obj);
35
+ summary[obj.progId] = (summary[obj.progId] ?? 0) + 1;
36
+ };
37
+ // Structured OLE objects wired on document.xml.rels (preferred form —
38
+ // these carry the real rId and, when available, the round-tripped progId).
39
+ if (doc.oleObjects) {
40
+ for (const ole of doc.oleObjects) {
41
+ pushObject({
42
+ rId: ole.rId,
43
+ progId: ole.progId ?? detectProgIdFromData(ole.data),
44
+ objectType: "embedded",
45
+ displayAs: "icon",
46
+ imageRId: ole.previewRId,
47
+ fileName: getFileName(ole.path),
48
+ data: ole.data
49
+ });
50
+ }
51
+ }
52
+ // Scan opaque parts for OLE embeddings (legacy / hand-built documents that
53
+ // did not go through the structured oleObjects channel).
32
54
  if (doc.opaqueParts) {
33
55
  for (const part of doc.opaqueParts) {
34
56
  if (isOleEmbedding(part.path)) {
35
57
  const obj = parseOlePartMetadata(part);
36
58
  if (obj) {
37
- objects.push(obj);
38
- summary[obj.progId] = (summary[obj.progId] ?? 0) + 1;
59
+ pushObject(obj);
39
60
  }
40
61
  }
41
62
  }
@@ -45,11 +66,10 @@ export function extractOleObjects(doc) {
45
66
  if (element.type === "opaqueDrawing") {
46
67
  const oleFromDrawing = extractOleFromRawXml(element.rawXml);
47
68
  if (oleFromDrawing) {
48
- // Check if we already have this object from opaque parts
49
- const exists = objects.some(o => o.rId === oleFromDrawing.rId);
69
+ // Check if we already have this object (by rId)
70
+ const exists = objects.some(o => o.rId === oleFromDrawing.rId && o.rId !== "");
50
71
  if (!exists) {
51
- objects.push(oleFromDrawing);
52
- summary[oleFromDrawing.progId] = (summary[oleFromDrawing.progId] ?? 0) + 1;
72
+ pushObject(oleFromDrawing);
53
73
  }
54
74
  }
55
75
  }
@@ -60,6 +80,9 @@ export function extractOleObjects(doc) {
60
80
  * Check if a document contains any OLE embedded objects.
61
81
  */
62
82
  export function hasOleObjects(doc) {
83
+ if (doc.oleObjects && doc.oleObjects.length > 0) {
84
+ return true;
85
+ }
63
86
  if (doc.opaqueParts) {
64
87
  for (const part of doc.opaqueParts) {
65
88
  if (isOleEmbedding(part.path)) {
@@ -74,6 +97,14 @@ export function hasOleObjects(doc) {
74
97
  * Returns undefined if not found.
75
98
  */
76
99
  export function getOleObjectData(doc, rId) {
100
+ // Structured OLE objects carry the exact rId used on document.xml.rels.
101
+ if (doc.oleObjects) {
102
+ for (const ole of doc.oleObjects) {
103
+ if (ole.rId === rId) {
104
+ return ole.data;
105
+ }
106
+ }
107
+ }
77
108
  if (!doc.opaqueParts) {
78
109
  return undefined;
79
110
  }
@@ -126,16 +157,14 @@ export function createOleEmbedding(data, progId, options) {
126
157
  const olePart = {
127
158
  path: `word/embeddings/${fileName}`,
128
159
  data,
129
- contentType: "application/vnd.openxmlformats-officedocument.oleObject",
160
+ contentType: ContentType.OleObject,
130
161
  relationships: undefined
131
162
  };
132
163
  const oleRId = `rIdOle${oleSeq}`;
133
- // progId is metadata for downstream consumers not stored on
134
- // OpaquePart but accepted in the signature so callers can pass it
135
- // alongside without a separate channel. We don't need it here.
136
- void progId;
164
+ // progId is carried back on the result so addOleObject() can persist it
165
+ // into the body `<o:OLEObject ProgID="…">` markup for round-trip.
137
166
  if (!options?.previewImage) {
138
- return { olePart, oleRId };
167
+ return { olePart, oleRId, progId };
139
168
  }
140
169
  if (!options.previewContentType) {
141
170
  throw new Error("createOleEmbedding: options.previewImage requires options.previewContentType");
@@ -150,7 +179,78 @@ export function createOleEmbedding(data, progId, options) {
150
179
  relationships: undefined
151
180
  };
152
181
  const previewRId = `rIdOleImg${previewSeq}`;
153
- return { olePart, oleRId, previewPart, previewRId };
182
+ return { olePart, oleRId, progId, previewPart, previewRId };
183
+ }
184
+ /**
185
+ * Wire an {@link OleEmbeddingResult} into a document so the OLE object is
186
+ * actually rendered and resolvable, returning a new {@link DocxDocument}.
187
+ *
188
+ * Unlike just stuffing the part into `opaqueParts` (which leaves the binary
189
+ * dangling — no relationship, no body reference), this:
190
+ *
191
+ * - registers the OLE binary (and optional preview) on
192
+ * `doc.oleObjects` so the packager emits a `word/_rels/document.xml.rels`
193
+ * relationship with the exact rId and a `[Content_Types].xml` override;
194
+ * - appends a body paragraph carrying a `<w:object>` / `<o:OLEObject>`
195
+ * that references the same rId and embeds the ProgId, so the object is
196
+ * visible in Word and round-trips through `readDocx`.
197
+ *
198
+ * @param doc - The document to add the OLE object to.
199
+ * @param embedding - Result from {@link createOleEmbedding}.
200
+ * @param options - Display geometry (defaults to a 2"×2" icon box).
201
+ */
202
+ export function addOleObject(doc, embedding, options) {
203
+ const widthPt = options?.widthPt ?? 96;
204
+ const heightPt = options?.heightPt ?? 96;
205
+ const drawAspect = (options?.displayAs ?? "icon") === "icon" ? "Icon" : "Content";
206
+ const olePartEntry = {
207
+ path: embedding.olePart.path,
208
+ data: embedding.olePart.data,
209
+ rId: embedding.oleRId,
210
+ progId: embedding.progId,
211
+ contentType: embedding.olePart.contentType,
212
+ ...(embedding.previewPart && embedding.previewRId
213
+ ? {
214
+ previewPath: embedding.previewPart.path,
215
+ previewData: embedding.previewPart.data,
216
+ previewRId: embedding.previewRId,
217
+ previewContentType: embedding.previewPart.contentType
218
+ }
219
+ : {})
220
+ };
221
+ // Build the VML-hosted <w:object>. The o:OLEObject carries ProgID + the
222
+ // r:id of the binary; the v:shape provides geometry and (when present) the
223
+ // preview image fill via v:imagedata. This is the canonical OOXML shape for
224
+ // an embedded OLE object (ECMA-376 §17.3.3.19 + VML).
225
+ const shapeId = `_ole_${embedding.oleRId}`;
226
+ const styleWidth = widthPt.toFixed(0);
227
+ const styleHeight = heightPt.toFixed(0);
228
+ const imageData = embedding.previewRId != null
229
+ ? `<v:imagedata r:id="${xmlEncodeAttr(embedding.previewRId)}" o:title=""/>`
230
+ : "";
231
+ const rawXml = `<w:object>` +
232
+ `<v:shape id="${xmlEncodeAttr(shapeId)}" type="#_x0000_t75" ` +
233
+ `style="width:${styleWidth}pt;height:${styleHeight}pt">` +
234
+ imageData +
235
+ `</v:shape>` +
236
+ `<o:OLEObject Type="Embed" ProgID="${xmlEncodeAttr(embedding.progId)}" ` +
237
+ `ShapeID="${xmlEncodeAttr(shapeId)}" DrawAspect="${drawAspect}" ` +
238
+ `r:id="${xmlEncodeAttr(embedding.oleRId)}"/>` +
239
+ `</w:object>`;
240
+ const referencedRIds = [embedding.oleRId];
241
+ if (embedding.previewRId != null) {
242
+ referencedRIds.push(embedding.previewRId);
243
+ }
244
+ const drawing = {
245
+ type: "opaqueDrawing",
246
+ rawXml,
247
+ referencedRIds
248
+ };
249
+ return {
250
+ ...doc,
251
+ body: [...doc.body, drawing],
252
+ oleObjects: [...(doc.oleObjects ?? []), olePartEntry]
253
+ };
154
254
  }
155
255
  /** Module-level counters used to allocate unique file names per call. */
156
256
  let _oleSeq = 0;
@@ -242,17 +342,20 @@ function tryDecodeAscii(data) {
242
342
  return str;
243
343
  }
244
344
  function extractOleFromRawXml(rawXml) {
245
- // Parse OLE object info from raw XML using regex (lightweight)
246
- const progIdMatch = rawXml.match(/ProgID="([^"]+)"/i) ?? rawXml.match(/progId="([^"]+)"/i);
247
- const rIdMatch = rawXml.match(/r:id="([^"]+)"/i);
248
- const typeMatch = rawXml.match(/Type="([^"]+)"/i);
345
+ // Pull metadata from the <o:OLEObject> element specifically, so a preview
346
+ // image's <v:imagedata r:id=""> earlier in the markup is not mistaken for
347
+ // the OLE binary's relationship id.
348
+ const oleTag = rawXml.match(/<o:OLEObject\b[^>]*>/i)?.[0] ?? rawXml;
349
+ const progIdMatch = oleTag.match(/ProgID="([^"]+)"/i) ?? oleTag.match(/progId="([^"]+)"/i);
350
+ const rIdMatch = oleTag.match(/r:id="([^"]+)"/i);
351
+ const typeMatch = oleTag.match(/Type="([^"]+)"/i);
249
352
  if (!progIdMatch) {
250
353
  return null;
251
354
  }
252
355
  const progId = progIdMatch[1];
253
356
  const rId = rIdMatch ? rIdMatch[1] : "";
254
357
  const objectType = typeMatch && typeMatch[1].toLowerCase().includes("link") ? "linked" : "embedded";
255
- // Extract dimensions
358
+ // Extract dimensions (from the surrounding shape, hence full rawXml)
256
359
  const widthMatch = rawXml.match(/(?:cx|width)="(\d+)"/i);
257
360
  const heightMatch = rawXml.match(/(?:cy|height)="(\d+)"/i);
258
361
  return {
@@ -139,14 +139,8 @@ export const DEFAULT_STYLE_MAP = {
139
139
  * ```
140
140
  */
141
141
  export function parseStyleMap(dsl, options) {
142
- const rules = [];
143
- // Include defaults if requested
144
- if (options?.includeDefaults !== false && options?.base) {
145
- rules.push(...options.base.rules);
146
- }
147
- else if (options?.includeDefaults !== false && !options?.base) {
148
- rules.push(...DEFAULT_STYLE_MAP.rules);
149
- }
142
+ // User-defined rules from the DSL.
143
+ const userRules = [];
150
144
  const lines = dsl
151
145
  .split("\n")
152
146
  .map(l => l.trim())
@@ -154,10 +148,37 @@ export function parseStyleMap(dsl, options) {
154
148
  for (const line of lines) {
155
149
  const rule = parseRule(line);
156
150
  if (rule) {
157
- rules.push(rule);
151
+ userRules.push(rule);
158
152
  }
159
153
  }
160
- // Sort by priority (highest first)
154
+ // Default / base rules requested via `includeDefaults`.
155
+ const defaultRules = [];
156
+ if (options?.includeDefaults !== false && options?.base) {
157
+ defaultRules.push(...options.base.rules);
158
+ }
159
+ else if (options?.includeDefaults !== false && !options?.base) {
160
+ defaultRules.push(...DEFAULT_STYLE_MAP.rules);
161
+ }
162
+ // An explicit DSL rule should always win over a default rule for the same
163
+ // element — that is the whole point of providing one. Default rules,
164
+ // however, carry their own priorities (e.g. "Heading 1" => h1 has priority
165
+ // 10) that can exceed the fixed priority `parseRule` assigns to user rules.
166
+ // To guarantee user intent wins while preserving the relative priority
167
+ // ordering *within* each group, lift every user rule above the highest
168
+ // default priority. When there are no defaults this offset is 0 and user
169
+ // priorities are untouched.
170
+ const maxDefaultPriority = defaultRules.reduce((m, r) => Math.max(m, r.priority ?? 0), 0);
171
+ const userOffset = defaultRules.length > 0 ? maxDefaultPriority + 1 : 0;
172
+ const liftedUserRules = userOffset === 0
173
+ ? userRules
174
+ : userRules.map(r => ({ ...r, priority: (r.priority ?? 0) + userOffset }));
175
+ // User rules come first so that, after a stable sort, an explicit DSL rule
176
+ // also wins over any default rule that happens to share its (lifted)
177
+ // priority. The sort below only reorders by priority; equal priorities
178
+ // preserve this user-before-default ordering.
179
+ const rules = [...liftedUserRules, ...defaultRules];
180
+ // Sort by priority (highest first). Array.prototype.sort is stable, so
181
+ // rules of equal priority keep their relative order (user rules first).
161
182
  rules.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
162
183
  return { rules };
163
184
  }
@@ -263,7 +263,13 @@ export declare function mathSubScript(base: MathContent[], subScript: MathConten
263
263
  export declare function mathSubSuperScript(base: MathContent[], subScript: MathContent[], superScript: MathContent[]): MathContent;
264
264
  /** Create a math pre-sub-superscript (subscript/superscript before the base). */
265
265
  export declare function mathPreSubSuperScript(base: MathContent[], preSubScript: MathContent[], preSuperScript: MathContent[]): MathContent;
266
- /** Create a math phantom (invisible expression that takes up space). */
266
+ /**
267
+ * Create a math phantom (an expression that takes up space).
268
+ *
269
+ * Note: in OOXML the phantom base is *shown* by default. To make the classic
270
+ * "occupies space but invisible" phantom pass `{ show: false }`; passing only
271
+ * `transparent: true` is not sufficient to hide the base in Word.
272
+ */
267
273
  export declare function mathPhantom(content: MathContent[], options?: {
268
274
  show?: boolean;
269
275
  zeroWidth?: boolean;
@@ -449,7 +449,13 @@ export function mathSubSuperScript(base, subScript, superScript) {
449
449
  export function mathPreSubSuperScript(base, preSubScript, preSuperScript) {
450
450
  return { type: "mathPreSubSuperScript", base, preSubScript, preSuperScript };
451
451
  }
452
- /** Create a math phantom (invisible expression that takes up space). */
452
+ /**
453
+ * Create a math phantom (an expression that takes up space).
454
+ *
455
+ * Note: in OOXML the phantom base is *shown* by default. To make the classic
456
+ * "occupies space but invisible" phantom pass `{ show: false }`; passing only
457
+ * `transparent: true` is not sufficient to hide the base in Word.
458
+ */
/**
 * Build a `mathPhantom` node wrapping the given math content.
 *
 * Any keys supplied in `options` are copied onto the node after `type` and
 * `content`, so they take precedence when names collide.
 *
 * @param content - Math content placed inside the phantom.
 * @param options - Optional phantom settings merged into the node.
 * @returns The phantom node object.
 */
export function mathPhantom(content, options) {
    const phantom = { type: "mathPhantom", content };
    // Object.assign ignores a null/undefined source, matching object spread.
    return Object.assign(phantom, options);
}
@@ -168,6 +168,10 @@ export declare const ContentType: {
168
168
  readonly Xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
169
169
  readonly CustomXml: "application/xml";
170
170
  readonly VbaProject: "application/vnd.ms-office.vbaProject";
171
+ /** Glossary (Building Blocks) document part. */
172
+ readonly Glossary: "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml";
173
+ /** OLE embedded object binary. */
174
+ readonly OleObject: "application/vnd.openxmlformats-officedocument.oleObject";
171
175
  };
172
176
  /** Map from image file extension to content type. */
173
177
  export declare const IMAGE_CONTENT_TYPES: Record<string, string>;
@@ -268,7 +268,11 @@ export const ContentType = {
268
268
  ChartEx: "application/vnd.ms-office.chartEx+xml",
269
269
  Xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
270
270
  CustomXml: "application/xml",
271
- VbaProject: "application/vnd.ms-office.vbaProject"
271
+ VbaProject: "application/vnd.ms-office.vbaProject",
272
+ /** Glossary (Building Blocks) document part. */
273
+ Glossary: "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
274
+ /** OLE embedded object binary. */
275
+ OleObject: "application/vnd.openxmlformats-officedocument.oleObject"
272
276
  };
273
277
  /** Map from image file extension to content type. */
274
278
  export const IMAGE_CONTENT_TYPES = {
@@ -11,7 +11,8 @@
11
11
  * - Hyperlink extraction
12
12
  * - Image registration into ConversionContext
13
13
  * - Table structure with merge (colSpan/rowSpan)
14
- * - List/numbering detection (basic)
14
+ * - List/numbering detection: consecutive numbered paragraphs are aggregated
15
+ * into ordered/unordered `list` blocks with nested sub-lists by level
15
16
  * - Footnote/endnote reference and content
16
17
  * - Math content (text fallback)
17
18
  */
@@ -11,7 +11,8 @@
11
11
  * - Hyperlink extraction
12
12
  * - Image registration into ConversionContext
13
13
  * - Table structure with merge (colSpan/rowSpan)
14
- * - List/numbering detection (basic)
14
+ * - List/numbering detection: consecutive numbered paragraphs are aggregated
15
+ * into ordered/unordered `list` blocks with nested sub-lists by level
15
16
  * - Footnote/endnote reference and content
16
17
  * - Math content (text fallback)
17
18
  */
@@ -93,6 +94,26 @@ function convertBodyContent(body, doc, ctx, imageMap) {
93
94
  const item = body[bodyIndex];
94
95
  switch (item.type) {
95
96
  case "paragraph":
97
+ // A run of consecutive list-item paragraphs (each carrying a
98
+ // numbering reference, and not a heading) is aggregated into a single
99
+ // semantic `list` block with nested sub-lists driven by the numbering
100
+ // level. This is what turns Word numbering into real <ul>/<ol> in
101
+ // HTML and `-`/`1.` markers in Markdown when downstream renderers
102
+ // consume the IR.
103
+ if (isListItemParagraph(item)) {
104
+ let end = bodyIndex;
105
+ while (end < body.length) {
106
+ const next = body[end];
107
+ if (next.type !== "paragraph" || !isListItemParagraph(next)) {
108
+ break;
109
+ }
110
+ end++;
111
+ }
112
+ const listParas = body.slice(bodyIndex, end);
113
+ blocks.push(...buildListBlocks(listParas, doc, ctx, imageMap));
114
+ bodyIndex = end - 1; // loop's ++ advances past the consumed run
115
+ break;
116
+ }
96
117
  blocks.push(convertParagraph(item, doc, ctx, imageMap));
97
118
  break;
98
119
  case "table":
@@ -240,6 +261,119 @@ function convertBodyContent(body, doc, ctx, imageMap) {
240
261
  return blocks;
241
262
  }
242
263
  // =============================================================================
264
+ // Internal: List Aggregation
265
+ // =============================================================================
/**
 * Decide whether a body item should be rendered as a list item.
 *
 * An item qualifies when it is a paragraph that carries a numbering reference
 * and `detectHeadingLevel` reports no heading level for it (a numbered heading
 * stays a heading).
 *
 * A `null` numbering reference is treated like a missing one. The list builder
 * stops at any paragraph whose numbering is falsy, so admitting such a
 * paragraph here would let it be consumed by the list run without ever being
 * emitted.
 *
 * @param item - Body item to classify.
 * @returns `true` when the item should be aggregated into a list block.
 */
function isListItemParagraph(item) {
    if (item.type !== "paragraph") {
        return false;
    }
    // Loose comparison on purpose: rejects both `undefined` and `null`.
    if (item.properties?.numbering == null) {
        return false;
    }
    return detectHeadingLevel(item) === null;
}
/**
 * Resolve a numbering reference to its number format string (e.g. "decimal",
 * "bullet").
 *
 * The lookup goes numbering instance (by `numId`) -> abstract numbering (by the
 * instance's `abstractNumId`) -> level definition (by `level`). Whenever any
 * link of that chain is missing - including an abstract numbering that has no
 * `levels` array - the result falls back to "bullet" rather than throwing.
 *
 * @param doc - Document model holding `numberingInstances` and `abstractNumberings`.
 * @param numId - Numbering instance id referenced by the paragraph.
 * @param level - Numbering level referenced by the paragraph.
 * @returns The level's `format`, or "bullet" when it cannot be resolved.
 */
function getNumberingFormat(doc, numId, level) {
    const instance = doc.numberingInstances?.find(n => n.numId === numId);
    if (!instance) {
        return "bullet";
    }
    const abstractNum = doc.abstractNumberings?.find(a => a.abstractNumId === instance.abstractNumId);
    // `levels` may be absent on a sparse definition; treat that as unresolved.
    const levelDef = abstractNum?.levels?.find(l => l.level === level);
    return levelDef?.format ?? "bullet";
}
/**
 * Classify a number format as ordered or unordered.
 *
 * @param format - Number format string.
 * @returns `false` for exactly "bullet" or "none"; `true` for anything else.
 */
function isOrderedFormat(format) {
    switch (format) {
        case "bullet":
        case "none":
            return false;
        default:
            return true;
    }
}
/**
 * Turn a contiguous run of list-item paragraphs into `list` blocks.
 *
 * Delegates to `buildListLevel`, starting at the first paragraph and at
 * numbering level 0, and returns only the blocks it produced (the "next index"
 * part of that result is discarded).
 *
 * @param paras - The run of list-item paragraphs.
 * @param doc - Document model, forwarded unchanged.
 * @param ctx - Conversion context, forwarded unchanged.
 * @param imageMap - Image lookup, forwarded unchanged.
 * @returns The blocks produced for the run.
 */
function buildListBlocks(paras, doc, ctx, imageMap) {
    const topLevel = buildListLevel(paras, 0, 0, doc, ctx, imageMap);
    return topLevel.blocks;
}
/**
 * Consume paragraphs starting at `start` that belong to `level` (or deeper) and
 * emit the sibling `list` blocks for this level.
 *
 * - A paragraph at a shallower level (or without numbering) ends this level;
 *   its index is returned so the caller can continue from there.
 * - A paragraph at exactly `level` becomes an item. When the ordered/unordered
 *   classification changes between adjacent items, the current list is closed
 *   and a new sibling list is started.
 * - A paragraph at a deeper level triggers a recursive call. Every list block
 *   that call returns is kept: the first is attached as `subList` to the most
 *   recent item when that item has none yet; any list that cannot be attached
 *   (no parent item, or the parent already hosts a `subList`) is carried by a
 *   synthesised item with empty `children`, so no nested content is dropped.
 *
 * @param paras - Contiguous run of list-item paragraphs.
 * @param start - Index in `paras` at which to begin consuming.
 * @param level - Numbering level this call is responsible for.
 * @param doc - Document model, used to resolve number formats.
 * @param ctx - Conversion context, forwarded to paragraph-child conversion.
 * @param imageMap - Image lookup, forwarded to paragraph-child conversion.
 * @returns `{ blocks, next }`: the list blocks produced and the index of the
 *   first paragraph that does not belong to this level.
 */
function buildListLevel(paras, start, level, doc, ctx, imageMap) {
    const blocks = [];
    let items = [];
    let currentOrdered = null;
    let i = start;
    // Close the list being accumulated, if it has any items.
    const flush = () => {
        if (items.length > 0 && currentOrdered !== null) {
            blocks.push({ type: "list", ordered: currentOrdered, items });
            items = [];
        }
    };
    while (i < paras.length) {
        const para = paras[i];
        const num = para.properties?.numbering;
        // Defensive: callers only pass list-item paragraphs, but guard anyway.
        if (!num) {
            break;
        }
        if (num.level < level) {
            // Belongs to a shallower list - let the caller handle it.
            break;
        }
        if (num.level > level) {
            // The nested call starts on a paragraph at exactly its own level,
            // so it always consumes at least one paragraph and `next` advances.
            const { blocks: subBlocks, next } = buildListLevel(paras, i, num.level, doc, ctx, imageMap);
            for (const subList of subBlocks) {
                const last = items[items.length - 1];
                if (last !== undefined && last.subList === undefined) {
                    items[items.length - 1] = { ...last, subList };
                    continue;
                }
                // No free parent: host the nested list on an empty item. When
                // nothing has fixed this list's kind yet, inherit it from the
                // nested list being promoted.
                if (currentOrdered === null) {
                    currentOrdered = subList.ordered;
                }
                items.push({ children: [], subList });
            }
            i = next;
            continue;
        }
        // num.level === level
        const format = getNumberingFormat(doc, num.numId, num.level);
        const ordered = isOrderedFormat(format);
        if (currentOrdered === null) {
            currentOrdered = ordered;
        }
        else if (ordered !== currentOrdered) {
            // Ordered/unordered switch at the same level: start a new sibling list.
            flush();
            currentOrdered = ordered;
        }
        const children = convertParagraphChildren(para.children, doc, ctx, imageMap);
        items.push({ children });
        i++;
    }
    flush();
    return { blocks, next: i };
}
376
+ // =============================================================================
243
377
  // Internal: Paragraph Conversion
244
378
  // =============================================================================
245
379
  function convertParagraph(para, doc, ctx, imageMap) {