@cj-tech-master/excelts 9.6.1 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. package/README.md +18 -3
  2. package/README_zh.md +18 -3
  3. package/dist/browser/modules/excel/cell.d.ts +4 -0
  4. package/dist/browser/modules/excel/note.js +5 -1
  5. package/dist/browser/modules/excel/row.js +35 -2
  6. package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
  7. package/dist/browser/modules/excel/stream/workbook-writer.browser.js +22 -2
  8. package/dist/browser/modules/excel/types.d.ts +81 -0
  9. package/dist/browser/modules/excel/utils/drawing-utils.d.ts +8 -0
  10. package/dist/browser/modules/excel/utils/drawing-utils.js +19 -2
  11. package/dist/browser/modules/excel/workbook.browser.d.ts +16 -0
  12. package/dist/browser/modules/excel/workbook.browser.js +32 -2
  13. package/dist/browser/modules/excel/worksheet.d.ts +31 -1
  14. package/dist/browser/modules/excel/worksheet.js +83 -0
  15. package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
  16. package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  17. package/dist/browser/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  18. package/dist/browser/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  19. package/dist/browser/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  20. package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
  21. package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  22. package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
  23. package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  24. package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
  25. package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  26. package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
  27. package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
  28. package/dist/browser/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  29. package/dist/browser/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  30. package/dist/browser/modules/pdf/builder/document-builder.js +22 -49
  31. package/dist/browser/modules/pdf/builder/pdf-editor.js +1 -1
  32. package/dist/browser/modules/pdf/core/pdf-stream.d.ts +28 -1
  33. package/dist/browser/modules/pdf/core/pdf-stream.js +38 -2
  34. package/dist/browser/modules/pdf/font/font-manager.d.ts +26 -0
  35. package/dist/browser/modules/pdf/font/font-manager.js +35 -18
  36. package/dist/browser/modules/pdf/render/page-renderer.d.ts +51 -3
  37. package/dist/browser/modules/pdf/render/page-renderer.js +111 -18
  38. package/dist/browser/modules/word/advanced/field-engine.js +45 -20
  39. package/dist/browser/modules/word/advanced/glossary.d.ts +10 -36
  40. package/dist/browser/modules/word/advanced/glossary.js +8 -9
  41. package/dist/browser/modules/word/advanced/math-convert.js +94 -12
  42. package/dist/browser/modules/word/advanced/ole-objects.d.ts +28 -0
  43. package/dist/browser/modules/word/advanced/ole-objects.js +122 -19
  44. package/dist/browser/modules/word/advanced/style-map.js +31 -10
  45. package/dist/browser/modules/word/builder/run-builders.d.ts +7 -1
  46. package/dist/browser/modules/word/builder/run-builders.js +7 -1
  47. package/dist/browser/modules/word/constants.d.ts +4 -0
  48. package/dist/browser/modules/word/constants.js +5 -1
  49. package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +2 -1
  50. package/dist/browser/modules/word/convert/docx-to-semantic.js +135 -1
  51. package/dist/browser/modules/word/convert/html/html-import.d.ts +32 -1
  52. package/dist/browser/modules/word/convert/html/html-import.js +167 -14
  53. package/dist/browser/modules/word/convert/html/html.d.ts +2 -2
  54. package/dist/browser/modules/word/convert/html/html.js +1 -1
  55. package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +48 -18
  56. package/dist/browser/modules/word/convert/markdown/markdown-import.js +279 -69
  57. package/dist/browser/modules/word/convert/markdown/markdown.d.ts +1 -1
  58. package/dist/browser/modules/word/convert/odt/odt.js +407 -56
  59. package/dist/browser/modules/word/html.d.ts +2 -2
  60. package/dist/browser/modules/word/html.js +1 -1
  61. package/dist/browser/modules/word/index.base.d.ts +3 -3
  62. package/dist/browser/modules/word/index.base.js +1 -1
  63. package/dist/browser/modules/word/layout/layout-full.js +326 -19
  64. package/dist/browser/modules/word/layout/render-page.js +35 -8
  65. package/dist/browser/modules/word/markdown.d.ts +1 -1
  66. package/dist/browser/modules/word/query/compat.d.ts +10 -2
  67. package/dist/browser/modules/word/query/compat.js +29 -21
  68. package/dist/browser/modules/word/reader/docx-reader.js +105 -2
  69. package/dist/browser/modules/word/reader/math-parser.js +8 -2
  70. package/dist/browser/modules/word/security/cfb-reader.js +5 -5
  71. package/dist/browser/modules/word/types.d.ts +96 -1
  72. package/dist/browser/modules/word/writer/docx-packager.js +108 -2
  73. package/dist/browser/modules/word/writer/glossary-writer.d.ts +28 -0
  74. package/dist/browser/modules/word/writer/glossary-writer.js +121 -0
  75. package/dist/browser/modules/word/writer/header-footer-writer.js +105 -20
  76. package/dist/browser/modules/word/writer/math-writer.js +7 -2
  77. package/dist/browser/utils/font-metrics.d.ts +8 -0
  78. package/dist/browser/utils/font-metrics.js +43 -0
  79. package/dist/browser/utils/theme-colors.js +4 -1
  80. package/dist/cjs/modules/excel/note.js +5 -1
  81. package/dist/cjs/modules/excel/row.js +35 -2
  82. package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +22 -2
  83. package/dist/cjs/modules/excel/utils/drawing-utils.js +19 -2
  84. package/dist/cjs/modules/excel/workbook.browser.js +31 -1
  85. package/dist/cjs/modules/excel/worksheet.js +83 -0
  86. package/dist/cjs/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  87. package/dist/cjs/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  88. package/dist/cjs/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  89. package/dist/cjs/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  90. package/dist/cjs/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  91. package/dist/cjs/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  92. package/dist/cjs/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  93. package/dist/cjs/modules/excel/xlsx/xform/drawing/shape-xform.js +112 -0
  94. package/dist/cjs/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  95. package/dist/cjs/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  96. package/dist/cjs/modules/pdf/builder/document-builder.js +21 -48
  97. package/dist/cjs/modules/pdf/builder/pdf-editor.js +1 -1
  98. package/dist/cjs/modules/pdf/core/pdf-stream.js +38 -2
  99. package/dist/cjs/modules/pdf/font/font-manager.js +35 -18
  100. package/dist/cjs/modules/pdf/render/page-renderer.js +112 -18
  101. package/dist/cjs/modules/word/advanced/field-engine.js +45 -20
  102. package/dist/cjs/modules/word/advanced/glossary.js +8 -9
  103. package/dist/cjs/modules/word/advanced/math-convert.js +94 -12
  104. package/dist/cjs/modules/word/advanced/ole-objects.js +123 -19
  105. package/dist/cjs/modules/word/advanced/style-map.js +31 -10
  106. package/dist/cjs/modules/word/builder/run-builders.js +7 -1
  107. package/dist/cjs/modules/word/constants.js +5 -1
  108. package/dist/cjs/modules/word/convert/docx-to-semantic.js +135 -1
  109. package/dist/cjs/modules/word/convert/html/html-import.js +168 -14
  110. package/dist/cjs/modules/word/convert/html/html.js +2 -1
  111. package/dist/cjs/modules/word/convert/markdown/markdown-import.js +279 -69
  112. package/dist/cjs/modules/word/convert/odt/odt.js +407 -56
  113. package/dist/cjs/modules/word/html.js +2 -1
  114. package/dist/cjs/modules/word/index.base.js +4 -3
  115. package/dist/cjs/modules/word/layout/layout-full.js +325 -18
  116. package/dist/cjs/modules/word/layout/render-page.js +35 -8
  117. package/dist/cjs/modules/word/query/compat.js +29 -21
  118. package/dist/cjs/modules/word/reader/docx-reader.js +104 -1
  119. package/dist/cjs/modules/word/reader/math-parser.js +8 -2
  120. package/dist/cjs/modules/word/security/cfb-reader.js +5 -5
  121. package/dist/cjs/modules/word/writer/docx-packager.js +108 -2
  122. package/dist/cjs/modules/word/writer/glossary-writer.js +124 -0
  123. package/dist/cjs/modules/word/writer/header-footer-writer.js +105 -20
  124. package/dist/cjs/modules/word/writer/math-writer.js +7 -2
  125. package/dist/cjs/utils/font-metrics.js +44 -0
  126. package/dist/cjs/utils/theme-colors.js +4 -1
  127. package/dist/esm/modules/excel/note.js +5 -1
  128. package/dist/esm/modules/excel/row.js +35 -2
  129. package/dist/esm/modules/excel/stream/workbook-writer.browser.js +22 -2
  130. package/dist/esm/modules/excel/utils/drawing-utils.js +19 -2
  131. package/dist/esm/modules/excel/workbook.browser.js +32 -2
  132. package/dist/esm/modules/excel/worksheet.js +83 -0
  133. package/dist/esm/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
  134. package/dist/esm/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
  135. package/dist/esm/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
  136. package/dist/esm/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
  137. package/dist/esm/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
  138. package/dist/esm/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
  139. package/dist/esm/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
  140. package/dist/esm/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
  141. package/dist/esm/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
  142. package/dist/esm/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
  143. package/dist/esm/modules/pdf/builder/document-builder.js +22 -49
  144. package/dist/esm/modules/pdf/builder/pdf-editor.js +1 -1
  145. package/dist/esm/modules/pdf/core/pdf-stream.js +38 -2
  146. package/dist/esm/modules/pdf/font/font-manager.js +35 -18
  147. package/dist/esm/modules/pdf/render/page-renderer.js +111 -18
  148. package/dist/esm/modules/word/advanced/field-engine.js +45 -20
  149. package/dist/esm/modules/word/advanced/glossary.js +8 -9
  150. package/dist/esm/modules/word/advanced/math-convert.js +94 -12
  151. package/dist/esm/modules/word/advanced/ole-objects.js +122 -19
  152. package/dist/esm/modules/word/advanced/style-map.js +31 -10
  153. package/dist/esm/modules/word/builder/run-builders.js +7 -1
  154. package/dist/esm/modules/word/constants.js +5 -1
  155. package/dist/esm/modules/word/convert/docx-to-semantic.js +135 -1
  156. package/dist/esm/modules/word/convert/html/html-import.js +167 -14
  157. package/dist/esm/modules/word/convert/html/html.js +1 -1
  158. package/dist/esm/modules/word/convert/markdown/markdown-import.js +279 -69
  159. package/dist/esm/modules/word/convert/odt/odt.js +407 -56
  160. package/dist/esm/modules/word/html.js +1 -1
  161. package/dist/esm/modules/word/index.base.js +1 -1
  162. package/dist/esm/modules/word/layout/layout-full.js +326 -19
  163. package/dist/esm/modules/word/layout/render-page.js +35 -8
  164. package/dist/esm/modules/word/query/compat.js +29 -21
  165. package/dist/esm/modules/word/reader/docx-reader.js +105 -2
  166. package/dist/esm/modules/word/reader/math-parser.js +8 -2
  167. package/dist/esm/modules/word/security/cfb-reader.js +5 -5
  168. package/dist/esm/modules/word/writer/docx-packager.js +108 -2
  169. package/dist/esm/modules/word/writer/glossary-writer.js +121 -0
  170. package/dist/esm/modules/word/writer/header-footer-writer.js +105 -20
  171. package/dist/esm/modules/word/writer/math-writer.js +7 -2
  172. package/dist/esm/utils/font-metrics.js +43 -0
  173. package/dist/esm/utils/theme-colors.js +4 -1
  174. package/dist/iife/excelts.iife.js +496 -59
  175. package/dist/iife/excelts.iife.js.map +1 -1
  176. package/dist/iife/excelts.iife.min.js +39 -39
  177. package/dist/types/modules/excel/cell.d.ts +4 -0
  178. package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
  179. package/dist/types/modules/excel/types.d.ts +81 -0
  180. package/dist/types/modules/excel/utils/drawing-utils.d.ts +8 -0
  181. package/dist/types/modules/excel/workbook.browser.d.ts +16 -0
  182. package/dist/types/modules/excel/worksheet.d.ts +31 -1
  183. package/dist/types/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
  184. package/dist/types/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
  185. package/dist/types/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
  186. package/dist/types/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
  187. package/dist/types/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
  188. package/dist/types/modules/pdf/core/pdf-stream.d.ts +28 -1
  189. package/dist/types/modules/pdf/font/font-manager.d.ts +26 -0
  190. package/dist/types/modules/pdf/render/page-renderer.d.ts +51 -3
  191. package/dist/types/modules/word/advanced/glossary.d.ts +10 -36
  192. package/dist/types/modules/word/advanced/ole-objects.d.ts +28 -0
  193. package/dist/types/modules/word/builder/run-builders.d.ts +7 -1
  194. package/dist/types/modules/word/constants.d.ts +4 -0
  195. package/dist/types/modules/word/convert/docx-to-semantic.d.ts +2 -1
  196. package/dist/types/modules/word/convert/html/html-import.d.ts +32 -1
  197. package/dist/types/modules/word/convert/html/html.d.ts +2 -2
  198. package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +48 -18
  199. package/dist/types/modules/word/convert/markdown/markdown.d.ts +1 -1
  200. package/dist/types/modules/word/html.d.ts +2 -2
  201. package/dist/types/modules/word/index.base.d.ts +3 -3
  202. package/dist/types/modules/word/markdown.d.ts +1 -1
  203. package/dist/types/modules/word/query/compat.d.ts +10 -2
  204. package/dist/types/modules/word/types.d.ts +96 -1
  205. package/dist/types/modules/word/writer/glossary-writer.d.ts +28 -0
  206. package/dist/types/utils/font-metrics.d.ts +8 -0
  207. package/package.json +3 -1
@@ -42,7 +42,7 @@ export function updateFields(doc, options) {
42
42
  // Build style → paragraphs index for STYLEREF
43
43
  const styleIndex = buildStyleIndex(doc);
44
44
  // Collect INDEX entries (XE fields) from all body content
45
- const indexEntries = collectIndexEntries(doc);
45
+ const indexEntries = collectIndexEntries(doc, layout);
46
46
  // Update body content
47
47
  const newBody = updateBody(doc, layout, bookmarkInfo, seqValues, styleIndex, indexEntries, opts);
48
48
  // If TOC was updated, register the TOC1..TOCn paragraph styles so the
@@ -610,23 +610,33 @@ function findStyleRef(styleIndex, styleName, bodyIndex) {
610
610
  // =============================================================================
611
611
  // Index Entry Collection (for INDEX field)
612
612
  // =============================================================================
613
- /** Collect all XE (Index Entry) fields from the document body. */
614
- function collectIndexEntries(doc) {
613
+ /**
614
+ * Collect all XE (Index Entry) fields from the document body, tagging each
615
+ * with the page it falls on. Page numbers come from `layout.contentPages`
616
+ * keyed by the enclosing top-level body index (the same mechanism used for
617
+ * headings/TOC); entries inside nested structures inherit their outer block's
618
+ * page, falling back to 1 when layout produced no page for that block.
619
+ */
620
+ function collectIndexEntries(doc, layout) {
615
621
  const entries = [];
616
- walkBlocks(doc.body, {
617
- visitRunContent(content) {
618
- if (content.type !== "field") {
619
- return;
620
- }
621
- const { type, args } = parseFieldInstruction(content.instruction);
622
- if (type === "XE") {
623
- const term = parseXeTerm(args);
624
- if (term) {
625
- entries.push({ term, page: 1 });
622
+ const { contentPages } = layout;
623
+ for (let i = 0; i < doc.body.length; i++) {
624
+ const page = contentPages[i] ?? 1;
625
+ walkBlocks([doc.body[i]], {
626
+ visitRunContent(content) {
627
+ if (content.type !== "field") {
628
+ return;
629
+ }
630
+ const { type, args } = parseFieldInstruction(content.instruction);
631
+ if (type === "XE") {
632
+ const term = parseXeTerm(args);
633
+ if (term) {
634
+ entries.push({ term, page });
635
+ }
626
636
  }
627
637
  }
628
- }
629
- });
638
+ });
639
+ }
630
640
  return entries;
631
641
  }
632
642
  /** Parse the term from an XE field argument: XE "term" or XE term. */
@@ -643,17 +653,32 @@ function buildIndexContent(entries, args) {
643
653
  if (entries.length === 0) {
644
654
  return "";
645
655
  }
646
- // Sort entries alphabetically by term
647
- const sorted = [...entries].sort((a, b) => a.term.localeCompare(b.term));
656
+ // Merge entries that share the same term: a term marked on several pages
657
+ // produces a single index line listing each distinct page (Word behaviour),
658
+ // e.g. "widget, 1, 4" rather than two separate "widget" rows.
659
+ const byTerm = new Map();
660
+ for (const entry of entries) {
661
+ let pages = byTerm.get(entry.term);
662
+ if (!pages) {
663
+ pages = new Set();
664
+ byTerm.set(entry.term, pages);
665
+ }
666
+ pages.add(entry.page);
667
+ }
668
+ // Sort terms alphabetically; within each term sort pages numerically.
669
+ const merged = [...byTerm.entries()]
670
+ .map(([term, pages]) => ({ term, pages: [...pages].sort((a, b) => a - b) }))
671
+ .sort((a, b) => a.term.localeCompare(b.term));
672
+ const formatEntry = (e) => `${e.term}\t${e.pages.join(", ")}`;
648
673
  // Check for \h switch (group by first letter with headings)
649
674
  const grouped = /\\h\b/i.test(args);
650
675
  if (!grouped) {
651
- return sorted.map(e => `${e.term}\t${e.page}`).join("\n");
676
+ return merged.map(formatEntry).join("\n");
652
677
  }
653
678
  // Group by first letter
654
679
  const lines = [];
655
680
  let currentLetter = "";
656
- for (const entry of sorted) {
681
+ for (const entry of merged) {
657
682
  const letter = entry.term.charAt(0).toUpperCase();
658
683
  if (letter !== currentLetter) {
659
684
  currentLetter = letter;
@@ -662,7 +687,7 @@ function buildIndexContent(entries, args) {
662
687
  }
663
688
  lines.push(currentLetter);
664
689
  }
665
- lines.push(`${entry.term}\t${entry.page}`);
690
+ lines.push(formatEntry(entry));
666
691
  }
667
692
  return lines.join("\n");
668
693
  }
@@ -4,17 +4,16 @@
4
4
  * Provides types and utilities for working with Glossary Document parts,
5
5
  * which contain AutoText entries, Quick Parts, and other Building Blocks.
6
6
  *
7
- * INTEGRATION STATUS: This module provides data structures and query helpers
8
- * for building blocks. The actual reading/writing of glossary parts from/to
9
- * DOCX archives is handled via the opaqueParts round-trip mechanism —
10
- * glossary parts in existing files are preserved as opaque parts during
11
- * read/write. This module is useful for:
7
+ * INTEGRATION STATUS: This module provides the glossary data model and query
8
+ * helpers. To embed a glossary in a document, assign a {@link GlossaryDocument}
9
+ * to `doc.glossary`; the packager then serialises it to
10
+ * `word/glossary/document.xml`, registers the `glossaryDocument` relationship,
11
+ * and adds the `[Content_Types].xml` override (the canonical OOXML location
12
+ * Word reads Quick Parts / AutoText from). Glossary parts in existing files are
13
+ * round-tripped via the same channel. This module is useful for:
12
14
  * - Building glossary data structures programmatically
13
15
  * - Querying/filtering building block collections
14
- * - Preparing data for future direct glossary part writing
15
- *
16
- * To add glossary content to a document currently, include it as an
17
- * OpaquePart with path "word/glossary/document.xml".
16
+ * - Assembling a glossary to attach via `doc.glossary`
18
17
  */
19
18
  import { generateGuid } from "../core/internal-utils.js";
20
19
  // =============================================================================
@@ -79,20 +79,98 @@ function convertNodeToMathML(node) {
79
79
  }
80
80
  }
81
81
  function convertMathRunToMathML(node) {
82
- const text = xmlEncode(node.text);
83
- // Operators and special characters
84
- if (isOperator(node.text)) {
85
- return `<mo>${text}</mo>`;
82
+ const raw = node.text;
83
+ // A single OMML run can hold a mix of letters, digits, operators and
84
+ // whitespace (e.g. "a + b = "). MathML presentation markup expects each
85
+ // token to be wrapped in the element matching its semantic category:
86
+ // <mi> for identifiers, <mn> for numbers, <mo> for operators and <mtext>
87
+ // for runs of whitespace / other text. Tokenize the run and emit one
88
+ // element per token so a mixed run is no longer flattened into a single
89
+ // (incorrect) <mi>.
90
+ const normalVariant = node.properties?.italic === false;
91
+ const tokens = tokenizeMathRun(raw);
92
+ // Fast path / backwards-compatibility: a run that is a single token keeps
93
+ // producing exactly one element (e.g. "x" -> <mi>x</mi>, "42" -> <mn>42</mn>,
94
+ // "+" -> <mo>+</mo>), matching the historical output.
95
+ return tokens.map(tok => emitMathToken(tok, normalVariant)).join("");
96
+ }
97
+ function classifyMathChar(ch) {
98
+ if (/\s/.test(ch)) {
99
+ return "text";
100
+ }
101
+ if (isOperator(ch)) {
102
+ return "operator";
103
+ }
104
+ if (/[0-9.]/.test(ch)) {
105
+ return "number";
106
+ }
107
+ if (/[A-Za-z]/.test(ch)) {
108
+ return "identifier";
109
+ }
110
+ // Letters outside ASCII (Greek, CJK, etc.) and any other symbol fall back to
111
+ // identifier — this matches how OMML treats variable names.
112
+ return "identifier";
113
+ }
114
+ function tokenizeMathRun(text) {
115
+ const tokens = [];
116
+ // Iterate by code point so astral characters (surrogate pairs) are not split mid-symbol.
117
+ const chars = Array.from(text);
118
+ for (const ch of chars) {
119
+ const kind = classifyMathChar(ch);
120
+ const last = tokens[tokens.length - 1];
121
+ // Operators are always emitted as their own token (each operator is a
122
+ // distinct <mo>). Numbers, identifiers and text coalesce with the
123
+ // previous token of the same kind so "42" stays a single <mn> and a run
124
+ // of spaces stays a single <mtext>.
125
+ if (last && last.kind === kind && kind !== "operator") {
126
+ last.value += ch;
127
+ }
128
+ else {
129
+ tokens.push({ kind, value: ch });
130
+ }
86
131
  }
87
- // Numbers
88
- if (/^\d+(\.\d+)?$/.test(node.text)) {
89
- return `<mn>${text}</mn>`;
132
+ return mergeDecimalPoints(tokens);
133
+ }
134
+ /**
135
+ * A "." between two digit groups is a decimal point, not an operator. The
136
+ * char classifier sees "." as an operator (it is a valid math operator on its
137
+ * own, e.g. function composition), so stitch `<number> "." <number>` back into
138
+ * a single number token here.
139
+ */
140
+ function mergeDecimalPoints(tokens) {
141
+ const out = [];
142
+ for (let i = 0; i < tokens.length; i++) {
143
+ const tok = tokens[i];
144
+ const prev = out[out.length - 1];
145
+ const next = tokens[i + 1];
146
+ if (tok.kind === "operator" &&
147
+ tok.value === "." &&
148
+ prev &&
149
+ prev.kind === "number" &&
150
+ next &&
151
+ next.kind === "number") {
152
+ prev.value += "." + next.value;
153
+ i++; // consume the following number token
154
+ continue;
155
+ }
156
+ out.push(tok);
90
157
  }
91
- // Identifiers
92
- if (node.properties?.italic === false) {
93
- return `<mi mathvariant="normal">${text}</mi>`;
158
+ return out;
159
+ }
160
+ function emitMathToken(tok, normalVariant) {
161
+ const text = xmlEncode(tok.value);
162
+ switch (tok.kind) {
163
+ case "operator":
164
+ return `<mo>${text}</mo>`;
165
+ case "number":
166
+ // A lone "." is not a number; treat it as an operator/text fallback.
167
+ return /\d/.test(tok.value) ? `<mn>${text}</mn>` : `<mo>${text}</mo>`;
168
+ case "text":
169
+ return `<mtext>${text}</mtext>`;
170
+ case "identifier":
171
+ default:
172
+ return normalVariant ? `<mi mathvariant="normal">${text}</mi>` : `<mi>${text}</mi>`;
94
173
  }
95
- return `<mi>${text}</mi>`;
96
174
  }
97
175
  function convertFractionToMathML(node) {
98
176
  const num = childrenToMathML(node.numerator);
@@ -254,7 +332,11 @@ function convertMMLElement(el) {
254
332
  }
255
333
  case "msqrt": {
256
334
  const content = convertMMLChildren(el.children);
257
- return { type: "mathRadical", content };
335
+ // A bare square root has no degree. OOXML still emits an (empty)
336
+ // <m:deg/>, so we must set hideDegree → <m:degHide m:val="1"/>;
337
+ // otherwise Word treats the empty degree as visible and draws an empty
338
+ // degree box (a small square) at the radical's upper-left.
339
+ return { type: "mathRadical", content, hideDegree: true };
258
340
  }
259
341
  case "mroot": {
260
342
  const children = getElementChildren(el);
@@ -13,6 +13,8 @@
13
13
  * This module focuses on preservation (round-trip) and metadata extraction,
14
14
  * not full OLE compound document manipulation.
15
15
  */
16
+ import { xmlEncodeAttr } from "../../xml/encode.js";
17
+ import { ContentType } from "../constants.js";
16
18
  import { getFileName } from "../core/opc-paths.js";
17
19
  // =============================================================================
18
20
  // OLE Object Extraction
@@ -28,14 +30,33 @@ import { getFileName } from "../core/opc-paths.js";
28
30
  export function extractOleObjects(doc) {
29
31
  const objects = [];
30
32
  const summary = {};
31
- // Scan opaque parts for OLE embeddings
33
+ const pushObject = (obj) => {
34
+ objects.push(obj);
35
+ summary[obj.progId] = (summary[obj.progId] ?? 0) + 1;
36
+ };
37
+ // Structured OLE objects wired on document.xml.rels (preferred form —
38
+ // these carry the real rId and, when available, the round-tripped progId).
39
+ if (doc.oleObjects) {
40
+ for (const ole of doc.oleObjects) {
41
+ pushObject({
42
+ rId: ole.rId,
43
+ progId: ole.progId ?? detectProgIdFromData(ole.data),
44
+ objectType: "embedded",
45
+ displayAs: "icon",
46
+ imageRId: ole.previewRId,
47
+ fileName: getFileName(ole.path),
48
+ data: ole.data
49
+ });
50
+ }
51
+ }
52
+ // Scan opaque parts for OLE embeddings (legacy / hand-built documents that
53
+ // did not go through the structured oleObjects channel).
32
54
  if (doc.opaqueParts) {
33
55
  for (const part of doc.opaqueParts) {
34
56
  if (isOleEmbedding(part.path)) {
35
57
  const obj = parseOlePartMetadata(part);
36
58
  if (obj) {
37
- objects.push(obj);
38
- summary[obj.progId] = (summary[obj.progId] ?? 0) + 1;
59
+ pushObject(obj);
39
60
  }
40
61
  }
41
62
  }
@@ -45,11 +66,10 @@ export function extractOleObjects(doc) {
45
66
  if (element.type === "opaqueDrawing") {
46
67
  const oleFromDrawing = extractOleFromRawXml(element.rawXml);
47
68
  if (oleFromDrawing) {
48
- // Check if we already have this object from opaque parts
49
- const exists = objects.some(o => o.rId === oleFromDrawing.rId);
69
+ // Check if we already have this object (by rId)
70
+ const exists = objects.some(o => o.rId === oleFromDrawing.rId && o.rId !== "");
50
71
  if (!exists) {
51
- objects.push(oleFromDrawing);
52
- summary[oleFromDrawing.progId] = (summary[oleFromDrawing.progId] ?? 0) + 1;
72
+ pushObject(oleFromDrawing);
53
73
  }
54
74
  }
55
75
  }
@@ -60,6 +80,9 @@ export function extractOleObjects(doc) {
60
80
  * Check if a document contains any OLE embedded objects.
61
81
  */
62
82
  export function hasOleObjects(doc) {
83
+ if (doc.oleObjects && doc.oleObjects.length > 0) {
84
+ return true;
85
+ }
63
86
  if (doc.opaqueParts) {
64
87
  for (const part of doc.opaqueParts) {
65
88
  if (isOleEmbedding(part.path)) {
@@ -74,6 +97,14 @@ export function hasOleObjects(doc) {
74
97
  * Returns undefined if not found.
75
98
  */
76
99
  export function getOleObjectData(doc, rId) {
100
+ // Structured OLE objects carry the exact rId used on document.xml.rels.
101
+ if (doc.oleObjects) {
102
+ for (const ole of doc.oleObjects) {
103
+ if (ole.rId === rId) {
104
+ return ole.data;
105
+ }
106
+ }
107
+ }
77
108
  if (!doc.opaqueParts) {
78
109
  return undefined;
79
110
  }
@@ -126,16 +157,14 @@ export function createOleEmbedding(data, progId, options) {
126
157
  const olePart = {
127
158
  path: `word/embeddings/${fileName}`,
128
159
  data,
129
- contentType: "application/vnd.openxmlformats-officedocument.oleObject",
160
+ contentType: ContentType.OleObject,
130
161
  relationships: undefined
131
162
  };
132
163
  const oleRId = `rIdOle${oleSeq}`;
133
- // progId is metadata for downstream consumers not stored on
134
- // OpaquePart but accepted in the signature so callers can pass it
135
- // alongside without a separate channel. We don't need it here.
136
- void progId;
164
+ // progId is carried back on the result so addOleObject() can persist it
165
+ // into the body `<o:OLEObject ProgID="…">` markup for round-trip.
137
166
  if (!options?.previewImage) {
138
- return { olePart, oleRId };
167
+ return { olePart, oleRId, progId };
139
168
  }
140
169
  if (!options.previewContentType) {
141
170
  throw new Error("createOleEmbedding: options.previewImage requires options.previewContentType");
@@ -150,7 +179,78 @@ export function createOleEmbedding(data, progId, options) {
150
179
  relationships: undefined
151
180
  };
152
181
  const previewRId = `rIdOleImg${previewSeq}`;
153
- return { olePart, oleRId, previewPart, previewRId };
182
+ return { olePart, oleRId, progId, previewPart, previewRId };
183
+ }
184
+ /**
185
+ * Wire an {@link OleEmbeddingResult} into a document so the OLE object is
186
+ * actually rendered and resolvable, returning a new {@link DocxDocument}.
187
+ *
188
+ * Unlike just stuffing the part into `opaqueParts` (which leaves the binary
189
+ * dangling — no relationship, no body reference), this:
190
+ *
191
+ * - registers the OLE binary (and optional preview) on
192
+ * `doc.oleObjects` so the packager emits a `word/_rels/document.xml.rels`
193
+ * relationship with the exact rId and a `[Content_Types].xml` override;
194
+ * - appends a body paragraph carrying a `<w:object>` / `<o:OLEObject>`
195
+ * that references the same rId and embeds the ProgId, so the object is
196
+ * visible in Word and round-trips through `readDocx`.
197
+ *
198
+ * @param doc - The document to add the OLE object to.
199
+ * @param embedding - Result from {@link createOleEmbedding}.
200
+ * @param options - Display geometry (defaults to a 96pt × 96pt icon box, i.e. about 1.33" square).
201
+ */
202
+ export function addOleObject(doc, embedding, options) {
203
+ const widthPt = options?.widthPt ?? 96;
204
+ const heightPt = options?.heightPt ?? 96;
205
+ const drawAspect = (options?.displayAs ?? "icon") === "icon" ? "Icon" : "Content";
206
+ const olePartEntry = {
207
+ path: embedding.olePart.path,
208
+ data: embedding.olePart.data,
209
+ rId: embedding.oleRId,
210
+ progId: embedding.progId,
211
+ contentType: embedding.olePart.contentType,
212
+ ...(embedding.previewPart && embedding.previewRId
213
+ ? {
214
+ previewPath: embedding.previewPart.path,
215
+ previewData: embedding.previewPart.data,
216
+ previewRId: embedding.previewRId,
217
+ previewContentType: embedding.previewPart.contentType
218
+ }
219
+ : {})
220
+ };
221
+ // Build the VML-hosted <w:object>. The o:OLEObject carries ProgID + the
222
+ // r:id of the binary; the v:shape provides geometry and (when present) the
223
+ // preview image fill via v:imagedata. This is the canonical OOXML shape for
224
+ // an embedded OLE object (ECMA-376 §17.3.3.19 + VML).
225
+ const shapeId = `_ole_${embedding.oleRId}`;
226
+ const styleWidth = widthPt.toFixed(0);
227
+ const styleHeight = heightPt.toFixed(0);
228
+ const imageData = embedding.previewRId != null
229
+ ? `<v:imagedata r:id="${xmlEncodeAttr(embedding.previewRId)}" o:title=""/>`
230
+ : "";
231
+ const rawXml = `<w:object>` +
232
+ `<v:shape id="${xmlEncodeAttr(shapeId)}" type="#_x0000_t75" ` +
233
+ `style="width:${styleWidth}pt;height:${styleHeight}pt">` +
234
+ imageData +
235
+ `</v:shape>` +
236
+ `<o:OLEObject Type="Embed" ProgID="${xmlEncodeAttr(embedding.progId)}" ` +
237
+ `ShapeID="${xmlEncodeAttr(shapeId)}" DrawAspect="${drawAspect}" ` +
238
+ `r:id="${xmlEncodeAttr(embedding.oleRId)}"/>` +
239
+ `</w:object>`;
240
+ const referencedRIds = [embedding.oleRId];
241
+ if (embedding.previewRId != null) {
242
+ referencedRIds.push(embedding.previewRId);
243
+ }
244
+ const drawing = {
245
+ type: "opaqueDrawing",
246
+ rawXml,
247
+ referencedRIds
248
+ };
249
+ return {
250
+ ...doc,
251
+ body: [...doc.body, drawing],
252
+ oleObjects: [...(doc.oleObjects ?? []), olePartEntry]
253
+ };
154
254
  }
155
255
  /** Module-level counters used to allocate unique file names per call. */
156
256
  let _oleSeq = 0;
@@ -242,17 +342,20 @@ function tryDecodeAscii(data) {
242
342
  return str;
243
343
  }
244
344
  function extractOleFromRawXml(rawXml) {
245
- // Parse OLE object info from raw XML using regex (lightweight)
246
- const progIdMatch = rawXml.match(/ProgID="([^"]+)"/i) ?? rawXml.match(/progId="([^"]+)"/i);
247
- const rIdMatch = rawXml.match(/r:id="([^"]+)"/i);
248
- const typeMatch = rawXml.match(/Type="([^"]+)"/i);
345
+ // Pull metadata from the <o:OLEObject> element specifically, so a preview
346
+ // image's <v:imagedata r:id=""> earlier in the markup is not mistaken for
347
+ // the OLE binary's relationship id.
348
+ const oleTag = rawXml.match(/<o:OLEObject\b[^>]*>/i)?.[0] ?? rawXml;
349
+ const progIdMatch = oleTag.match(/ProgID="([^"]+)"/i) ?? oleTag.match(/progId="([^"]+)"/i);
350
+ const rIdMatch = oleTag.match(/r:id="([^"]+)"/i);
351
+ const typeMatch = oleTag.match(/Type="([^"]+)"/i);
249
352
  if (!progIdMatch) {
250
353
  return null;
251
354
  }
252
355
  const progId = progIdMatch[1];
253
356
  const rId = rIdMatch ? rIdMatch[1] : "";
254
357
  const objectType = typeMatch && typeMatch[1].toLowerCase().includes("link") ? "linked" : "embedded";
255
- // Extract dimensions
358
+ // Extract dimensions (from the surrounding shape, hence full rawXml)
256
359
  const widthMatch = rawXml.match(/(?:cx|width)="(\d+)"/i);
257
360
  const heightMatch = rawXml.match(/(?:cy|height)="(\d+)"/i);
258
361
  return {
@@ -139,14 +139,8 @@ export const DEFAULT_STYLE_MAP = {
139
139
  * ```
140
140
  */
141
141
  export function parseStyleMap(dsl, options) {
142
- const rules = [];
143
- // Include defaults if requested
144
- if (options?.includeDefaults !== false && options?.base) {
145
- rules.push(...options.base.rules);
146
- }
147
- else if (options?.includeDefaults !== false && !options?.base) {
148
- rules.push(...DEFAULT_STYLE_MAP.rules);
149
- }
142
+ // User-defined rules from the DSL.
143
+ const userRules = [];
150
144
  const lines = dsl
151
145
  .split("\n")
152
146
  .map(l => l.trim())
@@ -154,10 +148,37 @@ export function parseStyleMap(dsl, options) {
154
148
  for (const line of lines) {
155
149
  const rule = parseRule(line);
156
150
  if (rule) {
157
- rules.push(rule);
151
+ userRules.push(rule);
158
152
  }
159
153
  }
160
- // Sort by priority (highest first)
154
+ // Default / base rules requested via `includeDefaults`.
155
+ const defaultRules = [];
156
+ if (options?.includeDefaults !== false && options?.base) {
157
+ defaultRules.push(...options.base.rules);
158
+ }
159
+ else if (options?.includeDefaults !== false && !options?.base) {
160
+ defaultRules.push(...DEFAULT_STYLE_MAP.rules);
161
+ }
162
+ // An explicit DSL rule should always win over a default rule for the same
163
+ // element — that is the whole point of providing one. Default rules,
164
+ // however, carry their own priorities (e.g. "Heading 1" => h1 has priority
165
+ // 10) that can exceed the fixed priority `parseRule` assigns to user rules.
166
+ // To guarantee user intent wins while preserving the relative priority
167
+ // ordering *within* each group, lift every user rule above the highest
168
+ // default priority. When there are no defaults this offset is 0 and user
169
+ // priorities are untouched.
170
+ const maxDefaultPriority = defaultRules.reduce((m, r) => Math.max(m, r.priority ?? 0), 0);
171
+ const userOffset = defaultRules.length > 0 ? maxDefaultPriority + 1 : 0;
172
+ const liftedUserRules = userOffset === 0
173
+ ? userRules
174
+ : userRules.map(r => ({ ...r, priority: (r.priority ?? 0) + userOffset }));
175
+ // User rules come first so that, after a stable sort, an explicit DSL rule
176
+ // also wins over any default rule that happens to share its (lifted)
177
+ // priority. The sort below only reorders by priority; equal priorities
178
+ // preserve this user-before-default ordering.
179
+ const rules = [...liftedUserRules, ...defaultRules];
180
+ // Sort by priority (highest first). Array.prototype.sort is stable, so
181
+ // rules of equal priority keep their relative order (user rules first).
161
182
  rules.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
162
183
  return { rules };
163
184
  }
@@ -449,7 +449,13 @@ export function mathSubSuperScript(base, subScript, superScript) {
449
449
  export function mathPreSubSuperScript(base, preSubScript, preSuperScript) {
450
450
  return { type: "mathPreSubSuperScript", base, preSubScript, preSuperScript };
451
451
  }
452
- /** Create a math phantom (invisible expression that takes up space). */
452
+ /**
453
+ * Create a math phantom (an expression that takes up space).
454
+ *
455
+ * Note: in OOXML the phantom base is *shown* by default. To make the classic
456
+ * "occupies space but invisible" phantom pass `{ show: false }`; passing only
457
+ * `transparent: true` is not sufficient to hide the base in Word.
458
+ */
453
459
  export function mathPhantom(content, options) {
454
460
  return { type: "mathPhantom", content, ...options };
455
461
  }
@@ -268,7 +268,11 @@ export const ContentType = {
268
268
  ChartEx: "application/vnd.ms-office.chartEx+xml",
269
269
  Xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
270
270
  CustomXml: "application/xml",
271
- VbaProject: "application/vnd.ms-office.vbaProject"
271
+ VbaProject: "application/vnd.ms-office.vbaProject",
272
+ /** Glossary (Building Blocks) document part. */
273
+ Glossary: "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
274
+ /** OLE embedded object binary. */
275
+ OleObject: "application/vnd.openxmlformats-officedocument.oleObject"
272
276
  };
273
277
  /** Map from image file extension to content type. */
274
278
  export const IMAGE_CONTENT_TYPES = {