@cj-tech-master/excelts 9.6.1 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -3
- package/README_zh.md +18 -3
- package/dist/browser/modules/excel/cell.d.ts +4 -0
- package/dist/browser/modules/excel/note.js +5 -1
- package/dist/browser/modules/excel/row.js +35 -2
- package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
- package/dist/browser/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/browser/modules/excel/types.d.ts +81 -0
- package/dist/browser/modules/excel/utils/drawing-utils.d.ts +8 -0
- package/dist/browser/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/browser/modules/excel/workbook.browser.d.ts +16 -0
- package/dist/browser/modules/excel/workbook.browser.js +32 -2
- package/dist/browser/modules/excel/worksheet.d.ts +31 -1
- package/dist/browser/modules/excel/worksheet.js +83 -0
- package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
- package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/browser/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/browser/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/browser/modules/pdf/builder/document-builder.js +22 -49
- package/dist/browser/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/browser/modules/pdf/core/pdf-stream.d.ts +28 -1
- package/dist/browser/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/browser/modules/pdf/font/font-manager.d.ts +26 -0
- package/dist/browser/modules/pdf/font/font-manager.js +35 -18
- package/dist/browser/modules/pdf/render/page-renderer.d.ts +51 -3
- package/dist/browser/modules/pdf/render/page-renderer.js +111 -18
- package/dist/browser/modules/word/advanced/field-engine.js +45 -20
- package/dist/browser/modules/word/advanced/glossary.d.ts +10 -36
- package/dist/browser/modules/word/advanced/glossary.js +8 -9
- package/dist/browser/modules/word/advanced/math-convert.js +94 -12
- package/dist/browser/modules/word/advanced/ole-objects.d.ts +28 -0
- package/dist/browser/modules/word/advanced/ole-objects.js +122 -19
- package/dist/browser/modules/word/advanced/style-map.js +31 -10
- package/dist/browser/modules/word/builder/run-builders.d.ts +7 -1
- package/dist/browser/modules/word/builder/run-builders.js +7 -1
- package/dist/browser/modules/word/constants.d.ts +4 -0
- package/dist/browser/modules/word/constants.js +5 -1
- package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +2 -1
- package/dist/browser/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/browser/modules/word/convert/html/html-import.d.ts +32 -1
- package/dist/browser/modules/word/convert/html/html-import.js +167 -14
- package/dist/browser/modules/word/convert/html/html.d.ts +2 -2
- package/dist/browser/modules/word/convert/html/html.js +1 -1
- package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +48 -18
- package/dist/browser/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/browser/modules/word/convert/markdown/markdown.d.ts +1 -1
- package/dist/browser/modules/word/convert/odt/odt.js +407 -56
- package/dist/browser/modules/word/html.d.ts +2 -2
- package/dist/browser/modules/word/html.js +1 -1
- package/dist/browser/modules/word/index.base.d.ts +3 -3
- package/dist/browser/modules/word/index.base.js +1 -1
- package/dist/browser/modules/word/layout/layout-full.js +326 -19
- package/dist/browser/modules/word/layout/render-page.js +35 -8
- package/dist/browser/modules/word/markdown.d.ts +1 -1
- package/dist/browser/modules/word/query/compat.d.ts +10 -2
- package/dist/browser/modules/word/query/compat.js +29 -21
- package/dist/browser/modules/word/reader/docx-reader.js +105 -2
- package/dist/browser/modules/word/reader/math-parser.js +8 -2
- package/dist/browser/modules/word/security/cfb-reader.js +5 -5
- package/dist/browser/modules/word/types.d.ts +96 -1
- package/dist/browser/modules/word/writer/docx-packager.js +108 -2
- package/dist/browser/modules/word/writer/glossary-writer.d.ts +28 -0
- package/dist/browser/modules/word/writer/glossary-writer.js +121 -0
- package/dist/browser/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/browser/modules/word/writer/math-writer.js +7 -2
- package/dist/browser/utils/font-metrics.d.ts +8 -0
- package/dist/browser/utils/font-metrics.js +43 -0
- package/dist/browser/utils/theme-colors.js +4 -1
- package/dist/cjs/modules/excel/note.js +5 -1
- package/dist/cjs/modules/excel/row.js +35 -2
- package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/cjs/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/cjs/modules/excel/workbook.browser.js +31 -1
- package/dist/cjs/modules/excel/worksheet.js +83 -0
- package/dist/cjs/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/cjs/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/cjs/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/shape-xform.js +112 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/cjs/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/cjs/modules/pdf/builder/document-builder.js +21 -48
- package/dist/cjs/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/cjs/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/cjs/modules/pdf/font/font-manager.js +35 -18
- package/dist/cjs/modules/pdf/render/page-renderer.js +112 -18
- package/dist/cjs/modules/word/advanced/field-engine.js +45 -20
- package/dist/cjs/modules/word/advanced/glossary.js +8 -9
- package/dist/cjs/modules/word/advanced/math-convert.js +94 -12
- package/dist/cjs/modules/word/advanced/ole-objects.js +123 -19
- package/dist/cjs/modules/word/advanced/style-map.js +31 -10
- package/dist/cjs/modules/word/builder/run-builders.js +7 -1
- package/dist/cjs/modules/word/constants.js +5 -1
- package/dist/cjs/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/cjs/modules/word/convert/html/html-import.js +168 -14
- package/dist/cjs/modules/word/convert/html/html.js +2 -1
- package/dist/cjs/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/cjs/modules/word/convert/odt/odt.js +407 -56
- package/dist/cjs/modules/word/html.js +2 -1
- package/dist/cjs/modules/word/index.base.js +4 -3
- package/dist/cjs/modules/word/layout/layout-full.js +325 -18
- package/dist/cjs/modules/word/layout/render-page.js +35 -8
- package/dist/cjs/modules/word/query/compat.js +29 -21
- package/dist/cjs/modules/word/reader/docx-reader.js +104 -1
- package/dist/cjs/modules/word/reader/math-parser.js +8 -2
- package/dist/cjs/modules/word/security/cfb-reader.js +5 -5
- package/dist/cjs/modules/word/writer/docx-packager.js +108 -2
- package/dist/cjs/modules/word/writer/glossary-writer.js +124 -0
- package/dist/cjs/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/cjs/modules/word/writer/math-writer.js +7 -2
- package/dist/cjs/utils/font-metrics.js +44 -0
- package/dist/cjs/utils/theme-colors.js +4 -1
- package/dist/esm/modules/excel/note.js +5 -1
- package/dist/esm/modules/excel/row.js +35 -2
- package/dist/esm/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/esm/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/esm/modules/excel/workbook.browser.js +32 -2
- package/dist/esm/modules/excel/worksheet.js +83 -0
- package/dist/esm/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/esm/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/esm/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/esm/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/esm/modules/pdf/builder/document-builder.js +22 -49
- package/dist/esm/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/esm/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/esm/modules/pdf/font/font-manager.js +35 -18
- package/dist/esm/modules/pdf/render/page-renderer.js +111 -18
- package/dist/esm/modules/word/advanced/field-engine.js +45 -20
- package/dist/esm/modules/word/advanced/glossary.js +8 -9
- package/dist/esm/modules/word/advanced/math-convert.js +94 -12
- package/dist/esm/modules/word/advanced/ole-objects.js +122 -19
- package/dist/esm/modules/word/advanced/style-map.js +31 -10
- package/dist/esm/modules/word/builder/run-builders.js +7 -1
- package/dist/esm/modules/word/constants.js +5 -1
- package/dist/esm/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/esm/modules/word/convert/html/html-import.js +167 -14
- package/dist/esm/modules/word/convert/html/html.js +1 -1
- package/dist/esm/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/esm/modules/word/convert/odt/odt.js +407 -56
- package/dist/esm/modules/word/html.js +1 -1
- package/dist/esm/modules/word/index.base.js +1 -1
- package/dist/esm/modules/word/layout/layout-full.js +326 -19
- package/dist/esm/modules/word/layout/render-page.js +35 -8
- package/dist/esm/modules/word/query/compat.js +29 -21
- package/dist/esm/modules/word/reader/docx-reader.js +105 -2
- package/dist/esm/modules/word/reader/math-parser.js +8 -2
- package/dist/esm/modules/word/security/cfb-reader.js +5 -5
- package/dist/esm/modules/word/writer/docx-packager.js +108 -2
- package/dist/esm/modules/word/writer/glossary-writer.js +121 -0
- package/dist/esm/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/esm/modules/word/writer/math-writer.js +7 -2
- package/dist/esm/utils/font-metrics.js +43 -0
- package/dist/esm/utils/theme-colors.js +4 -1
- package/dist/iife/excelts.iife.js +496 -59
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +39 -39
- package/dist/types/modules/excel/cell.d.ts +4 -0
- package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
- package/dist/types/modules/excel/types.d.ts +81 -0
- package/dist/types/modules/excel/utils/drawing-utils.d.ts +8 -0
- package/dist/types/modules/excel/workbook.browser.d.ts +16 -0
- package/dist/types/modules/excel/worksheet.d.ts +31 -1
- package/dist/types/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
- package/dist/types/modules/pdf/core/pdf-stream.d.ts +28 -1
- package/dist/types/modules/pdf/font/font-manager.d.ts +26 -0
- package/dist/types/modules/pdf/render/page-renderer.d.ts +51 -3
- package/dist/types/modules/word/advanced/glossary.d.ts +10 -36
- package/dist/types/modules/word/advanced/ole-objects.d.ts +28 -0
- package/dist/types/modules/word/builder/run-builders.d.ts +7 -1
- package/dist/types/modules/word/constants.d.ts +4 -0
- package/dist/types/modules/word/convert/docx-to-semantic.d.ts +2 -1
- package/dist/types/modules/word/convert/html/html-import.d.ts +32 -1
- package/dist/types/modules/word/convert/html/html.d.ts +2 -2
- package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +48 -18
- package/dist/types/modules/word/convert/markdown/markdown.d.ts +1 -1
- package/dist/types/modules/word/html.d.ts +2 -2
- package/dist/types/modules/word/index.base.d.ts +3 -3
- package/dist/types/modules/word/markdown.d.ts +1 -1
- package/dist/types/modules/word/query/compat.d.ts +10 -2
- package/dist/types/modules/word/types.d.ts +96 -1
- package/dist/types/modules/word/writer/glossary-writer.d.ts +28 -0
- package/dist/types/utils/font-metrics.d.ts +8 -0
- package/package.json +3 -1
|
@@ -79,20 +79,98 @@ function convertNodeToMathML(node) {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
function convertMathRunToMathML(node) {
|
|
82
|
-
const
|
|
83
|
-
//
|
|
84
|
-
|
|
85
|
-
|
|
82
|
+
const raw = node.text;
|
|
83
|
+
// A single OMML run can hold a mix of letters, digits, operators and
|
|
84
|
+
// whitespace (e.g. "a + b = "). MathML presentation markup expects each
|
|
85
|
+
// token to be wrapped in the element matching its semantic category:
|
|
86
|
+
// <mi> for identifiers, <mn> for numbers, <mo> for operators and <mtext>
|
|
87
|
+
// for runs of whitespace / other text. Tokenize the run and emit one
|
|
88
|
+
// element per token so a mixed run is no longer flattened into a single
|
|
89
|
+
// (incorrect) <mi>.
|
|
90
|
+
const normalVariant = node.properties?.italic === false;
|
|
91
|
+
const tokens = tokenizeMathRun(raw);
|
|
92
|
+
// Fast path / backwards-compatibility: a run that is a single token keeps
|
|
93
|
+
// producing exactly one element (e.g. "x" -> <mi>x</mi>, "42" -> <mn>42</mn>,
|
|
94
|
+
// "+" -> <mo>+</mo>), matching the historical output.
|
|
95
|
+
return tokens.map(tok => emitMathToken(tok, normalVariant)).join("");
|
|
96
|
+
}
|
|
97
|
+
function classifyMathChar(ch) {
|
|
98
|
+
if (/\s/.test(ch)) {
|
|
99
|
+
return "text";
|
|
100
|
+
}
|
|
101
|
+
if (isOperator(ch)) {
|
|
102
|
+
return "operator";
|
|
103
|
+
}
|
|
104
|
+
if (/[0-9.]/.test(ch)) {
|
|
105
|
+
return "number";
|
|
106
|
+
}
|
|
107
|
+
if (/[A-Za-z]/.test(ch)) {
|
|
108
|
+
return "identifier";
|
|
109
|
+
}
|
|
110
|
+
// Letters outside ASCII (Greek, CJK, etc.) and any other symbol fall back to
|
|
111
|
+
// identifier — this matches how OMML treats variable names.
|
|
112
|
+
return "identifier";
|
|
113
|
+
}
|
|
114
|
+
function tokenizeMathRun(text) {
|
|
115
|
+
const tokens = [];
|
|
116
|
+
// Use code points so astral / combined characters are not split mid-symbol.
|
|
117
|
+
const chars = Array.from(text);
|
|
118
|
+
for (const ch of chars) {
|
|
119
|
+
const kind = classifyMathChar(ch);
|
|
120
|
+
const last = tokens[tokens.length - 1];
|
|
121
|
+
// Operators are always emitted as their own token (each operator is a
|
|
122
|
+
// distinct <mo>). Numbers, identifiers and text coalesce with the
|
|
123
|
+
// previous token of the same kind so "42" stays a single <mn> and a run
|
|
124
|
+
// of spaces stays a single <mtext>.
|
|
125
|
+
if (last && last.kind === kind && kind !== "operator") {
|
|
126
|
+
last.value += ch;
|
|
127
|
+
}
|
|
128
|
+
else {
|
|
129
|
+
tokens.push({ kind, value: ch });
|
|
130
|
+
}
|
|
86
131
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
132
|
+
return mergeDecimalPoints(tokens);
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* A "." between two digit groups is a decimal point, not an operator. The
|
|
136
|
+
* char classifier sees "." as an operator (it is a valid math operator on its
|
|
137
|
+
* own, e.g. function composition), so stitch `<number> "." <number>` back into
|
|
138
|
+
* a single number token here.
|
|
139
|
+
*/
|
|
140
|
+
function mergeDecimalPoints(tokens) {
|
|
141
|
+
const out = [];
|
|
142
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
143
|
+
const tok = tokens[i];
|
|
144
|
+
const prev = out[out.length - 1];
|
|
145
|
+
const next = tokens[i + 1];
|
|
146
|
+
if (tok.kind === "operator" &&
|
|
147
|
+
tok.value === "." &&
|
|
148
|
+
prev &&
|
|
149
|
+
prev.kind === "number" &&
|
|
150
|
+
next &&
|
|
151
|
+
next.kind === "number") {
|
|
152
|
+
prev.value += "." + next.value;
|
|
153
|
+
i++; // consume the following number token
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
156
|
+
out.push(tok);
|
|
90
157
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
158
|
+
return out;
|
|
159
|
+
}
|
|
160
|
+
function emitMathToken(tok, normalVariant) {
|
|
161
|
+
const text = xmlEncode(tok.value);
|
|
162
|
+
switch (tok.kind) {
|
|
163
|
+
case "operator":
|
|
164
|
+
return `<mo>${text}</mo>`;
|
|
165
|
+
case "number":
|
|
166
|
+
// A lone "." is not a number; treat it as an operator/text fallback.
|
|
167
|
+
return /\d/.test(tok.value) ? `<mn>${text}</mn>` : `<mo>${text}</mo>`;
|
|
168
|
+
case "text":
|
|
169
|
+
return `<mtext>${text}</mtext>`;
|
|
170
|
+
case "identifier":
|
|
171
|
+
default:
|
|
172
|
+
return normalVariant ? `<mi mathvariant="normal">${text}</mi>` : `<mi>${text}</mi>`;
|
|
94
173
|
}
|
|
95
|
-
return `<mi>${text}</mi>`;
|
|
96
174
|
}
|
|
97
175
|
function convertFractionToMathML(node) {
|
|
98
176
|
const num = childrenToMathML(node.numerator);
|
|
@@ -254,7 +332,11 @@ function convertMMLElement(el) {
|
|
|
254
332
|
}
|
|
255
333
|
case "msqrt": {
|
|
256
334
|
const content = convertMMLChildren(el.children);
|
|
257
|
-
|
|
335
|
+
// A bare square root has no degree. OOXML still emits an (empty)
|
|
336
|
+
// <m:deg/>, so we must set hideDegree → <m:degHide m:val="1"/>;
|
|
337
|
+
// otherwise Word treats the empty degree as visible and draws an empty
|
|
338
|
+
// degree box (a small square) at the radical's upper-left.
|
|
339
|
+
return { type: "mathRadical", content, hideDegree: true };
|
|
258
340
|
}
|
|
259
341
|
case "mroot": {
|
|
260
342
|
const children = getElementChildren(el);
|
|
@@ -81,6 +81,8 @@ export interface OleEmbeddingResult {
|
|
|
81
81
|
readonly olePart: OpaquePart;
|
|
82
82
|
/** Suggested rId to use for the OLE binary in the document model. */
|
|
83
83
|
readonly oleRId: string;
|
|
84
|
+
/** OLE ProgId the embedding was created with (e.g. "Excel.Sheet.12"). */
|
|
85
|
+
readonly progId: string;
|
|
84
86
|
/** Preview image media part (only when `options.previewImage` was supplied). */
|
|
85
87
|
readonly previewPart?: OpaquePart;
|
|
86
88
|
/** Suggested rId for the preview image. */
|
|
@@ -113,3 +115,29 @@ export declare function createOleEmbedding(data: Uint8Array, progId: string, opt
|
|
|
113
115
|
/** Override preview file name. Defaults to `image<N>.<ext>` from previewContentType. */
|
|
114
116
|
previewFileName?: string;
|
|
115
117
|
}): OleEmbeddingResult;
|
|
118
|
+
/**
|
|
119
|
+
* Wire an {@link OleEmbeddingResult} into a document so the OLE object is
|
|
120
|
+
* actually rendered and resolvable, returning a new {@link DocxDocument}.
|
|
121
|
+
*
|
|
122
|
+
* Unlike just stuffing the part into `opaqueParts` (which leaves the binary
|
|
123
|
+
* dangling — no relationship, no body reference), this:
|
|
124
|
+
*
|
|
125
|
+
* - registers the OLE binary (and optional preview) on
|
|
126
|
+
* `doc.oleObjects` so the packager emits a `word/_rels/document.xml.rels`
|
|
127
|
+
* relationship with the exact rId and a `[Content_Types].xml` override;
|
|
128
|
+
* - appends a body paragraph carrying a `<w:object>` / `<o:OLEObject>`
|
|
129
|
+
* that references the same rId and embeds the ProgId, so the object is
|
|
130
|
+
* visible in Word and round-trips through `readDocx`.
|
|
131
|
+
*
|
|
132
|
+
* @param doc - The document to add the OLE object to.
|
|
133
|
+
* @param embedding - Result from {@link createOleEmbedding}.
|
|
134
|
+
* @param options - Display geometry (defaults to a 2"×2" icon box).
|
|
135
|
+
*/
|
|
136
|
+
export declare function addOleObject(doc: DocxDocument, embedding: OleEmbeddingResult, options?: {
|
|
137
|
+
/** Display width in points (default 96 = 2 inches at 48pt/in icon). */
|
|
138
|
+
widthPt?: number;
|
|
139
|
+
/** Display height in points (default 96). */
|
|
140
|
+
heightPt?: number;
|
|
141
|
+
/** Display mode. Default "icon". */
|
|
142
|
+
displayAs?: OleDisplayAs;
|
|
143
|
+
}): DocxDocument;
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
* This module focuses on preservation (round-trip) and metadata extraction,
|
|
14
14
|
* not full OLE compound document manipulation.
|
|
15
15
|
*/
|
|
16
|
+
import { xmlEncodeAttr } from "../../xml/encode.js";
|
|
17
|
+
import { ContentType } from "../constants.js";
|
|
16
18
|
import { getFileName } from "../core/opc-paths.js";
|
|
17
19
|
// =============================================================================
|
|
18
20
|
// OLE Object Extraction
|
|
@@ -28,14 +30,33 @@ import { getFileName } from "../core/opc-paths.js";
|
|
|
28
30
|
export function extractOleObjects(doc) {
|
|
29
31
|
const objects = [];
|
|
30
32
|
const summary = {};
|
|
31
|
-
|
|
33
|
+
const pushObject = (obj) => {
|
|
34
|
+
objects.push(obj);
|
|
35
|
+
summary[obj.progId] = (summary[obj.progId] ?? 0) + 1;
|
|
36
|
+
};
|
|
37
|
+
// Structured OLE objects wired on document.xml.rels (preferred form —
|
|
38
|
+
// these carry the real rId and, when available, the round-tripped progId).
|
|
39
|
+
if (doc.oleObjects) {
|
|
40
|
+
for (const ole of doc.oleObjects) {
|
|
41
|
+
pushObject({
|
|
42
|
+
rId: ole.rId,
|
|
43
|
+
progId: ole.progId ?? detectProgIdFromData(ole.data),
|
|
44
|
+
objectType: "embedded",
|
|
45
|
+
displayAs: "icon",
|
|
46
|
+
imageRId: ole.previewRId,
|
|
47
|
+
fileName: getFileName(ole.path),
|
|
48
|
+
data: ole.data
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
// Scan opaque parts for OLE embeddings (legacy / hand-built documents that
|
|
53
|
+
// did not go through the structured oleObjects channel).
|
|
32
54
|
if (doc.opaqueParts) {
|
|
33
55
|
for (const part of doc.opaqueParts) {
|
|
34
56
|
if (isOleEmbedding(part.path)) {
|
|
35
57
|
const obj = parseOlePartMetadata(part);
|
|
36
58
|
if (obj) {
|
|
37
|
-
|
|
38
|
-
summary[obj.progId] = (summary[obj.progId] ?? 0) + 1;
|
|
59
|
+
pushObject(obj);
|
|
39
60
|
}
|
|
40
61
|
}
|
|
41
62
|
}
|
|
@@ -45,11 +66,10 @@ export function extractOleObjects(doc) {
|
|
|
45
66
|
if (element.type === "opaqueDrawing") {
|
|
46
67
|
const oleFromDrawing = extractOleFromRawXml(element.rawXml);
|
|
47
68
|
if (oleFromDrawing) {
|
|
48
|
-
// Check if we already have this object
|
|
49
|
-
const exists = objects.some(o => o.rId === oleFromDrawing.rId);
|
|
69
|
+
// Check if we already have this object (by rId)
|
|
70
|
+
const exists = objects.some(o => o.rId === oleFromDrawing.rId && o.rId !== "");
|
|
50
71
|
if (!exists) {
|
|
51
|
-
|
|
52
|
-
summary[oleFromDrawing.progId] = (summary[oleFromDrawing.progId] ?? 0) + 1;
|
|
72
|
+
pushObject(oleFromDrawing);
|
|
53
73
|
}
|
|
54
74
|
}
|
|
55
75
|
}
|
|
@@ -60,6 +80,9 @@ export function extractOleObjects(doc) {
|
|
|
60
80
|
* Check if a document contains any OLE embedded objects.
|
|
61
81
|
*/
|
|
62
82
|
export function hasOleObjects(doc) {
|
|
83
|
+
if (doc.oleObjects && doc.oleObjects.length > 0) {
|
|
84
|
+
return true;
|
|
85
|
+
}
|
|
63
86
|
if (doc.opaqueParts) {
|
|
64
87
|
for (const part of doc.opaqueParts) {
|
|
65
88
|
if (isOleEmbedding(part.path)) {
|
|
@@ -74,6 +97,14 @@ export function hasOleObjects(doc) {
|
|
|
74
97
|
* Returns undefined if not found.
|
|
75
98
|
*/
|
|
76
99
|
export function getOleObjectData(doc, rId) {
|
|
100
|
+
// Structured OLE objects carry the exact rId used on document.xml.rels.
|
|
101
|
+
if (doc.oleObjects) {
|
|
102
|
+
for (const ole of doc.oleObjects) {
|
|
103
|
+
if (ole.rId === rId) {
|
|
104
|
+
return ole.data;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
77
108
|
if (!doc.opaqueParts) {
|
|
78
109
|
return undefined;
|
|
79
110
|
}
|
|
@@ -126,16 +157,14 @@ export function createOleEmbedding(data, progId, options) {
|
|
|
126
157
|
const olePart = {
|
|
127
158
|
path: `word/embeddings/${fileName}`,
|
|
128
159
|
data,
|
|
129
|
-
contentType:
|
|
160
|
+
contentType: ContentType.OleObject,
|
|
130
161
|
relationships: undefined
|
|
131
162
|
};
|
|
132
163
|
const oleRId = `rIdOle${oleSeq}`;
|
|
133
|
-
// progId is
|
|
134
|
-
//
|
|
135
|
-
// alongside without a separate channel. We don't need it here.
|
|
136
|
-
void progId;
|
|
164
|
+
// progId is carried back on the result so addOleObject() can persist it
|
|
165
|
+
// into the body `<o:OLEObject ProgID="…">` markup for round-trip.
|
|
137
166
|
if (!options?.previewImage) {
|
|
138
|
-
return { olePart, oleRId };
|
|
167
|
+
return { olePart, oleRId, progId };
|
|
139
168
|
}
|
|
140
169
|
if (!options.previewContentType) {
|
|
141
170
|
throw new Error("createOleEmbedding: options.previewImage requires options.previewContentType");
|
|
@@ -150,7 +179,78 @@ export function createOleEmbedding(data, progId, options) {
|
|
|
150
179
|
relationships: undefined
|
|
151
180
|
};
|
|
152
181
|
const previewRId = `rIdOleImg${previewSeq}`;
|
|
153
|
-
return { olePart, oleRId, previewPart, previewRId };
|
|
182
|
+
return { olePart, oleRId, progId, previewPart, previewRId };
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Wire an {@link OleEmbeddingResult} into a document so the OLE object is
|
|
186
|
+
* actually rendered and resolvable, returning a new {@link DocxDocument}.
|
|
187
|
+
*
|
|
188
|
+
* Unlike just stuffing the part into `opaqueParts` (which leaves the binary
|
|
189
|
+
* dangling — no relationship, no body reference), this:
|
|
190
|
+
*
|
|
191
|
+
* - registers the OLE binary (and optional preview) on
|
|
192
|
+
* `doc.oleObjects` so the packager emits a `word/_rels/document.xml.rels`
|
|
193
|
+
* relationship with the exact rId and a `[Content_Types].xml` override;
|
|
194
|
+
* - appends a body paragraph carrying a `<w:object>` / `<o:OLEObject>`
|
|
195
|
+
* that references the same rId and embeds the ProgId, so the object is
|
|
196
|
+
* visible in Word and round-trips through `readDocx`.
|
|
197
|
+
*
|
|
198
|
+
* @param doc - The document to add the OLE object to.
|
|
199
|
+
* @param embedding - Result from {@link createOleEmbedding}.
|
|
200
|
+
* @param options - Display geometry (defaults to a 2"×2" icon box).
|
|
201
|
+
*/
|
|
202
|
+
export function addOleObject(doc, embedding, options) {
|
|
203
|
+
const widthPt = options?.widthPt ?? 96;
|
|
204
|
+
const heightPt = options?.heightPt ?? 96;
|
|
205
|
+
const drawAspect = (options?.displayAs ?? "icon") === "icon" ? "Icon" : "Content";
|
|
206
|
+
const olePartEntry = {
|
|
207
|
+
path: embedding.olePart.path,
|
|
208
|
+
data: embedding.olePart.data,
|
|
209
|
+
rId: embedding.oleRId,
|
|
210
|
+
progId: embedding.progId,
|
|
211
|
+
contentType: embedding.olePart.contentType,
|
|
212
|
+
...(embedding.previewPart && embedding.previewRId
|
|
213
|
+
? {
|
|
214
|
+
previewPath: embedding.previewPart.path,
|
|
215
|
+
previewData: embedding.previewPart.data,
|
|
216
|
+
previewRId: embedding.previewRId,
|
|
217
|
+
previewContentType: embedding.previewPart.contentType
|
|
218
|
+
}
|
|
219
|
+
: {})
|
|
220
|
+
};
|
|
221
|
+
// Build the VML-hosted <w:object>. The o:OLEObject carries ProgID + the
|
|
222
|
+
// r:id of the binary; the v:shape provides geometry and (when present) the
|
|
223
|
+
// preview image fill via v:imagedata. This is the canonical OOXML shape for
|
|
224
|
+
// an embedded OLE object (ECMA-376 §17.3.3.19 + VML).
|
|
225
|
+
const shapeId = `_ole_${embedding.oleRId}`;
|
|
226
|
+
const styleWidth = widthPt.toFixed(0);
|
|
227
|
+
const styleHeight = heightPt.toFixed(0);
|
|
228
|
+
const imageData = embedding.previewRId != null
|
|
229
|
+
? `<v:imagedata r:id="${xmlEncodeAttr(embedding.previewRId)}" o:title=""/>`
|
|
230
|
+
: "";
|
|
231
|
+
const rawXml = `<w:object>` +
|
|
232
|
+
`<v:shape id="${xmlEncodeAttr(shapeId)}" type="#_x0000_t75" ` +
|
|
233
|
+
`style="width:${styleWidth}pt;height:${styleHeight}pt">` +
|
|
234
|
+
imageData +
|
|
235
|
+
`</v:shape>` +
|
|
236
|
+
`<o:OLEObject Type="Embed" ProgID="${xmlEncodeAttr(embedding.progId)}" ` +
|
|
237
|
+
`ShapeID="${xmlEncodeAttr(shapeId)}" DrawAspect="${drawAspect}" ` +
|
|
238
|
+
`r:id="${xmlEncodeAttr(embedding.oleRId)}"/>` +
|
|
239
|
+
`</w:object>`;
|
|
240
|
+
const referencedRIds = [embedding.oleRId];
|
|
241
|
+
if (embedding.previewRId != null) {
|
|
242
|
+
referencedRIds.push(embedding.previewRId);
|
|
243
|
+
}
|
|
244
|
+
const drawing = {
|
|
245
|
+
type: "opaqueDrawing",
|
|
246
|
+
rawXml,
|
|
247
|
+
referencedRIds
|
|
248
|
+
};
|
|
249
|
+
return {
|
|
250
|
+
...doc,
|
|
251
|
+
body: [...doc.body, drawing],
|
|
252
|
+
oleObjects: [...(doc.oleObjects ?? []), olePartEntry]
|
|
253
|
+
};
|
|
154
254
|
}
|
|
155
255
|
/** Module-level counters used to allocate unique file names per call. */
|
|
156
256
|
let _oleSeq = 0;
|
|
@@ -242,17 +342,20 @@ function tryDecodeAscii(data) {
|
|
|
242
342
|
return str;
|
|
243
343
|
}
|
|
244
344
|
function extractOleFromRawXml(rawXml) {
|
|
245
|
-
//
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
const
|
|
345
|
+
// Pull metadata from the <o:OLEObject> element specifically, so a preview
|
|
346
|
+
// image's <v:imagedata r:id="…"> earlier in the markup is not mistaken for
|
|
347
|
+
// the OLE binary's relationship id.
|
|
348
|
+
const oleTag = rawXml.match(/<o:OLEObject\b[^>]*>/i)?.[0] ?? rawXml;
|
|
349
|
+
const progIdMatch = oleTag.match(/ProgID="([^"]+)"/i) ?? oleTag.match(/progId="([^"]+)"/i);
|
|
350
|
+
const rIdMatch = oleTag.match(/r:id="([^"]+)"/i);
|
|
351
|
+
const typeMatch = oleTag.match(/Type="([^"]+)"/i);
|
|
249
352
|
if (!progIdMatch) {
|
|
250
353
|
return null;
|
|
251
354
|
}
|
|
252
355
|
const progId = progIdMatch[1];
|
|
253
356
|
const rId = rIdMatch ? rIdMatch[1] : "";
|
|
254
357
|
const objectType = typeMatch && typeMatch[1].toLowerCase().includes("link") ? "linked" : "embedded";
|
|
255
|
-
// Extract dimensions
|
|
358
|
+
// Extract dimensions (from the surrounding shape, hence full rawXml)
|
|
256
359
|
const widthMatch = rawXml.match(/(?:cx|width)="(\d+)"/i);
|
|
257
360
|
const heightMatch = rawXml.match(/(?:cy|height)="(\d+)"/i);
|
|
258
361
|
return {
|
|
@@ -139,14 +139,8 @@ export const DEFAULT_STYLE_MAP = {
|
|
|
139
139
|
* ```
|
|
140
140
|
*/
|
|
141
141
|
export function parseStyleMap(dsl, options) {
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if (options?.includeDefaults !== false && options?.base) {
|
|
145
|
-
rules.push(...options.base.rules);
|
|
146
|
-
}
|
|
147
|
-
else if (options?.includeDefaults !== false && !options?.base) {
|
|
148
|
-
rules.push(...DEFAULT_STYLE_MAP.rules);
|
|
149
|
-
}
|
|
142
|
+
// User-defined rules from the DSL.
|
|
143
|
+
const userRules = [];
|
|
150
144
|
const lines = dsl
|
|
151
145
|
.split("\n")
|
|
152
146
|
.map(l => l.trim())
|
|
@@ -154,10 +148,37 @@ export function parseStyleMap(dsl, options) {
|
|
|
154
148
|
for (const line of lines) {
|
|
155
149
|
const rule = parseRule(line);
|
|
156
150
|
if (rule) {
|
|
157
|
-
|
|
151
|
+
userRules.push(rule);
|
|
158
152
|
}
|
|
159
153
|
}
|
|
160
|
-
//
|
|
154
|
+
// Default / base rules requested via `includeDefaults`.
|
|
155
|
+
const defaultRules = [];
|
|
156
|
+
if (options?.includeDefaults !== false && options?.base) {
|
|
157
|
+
defaultRules.push(...options.base.rules);
|
|
158
|
+
}
|
|
159
|
+
else if (options?.includeDefaults !== false && !options?.base) {
|
|
160
|
+
defaultRules.push(...DEFAULT_STYLE_MAP.rules);
|
|
161
|
+
}
|
|
162
|
+
// An explicit DSL rule should always win over a default rule for the same
|
|
163
|
+
// element — that is the whole point of providing one. Default rules,
|
|
164
|
+
// however, carry their own priorities (e.g. "Heading 1" => h1 has priority
|
|
165
|
+
// 10) that can exceed the fixed priority `parseRule` assigns to user rules.
|
|
166
|
+
// To guarantee user intent wins while preserving the relative priority
|
|
167
|
+
// ordering *within* each group, lift every user rule above the highest
|
|
168
|
+
// default priority. When there are no defaults this offset is 0 and user
|
|
169
|
+
// priorities are untouched.
|
|
170
|
+
const maxDefaultPriority = defaultRules.reduce((m, r) => Math.max(m, r.priority ?? 0), 0);
|
|
171
|
+
const userOffset = defaultRules.length > 0 ? maxDefaultPriority + 1 : 0;
|
|
172
|
+
const liftedUserRules = userOffset === 0
|
|
173
|
+
? userRules
|
|
174
|
+
: userRules.map(r => ({ ...r, priority: (r.priority ?? 0) + userOffset }));
|
|
175
|
+
// User rules come first so that, after a stable sort, an explicit DSL rule
|
|
176
|
+
// also wins over any default rule that happens to share its (lifted)
|
|
177
|
+
// priority. The sort below only reorders by priority; equal priorities
|
|
178
|
+
// preserve this user-before-default ordering.
|
|
179
|
+
const rules = [...liftedUserRules, ...defaultRules];
|
|
180
|
+
// Sort by priority (highest first). Array.prototype.sort is stable, so
|
|
181
|
+
// rules of equal priority keep their relative order (user rules first).
|
|
161
182
|
rules.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
|
|
162
183
|
return { rules };
|
|
163
184
|
}
|
|
@@ -263,7 +263,13 @@ export declare function mathSubScript(base: MathContent[], subScript: MathConten
|
|
|
263
263
|
export declare function mathSubSuperScript(base: MathContent[], subScript: MathContent[], superScript: MathContent[]): MathContent;
|
|
264
264
|
/** Create a math pre-sub-superscript (subscript/superscript before the base). */
|
|
265
265
|
export declare function mathPreSubSuperScript(base: MathContent[], preSubScript: MathContent[], preSuperScript: MathContent[]): MathContent;
|
|
266
|
-
/**
|
|
266
|
+
/**
|
|
267
|
+
* Create a math phantom (an expression that takes up space).
|
|
268
|
+
*
|
|
269
|
+
* Note: in OOXML the phantom base is *shown* by default. To make the classic
|
|
270
|
+
* "occupies space but invisible" phantom pass `{ show: false }`; passing only
|
|
271
|
+
* `transparent: true` is not sufficient to hide the base in Word.
|
|
272
|
+
*/
|
|
267
273
|
export declare function mathPhantom(content: MathContent[], options?: {
|
|
268
274
|
show?: boolean;
|
|
269
275
|
zeroWidth?: boolean;
|
|
@@ -449,7 +449,13 @@ export function mathSubSuperScript(base, subScript, superScript) {
|
|
|
449
449
|
export function mathPreSubSuperScript(base, preSubScript, preSuperScript) {
|
|
450
450
|
return { type: "mathPreSubSuperScript", base, preSubScript, preSuperScript };
|
|
451
451
|
}
|
|
452
|
-
/**
|
|
452
|
+
/**
|
|
453
|
+
* Create a math phantom (an expression that takes up space).
|
|
454
|
+
*
|
|
455
|
+
* Note: in OOXML the phantom base is *shown* by default. To make the classic
|
|
456
|
+
* "occupies space but invisible" phantom pass `{ show: false }`; passing only
|
|
457
|
+
* `transparent: true` is not sufficient to hide the base in Word.
|
|
458
|
+
*/
|
|
453
459
|
export function mathPhantom(content, options) {
|
|
454
460
|
return { type: "mathPhantom", content, ...options };
|
|
455
461
|
}
|
|
@@ -168,6 +168,10 @@ export declare const ContentType: {
|
|
|
168
168
|
readonly Xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
|
|
169
169
|
readonly CustomXml: "application/xml";
|
|
170
170
|
readonly VbaProject: "application/vnd.ms-office.vbaProject";
|
|
171
|
+
/** Glossary (Building Blocks) document part. */
|
|
172
|
+
readonly Glossary: "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml";
|
|
173
|
+
/** OLE embedded object binary. */
|
|
174
|
+
readonly OleObject: "application/vnd.openxmlformats-officedocument.oleObject";
|
|
171
175
|
};
|
|
172
176
|
/** Map from image file extension to content type. */
|
|
173
177
|
export declare const IMAGE_CONTENT_TYPES: Record<string, string>;
|
|
@@ -268,7 +268,11 @@ export const ContentType = {
|
|
|
268
268
|
ChartEx: "application/vnd.ms-office.chartEx+xml",
|
|
269
269
|
Xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
270
270
|
CustomXml: "application/xml",
|
|
271
|
-
VbaProject: "application/vnd.ms-office.vbaProject"
|
|
271
|
+
VbaProject: "application/vnd.ms-office.vbaProject",
|
|
272
|
+
/** Glossary (Building Blocks) document part. */
|
|
273
|
+
Glossary: "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
|
|
274
|
+
/** OLE embedded object binary. */
|
|
275
|
+
OleObject: "application/vnd.openxmlformats-officedocument.oleObject"
|
|
272
276
|
};
|
|
273
277
|
/** Map from image file extension to content type. */
|
|
274
278
|
export const IMAGE_CONTENT_TYPES = {
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
* - Hyperlink extraction
|
|
12
12
|
* - Image registration into ConversionContext
|
|
13
13
|
* - Table structure with merge (colSpan/rowSpan)
|
|
14
|
-
* - List/numbering detection
|
|
14
|
+
* - List/numbering detection: consecutive numbered paragraphs are aggregated
|
|
15
|
+
* into ordered/unordered `list` blocks with nested sub-lists by level
|
|
15
16
|
* - Footnote/endnote reference and content
|
|
16
17
|
* - Math content (text fallback)
|
|
17
18
|
*/
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
* - Hyperlink extraction
|
|
12
12
|
* - Image registration into ConversionContext
|
|
13
13
|
* - Table structure with merge (colSpan/rowSpan)
|
|
14
|
-
* - List/numbering detection
|
|
14
|
+
* - List/numbering detection: consecutive numbered paragraphs are aggregated
|
|
15
|
+
* into ordered/unordered `list` blocks with nested sub-lists by level
|
|
15
16
|
* - Footnote/endnote reference and content
|
|
16
17
|
* - Math content (text fallback)
|
|
17
18
|
*/
|
|
@@ -93,6 +94,26 @@ function convertBodyContent(body, doc, ctx, imageMap) {
|
|
|
93
94
|
const item = body[bodyIndex];
|
|
94
95
|
switch (item.type) {
|
|
95
96
|
case "paragraph":
|
|
97
|
+
// A run of consecutive list-item paragraphs (each carrying a
|
|
98
|
+
// numbering reference, and not a heading) is aggregated into a single
|
|
99
|
+
// semantic `list` block with nested sub-lists driven by the numbering
|
|
100
|
+
// level. This is what turns Word numbering into real <ul>/<ol> in
|
|
101
|
+
// HTML and `-`/`1.` markers in Markdown when downstream renderers
|
|
102
|
+
// consume the IR.
|
|
103
|
+
if (isListItemParagraph(item)) {
|
|
104
|
+
let end = bodyIndex;
|
|
105
|
+
while (end < body.length) {
|
|
106
|
+
const next = body[end];
|
|
107
|
+
if (next.type !== "paragraph" || !isListItemParagraph(next)) {
|
|
108
|
+
break;
|
|
109
|
+
}
|
|
110
|
+
end++;
|
|
111
|
+
}
|
|
112
|
+
const listParas = body.slice(bodyIndex, end);
|
|
113
|
+
blocks.push(...buildListBlocks(listParas, doc, ctx, imageMap));
|
|
114
|
+
bodyIndex = end - 1; // loop's ++ advances past the consumed run
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
96
117
|
blocks.push(convertParagraph(item, doc, ctx, imageMap));
|
|
97
118
|
break;
|
|
98
119
|
case "table":
|
|
@@ -240,6 +261,119 @@ function convertBodyContent(body, doc, ctx, imageMap) {
|
|
|
240
261
|
return blocks;
|
|
241
262
|
}
|
|
242
263
|
// =============================================================================
|
|
264
|
+
// Internal: List Aggregation
|
|
265
|
+
// =============================================================================
|
|
266
|
+
/**
|
|
267
|
+
* Whether a body paragraph should render as a list item: it carries a
|
|
268
|
+
* numbering reference and is not itself a heading (a numbered heading stays a
|
|
269
|
+
* heading, mirroring the markdown/html renderers).
|
|
270
|
+
*/
|
|
271
|
+
function isListItemParagraph(item) {
|
|
272
|
+
if (item.type !== "paragraph") {
|
|
273
|
+
return false;
|
|
274
|
+
}
|
|
275
|
+
return item.properties?.numbering !== undefined && detectHeadingLevel(item) === null;
|
|
276
|
+
}
|
|
277
|
+
/**
|
|
278
|
+
* Resolve a numbering reference to its number format string (e.g. "decimal",
|
|
279
|
+
* "bullet"). Mirrors the lookup in the markdown/html renderers so the three
|
|
280
|
+
* surfaces classify ordered vs. unordered lists identically. Defaults to
|
|
281
|
+
* "bullet" when the numbering definition can't be resolved.
|
|
282
|
+
*/
|
|
283
|
+
function getNumberingFormat(doc, numId, level) {
|
|
284
|
+
const instance = doc.numberingInstances?.find(n => n.numId === numId);
|
|
285
|
+
if (!instance) {
|
|
286
|
+
return "bullet";
|
|
287
|
+
}
|
|
288
|
+
const abstractNum = doc.abstractNumberings?.find(a => a.abstractNumId === instance.abstractNumId);
|
|
289
|
+
if (!abstractNum) {
|
|
290
|
+
return "bullet";
|
|
291
|
+
}
|
|
292
|
+
const levelDef = abstractNum.levels.find(l => l.level === level);
|
|
293
|
+
return levelDef?.format ?? "bullet";
|
|
294
|
+
}
|
|
295
|
+
/** A number format other than "bullet"/"none" denotes an ordered list. */
|
|
296
|
+
function isOrderedFormat(format) {
|
|
297
|
+
return format !== "bullet" && format !== "none";
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* Build one or more semantic `list` blocks from a contiguous run of list-item
|
|
301
|
+
* paragraphs. Paragraphs are nested by their numbering `level`; a deeper level
|
|
302
|
+
* becomes a `subList` of the preceding shallower item. Adjacent items that
|
|
303
|
+
* switch between ordered and unordered at the same level start a new sibling
|
|
304
|
+
* list so the ordered/unordered distinction is preserved.
|
|
305
|
+
*/
|
|
306
|
+
function buildListBlocks(paras, doc, ctx, imageMap) {
|
|
307
|
+
const { blocks } = buildListLevel(paras, 0, 0, doc, ctx, imageMap);
|
|
308
|
+
return blocks;
|
|
309
|
+
}
|
|
310
|
+
/**
|
|
311
|
+
* Consume paragraphs starting at `start` that belong to `level` (or deeper),
|
|
312
|
+
* emitting sibling lists for this level. Deeper-level paragraphs are folded
|
|
313
|
+
* into the current item's `subList` via recursion. Returns the produced blocks
|
|
314
|
+
* and the index of the first paragraph that no longer belongs to this level.
|
|
315
|
+
*/
|
|
316
|
+
function buildListLevel(paras, start, level, doc, ctx, imageMap) {
|
|
317
|
+
const blocks = [];
|
|
318
|
+
let i = start;
|
|
319
|
+
let currentOrdered = null;
|
|
320
|
+
let items = [];
|
|
321
|
+
const flush = () => {
|
|
322
|
+
if (items.length > 0 && currentOrdered !== null) {
|
|
323
|
+
blocks.push({ type: "list", ordered: currentOrdered, items });
|
|
324
|
+
items = [];
|
|
325
|
+
}
|
|
326
|
+
};
|
|
327
|
+
while (i < paras.length) {
|
|
328
|
+
const para = paras[i];
|
|
329
|
+
const num = para.properties?.numbering;
|
|
330
|
+
// Defensive: callers only pass list-item paragraphs, but guard anyway.
|
|
331
|
+
if (!num) {
|
|
332
|
+
break;
|
|
333
|
+
}
|
|
334
|
+
if (num.level < level) {
|
|
335
|
+
// Belongs to a shallower list — let the caller handle it.
|
|
336
|
+
break;
|
|
337
|
+
}
|
|
338
|
+
if (num.level > level) {
|
|
339
|
+
// Deeper item with no shallower parent at this position: descend and
|
|
340
|
+
// attach the nested list to the most recent item, or synthesise an
|
|
341
|
+
// empty item to host it when there is no parent.
|
|
342
|
+
const { blocks: subBlocks, next } = buildListLevel(paras, i, num.level, doc, ctx, imageMap);
|
|
343
|
+
const subList = subBlocks[0];
|
|
344
|
+
if (items.length > 0) {
|
|
345
|
+
const last = items[items.length - 1];
|
|
346
|
+
items[items.length - 1] = { ...last, subList };
|
|
347
|
+
}
|
|
348
|
+
else if (subList) {
|
|
349
|
+
// Promote the deeper list to this level when there is no parent item.
|
|
350
|
+
if (currentOrdered === null && subList.type === "list") {
|
|
351
|
+
currentOrdered = subList.ordered;
|
|
352
|
+
}
|
|
353
|
+
items.push({ children: [], subList });
|
|
354
|
+
}
|
|
355
|
+
i = next;
|
|
356
|
+
continue;
|
|
357
|
+
}
|
|
358
|
+
// num.level === level
|
|
359
|
+
const format = getNumberingFormat(doc, num.numId, num.level);
|
|
360
|
+
const ordered = isOrderedFormat(format);
|
|
361
|
+
if (currentOrdered === null) {
|
|
362
|
+
currentOrdered = ordered;
|
|
363
|
+
}
|
|
364
|
+
else if (ordered !== currentOrdered) {
|
|
365
|
+
// Ordered/unordered switch at the same level → start a new sibling list.
|
|
366
|
+
flush();
|
|
367
|
+
currentOrdered = ordered;
|
|
368
|
+
}
|
|
369
|
+
const children = convertParagraphChildren(para.children, doc, ctx, imageMap);
|
|
370
|
+
items.push({ children });
|
|
371
|
+
i++;
|
|
372
|
+
}
|
|
373
|
+
flush();
|
|
374
|
+
return { blocks, next: i };
|
|
375
|
+
}
|
|
376
|
+
// =============================================================================
|
|
243
377
|
// Internal: Paragraph Conversion
|
|
244
378
|
// =============================================================================
|
|
245
379
|
function convertParagraph(para, doc, ctx, imageMap) {
|