@cj-tech-master/excelts 9.6.1 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -3
- package/README_zh.md +18 -3
- package/dist/browser/modules/excel/cell.d.ts +4 -0
- package/dist/browser/modules/excel/note.js +5 -1
- package/dist/browser/modules/excel/row.js +35 -2
- package/dist/browser/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
- package/dist/browser/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/browser/modules/excel/types.d.ts +81 -0
- package/dist/browser/modules/excel/utils/drawing-utils.d.ts +8 -0
- package/dist/browser/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/browser/modules/excel/workbook.browser.d.ts +16 -0
- package/dist/browser/modules/excel/workbook.browser.js +32 -2
- package/dist/browser/modules/excel/worksheet.d.ts +31 -1
- package/dist/browser/modules/excel/worksheet.js +83 -0
- package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
- package/dist/browser/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/browser/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
- package/dist/browser/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/browser/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/browser/modules/pdf/builder/document-builder.js +22 -49
- package/dist/browser/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/browser/modules/pdf/core/pdf-stream.d.ts +28 -1
- package/dist/browser/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/browser/modules/pdf/font/font-manager.d.ts +26 -0
- package/dist/browser/modules/pdf/font/font-manager.js +35 -18
- package/dist/browser/modules/pdf/render/page-renderer.d.ts +51 -3
- package/dist/browser/modules/pdf/render/page-renderer.js +111 -18
- package/dist/browser/modules/word/advanced/field-engine.js +45 -20
- package/dist/browser/modules/word/advanced/glossary.d.ts +10 -36
- package/dist/browser/modules/word/advanced/glossary.js +8 -9
- package/dist/browser/modules/word/advanced/math-convert.js +94 -12
- package/dist/browser/modules/word/advanced/ole-objects.d.ts +28 -0
- package/dist/browser/modules/word/advanced/ole-objects.js +122 -19
- package/dist/browser/modules/word/advanced/style-map.js +31 -10
- package/dist/browser/modules/word/builder/run-builders.d.ts +7 -1
- package/dist/browser/modules/word/builder/run-builders.js +7 -1
- package/dist/browser/modules/word/constants.d.ts +4 -0
- package/dist/browser/modules/word/constants.js +5 -1
- package/dist/browser/modules/word/convert/docx-to-semantic.d.ts +2 -1
- package/dist/browser/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/browser/modules/word/convert/html/html-import.d.ts +32 -1
- package/dist/browser/modules/word/convert/html/html-import.js +167 -14
- package/dist/browser/modules/word/convert/html/html.d.ts +2 -2
- package/dist/browser/modules/word/convert/html/html.js +1 -1
- package/dist/browser/modules/word/convert/markdown/markdown-import.d.ts +48 -18
- package/dist/browser/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/browser/modules/word/convert/markdown/markdown.d.ts +1 -1
- package/dist/browser/modules/word/convert/odt/odt.js +407 -56
- package/dist/browser/modules/word/html.d.ts +2 -2
- package/dist/browser/modules/word/html.js +1 -1
- package/dist/browser/modules/word/index.base.d.ts +3 -3
- package/dist/browser/modules/word/index.base.js +1 -1
- package/dist/browser/modules/word/layout/layout-full.js +326 -19
- package/dist/browser/modules/word/layout/render-page.js +35 -8
- package/dist/browser/modules/word/markdown.d.ts +1 -1
- package/dist/browser/modules/word/query/compat.d.ts +10 -2
- package/dist/browser/modules/word/query/compat.js +29 -21
- package/dist/browser/modules/word/reader/docx-reader.js +105 -2
- package/dist/browser/modules/word/reader/math-parser.js +8 -2
- package/dist/browser/modules/word/security/cfb-reader.js +5 -5
- package/dist/browser/modules/word/types.d.ts +96 -1
- package/dist/browser/modules/word/writer/docx-packager.js +108 -2
- package/dist/browser/modules/word/writer/glossary-writer.d.ts +28 -0
- package/dist/browser/modules/word/writer/glossary-writer.js +121 -0
- package/dist/browser/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/browser/modules/word/writer/math-writer.js +7 -2
- package/dist/browser/utils/font-metrics.d.ts +8 -0
- package/dist/browser/utils/font-metrics.js +43 -0
- package/dist/browser/utils/theme-colors.js +4 -1
- package/dist/cjs/modules/excel/note.js +5 -1
- package/dist/cjs/modules/excel/row.js +35 -2
- package/dist/cjs/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/cjs/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/cjs/modules/excel/workbook.browser.js +31 -1
- package/dist/cjs/modules/excel/worksheet.js +83 -0
- package/dist/cjs/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/cjs/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/cjs/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/cjs/modules/excel/xlsx/xform/drawing/shape-xform.js +112 -0
- package/dist/cjs/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/cjs/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/cjs/modules/pdf/builder/document-builder.js +21 -48
- package/dist/cjs/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/cjs/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/cjs/modules/pdf/font/font-manager.js +35 -18
- package/dist/cjs/modules/pdf/render/page-renderer.js +112 -18
- package/dist/cjs/modules/word/advanced/field-engine.js +45 -20
- package/dist/cjs/modules/word/advanced/glossary.js +8 -9
- package/dist/cjs/modules/word/advanced/math-convert.js +94 -12
- package/dist/cjs/modules/word/advanced/ole-objects.js +123 -19
- package/dist/cjs/modules/word/advanced/style-map.js +31 -10
- package/dist/cjs/modules/word/builder/run-builders.js +7 -1
- package/dist/cjs/modules/word/constants.js +5 -1
- package/dist/cjs/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/cjs/modules/word/convert/html/html-import.js +168 -14
- package/dist/cjs/modules/word/convert/html/html.js +2 -1
- package/dist/cjs/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/cjs/modules/word/convert/odt/odt.js +407 -56
- package/dist/cjs/modules/word/html.js +2 -1
- package/dist/cjs/modules/word/index.base.js +4 -3
- package/dist/cjs/modules/word/layout/layout-full.js +325 -18
- package/dist/cjs/modules/word/layout/render-page.js +35 -8
- package/dist/cjs/modules/word/query/compat.js +29 -21
- package/dist/cjs/modules/word/reader/docx-reader.js +104 -1
- package/dist/cjs/modules/word/reader/math-parser.js +8 -2
- package/dist/cjs/modules/word/security/cfb-reader.js +5 -5
- package/dist/cjs/modules/word/writer/docx-packager.js +108 -2
- package/dist/cjs/modules/word/writer/glossary-writer.js +124 -0
- package/dist/cjs/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/cjs/modules/word/writer/math-writer.js +7 -2
- package/dist/cjs/utils/font-metrics.js +44 -0
- package/dist/cjs/utils/theme-colors.js +4 -1
- package/dist/esm/modules/excel/note.js +5 -1
- package/dist/esm/modules/excel/row.js +35 -2
- package/dist/esm/modules/excel/stream/workbook-writer.browser.js +22 -2
- package/dist/esm/modules/excel/utils/drawing-utils.js +19 -2
- package/dist/esm/modules/excel/workbook.browser.js +32 -2
- package/dist/esm/modules/excel/worksheet.js +83 -0
- package/dist/esm/modules/excel/xlsx/xform/comment/vml-shape-xform.js +42 -8
- package/dist/esm/modules/excel/xlsx/xform/core/content-types-xform.js +3 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/absolute-anchor-xform.js +5 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/base-cell-anchor-xform.js +18 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/blip-xform.js +38 -11
- package/dist/esm/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.js +5 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/pic-xform.js +2 -1
- package/dist/esm/modules/excel/xlsx/xform/drawing/shape-xform.js +109 -0
- package/dist/esm/modules/excel/xlsx/xform/drawing/two-cell-anchor-xform.js +10 -1
- package/dist/esm/modules/excel/xlsx/xform/sheet/worksheet-xform.js +64 -1
- package/dist/esm/modules/pdf/builder/document-builder.js +22 -49
- package/dist/esm/modules/pdf/builder/pdf-editor.js +1 -1
- package/dist/esm/modules/pdf/core/pdf-stream.js +38 -2
- package/dist/esm/modules/pdf/font/font-manager.js +35 -18
- package/dist/esm/modules/pdf/render/page-renderer.js +111 -18
- package/dist/esm/modules/word/advanced/field-engine.js +45 -20
- package/dist/esm/modules/word/advanced/glossary.js +8 -9
- package/dist/esm/modules/word/advanced/math-convert.js +94 -12
- package/dist/esm/modules/word/advanced/ole-objects.js +122 -19
- package/dist/esm/modules/word/advanced/style-map.js +31 -10
- package/dist/esm/modules/word/builder/run-builders.js +7 -1
- package/dist/esm/modules/word/constants.js +5 -1
- package/dist/esm/modules/word/convert/docx-to-semantic.js +135 -1
- package/dist/esm/modules/word/convert/html/html-import.js +167 -14
- package/dist/esm/modules/word/convert/html/html.js +1 -1
- package/dist/esm/modules/word/convert/markdown/markdown-import.js +279 -69
- package/dist/esm/modules/word/convert/odt/odt.js +407 -56
- package/dist/esm/modules/word/html.js +1 -1
- package/dist/esm/modules/word/index.base.js +1 -1
- package/dist/esm/modules/word/layout/layout-full.js +326 -19
- package/dist/esm/modules/word/layout/render-page.js +35 -8
- package/dist/esm/modules/word/query/compat.js +29 -21
- package/dist/esm/modules/word/reader/docx-reader.js +105 -2
- package/dist/esm/modules/word/reader/math-parser.js +8 -2
- package/dist/esm/modules/word/security/cfb-reader.js +5 -5
- package/dist/esm/modules/word/writer/docx-packager.js +108 -2
- package/dist/esm/modules/word/writer/glossary-writer.js +121 -0
- package/dist/esm/modules/word/writer/header-footer-writer.js +105 -20
- package/dist/esm/modules/word/writer/math-writer.js +7 -2
- package/dist/esm/utils/font-metrics.js +43 -0
- package/dist/esm/utils/theme-colors.js +4 -1
- package/dist/iife/excelts.iife.js +496 -59
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +39 -39
- package/dist/types/modules/excel/cell.d.ts +4 -0
- package/dist/types/modules/excel/stream/workbook-writer.browser.d.ts +8 -1
- package/dist/types/modules/excel/types.d.ts +81 -0
- package/dist/types/modules/excel/utils/drawing-utils.d.ts +8 -0
- package/dist/types/modules/excel/workbook.browser.d.ts +16 -0
- package/dist/types/modules/excel/worksheet.d.ts +31 -1
- package/dist/types/modules/excel/xlsx/xform/comment/vml-shape-xform.d.ts +7 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/blip-xform.d.ts +6 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/one-cell-anchor-xform.d.ts +1 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/pic-xform.d.ts +2 -0
- package/dist/types/modules/excel/xlsx/xform/drawing/shape-xform.d.ts +47 -0
- package/dist/types/modules/pdf/core/pdf-stream.d.ts +28 -1
- package/dist/types/modules/pdf/font/font-manager.d.ts +26 -0
- package/dist/types/modules/pdf/render/page-renderer.d.ts +51 -3
- package/dist/types/modules/word/advanced/glossary.d.ts +10 -36
- package/dist/types/modules/word/advanced/ole-objects.d.ts +28 -0
- package/dist/types/modules/word/builder/run-builders.d.ts +7 -1
- package/dist/types/modules/word/constants.d.ts +4 -0
- package/dist/types/modules/word/convert/docx-to-semantic.d.ts +2 -1
- package/dist/types/modules/word/convert/html/html-import.d.ts +32 -1
- package/dist/types/modules/word/convert/html/html.d.ts +2 -2
- package/dist/types/modules/word/convert/markdown/markdown-import.d.ts +48 -18
- package/dist/types/modules/word/convert/markdown/markdown.d.ts +1 -1
- package/dist/types/modules/word/html.d.ts +2 -2
- package/dist/types/modules/word/index.base.d.ts +3 -3
- package/dist/types/modules/word/markdown.d.ts +1 -1
- package/dist/types/modules/word/query/compat.d.ts +10 -2
- package/dist/types/modules/word/types.d.ts +96 -1
- package/dist/types/modules/word/writer/glossary-writer.d.ts +28 -0
- package/dist/types/utils/font-metrics.d.ts +8 -0
- package/package.json +3 -1
|
@@ -146,14 +146,8 @@ exports.DEFAULT_STYLE_MAP = {
|
|
|
146
146
|
* ```
|
|
147
147
|
*/
|
|
148
148
|
function parseStyleMap(dsl, options) {
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
if (options?.includeDefaults !== false && options?.base) {
|
|
152
|
-
rules.push(...options.base.rules);
|
|
153
|
-
}
|
|
154
|
-
else if (options?.includeDefaults !== false && !options?.base) {
|
|
155
|
-
rules.push(...exports.DEFAULT_STYLE_MAP.rules);
|
|
156
|
-
}
|
|
149
|
+
// User-defined rules from the DSL.
|
|
150
|
+
const userRules = [];
|
|
157
151
|
const lines = dsl
|
|
158
152
|
.split("\n")
|
|
159
153
|
.map(l => l.trim())
|
|
@@ -161,10 +155,37 @@ function parseStyleMap(dsl, options) {
|
|
|
161
155
|
for (const line of lines) {
|
|
162
156
|
const rule = parseRule(line);
|
|
163
157
|
if (rule) {
|
|
164
|
-
|
|
158
|
+
userRules.push(rule);
|
|
165
159
|
}
|
|
166
160
|
}
|
|
167
|
-
//
|
|
161
|
+
// Default / base rules requested via `includeDefaults`.
|
|
162
|
+
const defaultRules = [];
|
|
163
|
+
if (options?.includeDefaults !== false && options?.base) {
|
|
164
|
+
defaultRules.push(...options.base.rules);
|
|
165
|
+
}
|
|
166
|
+
else if (options?.includeDefaults !== false && !options?.base) {
|
|
167
|
+
defaultRules.push(...exports.DEFAULT_STYLE_MAP.rules);
|
|
168
|
+
}
|
|
169
|
+
// An explicit DSL rule should always win over a default rule for the same
|
|
170
|
+
// element — that is the whole point of providing one. Default rules,
|
|
171
|
+
// however, carry their own priorities (e.g. "Heading 1" => h1 has priority
|
|
172
|
+
// 10) that can exceed the fixed priority `parseRule` assigns to user rules.
|
|
173
|
+
// To guarantee user intent wins while preserving the relative priority
|
|
174
|
+
// ordering *within* each group, lift every user rule above the highest
|
|
175
|
+
// default priority. When there are no defaults this offset is 0 and user
|
|
176
|
+
// priorities are untouched.
|
|
177
|
+
const maxDefaultPriority = defaultRules.reduce((m, r) => Math.max(m, r.priority ?? 0), 0);
|
|
178
|
+
const userOffset = defaultRules.length > 0 ? maxDefaultPriority + 1 : 0;
|
|
179
|
+
const liftedUserRules = userOffset === 0
|
|
180
|
+
? userRules
|
|
181
|
+
: userRules.map(r => ({ ...r, priority: (r.priority ?? 0) + userOffset }));
|
|
182
|
+
// User rules come first so that, after a stable sort, an explicit DSL rule
|
|
183
|
+
// also wins over any default rule that happens to share its (lifted)
|
|
184
|
+
// priority. The sort below only reorders by priority; equal priorities
|
|
185
|
+
// preserve this user-before-default ordering.
|
|
186
|
+
const rules = [...liftedUserRules, ...defaultRules];
|
|
187
|
+
// Sort by priority (highest first). Array.prototype.sort is stable, so
|
|
188
|
+
// rules of equal priority keep their relative order (user rules first).
|
|
168
189
|
rules.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
|
|
169
190
|
return { rules };
|
|
170
191
|
}
|
|
@@ -525,7 +525,13 @@ function mathSubSuperScript(base, subScript, superScript) {
|
|
|
525
525
|
function mathPreSubSuperScript(base, preSubScript, preSuperScript) {
|
|
526
526
|
return { type: "mathPreSubSuperScript", base, preSubScript, preSuperScript };
|
|
527
527
|
}
|
|
528
|
-
/**
|
|
528
|
+
/**
|
|
529
|
+
* Create a math phantom (an expression that takes up space).
|
|
530
|
+
*
|
|
531
|
+
* Note: in OOXML the phantom base is *shown* by default. To make the classic
|
|
532
|
+
* "occupies space but invisible" phantom pass `{ show: false }`; passing only
|
|
533
|
+
* `transparent: true` is not sufficient to hide the base in Word.
|
|
534
|
+
*/
|
|
529
535
|
function mathPhantom(content, options) {
|
|
530
536
|
return { type: "mathPhantom", content, ...options };
|
|
531
537
|
}
|
|
@@ -272,7 +272,11 @@ exports.ContentType = {
|
|
|
272
272
|
ChartEx: "application/vnd.ms-office.chartEx+xml",
|
|
273
273
|
Xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
274
274
|
CustomXml: "application/xml",
|
|
275
|
-
VbaProject: "application/vnd.ms-office.vbaProject"
|
|
275
|
+
VbaProject: "application/vnd.ms-office.vbaProject",
|
|
276
|
+
/** Glossary (Building Blocks) document part. */
|
|
277
|
+
Glossary: "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
|
|
278
|
+
/** OLE embedded object binary. */
|
|
279
|
+
OleObject: "application/vnd.openxmlformats-officedocument.oleObject"
|
|
276
280
|
};
|
|
277
281
|
/** Map from image file extension to content type. */
|
|
278
282
|
exports.IMAGE_CONTENT_TYPES = {
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
* - Hyperlink extraction
|
|
13
13
|
* - Image registration into ConversionContext
|
|
14
14
|
* - Table structure with merge (colSpan/rowSpan)
|
|
15
|
-
* - List/numbering detection
|
|
15
|
+
* - List/numbering detection: consecutive numbered paragraphs are aggregated
|
|
16
|
+
* into ordered/unordered `list` blocks with nested sub-lists by level
|
|
16
17
|
* - Footnote/endnote reference and content
|
|
17
18
|
* - Math content (text fallback)
|
|
18
19
|
*/
|
|
@@ -96,6 +97,26 @@ function convertBodyContent(body, doc, ctx, imageMap) {
|
|
|
96
97
|
const item = body[bodyIndex];
|
|
97
98
|
switch (item.type) {
|
|
98
99
|
case "paragraph":
|
|
100
|
+
// A run of consecutive list-item paragraphs (each carrying a
|
|
101
|
+
// numbering reference, and not a heading) is aggregated into a single
|
|
102
|
+
// semantic `list` block with nested sub-lists driven by the numbering
|
|
103
|
+
// level. This is what turns Word numbering into real <ul>/<ol> in
|
|
104
|
+
// HTML and `-`/`1.` markers in Markdown when downstream renderers
|
|
105
|
+
// consume the IR.
|
|
106
|
+
if (isListItemParagraph(item)) {
|
|
107
|
+
let end = bodyIndex;
|
|
108
|
+
while (end < body.length) {
|
|
109
|
+
const next = body[end];
|
|
110
|
+
if (next.type !== "paragraph" || !isListItemParagraph(next)) {
|
|
111
|
+
break;
|
|
112
|
+
}
|
|
113
|
+
end++;
|
|
114
|
+
}
|
|
115
|
+
const listParas = body.slice(bodyIndex, end);
|
|
116
|
+
blocks.push(...buildListBlocks(listParas, doc, ctx, imageMap));
|
|
117
|
+
bodyIndex = end - 1; // loop's ++ advances past the consumed run
|
|
118
|
+
break;
|
|
119
|
+
}
|
|
99
120
|
blocks.push(convertParagraph(item, doc, ctx, imageMap));
|
|
100
121
|
break;
|
|
101
122
|
case "table":
|
|
@@ -243,6 +264,119 @@ function convertBodyContent(body, doc, ctx, imageMap) {
|
|
|
243
264
|
return blocks;
|
|
244
265
|
}
|
|
245
266
|
// =============================================================================
|
|
267
|
+
// Internal: List Aggregation
|
|
268
|
+
// =============================================================================
|
|
269
|
+
/**
|
|
270
|
+
* Whether a body paragraph should render as a list item: it carries a
|
|
271
|
+
* numbering reference and is not itself a heading (a numbered heading stays a
|
|
272
|
+
* heading, mirroring the markdown/html renderers).
|
|
273
|
+
*/
|
|
274
|
+
function isListItemParagraph(item) {
|
|
275
|
+
if (item.type !== "paragraph") {
|
|
276
|
+
return false;
|
|
277
|
+
}
|
|
278
|
+
return item.properties?.numbering !== undefined && detectHeadingLevel(item) === null;
|
|
279
|
+
}
|
|
280
|
+
/**
|
|
281
|
+
* Resolve a numbering reference to its number format string (e.g. "decimal",
|
|
282
|
+
* "bullet"). Mirrors the lookup in the markdown/html renderers so the three
|
|
283
|
+
* surfaces classify ordered vs. unordered lists identically. Defaults to
|
|
284
|
+
* "bullet" when the numbering definition can't be resolved.
|
|
285
|
+
*/
|
|
286
|
+
function getNumberingFormat(doc, numId, level) {
|
|
287
|
+
const instance = doc.numberingInstances?.find(n => n.numId === numId);
|
|
288
|
+
if (!instance) {
|
|
289
|
+
return "bullet";
|
|
290
|
+
}
|
|
291
|
+
const abstractNum = doc.abstractNumberings?.find(a => a.abstractNumId === instance.abstractNumId);
|
|
292
|
+
if (!abstractNum) {
|
|
293
|
+
return "bullet";
|
|
294
|
+
}
|
|
295
|
+
const levelDef = abstractNum.levels.find(l => l.level === level);
|
|
296
|
+
return levelDef?.format ?? "bullet";
|
|
297
|
+
}
|
|
298
|
+
/** A number format other than "bullet"/"none" denotes an ordered list. */
|
|
299
|
+
function isOrderedFormat(format) {
|
|
300
|
+
return format !== "bullet" && format !== "none";
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* Build one or more semantic `list` blocks from a contiguous run of list-item
|
|
304
|
+
* paragraphs. Paragraphs are nested by their numbering `level`; a deeper level
|
|
305
|
+
* becomes a `subList` of the preceding shallower item. Adjacent items that
|
|
306
|
+
* switch between ordered and unordered at the same level start a new sibling
|
|
307
|
+
* list so the ordered/unordered distinction is preserved.
|
|
308
|
+
*/
|
|
309
|
+
function buildListBlocks(paras, doc, ctx, imageMap) {
|
|
310
|
+
const { blocks } = buildListLevel(paras, 0, 0, doc, ctx, imageMap);
|
|
311
|
+
return blocks;
|
|
312
|
+
}
|
|
313
|
+
/**
|
|
314
|
+
* Consume paragraphs starting at `start` that belong to `level` (or deeper),
|
|
315
|
+
* emitting sibling lists for this level. Deeper-level paragraphs are folded
|
|
316
|
+
* into the current item's `subList` via recursion. Returns the produced blocks
|
|
317
|
+
* and the index of the first paragraph that no longer belongs to this level.
|
|
318
|
+
*/
|
|
319
|
+
function buildListLevel(paras, start, level, doc, ctx, imageMap) {
|
|
320
|
+
const blocks = [];
|
|
321
|
+
let i = start;
|
|
322
|
+
let currentOrdered = null;
|
|
323
|
+
let items = [];
|
|
324
|
+
const flush = () => {
|
|
325
|
+
if (items.length > 0 && currentOrdered !== null) {
|
|
326
|
+
blocks.push({ type: "list", ordered: currentOrdered, items });
|
|
327
|
+
items = [];
|
|
328
|
+
}
|
|
329
|
+
};
|
|
330
|
+
while (i < paras.length) {
|
|
331
|
+
const para = paras[i];
|
|
332
|
+
const num = para.properties?.numbering;
|
|
333
|
+
// Defensive: callers only pass list-item paragraphs, but guard anyway.
|
|
334
|
+
if (!num) {
|
|
335
|
+
break;
|
|
336
|
+
}
|
|
337
|
+
if (num.level < level) {
|
|
338
|
+
// Belongs to a shallower list — let the caller handle it.
|
|
339
|
+
break;
|
|
340
|
+
}
|
|
341
|
+
if (num.level > level) {
|
|
342
|
+
// Deeper item with no shallower parent at this position: descend and
|
|
343
|
+
// attach the nested list to the most recent item, or synthesise an
|
|
344
|
+
// empty item to host it when there is no parent.
|
|
345
|
+
const { blocks: subBlocks, next } = buildListLevel(paras, i, num.level, doc, ctx, imageMap);
|
|
346
|
+
const subList = subBlocks[0];
|
|
347
|
+
if (items.length > 0) {
|
|
348
|
+
const last = items[items.length - 1];
|
|
349
|
+
items[items.length - 1] = { ...last, subList };
|
|
350
|
+
}
|
|
351
|
+
else if (subList) {
|
|
352
|
+
// Promote the deeper list to this level when there is no parent item.
|
|
353
|
+
if (currentOrdered === null && subList.type === "list") {
|
|
354
|
+
currentOrdered = subList.ordered;
|
|
355
|
+
}
|
|
356
|
+
items.push({ children: [], subList });
|
|
357
|
+
}
|
|
358
|
+
i = next;
|
|
359
|
+
continue;
|
|
360
|
+
}
|
|
361
|
+
// num.level === level
|
|
362
|
+
const format = getNumberingFormat(doc, num.numId, num.level);
|
|
363
|
+
const ordered = isOrderedFormat(format);
|
|
364
|
+
if (currentOrdered === null) {
|
|
365
|
+
currentOrdered = ordered;
|
|
366
|
+
}
|
|
367
|
+
else if (ordered !== currentOrdered) {
|
|
368
|
+
// Ordered/unordered switch at the same level → start a new sibling list.
|
|
369
|
+
flush();
|
|
370
|
+
currentOrdered = ordered;
|
|
371
|
+
}
|
|
372
|
+
const children = convertParagraphChildren(para.children, doc, ctx, imageMap);
|
|
373
|
+
items.push({ children });
|
|
374
|
+
i++;
|
|
375
|
+
}
|
|
376
|
+
flush();
|
|
377
|
+
return { blocks, next: i };
|
|
378
|
+
}
|
|
379
|
+
// =============================================================================
|
|
246
380
|
// Internal: Paragraph Conversion
|
|
247
381
|
// =============================================================================
|
|
248
382
|
function convertParagraph(para, doc, ctx, imageMap) {
|
|
@@ -23,6 +23,8 @@
|
|
|
23
23
|
*/
|
|
24
24
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
25
|
exports.htmlToDocxBody = htmlToDocxBody;
|
|
26
|
+
exports.htmlToDocx = htmlToDocx;
|
|
27
|
+
const utils_1 = require("../../../../utils/utils.js");
|
|
26
28
|
const internal_utils_1 = require("../../core/internal-utils");
|
|
27
29
|
const units_1 = require("../../units");
|
|
28
30
|
/**
|
|
@@ -47,10 +49,7 @@ function htmlToDocxBody(html, options) {
|
|
|
47
49
|
const tokens = tokenize(html);
|
|
48
50
|
// Extract <style> rules and merge with user-provided classStyles
|
|
49
51
|
const extractedStyles = extractStyleRules(tokens);
|
|
50
|
-
const classStyles = {
|
|
51
|
-
...extractedStyles,
|
|
52
|
-
...(options?.classStyles ?? {})
|
|
53
|
-
};
|
|
52
|
+
const classStyles = mergeClassStyles(extractedStyles, options?.classStyles ?? {});
|
|
54
53
|
// Seed the inline context with the caller-supplied defaults so plain text
|
|
55
54
|
// runs actually carry the requested font/size. Without this the options
|
|
56
55
|
// were effectively ignored.
|
|
@@ -64,6 +63,40 @@ function htmlToDocxBody(html, options) {
|
|
|
64
63
|
parseBlocks(tokens, 0, blocks, initialCtx, classStyles);
|
|
65
64
|
return blocks;
|
|
66
65
|
}
|
|
66
|
+
/**
|
|
67
|
+
* Convert an HTML string into DOCX body content **and** embedded images.
|
|
68
|
+
*
|
|
69
|
+
* Unlike {@link htmlToDocxBody}, this decodes base64 `data:` image URLs into
|
|
70
|
+
* real {@link ImageDef}s and assigns each a unique rId that the emitted image
|
|
71
|
+
* runs reference. Merge the returned `images` into your document model so the
|
|
72
|
+
* pictures are embedded rather than dropped as placeholders.
|
|
73
|
+
*
|
|
74
|
+
* @example
|
|
75
|
+
* ```ts
|
|
76
|
+
* const { body, images } = htmlToDocx(html);
|
|
77
|
+
* const doc = Document.create();
|
|
78
|
+
* for (const item of body) Document.addContent(doc, item);
|
|
79
|
+
* const built = Document.build(doc);
|
|
80
|
+
* const final = { ...built, images: [...(built.images ?? []), ...images] };
|
|
81
|
+
* const bytes = await toBuffer(final);
|
|
82
|
+
* ```
|
|
83
|
+
*/
|
|
84
|
+
function htmlToDocx(html, options) {
|
|
85
|
+
const blocks = [];
|
|
86
|
+
const tokens = tokenize(html);
|
|
87
|
+
const extractedStyles = extractStyleRules(tokens);
|
|
88
|
+
const classStyles = mergeClassStyles(extractedStyles, options?.classStyles ?? {});
|
|
89
|
+
const images = [];
|
|
90
|
+
const initialCtx = { imageSink: images };
|
|
91
|
+
if (options?.defaultFont) {
|
|
92
|
+
initialCtx.fontFamily = options.defaultFont;
|
|
93
|
+
}
|
|
94
|
+
if (options?.defaultFontSize !== undefined) {
|
|
95
|
+
initialCtx.fontSize = options.defaultFontSize;
|
|
96
|
+
}
|
|
97
|
+
parseBlocks(tokens, 0, blocks, initialCtx, classStyles);
|
|
98
|
+
return { body: blocks, images };
|
|
99
|
+
}
|
|
67
100
|
function tokenize(html) {
|
|
68
101
|
const tokens = [];
|
|
69
102
|
// Strip HTML comments, doctype declarations and SGML processing
|
|
@@ -476,6 +509,22 @@ function extractStyleRules(tokens) {
|
|
|
476
509
|
}
|
|
477
510
|
return result;
|
|
478
511
|
}
|
|
512
|
+
/**
|
|
513
|
+
* Merge two class→style maps. For classes present in both, the declarations
|
|
514
|
+
* are concatenated (extracted `<style>` rules first, caller-supplied overrides
|
|
515
|
+
* last) so the later source wins per CSS cascade while still preserving
|
|
516
|
+
* properties only declared by the other source. A plain `{ ...a, ...b }`
|
|
517
|
+
* would discard the extracted rule entirely whenever the caller supplies the
|
|
518
|
+
* same class name, silently dropping e.g. `font-style`/`color` from `<style>`.
|
|
519
|
+
*/
|
|
520
|
+
function mergeClassStyles(extracted, overrides) {
|
|
521
|
+
const merged = { ...extracted };
|
|
522
|
+
for (const [name, style] of Object.entries(overrides)) {
|
|
523
|
+
const existing = merged[name];
|
|
524
|
+
merged[name] = existing ? `${existing}; ${style}` : style;
|
|
525
|
+
}
|
|
526
|
+
return merged;
|
|
527
|
+
}
|
|
479
528
|
/**
|
|
480
529
|
* Parse HTML-style attributes from the inside of a start tag, e.g.
|
|
481
530
|
* `class="x" id='y' disabled href=foo`.
|
|
@@ -1136,6 +1185,17 @@ function parseBlocks(tokens, start, blocks, parentCtx, classStyles) {
|
|
|
1136
1185
|
return i + 1; // consumed the close tag
|
|
1137
1186
|
}
|
|
1138
1187
|
if (tok.type === "text") {
|
|
1188
|
+
// In block context, text nodes that are pure inter-element whitespace
|
|
1189
|
+
// (the newlines/indentation between block tags in pretty-printed HTML)
|
|
1190
|
+
// carry no content and must be ignored — otherwise every gap between
|
|
1191
|
+
// <p>/<table>/<div> tags would emit a spurious empty paragraph (and
|
|
1192
|
+
// the contained newline would be rendered as a <w:br/> soft break).
|
|
1193
|
+
// Whitespace that sits between inline runs is preserved by the inline
|
|
1194
|
+
// parser, which handles it separately.
|
|
1195
|
+
if (tok.value.trim() === "") {
|
|
1196
|
+
i++;
|
|
1197
|
+
continue;
|
|
1198
|
+
}
|
|
1139
1199
|
if (!pendingInline) {
|
|
1140
1200
|
pendingInline = { runs: [], ctx: parentCtx };
|
|
1141
1201
|
}
|
|
@@ -1464,7 +1524,7 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
|
|
|
1464
1524
|
runs.push({ content: [{ type: "break" }] });
|
|
1465
1525
|
}
|
|
1466
1526
|
else if (tag === "img") {
|
|
1467
|
-
const imgContent = buildImageContent(tok.attrs);
|
|
1527
|
+
const imgContent = buildImageContent(tok.attrs, ctx);
|
|
1468
1528
|
if (imgContent) {
|
|
1469
1529
|
runs.push({ content: [imgContent] });
|
|
1470
1530
|
}
|
|
@@ -1537,13 +1597,15 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
|
|
|
1537
1597
|
i++;
|
|
1538
1598
|
}
|
|
1539
1599
|
else if (t.type === "close") {
|
|
1600
|
+
// Mismatched close tag — close the hyperlink here but do NOT
|
|
1601
|
+
// consume the token; let the caller handle the block boundary.
|
|
1540
1602
|
const hyperlink = {
|
|
1541
1603
|
type: "hyperlink",
|
|
1542
1604
|
url: safeHref ?? "",
|
|
1543
1605
|
children: innerRuns
|
|
1544
1606
|
};
|
|
1545
1607
|
runs.push(hyperlink);
|
|
1546
|
-
return i
|
|
1608
|
+
return i;
|
|
1547
1609
|
}
|
|
1548
1610
|
else {
|
|
1549
1611
|
const childRuns = [];
|
|
@@ -1584,7 +1646,12 @@ function parseInlineTag(tokens, idx, runs, ctx, classStyles) {
|
|
|
1584
1646
|
i++;
|
|
1585
1647
|
}
|
|
1586
1648
|
else if (t.type === "close") {
|
|
1587
|
-
|
|
1649
|
+
// Mismatched close tag (e.g. </p> while inside an unclosed <strong>).
|
|
1650
|
+
// Do NOT consume it — return the current index so the caller can
|
|
1651
|
+
// handle it. Consuming a block-level close here would swallow the
|
|
1652
|
+
// parent paragraph boundary and pull all following block content
|
|
1653
|
+
// into this run, breaking page breaks, tables, etc.
|
|
1654
|
+
return i;
|
|
1588
1655
|
}
|
|
1589
1656
|
else {
|
|
1590
1657
|
i = parseInlineTag(tokens, i, runs, newCtx, classStyles);
|
|
@@ -1663,6 +1730,15 @@ function parseListItem(tokens, start, blocks, ctx, ordered, level, classStyles)
|
|
|
1663
1730
|
}
|
|
1664
1731
|
// Text content
|
|
1665
1732
|
if (tok.type === "text") {
|
|
1733
|
+
// Skip structural whitespace: the indentation/newlines that sit between
|
|
1734
|
+
// a nested <ul>/<ol> and the closing </li> (or at the very start of the
|
|
1735
|
+
// item) are not real content. Emitting them as runs would otherwise
|
|
1736
|
+
// produce a spurious empty list-item paragraph. Whitespace *between*
|
|
1737
|
+
// real inline content is preserved because `children` is non-empty then.
|
|
1738
|
+
if (tok.value.trim() === "" && children.length === 0) {
|
|
1739
|
+
i++;
|
|
1740
|
+
continue;
|
|
1741
|
+
}
|
|
1666
1742
|
children.push(makeRun(tok.value, ctx));
|
|
1667
1743
|
i++;
|
|
1668
1744
|
continue;
|
|
@@ -2067,7 +2143,7 @@ function mapCssBorderStyle(cssStyle) {
|
|
|
2067
2143
|
// Image content builder
|
|
2068
2144
|
// =============================================================================
|
|
2069
2145
|
/** Build InlineImageContent from img attributes or return undefined if not applicable. */
|
|
2070
|
-
function buildImageContent(attrs) {
|
|
2146
|
+
function buildImageContent(attrs, ctx) {
|
|
2071
2147
|
const src = attrs["src"] || "";
|
|
2072
2148
|
const alt = attrs["alt"] || "";
|
|
2073
2149
|
// Parse width/height from attributes first, then fall back to style
|
|
@@ -2086,11 +2162,36 @@ function buildImageContent(attrs) {
|
|
|
2086
2162
|
// Convert pixels to EMU
|
|
2087
2163
|
const widthEmu = (width || 100) * units_1.EMU_PER_PX;
|
|
2088
2164
|
const heightEmu = (height || 100) * units_1.EMU_PER_PX;
|
|
2089
|
-
//
|
|
2090
|
-
//
|
|
2091
|
-
//
|
|
2092
|
-
|
|
2093
|
-
|
|
2165
|
+
// base64 data: URLs can be decoded and embedded as a real media file when
|
|
2166
|
+
// an image sink is provided (htmlToDocx path). The decoded bytes are
|
|
2167
|
+
// registered as an ImageDef and the run references the assigned rId.
|
|
2168
|
+
if (src.startsWith("data:") && ctx?.imageSink) {
|
|
2169
|
+
const decoded = decodeDataUrlImage(src);
|
|
2170
|
+
if (decoded) {
|
|
2171
|
+
const sink = ctx.imageSink;
|
|
2172
|
+
const index = sink.length;
|
|
2173
|
+
const rId = `htmlImg${index}`;
|
|
2174
|
+
const ext = decoded.mediaType === "jpeg" ? "jpg" : decoded.mediaType;
|
|
2175
|
+
sink.push({
|
|
2176
|
+
data: decoded.data,
|
|
2177
|
+
mediaType: decoded.mediaType,
|
|
2178
|
+
fileName: `image_html_${index}.${ext}`,
|
|
2179
|
+
rId
|
|
2180
|
+
});
|
|
2181
|
+
return {
|
|
2182
|
+
type: "image",
|
|
2183
|
+
rId,
|
|
2184
|
+
width: widthEmu,
|
|
2185
|
+
height: heightEmu,
|
|
2186
|
+
altText: alt || undefined,
|
|
2187
|
+
name: alt || `image${index}`
|
|
2188
|
+
};
|
|
2189
|
+
}
|
|
2190
|
+
}
|
|
2191
|
+
// No sink (htmlToDocxBody only returns BodyContent[] and cannot register
|
|
2192
|
+
// media) or an unsupported/remote source: emit a placeholder with an empty
|
|
2193
|
+
// rId. The renderer treats an empty rId as a placeholder; the original src
|
|
2194
|
+
// is surfaced in the alt text so callers can post-process if needed.
|
|
2094
2195
|
if (src.startsWith("data:") || src.startsWith("http://") || src.startsWith("https://")) {
|
|
2095
2196
|
return {
|
|
2096
2197
|
type: "image",
|
|
@@ -2103,6 +2204,54 @@ function buildImageContent(attrs) {
|
|
|
2103
2204
|
}
|
|
2104
2205
|
return undefined;
|
|
2105
2206
|
}
|
|
2207
|
+
/** Decode a `data:image/...;base64,...` URL into bytes + media type. */
|
|
2208
|
+
function decodeDataUrlImage(src) {
|
|
2209
|
+
// data:image/png;base64,XXXX
|
|
2210
|
+
const match = /^data:image\/([a-z0-9.+-]+)\s*;\s*base64\s*,(.*)$/is.exec(src);
|
|
2211
|
+
if (!match) {
|
|
2212
|
+
return undefined;
|
|
2213
|
+
}
|
|
2214
|
+
const rawType = match[1].toLowerCase();
|
|
2215
|
+
const b64 = match[2].replace(/\s+/g, "");
|
|
2216
|
+
const mediaType = normalizeImageMediaType(rawType);
|
|
2217
|
+
if (!mediaType) {
|
|
2218
|
+
return undefined;
|
|
2219
|
+
}
|
|
2220
|
+
try {
|
|
2221
|
+
const data = (0, utils_1.base64ToUint8Array)(b64);
|
|
2222
|
+
if (data.length === 0) {
|
|
2223
|
+
return undefined;
|
|
2224
|
+
}
|
|
2225
|
+
return { data, mediaType };
|
|
2226
|
+
}
|
|
2227
|
+
catch {
|
|
2228
|
+
return undefined;
|
|
2229
|
+
}
|
|
2230
|
+
}
|
|
2231
|
+
/** Map a data-URL image subtype to a supported ImageMediaType. */
|
|
2232
|
+
function normalizeImageMediaType(subtype) {
|
|
2233
|
+
switch (subtype) {
|
|
2234
|
+
case "png":
|
|
2235
|
+
return "png";
|
|
2236
|
+
case "jpeg":
|
|
2237
|
+
case "jpg":
|
|
2238
|
+
return "jpeg";
|
|
2239
|
+
case "gif":
|
|
2240
|
+
return "gif";
|
|
2241
|
+
case "bmp":
|
|
2242
|
+
return "bmp";
|
|
2243
|
+
case "tiff":
|
|
2244
|
+
case "tif":
|
|
2245
|
+
return "tiff";
|
|
2246
|
+
case "svg+xml":
|
|
2247
|
+
case "svg":
|
|
2248
|
+
return "svg";
|
|
2249
|
+
case "webp":
|
|
2250
|
+
return "webp";
|
|
2251
|
+
default:
|
|
2252
|
+
return undefined;
|
|
2253
|
+
}
|
|
2254
|
+
}
|
|
2106
2255
|
/** Parse an image dimension from HTML attribute value (number or "Npx"). */
|
|
2107
2256
|
function parseImageDimension(value) {
|
|
2108
2257
|
if (!value) {
|
|
@@ -2231,6 +2380,11 @@ function resolveEffectiveStyle(attrs, classStyles) {
|
|
|
2231
2380
|
// Run builder
|
|
2232
2381
|
// =============================================================================
|
|
2233
2382
|
function makeRun(text, ctx) {
|
|
2383
|
+
// HTML whitespace handling: outside <pre>/<code>, runs of whitespace
|
|
2384
|
+
// (including the newlines/indentation from source-code line wrapping)
|
|
2385
|
+
// collapse to a single space. Inside <pre>/<code> whitespace is
|
|
2386
|
+
// significant and preserved verbatim.
|
|
2387
|
+
const value = ctx.code ? text : text.replace(/\s+/g, " ");
|
|
2234
2388
|
const props = {};
|
|
2235
2389
|
if (ctx.bold) {
|
|
2236
2390
|
props.bold = true;
|
|
@@ -2267,7 +2421,7 @@ function makeRun(text, ctx) {
|
|
|
2267
2421
|
}
|
|
2268
2422
|
const run = {
|
|
2269
2423
|
...(Object.keys(props).length > 0 ? { properties: props } : {}),
|
|
2270
|
-
content: [{ type: "text", text }]
|
|
2424
|
+
content: [{ type: "text", text: value }]
|
|
2271
2425
|
};
|
|
2272
2426
|
return run;
|
|
2273
2427
|
}
|
|
@@ -11,10 +11,11 @@
|
|
|
11
11
|
* ```
|
|
12
12
|
*/
|
|
13
13
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
-
exports.htmlToDocxBody = exports.renderToHtml = void 0;
|
|
14
|
+
exports.htmlToDocx = exports.htmlToDocxBody = exports.renderToHtml = void 0;
|
|
15
15
|
// HTML → render (DocxDocument → HTML output)
|
|
16
16
|
var html_renderer_1 = require("./html-renderer");
|
|
17
17
|
Object.defineProperty(exports, "renderToHtml", { enumerable: true, get: function () { return html_renderer_1.renderToHtml; } });
|
|
18
18
|
// HTML → DOCX import (HTML string → BodyContent[])
|
|
19
19
|
var html_import_1 = require("./html-import");
|
|
20
20
|
Object.defineProperty(exports, "htmlToDocxBody", { enumerable: true, get: function () { return html_import_1.htmlToDocxBody; } });
|
|
21
|
+
Object.defineProperty(exports, "htmlToDocx", { enumerable: true, get: function () { return html_import_1.htmlToDocx; } });
|