@yinyoudexing/xml2word 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/dist/createDocxZip-WVDRDYZT.js +109 -0
- package/dist/createDocxZip-WVDRDYZT.js.map +1 -0
- package/dist/htmlToWordBodyXml-RFBPSL2Q.js +416 -0
- package/dist/htmlToWordBodyXml-RFBPSL2Q.js.map +1 -0
- package/dist/index.cjs +653 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +63 -0
- package/dist/index.d.ts +63 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/package.json +47 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,653 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler interop helpers. The first six bindings are short aliases for the
// Object reflection primitives used by the helpers below.
const __create = Object.create;
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __getProtoOf = Object.getPrototypeOf;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Wraps a single-entry object whose only method is a module initializer.
 * The returned `__init` runs that initializer at most once and returns the
 * cached result on every call.
 */
const __esm = (fn, res) =>
  function __init() {
    if (fn) {
      const initializer = fn[__getOwnPropNames(fn)[0]];
      // Clear `fn` before the call so a re-entrant or repeated __init() does
      // not run the initializer again.
      fn = 0;
      res = initializer(0);
    }
    return res;
  };

/** Defines an enumerable getter on `target` for every key of `all`. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Copies the own properties of `from` onto `to` as live getters, skipping
 * keys `to` already owns and the key named by `except`. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (isCopyable) {
    for (const key of __getOwnPropNames(from)) {
      if (__hasOwnProp.call(to, key) || key === except) continue;
      desc = __getOwnPropDesc(from, key);
      __defProp(to, key, {
        get: () => from[key],
        enumerable: !desc || desc.enumerable,
      });
    }
  }
  return to;
};

/**
 * Builds an ESM-style namespace object for a CommonJS module value.
 */
const __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  if (isNodeMode || !mod || !mod.__esModule) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

/** Produces a CommonJS exports object flagged with `__esModule: true`. */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
32
|
+
|
|
33
|
+
// src/lib/normalizeDocumentXml.ts
|
|
34
|
+
/**
 * Reports whether the XML text contains a `<w:document` start tag, i.e. the
 * tag name followed by whitespace or `>`.
 *
 * @param {string} xml - XML text to inspect.
 * @returns {boolean} True when such a tag appears anywhere in the text.
 */
function hasWordDocumentRoot(xml) {
  const rootTagPattern = /<w:document[\s>]/;
  return rootTagPattern.test(xml);
}
|
|
37
|
+
/**
 * Prepends a UTF-8 XML declaration unless the text already starts with one
 * (leading whitespace is tolerated).
 *
 * @param {string} xml - XML text.
 * @returns {string} The input unchanged, or the declaration, a newline and the input.
 */
function ensureXmlDeclaration(xml) {
  const alreadyDeclared = /^\s*<\?xml\b/.test(xml);
  if (alreadyDeclared) {
    return xml;
  }
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
${xml}`;
}
|
|
42
|
+
/**
 * Wraps WordprocessingML body content in a complete `<w:document>` with the
 * `w` and `r` namespaces declared and a trailing section definition
 * (12240 x 15840 page, 1440 margins on every side).
 *
 * @param {string} bodyXml - Markup placed directly inside `<w:body>`.
 * @returns {string} Full document XML, including the XML declaration.
 */
function wrapBodyXml(bodyXml) {
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
>
<w:body>
${bodyXml}
<w:sectPr>
<w:pgSz w:w="12240" w:h="15840"/>
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708" w:footer="708" w:gutter="0"/>
<w:cols w:space="708"/>
<w:docGrid w:linePitch="360"/>
</w:sectPr>
</w:body>
</w:document>
`;
}
|
|
61
|
+
/**
 * Guarantees that the WordprocessingML main namespace is declared.
 * If no `xmlns:w` declaration for that namespace is found anywhere in the
 * text, one is injected into the first `<w:document` start tag.
 *
 * @param {string} xml - Document XML.
 * @returns {string} The XML, unchanged when the namespace is already declared.
 */
function ensureWordNamespace(xml) {
  const namespacePattern =
    /xmlns:w\s*=\s*["']http:\/\/schemas\.openxmlformats\.org\/wordprocessingml\/2006\/main["']/;
  if (namespacePattern.test(xml)) {
    return xml;
  }
  const rootWithNamespace = `<w:document xmlns:w="${WORD_MAIN_NS}"`;
  return xml.replace(/<w:document\b/, rootWithNamespace);
}
|
|
71
|
+
/**
 * Turns caller-supplied XML into a complete `word/document.xml` payload.
 *
 * - `"document"`: the input must contain a `<w:document>` root; the XML
 *   declaration and `w` namespace are added when missing.
 * - `"body"`: the input is wrapped as body content.
 * - anything else: detect a `<w:document>` root and pick one of the above.
 *
 * @param {string} xml - Raw XML from the caller.
 * @param {string} inputKind - `"document"`, `"body"`, or any other value for auto-detection.
 * @returns {string} Normalized document XML.
 * @throws {Error} When the input is blank, or `"document"` input has no `<w:document>` root.
 */
function normalizeDocumentXml(xml, inputKind) {
  const content = xml.trim();
  if (content === "") {
    throw new Error("XML is empty.");
  }

  const completeDocument = () => ensureWordNamespace(ensureXmlDeclaration(content));

  switch (inputKind) {
    case "document": {
      const documentXml = completeDocument();
      if (!hasWordDocumentRoot(documentXml)) {
        throw new Error('inputKind="document" requires a <w:document> root.');
      }
      return documentXml;
    }
    case "body":
      return wrapBodyXml(content);
    default:
      return hasWordDocumentRoot(content) ? completeDocument() : wrapBodyXml(content);
  }
}
|
|
93
|
+
// Module state for src/lib/normalizeDocumentXml.ts. WORD_MAIN_NS stays
// undefined until init_normalizeDocumentXml() has been called.
let WORD_MAIN_NS;
const init_normalizeDocumentXml = __esm({
  "src/lib/normalizeDocumentXml.ts"() {
    "use strict";
    WORD_MAIN_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
  },
});
|
|
100
|
+
|
|
101
|
+
// src/lib/validateXml.ts
|
|
102
|
+
/**
 * Runs fast-xml-parser's `XMLValidator.validate` over the XML when validation
 * is enabled.
 *
 * @param {string} xml - XML text to validate.
 * @param {boolean} validateXml - Falsy to skip validation entirely.
 * @returns {void}
 * @throws {Error} With the validator's message (or "Invalid XML.") plus the
 *   line/column when the validator reports both as numbers.
 */
function validateXmlIfNeeded(xml, validateXml) {
  if (!validateXml) return;

  const outcome = import_fast_xml_parser.XMLValidator.validate(xml);
  if (outcome === true) return;

  const failure = outcome.err;
  const message = failure?.msg ?? "Invalid XML.";
  const lineNumber = failure?.line;
  const columnNumber = failure?.col;
  const hasPosition = typeof lineNumber === "number" && typeof columnNumber === "number";
  const position = hasPosition ? ` (line ${lineNumber}, col ${columnNumber})` : "";
  throw new Error(`${message}${position}`);
}
|
|
113
|
+
// Module state for src/lib/validateXml.ts. The fast-xml-parser dependency is
// required only when init_validateXml() is called.
let import_fast_xml_parser;
const init_validateXml = __esm({
  "src/lib/validateXml.ts"() {
    "use strict";
    import_fast_xml_parser = require("fast-xml-parser");
  },
});
|
|
120
|
+
|
|
121
|
+
// src/lib/createDocxZip.ts
|
|
122
|
+
// Export table of src/lib/createDocxZip.ts, exposed through a live getter.
const createDocxZip_exports = {};
__export(createDocxZip_exports, {
  createDocxZipUint8Array: () => createDocxZipUint8Array,
});
|
|
126
|
+
/**
 * Builds a .docx package (a zip archive) around the given XML.
 *
 * The archive contains `[Content_Types].xml`, `_rels/.rels` and
 * `word/document.xml`.
 *
 * @param {string} xml - Document or body XML.
 * @param {{inputKind?: string, validateXml?: boolean}} [options] -
 *   `inputKind` defaults to `"auto"`, `validateXml` defaults to `true`.
 * @returns {Promise<Uint8Array>} The generated archive bytes.
 * @throws {Error} Propagated from normalization or validation.
 */
async function createDocxZipUint8Array(xml, options = {}) {
  const inputKind = options.inputKind ?? "auto";
  const documentXml = normalizeDocumentXml(xml, inputKind);
  validateXmlIfNeeded(documentXml, options.validateXml ?? true);

  const archive = new import_jszip.default();
  archive.file("[Content_Types].xml", CONTENT_TYPES_XML);
  archive.folder("_rels")?.file(".rels", ROOT_RELS_XML);
  archive.folder("word")?.file("document.xml", documentXml);
  return archive.generateAsync({ type: "uint8array" });
}
|
|
137
|
+
// Module state for src/lib/createDocxZip.ts, populated by init_createDocxZip():
// the jszip binding and the two fixed package parts written into every archive.
let import_jszip;
let CONTENT_TYPES_XML;
let ROOT_RELS_XML;
const init_createDocxZip = __esm({
  "src/lib/createDocxZip.ts"() {
    "use strict";
    import_jszip = __toESM(require("jszip"), 1);
    init_normalizeDocumentXml();
    init_validateXml();
    // Content-type map for the package parts.
    CONTENT_TYPES_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>
`;
    // Package-level relationship pointing at word/document.xml.
    ROOT_RELS_XML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
Target="word/document.xml"/>
</Relationships>
`;
  },
});
|
|
160
|
+
|
|
161
|
+
// src/lib/htmlToWordBodyXml.ts
|
|
162
|
+
// Export table of src/lib/htmlToWordBodyXml.ts, exposed through live getters.
const htmlToWordBodyXml_exports = {};
__export(htmlToWordBodyXml_exports, {
  htmlToWordBodyXml: () => htmlToWordBodyXml,
  textToWordBodyXml: () => textToWordBodyXml,
});
|
|
167
|
+
/**
 * Escapes the five XML special characters so the value can be embedded in
 * element text or in a quoted attribute value.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by their
 *   predefined XML entities.
 */
function escapeXmlText(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // One pass over the input, so an ampersand produced by an earlier
  // replacement can never be escaped a second time.
  return value.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
170
|
+
/**
 * Decides whether a text run needs `xml:space="preserve"`: true when the text
 * starts or ends with whitespace, or contains two or more consecutive
 * whitespace characters.
 *
 * @param {string} text - Run text.
 * @returns {boolean} False for empty text.
 */
function shouldPreserveSpace(text) {
  if (!text) {
    return false;
  }
  const significantWhitespace = /^\s|\s$|\s{2,}/;
  return significantWhitespace.test(text);
}
|
|
174
|
+
/**
 * Parses an inline `style` attribute into a property-to-value map.
 *
 * Declarations are split on `;`; each is split at its first `:`. Property
 * names are trimmed and lower-cased, values are trimmed. Declarations with no
 * colon, an empty name or an empty value are dropped; later duplicates win.
 *
 * @param {string|undefined} style - Raw attribute value.
 * @returns {Object<string, string>} Parsed declarations (empty when `style` is falsy).
 */
function parseStyleAttribute(style) {
  if (!style) return {};

  const declarations = style.replace(/\r/g, "\n").split(";");
  const pairs = declarations.flatMap((declaration) => {
    const colonAt = declaration.indexOf(":");
    if (colonAt <= 0) return [];
    const property = declaration.slice(0, colonAt).trim().toLowerCase();
    const value = declaration.slice(colonAt + 1).trim();
    return property && value ? [[property, value]] : [];
  });
  return Object.fromEntries(pairs);
}
|
|
189
|
+
/**
 * Converts a CSS `rgb(r, g, b)` value with integer channels into an
 * upper-case `RRGGBB` string.
 *
 * @param {string} value - CSS colour text; case and outer whitespace are ignored.
 * @returns {string|undefined} Hex colour, or undefined when the text is not
 *   in that form or a channel exceeds 255.
 */
function parseRgbToHex(value) {
  const rgbPattern = /^rgb\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*\)$/;
  const match = rgbPattern.exec(value.trim().toLowerCase());
  if (match === null) return undefined;

  const channels = [match[1], match[2], match[3]].map((digits) => Number(digits));
  const isInvalid = (channel) => Number.isNaN(channel) || channel < 0 || channel > 255;
  if (channels.some(isInvalid)) return undefined;

  let hex = "";
  for (const channel of channels) {
    hex += channel.toString(16).padStart(2, "0");
  }
  return hex.toUpperCase();
}
|
|
196
|
+
/**
 * Converts a CSS colour into the upper-case `RRGGBB` form used by `w:color`.
 *
 * Accepted forms: `#RRGGBB`, the `#RGB` shorthand (each digit is doubled),
 * and whatever `parseRgbToHex` accepts.
 *
 * @param {string|undefined} value - CSS colour text.
 * @returns {string|undefined} Hex colour, or undefined when the value is
 *   missing or in an unsupported form.
 */
function parseCssColorToHex(value) {
  if (!value) return undefined;
  const trimmed = value.trim();

  const longHex = /^#([0-9a-fA-F]{6})$/.exec(trimmed)?.[1];
  if (longHex) return longHex.toUpperCase();

  // Shorthand "#abc" means "#aabbcc"; previously it was silently dropped.
  const shortHex = /^#([0-9a-fA-F]{3})$/.exec(trimmed)?.[1];
  if (shortHex) {
    return [...shortHex].map((digit) => digit + digit).join("").toUpperCase();
  }

  return parseRgbToHex(trimmed);
}
|
|
203
|
+
/**
 * Converts a CSS `font-size` in `pt` or `px` into half-points.
 * Pixels are converted at 96 px = 72 pt; the result is rounded and never
 * lower than 1.
 *
 * @param {string|undefined} value - CSS font-size text.
 * @returns {number|undefined} Half-points, or undefined for missing values
 *   and any other unit.
 */
function parseFontSizeToHalfPoints(value) {
  if (!value) return undefined;

  const match = /^(\d+(?:\.\d+)?)(pt|px)$/.exec(value.trim().toLowerCase());
  if (match === null) return undefined;

  const amount = Number(match[1]);
  const points = match[2] === "pt" ? amount : amount * 72 / 96;
  return Math.max(1, Math.round(points * 2));
}
|
|
215
|
+
/**
 * Picks the first family from a CSS `font-family` list and strips one
 * leading and one trailing quote character from it.
 *
 * @param {string|undefined} value - CSS font-family text.
 * @returns {string|undefined} The family name, or undefined when the value
 *   is missing or its first entry is blank.
 */
function normalizeFontFamily(value) {
  if (!value) return undefined;

  const [firstEntry] = value.split(",");
  const family = firstEntry?.trim();
  if (!family) return undefined;

  return family.replace(/^["']|["']$/g, "");
}
|
|
221
|
+
/**
 * Layers `patch` over `base`, one text-style field at a time. A field in
 * `patch` wins unless it is null or undefined, so an explicit `false`
 * overrides an inherited `true`.
 *
 * @param {object} base - Inherited style.
 * @param {object} patch - Style contributed by the current element.
 * @returns {object} New object that always carries all six style fields.
 */
function mergeTextStyle(base, patch) {
  const fields = ["bold", "italic", "underline", "colorHex", "fontFamily", "fontSizeHalfPoints"];
  const merged = {};
  for (const field of fields) {
    merged[field] = patch[field] ?? base[field];
  }
  return merged;
}
|
|
231
|
+
/**
 * Derives the text style contributed by a single element, from its tag name
 * and its inline `style` attribute.
 *
 * Tags take precedence over CSS: `b`/`strong` force bold, `i`/`em` force
 * italic, `u` forces underline. Otherwise:
 * - `font-weight`: `bold`/`bolder` -> true, `normal` -> false, a number ->
 *   true at 600 and above.
 * - `font-style`: `italic`/`oblique` -> true, `normal` -> false.
 * - `text-decoration`: true when it mentions `underline`, false otherwise.
 * A field is undefined when the element says nothing about it, which lets
 * the inherited value pass through on merge.
 *
 * @param {object} node - Parsed element; `name` and `attribs.style` are read.
 * @returns {{bold: (boolean|undefined), italic: (boolean|undefined),
 *   underline: (boolean|undefined), colorHex: (string|undefined),
 *   fontFamily: (string|undefined), fontSizeHalfPoints: (number|undefined)}}
 */
function styleFromElement(node) {
  const tag = node.name?.toLowerCase();
  const css = parseStyleAttribute(node.attribs?.style);

  const fontWeight = css["font-weight"]?.trim().toLowerCase();
  let boldFromCss;
  if (fontWeight === "bold" || fontWeight === "bolder") {
    boldFromCss = true;
  } else if (fontWeight === "normal") {
    // "normal" must reset inherited bold, the same way a numeric 400 does.
    boldFromCss = false;
  } else if (fontWeight) {
    const numericWeight = Number(fontWeight);
    if (!Number.isNaN(numericWeight)) boldFromCss = numericWeight >= 600;
  }

  const fontStyle = css["font-style"]?.trim().toLowerCase();
  let italicFromCss;
  if (fontStyle === "italic" || fontStyle === "oblique") {
    italicFromCss = true;
  } else if (fontStyle === "normal") {
    // "normal" must reset inherited italic, mirroring text-decoration: none.
    italicFromCss = false;
  }

  const textDecoration = css["text-decoration"]?.trim().toLowerCase();
  const underlineFromCss = textDecoration ? textDecoration.includes("underline") : undefined;

  const tagBold = tag === "b" || tag === "strong" ? true : undefined;
  const tagItalic = tag === "i" || tag === "em" ? true : undefined;
  const tagUnderline = tag === "u" ? true : undefined;

  return {
    bold: tagBold ?? boldFromCss,
    italic: tagItalic ?? italicFromCss,
    underline: tagUnderline ?? underlineFromCss,
    colorHex: parseCssColorToHex(css.color),
    fontFamily: normalizeFontFamily(css["font-family"]),
    fontSizeHalfPoints: parseFontSizeToHalfPoints(css["font-size"]),
  };
}
|
|
266
|
+
/**
 * Concatenates the text of a node and all of its descendants, in document
 * order.
 *
 * @param {object} node - Parsed node; text nodes carry `data`, others `children`.
 * @returns {string} Combined text (empty when there is none).
 */
function getTextContent(node) {
  if (node.type === "text") {
    return node.data ?? "";
  }
  const childNodes = node.children ?? [];
  return childNodes.map((child) => getTextContent(child)).join("");
}
|
|
273
|
+
/**
 * Flattens a node's inline content into run tokens appended to `out`.
 *
 * Non-empty text nodes become `{ kind: "text", text, style }` carrying the
 * inherited style object; `<br>` becomes `{ kind: "br" }`; other tags merge
 * their own style into the inherited one before their children are visited.
 * `<script>` and `<style>` subtrees are skipped: htmlparser2 reports them
 * with node types "script" / "style" rather than "tag", so without this
 * guard their source text would be emitted as document text.
 *
 * @param {object} node - Parsed node.
 * @param {object} inherited - Style in effect for this node.
 * @param {Array<object>} out - Token list, mutated in place.
 * @returns {void}
 */
function collectInlineRuns(node, inherited, out) {
  if (node.type === "text") {
    const text = node.data ?? "";
    if (text) out.push({ kind: "text", text, style: inherited });
    return;
  }
  if (node.type === "script" || node.type === "style") {
    return;
  }

  let styleForChildren = inherited;
  if (node.type === "tag") {
    if (node.name?.toLowerCase() === "br") {
      out.push({ kind: "br" });
      return;
    }
    styleForChildren = mergeTextStyle(inherited, styleFromElement(node));
  }

  for (const child of node.children ?? []) {
    collectInlineRuns(child, styleForChildren, out);
  }
}
|
|
293
|
+
/**
 * Serializes one text run as `<w:r>`.
 *
 * Run properties are written only for the style fields that are set, in the
 * order bold, italic, underline, colour, fonts, size. The text is
 * XML-escaped and `xml:space="preserve"` is added when its whitespace is
 * significant.
 *
 * @param {object} style - Text style for the run.
 * @param {string} text - Run text (unescaped).
 * @returns {string} The `<w:r>` element.
 */
function buildRunXml(style, text) {
  const properties = [
    style.bold ? "<w:b/>" : "",
    style.italic ? "<w:i/>" : "",
    style.underline ? '<w:u w:val="single"/>' : "",
    style.colorHex ? `<w:color w:val="${style.colorHex}"/>` : "",
  ];
  if (style.fontFamily) {
    const font = escapeXmlText(style.fontFamily);
    properties.push(`<w:rFonts w:ascii="${font}" w:hAnsi="${font}" w:eastAsia="${font}"/>`);
  }
  if (typeof style.fontSizeHalfPoints === "number") {
    const size = style.fontSizeHalfPoints;
    properties.push(`<w:sz w:val="${size}"/><w:szCs w:val="${size}"/>`);
  }

  const propertiesXml = properties.join("");
  const rPrXml = propertiesXml === "" ? "" : `<w:rPr>${propertiesXml}</w:rPr>`;
  const spaceAttr = shouldPreserveSpace(text) ? ' xml:space="preserve"' : "";
  return `<w:r>${rPrXml}<w:t${spaceAttr}>${escapeXmlText(text)}</w:t></w:r>`;
}
|
|
312
|
+
/**
 * Tests whether an element's `class` attribute contains the given class name
 * as a whole whitespace-separated token.
 *
 * @param {object} node - Parsed element; `attribs.class` is read.
 * @param {string} className - Class to look for.
 * @returns {boolean} False when the element has no class attribute.
 */
function hasClass(node, className) {
  const classAttr = node.attribs?.class;
  if (!classAttr) {
    return false;
  }
  const tokens = classAttr.split(/\s+/);
  return tokens.includes(className);
}
|
|
317
|
+
/**
 * Identifies elements whose whole subtree is left out of the conversion:
 * `<button>`, `<canvas>`, `<img class="ProseMirror-separator">`, any element
 * with `id="pages"`, and any element with class `ProseMirror-widget`.
 *
 * @param {object} node - Parsed node.
 * @returns {boolean} Always false for non-tag nodes.
 */
function isSkippableSubtree(node) {
  if (node.type !== "tag") {
    return false;
  }
  const tag = node.name?.toLowerCase();
  return (
    tag === "button" ||
    tag === "canvas" ||
    (tag === "img" && hasClass(node, "ProseMirror-separator")) ||
    node.attribs?.id === "pages" ||
    hasClass(node, "ProseMirror-widget")
  );
}
|
|
326
|
+
/**
 * Converts a CSS length into twips (twentieths of a point).
 *
 * `pt` is multiplied by 20, `px` is converted at 96 px = 72 pt, and `em` is
 * resolved against the base font size. A bare number is rounded and returned
 * as-is. Negative values are accepted.
 *
 * @param {string|undefined} value - CSS length text.
 * @param {number} baseFontHalfPoints - Font size in half-points, used for `em`.
 * @returns {number|undefined} Rounded twips, or undefined for missing values
 *   and unsupported units.
 */
function parseCssLengthToTwips(value, baseFontHalfPoints) {
  if (!value) return undefined;
  const text = value.trim().toLowerCase();
  if (!text) return undefined;

  const match = /^(-?\d+(?:\.\d+)?)(pt|px|em)?$/.exec(text);
  if (match === null) return undefined;

  const amount = Number(match[1]);
  switch (match[2]) {
    case "pt":
      return Math.round(amount * 20);
    case "px":
      return Math.round(amount * 72 * 20 / 96);
    case "em": {
      const basePoints = baseFontHalfPoints / 2;
      return Math.round(amount * basePoints * 20);
    }
    default:
      return Math.round(amount);
  }
}
|
|
343
|
+
/**
 * Finds the first usable inline `font-size` in a subtree, visiting the node
 * itself first and then its descendants depth-first in document order.
 *
 * @param {object} node - Root of the subtree to search.
 * @returns {number|undefined} Font size in half-points, or undefined when no
 *   element declares a parseable size.
 */
function inferFirstFontSizeHalfPoints(node) {
  if (node.type === "tag") {
    const css = parseStyleAttribute(node.attribs?.style);
    const ownSize = parseFontSizeToHalfPoints(css["font-size"]);
    if (typeof ownSize === "number") return ownSize;
  }
  for (const child of node.children ?? []) {
    const nestedSize = inferFirstFontSizeHalfPoints(child);
    if (typeof nestedSize === "number") return nestedSize;
  }
  return undefined;
}
|
|
359
|
+
/**
 * Builds the `<w:pPr>` element for a block from its inline CSS.
 *
 * Children are written in the order the WordprocessingML schema defines for
 * paragraph properties (`w:spacing`, then `w:ind`, then `w:jc`); the schema
 * type is a sequence, so other orders are not schema-valid.
 *
 * - spacing: `margin-top` / `margin-bottom` (clamped at 0) and `line-height`
 *   (unitless multiplier of the base font size, or a length), written with
 *   `w:lineRule="exact"`.
 * - indentation: `margin-left` + `padding-left` unless `extraInd.leftTwips`
 *   is given; `extraInd.hangingTwips`; `text-indent` as first-line indent.
 * - justification: `text-align` of center, right or justify.
 *
 * @param {object} node - Parsed element; `attribs.style` is read.
 * @param {number} baseFontHalfPoints - Base font size for `em` and unitless values.
 * @param {{leftTwips?: number, hangingTwips?: number}} [extraInd] - Indentation overrides.
 * @returns {string} The `<w:pPr>` element, or "" when nothing applies.
 */
function buildParagraphPrXml(node, baseFontHalfPoints, extraInd) {
  const css = parseStyleAttribute(node.attribs?.style);
  const twipsOf = (property) => parseCssLengthToTwips(css[property], baseFontHalfPoints);

  // <w:spacing>
  const before = twipsOf("margin-top");
  const after = twipsOf("margin-bottom");
  const lineHeight = (() => {
    const lh = css["line-height"]?.trim().toLowerCase();
    if (!lh || lh === "normal") return undefined;
    const unitless = lh.match(/^(\d+(?:\.\d+)?)$/);
    if (unitless) {
      const multiplier = Number(unitless[1]);
      if (!Number.isFinite(multiplier) || multiplier <= 0) return undefined;
      const basePt = baseFontHalfPoints / 2;
      return Math.round(basePt * multiplier * 20);
    }
    const twips = parseCssLengthToTwips(lh, baseFontHalfPoints);
    if (typeof twips !== "number") return undefined;
    return Math.max(1, twips);
  })();
  let spacingXml = "";
  if (typeof before === "number" || typeof after === "number" || typeof lineHeight === "number") {
    const attrs = [];
    if (typeof before === "number") attrs.push(`w:before="${Math.max(0, before)}"`);
    if (typeof after === "number") attrs.push(`w:after="${Math.max(0, after)}"`);
    if (typeof lineHeight === "number") {
      attrs.push(`w:line="${lineHeight}"`, 'w:lineRule="exact"');
    }
    spacingXml = `<w:spacing ${attrs.join(" ")}/>`;
  }

  // <w:ind>
  const left = (() => {
    const sum = (twipsOf("margin-left") ?? 0) + (twipsOf("padding-left") ?? 0);
    if (!sum) return undefined;
    return Math.max(0, sum);
  })();
  const firstLine = (() => {
    const textIndent = twipsOf("text-indent");
    if (typeof textIndent !== "number" || !textIndent) return undefined;
    return Math.max(0, textIndent);
  })();
  const indAttrs = [];
  const leftTwips = extraInd?.leftTwips ?? left;
  if (typeof leftTwips === "number") indAttrs.push(`w:left="${leftTwips}"`);
  const hangingTwips = extraInd?.hangingTwips;
  if (typeof hangingTwips === "number") indAttrs.push(`w:hanging="${hangingTwips}"`);
  if (typeof firstLine === "number") indAttrs.push(`w:firstLine="${firstLine}"`);
  const indXml = indAttrs.length ? `<w:ind ${indAttrs.join(" ")}/>` : "";

  // <w:jc>
  const align = css["text-align"]?.trim().toLowerCase();
  const jcVal = align === "center" ? "center" : align === "right" ? "right" : align === "justify" ? "both" : undefined;
  const jcXml = jcVal ? `<w:jc w:val="${jcVal}"/>` : "";

  const content = `${spacingXml}${indXml}${jcXml}`;
  return content ? `<w:pPr>${content}</w:pPr>` : "";
}
|
|
412
|
+
/**
 * Converts a block container (paragraph, heading, table cell) into one
 * `<w:p>` built from its inline content.
 *
 * The base font size is taken from `baseStyle`, else from the first inline
 * `font-size` found in the subtree, else 28 half-points.
 *
 * NOTE(review): text tokens that are empty or whitespace-only are dropped,
 * which looks like it would also remove a lone space sitting between two
 * inline elements - confirm whether that is intended.
 *
 * @param {object} node - Parsed container element.
 * @param {object} baseStyle - Style every run starts from.
 * @param {{leftTwips?: number, hangingTwips?: number}} [extraInd] - Indentation overrides.
 * @returns {string} The `<w:p>` element, or "" when no run was produced.
 */
function buildParagraphXmlFromContainer(node, baseStyle, extraInd) {
  const baseFontHalfPoints = baseStyle.fontSizeHalfPoints ?? inferFirstFontSizeHalfPoints(node) ?? 28;
  const pPrXml = buildParagraphPrXml(node, baseFontHalfPoints, extraInd);

  const tokens = [];
  (node.children ?? []).forEach((child) => collectInlineRuns(child, baseStyle, tokens));

  const runXml = tokens.flatMap((token) => {
    if (token.kind === "br") return ["<w:r><w:br/></w:r>"];
    const hasVisibleText = Boolean(token.text) && token.text.trim() !== "";
    return hasVisibleText ? [buildRunXml(token.style, token.text)] : [];
  });

  return runXml.length === 0 ? "" : `<w:p>${pPrXml}${runXml.join("")}</w:p>`;
}
|
|
431
|
+
/**
 * Detects elements that request a page break.
 *
 * An element qualifies when it has class `page-break`, carries
 * `data-page-break="true"`, or its inline style sets any of
 * `page-break-before`, `page-break-after`, `break-before`, `break-after` to
 * a value containing `always`. For the two `break-*` properties the `page`
 * keyword is accepted as well (as a whole word, so `avoid-page` does not
 * match). All four properties are checked independently.
 *
 * @param {object} node - Parsed node.
 * @returns {boolean} Always false for non-tag nodes.
 */
function isExplicitPageBreak(node) {
  if (node.type !== "tag") return false;

  const classList = (node.attribs?.class ?? "").split(/\s+/);
  if (classList.includes("page-break")) return true;
  if (node.attribs?.["data-page-break"] === "true") return true;

  const css = parseStyleAttribute(node.attribs?.style);
  const mentionsAlways = (value) => value?.toLowerCase().includes("always") ?? false;
  const isPageKeyword = (value) => value !== undefined && /(^|\s)page(\s|$)/.test(value.toLowerCase());

  const legacyValues = [css["page-break-after"], css["page-break-before"]];
  const modernValues = [css["break-after"], css["break-before"]];
  return (
    legacyValues.some(mentionsAlways) ||
    modernValues.some((value) => mentionsAlways(value) || isPageKeyword(value))
  );
}
|
|
445
|
+
/**
 * Returns the base run style for a heading: always bold, with a font size
 * in half-points of 44, 32, 28 and 24 for levels 1 to 4, and 22 for any
 * other level.
 *
 * @param {number} level - Heading level.
 * @returns {{bold: boolean, fontSizeHalfPoints: number}}
 */
function buildHeadingBaseStyle(level) {
  let fontSizeHalfPoints;
  switch (level) {
    case 1:
      fontSizeHalfPoints = 44;
      break;
    case 2:
      fontSizeHalfPoints = 32;
      break;
    case 3:
      fontSizeHalfPoints = 28;
      break;
    case 4:
      fontSizeHalfPoints = 24;
      break;
    default:
      fontSizeHalfPoints = 22;
  }
  return { bold: true, fontSizeHalfPoints };
}
|
|
449
|
+
/**
 * Converts a list element into one `<w:p>` per direct `<li>` child.
 *
 * No Word numbering definition is used: each paragraph starts with a literal
 * marker run ("1. ", "2. ", ... for ordered lists, a bullet character
 * otherwise) and gets a 720-twip left indent with a 360-twip hanging indent.
 * The item's whole inline content is flattened into that one paragraph.
 *
 * @param {object} listNode - Parsed list element.
 * @param {boolean} ordered - True for numbered markers.
 * @returns {string[]} One `<w:p>` string per list item.
 */
function buildListBlocks(listNode, ordered) {
  const listItems = (listNode.children ?? []).filter(
    (child) => child.type === "tag" && child.name?.toLowerCase() === "li"
  );

  const paragraphs = [];
  listItems.forEach((li, index) => {
    const marker = ordered ? `${index + 1}. ` : "\u2022 ";
    const baseStyle = {};
    const tokens = [{ kind: "text", text: marker, style: baseStyle }];
    for (const child of li.children ?? []) {
      collectInlineRuns(child, baseStyle, tokens);
    }

    const runXml = [];
    for (const token of tokens) {
      if (token.kind === "br") {
        runXml.push("<w:r><w:br/></w:r>");
      } else if (token.text && token.text.trim()) {
        runXml.push(buildRunXml(token.style, token.text));
      }
    }
    if (runXml.length === 0) return;

    const pPrXml = buildParagraphPrXml(li, inferFirstFontSizeHalfPoints(li) ?? 28, {
      leftTwips: 720,
      hangingTwips: 360,
    });
    paragraphs.push(`<w:p>${pPrXml}${runXml.join("")}</w:p>`);
  });
  return paragraphs;
}
|
|
484
|
+
/**
 * Converts a table element into `<w:tbl>`.
 *
 * Every `<tr>` found anywhere below the table (depth-first, document order)
 * becomes a `<w:tr>`; each direct `td`/`th` child becomes an auto-width
 * `<w:tc>` holding a single paragraph, with header cells starting from a
 * bold style. Rows without cells are omitted. The table gets auto width and
 * thin single borders in colour D9D9D9.
 *
 * @param {object} tableNode - Parsed table element.
 * @returns {string} The `<w:tbl>` element.
 */
function buildTableXml(tableNode) {
  const rows = [];
  const collectRows = (node) => {
    if (node.type === "tag" && node.name?.toLowerCase() === "tr") rows.push(node);
    for (const child of node.children ?? []) collectRows(child);
  };
  for (const child of tableNode.children ?? []) collectRows(child);

  const rowXml = [];
  for (const row of rows) {
    const cellXml = (row.children ?? [])
      .filter((child) => child.type === "tag" && (child.name === "td" || child.name === "th"))
      .map((cell) => {
        const baseStyle = cell.name === "th" ? { bold: true } : {};
        // A cell needs at least one paragraph, so fall back to an empty one.
        const paragraphs = buildParagraphXmlFromContainer(cell, baseStyle) || "<w:p/>";
        return `<w:tc><w:tcPr><w:tcW w:w="0" w:type="auto"/></w:tcPr>${paragraphs}</w:tc>`;
      });
    if (cellXml.length) rowXml.push(`<w:tr>${cellXml.join("")}</w:tr>`);
  }

  const tblPr = `<w:tblPr><w:tblW w:w="0" w:type="auto"/><w:tblBorders><w:top w:val="single" w:sz="4" w:space="0" w:color="D9D9D9"/><w:left w:val="single" w:sz="4" w:space="0" w:color="D9D9D9"/><w:bottom w:val="single" w:sz="4" w:space="0" w:color="D9D9D9"/><w:right w:val="single" w:sz="4" w:space="0" w:color="D9D9D9"/><w:insideH w:val="single" w:sz="4" w:space="0" w:color="D9D9D9"/><w:insideV w:val="single" w:sz="4" w:space="0" w:color="D9D9D9"/></w:tblBorders></w:tblPr>`;
  const tblGrid = `<w:tblGrid/>`;
  return `<w:tbl>${tblPr}${tblGrid}${rowXml.join("")}</w:tbl>`;
}
|
|
513
|
+
/**
 * Walks the parsed HTML tree and appends block-level WordprocessingML to
 * `out`.
 *
 * Skippable subtrees are ignored. Page-break elements emit a page-break
 * paragraph. `p`, `h1`-`h6`, `table`, `ul` and `ol` are converted as a whole
 * and not descended into. Every other node is only a container whose
 * children are visited.
 *
 * @param {object} node - Parsed node to process.
 * @param {string[]} out - Collected block XML, mutated in place.
 * @returns {void}
 */
function collectBodyBlocks(node, out) {
  if (isSkippableSubtree(node)) return;

  if (node.type === "tag") {
    const tag = node.name?.toLowerCase();

    if (isExplicitPageBreak(node)) {
      out.push(PAGE_BREAK_XML);
      return;
    }

    let blocks = null;
    if (tag === "p") {
      blocks = [buildParagraphXmlFromContainer(node, {})];
    } else if (tag && /^h[1-6]$/.test(tag)) {
      const level = Number(tag.slice(1));
      blocks = [buildParagraphXmlFromContainer(node, buildHeadingBaseStyle(level))];
    } else if (tag === "table") {
      blocks = [buildTableXml(node)];
    } else if (tag === "ul" || tag === "ol") {
      blocks = buildListBlocks(node, tag === "ol");
    }

    if (blocks !== null) {
      // Builders return "" when a block has no content; skip those.
      for (const block of blocks) {
        if (block) out.push(block);
      }
      return;
    }
  }

  for (const child of node.children ?? []) {
    collectBodyBlocks(child, out);
  }
}
|
|
544
|
+
/**
 * Converts plain text into WordprocessingML body content: one paragraph per
 * line, with empty lines becoming empty paragraphs. CRLF and lone CR are
 * treated as line feeds.
 *
 * @param {string} text - Plain text.
 * @returns {string} Concatenated `<w:p>` elements.
 * @throws {Error} "Text is empty." when the text is blank.
 */
function textToWordBodyXml(text) {
  const normalized = text.replace(/\r\n?/g, "\n");
  if (normalized.trim() === "") {
    throw new Error("Text is empty.");
  }
  return normalized
    .split("\n")
    .map((line) => (line === "" ? "<w:p/>" : `<w:p>${buildRunXml({}, line)}</w:p>`))
    .join("");
}
|
|
560
|
+
/**
 * Converts an HTML fragment or document into WordprocessingML body content.
 *
 * The HTML is parsed with htmlparser2 and block elements are converted. If
 * no block is produced, the document's text content is converted as plain
 * text instead.
 *
 * @param {string} html - HTML markup.
 * @returns {string} Concatenated block XML.
 * @throws {Error} "Text is empty." when the fallback finds no text either.
 */
function htmlToWordBodyXml(html) {
  const normalized = html.replace(/\r\n?/g, "\n");
  const parserOptions = {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true,
  };
  const doc = (0, import_htmlparser2.parseDocument)(normalized, parserOptions);

  const blocks = [];
  collectBodyBlocks(doc, blocks);
  if (blocks.length === 0) {
    return textToWordBodyXml(getTextContent(doc));
  }
  return blocks.join("");
}
|
|
575
|
+
// Module state for src/lib/htmlToWordBodyXml.ts, populated by
// init_htmlToWordBodyXml(): the htmlparser2 binding and the paragraph
// emitted for an explicit page break.
let import_htmlparser2;
let PAGE_BREAK_XML;
const init_htmlToWordBodyXml = __esm({
  "src/lib/htmlToWordBodyXml.ts"() {
    "use strict";
    import_htmlparser2 = require("htmlparser2");
    PAGE_BREAK_XML = '<w:p><w:r><w:br w:type="page"/></w:r></w:p>';
  },
});
|
|
583
|
+
|
|
584
|
+
// src/index.ts
|
|
585
|
+
// Public export table of the package: each entry is a live getter over the
// function declared further down in this file. The table is then published
// as the CommonJS module.exports.
const index_exports = {};
__export(index_exports, {
  htmlToDocxBlob: () => htmlToDocxBlob,
  htmlToDocxBuffer: () => htmlToDocxBuffer,
  htmlToDocxUint8Array: () => htmlToDocxUint8Array,
  xmlToDocxBlob: () => xmlToDocxBlob,
  xmlToDocxBuffer: () => xmlToDocxBuffer,
  xmlToDocxUint8Array: () => xmlToDocxUint8Array,
});
module.exports = __toCommonJS(index_exports);
|
|
595
|
+
/**
 * Heuristic check for markup: the trimmed input must contain both `<` and
 * `>` and something shaped like a tag, i.e. `<` or `</` immediately followed
 * by a letter and later closed by `>`.
 *
 * @param {string} input - Text to classify.
 * @returns {boolean} True when the input looks like HTML.
 */
function looksLikeHtml(input) {
  const candidate = input.trim();
  if (candidate === "") return false;

  const hasAngleBrackets = candidate.includes("<") && candidate.includes(">");
  return hasAngleBrackets && /<\/?[a-zA-Z][\s\S]*?>/.test(candidate);
}
|
|
601
|
+
/**
 * Converts WordprocessingML into the bytes of a .docx file. The zip-building
 * module is initialised on first use.
 *
 * @param {string} xml - Document or body XML.
 * @param {object} [options] - Passed through to the zip builder.
 * @returns {Promise<Uint8Array>} The .docx bytes.
 */
async function xmlToDocxUint8Array(xml, options = {}) {
  const loadZipModule = () => {
    init_createDocxZip();
    return createDocxZip_exports;
  };
  const { createDocxZipUint8Array: buildDocx } = await Promise.resolve().then(loadZipModule);
  return buildDocx(xml, options);
}
|
|
605
|
+
/**
 * Converts WordprocessingML into a .docx `Blob` carrying the Word document
 * MIME type.
 *
 * @param {string} xml - Document or body XML.
 * @param {object} [options] - Passed through to `xmlToDocxUint8Array`.
 * @returns {Promise<Blob>} The .docx file as a Blob.
 */
async function xmlToDocxBlob(xml, options = {}) {
  const docxBytes = await xmlToDocxUint8Array(xml, options);
  // The Blob is built from a fresh copy of the generated bytes.
  const copy = new Uint8Array(docxBytes);
  const blobOptions = {
    type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  };
  return new Blob([copy], blobOptions);
}
|
|
612
|
+
/**
 * Converts WordprocessingML into a .docx `Buffer`.
 *
 * @param {string} xml - Document or body XML.
 * @param {object} [options] - Passed through to `xmlToDocxUint8Array`.
 * @returns {Promise<Buffer>} The .docx bytes as a Buffer.
 * @throws {Error} When `globalThis.Buffer` is not available. The check runs
 *   after the document has been generated.
 */
async function xmlToDocxBuffer(xml, options = {}) {
  const docxBytes = await xmlToDocxUint8Array(xml, options);
  const { Buffer: BufferCtor } = globalThis;
  if (BufferCtor) {
    return BufferCtor.from(docxBytes);
  }
  throw new Error("Buffer is not available. Use xmlToDocxUint8Array or xmlToDocxBlob instead.");
}
|
|
620
|
+
/**
 * Converts HTML or plain text into the bytes of a .docx file.
 *
 * `options.inputFormat` selects the converter: `"html"`, `"text"`, or any
 * other value (default `"auto"`) to decide with `looksLikeHtml`. The
 * resulting body XML is packaged with `inputKind: "body"`.
 *
 * @param {string} html - HTML markup or plain text.
 * @param {{inputFormat?: string, validateXml?: boolean}} [options]
 * @returns {Promise<Uint8Array>} The .docx bytes.
 */
async function htmlToDocxUint8Array(html, options = {}) {
  const loadConverters = () => {
    init_htmlToWordBodyXml();
    return htmlToWordBodyXml_exports;
  };
  const { htmlToWordBodyXml: convertHtml, textToWordBodyXml: convertText } =
    await Promise.resolve().then(loadConverters);

  const format = options.inputFormat ?? "auto";
  const treatAsHtml = format === "html" || (format !== "text" && looksLikeHtml(html));
  const bodyXml = treatAsHtml ? convertHtml(html) : convertText(html);

  return xmlToDocxUint8Array(bodyXml, {
    inputKind: "body",
    validateXml: options.validateXml,
  });
}
|
|
629
|
+
/**
 * Converts HTML or plain text into a .docx `Blob` carrying the Word document
 * MIME type.
 *
 * @param {string} html - HTML markup or plain text.
 * @param {object} [options] - Passed through to `htmlToDocxUint8Array`.
 * @returns {Promise<Blob>} The .docx file as a Blob.
 */
async function htmlToDocxBlob(html, options = {}) {
  const docxBytes = await htmlToDocxUint8Array(html, options);
  // The Blob is built from a fresh copy of the generated bytes.
  const copy = new Uint8Array(docxBytes);
  const blobOptions = {
    type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  };
  return new Blob([copy], blobOptions);
}
|
|
636
|
+
/**
 * Converts HTML or plain text into a .docx `Buffer`.
 *
 * @param {string} html - HTML markup or plain text.
 * @param {object} [options] - Passed through to `htmlToDocxUint8Array`.
 * @returns {Promise<Buffer>} The .docx bytes as a Buffer.
 * @throws {Error} When `globalThis.Buffer` is not available. The check runs
 *   after the document has been generated.
 */
async function htmlToDocxBuffer(html, options = {}) {
  const docxBytes = await htmlToDocxUint8Array(html, options);
  const { Buffer: BufferCtor } = globalThis;
  if (BufferCtor) {
    return BufferCtor.from(docxBytes);
  }
  throw new Error("Buffer is not available. Use htmlToDocxUint8Array or htmlToDocxBlob instead.");
}
|
|
644
|
+
// Annotate the CommonJS export names for ESM import in node:
// the `0 &&` guard means this assignment never executes; it is present only
// so the export names appear as a literal `module.exports = {...}`.
0 && (module.exports = {
  htmlToDocxBlob,
  htmlToDocxBuffer,
  htmlToDocxUint8Array,
  xmlToDocxBlob,
  xmlToDocxBuffer,
  xmlToDocxUint8Array
});
|
|
653
|
+
//# sourceMappingURL=index.cjs.map
|