@yinyoudexing/xml2word 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/dist/createDocxZip-WVDRDYZT.js +109 -0
- package/dist/createDocxZip-WVDRDYZT.js.map +1 -0
- package/dist/htmlToWordBodyXml-RFBPSL2Q.js +416 -0
- package/dist/htmlToWordBodyXml-RFBPSL2Q.js.map +1 -0
- package/dist/index.cjs +653 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +63 -0
- package/dist/index.d.ts +63 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/package.json +47 -0
package/README.md
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# xml2word
|
|
2
|
+
|
|
3
|
+
把 WordprocessingML XML / HTML / 纯文本字符串转换成 `.docx`(本质是一个 zip 包)。
|
|
4
|
+
|
|
5
|
+
> 适用场景:后端 Node 生成 Word;前端直接传入长字符串(HTML 或纯文本)导出 Word。
|
|
6
|
+
|
|
7
|
+
## 安装
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm i @yinyoudexing/xml2word
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## 快速开始(前端)
|
|
14
|
+
|
|
15
|
+
### 1) 传入 HTML 字符串生成 docx 并下载
|
|
16
|
+
|
|
17
|
+
```js
|
|
18
|
+
import { htmlToDocxBlob } from "@yinyoudexing/xml2word";
|
|
19
|
+
|
|
20
|
+
const html = `<div><h1>标题</h1><p style="text-align: justify;">正文</p></div>`;
|
|
21
|
+
const blob = await htmlToDocxBlob(html, { inputFormat: "html" });
|
|
22
|
+
|
|
23
|
+
const url = URL.createObjectURL(blob);
|
|
24
|
+
const a = document.createElement("a");
|
|
25
|
+
a.href = url;
|
|
26
|
+
a.download = "out.docx";
|
|
27
|
+
a.click();
|
|
28
|
+
URL.revokeObjectURL(url);
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### 2) 传入“很长的字符串”(HTML 或纯文本)自动识别
|
|
32
|
+
|
|
33
|
+
```js
|
|
34
|
+
import { htmlToDocxBlob } from "@yinyoudexing/xml2word";
|
|
35
|
+
|
|
36
|
+
const longString = getContentFromEditor(); // 可能是 HTML,也可能只是纯文本
|
|
37
|
+
const blob = await htmlToDocxBlob(longString, { inputFormat: "auto" });
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## 快速开始(Node)
|
|
41
|
+
|
|
42
|
+
### 1) 传入 HTML 文件内容生成 docx
|
|
43
|
+
|
|
44
|
+
```js
|
|
45
|
+
import fs from "node:fs";
|
|
46
|
+
import { htmlToDocxBuffer } from "@yinyoudexing/xml2word";
|
|
47
|
+
|
|
48
|
+
const html = fs.readFileSync("文档.html", "utf8");
|
|
49
|
+
const buf = await htmlToDocxBuffer(html, { inputFormat: "html" });
|
|
50
|
+
fs.writeFileSync("out.docx", buf);
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 2) 传入 WordprocessingML(document.xml 或 body 片段)生成 docx
|
|
54
|
+
|
|
55
|
+
```js
|
|
56
|
+
import fs from "node:fs";
|
|
57
|
+
import { xmlToDocxBuffer } from "@yinyoudexing/xml2word";
|
|
58
|
+
|
|
59
|
+
// body 片段(库会自动包一层 <w:document><w:body>)
|
|
60
|
+
const bodyXml = `<w:p><w:r><w:t>Hello</w:t></w:r></w:p>`;
|
|
61
|
+
const buf = await xmlToDocxBuffer(bodyXml, { inputKind: "body" });
|
|
62
|
+
fs.writeFileSync("out.docx", buf);
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## API
|
|
66
|
+
|
|
67
|
+
### XML → DOCX
|
|
68
|
+
|
|
69
|
+
#### `xmlToDocxUint8Array(xml, options?) => Promise<Uint8Array>`
|
|
70
|
+
|
|
71
|
+
- 通用输出格式(Node/浏览器都能用)
|
|
72
|
+
- `options.inputKind`:
|
|
73
|
+
- `auto`:自动判断(像 `<w:document>` 就当 document,否则当 body)
|
|
74
|
+
- `document`:你传的是完整 `<w:document>...`
|
|
75
|
+
- `body`:你传的是 `<w:p>...` 等正文片段
|
|
76
|
+
- `options.validateXml`:是否校验 XML(默认 `true`)
|
|
77
|
+
|
|
78
|
+
#### `xmlToDocxBlob(xml, options?) => Promise<Blob>`
|
|
79
|
+
|
|
80
|
+
- 浏览器下载用
|
|
81
|
+
|
|
82
|
+
#### `xmlToDocxBuffer(xml, options?) => Promise<Buffer>`
|
|
83
|
+
|
|
84
|
+
- Node 写文件用
|
|
85
|
+
|
|
86
|
+
### HTML / Text → DOCX
|
|
87
|
+
|
|
88
|
+
#### `htmlToDocxUint8Array(input, options?) => Promise<Uint8Array>`
|
|
89
|
+
|
|
90
|
+
- 支持前端“直接传入很长的字符串”
|
|
91
|
+
- `options.inputFormat`:
|
|
92
|
+
- `auto`(默认):自动判断像不像 HTML;像就按 HTML 解析,否则当纯文本
|
|
93
|
+
- `html`:强制按 HTML 解析(推荐编辑器输出 HTML 的场景)
|
|
94
|
+
- `text`:强制按纯文本解析(按换行拆段)
|
|
95
|
+
- `options.validateXml`:是否校验最终生成的 `word/document.xml` 是否为合法 XML(默认 `true`)
|
|
96
|
+
|
|
97
|
+
#### `htmlToDocxBlob(input, options?) => Promise<Blob>`
|
|
98
|
+
|
|
99
|
+
- 浏览器下载用
|
|
100
|
+
|
|
101
|
+
#### `htmlToDocxBuffer(input, options?) => Promise<Buffer>`
|
|
102
|
+
|
|
103
|
+
- Node 写文件用
|
|
104
|
+
|
|
105
|
+
## HTML 支持范围(当前版本)
|
|
106
|
+
|
|
107
|
+
- 支持:`p/span/strong/br`、`h1~h6`、`ul/ol/li`、`table/tr/td/th`
|
|
108
|
+
- 支持:`text-align`、`font-size(pt/px)`、`color(rgb/#)`、`font-family`
|
|
109
|
+
- 支持分页标记(会插入 Word 分页符):
|
|
110
|
+
- `<div class="page-break"></div>`
|
|
111
|
+
- `<hr class="page-break" />`
|
|
112
|
+
- `style="page-break-after: always"` / `break-after: always`
|
|
113
|
+
- 会跳过:`button`、`canvas`、`id="pages"` 的元素,以及编辑器辅助 DOM(如 ProseMirror widget、`img.ProseMirror-separator`)
|
|
114
|
+
|
|
115
|
+
限制(需要额外实现才能完全还原):
|
|
116
|
+
- 图片:HTML `<img>` 需要额外提供图片二进制并写入 docx 的 media 关系
|
|
117
|
+
- 复杂 CSS:比如浮动/定位/复杂布局无法直接映射到 Word
|
|
118
|
+
- 精准“自动分页”:如果需要指定分页位置,请在 HTML 中插入分页标记
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
// src/lib/createDocxZip.ts
|
|
2
|
+
import JSZip from "jszip";
|
|
3
|
+
|
|
4
|
+
// src/lib/normalizeDocumentXml.ts
|
|
5
|
+
// Namespace URI that ensureWordNamespace() binds to the `w:` prefix on <w:document>.
const WORD_MAIN_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
|
|
6
|
+
/**
 * Reports whether the text contains a `<w:document` opening tag.
 *
 * This is a textual search (tag name followed by whitespace or `>`), not a
 * parse, so it does not prove that the tag is really the document root.
 *
 * @param {string} xml - XML text to inspect.
 * @returns {boolean} `true` when a `<w:document` opening tag is present.
 */
function hasWordDocumentRoot(xml) {
  const documentTagPattern = /<w:document[\s>]/;
  return documentTagPattern.test(xml);
}
|
|
9
|
+
/**
 * Prepends a standard XML declaration unless the text already starts with one.
 *
 * @param {string} xml - XML text, optionally preceded by whitespace.
 * @returns {string} The input unchanged when it already begins with `<?xml`,
 *   otherwise the declaration, a newline, and the input.
 */
function ensureXmlDeclaration(xml) {
  const alreadyDeclared = /^\s*<\?xml\b/.test(xml);
  if (alreadyDeclared) {
    return xml;
  }
  const declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>';
  return `${declaration}\n${xml}`;
}
|
|
14
|
+
/**
 * Wraps a WordprocessingML body fragment in a complete `<w:document>` part.
 *
 * The wrapper declares the `w:` and `r:` namespaces and appends a fixed
 * `<w:sectPr>` (page size, margins, columns, document grid) after the fragment.
 *
 * @param {string} bodyXml - Block-level markup such as `<w:p>…</w:p>`.
 * @returns {string} A full document part starting with an XML declaration.
 */
function wrapBodyXml(bodyXml) {
  // An XML declaration is only legal at the very start of a document. A
  // fragment that still carries its own declaration would end up in the middle
  // of the wrapper and make the result malformed, so drop it before embedding.
  const fragment = String(bodyXml).replace(/^\s*<\?xml\b[\s\S]*?\?>\s*/, "");
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document
  xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
  xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
>
  <w:body>
${fragment}
    <w:sectPr>
      <w:pgSz w:w="12240" w:h="15840"/>
      <w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708" w:footer="708" w:gutter="0"/>
      <w:cols w:space="708"/>
      <w:docGrid w:linePitch="360"/>
    </w:sectPr>
  </w:body>
</w:document>
`;
}
|
|
33
|
+
/**
 * Makes sure the `w:` prefix is bound to the WordprocessingML namespace.
 *
 * If an `xmlns:w="…wordprocessingml/2006/main"` declaration appears anywhere
 * in the text, the input is returned untouched. Otherwise the declaration is
 * inserted into the first `<w:document` opening tag. Text without such a tag
 * is returned unchanged.
 *
 * @param {string} xml - Document XML.
 * @returns {string} XML with the `w:` namespace declared when a root tag exists.
 */
function ensureWordNamespace(xml) {
  const declaresWordNamespace =
    /xmlns:w\s*=\s*["']http:\/\/schemas\.openxmlformats\.org\/wordprocessingml\/2006\/main["']/;
  if (declaresWordNamespace.test(xml)) {
    return xml;
  }
  const rootOpeningTag = /<w:document\b/;
  return xml.replace(rootOpeningTag, `<w:document xmlns:w="${WORD_MAIN_NS}"`);
}
|
|
43
|
+
/**
 * Turns caller-supplied XML into the text stored as `word/document.xml`.
 *
 * - `"document"`: adds a missing XML declaration and `w:` namespace, then
 *   requires a `<w:document` tag to be present.
 * - `"body"`: wraps the fragment in a full document.
 * - anything else (e.g. `"auto"`): treated as a document when a `<w:document`
 *   tag is found, otherwise as a body fragment.
 *
 * @param {string} xml - Raw input; surrounding whitespace is trimmed.
 * @param {string} inputKind - `"document"`, `"body"` or `"auto"`.
 * @returns {string} Normalized document XML.
 * @throws {Error} When the input is blank, or when `"document"` is requested
 *   but no `<w:document` tag exists.
 */
function normalizeDocumentXml(xml, inputKind) {
  const source = xml.trim();
  if (!source) {
    throw new Error("XML is empty.");
  }

  switch (inputKind) {
    case "document": {
      const normalized = ensureWordNamespace(ensureXmlDeclaration(source));
      if (!hasWordDocumentRoot(normalized)) {
        throw new Error('inputKind="document" requires a <w:document> root.');
      }
      return normalized;
    }
    case "body":
      return wrapBodyXml(source);
    default:
      return hasWordDocumentRoot(source)
        ? ensureWordNamespace(ensureXmlDeclaration(source))
        : wrapBodyXml(source);
  }
}
|
|
65
|
+
|
|
66
|
+
// src/lib/validateXml.ts
|
|
67
|
+
import { XMLValidator } from "fast-xml-parser";
|
|
68
|
+
/**
 * Validates XML with `XMLValidator.validate` when validation is enabled.
 *
 * @param {string} xml - XML text to check.
 * @param {boolean} validateXml - When falsy, the function returns immediately.
 * @returns {void}
 * @throws {Error} When the validator reports a problem. The message is the
 *   validator's `msg` (or "Invalid XML."), followed by the line and column
 *   when both are numeric.
 */
function validateXmlIfNeeded(xml, validateXml) {
  if (!validateXml) {
    return;
  }

  const outcome = XMLValidator.validate(xml);
  if (outcome === true) {
    return;
  }

  const failure = outcome.err;
  const message = failure?.msg ?? "Invalid XML.";
  const hasPosition = typeof failure?.line === "number" && typeof failure?.col === "number";
  const where = hasPosition ? ` (line ${failure.line}, col ${failure.col})` : "";
  throw new Error(`${message}${where}`);
}
|
|
79
|
+
|
|
80
|
+
// src/lib/createDocxZip.ts
|
|
81
|
+
// Content of `[Content_Types].xml`: default types for `.rels` and `.xml`
// parts plus the override that marks /word/document.xml as the main document.
const CONTENT_TYPES_XML = [
  '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
  '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">',
  '  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>',
  '  <Default Extension="xml" ContentType="application/xml"/>',
  '  <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>',
  "</Types>",
  "",
].join("\n");
|
|
88
|
+
// Content of `_rels/.rels`: a single relationship pointing the package at
// word/document.xml as its office document.
const ROOT_RELS_XML = [
  '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
  '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">',
  '  <Relationship Id="rId1"',
  '    Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"',
  '    Target="word/document.xml"/>',
  "</Relationships>",
  "",
].join("\n");
|
|
95
|
+
/**
 * Builds a .docx package (zip) around the given WordprocessingML.
 *
 * The archive receives three entries: `[Content_Types].xml`, `_rels/.rels`
 * and `word/document.xml` (the normalized, optionally validated input).
 *
 * @param {string} xml - Full document XML or a body fragment.
 * @param {{inputKind?: string, validateXml?: boolean}} [options] -
 *   `inputKind` defaults to `"auto"`, `validateXml` defaults to `true`.
 * @returns {Promise<Uint8Array>} The zip bytes produced by JSZip.
 * @throws {Error} Propagates normalization and validation errors.
 */
async function createDocxZipUint8Array(xml, options = {}) {
  const inputKind = options.inputKind ?? "auto";
  const documentXml = normalizeDocumentXml(xml, inputKind);

  const shouldValidate = options.validateXml ?? true;
  validateXmlIfNeeded(documentXml, shouldValidate);

  const archive = new JSZip();
  archive.file("[Content_Types].xml", CONTENT_TYPES_XML);
  archive.folder("_rels")?.file(".rels", ROOT_RELS_XML);
  archive.folder("word")?.file("document.xml", documentXml);

  return archive.generateAsync({ type: "uint8array" });
}
|
|
106
|
+
export {
|
|
107
|
+
createDocxZipUint8Array
|
|
108
|
+
};
|
|
109
|
+
//# sourceMappingURL=createDocxZip-WVDRDYZT.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/lib/createDocxZip.ts","../src/lib/normalizeDocumentXml.ts","../src/lib/validateXml.ts"],"sourcesContent":["import JSZip from \"jszip\";\nimport type { XmlToDocxOptions } from \"../index.js\";\nimport { normalizeDocumentXml } from \"./normalizeDocumentXml.js\";\nimport { validateXmlIfNeeded } from \"./validateXml.js\";\n\nconst CONTENT_TYPES_XML = `<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<Types xmlns=\"http://schemas.openxmlformats.org/package/2006/content-types\">\n <Default Extension=\"rels\" ContentType=\"application/vnd.openxmlformats-package.relationships+xml\"/>\n <Default Extension=\"xml\" ContentType=\"application/xml\"/>\n <Override PartName=\"/word/document.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml\"/>\n</Types>\n`;\n\nconst ROOT_RELS_XML = `<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">\n <Relationship Id=\"rId1\"\n Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument\"\n Target=\"word/document.xml\"/>\n</Relationships>\n`;\n\nexport async function createDocxZipUint8Array(\n xml: string,\n options: XmlToDocxOptions = {},\n): Promise<Uint8Array> {\n const documentXml = normalizeDocumentXml(xml, options.inputKind ?? \"auto\");\n validateXmlIfNeeded(documentXml, options.validateXml ?? 
true);\n\n const zip = new JSZip();\n zip.file(\"[Content_Types].xml\", CONTENT_TYPES_XML);\n\n const relsFolder = zip.folder(\"_rels\");\n relsFolder?.file(\".rels\", ROOT_RELS_XML);\n\n const wordFolder = zip.folder(\"word\");\n wordFolder?.file(\"document.xml\", documentXml);\n\n return zip.generateAsync({ type: \"uint8array\" });\n}\n\n","import type { XmlToDocxInputKind } from \"../index.js\";\n\nconst WORD_MAIN_NS = \"http://schemas.openxmlformats.org/wordprocessingml/2006/main\";\n\nfunction hasWordDocumentRoot(xml: string): boolean {\n return /<w:document[\\s>]/.test(xml);\n}\n\nfunction ensureXmlDeclaration(xml: string): string {\n if (/^\\s*<\\?xml\\b/.test(xml)) return xml;\n return `<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\\n${xml}`;\n}\n\nfunction wrapBodyXml(bodyXml: string): string {\n const xml = `<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<w:document\n xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n>\n <w:body>\n${bodyXml}\n <w:sectPr>\n <w:pgSz w:w=\"12240\" w:h=\"15840\"/>\n <w:pgMar w:top=\"1440\" w:right=\"1440\" w:bottom=\"1440\" w:left=\"1440\" w:header=\"708\" w:footer=\"708\" w:gutter=\"0\"/>\n <w:cols w:space=\"708\"/>\n <w:docGrid w:linePitch=\"360\"/>\n </w:sectPr>\n </w:body>\n</w:document>\n`;\n return xml;\n}\n\nfunction ensureWordNamespace(xml: string): string {\n const hasWNamespace =\n /xmlns:w\\s*=\\s*[\"']http:\\/\\/schemas\\.openxmlformats\\.org\\/wordprocessingml\\/2006\\/main[\"']/.test(\n xml,\n );\n if (hasWNamespace) return xml;\n\n return xml.replace(\n /<w:document\\b/,\n `<w:document xmlns:w=\"${WORD_MAIN_NS}\"`,\n );\n}\n\nexport function normalizeDocumentXml(xml: string, inputKind: XmlToDocxInputKind): string {\n const trimmed = xml.trim();\n if (!trimmed) {\n throw new Error(\"XML is empty.\");\n }\n\n if (inputKind === \"document\") {\n const withDecl = 
ensureXmlDeclaration(trimmed);\n const withNs = ensureWordNamespace(withDecl);\n if (!hasWordDocumentRoot(withNs)) {\n throw new Error('inputKind=\"document\" requires a <w:document> root.');\n }\n return withNs;\n }\n\n if (inputKind === \"body\") {\n return wrapBodyXml(trimmed);\n }\n\n if (hasWordDocumentRoot(trimmed)) {\n const withDecl = ensureXmlDeclaration(trimmed);\n return ensureWordNamespace(withDecl);\n }\n\n return wrapBodyXml(trimmed);\n}\n\n","import { XMLValidator } from \"fast-xml-parser\";\n\nexport function validateXmlIfNeeded(xml: string, validateXml: boolean): void {\n if (!validateXml) return;\n\n const result = XMLValidator.validate(xml);\n if (result === true) return;\n\n const err = (result as { err?: { msg?: string; line?: number; col?: number } }).err;\n const msg = err?.msg ?? \"Invalid XML.\";\n const line = err?.line;\n const col = err?.col;\n\n const location =\n typeof line === \"number\" && typeof col === \"number\" ? ` (line ${line}, col ${col})` : \"\";\n throw new 
Error(`${msg}${location}`);\n}\n\n"],"mappings":";AAAA,OAAO,WAAW;;;ACElB,IAAM,eAAe;AAErB,SAAS,oBAAoB,KAAsB;AACjD,SAAO,mBAAmB,KAAK,GAAG;AACpC;AAEA,SAAS,qBAAqB,KAAqB;AACjD,MAAI,eAAe,KAAK,GAAG,EAAG,QAAO;AACrC,SAAO;AAAA,EAA4D,GAAG;AACxE;AAEA,SAAS,YAAY,SAAyB;AAC5C,QAAM,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMZ,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAUP,SAAO;AACT;AAEA,SAAS,oBAAoB,KAAqB;AAChD,QAAM,gBACJ,4FAA4F;AAAA,IAC1F;AAAA,EACF;AACF,MAAI,cAAe,QAAO;AAE1B,SAAO,IAAI;AAAA,IACT;AAAA,IACA,wBAAwB,YAAY;AAAA,EACtC;AACF;AAEO,SAAS,qBAAqB,KAAa,WAAuC;AACvF,QAAM,UAAU,IAAI,KAAK;AACzB,MAAI,CAAC,SAAS;AACZ,UAAM,IAAI,MAAM,eAAe;AAAA,EACjC;AAEA,MAAI,cAAc,YAAY;AAC5B,UAAM,WAAW,qBAAqB,OAAO;AAC7C,UAAM,SAAS,oBAAoB,QAAQ;AAC3C,QAAI,CAAC,oBAAoB,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACtE;AACA,WAAO;AAAA,EACT;AAEA,MAAI,cAAc,QAAQ;AACxB,WAAO,YAAY,OAAO;AAAA,EAC5B;AAEA,MAAI,oBAAoB,OAAO,GAAG;AAChC,UAAM,WAAW,qBAAqB,OAAO;AAC7C,WAAO,oBAAoB,QAAQ;AAAA,EACrC;AAEA,SAAO,YAAY,OAAO;AAC5B;;;ACvEA,SAAS,oBAAoB;AAEtB,SAAS,oBAAoB,KAAa,aAA4B;AAC3E,MAAI,CAAC,YAAa;AAElB,QAAM,SAAS,aAAa,SAAS,GAAG;AACxC,MAAI,WAAW,KAAM;AAErB,QAAM,MAAO,OAAmE;AAChF,QAAM,MAAM,KAAK,OAAO;AACxB,QAAM,OAAO,KAAK;AAClB,QAAM,MAAM,KAAK;AAEjB,QAAM,WACJ,OAAO,SAAS,YAAY,OAAO,QAAQ,WAAW,UAAU,IAAI,SAAS,GAAG,MAAM;AACxF,QAAM,IAAI,MAAM,GAAG,GAAG,GAAG,QAAQ,EAAE;AACrC;;;AFXA,IAAM,oBAAoB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAQ1B,IAAM,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAQtB,eAAsB,wBACpB,KACA,UAA4B,CAAC,GACR;AACrB,QAAM,cAAc,qBAAqB,KAAK,QAAQ,aAAa,MAAM;AACzE,sBAAoB,aAAa,QAAQ,eAAe,IAAI;AAE5D,QAAM,MAAM,IAAI,MAAM;AACtB,MAAI,KAAK,uBAAuB,iBAAiB;AAEjD,QAAM,aAAa,IAAI,OAAO,OAAO;AACrC,cAAY,KAAK,SAAS,aAAa;AAEvC,QAAM,aAAa,IAAI,OAAO,MAAM;AACpC,cAAY,KAAK,gBAAgB,WAAW;AAE5C,SAAO,IAAI,cAAc,EAAE,MAAM,aAAa,CAAC;AACjD;","names":[]}
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
// src/lib/htmlToWordBodyXml.ts
|
|
2
|
+
import { parseDocument } from "htmlparser2";
|
|
3
|
+
/**
 * Escapes the five XML special characters so the value can be placed in
 * element text or inside a quoted attribute value.
 *
 * All characters are replaced in a single pass, so an `&` that is produced by
 * a replacement is never escaped a second time.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with `& < > " '` replaced by entity references.
 */
function escapeXmlText(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  return value.replace(/[&<>"']/g, (character) => entities[character]);
}
|
|
6
|
+
/**
 * Decides whether a text run needs `xml:space="preserve"`.
 *
 * @param {string} text - Run text.
 * @returns {boolean} `true` when the text starts or ends with whitespace or
 *   contains two or more consecutive whitespace characters; `false` for
 *   empty or missing text.
 */
function shouldPreserveSpace(text) {
  if (!text) {
    return false;
  }
  const edgeOrRepeatedWhitespace = /^\s|\s$|\s{2,}/;
  return edgeOrRepeatedWhitespace.test(text);
}
|
|
10
|
+
/**
 * Parses an inline `style` attribute into a property → value map.
 *
 * Declarations are split on `;`. Property names are trimmed and lower-cased,
 * values are trimmed. Declarations without a colon, with an empty name, or
 * with an empty value are ignored. When a property is repeated, the last
 * occurrence wins.
 *
 * @param {string|undefined} style - Raw attribute value.
 * @returns {Object<string, string>} Parsed declarations (empty when none).
 */
function parseStyleAttribute(style) {
  if (!style) {
    return {};
  }
  const declarations = style.replace(/\r/g, "\n").split(";");
  const pairs = declarations.flatMap((declaration) => {
    const colonAt = declaration.indexOf(":");
    if (colonAt <= 0) {
      return [];
    }
    const property = declaration.slice(0, colonAt).trim().toLowerCase();
    const value = declaration.slice(colonAt + 1).trim();
    return property && value ? [[property, value]] : [];
  });
  return Object.fromEntries(pairs);
}
|
|
25
|
+
/**
 * Converts a CSS `rgb(r, g, b)` value into a six-digit upper-case hex string.
 *
 * @param {string} value - CSS color text; case and surrounding whitespace are ignored.
 * @returns {string|undefined} e.g. `"FF0010"`, or `undefined` when the text
 *   is not an `rgb()` triple of integers in the range 0-255.
 */
function parseRgbToHex(value) {
  const rgbPattern = /^rgb\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*\)$/;
  const match = rgbPattern.exec(value.trim().toLowerCase());
  if (match === null) {
    return undefined;
  }

  let hex = "";
  for (const component of match.slice(1, 4)) {
    const channel = Number(component);
    if (Number.isNaN(channel) || channel < 0 || channel > 255) {
      return undefined;
    }
    hex += channel.toString(16).padStart(2, "0");
  }
  return hex.toUpperCase();
}
|
|
32
|
+
/**
 * Converts a CSS color into the six-digit upper-case hex form used by
 * `<w:color w:val="…"/>`.
 *
 * Accepted inputs are `#RRGGBB`, the `#RGB` shorthand (each digit is doubled)
 * and whatever `parseRgbToHex` accepts.
 *
 * @param {string|undefined} value - CSS color text.
 * @returns {string|undefined} Hex string without `#`, or `undefined` when the
 *   value is missing or not in a supported notation.
 */
function parseCssColorToHex(value) {
  if (!value) {
    return undefined;
  }
  const color = value.trim();

  const longHex = /^#([0-9a-fA-F]{6})$/.exec(color);
  if (longHex) {
    return longHex[1].toUpperCase();
  }

  // `#abc` is shorthand for `#aabbcc`.
  const shortHex = /^#([0-9a-fA-F]{3})$/.exec(color);
  if (shortHex) {
    return [...shortHex[1]]
      .map((digit) => digit + digit)
      .join("")
      .toUpperCase();
  }

  return parseRgbToHex(color);
}
|
|
39
|
+
/**
 * Converts a CSS `font-size` given in `pt` or `px` into half-points.
 *
 * Pixel values are converted to points with the factor 72/96. The result is
 * rounded and never smaller than 1.
 *
 * @param {string|undefined} value - e.g. `"12pt"` or `"16px"`.
 * @returns {number|undefined} Size in half-points, or `undefined` for a
 *   missing value or an unsupported unit.
 */
function parseFontSizeToHalfPoints(value) {
  if (!value) {
    return undefined;
  }
  const match = /^(\d+(?:\.\d+)?)(pt|px)$/.exec(value.trim().toLowerCase());
  if (match === null) {
    return undefined;
  }
  const amount = Number(match[1]);
  const points = match[2] === "pt" ? amount : amount * 72 / 96;
  return Math.max(1, Math.round(points * 2));
}
|
|
51
|
+
/**
 * Picks the first family from a CSS `font-family` list and removes one
 * leading and one trailing quote character.
 *
 * @param {string|undefined} value - e.g. `'"Times New Roman", serif'`.
 * @returns {string|undefined} The first family name, or `undefined` when the
 *   value is missing or its first entry is blank.
 */
function normalizeFontFamily(value) {
  if (!value) {
    return undefined;
  }
  const [firstEntry] = value.split(",");
  const family = firstEntry.trim();
  if (family === "") {
    return undefined;
  }
  return family.replace(/^["']|["']$/g, "");
}
|
|
57
|
+
/**
 * Combines an inherited text style with an element's own style.
 *
 * For each of the six style fields the patch value is used unless it is
 * `null` or `undefined`, in which case the base value is kept. An explicit
 * `false` in the patch therefore overrides an inherited `true`.
 *
 * @param {object} base - Inherited style.
 * @param {object} patch - Style contributed by the current element.
 * @returns {object} A new style object containing all six fields.
 */
function mergeTextStyle(base, patch) {
  const fields = ["bold", "italic", "underline", "colorHex", "fontFamily", "fontSizeHalfPoints"];
  const merged = {};
  for (const field of fields) {
    merged[field] = patch[field] ?? base[field];
  }
  return merged;
}
|
|
67
|
+
/**
 * Derives the text style an element contributes, from its tag name and its
 * inline `style` attribute.
 *
 * Tag semantics (`b`/`strong`, `i`/`em`, `u`) take precedence over CSS. A
 * field is `undefined` when the element says nothing about it, so that
 * `mergeTextStyle` keeps the inherited value. An explicit reset
 * (`font-weight: normal|lighter`, `font-style: normal`) yields `false`, so it
 * can switch off bold or italic inherited from an ancestor — the same way a
 * numeric weight below 600 already does.
 *
 * @param {object} node - Parsed element; `name` and `attribs.style` are read.
 * @returns {{bold: (boolean|undefined), italic: (boolean|undefined),
 *   underline: (boolean|undefined), colorHex: (string|undefined),
 *   fontFamily: (string|undefined), fontSizeHalfPoints: (number|undefined)}}
 */
function styleFromElement(node) {
  const tag = node.name?.toLowerCase();
  const css = parseStyleAttribute(node.attribs?.style);

  const fontWeight = css["font-weight"]?.trim().toLowerCase();
  let boldFromCss;
  if (fontWeight === "bold" || fontWeight === "bolder") {
    boldFromCss = true;
  } else if (fontWeight === "normal" || fontWeight === "lighter") {
    boldFromCss = false;
  } else if (fontWeight && !Number.isNaN(Number(fontWeight))) {
    // Numeric weights: 600 and above are rendered as bold.
    boldFromCss = Number(fontWeight) >= 600;
  }

  const fontStyle = css["font-style"]?.trim().toLowerCase();
  let italicFromCss;
  if (fontStyle === "italic" || fontStyle === "oblique") {
    italicFromCss = true;
  } else if (fontStyle === "normal") {
    italicFromCss = false;
  }

  // Any text-decoration value decides underline: true only if it mentions it.
  const textDecoration = css["text-decoration"]?.trim().toLowerCase();
  const underlineFromCss = textDecoration ? textDecoration.includes("underline") : undefined;

  const tagBold = tag === "b" || tag === "strong" ? true : undefined;
  const tagItalic = tag === "i" || tag === "em" ? true : undefined;
  const tagUnderline = tag === "u" ? true : undefined;

  return {
    bold: tagBold ?? boldFromCss,
    italic: tagItalic ?? italicFromCss,
    underline: tagUnderline ?? underlineFromCss,
    colorHex: parseCssColorToHex(css.color),
    fontFamily: normalizeFontFamily(css["font-family"]),
    fontSizeHalfPoints: parseFontSizeToHalfPoints(css["font-size"]),
  };
}
|
|
102
|
+
/**
 * Concatenates the data of every text node in a subtree, in document order.
 *
 * @param {object} node - Parsed node; `type`, `data` and `children` are read.
 * @returns {string} The combined text (empty when the subtree has none).
 */
function getTextContent(node) {
  if (node.type === "text") {
    return node.data ?? "";
  }
  return (node.children ?? []).reduce((text, child) => text + getTextContent(child), "");
}
|
|
109
|
+
/**
 * Flattens a subtree into a list of inline tokens.
 *
 * Non-empty text nodes become `{ kind: "text", text, style }` with the style
 * in effect at that point; `<br>` becomes `{ kind: "br" }`. Other elements
 * contribute their own style (merged over the inherited one) to their
 * descendants. Nodes that are neither text nor elements pass the inherited
 * style through to their children unchanged.
 *
 * @param {object} node - Node to flatten.
 * @param {object} inherited - Style inherited from ancestors.
 * @param {Array<object>} out - Token list that is appended to in place.
 * @returns {void}
 */
function collectInlineRuns(node, inherited, out) {
  if (node.type === "text") {
    if (node.data) {
      out.push({ kind: "text", text: node.data, style: inherited });
    }
    return;
  }

  let styleForChildren = inherited;
  if (node.type === "tag") {
    if (node.name?.toLowerCase() === "br") {
      out.push({ kind: "br" });
      return;
    }
    styleForChildren = mergeTextStyle(inherited, styleFromElement(node));
  }

  (node.children ?? []).forEach((child) => collectInlineRuns(child, styleForChildren, out));
}
|
|
129
|
+
/**
 * Serializes one text run as `<w:r>`.
 *
 * Run properties are written in this order: bold, italic, underline, color,
 * fonts (ascii/hAnsi/eastAsia all set to the same family), size (`w:sz` and
 * `w:szCs`). `<w:rPr>` is omitted when no property applies. The text is
 * XML-escaped and marked `xml:space="preserve"` when it has significant
 * whitespace.
 *
 * @param {object} style - Text style (see `mergeTextStyle` for the fields).
 * @param {string} text - Run text.
 * @returns {string} The `<w:r>` element.
 */
function buildRunXml(style, text) {
  const size = style.fontSizeHalfPoints;
  const fontName = style.fontFamily ? escapeXmlText(style.fontFamily) : undefined;

  const properties = [
    style.bold ? "<w:b/>" : "",
    style.italic ? "<w:i/>" : "",
    style.underline ? '<w:u w:val="single"/>' : "",
    style.colorHex ? `<w:color w:val="${style.colorHex}"/>` : "",
    style.fontFamily
      ? `<w:rFonts w:ascii="${fontName}" w:hAnsi="${fontName}" w:eastAsia="${fontName}"/>`
      : "",
    typeof size === "number" ? `<w:sz w:val="${size}"/><w:szCs w:val="${size}"/>` : "",
  ].join("");

  const runProperties = properties ? `<w:rPr>${properties}</w:rPr>` : "";
  const spaceAttribute = shouldPreserveSpace(text) ? ' xml:space="preserve"' : "";
  return `<w:r>${runProperties}<w:t${spaceAttribute}>${escapeXmlText(text)}</w:t></w:r>`;
}
|
|
148
|
+
/**
 * Tests whether an element's `class` attribute contains a given class name.
 *
 * @param {object} node - Parsed element; `attribs.class` is read.
 * @param {string} className - Class name to look for (exact, case-sensitive).
 * @returns {boolean} `true` when the whitespace-separated class list contains it.
 */
function hasClass(node, className) {
  const classAttribute = node.attribs?.class;
  return classAttribute ? classAttribute.split(/\s+/).includes(className) : false;
}
|
|
153
|
+
/**
 * Identifies elements whose whole subtree is left out of the conversion.
 *
 * Skipped: `<button>`, `<canvas>`, `<img class="ProseMirror-separator">`,
 * any element with `id="pages"`, and any element carrying the
 * `ProseMirror-widget` class. Non-element nodes are never skipped.
 *
 * @param {object} node - Parsed node.
 * @returns {boolean} `true` when the node and its descendants should be ignored.
 */
function isSkippableSubtree(node) {
  if (node.type !== "tag") {
    return false;
  }
  const tag = node.name?.toLowerCase();
  return (
    tag === "button" ||
    tag === "canvas" ||
    (tag === "img" && hasClass(node, "ProseMirror-separator")) ||
    node.attribs?.id === "pages" ||
    hasClass(node, "ProseMirror-widget")
  );
}
|
|
162
|
+
/**
 * Converts a CSS length into twips.
 *
 * - `pt`: multiplied by 20.
 * - `px`: converted to points with 72/96, then multiplied by 20.
 * - `em`: relative to the base font size (`baseFontHalfPoints / 2` points).
 * - a bare number: rounded and returned as is.
 *
 * Negative values are accepted; the result is rounded to an integer.
 *
 * @param {string|undefined} value - CSS length text.
 * @param {number} baseFontHalfPoints - Base font size used for `em`.
 * @returns {number|undefined} Twips, or `undefined` for a missing value or
 *   an unsupported notation.
 */
function parseCssLengthToTwips(value, baseFontHalfPoints) {
  if (!value) {
    return undefined;
  }
  const match = /^(-?\d+(?:\.\d+)?)(pt|px|em)?$/.exec(value.trim().toLowerCase());
  if (match === null) {
    return undefined;
  }
  const amount = Number(match[1]);
  switch (match[2]) {
    case "pt":
      return Math.round(amount * 20);
    case "px":
      return Math.round(amount * 72 * 20 / 96);
    case "em":
      return Math.round(amount * (baseFontHalfPoints / 2) * 20);
    default:
      return Math.round(amount);
  }
}
|
|
179
|
+
/**
 * Finds the first element in document order (the node itself included) whose
 * inline style has a parsable `font-size`, and returns that size.
 *
 * @param {object} node - Root of the subtree to search.
 * @returns {number|undefined} Size in half-points, or `undefined` when no
 *   element in the subtree declares a usable font size.
 */
function inferFirstFontSizeHalfPoints(node) {
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === "tag") {
      const { "font-size": fontSize } = parseStyleAttribute(current.attribs?.style);
      const halfPoints = parseFontSizeToHalfPoints(fontSize);
      if (typeof halfPoints === "number") {
        return halfPoints;
      }
    }
    // Push children last-to-first so the first child is the next one popped.
    for (const child of [...(current.children ?? [])].reverse()) {
      pending.push(child);
    }
  }
  return undefined;
}
|
|
195
|
+
/**
 * Builds the `<w:pPr>` element for a block from its inline CSS.
 *
 * Mapped properties:
 * - `margin-top` / `margin-bottom` / `line-height` → `<w:spacing>`
 *   (`line-height` is written with `w:lineRule="exact"`; a unitless value is
 *   multiplied by the base font size).
 * - `margin-left` + `padding-left` / `text-indent` → `<w:ind>`; values from
 *   `extraInd` take precedence for the left and hanging indents.
 * - `text-align` (`center`, `right`, `justify`) → `<w:jc>`.
 *
 * Children are written in the order spacing, ind, jc, which is the order the
 * WordprocessingML schema defines for paragraph properties. `w:firstLine` is
 * omitted when a hanging indent is present because the two attributes are
 * mutually exclusive.
 *
 * @param {object} node - Element whose `attribs.style` is read.
 * @param {number} baseFontHalfPoints - Base font size for `em` and unitless values.
 * @param {{leftTwips?: number, hangingTwips?: number}} [extraInd] - Indent overrides.
 * @returns {string} `<w:pPr>…</w:pPr>`, or `""` when nothing applies.
 */
function buildParagraphPrXml(node, baseFontHalfPoints, extraInd) {
  const css = parseStyleAttribute(node.attribs?.style);
  const toTwips = (property) => parseCssLengthToTwips(css[property], baseFontHalfPoints);

  // --- <w:spacing> -------------------------------------------------------
  const before = toTwips("margin-top");
  const after = toTwips("margin-bottom");

  let lineHeight;
  const rawLineHeight = css["line-height"]?.trim().toLowerCase();
  if (rawLineHeight && rawLineHeight !== "normal") {
    const unitless = /^(\d+(?:\.\d+)?)$/.exec(rawLineHeight);
    if (unitless) {
      const multiplier = Number(unitless[1]);
      if (Number.isFinite(multiplier) && multiplier > 0) {
        lineHeight = Math.round((baseFontHalfPoints / 2) * multiplier * 20);
      }
    } else {
      const twips = parseCssLengthToTwips(rawLineHeight, baseFontHalfPoints);
      if (typeof twips === "number") lineHeight = Math.max(1, twips);
    }
  }

  const spacingAttrs = [];
  if (typeof before === "number") spacingAttrs.push(`w:before="${Math.max(0, before)}"`);
  if (typeof after === "number") spacingAttrs.push(`w:after="${Math.max(0, after)}"`);
  if (typeof lineHeight === "number") {
    spacingAttrs.push(`w:line="${lineHeight}"`, 'w:lineRule="exact"');
  }

  // --- <w:ind> -----------------------------------------------------------
  const cssLeftSum = (toTwips("margin-left") ?? 0) + (toTwips("padding-left") ?? 0);
  const cssLeft = cssLeftSum ? Math.max(0, cssLeftSum) : undefined;

  const textIndent = toTwips("text-indent");
  const firstLine =
    typeof textIndent === "number" && textIndent ? Math.max(0, textIndent) : undefined;

  const leftTwips = extraInd?.leftTwips ?? cssLeft;
  const hangingTwips = extraInd?.hangingTwips;

  const indAttrs = [];
  if (typeof leftTwips === "number") indAttrs.push(`w:left="${leftTwips}"`);
  if (typeof hangingTwips === "number") {
    indAttrs.push(`w:hanging="${hangingTwips}"`);
  } else if (typeof firstLine === "number") {
    indAttrs.push(`w:firstLine="${firstLine}"`);
  }

  // --- <w:jc> ------------------------------------------------------------
  const align = css["text-align"]?.trim().toLowerCase();
  const jcByAlign = { center: "center", right: "right", justify: "both" };
  const jcVal = align && Object.hasOwn(jcByAlign, align) ? jcByAlign[align] : undefined;

  const parts = [];
  if (spacingAttrs.length) parts.push(`<w:spacing ${spacingAttrs.join(" ")}/>`);
  if (indAttrs.length) parts.push(`<w:ind ${indAttrs.join(" ")}/>`);
  if (jcVal) parts.push(`<w:jc w:val="${jcVal}"/>`);

  return parts.length ? `<w:pPr>${parts.join("")}</w:pPr>` : "";
}
|
|
248
|
+
/**
 * Converts a block container (e.g. `<p>`, a heading, a table cell) into one
 * `<w:p>` made of the inline content found anywhere inside it.
 *
 * Whitespace handling: a whitespace-only text node that sits between two
 * pieces of visible text on the same line is emitted as a single space, so
 * `<b>a</b> <i>b</i>` keeps the gap between the words. Whitespace-only nodes
 * at the start of the paragraph, at its end, or next to a `<br>` are dropped,
 * and no space is added when the neighbouring text already has one.
 *
 * @param {object} node - Container element.
 * @param {object} baseStyle - Text style the content starts from.
 * @param {{leftTwips?: number, hangingTwips?: number}} [extraInd] - Indent
 *   overrides forwarded to `buildParagraphPrXml`.
 * @returns {string} `<w:p>…</w:p>`, or `""` when the container yields no runs.
 */
function buildParagraphXmlFromContainer(node, baseStyle, extraInd) {
  // Base size for em/unitless CSS values: explicit style, else the first
  // font-size found in the subtree, else 28 half-points.
  const baseFontHalfPoints =
    baseStyle.fontSizeHalfPoints ?? inferFirstFontSizeHalfPoints(node) ?? 28;
  const pPrXml = buildParagraphPrXml(node, baseFontHalfPoints, extraInd);

  const tokens = [];
  for (const child of node.children ?? []) collectInlineRuns(child, baseStyle, tokens);

  const runXml = [];
  let previousText = null; // last visible text since paragraph start or last <br>
  let pendingSpace = null; // whitespace-only token waiting for following text
  for (const token of tokens) {
    if (token.kind === "br") {
      runXml.push("<w:r><w:br/></w:r>");
      previousText = null;
      pendingSpace = null;
      continue;
    }

    const text = token.text;
    if (!text) continue;

    if (!text.trim()) {
      // Remember only the first separator after visible text.
      if (previousText !== null && pendingSpace === null) pendingSpace = token;
      continue;
    }

    if (pendingSpace !== null) {
      const alreadySeparated = /\s$/.test(previousText) || /^\s/.test(text);
      if (!alreadySeparated) runXml.push(buildRunXml(pendingSpace.style, " "));
      pendingSpace = null;
    }
    runXml.push(buildRunXml(token.style, text));
    previousText = text;
  }

  if (!runXml.length) return "";
  return `<w:p>${pPrXml}${runXml.join("")}</w:p>`;
}
|
|
267
|
+
// Paragraph whose only run is a page break; emitted for explicit page-break markers.
const PAGE_BREAK_XML = '<w:p><w:r><w:br w:type="page"/></w:r></w:p>';
|
|
268
|
+
/**
 * Detects elements that mark an explicit page break.
 *
 * An element qualifies when any of these holds:
 * - its class list contains `page-break` (this covers `<hr class="page-break">`),
 * - it has `data-page-break="true"`,
 * - its inline style sets `page-break-after` / `break-after` or
 *   `page-break-before` / `break-before` to a value containing `always`
 *   (the `page-break-*` property is consulted first when both are present).
 *
 * @param {object} node - Parsed node.
 * @returns {boolean} `true` for a page-break marker; always `false` for
 *   non-element nodes.
 */
function isExplicitPageBreak(node) {
  if (node.type !== "tag") {
    return false;
  }

  const classNames = (node.attribs?.class ?? "").split(/\s+/);
  if (classNames.includes("page-break")) {
    return true;
  }
  if (node.attribs?.["data-page-break"] === "true") {
    return true;
  }

  const css = parseStyleAttribute(node.attribs?.style);
  const breakAfter = (css["page-break-after"] ?? css["break-after"])?.toLowerCase();
  const breakBefore = (css["page-break-before"] ?? css["break-before"])?.toLowerCase();
  return Boolean(breakAfter?.includes("always") || breakBefore?.includes("always"));
}
|
|
282
|
+
/**
 * Returns the base text style for a heading level.
 *
 * Headings are bold; the size in half-points is 44, 32, 28 and 24 for levels
 * 1-4 and 22 for every other level.
 *
 * @param {number} level - Heading level (1-6).
 * @returns {{bold: boolean, fontSizeHalfPoints: number}}
 */
function buildHeadingBaseStyle(level) {
  const sizeByLevel = new Map([
    [1, 44],
    [2, 32],
    [3, 28],
    [4, 24],
  ]);
  return { bold: true, fontSizeHalfPoints: sizeByLevel.get(level) ?? 22 };
}
|
|
286
|
+
/**
 * Converts a `<ul>` / `<ol>` into one paragraph per direct `<li>` child.
 *
 * Each paragraph starts with a literal prefix run ("• " or "1. ", "2. ", …)
 * and uses a fixed left indent of 720 twips with a 360-twip hanging indent.
 * No Word numbering definitions are involved. All inline content below the
 * item — including the text of any nested list — ends up in that paragraph.
 *
 * Whitespace handling: a whitespace-only text node between two pieces of
 * visible text on the same line is emitted as a single space; such nodes
 * directly after the prefix, at the end, or next to a `<br>` are dropped.
 *
 * @param {object} listNode - The list element.
 * @param {boolean} ordered - `true` for numbered prefixes, `false` for bullets.
 * @returns {string[]} One `<w:p>` string per list item.
 */
function buildListBlocks(listNode, ordered) {
  const items = (listNode.children ?? []).filter(
    (child) => child.type === "tag" && child.name?.toLowerCase() === "li"
  );

  return items.map((li, index) => {
    const prefix = ordered ? `${index + 1}. ` : "\u2022 ";
    const baseStyle = {};

    const tokens = [];
    for (const child of li.children ?? []) collectInlineRuns(child, baseStyle, tokens);

    // The prefix already ends with a space, so item content starts "fresh":
    // leading whitespace-only nodes must not add a second separator.
    const runXml = [buildRunXml(baseStyle, prefix)];
    let previousText = null; // last visible item text since start or last <br>
    let pendingSpace = null; // whitespace-only token waiting for following text
    for (const token of tokens) {
      if (token.kind === "br") {
        runXml.push("<w:r><w:br/></w:r>");
        previousText = null;
        pendingSpace = null;
        continue;
      }

      const text = token.text;
      if (!text) continue;

      if (!text.trim()) {
        if (previousText !== null && pendingSpace === null) pendingSpace = token;
        continue;
      }

      if (pendingSpace !== null) {
        const alreadySeparated = /\s$/.test(previousText) || /^\s/.test(text);
        if (!alreadySeparated) runXml.push(buildRunXml(pendingSpace.style, " "));
        pendingSpace = null;
      }
      runXml.push(buildRunXml(token.style, text));
      previousText = text;
    }

    const pPrXml = buildParagraphPrXml(li, inferFirstFontSizeHalfPoints(li) ?? 28, {
      leftTwips: 720,
      hangingTwips: 360,
    });
    return `<w:p>${pPrXml}${runXml.join("")}</w:p>`;
  });
}
|
|
321
|
+
/**
 * Converts an HTML `<table>` into a `<w:tbl>` with thin grey borders.
 *
 * Rows are the `<tr>` elements found below the table, looking through
 * wrappers such as `<thead>` / `<tbody>` but not into rows, cells or nested
 * tables — so the rows of a table nested inside a cell are not hoisted into
 * this table. Each `<td>` / `<th>` becomes one cell holding a single
 * paragraph (`<th>` content starts bold); an empty cell gets an empty
 * paragraph.
 *
 * @param {object} tableNode - The table element.
 * @returns {string} `<w:tbl>…</w:tbl>`, or `""` when the table has no row
 *   with at least one cell.
 */
function buildTableXml(tableNode) {
  // Collect this table's own rows in document order.
  const rows = [];
  const pending = [...(tableNode.children ?? [])];
  while (pending.length) {
    const current = pending.shift();
    const name = current.type === "tag" ? current.name?.toLowerCase() : undefined;
    if (name === "tr") {
      rows.push(current);
      continue; // cells are handled below; do not look for rows inside a row
    }
    if (name === "table") continue; // a nested table keeps its own rows
    if (current.children?.length) pending.unshift(...current.children);
  }

  const rowXml = [];
  for (const tr of rows) {
    const cellXml = [];
    for (const cell of tr.children ?? []) {
      const cellName = cell.type === "tag" ? cell.name?.toLowerCase() : undefined;
      if (cellName !== "td" && cellName !== "th") continue;

      const baseStyle = cellName === "th" ? { bold: true } : {};
      // A table cell must contain a paragraph, hence the empty fallback.
      const paragraph = buildParagraphXmlFromContainer(cell, baseStyle) || "<w:p/>";
      cellXml.push(`<w:tc><w:tcPr><w:tcW w:w="0" w:type="auto"/></w:tcPr>${paragraph}</w:tc>`);
    }
    if (cellXml.length) rowXml.push(`<w:tr>${cellXml.join("")}</w:tr>`);
  }

  // A table without rows would render nothing; emit no markup for it.
  if (!rowXml.length) return "";

  const border = (side) => `<w:${side} w:val="single" w:sz="4" w:space="0" w:color="D9D9D9"/>`;
  const borders = ["top", "left", "bottom", "right", "insideH", "insideV"].map(border).join("");
  const tblPr = `<w:tblPr><w:tblW w:w="0" w:type="auto"/><w:tblBorders>${borders}</w:tblBorders></w:tblPr>`;
  const tblGrid = "<w:tblGrid/>";
  return `<w:tbl>${tblPr}${tblGrid}${rowXml.join("")}</w:tbl>`;
}
|
|
350
|
+
/**
 * Walks the parsed HTML tree and appends block-level WordprocessingML to `out`.
 *
 * Handling per node:
 * - skippable subtrees are ignored entirely;
 * - an explicit page-break marker emits a page-break paragraph — the marker's
 *   own content is not emitted;
 * - `<p>` and `<h1>`-`<h6>` become paragraphs (headings with a heading style);
 * - `<table>` becomes a table, `<ul>` / `<ol>` become list paragraphs;
 * - every other node is only a container: its children are visited.
 *
 * Text that is not inside one of the block elements above produces no output
 * from this function.
 *
 * @param {object} node - Current node.
 * @param {string[]} out - Block XML list that is appended to in place.
 * @returns {void}
 */
function collectBodyBlocks(node, out) {
  if (isSkippableSubtree(node)) {
    return;
  }

  if (node.type === "tag") {
    if (isExplicitPageBreak(node)) {
      out.push(PAGE_BREAK_XML);
      return;
    }

    const tag = node.name?.toLowerCase();
    const heading = tag ? /^h([1-6])$/.exec(tag) : null;
    if (tag === "p" || heading) {
      const baseStyle = heading ? buildHeadingBaseStyle(Number(heading[1])) : {};
      const paragraphXml = buildParagraphXmlFromContainer(node, baseStyle);
      if (paragraphXml) {
        out.push(paragraphXml);
      }
      return;
    }

    switch (tag) {
      case "table": {
        const tableXml = buildTableXml(node);
        if (tableXml) {
          out.push(tableXml);
        }
        return;
      }
      case "ul":
      case "ol":
        out.push(...buildListBlocks(node, tag === "ol"));
        return;
      default:
        break;
    }
  }

  (node.children ?? []).forEach((child) => collectBodyBlocks(child, out));
}
|
|
381
|
+
/**
 * Converts plain text into body XML, one paragraph per line.
 *
 * `\r\n` and lone `\r` are treated as `\n`. An empty line becomes an empty
 * paragraph; any other line (including one made only of spaces) becomes a
 * paragraph with a single unstyled run.
 *
 * @param {string} text - Plain text.
 * @returns {string} Concatenated `<w:p>` elements.
 * @throws {Error} "Text is empty." when the text is blank.
 */
function textToWordBodyXml(text) {
  const normalized = text.replace(/\r\n?/g, "\n");
  if (normalized.trim() === "") {
    throw new Error("Text is empty.");
  }
  return normalized
    .split("\n")
    .map((line) => (line ? `<w:p>${buildRunXml({}, line)}</w:p>` : "<w:p/>"))
    .join("");
}
|
|
397
|
+
/**
 * Converts an HTML string into WordprocessingML body XML.
 *
 * The HTML is parsed with htmlparser2 (tag and attribute names lower-cased,
 * self-closing syntax recognized) and walked for block elements. When the
 * walk produces no blocks at all, the document's text content is converted
 * as plain text instead.
 *
 * @param {string} html - HTML source.
 * @returns {string} Concatenated block-level XML.
 * @throws {Error} "Text is empty." when there are no blocks and no text.
 */
function htmlToWordBodyXml(html) {
  const parserOptions = {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true,
  };
  const dom = parseDocument(html.replace(/\r\n?/g, "\n"), parserOptions);

  const blocks = [];
  collectBodyBlocks(dom, blocks);

  return blocks.length > 0 ? blocks.join("") : textToWordBodyXml(getTextContent(dom));
}
|
|
412
|
+
export {
|
|
413
|
+
htmlToWordBodyXml,
|
|
414
|
+
textToWordBodyXml
|
|
415
|
+
};
|
|
416
|
+
//# sourceMappingURL=htmlToWordBodyXml-RFBPSL2Q.js.map
|