@coding01/docsjs 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -91
- package/README.zh-CN.md +204 -0
- package/dist/chunk-IBVWD4UO.js +2193 -0
- package/dist/chunk-IBVWD4UO.js.map +1 -0
- package/dist/index.cjs +831 -141
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +44 -20
- package/dist/index.d.ts +44 -20
- package/dist/index.js +59 -1553
- package/dist/index.js.map +1 -1
- package/dist/react.cjs +2264 -0
- package/dist/react.cjs.map +1 -0
- package/dist/react.d.cts +16 -0
- package/dist/react.d.ts +16 -0
- package/dist/react.js +41 -0
- package/dist/react.js.map +1 -0
- package/dist/types-DF14w1ol.d.cts +20 -0
- package/dist/types-DF14w1ol.d.ts +20 -0
- package/dist/vue.cjs +2267 -0
- package/dist/vue.cjs.map +1 -0
- package/dist/vue.d.cts +24 -0
- package/dist/vue.d.ts +24 -0
- package/dist/vue.js +44 -0
- package/dist/vue.js.map +1 -0
- package/package.json +30 -2
package/dist/index.cjs
CHANGED
|
@@ -31,8 +31,9 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
DocsWordElement: () => DocsWordElement,
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
calculateFidelityScore: () => calculateFidelityScore,
|
|
35
|
+
collectSemanticStatsFromDocument: () => collectSemanticStatsFromDocument,
|
|
36
|
+
collectSemanticStatsFromHtml: () => collectSemanticStatsFromHtml,
|
|
36
37
|
defineDocsWordElement: () => defineDocsWordElement
|
|
37
38
|
});
|
|
38
39
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -69,6 +70,9 @@ function queryAllByLocalName(root, localName) {
|
|
|
69
70
|
/**
 * Returns the first element found by `queryAllByLocalName`, or `null`
 * when the lookup yields nothing.
 *
 * @param {*} root - Node handed to `queryAllByLocalName`.
 * @param {string} localName - Local name to look for.
 * @returns {*} The first match, or `null`.
 */
function queryByLocalName(root, localName) {
  const matches = queryAllByLocalName(root, localName);
  const first = matches[0];
  if (first === undefined || first === null) return null;
  return first;
}
|
|
73
|
+
/**
 * Collects the immediate children of `node` whose `localName` equals
 * `localName`. Deeper descendants are never visited.
 *
 * @param {{children: Iterable}} node - Parent whose `children` are scanned.
 * @param {string} localName - Local name to match exactly.
 * @returns {Array} Matching children in their original order.
 */
function directChildrenByLocalName(node, localName) {
  const matches = [];
  for (const child of Array.from(node.children)) {
    if (child.localName === localName) {
      matches.push(child);
    }
  }
  return matches;
}
|
|
72
76
|
function getAttr(node, name) {
|
|
73
77
|
if (!node) return null;
|
|
74
78
|
return node.getAttribute(name);
|
|
@@ -76,6 +80,9 @@ function getAttr(node, name) {
|
|
|
76
80
|
/**
 * Converts a length in EMU to pixels using 914400 EMU per 96 px.
 *
 * @param {number} emu - Length in EMU.
 * @returns {number} Length in pixels.
 */
function emuToPx(emu) {
  // Multiply first, then divide, to keep the original rounding behaviour.
  const scaled = emu * 96;
  return scaled / 914400;
}
|
|
83
|
+
/**
 * Converts a length in twips to pixels using 1440 twips per 96 px.
 *
 * @param {number} twip - Length in twips.
 * @returns {number} Length in pixels.
 */
function twipToPx(twip) {
  // Multiply first, then divide, to keep the original rounding behaviour.
  const scaled = twip * 96;
  return scaled / 1440;
}
|
|
79
86
|
function parseDrawingSizePx(drawing) {
|
|
80
87
|
const extentNode = queryAllByLocalName(drawing, "extent").find((node) => {
|
|
81
88
|
const parent = node.parentElement;
|
|
@@ -104,6 +111,97 @@ function imageDimensionAttributes(sizePx) {
|
|
|
104
111
|
}
|
|
105
112
|
return attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
|
|
106
113
|
}
|
|
114
|
+
/**
 * Reads the horizontal and vertical `posOffset` values of an anchor and
 * converts them from EMU to pixels.
 *
 * @param {*} anchor - Anchor element whose direct `positionH` / `positionV`
 *   children are inspected.
 * @returns {{leftPx: (number|null), topPx: (number|null)}} Each offset in
 *   pixels, or `null` when it is missing, empty or not a finite number.
 */
function parseAnchorPositionPx(anchor) {
  const readOffsetPx = (axisName) => {
    const axisNode = directChildrenByLocalName(anchor, axisName)[0] ?? null;
    if (!axisNode) return null;
    const offsetNode = directChildrenByLocalName(axisNode, "posOffset")[0] ?? null;
    const rawText = offsetNode?.textContent?.trim() ?? "";
    if (!rawText) return null;
    const emu = Number.parseFloat(rawText);
    return Number.isFinite(emu) ? emuToPx(emu) : null;
  };
  return {
    leftPx: readOffsetPx("positionH"),
    topPx: readOffsetPx("positionV")
  };
}
|
|
129
|
+
/**
 * Determines the wrap mode of an anchor from its direct wrap child.
 *
 * The candidates are checked in a fixed order (square, tight,
 * topAndBottom, none) and the first one present wins.
 *
 * @param {*} anchor - Anchor element to inspect.
 * @returns {("square"|"tight"|"topAndBottom"|"none"|null)} The wrap mode,
 *   or `null` when no known wrap child exists.
 */
function parseAnchorWrapMode(anchor) {
  const wrapElements = [
    ["wrapSquare", "square"],
    ["wrapTight", "tight"],
    ["wrapTopAndBottom", "topAndBottom"],
    ["wrapNone", "none"]
  ];
  for (const [elementName, mode] of wrapElements) {
    if (directChildrenByLocalName(anchor, elementName)[0]) {
      return mode;
    }
  }
  return null;
}
|
|
136
|
+
/**
 * Extracts layout metadata from the `anchor` child of a drawing.
 *
 * @param {*} drawing - Drawing element whose direct `anchor` child is read.
 * @returns {(object|null)} `null` when the drawing has no direct `anchor`
 *   child; otherwise an object with the position, wrap mode, the four
 *   `dist*` distances converted to pixels (or `null`), the `relativeFrom`
 *   attributes of `positionH` / `positionV`, the three boolean flags and
 *   the parsed `relativeHeight` (or `null`).
 */
function parseAnchorMeta(drawing) {
  const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
  if (!anchor) return null;

  const firstChild = (name) => directChildrenByLocalName(anchor, name)[0] ?? null;

  // Distances are accepted only when they parse to a non-negative integer.
  const distancePx = (attrName) => {
    const rawValue = getAttr(anchor, attrName);
    if (!rawValue) return null;
    const emu = Number.parseInt(rawValue, 10);
    if (!Number.isFinite(emu) || emu < 0) return null;
    return emuToPx(emu);
  };

  // Recognises 1/true/on and 0/false/off (case-insensitive); anything else
  // falls back to the supplied default.
  const flag = (attrName, fallback) => {
    switch ((getAttr(anchor, attrName) ?? "").toLowerCase()) {
      case "1":
      case "true":
      case "on":
        return true;
      case "0":
      case "false":
      case "off":
        return false;
      default:
        return fallback;
    }
  };

  const relativeFromH = getAttr(firstChild("positionH"), "relativeFrom");
  const relativeFromV = getAttr(firstChild("positionV"), "relativeFrom");
  const rawHeight = getAttr(anchor, "relativeHeight");
  const height = rawHeight ? Number.parseInt(rawHeight, 10) : Number.NaN;

  return {
    position: parseAnchorPositionPx(anchor),
    wrapMode: parseAnchorWrapMode(anchor),
    distTPx: distancePx("distT"),
    distBPx: distancePx("distB"),
    distLPx: distancePx("distL"),
    distRPx: distancePx("distR"),
    relativeFromH,
    relativeFromV,
    behindDoc: flag("behindDoc", false),
    allowOverlap: flag("allowOverlap", true),
    layoutInCell: flag("layoutInCell", true),
    relativeHeight: Number.isFinite(height) ? height : null
  };
}
|
|
171
|
+
/**
 * Adds absolute-positioning CSS and `data-word-anchor*` attributes to an
 * image attribute string.
 *
 * @param {string} baseAttrs - Attribute string built so far; may already
 *   contain a `style="..."` attribute.
 * @param {(object|null)} anchorMeta - Metadata in the shape returned by
 *   `parseAnchorMeta`, or a falsy value when the image is not anchored.
 * @returns {string} `baseAttrs` unchanged when there is no metadata or no
 *   known offset; otherwise the attribute string with the positioning
 *   declarations merged into (or added as) the `style` attribute, followed
 *   by the anchor data attributes.
 */
function mergeImageStyle(baseAttrs, anchorMeta) {
  if (!anchorMeta) return baseAttrs;
  const { position, wrapMode } = anchorMeta;
  const { leftPx, topPx } = position;
  if (leftPx === null && topPx === null) return baseAttrs;

  const declarations = ["position:absolute"];
  if (leftPx !== null) declarations.push(`left:${leftPx.toFixed(2)}px`);
  if (topPx !== null) declarations.push(`top:${topPx.toFixed(2)}px`);
  // Images behind the text get z-index 0; otherwise relativeHeight, or 3.
  declarations.push(`z-index:${anchorMeta.behindDoc ? 0 : anchorMeta.relativeHeight ?? 3}`);
  const margins = [
    ["margin-top", anchorMeta.distTPx],
    ["margin-bottom", anchorMeta.distBPx],
    ["margin-left", anchorMeta.distLPx],
    ["margin-right", anchorMeta.distRPx]
  ];
  for (const [property, px] of margins) {
    if (px !== null) declarations.push(`${property}:${px.toFixed(2)}px`);
  }
  if (wrapMode === "topAndBottom") {
    declarations.push("display:block", "clear:both");
  }

  const dataAttrs = [`data-word-anchor="1"`];
  if (wrapMode) dataAttrs.push(`data-word-wrap="${wrapMode}"`);
  if (anchorMeta.relativeFromH) {
    dataAttrs.push(`data-word-anchor-relh="${escapeHtml(anchorMeta.relativeFromH)}"`);
  }
  if (anchorMeta.relativeFromV) {
    dataAttrs.push(`data-word-anchor-relv="${escapeHtml(anchorMeta.relativeFromV)}"`);
  }
  dataAttrs.push(`data-word-anchor-behind="${anchorMeta.behindDoc ? "1" : "0"}"`);
  dataAttrs.push(`data-word-anchor-overlap="${anchorMeta.allowOverlap ? "1" : "0"}"`);
  dataAttrs.push(`data-word-anchor-layout-cell="${anchorMeta.layoutInCell ? "1" : "0"}"`);
  const anchorAttrs = dataAttrs.join(" ");

  if (!baseAttrs.includes("style=")) {
    return `${baseAttrs} style="${declarations.join(";")}" ${anchorAttrs}`;
  }
  // Append to the existing style attribute rather than adding a second one.
  return baseAttrs.replace(/style="([^"]*)"/, (_match, existingStyle) => {
    const merged = [existingStyle, ...declarations].filter((part) => part.length > 0).join(";");
    return `style="${merged}" ${anchorAttrs}`;
  });
}
|
|
107
205
|
function parseDocRelsMap(relsXmlText) {
|
|
108
206
|
if (!relsXmlText) return {};
|
|
109
207
|
const rels = parseXml(relsXmlText);
|
|
@@ -127,11 +225,16 @@ function extToMime(ext) {
|
|
|
127
225
|
if (lower === "svg") return "image/svg+xml";
|
|
128
226
|
return "application/octet-stream";
|
|
129
227
|
}
|
|
228
|
+
/**
 * Maps a relationship target to a zip entry path under `word/`.
 *
 * Backslashes become forward slashes, leading slashes are dropped, and
 * leading `./` segments are removed so that `./media/a.png` resolves to
 * the same entry as `media/a.png`. Targets already under `word/` are
 * returned as-is. Leading `../` segments are stripped and the remainder is
 * placed under `word/`.
 *
 * NOTE(review): stripping `../` keeps such targets inside `word/` instead
 * of resolving them against the package root; this is the pre-existing
 * behaviour and is preserved here.
 *
 * @param {string} relTarget - Raw `Target` value of a relationship.
 * @returns {string} Path to look up inside the zip archive.
 */
function normalizeWordPath(relTarget) {
  const normalized = relTarget
    .replace(/\\/g, "/")
    .replace(/^\/+/, "")
    // A leading "./" is a no-op path segment; leaving it in would produce
    // "word/./x", which does not match the stored entry name "word/x".
    .replace(/^(\.\/)+/, "");
  if (normalized.startsWith("word/")) return normalized;
  if (normalized.startsWith("../")) return `word/${normalized.replace(/^(\.\.\/)+/, "")}`;
  return `word/${normalized}`;
}
|
|
130
234
|
async function imageRidToDataUrl(zip, relMap, rid) {
|
|
131
235
|
const relTarget = relMap[rid];
|
|
132
236
|
if (!relTarget) return null;
|
|
133
|
-
const
|
|
134
|
-
const path = normalized.startsWith("word/") ? normalized : `word/${normalized}`;
|
|
237
|
+
const path = normalizeWordPath(relTarget);
|
|
135
238
|
const file = zip.file(path);
|
|
136
239
|
if (!file) return null;
|
|
137
240
|
const base64 = await file.async("base64");
|
|
@@ -139,6 +242,55 @@ async function imageRidToDataUrl(zip, relMap, rid) {
|
|
|
139
242
|
const mime = extToMime(ext);
|
|
140
243
|
return `data:${mime};base64,${base64}`;
|
|
141
244
|
}
|
|
245
|
+
/**
 * Loads the text of the zip entry referenced by a relationship id.
 *
 * @param {*} zip - Archive exposing `file(path)`; entries expose
 *   `async("string")`.
 * @param {Object<string, string>} relMap - Relationship id to target map.
 * @param {string} rid - Relationship id to resolve.
 * @returns {Promise<(string|null)>} The entry text, or `null` when the id
 *   is unknown or the entry does not exist in the archive.
 */
async function readXmlByRid(zip, relMap, rid) {
  const relTarget = relMap[rid];
  if (!relTarget) return null;
  const entry = zip.file(normalizeWordPath(relTarget));
  if (!entry) return null;
  return entry.async("string");
}
|
|
252
|
+
/**
 * Identifies the chart kind by probing for known chart elements.
 *
 * Candidates are probed in a fixed order and the first element found
 * decides the result, with its trailing "Chart" removed
 * (e.g. `barChart` → `bar`).
 *
 * @param {*} chartDoc - Parsed chart XML passed to `queryByLocalName`.
 * @returns {string} The chart kind, or `"unknown"` when none is found.
 */
function parseChartType(chartDoc) {
  const knownElements = [
    "barChart",
    "lineChart",
    "pieChart",
    "areaChart",
    "scatterChart",
    "radarChart",
    "doughnutChart"
  ];
  const match = knownElements.find((elementName) => queryByLocalName(chartDoc, elementName));
  return match ? match.replace(/Chart$/, "") : "unknown";
}
|
|
259
|
+
/**
 * Builds a short summary of a chart part.
 *
 * @param {string} chartXmlText - Chart XML text handed to `parseXml`.
 * @returns {{title: string, type: string, seriesCount: number, pointCount: number}}
 *   `title` is the first non-blank `t` text found anywhere in the document
 *   (default `"Chart"`); the counts are the number of `ser` and `pt`
 *   elements respectively.
 */
function parseChartSummary(chartXmlText) {
  const chartDoc = parseXml(chartXmlText);

  let title = "Chart";
  for (const textNode of queryAllByLocalName(chartDoc, "t")) {
    const candidate = (textNode.textContent ?? "").trim();
    if (candidate.length > 0) {
      title = candidate;
      break;
    }
  }

  const seriesCount = queryAllByLocalName(chartDoc, "ser").length;
  const pointCount = queryAllByLocalName(chartDoc, "pt").length;
  return {
    title,
    type: parseChartType(chartDoc),
    seriesCount,
    pointCount
  };
}
|
|
267
|
+
/**
 * Collects the text labels of a SmartArt diagram part.
 *
 * @param {string} diagramXmlText - Diagram XML text handed to `parseXml`.
 * @returns {string[]} Trimmed, non-empty `t` texts in document order,
 *   limited to the first 12.
 */
function extractSmartArtText(diagramXmlText) {
  const diagramDoc = parseXml(diagramXmlText);
  const labels = [];
  for (const textNode of queryAllByLocalName(diagramDoc, "t")) {
    const label = (textNode.textContent ?? "").trim();
    if (label.length > 0) {
      labels.push(label);
    }
  }
  return labels.slice(0, 12);
}
|
|
271
|
+
/**
 * Renders an OMML math node as a linear plain-text approximation.
 *
 * - `t`    → its text content
 * - `f`    → `(num)/(den)`, with `?` for a missing part
 * - `sSup` → `base^(sup)`
 * - `sSub` → `base_(sub)`
 * - `rad`  → `sqrt(base)`
 * - anything else → concatenation of its children's renderings
 *
 * The operands of a structure are looked up among the node's DIRECT
 * children only. A descendant search picks the wrong operand as soon as
 * the first operand contains a nested structure of the same kind
 * (e.g. the denominator of a fraction nested inside the numerator).
 *
 * @param {{localName: string, children: Iterable, textContent: (string|null)}} node
 *   OMML element to render.
 * @returns {string} Linear text for the node.
 */
function ommlNodeToText(node) {
  if (node.localName === "t") return node.textContent ?? "";

  const children = Array.from(node.children);
  // Render the direct child named `partName`, or `missing` when absent.
  const partText = (partName, missing) => {
    const part = children.find((child) => child.localName === partName);
    return part ? ommlNodeToText(part) : missing;
  };

  switch (node.localName) {
    case "f":
      return `(${partText("num", "?")})/(${partText("den", "?")})`;
    case "sSup":
      return `${partText("e", "")}^(${partText("sup", "")})`;
    case "sSub":
      return `${partText("e", "")}_(${partText("sub", "")})`;
    case "rad":
      return `sqrt(${partText("e", "")})`;
    default:
      return children.map((child) => ommlNodeToText(child)).join("");
  }
}
|
|
142
294
|
function runStyleToCss(rPr) {
|
|
143
295
|
if (!rPr) return "";
|
|
144
296
|
const declarations = [];
|
|
@@ -179,18 +331,145 @@ function paragraphAlignStyle(paragraph) {
|
|
|
179
331
|
/**
 * Builds the `data-word-p-index` attribute for a paragraph.
 *
 * @param {(number|null)} paragraphIndex - Paragraph index, or `null`.
 * @returns {string} The attribute with a leading space, or an empty string
 *   when the index is `null`.
 */
function paragraphDataAttr(paragraphIndex) {
  if (paragraphIndex === null) {
    return "";
  }
  return ` data-word-p-index="${paragraphIndex}"`;
}
|
|
182
|
-
|
|
334
|
+
/**
 * Parses footnotes XML into a map from footnote id to rendered content.
 *
 * Footnotes whose id is missing, not an integer, or not greater than zero
 * are skipped, as are footnotes that render to empty content.
 *
 * @param {(string|null|undefined)} footnotesXmlText - Footnotes XML text.
 * @returns {Object<string, string>} Map keyed by the numeric id as a
 *   string; each value is the footnote's paragraphs rendered by
 *   `paragraphText` and joined with `<br/>`. Empty for falsy input.
 */
function parseFootnotesMap(footnotesXmlText) {
  const map = {};
  if (!footnotesXmlText) return map;

  const footnotesDoc = parseXml(footnotesXmlText);
  for (const footnote of queryAllByLocalName(footnotesDoc, "footnote")) {
    const idRaw = getAttr(footnote, "w:id") ?? getAttr(footnote, "id");
    if (!idRaw) continue;
    const idNum = Number.parseInt(idRaw, 10);
    if (!Number.isFinite(idNum) || idNum <= 0) continue;

    const text = queryAllByLocalName(footnote, "p")
      .map((p) => paragraphText(p))
      .join("<br/>")
      .trim();
    if (text) {
      map[String(idNum)] = text;
    }
  }
  return map;
}
|
|
350
|
+
/**
 * Parses comments XML into a map from comment id to comment details.
 *
 * Comments without an id, or whose paragraphs render to empty content,
 * are skipped.
 *
 * @param {(string|null|undefined)} commentsXmlText - Comments XML text.
 * @returns {Object<string, {author: (string|null), date: (string|null), text: string}>}
 *   Map keyed by the raw id attribute; `text` is the comment's paragraphs
 *   rendered by `paragraphText` and joined with `<br/>`. Empty for falsy
 *   input.
 */
function parseCommentsMap(commentsXmlText) {
  const map = {};
  if (!commentsXmlText) return map;

  // Attributes are read with the "w:" prefix first, then without it.
  const readAttr = (node, name) => getAttr(node, `w:${name}`) ?? getAttr(node, name);

  const commentsDoc = parseXml(commentsXmlText);
  for (const comment of queryAllByLocalName(commentsDoc, "comment")) {
    const idRaw = readAttr(comment, "id");
    if (!idRaw) continue;

    const text = queryAllByLocalName(comment, "p")
      .map((p) => paragraphText(p))
      .join("<br/>")
      .trim();
    if (!text) continue;

    map[idRaw] = {
      author: readAttr(comment, "author"),
      date: readAttr(comment, "date"),
      text
    };
  }
  return map;
}
|
|
369
|
+
/**
 * Parses endnotes XML into a map from endnote id to rendered content.
 *
 * Endnotes whose id is missing, not an integer, or not greater than zero
 * are skipped, as are endnotes that render to empty content.
 *
 * @param {(string|null|undefined)} endnotesXmlText - Endnotes XML text.
 * @returns {Object<string, string>} Map keyed by the numeric id as a
 *   string; each value is the endnote's paragraphs rendered by
 *   `paragraphText` and joined with `<br/>`. Empty for falsy input.
 */
function parseEndnotesMap(endnotesXmlText) {
  const map = {};
  if (!endnotesXmlText) return map;

  const endnotesDoc = parseXml(endnotesXmlText);
  for (const endnote of queryAllByLocalName(endnotesDoc, "endnote")) {
    const idRaw = getAttr(endnote, "w:id") ?? getAttr(endnote, "id");
    if (!idRaw) continue;
    const idNum = Number.parseInt(idRaw, 10);
    if (!Number.isFinite(idNum) || idNum <= 0) continue;

    const text = queryAllByLocalName(endnote, "p")
      .map((p) => paragraphText(p))
      .join("<br/>")
      .trim();
    if (text) {
      map[String(idNum)] = text;
    }
  }
  return map;
}
|
|
385
|
+
/**
 * Renders the footnotes that were referenced in the document as an
 * ordered list inside a `<section data-word-footnotes="1">`.
 *
 * Only ids that are OWN keys of `footnotesMap` with non-empty content are
 * rendered. Checking own keys matters because the ids come from document
 * attributes: a plain truthiness test would accept names inherited from
 * `Object.prototype` (such as `constructor`) and render them as notes.
 *
 * @param {string[]} usedIds - Referenced footnote ids, in reference order;
 *   duplicates are ignored.
 * @param {Object<string, string>} footnotesMap - Footnote id to content.
 *   The content is inserted as-is, without further escaping.
 * @returns {string} The section HTML, or an empty string when no
 *   referenced footnote has content.
 */
function renderFootnotesSection(usedIds, footnotesMap) {
  const hasNote = (id) =>
    Object.prototype.hasOwnProperty.call(footnotesMap, id) && Boolean(footnotesMap[id]);
  const uniq = [...new Set(usedIds)].filter(hasNote);
  if (uniq.length === 0) return "";
  const items = uniq.map((id) => `<li id="word-footnote-${id}" data-word-footnote-id="${id}">${footnotesMap[id]}</li>`).join("");
  return `<section data-word-footnotes="1"><hr/><ol>${items}</ol></section>`;
}
|
|
391
|
+
/**
 * Renders the comments that were referenced in the document as an ordered
 * list inside a `<section data-word-comments="1">`.
 *
 * Only ids that are OWN keys of `commentsMap` are rendered, so names
 * inherited from `Object.prototype` (such as `constructor`) are never
 * treated as comments. Comment ids are raw attribute values taken from
 * the document, so they are HTML-escaped before being placed into the
 * `id` / `data-word-comment-id` attributes.
 *
 * @param {string[]} usedIds - Referenced comment ids, in reference order;
 *   duplicates are ignored.
 * @param {Object<string, {author: (string|null), date: (string|null), text: string}>} commentsMap
 *   Comment id to details. `text` is inserted as-is, without further
 *   escaping; author and date are escaped.
 * @returns {string} The section HTML, or an empty string when no
 *   referenced comment exists in the map.
 */
function renderCommentsSection(usedIds, commentsMap) {
  const hasComment = (id) =>
    Object.prototype.hasOwnProperty.call(commentsMap, id) && Boolean(commentsMap[id]);
  const uniq = [...new Set(usedIds)].filter(hasComment);
  if (uniq.length === 0) return "";
  const items = uniq.map((id) => {
    const item = commentsMap[id];
    const safeId = escapeHtml(id);
    const meta = [item.author ?? "", item.date ?? ""].filter((x) => x.length > 0).join(" \xB7 ");
    const metaHtml = meta ? `<div data-word-comment-meta="1">${escapeHtml(meta)}</div>` : "";
    return `<li id="word-comment-${safeId}" data-word-comment-id="${safeId}">${metaHtml}<div>${item.text}</div></li>`;
  }).join("");
  return `<section data-word-comments="1"><hr/><ol>${items}</ol></section>`;
}
|
|
402
|
+
/**
 * Renders the endnotes that were referenced in the document as an ordered
 * list inside a `<section data-word-endnotes="1">`.
 *
 * Only ids that are OWN keys of `endnotesMap` with non-empty content are
 * rendered. Checking own keys matters because the ids come from document
 * attributes: a plain truthiness test would accept names inherited from
 * `Object.prototype` (such as `constructor`) and render them as notes.
 *
 * @param {string[]} usedIds - Referenced endnote ids, in reference order;
 *   duplicates are ignored.
 * @param {Object<string, string>} endnotesMap - Endnote id to content.
 *   The content is inserted as-is, without further escaping.
 * @returns {string} The section HTML, or an empty string when no
 *   referenced endnote has content.
 */
function renderEndnotesSection(usedIds, endnotesMap) {
  const hasNote = (id) =>
    Object.prototype.hasOwnProperty.call(endnotesMap, id) && Boolean(endnotesMap[id]);
  const uniq = [...new Set(usedIds)].filter(hasNote);
  if (uniq.length === 0) return "";
  const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
  return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
}
|
|
408
|
+
async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
|
|
183
409
|
const tag = paragraphTag(paragraph);
|
|
184
410
|
const alignStyle = paragraphAlignStyle(paragraph);
|
|
185
411
|
const dataAttr = paragraphDataAttr(paragraphIndex);
|
|
186
|
-
const
|
|
187
|
-
if (
|
|
412
|
+
const hasRenderableNode = queryAllByLocalName(paragraph, "r").length > 0 || queryAllByLocalName(paragraph, "oMath").length > 0 || queryAllByLocalName(paragraph, "oMathPara").length > 0;
|
|
413
|
+
if (!hasRenderableNode) {
|
|
188
414
|
return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}><br/></${tag}>`;
|
|
189
415
|
}
|
|
190
|
-
|
|
191
|
-
|
|
416
|
+
function parseRevisionMeta(node, type) {
|
|
417
|
+
return {
|
|
418
|
+
type,
|
|
419
|
+
id: getAttr(node, "w:id") ?? getAttr(node, "id"),
|
|
420
|
+
author: getAttr(node, "w:author") ?? getAttr(node, "author"),
|
|
421
|
+
date: getAttr(node, "w:date") ?? getAttr(node, "date")
|
|
422
|
+
};
|
|
423
|
+
}
|
|
424
|
+
function inferRevisionMeta(run, fallback) {
|
|
425
|
+
if (fallback) return fallback;
|
|
426
|
+
let cursor = run;
|
|
427
|
+
while (cursor) {
|
|
428
|
+
if (cursor.localName === "ins") return parseRevisionMeta(cursor, "ins");
|
|
429
|
+
if (cursor.localName === "del") return parseRevisionMeta(cursor, "del");
|
|
430
|
+
if (cursor.localName === "p") break;
|
|
431
|
+
cursor = cursor.parentElement;
|
|
432
|
+
}
|
|
433
|
+
return null;
|
|
434
|
+
}
|
|
435
|
+
function revisionMetaAttrs(meta) {
|
|
436
|
+
const attrs = [`data-word-revision="${meta.type}"`];
|
|
437
|
+
if (meta.id) attrs.push(`data-word-revision-id="${escapeHtml(meta.id)}"`);
|
|
438
|
+
if (meta.author) attrs.push(`data-word-revision-author="${escapeHtml(meta.author)}"`);
|
|
439
|
+
if (meta.date) attrs.push(`data-word-revision-date="${escapeHtml(meta.date)}"`);
|
|
440
|
+
return attrs.join(" ");
|
|
441
|
+
}
|
|
442
|
+
async function runToHtml(run, revisionFallback) {
|
|
443
|
+
const result = [];
|
|
192
444
|
const rPr = queryByLocalName(run, "rPr");
|
|
193
445
|
const css = runStyleToCss(rPr);
|
|
446
|
+
const footnoteRef = queryByLocalName(run, "footnoteReference");
|
|
447
|
+
const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
|
|
448
|
+
if (footnoteId && footnotesMap[footnoteId]) {
|
|
449
|
+
usedFootnoteIds.push(footnoteId);
|
|
450
|
+
result.push(
|
|
451
|
+
`<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
|
|
452
|
+
);
|
|
453
|
+
return result;
|
|
454
|
+
}
|
|
455
|
+
const endnoteRef = queryByLocalName(run, "endnoteReference");
|
|
456
|
+
const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
|
|
457
|
+
if (endnoteId && endnotesMap[endnoteId]) {
|
|
458
|
+
usedEndnoteIds.push(endnoteId);
|
|
459
|
+
result.push(
|
|
460
|
+
`<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
|
|
461
|
+
);
|
|
462
|
+
return result;
|
|
463
|
+
}
|
|
464
|
+
const commentRef = queryByLocalName(run, "commentReference");
|
|
465
|
+
const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
|
|
466
|
+
if (commentId && commentsMap[commentId]) {
|
|
467
|
+
usedCommentIds.push(commentId);
|
|
468
|
+
result.push(
|
|
469
|
+
`<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
|
|
470
|
+
);
|
|
471
|
+
return result;
|
|
472
|
+
}
|
|
194
473
|
const drawing = queryByLocalName(run, "drawing");
|
|
195
474
|
if (drawing) {
|
|
196
475
|
const blip = queryByLocalName(drawing, "blip");
|
|
@@ -200,53 +479,318 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex) {
|
|
|
200
479
|
if (src) {
|
|
201
480
|
const imageSize = parseDrawingSizePx(drawing);
|
|
202
481
|
const dimensionAttrs = imageDimensionAttributes(imageSize);
|
|
203
|
-
|
|
204
|
-
|
|
482
|
+
const anchorMeta = parseAnchorMeta(drawing);
|
|
483
|
+
const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
|
|
484
|
+
result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
|
|
485
|
+
return result;
|
|
205
486
|
}
|
|
206
487
|
}
|
|
488
|
+
const chartRef = queryByLocalName(drawing, "chart");
|
|
489
|
+
const chartRid = getAttr(chartRef, "r:id") ?? getAttr(chartRef, "id");
|
|
490
|
+
if (chartRid) {
|
|
491
|
+
const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
|
|
492
|
+
if (chartXmlText) {
|
|
493
|
+
const summary = parseChartSummary(chartXmlText);
|
|
494
|
+
result.push(
|
|
495
|
+
`<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
|
|
496
|
+
);
|
|
497
|
+
return result;
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
const smartArtRef = queryByLocalName(drawing, "relIds");
|
|
501
|
+
const smartArtRid = getAttr(smartArtRef, "r:dm") ?? getAttr(smartArtRef, "dm");
|
|
502
|
+
if (smartArtRid) {
|
|
503
|
+
const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
|
|
504
|
+
const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
|
|
505
|
+
const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
|
|
506
|
+
result.push(
|
|
507
|
+
`<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
|
|
508
|
+
);
|
|
509
|
+
return result;
|
|
510
|
+
}
|
|
207
511
|
}
|
|
208
512
|
const texts = queryAllByLocalName(run, "t").map((t) => t.textContent ?? "").join("");
|
|
209
|
-
const
|
|
210
|
-
const
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
}
|
|
215
|
-
|
|
513
|
+
const delTexts = queryAllByLocalName(run, "delText").map((t) => t.textContent ?? "").join("");
|
|
514
|
+
const brNodes = queryAllByLocalName(run, "br");
|
|
515
|
+
const pageBreakCount = brNodes.filter((node) => {
|
|
516
|
+
const type = (getAttr(node, "w:type") ?? getAttr(node, "type") ?? "").toLowerCase();
|
|
517
|
+
return type === "page";
|
|
518
|
+
}).length;
|
|
519
|
+
const lineBreakCount = Math.max(0, brNodes.length - pageBreakCount);
|
|
520
|
+
const runText2 = `${escapeHtml(texts || delTexts)}${"<br/>".repeat(lineBreakCount)}`;
|
|
521
|
+
if (runText2) {
|
|
522
|
+
const revisionMeta = inferRevisionMeta(run, revisionFallback);
|
|
523
|
+
if (css) {
|
|
524
|
+
const span = `<span style="${css}">${runText2}</span>`;
|
|
525
|
+
if (revisionMeta) {
|
|
526
|
+
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
527
|
+
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
|
|
528
|
+
} else {
|
|
529
|
+
result.push(span);
|
|
530
|
+
}
|
|
531
|
+
} else if (revisionMeta) {
|
|
532
|
+
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
533
|
+
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
|
|
534
|
+
} else {
|
|
535
|
+
result.push(runText2);
|
|
536
|
+
}
|
|
216
537
|
}
|
|
538
|
+
for (let i = 0; i < pageBreakCount; i += 1) {
|
|
539
|
+
result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
540
|
+
}
|
|
541
|
+
return result;
|
|
542
|
+
}
|
|
543
|
+
async function nodeToHtml(node, revisionFallback) {
|
|
544
|
+
if (node.localName === "commentRangeStart") {
|
|
545
|
+
const id = getAttr(node, "w:id") ?? getAttr(node, "id");
|
|
546
|
+
return id ? [`<span data-word-comment-range-start="${id}"></span>`] : [];
|
|
547
|
+
}
|
|
548
|
+
if (node.localName === "commentRangeEnd") {
|
|
549
|
+
const id = getAttr(node, "w:id") ?? getAttr(node, "id");
|
|
550
|
+
return id ? [`<span data-word-comment-range-end="${id}"></span>`] : [];
|
|
551
|
+
}
|
|
552
|
+
if (node.localName === "r") {
|
|
553
|
+
return runToHtml(node, revisionFallback);
|
|
554
|
+
}
|
|
555
|
+
if (node.localName === "oMath" || node.localName === "oMathPara") {
|
|
556
|
+
const linear = ommlNodeToText(node).trim();
|
|
557
|
+
if (!linear) return [];
|
|
558
|
+
return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
|
|
559
|
+
}
|
|
560
|
+
if (node.localName === "ins" || node.localName === "del") {
|
|
561
|
+
const scopedMeta = parseRevisionMeta(node, node.localName === "ins" ? "ins" : "del");
|
|
562
|
+
const nested2 = [];
|
|
563
|
+
for (const child of Array.from(node.children)) {
|
|
564
|
+
nested2.push(...await nodeToHtml(child, scopedMeta));
|
|
565
|
+
}
|
|
566
|
+
return nested2;
|
|
567
|
+
}
|
|
568
|
+
const nested = [];
|
|
569
|
+
for (const child of Array.from(node.children)) {
|
|
570
|
+
nested.push(...await nodeToHtml(child, revisionFallback));
|
|
571
|
+
}
|
|
572
|
+
return nested;
|
|
573
|
+
}
|
|
574
|
+
const parts = [];
|
|
575
|
+
const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
|
|
576
|
+
for (let i = 0; i < renderedPageBreakCount; i += 1) {
|
|
577
|
+
parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
578
|
+
}
|
|
579
|
+
for (const child of Array.from(paragraph.children)) {
|
|
580
|
+
parts.push(...await nodeToHtml(child, null));
|
|
217
581
|
}
|
|
218
582
|
const content = parts.join("") || "<br/>";
|
|
219
583
|
return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}>${content}</${tag}>`;
|
|
220
584
|
}
|
|
221
585
|
/**
 * Renders the text of a run as escaped HTML followed by one `<br/>` per
 * `br` element in the run.
 *
 * The text comes from the run's `t` elements; when those yield an empty
 * string, the text of its `delText` elements is used instead.
 *
 * @param {*} run - Run element to render.
 * @returns {string} Escaped text plus line breaks.
 */
function runText(run) {
  const joinedText = (localName) =>
    queryAllByLocalName(run, localName)
      .map((node) => node.textContent ?? "")
      .join("");
  const text = joinedText("t");
  const delText = joinedText("delText");
  const breaks = "<br/>".repeat(queryAllByLocalName(run, "br").length);
  return `${escapeHtml(text || delText)}${breaks}`;
}
|
|
226
591
|
/**
 * Renders a paragraph by concatenating the `runText` of every `r` element
 * found under it.
 *
 * @param {*} paragraph - Paragraph element to render.
 * @returns {string} The concatenated run output, or `<br/>` when that
 *   output is empty.
 */
function paragraphText(paragraph) {
  let content = "";
  for (const run of queryAllByLocalName(paragraph, "r")) {
    content += runText(run);
  }
  return content || "<br/>";
}
|
|
596
|
+
/**
 * Reads how many grid columns a table cell spans.
 *
 * @param {*} tc - Table cell element; its direct `tcPr` > `gridSpan` child
 *   is consulted.
 * @returns {number} The positive integer span, or 1 when the value is
 *   missing, unparsable or not greater than zero.
 */
function parseTcGridSpan(tc) {
  const tcPr = directChildrenByLocalName(tc, "tcPr")[0] ?? null;
  const gridSpan = tcPr ? directChildrenByLocalName(tcPr, "gridSpan")[0] ?? null : null;
  const rawVal = getAttr(gridSpan, "w:val") ?? getAttr(gridSpan, "val");
  if (!rawVal) return 1;
  const span = Number.parseInt(rawVal, 10);
  if (Number.isFinite(span) && span > 0) {
    return span;
  }
  return 1;
}
|
|
603
|
+
/**
 * Classifies the vertical-merge state of a table cell.
 *
 * @param {*} tc - Table cell element; its direct `tcPr` > `vMerge` child
 *   is consulted.
 * @returns {("none"|"restart"|"continue")} `"none"` when the cell has no
 *   `vMerge` element, `"restart"` when its value is `restart`
 *   (case-insensitive), and `"continue"` for any other or missing value.
 */
function parseTcVMerge(tc) {
  const tcPr = directChildrenByLocalName(tc, "tcPr")[0] ?? null;
  const vMerge = tcPr ? directChildrenByLocalName(tcPr, "vMerge")[0] ?? null : null;
  if (!vMerge) return "none";
  const rawVal = getAttr(vMerge, "w:val") ?? getAttr(vMerge, "val") ?? "continue";
  if (rawVal.toLowerCase() === "restart") {
    return "restart";
  }
  return "continue";
}
|
|
610
|
+
/**
 * Reads the column widths declared in a table's `tblGrid`.
 *
 * @param {*} table - Table element; its direct `tblGrid` > `gridCol`
 *   children are consulted.
 * @returns {number[]} Column widths in pixels. Columns whose width is
 *   missing, unparsable or not greater than zero are omitted, so the
 *   result may be shorter than the number of `gridCol` elements.
 */
function parseTblGridWidthsPx(table) {
  const grid = directChildrenByLocalName(table, "tblGrid")[0] ?? null;
  if (!grid) return [];

  const widths = [];
  for (const col of directChildrenByLocalName(grid, "gridCol")) {
    const raw = getAttr(col, "w:w") ?? getAttr(col, "w");
    if (!raw) continue;
    const twip = Number.parseInt(raw, 10);
    if (!Number.isFinite(twip) || twip <= 0) continue;
    const px = twipToPx(twip);
    if (px > 0) {
      widths.push(px);
    }
  }
  return widths;
}
|
|
619
|
+
/**
 * Converts a border size value to pixels by dividing it by six.
 *
 * NOTE(review): presumably `size` is in eighths of a point, which makes
 * six units equal one pixel at 96 dpi — confirm against the callers.
 *
 * @param {number} size - Raw border size.
 * @returns {number} Border width in pixels.
 */
function borderSizeToPx(size) {
  const unitsPerPx = 6;
  return size / unitsPerPx;
}
|
|
622
|
+
/**
 * Converts a border element into a CSS `border` shorthand value.
 *
 * Two kinds of input previously produced invalid CSS, which makes a
 * browser drop the whole border declaration:
 *  - a colour that is not hexadecimal (notably `auto`) became `#auto`;
 *  - border kinds that are not CSS keywords (`thick`, `triple`,
 *    `dashSmallGap`, ...) were emitted verbatim as the border style.
 * Both are now mapped to valid CSS. Restricting the style to a fixed set
 * of keywords also keeps arbitrary attribute text out of the generated
 * `style` value.
 *
 * @param {*} borderNode - Border element, or a falsy value.
 * @returns {(string|null)} `null` when no node is given, `"none"` when the
 *   border kind is missing / `nil` / `none`, otherwise
 *   `"<width>px <style> #<color>"`. The width defaults to 1px when the
 *   size is missing or invalid; the colour defaults to `222222`.
 */
function parseBorderCss(borderNode) {
  if (!borderNode) return null;
  const val = (getAttr(borderNode, "w:val") ?? getAttr(borderNode, "val") ?? "").toLowerCase();
  if (!val || val === "nil" || val === "none") return "none";

  // Non-hex colours (e.g. "auto") fall back to the default border colour.
  const rawColor = (getAttr(borderNode, "w:color") ?? getAttr(borderNode, "color") ?? "222222").replace(/^#/, "");
  const color = /^(?:[0-9a-f]{3}|[0-9a-f]{6})$/i.test(rawColor) ? rawColor : "222222";

  const rawSize = getAttr(borderNode, "w:sz") ?? getAttr(borderNode, "sz");
  const size = rawSize ? Number.parseInt(rawSize, 10) : Number.NaN;
  const px = Number.isFinite(size) && size > 0 ? borderSizeToPx(size) : 1;

  // Lower-cased border kinds mapped to the closest CSS border-style.
  // Kinds that are already CSS keywords map to themselves; anything not
  // listed is drawn as a solid line.
  const cssStyles = {
    single: "solid",
    thick: "solid",
    solid: "solid",
    double: "double",
    triple: "double",
    dotted: "dotted",
    dashed: "dashed",
    dashsmallgap: "dashed",
    dotdash: "dashed",
    dotdotdash: "dashed",
    dashdotstroked: "dashed",
    threedemboss: "ridge",
    threedengrave: "groove",
    ridge: "ridge",
    groove: "groove",
    inset: "inset",
    outset: "outset"
  };
  const style = Object.prototype.hasOwnProperty.call(cssStyles, val) ? cssStyles[val] : "solid";
  return `${px.toFixed(2)}px ${style} #${color}`;
}
|
|
633
|
+
/**
 * Derives table-level CSS settings from a table's `tblPr` element.
 *
 * @param {*} table - Table element.
 * @returns {{tableLayout: string, borderCollapse: string, borderSpacingPx: number, borderCss: string, insideHCss: (string|null), insideVCss: (string|null)}}
 *   - `tableLayout`: `"auto"` when `tblLayout` has type `autofit`,
 *     otherwise `"fixed"`.
 *   - `borderSpacingPx`: cell spacing in pixels for a positive `dxa`
 *     value, otherwise 0; `borderCollapse` is `"separate"` when that
 *     spacing is positive and `"collapse"` otherwise.
 *   - `borderCss`: the first available of the top, right, bottom and left
 *     table borders, defaulting to `"1px solid #222"`.
 *   - `insideHCss` / `insideVCss`: the inner borders as returned by
 *     `parseBorderCss`.
 */
function parseTableStyleProfile(table) {
  // First direct child of `parent` with the given name; null-safe.
  const childOf = (parent, name) => (parent ? directChildrenByLocalName(parent, name)[0] ?? null : null);
  // Attributes are read with the "w:" prefix first, then without it.
  const attrOf = (node, name) => getAttr(node, `w:${name}`) ?? getAttr(node, name);

  const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
  const tblBorders = childOf(tblPr, "tblBorders");
  const layout = childOf(tblPr, "tblLayout");
  const spacing = childOf(tblPr, "tblCellSpacing");

  const spacingType = (attrOf(spacing, "type") ?? "dxa").toLowerCase();
  const spacingRaw = attrOf(spacing, "w");
  const spacingVal = spacingRaw ? Number.parseFloat(spacingRaw) : Number.NaN;
  let borderSpacingPx = 0;
  if (spacingType === "dxa" && Number.isFinite(spacingVal) && spacingVal > 0) {
    borderSpacingPx = twipToPx(spacingVal);
  }

  const layoutType = (attrOf(layout, "type") ?? "").toLowerCase();
  const edgeCss = (edgeName) => parseBorderCss(childOf(tblBorders, edgeName));
  const top = edgeCss("top");
  const bottom = edgeCss("bottom");
  const left = edgeCss("left");
  const right = edgeCss("right");
  const insideH = edgeCss("insideH");
  const insideV = edgeCss("insideV");

  return {
    tableLayout: layoutType === "autofit" ? "auto" : "fixed",
    borderCollapse: borderSpacingPx > 0 ? "separate" : "collapse",
    borderSpacingPx,
    borderCss: top ?? right ?? bottom ?? left ?? "1px solid #222",
    insideHCss: insideH,
    insideVCss: insideV
  };
}
|
|
660
|
+
/**
 * Builds the CSS `width` declaration for a table.
 *
 * @param {*} table - Table element; its direct `tblPr` > `tblW` child is
 *   consulted.
 * @param {number[]} gridWidthsPx - Column widths in pixels, used when the
 *   table has no usable explicit width.
 * @returns {string} A pixel width for a positive `dxa` value, a percentage
 *   (value / 50) for a positive `pct` value, otherwise the sum of the grid
 *   widths capped with `max-width:100%`, or `width:100%` when that sum is
 *   not positive.
 */
function parseTableWidthStyle(table, gridWidthsPx) {
  const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
  const tblW = tblPr ? directChildrenByLocalName(tblPr, "tblW")[0] ?? null : null;
  const type = (getAttr(tblW, "w:type") ?? getAttr(tblW, "type") ?? "").toLowerCase();
  const rawVal = getAttr(tblW, "w:w") ?? getAttr(tblW, "w");
  const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;

  const hasExplicitWidth = Number.isFinite(numericVal) && numericVal > 0;
  if (hasExplicitWidth && type === "dxa") {
    return `width:${twipToPx(numericVal).toFixed(2)}px`;
  }
  if (hasExplicitWidth && type === "pct") {
    return `width:${(numericVal / 50).toFixed(2)}%`;
  }

  let gridTotal = 0;
  for (const columnWidth of gridWidthsPx) {
    gridTotal += columnWidth;
  }
  return gridTotal > 0 ? `width:${gridTotal.toFixed(2)}px;max-width:100%` : "width:100%";
}
|
|
676
|
+
/**
 * Builds the CSS `width` declaration for a `w:tc` table cell.
 *
 * Resolution order:
 *   1. `tcPr/tcW` with type `dxa`  -> absolute width converted from twips to px.
 *   2. `tcPr/tcW` with type `pct`  -> percentage width.
 *   3. Sum of the grid columns the cell spans (`colCursor` .. `colCursor + colSpan`).
 *   4. Otherwise an empty string (no width declaration).
 *
 * @param {Element} cell - The `w:tc` element.
 * @param {number} colCursor - Zero-based grid column at which the cell starts.
 * @param {number} colSpan - Number of grid columns the cell covers.
 * @param {number[]} gridWidthsPx - Column widths (px) taken from the table grid.
 * @returns {string} A CSS declaration string without a trailing semicolon, or "".
 */
function parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx) {
  const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
  const tcW = tcPr ? directChildrenByLocalName(tcPr, "tcW")[0] ?? null : null;
  const type = (getAttr(tcW, "w:type") ?? getAttr(tcW, "type") ?? "").toLowerCase();
  const rawVal = getAttr(tcW, "w:w") ?? getAttr(tcW, "w");
  const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
  const hasUsableValue = Number.isFinite(numericVal) && numericVal > 0;
  if (type === "dxa" && hasUsableValue) {
    return `width:${twipToPx(numericVal).toFixed(2)}px`;
  }
  if (type === "pct" && hasUsableValue) {
    // A pct width is either a bare number in fiftieths of a percent ("2500" = 50%)
    // or an explicit percentage string ("25%"). Only the bare form is divided by 50.
    const isExplicitPercent = String(rawVal).trim().endsWith("%");
    const percent = isExplicitPercent ? numericVal : numericVal / 50;
    return `width:${percent.toFixed(2)}%`;
  }
  const width = gridWidthsPx.slice(colCursor, colCursor + colSpan).reduce((sum, item) => sum + item, 0);
  if (width > 0) return `width:${width.toFixed(2)}px`;
  return "";
}
|
|
692
|
+
/**
 * Builds the CSS border declarations for a `w:tc` table cell.
 *
 * Without a `tcPr/tcBorders` element the cell gets a single `border:` shorthand
 * taken from the table profile (inside-horizontal, then inside-vertical, then
 * the outer table border). With `tcBorders`, each side is resolved separately:
 * the cell's own edge first, then the matching inside border of the table
 * (horizontal for top/bottom, vertical for left/right), then the table border.
 *
 * @param {Element} cell - The `w:tc` element.
 * @param {{insideHCss: ?string, insideVCss: ?string, borderCss: string}} tableStyle
 *   Table-level border profile.
 * @returns {string} CSS declarations without a trailing semicolon.
 */
function parseCellBorderStyle(cell, tableStyle) {
  const cellProps = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
  const cellBorders = cellProps ? directChildrenByLocalName(cellProps, "tcBorders")[0] ?? null : null;
  const { insideHCss, insideVCss, borderCss } = tableStyle;

  if (!cellBorders) {
    return `border:${insideHCss ?? insideVCss ?? borderCss}`;
  }

  // Resolve one edge: explicit cell border, else the table's inside border, else the table border.
  const resolveEdge = (edgeName, insideCss) => {
    const edgeNode = directChildrenByLocalName(cellBorders, edgeName)[0] ?? null;
    return parseBorderCss(edgeNode) ?? insideCss ?? borderCss;
  };

  const topCss = resolveEdge("top", insideHCss);
  const rightCss = resolveEdge("right", insideVCss);
  const bottomCss = resolveEdge("bottom", insideHCss);
  const leftCss = resolveEdge("left", insideVCss);

  return [
    `border-top:${topCss}`,
    `border-right:${rightCss}`,
    `border-bottom:${bottomCss}`,
    `border-left:${leftCss}`
  ].join(";");
}
|
|
231
705
|
/**
 * Renders the inner HTML of a `w:tc` table cell.
 *
 * Direct `p` children become `<p>` elements (tagged with their paragraph index
 * when one is known) and direct `tbl` children are rendered recursively as
 * nested tables. Every other child, including `tcPr`, is ignored. If nothing
 * was rendered, the concatenated text of all `t` descendants is escaped and
 * returned, or `<br/>` when the cell has no text at all.
 *
 * @param {Element} cell - The `w:tc` element.
 * @param {Map<Element, number>} paragraphIndexMap - Paragraph element -> document index.
 * @returns {string} HTML fragment for the cell body.
 */
function tableCellHtml(cell, paragraphIndexMap) {
  const renderedBlocks = [];
  for (const node of Array.from(cell.children)) {
    switch (node.localName) {
      case "p": {
        const index = paragraphIndexMap.get(node) ?? null;
        renderedBlocks.push(`<p${paragraphDataAttr(index)}>${paragraphText(node)}</p>`);
        break;
      }
      case "tbl":
        renderedBlocks.push(tableToHtml(node, paragraphIndexMap));
        break;
      default:
        // tcPr and any other child element contribute no block content.
        break;
    }
  }
  if (renderedBlocks.length !== 0) {
    return renderedBlocks.join("");
  }
  // No block-level children: fall back to the raw text runs found anywhere in the cell.
  const rawText = queryAllByLocalName(cell, "t")
    .map((textNode) => textNode.textContent ?? "")
    .join("")
    .trim();
  const escaped = escapeHtml(rawText);
  return escaped || "<br/>";
}
|
|
242
723
|
/**
 * Renders a `w:tbl` element as an HTML `<table>`.
 *
 * Horizontal merges come from each cell's grid span and become `colspan`.
 * Vertical merges are tracked through "origin" records: a `restart` cell opens
 * an origin covering its grid columns, and each `continue` cell in a later row
 * extends the row span of the matching origin instead of emitting a `<td>`.
 * An origin that is not continued in a row after the one it started in is closed.
 *
 * Cells are collected as records and serialized only after every row has been
 * processed, so the final row span is known when the `<td>` is written. This
 * avoids patching the generated HTML afterwards, which previously required a
 * whitespace-collapsing pass over the whole table markup, including cell
 * content and nested tables.
 *
 * @param {Element} table - The `w:tbl` element.
 * @param {Map<Element, number>} paragraphIndexMap - Paragraph element -> document index.
 * @returns {string} HTML for the table.
 */
function tableToHtml(table, paragraphIndexMap) {
  const rows = directChildrenByLocalName(table, "tr");
  const gridWidthsPx = parseTblGridWidthsPx(table);
  const tableStyle = parseTableStyleProfile(table);
  // Grid column index -> origin record of the vertical merge currently covering it.
  const activeByCol = /* @__PURE__ */ new Map();

  const pendingRows = rows.map((row, rowIndex) => {
    const directCells = directChildrenByLocalName(row, "tc");
    // Origins that were extended by a `continue` cell in this row.
    const continued = /* @__PURE__ */ new Set();
    const pendingCells = [];
    let colCursor = 0;

    for (const cell of directCells) {
      const colSpan = parseTcGridSpan(cell);
      const vMerge = parseTcVMerge(cell);

      if (vMerge === "continue") {
        // Pick the first not-yet-continued origin at or after the cursor,
        // falling back to the left-most remaining one.
        const activeOrigins = Array.from(new Set(activeByCol.values()))
          .filter((item) => !continued.has(item))
          .sort((a, b) => a.startCol - b.startCol);
        const origin = activeOrigins.find((item) => item.startCol >= colCursor) ?? activeOrigins[0] ?? null;
        if (origin) {
          origin.rowSpan += 1;
          continued.add(origin);
          colCursor = origin.startCol + origin.colSpan;
        }
        continue;
      }

      // Skip grid columns still occupied by a vertical merge from an earlier row.
      while (activeByCol.has(colCursor)) {
        colCursor += 1;
      }

      const html = tableCellHtml(cell, paragraphIndexMap);
      const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
      const borderStyle = parseCellBorderStyle(cell, tableStyle);

      let origin = null;
      if (vMerge === "restart") {
        origin = {
          startCol: colCursor,
          colSpan,
          rowSpan: 1,
          startedRow: rowIndex
        };
        for (let i = 0; i < colSpan; i += 1) {
          activeByCol.set(colCursor + i, origin);
        }
      }

      pendingCells.push({
        origin,
        colSpan,
        style: `${borderStyle};vertical-align:top;${widthStyle}`,
        html
      });
      colCursor += colSpan;
    }

    // Close every merge that started in an earlier row and was not continued here.
    for (const origin of Array.from(new Set(activeByCol.values()))) {
      if (origin.startedRow < rowIndex && !continued.has(origin)) {
        for (let i = 0; i < origin.colSpan; i += 1) {
          activeByCol.delete(origin.startCol + i);
        }
      }
    }

    return pendingCells;
  });

  // All row spans are final now; serialize the cells.
  const renderCell = ({ origin, colSpan, style, html }) => {
    const attrs = [];
    if (origin !== null && origin.rowSpan > 1) attrs.push(`rowspan="${origin.rowSpan}"`);
    if (colSpan > 1) attrs.push(`colspan="${colSpan}"`);
    const attrText = attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
    return `<td${attrText} style="${style}">${html}</td>`;
  };
  const merged = pendingRows.map((cells) => `<tr>${cells.map(renderCell).join("")}</tr>`).join("");

  const tableWidthStyle = parseTableWidthStyle(table, gridWidthsPx);
  const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
  return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
}
|
|
251
795
|
async function parseDocxToHtmlSnapshot(file) {
|
|
252
796
|
const maybeArrayBuffer = file.arrayBuffer;
|
|
@@ -257,7 +801,16 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
257
801
|
throw new Error("DOCX missing document.xml");
|
|
258
802
|
}
|
|
259
803
|
const relsText = await zip.file("word/_rels/document.xml.rels")?.async("string");
|
|
804
|
+
const footnotesText = await zip.file("word/footnotes.xml")?.async("string");
|
|
805
|
+
const endnotesText = await zip.file("word/endnotes.xml")?.async("string");
|
|
806
|
+
const commentsText = await zip.file("word/comments.xml")?.async("string");
|
|
260
807
|
const relMap = parseDocRelsMap(relsText ?? null);
|
|
808
|
+
const footnotesMap = parseFootnotesMap(footnotesText ?? null);
|
|
809
|
+
const endnotesMap = parseEndnotesMap(endnotesText ?? null);
|
|
810
|
+
const commentsMap = parseCommentsMap(commentsText ?? null);
|
|
811
|
+
const usedFootnoteIds = [];
|
|
812
|
+
const usedEndnoteIds = [];
|
|
813
|
+
const usedCommentIds = [];
|
|
261
814
|
const documentXml = parseXml(documentXmlText);
|
|
262
815
|
const body = queryByLocalName(documentXml, "body");
|
|
263
816
|
if (!body) {
|
|
@@ -272,7 +825,20 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
272
825
|
if (child.localName === "sectPr") continue;
|
|
273
826
|
if (child.localName === "p") {
|
|
274
827
|
const paragraphIndex = paragraphIndexMap.get(child) ?? null;
|
|
275
|
-
blockHtml.push(
|
|
828
|
+
blockHtml.push(
|
|
829
|
+
await paragraphToHtml(
|
|
830
|
+
zip,
|
|
831
|
+
relMap,
|
|
832
|
+
child,
|
|
833
|
+
paragraphIndex,
|
|
834
|
+
footnotesMap,
|
|
835
|
+
usedFootnoteIds,
|
|
836
|
+
endnotesMap,
|
|
837
|
+
usedEndnoteIds,
|
|
838
|
+
commentsMap,
|
|
839
|
+
usedCommentIds
|
|
840
|
+
)
|
|
841
|
+
);
|
|
276
842
|
continue;
|
|
277
843
|
}
|
|
278
844
|
if (child.localName === "tbl") {
|
|
@@ -280,6 +846,9 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
280
846
|
continue;
|
|
281
847
|
}
|
|
282
848
|
}
|
|
849
|
+
blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
|
|
850
|
+
blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
|
|
851
|
+
blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
|
|
283
852
|
return buildHtmlSnapshot(blockHtml.join("\n"));
|
|
284
853
|
}
|
|
285
854
|
|
|
@@ -511,7 +1080,7 @@ function createFallbackWordStyleProfile(sourceFileName = "snapshot") {
|
|
|
511
1080
|
paragraphProfiles: []
|
|
512
1081
|
};
|
|
513
1082
|
}
|
|
514
|
-
function
|
|
1083
|
+
/**
 * Converts a length in twips to CSS pixels.
 *
 * @param {number} twip - Length in twips (1/1440 inch).
 * @returns {number} The same length in pixels, at 15 twips per pixel (96 px per inch).
 */
function twipToPx2(twip) {
  const twipsPerPixel = 15;
  return twip / twipsPerPixel;
}
|
|
517
1086
|
function getAttr2(node, attr) {
|
|
@@ -555,10 +1124,10 @@ function parsePageGeometry(documentXml) {
|
|
|
555
1124
|
const top = getTwipAttr(pgMar, "w:top") ?? getTwipAttr(pgMar, "top") ?? null;
|
|
556
1125
|
const bottom = getTwipAttr(pgMar, "w:bottom") ?? getTwipAttr(pgMar, "bottom") ?? null;
|
|
557
1126
|
return {
|
|
558
|
-
contentWidthPx: pageW === null ? null :
|
|
559
|
-
pageHeightPx: pageH === null ? null :
|
|
560
|
-
marginTopPx: top === null ? null :
|
|
561
|
-
marginBottomPx: bottom === null ? null :
|
|
1127
|
+
contentWidthPx: pageW === null ? null : twipToPx2(pageW - left - right),
|
|
1128
|
+
pageHeightPx: pageH === null ? null : twipToPx2(pageH),
|
|
1129
|
+
marginTopPx: top === null ? null : twipToPx2(top),
|
|
1130
|
+
marginBottomPx: bottom === null ? null : twipToPx2(bottom)
|
|
562
1131
|
};
|
|
563
1132
|
}
|
|
564
1133
|
function parseHeadingAlignFromDocument(documentXml) {
|
|
@@ -672,7 +1241,28 @@ function parseNumberingMap(numberingXml) {
|
|
|
672
1241
|
const lvlMap = abstractMap.get(absId);
|
|
673
1242
|
if (!lvlMap) continue;
|
|
674
1243
|
for (const [lvl, spec] of lvlMap.entries()) {
|
|
675
|
-
levelMap.set(`${numId}:${lvl}`, spec);
|
|
1244
|
+
levelMap.set(`${numId}:${lvl}`, { ...spec });
|
|
1245
|
+
}
|
|
1246
|
+
const lvlOverrides = queryAllByLocalName2(num, "lvlOverride");
|
|
1247
|
+
for (const override of lvlOverrides) {
|
|
1248
|
+
const ilvl = toInt(getAttr2(override, "w:ilvl") ?? getAttr2(override, "ilvl"));
|
|
1249
|
+
if (ilvl === null) continue;
|
|
1250
|
+
const key = `${numId}:${ilvl}`;
|
|
1251
|
+
const base = levelMap.get(key) ?? { numFmt: null, lvlText: null, startAt: 1 };
|
|
1252
|
+
const overrideStart = toInt(
|
|
1253
|
+
getAttr2(queryAllByLocalName2(override, "startOverride")[0] ?? null, "w:val") ?? getAttr2(queryAllByLocalName2(override, "startOverride")[0] ?? null, "val")
|
|
1254
|
+
);
|
|
1255
|
+
const overrideLvl = queryAllByLocalName2(override, "lvl")[0] ?? null;
|
|
1256
|
+
const overrideNumFmtNode = overrideLvl ? queryAllByLocalName2(overrideLvl, "numFmt")[0] ?? null : null;
|
|
1257
|
+
const overrideLvlTextNode = overrideLvl ? queryAllByLocalName2(overrideLvl, "lvlText")[0] ?? null : null;
|
|
1258
|
+
const overrideLvlStart = toInt(
|
|
1259
|
+
getAttr2(queryAllByLocalName2(overrideLvl ?? override, "start")[0] ?? null, "w:val") ?? getAttr2(queryAllByLocalName2(overrideLvl ?? override, "start")[0] ?? null, "val")
|
|
1260
|
+
);
|
|
1261
|
+
levelMap.set(key, {
|
|
1262
|
+
numFmt: getAttr2(overrideNumFmtNode, "w:val") ?? getAttr2(overrideNumFmtNode, "val") ?? base.numFmt,
|
|
1263
|
+
lvlText: getAttr2(overrideLvlTextNode, "w:val") ?? getAttr2(overrideLvlTextNode, "val") ?? base.lvlText,
|
|
1264
|
+
startAt: overrideStart ?? overrideLvlStart ?? base.startAt
|
|
1265
|
+
});
|
|
676
1266
|
}
|
|
677
1267
|
}
|
|
678
1268
|
return levelMap;
|
|
@@ -710,15 +1300,15 @@ function parseParagraphProfiles(documentXml, numberingMap) {
|
|
|
710
1300
|
text,
|
|
711
1301
|
isEmpty: text.length === 0,
|
|
712
1302
|
align: parseParagraphAlign(paragraph),
|
|
713
|
-
beforePx: before === null ? null :
|
|
714
|
-
afterPx: after === null ? null :
|
|
1303
|
+
beforePx: before === null ? null : twipToPx2(before),
|
|
1304
|
+
afterPx: after === null ? null : twipToPx2(after),
|
|
715
1305
|
lineHeightRatio: line === null || lineHeightRule !== "auto" ? null : line / 240,
|
|
716
|
-
lineHeightPx: line === null || lineHeightRule === "auto" ? null :
|
|
1306
|
+
lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx2(line),
|
|
717
1307
|
lineHeightRule,
|
|
718
|
-
indentLeftPx: left === null ? null :
|
|
719
|
-
indentRightPx: right === null ? null :
|
|
720
|
-
firstLinePx: firstLine === null ? null :
|
|
721
|
-
hangingPx: hanging === null ? null :
|
|
1308
|
+
indentLeftPx: left === null ? null : twipToPx2(left),
|
|
1309
|
+
indentRightPx: right === null ? null : twipToPx2(right),
|
|
1310
|
+
firstLinePx: firstLine === null ? null : twipToPx2(firstLine),
|
|
1311
|
+
hangingPx: hanging === null ? null : twipToPx2(hanging),
|
|
722
1312
|
listNumId,
|
|
723
1313
|
listLevel,
|
|
724
1314
|
listFormat: listSpec?.numFmt ?? null,
|
|
@@ -753,19 +1343,19 @@ function parseTableDefaults(stylesXml) {
|
|
|
753
1343
|
return {
|
|
754
1344
|
topPx: (() => {
|
|
755
1345
|
const v = getTwipAttr(top, "w:w") ?? getTwipAttr(top, "w") ?? null;
|
|
756
|
-
return v === null ? null :
|
|
1346
|
+
return v === null ? null : twipToPx2(v);
|
|
757
1347
|
})(),
|
|
758
1348
|
leftPx: (() => {
|
|
759
1349
|
const v = getTwipAttr(left, "w:w") ?? getTwipAttr(left, "w") ?? null;
|
|
760
|
-
return v === null ? null :
|
|
1350
|
+
return v === null ? null : twipToPx2(v);
|
|
761
1351
|
})(),
|
|
762
1352
|
bottomPx: (() => {
|
|
763
1353
|
const v = getTwipAttr(bottom, "w:w") ?? getTwipAttr(bottom, "w") ?? null;
|
|
764
|
-
return v === null ? null :
|
|
1354
|
+
return v === null ? null : twipToPx2(v);
|
|
765
1355
|
})(),
|
|
766
1356
|
rightPx: (() => {
|
|
767
1357
|
const v = getTwipAttr(right, "w:w") ?? getTwipAttr(right, "w") ?? null;
|
|
768
|
-
return v === null ? null :
|
|
1358
|
+
return v === null ? null : twipToPx2(v);
|
|
769
1359
|
})()
|
|
770
1360
|
};
|
|
771
1361
|
}
|
|
@@ -863,9 +1453,9 @@ function parseDefaults(stylesXml) {
|
|
|
863
1453
|
const rawLineRule = (getAttr2(spacing, "w:lineRule") ?? getAttr2(spacing, "lineRule") ?? "auto").toLowerCase();
|
|
864
1454
|
const bodyLineHeightRule = rawLineRule === "exact" ? "exact" : rawLineRule === "atleast" ? "atLeast" : "auto";
|
|
865
1455
|
const bodyLineHeightRatio = line === null || bodyLineHeightRule !== "auto" ? null : line / 240;
|
|
866
|
-
const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null :
|
|
1456
|
+
const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx2(line);
|
|
867
1457
|
const after = getTwipAttr(spacing, "w:after") ?? getTwipAttr(spacing, "after") ?? null;
|
|
868
|
-
const paragraphAfterPx = after === null ? null :
|
|
1458
|
+
const paragraphAfterPx = after === null ? null : twipToPx2(after);
|
|
869
1459
|
return { bodyFontPx, bodyLineHeightRatio, bodyLineHeightPx, bodyLineHeightRule, paragraphAfterPx };
|
|
870
1460
|
}
|
|
871
1461
|
function parseHeading1Style(stylesXml) {
|
|
@@ -918,7 +1508,8 @@ function inferTitleFontFamily(families) {
|
|
|
918
1508
|
return FALLBACK_PROFILE.titleFontFamily;
|
|
919
1509
|
}
|
|
920
1510
|
async function parseDocxStyleProfile(file) {
|
|
921
|
-
const
|
|
1511
|
+
const maybeArrayBuffer = file.arrayBuffer;
|
|
1512
|
+
const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
|
|
922
1513
|
const zip = await import_jszip2.default.loadAsync(buffer);
|
|
923
1514
|
const documentXmlText = await zip.file("word/document.xml")?.async("string");
|
|
924
1515
|
const stylesXmlText = await zip.file("word/styles.xml")?.async("string");
|
|
@@ -1280,6 +1871,7 @@ function applyParagraphProfiles(doc, styleProfile) {
|
|
|
1280
1871
|
if (!alreadyHasMarker) {
|
|
1281
1872
|
const marker = doc.createElement("span");
|
|
1282
1873
|
marker.className = "__word-list-marker";
|
|
1874
|
+
marker.setAttribute("data-word-list-marker", "1");
|
|
1283
1875
|
marker.textContent = `${markerText} `;
|
|
1284
1876
|
marker.style.display = "inline-block";
|
|
1285
1877
|
marker.style.minWidth = "1.8em";
|
|
@@ -1331,7 +1923,7 @@ function applyKeepPagination(doc, styleProfile, paragraphs) {
|
|
|
1331
1923
|
for (let i = 0; i < count; i += 1) {
|
|
1332
1924
|
const p = paragraphs[i];
|
|
1333
1925
|
const profile = styleProfile.paragraphProfiles[i];
|
|
1334
|
-
const
|
|
1926
|
+
const h = paragraphHeightPx(p);
|
|
1335
1927
|
const forceBreak = profile.pageBreakBefore;
|
|
1336
1928
|
if (forceBreak && used > 0) {
|
|
1337
1929
|
insertPageSpacerBefore(doc, p, contentHeight - used);
|
|
@@ -1342,11 +1934,11 @@ function applyKeepPagination(doc, styleProfile, paragraphs) {
|
|
|
1342
1934
|
insertPageSpacerBefore(doc, p, contentHeight - used);
|
|
1343
1935
|
used = 0;
|
|
1344
1936
|
}
|
|
1345
|
-
if (used > 0 && used +
|
|
1937
|
+
if (used > 0 && used + h > contentHeight) {
|
|
1346
1938
|
insertPageSpacerBefore(doc, p, contentHeight - used);
|
|
1347
1939
|
used = 0;
|
|
1348
1940
|
}
|
|
1349
|
-
used +=
|
|
1941
|
+
used += h;
|
|
1350
1942
|
if (used >= contentHeight) {
|
|
1351
1943
|
used = used % contentHeight;
|
|
1352
1944
|
}
|
|
@@ -1388,6 +1980,41 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
|
|
|
1388
1980
|
}
|
|
1389
1981
|
|
|
1390
1982
|
// src/core/DocsWordElement.ts
|
|
1983
|
+
// Library version reported in the detail of the `docsjs-ready` event.
// Must track the published package version; it was left at "0.1.2" in the 0.1.5 build.
var VERSION = "0.1.5";
// User-facing UI strings keyed by locale ("zh" | "en"). Both locales expose the same keys.
// `loadedWord` is a function because the message embeds the loaded file name.
var MESSAGES = {
  zh: {
    readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
    uploadWord: "\u4E0A\u4F20 Word",
    clear: "\u6E05\u7A7A",
    pastePlaceholder: "\u5728\u6B64\u5904\u7C98\u8D34 Word/WPS/Google Docs \u5185\u5BB9\uFF08Ctrl/Cmd+V\uFF09",
    waitImport: "\u7B49\u5F85\u5185\u5BB9\u5BFC\u5165",
    loadedHtml: "\u5DF2\u52A0\u8F7D HTML \u5FEB\u7167",
    cleared: "\u6587\u6863\u5DF2\u6E05\u7A7A",
    loadedWord: (name) => `\u5DF2\u52A0\u8F7D Word \u6587\u4EF6: ${name}`,
    importedClipboard: "\u5DF2\u5BFC\u5165\u526A\u8D34\u677F\u5185\u5BB9",
    noContent: "\u672A\u68C0\u6D4B\u5230\u53EF\u5BFC\u5165\u5185\u5BB9",
    noClipboardRead: "\u5F53\u524D\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 clipboard.read",
    parseFailed: "Word \u89E3\u6790\u5931\u8D25",
    clipboardReadFailed: "\u8BFB\u53D6\u526A\u8D34\u677F\u5931\u8D25",
    errorPrefix: "\u9519\u8BEF: "
  },
  en: {
    readClipboard: "Read clipboard",
    uploadWord: "Upload Word",
    clear: "Clear",
    pastePlaceholder: "Paste Word/WPS/Google Docs content here (Ctrl/Cmd+V)",
    waitImport: "Waiting for input",
    loadedHtml: "HTML snapshot loaded",
    cleared: "Document cleared",
    loadedWord: (name) => `Word file loaded: ${name}`,
    importedClipboard: "Clipboard content imported",
    noContent: "No importable content detected",
    noClipboardRead: "navigator.clipboard.read is not supported in this browser",
    parseFailed: "Word parse failed",
    clipboardReadFailed: "Failed to read clipboard",
    errorPrefix: "Error: "
  }
};
|
|
1391
2018
|
var BASE_CSS = `
|
|
1392
2019
|
:host{display:block;border:1px solid #d8deea;border-radius:12px;background:#fff;overflow:hidden;font-family:ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto}
|
|
1393
2020
|
.toolbar{display:flex;gap:8px;flex-wrap:wrap;padding:10px;border-bottom:1px solid #e8edf6;background:#f8faff}
|
|
@@ -1398,6 +2025,10 @@ iframe{width:100%;min-height:760px;border:0}
|
|
|
1398
2025
|
`;
|
|
1399
2026
|
var DocsWordElement = class extends HTMLElement {
|
|
1400
2027
|
rootRef;
|
|
2028
|
+
toolbar;
|
|
2029
|
+
btnRead;
|
|
2030
|
+
btnUpload;
|
|
2031
|
+
btnClear;
|
|
1401
2032
|
frame;
|
|
1402
2033
|
pasteArea;
|
|
1403
2034
|
fileInput;
|
|
@@ -1405,64 +2036,92 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
1405
2036
|
htmlSnapshot;
|
|
1406
2037
|
styleProfile = null;
|
|
1407
2038
|
frameHeight = 0;
|
|
2039
|
+
locale = "zh";
|
|
1408
2040
|
constructor() {
|
|
1409
2041
|
super();
|
|
1410
2042
|
this.rootRef = this.attachShadow({ mode: "open" });
|
|
2043
|
+
this.locale = this.parseLocale(this.getAttribute("lang"));
|
|
1411
2044
|
this.htmlSnapshot = buildHtmlSnapshot("<p><br/></p>");
|
|
1412
2045
|
const style = document.createElement("style");
|
|
1413
2046
|
style.textContent = BASE_CSS;
|
|
1414
|
-
|
|
1415
|
-
toolbar.className = "toolbar";
|
|
1416
|
-
|
|
1417
|
-
btnRead.
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
const btnClear = document.createElement("button");
|
|
1423
|
-
btnClear.textContent = "\u6E05\u7A7A";
|
|
1424
|
-
btnClear.onclick = () => this.clear();
|
|
2047
|
+
this.toolbar = document.createElement("div");
|
|
2048
|
+
this.toolbar.className = "toolbar";
|
|
2049
|
+
this.btnRead = document.createElement("button");
|
|
2050
|
+
this.btnRead.onclick = () => void this.loadClipboard();
|
|
2051
|
+
this.btnUpload = document.createElement("button");
|
|
2052
|
+
this.btnUpload.onclick = () => this.fileInput.click();
|
|
2053
|
+
this.btnClear = document.createElement("button");
|
|
2054
|
+
this.btnClear.onclick = () => this.clear();
|
|
1425
2055
|
this.fileInput = document.createElement("input");
|
|
1426
2056
|
this.fileInput.type = "file";
|
|
1427
2057
|
this.fileInput.accept = ".docx";
|
|
1428
2058
|
this.fileInput.style.display = "none";
|
|
1429
2059
|
this.fileInput.onchange = () => void this.onUpload();
|
|
1430
|
-
toolbar.append(btnRead, btnUpload, btnClear, this.fileInput);
|
|
2060
|
+
this.toolbar.append(this.btnRead, this.btnUpload, this.btnClear, this.fileInput);
|
|
1431
2061
|
this.pasteArea = document.createElement("textarea");
|
|
1432
2062
|
this.pasteArea.className = "paste";
|
|
1433
|
-
this.pasteArea.placeholder = "
|
|
2063
|
+
this.pasteArea.placeholder = "";
|
|
1434
2064
|
this.pasteArea.onpaste = (event) => {
|
|
1435
2065
|
event.preventDefault();
|
|
1436
2066
|
void this.applyFromClipboardData(event.clipboardData);
|
|
1437
2067
|
};
|
|
1438
2068
|
this.hint = document.createElement("span");
|
|
1439
2069
|
this.hint.className = "hint";
|
|
1440
|
-
this.hint.textContent = "
|
|
2070
|
+
this.hint.textContent = "";
|
|
1441
2071
|
this.frame = document.createElement("iframe");
|
|
1442
2072
|
this.frame.sandbox.add("allow-same-origin", "allow-scripts");
|
|
1443
2073
|
this.frame.onload = () => this.onFrameLoad();
|
|
1444
|
-
this.rootRef.append(style, toolbar, this.pasteArea, this.hint, this.frame);
|
|
2074
|
+
this.rootRef.append(style, this.toolbar, this.pasteArea, this.hint, this.frame);
|
|
2075
|
+
this.syncLocaleText();
|
|
2076
|
+
this.syncToolbarVisibility();
|
|
2077
|
+
}
|
|
2078
|
+
static get observedAttributes() {
|
|
2079
|
+
return ["lang", "show-toolbar"];
|
|
2080
|
+
}
|
|
2081
|
+
attributeChangedCallback(name, _, newValue) {
|
|
2082
|
+
if (name === "lang") {
|
|
2083
|
+
this.locale = this.parseLocale(newValue);
|
|
2084
|
+
this.syncLocaleText();
|
|
2085
|
+
return;
|
|
2086
|
+
}
|
|
2087
|
+
if (name === "show-toolbar") {
|
|
2088
|
+
this.syncToolbarVisibility();
|
|
2089
|
+
}
|
|
1445
2090
|
}
|
|
1446
2091
|
connectedCallback() {
|
|
1447
2092
|
this.renderSnapshot();
|
|
2093
|
+
this.dispatchEvent(new CustomEvent("docsjs-ready", { detail: { version: VERSION } }));
|
|
1448
2094
|
}
|
|
1449
2095
|
setSnapshot(rawHtml) {
|
|
2096
|
+
this.loadHtml(rawHtml);
|
|
2097
|
+
}
|
|
2098
|
+
loadHtml(rawHtml) {
|
|
1450
2099
|
this.styleProfile = null;
|
|
1451
2100
|
this.htmlSnapshot = buildHtmlSnapshot(rawHtml);
|
|
1452
2101
|
this.renderSnapshot();
|
|
1453
|
-
this.
|
|
1454
|
-
this.emitChange();
|
|
2102
|
+
this.setHint(MESSAGES[this.locale].loadedHtml);
|
|
2103
|
+
this.emitChange("api");
|
|
2104
|
+
}
|
|
2105
|
+
getSnapshot() {
|
|
2106
|
+
return this.htmlSnapshot;
|
|
1455
2107
|
}
|
|
1456
2108
|
clear() {
|
|
1457
2109
|
this.styleProfile = null;
|
|
1458
2110
|
this.htmlSnapshot = buildHtmlSnapshot("<p><br/></p>");
|
|
1459
2111
|
this.renderSnapshot();
|
|
1460
|
-
this.
|
|
1461
|
-
this.emitChange();
|
|
2112
|
+
this.setHint(MESSAGES[this.locale].cleared);
|
|
2113
|
+
this.emitChange("clear");
|
|
2114
|
+
}
|
|
2115
|
+
async loadDocx(file) {
|
|
2116
|
+
await this.applyDocx(file);
|
|
1462
2117
|
}
|
|
1463
2118
|
async onUpload() {
|
|
1464
2119
|
const file = this.fileInput.files?.[0];
|
|
1465
2120
|
if (!file) return;
|
|
2121
|
+
await this.applyDocx(file);
|
|
2122
|
+
this.fileInput.value = "";
|
|
2123
|
+
}
|
|
2124
|
+
async applyDocx(file) {
|
|
1466
2125
|
try {
|
|
1467
2126
|
const [snapshot, profile] = await Promise.all([
|
|
1468
2127
|
parseDocxToHtmlSnapshot(file),
|
|
@@ -1471,17 +2130,15 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
1471
2130
|
this.styleProfile = profile;
|
|
1472
2131
|
this.htmlSnapshot = snapshot;
|
|
1473
2132
|
this.renderSnapshot();
|
|
1474
|
-
this.
|
|
1475
|
-
this.emitChange();
|
|
2133
|
+
this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
|
|
2134
|
+
this.emitChange("upload", profile.sourceFileName);
|
|
1476
2135
|
} catch (error) {
|
|
1477
|
-
this.emitError(error instanceof Error ? error.message :
|
|
1478
|
-
} finally {
|
|
1479
|
-
this.fileInput.value = "";
|
|
2136
|
+
this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
|
|
1480
2137
|
}
|
|
1481
2138
|
}
|
|
1482
|
-
async
|
|
2139
|
+
async loadClipboard() {
|
|
1483
2140
|
if (!navigator.clipboard?.read) {
|
|
1484
|
-
this.emitError(
|
|
2141
|
+
this.emitError(MESSAGES[this.locale].noClipboardRead);
|
|
1485
2142
|
return;
|
|
1486
2143
|
}
|
|
1487
2144
|
try {
|
|
@@ -1489,7 +2146,7 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
1489
2146
|
const payload = await extractFromClipboardItems(items);
|
|
1490
2147
|
this.applyPayload(payload.html, payload.text);
|
|
1491
2148
|
} catch (error) {
|
|
1492
|
-
this.emitError(error instanceof Error ? error.message :
|
|
2149
|
+
this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].clipboardReadFailed);
|
|
1493
2150
|
}
|
|
1494
2151
|
}
|
|
1495
2152
|
async applyFromClipboardData(data) {
|
|
@@ -1504,12 +2161,12 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
1504
2161
|
} else if (text.trim()) {
|
|
1505
2162
|
this.htmlSnapshot = buildHtmlSnapshot(`<p>${text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">")}</p>`);
|
|
1506
2163
|
} else {
|
|
1507
|
-
this.
|
|
2164
|
+
this.setHint(MESSAGES[this.locale].noContent);
|
|
1508
2165
|
return;
|
|
1509
2166
|
}
|
|
1510
2167
|
this.renderSnapshot();
|
|
1511
|
-
this.
|
|
1512
|
-
this.emitChange();
|
|
2168
|
+
this.setHint(MESSAGES[this.locale].importedClipboard);
|
|
2169
|
+
this.emitChange("paste");
|
|
1513
2170
|
}
|
|
1514
2171
|
onFrameLoad() {
|
|
1515
2172
|
const doc = this.frame.contentDocument;
|
|
@@ -1534,12 +2191,33 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
1534
2191
|
renderSnapshot() {
|
|
1535
2192
|
this.frame.srcdoc = this.htmlSnapshot;
|
|
1536
2193
|
}
|
|
1537
|
-
emitChange() {
|
|
1538
|
-
this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot } }));
|
|
2194
|
+
emitChange(source, fileName) {
|
|
2195
|
+
this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName } }));
|
|
1539
2196
|
}
|
|
1540
2197
|
emitError(message) {
|
|
1541
2198
|
this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
|
|
1542
|
-
this.
|
|
2199
|
+
this.setHint(`${MESSAGES[this.locale].errorPrefix}${message}`);
|
|
2200
|
+
}
|
|
2201
|
+
setHint(text) {
|
|
2202
|
+
this.hint.textContent = text;
|
|
2203
|
+
}
|
|
2204
|
+
parseLocale(value) {
|
|
2205
|
+
return value?.toLowerCase() === "en" ? "en" : "zh";
|
|
2206
|
+
}
|
|
2207
|
+
syncToolbarVisibility() {
|
|
2208
|
+
const raw = this.getAttribute("show-toolbar");
|
|
2209
|
+
const show = raw === null || raw === "" || raw === "1" || raw.toLowerCase() === "true";
|
|
2210
|
+
this.toolbar.style.display = show ? "flex" : "none";
|
|
2211
|
+
}
|
|
2212
|
+
syncLocaleText() {
|
|
2213
|
+
const t = MESSAGES[this.locale];
|
|
2214
|
+
this.btnRead.textContent = t.readClipboard;
|
|
2215
|
+
this.btnUpload.textContent = t.uploadWord;
|
|
2216
|
+
this.btnClear.textContent = t.clear;
|
|
2217
|
+
this.pasteArea.placeholder = t.pastePlaceholder;
|
|
2218
|
+
if (!this.hint.textContent || this.hint.textContent === MESSAGES.en.waitImport || this.hint.textContent === MESSAGES.zh.waitImport) {
|
|
2219
|
+
this.hint.textContent = t.waitImport;
|
|
2220
|
+
}
|
|
1543
2221
|
}
|
|
1544
2222
|
};
|
|
1545
2223
|
function defineDocsWordElement() {
|
|
@@ -1548,62 +2226,74 @@ function defineDocsWordElement() {
|
|
|
1548
2226
|
}
|
|
1549
2227
|
}
|
|
1550
2228
|
|
|
1551
|
-
// src/
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
(
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
2229
|
+
// src/lib/semanticStats.ts
|
|
2230
|
+
/**
 * Counts the elements under `root` that match a CSS selector.
 *
 * @param {ParentNode} root - Node to query (anything exposing `querySelectorAll`).
 * @param {string} selector - CSS selector list.
 * @returns {number} Number of matching elements.
 */
function countElements(root, selector) {
  const matches = root.querySelectorAll(selector);
  return matches.length;
}
|
|
2233
|
+
/**
 * Tells whether a paragraph element looks like a list item.
 *
 * A paragraph qualifies when any of these holds, checked in order:
 *   - it carries a `data-word-list` attribute;
 *   - it contains a `span.__word-list-marker` element;
 *   - its inline `style` attribute mentions `mso-list` (case-insensitive).
 *
 * @param {Element} p - Paragraph element to inspect.
 * @returns {boolean} True when the paragraph is list-like.
 */
function isListLikeParagraph(p) {
  const hasListAttribute = () => p.hasAttribute("data-word-list");
  const hasMarkerSpan = () => Boolean(p.querySelector("span.__word-list-marker"));
  const hasMsoListStyle = () => {
    const inlineStyle = p.getAttribute("style") ?? "";
    return inlineStyle.toLowerCase().includes("mso-list");
  };
  return hasListAttribute() || hasMarkerSpan() || hasMsoListStyle();
}
|
|
2239
|
+
/**
 * Collects structural counters from a rendered HTML document.
 *
 * Counts are obtained with CSS selectors over the whole document (paragraphs,
 * headings, tables, cells, images and the `data-word-*` markers). List
 * paragraphs are the `<p>` elements accepted by `isListLikeParagraph`, and
 * `textCharCount` is the length of the body text with all whitespace removed.
 *
 * @param {Document} doc - Document to inspect; `doc.body` must exist.
 * @returns {object} Counter object with one numeric field per statistic.
 */
function collectSemanticStatsFromDocument(doc) {
  const count = (selector) => countElements(doc, selector);

  const paragraphNodes = Array.from(doc.querySelectorAll("p"));
  let listParagraphCount = 0;
  for (const node of paragraphNodes) {
    if (isListLikeParagraph(node)) listParagraphCount += 1;
  }

  const bodyText = doc.body.textContent ?? "";
  const textCharCount = bodyText.replace(/\s+/g, "").length;

  return {
    paragraphCount: paragraphNodes.length,
    headingCount: count("h1,h2,h3,h4,h5,h6"),
    tableCount: count("table"),
    tableCellCount: count("td,th"),
    imageCount: count("img"),
    anchorImageCount: count('img[data-word-anchor="1"]'),
    wrappedImageCount: count("img[data-word-wrap]"),
    ommlCount: count("[data-word-omml]"),
    chartCount: count("[data-word-chart]"),
    smartArtCount: count("[data-word-smartart]"),
    listParagraphCount,
    commentRefCount: count("[data-word-comment-ref]"),
    revisionInsCount: count('[data-word-revision="ins"]'),
    revisionDelCount: count('[data-word-revision="del"]'),
    pageBreakCount: count("[data-word-page-break='1']"),
    pageSpacerCount: count("[data-word-page-spacer='1']"),
    textCharCount
  };
}
|
|
2263
|
+
/**
 * Parses an HTML string with `DOMParser` (as `text/html`) and returns the
 * statistics computed by `collectSemanticStatsFromDocument` for the result.
 *
 * @param {string} rawHtml - HTML markup to analyse.
 * @returns {object} Whatever `collectSemanticStatsFromDocument` returns for the parsed document.
 */
function collectSemanticStatsFromHtml(rawHtml) {
  const parsedDocument = new DOMParser().parseFromString(rawHtml, "text/html");
  return collectSemanticStatsFromDocument(parsedDocument);
}
|
|
1576
2268
|
|
|
1577
|
-
// src/
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
return () => (0, import_vue.h)("docs-word-editor", { ref: elRef });
|
|
1600
|
-
}
|
|
1601
|
-
});
|
|
2269
|
+
// src/lib/fidelityScore.ts
|
|
2270
|
+
/**
 * Scores how close `actual` is to `expected` on a 0..1 scale.
 *
 * - Both values non-positive: nothing was expected and nothing was found, so 1.
 * - Nothing expected but something found, or a negative `actual`: 0.
 * - Otherwise `1 - |actual - expected| / expected`, floored at 0.
 *
 * @param {number} actual - Observed count.
 * @param {number} expected - Reference count.
 * @returns {number} Score in the range [0, 1]; never NaN.
 */
function ratioScore(actual, expected) {
  if (expected <= 0 && actual <= 0) return 1;
  if (expected <= 0 || actual < 0) return 0;
  const score = Math.max(0, 1 - Math.abs(actual - expected) / expected);
  // Missing or non-numeric counts make the arithmetic above yield NaN, which
  // would otherwise propagate into every score derived from this one.
  return Number.isNaN(score) ? 0 : score;
}
|
|
2277
|
+
/**
 * Limits a number to the closed interval [0, 1].
 *
 * Values below 0 become 0, values above 1 become 1, and anything else
 * (including NaN, which fails both comparisons) is returned unchanged.
 *
 * @param {number} v - Value to clamp.
 * @returns {number} The clamped value.
 */
function clamp01(v) {
  return v < 0 ? 0 : v > 1 ? 1 : v;
}
|
|
2282
|
+
/**
 * Compares two semantic-stats objects and derives fidelity scores.
 *
 * - `structure`: mean `ratioScore` over nine structural counts.
 * - `styleProxy`: `ratioScore` of `textCharCount`.
 * - `pagination`: `ratioScore` of `pageSpacerCount`.
 * - `overall`: 0.6 * structure + 0.25 * styleProxy + 0.15 * pagination.
 *
 * Every score is passed through `clamp01`.
 *
 * @param {object} expected - Reference statistics.
 * @param {object} actual - Statistics of the output being evaluated.
 * @returns {{ structure: number, styleProxy: number, pagination: number, overall: number }}
 */
function calculateFidelityScore(expected, actual) {
  const structureKeys = [
    "paragraphCount",
    "headingCount",
    "tableCount",
    "tableCellCount",
    "imageCount",
    "ommlCount",
    "chartCount",
    "smartArtCount",
    "listParagraphCount",
  ];
  const structureTotal = structureKeys
    .map((key) => ratioScore(actual[key], expected[key]))
    // No seed value on purpose: the fold starts from the first score, so the
    // additions happen in exactly the listed left-to-right order.
    .reduce((sum, score) => sum + score);

  const structure = clamp01(structureTotal / structureKeys.length);
  const styleProxy = clamp01(ratioScore(actual.textCharCount, expected.textCharCount));
  const pagination = clamp01(ratioScore(actual.pageSpacerCount, expected.pageSpacerCount));
  const weighted = structure * 0.6 + styleProxy * 0.25 + pagination * 0.15;

  return { structure, styleProxy, pagination, overall: clamp01(weighted) };
}
|
|
1602
2291
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1603
2292
|
0 && (module.exports = {
|
|
1604
2293
|
DocsWordElement,
|
|
1605
|
-
|
|
1606
|
-
|
|
2294
|
+
calculateFidelityScore,
|
|
2295
|
+
collectSemanticStatsFromDocument,
|
|
2296
|
+
collectSemanticStatsFromHtml,
|
|
1607
2297
|
defineDocsWordElement
|
|
1608
2298
|
});
|
|
1609
2299
|
//# sourceMappingURL=index.cjs.map
|