@coding01/docsjs 0.1.3 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -21
- package/README.zh-CN.md +74 -20
- package/dist/{chunk-PRPDJOB7.js → chunk-632UOG2B.js} +448 -102
- package/dist/chunk-632UOG2B.js.map +1 -0
- package/dist/index.cjs +452 -103
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.js +11 -4
- package/dist/index.js.map +1 -1
- package/dist/react.cjs +449 -102
- package/dist/react.cjs.map +1 -1
- package/dist/react.d.cts +1 -1
- package/dist/react.d.ts +1 -1
- package/dist/react.js +12 -3
- package/dist/react.js.map +1 -1
- package/dist/types-VvdwVF0_.d.cts +44 -0
- package/dist/types-VvdwVF0_.d.ts +44 -0
- package/dist/vue.cjs +438 -100
- package/dist/vue.cjs.map +1 -1
- package/dist/vue.d.cts +1 -1
- package/dist/vue.d.ts +1 -1
- package/dist/vue.js +1 -1
- package/package.json +3 -1
- package/dist/chunk-PRPDJOB7.js.map +0 -1
- package/dist/types-DF14w1ol.d.cts +0 -20
- package/dist/types-DF14w1ol.d.ts +0 -20
package/dist/index.cjs
CHANGED
|
@@ -34,7 +34,9 @@ __export(index_exports, {
|
|
|
34
34
|
calculateFidelityScore: () => calculateFidelityScore,
|
|
35
35
|
collectSemanticStatsFromDocument: () => collectSemanticStatsFromDocument,
|
|
36
36
|
collectSemanticStatsFromHtml: () => collectSemanticStatsFromHtml,
|
|
37
|
-
defineDocsWordElement: () => defineDocsWordElement
|
|
37
|
+
defineDocsWordElement: () => defineDocsWordElement,
|
|
38
|
+
parseDocxToHtmlSnapshot: () => parseDocxToHtmlSnapshot,
|
|
39
|
+
parseDocxToHtmlSnapshotWithReport: () => parseDocxToHtmlSnapshotWithReport
|
|
38
40
|
});
|
|
39
41
|
module.exports = __toCommonJS(index_exports);
|
|
40
42
|
|
|
@@ -57,6 +59,21 @@ function buildHtmlSnapshot(rawHtml) {
|
|
|
57
59
|
|
|
58
60
|
// src/lib/docxHtml.ts
|
|
59
61
|
var import_jszip = __toESM(require("jszip"), 1);
|
|
62
|
+
function createEmptyFeatureCounts() {
|
|
63
|
+
return {
|
|
64
|
+
hyperlinkCount: 0,
|
|
65
|
+
anchorImageCount: 0,
|
|
66
|
+
chartCount: 0,
|
|
67
|
+
smartArtCount: 0,
|
|
68
|
+
ommlCount: 0,
|
|
69
|
+
tableCount: 0,
|
|
70
|
+
footnoteRefCount: 0,
|
|
71
|
+
endnoteRefCount: 0,
|
|
72
|
+
commentRefCount: 0,
|
|
73
|
+
revisionCount: 0,
|
|
74
|
+
pageBreakCount: 0
|
|
75
|
+
};
|
|
76
|
+
}
|
|
60
77
|
function escapeHtml(text) {
|
|
61
78
|
return text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
|
|
62
79
|
}
|
|
@@ -80,6 +97,9 @@ function getAttr(node, name) {
|
|
|
80
97
|
function emuToPx(emu) {
|
|
81
98
|
return emu * 96 / 914400;
|
|
82
99
|
}
|
|
100
|
+
function twipToPx(twip) {
|
|
101
|
+
return twip * 96 / 1440;
|
|
102
|
+
}
|
|
83
103
|
function parseDrawingSizePx(drawing) {
|
|
84
104
|
const extentNode = queryAllByLocalName(drawing, "extent").find((node) => {
|
|
85
105
|
const parent = node.parentElement;
|
|
@@ -108,9 +128,7 @@ function imageDimensionAttributes(sizePx) {
|
|
|
108
128
|
}
|
|
109
129
|
return attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
|
|
110
130
|
}
|
|
111
|
-
function parseAnchorPositionPx(drawing) {
|
|
112
|
-
const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
|
|
113
|
-
if (!anchor) return { leftPx: null, topPx: null };
|
|
131
|
+
function parseAnchorPositionPx(anchor) {
|
|
114
132
|
let leftPx = null;
|
|
115
133
|
let topPx = null;
|
|
116
134
|
const positionH = directChildrenByLocalName(anchor, "positionH")[0] ?? null;
|
|
@@ -125,34 +143,80 @@ function parseAnchorPositionPx(drawing) {
|
|
|
125
143
|
if (Number.isFinite(top)) topPx = emuToPx(top);
|
|
126
144
|
return { leftPx, topPx };
|
|
127
145
|
}
|
|
128
|
-
function parseAnchorWrapMode(drawing) {
|
|
129
|
-
const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
|
|
130
|
-
if (!anchor) return null;
|
|
146
|
+
function parseAnchorWrapMode(anchor) {
|
|
131
147
|
if (directChildrenByLocalName(anchor, "wrapSquare")[0]) return "square";
|
|
132
148
|
if (directChildrenByLocalName(anchor, "wrapTight")[0]) return "tight";
|
|
133
149
|
if (directChildrenByLocalName(anchor, "wrapTopAndBottom")[0]) return "topAndBottom";
|
|
134
150
|
if (directChildrenByLocalName(anchor, "wrapNone")[0]) return "none";
|
|
135
151
|
return null;
|
|
136
152
|
}
|
|
137
|
-
function
|
|
138
|
-
|
|
153
|
+
function parseAnchorMeta(drawing) {
|
|
154
|
+
const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
|
|
155
|
+
if (!anchor) return null;
|
|
156
|
+
const positionH = directChildrenByLocalName(anchor, "positionH")[0] ?? null;
|
|
157
|
+
const positionV = directChildrenByLocalName(anchor, "positionV")[0] ?? null;
|
|
158
|
+
const relativeFromH = getAttr(positionH, "relativeFrom");
|
|
159
|
+
const relativeFromV = getAttr(positionV, "relativeFrom");
|
|
160
|
+
const parseDistPx = (name) => {
|
|
161
|
+
const raw = getAttr(anchor, name);
|
|
162
|
+
const emu = raw ? Number.parseInt(raw, 10) : Number.NaN;
|
|
163
|
+
return Number.isFinite(emu) && emu >= 0 ? emuToPx(emu) : null;
|
|
164
|
+
};
|
|
165
|
+
const rawHeight = getAttr(anchor, "relativeHeight");
|
|
166
|
+
const parsedHeight = rawHeight ? Number.parseInt(rawHeight, 10) : Number.NaN;
|
|
167
|
+
const boolAttr = (name, fallback) => {
|
|
168
|
+
const raw = (getAttr(anchor, name) ?? "").toLowerCase();
|
|
169
|
+
if (raw === "1" || raw === "true" || raw === "on") return true;
|
|
170
|
+
if (raw === "0" || raw === "false" || raw === "off") return false;
|
|
171
|
+
return fallback;
|
|
172
|
+
};
|
|
173
|
+
return {
|
|
174
|
+
position: parseAnchorPositionPx(anchor),
|
|
175
|
+
wrapMode: parseAnchorWrapMode(anchor),
|
|
176
|
+
distTPx: parseDistPx("distT"),
|
|
177
|
+
distBPx: parseDistPx("distB"),
|
|
178
|
+
distLPx: parseDistPx("distL"),
|
|
179
|
+
distRPx: parseDistPx("distR"),
|
|
180
|
+
relativeFromH,
|
|
181
|
+
relativeFromV,
|
|
182
|
+
behindDoc: boolAttr("behindDoc", false),
|
|
183
|
+
allowOverlap: boolAttr("allowOverlap", true),
|
|
184
|
+
layoutInCell: boolAttr("layoutInCell", true),
|
|
185
|
+
relativeHeight: Number.isFinite(parsedHeight) ? parsedHeight : null
|
|
186
|
+
};
|
|
187
|
+
}
|
|
188
|
+
function mergeImageStyle(baseAttrs, anchorMeta) {
|
|
189
|
+
if (!anchorMeta) return baseAttrs;
|
|
190
|
+
const { position, wrapMode } = anchorMeta;
|
|
191
|
+
if (position.leftPx === null && position.topPx === null) return baseAttrs;
|
|
139
192
|
const styleParts = [
|
|
140
193
|
"position:absolute",
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
194
|
+
position.leftPx !== null ? `left:${position.leftPx.toFixed(2)}px` : "",
|
|
195
|
+
position.topPx !== null ? `top:${position.topPx.toFixed(2)}px` : "",
|
|
196
|
+
`z-index:${anchorMeta.behindDoc ? 0 : anchorMeta.relativeHeight ?? 3}`,
|
|
197
|
+
anchorMeta.distTPx !== null ? `margin-top:${anchorMeta.distTPx.toFixed(2)}px` : "",
|
|
198
|
+
anchorMeta.distBPx !== null ? `margin-bottom:${anchorMeta.distBPx.toFixed(2)}px` : "",
|
|
199
|
+
anchorMeta.distLPx !== null ? `margin-left:${anchorMeta.distLPx.toFixed(2)}px` : "",
|
|
200
|
+
anchorMeta.distRPx !== null ? `margin-right:${anchorMeta.distRPx.toFixed(2)}px` : ""
|
|
144
201
|
].filter((x) => x.length > 0);
|
|
145
202
|
if (wrapMode === "topAndBottom") {
|
|
146
|
-
styleParts.push("display:block");
|
|
147
|
-
}
|
|
203
|
+
styleParts.push("display:block", "clear:both");
|
|
204
|
+
}
|
|
205
|
+
const anchorAttrs = [
|
|
206
|
+
`data-word-anchor="1"`,
|
|
207
|
+
wrapMode ? `data-word-wrap="${wrapMode}"` : "",
|
|
208
|
+
anchorMeta.relativeFromH ? `data-word-anchor-relh="${escapeHtml(anchorMeta.relativeFromH)}"` : "",
|
|
209
|
+
anchorMeta.relativeFromV ? `data-word-anchor-relv="${escapeHtml(anchorMeta.relativeFromV)}"` : "",
|
|
210
|
+
anchorMeta.behindDoc ? `data-word-anchor-behind="1"` : `data-word-anchor-behind="0"`,
|
|
211
|
+
anchorMeta.allowOverlap ? `data-word-anchor-overlap="1"` : `data-word-anchor-overlap="0"`,
|
|
212
|
+
anchorMeta.layoutInCell ? `data-word-anchor-layout-cell="1"` : `data-word-anchor-layout-cell="0"`
|
|
213
|
+
].filter((x) => x.length > 0).join(" ");
|
|
148
214
|
if (!baseAttrs.includes("style=")) {
|
|
149
|
-
|
|
150
|
-
return `${baseAttrs} style="${styleParts.join(";")}" data-word-anchor="1"${wrapAttr}`;
|
|
215
|
+
return `${baseAttrs} style="${styleParts.join(";")}" ${anchorAttrs}`;
|
|
151
216
|
}
|
|
152
217
|
return baseAttrs.replace(/style="([^"]*)"/, (_m, styleText) => {
|
|
153
218
|
const merged = [styleText, ...styleParts].filter((x) => x.length > 0).join(";");
|
|
154
|
-
|
|
155
|
-
return `style="${merged}" data-word-anchor="1"${wrapAttr}`;
|
|
219
|
+
return `style="${merged}" ${anchorAttrs}`;
|
|
156
220
|
});
|
|
157
221
|
}
|
|
158
222
|
function parseDocRelsMap(relsXmlText) {
|
|
@@ -178,11 +242,29 @@ function extToMime(ext) {
|
|
|
178
242
|
if (lower === "svg") return "image/svg+xml";
|
|
179
243
|
return "application/octet-stream";
|
|
180
244
|
}
|
|
245
|
+
function normalizeWordPath(relTarget) {
|
|
246
|
+
const normalized = relTarget.replace(/\\/g, "/").replace(/^\/+/, "");
|
|
247
|
+
if (normalized.startsWith("word/")) return normalized;
|
|
248
|
+
if (normalized.startsWith("../")) return `word/${normalized.replace(/^(\.\.\/)+/, "")}`;
|
|
249
|
+
return `word/${normalized}`;
|
|
250
|
+
}
|
|
251
|
+
function resolveHyperlinkHref(relMap, rid, anchor) {
|
|
252
|
+
if (anchor && anchor.trim()) return `#${encodeURIComponent(anchor.trim())}`;
|
|
253
|
+
if (!rid) return null;
|
|
254
|
+
const relTarget = relMap[rid];
|
|
255
|
+
if (!relTarget) return null;
|
|
256
|
+
const trimmed = relTarget.trim();
|
|
257
|
+
if (!trimmed) return null;
|
|
258
|
+
const lower = trimmed.toLowerCase();
|
|
259
|
+
if (lower.startsWith("http://") || lower.startsWith("https://") || lower.startsWith("mailto:") || lower.startsWith("tel:")) {
|
|
260
|
+
return trimmed;
|
|
261
|
+
}
|
|
262
|
+
return trimmed.startsWith("#") ? trimmed : `#${encodeURIComponent(trimmed)}`;
|
|
263
|
+
}
|
|
181
264
|
async function imageRidToDataUrl(zip, relMap, rid) {
|
|
182
265
|
const relTarget = relMap[rid];
|
|
183
266
|
if (!relTarget) return null;
|
|
184
|
-
const
|
|
185
|
-
const path = normalized.startsWith("word/") ? normalized : `word/${normalized}`;
|
|
267
|
+
const path = normalizeWordPath(relTarget);
|
|
186
268
|
const file = zip.file(path);
|
|
187
269
|
if (!file) return null;
|
|
188
270
|
const base64 = await file.async("base64");
|
|
@@ -190,6 +272,55 @@ async function imageRidToDataUrl(zip, relMap, rid) {
|
|
|
190
272
|
const mime = extToMime(ext);
|
|
191
273
|
return `data:${mime};base64,${base64}`;
|
|
192
274
|
}
|
|
275
|
+
async function readXmlByRid(zip, relMap, rid) {
|
|
276
|
+
const relTarget = relMap[rid];
|
|
277
|
+
if (!relTarget) return null;
|
|
278
|
+
const path = normalizeWordPath(relTarget);
|
|
279
|
+
const file = zip.file(path);
|
|
280
|
+
return file ? file.async("string") : null;
|
|
281
|
+
}
|
|
282
|
+
function parseChartType(chartDoc) {
|
|
283
|
+
const known = ["barChart", "lineChart", "pieChart", "areaChart", "scatterChart", "radarChart", "doughnutChart"];
|
|
284
|
+
for (const type of known) {
|
|
285
|
+
if (queryByLocalName(chartDoc, type)) return type.replace(/Chart$/, "");
|
|
286
|
+
}
|
|
287
|
+
return "unknown";
|
|
288
|
+
}
|
|
289
|
+
function parseChartSummary(chartXmlText) {
|
|
290
|
+
const chartDoc = parseXml(chartXmlText);
|
|
291
|
+
const title = queryAllByLocalName(chartDoc, "t").map((n) => (n.textContent ?? "").trim()).find((v) => v.length > 0) ?? "Chart";
|
|
292
|
+
const seriesCount = queryAllByLocalName(chartDoc, "ser").length;
|
|
293
|
+
const pointCount = queryAllByLocalName(chartDoc, "pt").length;
|
|
294
|
+
const type = parseChartType(chartDoc);
|
|
295
|
+
return { title, type, seriesCount, pointCount };
|
|
296
|
+
}
|
|
297
|
+
function extractSmartArtText(diagramXmlText) {
|
|
298
|
+
const diagramDoc = parseXml(diagramXmlText);
|
|
299
|
+
return queryAllByLocalName(diagramDoc, "t").map((n) => (n.textContent ?? "").trim()).filter((v) => v.length > 0).slice(0, 12);
|
|
300
|
+
}
|
|
301
|
+
function ommlNodeToText(node) {
|
|
302
|
+
if (node.localName === "t") return node.textContent ?? "";
|
|
303
|
+
if (node.localName === "f") {
|
|
304
|
+
const num = queryByLocalName(node, "num");
|
|
305
|
+
const den = queryByLocalName(node, "den");
|
|
306
|
+
return `(${num ? ommlNodeToText(num) : "?"})/(${den ? ommlNodeToText(den) : "?"})`;
|
|
307
|
+
}
|
|
308
|
+
if (node.localName === "sSup") {
|
|
309
|
+
const e = queryByLocalName(node, "e");
|
|
310
|
+
const sup = queryByLocalName(node, "sup");
|
|
311
|
+
return `${e ? ommlNodeToText(e) : ""}^(${sup ? ommlNodeToText(sup) : ""})`;
|
|
312
|
+
}
|
|
313
|
+
if (node.localName === "sSub") {
|
|
314
|
+
const e = queryByLocalName(node, "e");
|
|
315
|
+
const sub = queryByLocalName(node, "sub");
|
|
316
|
+
return `${e ? ommlNodeToText(e) : ""}_(${sub ? ommlNodeToText(sub) : ""})`;
|
|
317
|
+
}
|
|
318
|
+
if (node.localName === "rad") {
|
|
319
|
+
const e = queryByLocalName(node, "e");
|
|
320
|
+
return `sqrt(${e ? ommlNodeToText(e) : ""})`;
|
|
321
|
+
}
|
|
322
|
+
return Array.from(node.children).map((child) => ommlNodeToText(child)).join("");
|
|
323
|
+
}
|
|
193
324
|
function runStyleToCss(rPr) {
|
|
194
325
|
if (!rPr) return "";
|
|
195
326
|
const declarations = [];
|
|
@@ -304,48 +435,73 @@ function renderEndnotesSection(usedIds, endnotesMap) {
|
|
|
304
435
|
const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
|
|
305
436
|
return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
|
|
306
437
|
}
|
|
307
|
-
async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
|
|
438
|
+
async function paragraphToHtml(zip, relMap, context, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
|
|
308
439
|
const tag = paragraphTag(paragraph);
|
|
309
440
|
const alignStyle = paragraphAlignStyle(paragraph);
|
|
310
441
|
const dataAttr = paragraphDataAttr(paragraphIndex);
|
|
311
|
-
const
|
|
312
|
-
if (
|
|
442
|
+
const hasRenderableNode = queryAllByLocalName(paragraph, "r").length > 0 || queryAllByLocalName(paragraph, "oMath").length > 0 || queryAllByLocalName(paragraph, "oMathPara").length > 0;
|
|
443
|
+
if (!hasRenderableNode) {
|
|
313
444
|
return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}><br/></${tag}>`;
|
|
314
445
|
}
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
446
|
+
function parseRevisionMeta(node, type) {
|
|
447
|
+
return {
|
|
448
|
+
type,
|
|
449
|
+
id: getAttr(node, "w:id") ?? getAttr(node, "id"),
|
|
450
|
+
author: getAttr(node, "w:author") ?? getAttr(node, "author"),
|
|
451
|
+
date: getAttr(node, "w:date") ?? getAttr(node, "date")
|
|
452
|
+
};
|
|
453
|
+
}
|
|
454
|
+
function inferRevisionMeta(run, fallback) {
|
|
455
|
+
if (fallback) return fallback;
|
|
456
|
+
let cursor = run;
|
|
457
|
+
while (cursor) {
|
|
458
|
+
if (cursor.localName === "ins") return parseRevisionMeta(cursor, "ins");
|
|
459
|
+
if (cursor.localName === "del") return parseRevisionMeta(cursor, "del");
|
|
460
|
+
if (cursor.localName === "p") break;
|
|
461
|
+
cursor = cursor.parentElement;
|
|
462
|
+
}
|
|
463
|
+
return null;
|
|
464
|
+
}
|
|
465
|
+
function revisionMetaAttrs(meta) {
|
|
466
|
+
const attrs = [`data-word-revision="${meta.type}"`];
|
|
467
|
+
if (meta.id) attrs.push(`data-word-revision-id="${escapeHtml(meta.id)}"`);
|
|
468
|
+
if (meta.author) attrs.push(`data-word-revision-author="${escapeHtml(meta.author)}"`);
|
|
469
|
+
if (meta.date) attrs.push(`data-word-revision-date="${escapeHtml(meta.date)}"`);
|
|
470
|
+
return attrs.join(" ");
|
|
319
471
|
}
|
|
320
|
-
|
|
472
|
+
async function runToHtml(run, revisionFallback) {
|
|
473
|
+
const result = [];
|
|
321
474
|
const rPr = queryByLocalName(run, "rPr");
|
|
322
475
|
const css = runStyleToCss(rPr);
|
|
323
476
|
const footnoteRef = queryByLocalName(run, "footnoteReference");
|
|
324
477
|
const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
|
|
325
478
|
if (footnoteId && footnotesMap[footnoteId]) {
|
|
479
|
+
context.features.footnoteRefCount += 1;
|
|
326
480
|
usedFootnoteIds.push(footnoteId);
|
|
327
|
-
|
|
481
|
+
result.push(
|
|
328
482
|
`<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
|
|
329
483
|
);
|
|
330
|
-
|
|
484
|
+
return result;
|
|
331
485
|
}
|
|
332
486
|
const endnoteRef = queryByLocalName(run, "endnoteReference");
|
|
333
487
|
const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
|
|
334
488
|
if (endnoteId && endnotesMap[endnoteId]) {
|
|
489
|
+
context.features.endnoteRefCount += 1;
|
|
335
490
|
usedEndnoteIds.push(endnoteId);
|
|
336
|
-
|
|
491
|
+
result.push(
|
|
337
492
|
`<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
|
|
338
493
|
);
|
|
339
|
-
|
|
494
|
+
return result;
|
|
340
495
|
}
|
|
341
496
|
const commentRef = queryByLocalName(run, "commentReference");
|
|
342
497
|
const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
|
|
343
498
|
if (commentId && commentsMap[commentId]) {
|
|
499
|
+
context.features.commentRefCount += 1;
|
|
344
500
|
usedCommentIds.push(commentId);
|
|
345
|
-
|
|
501
|
+
result.push(
|
|
346
502
|
`<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
|
|
347
503
|
);
|
|
348
|
-
|
|
504
|
+
return result;
|
|
349
505
|
}
|
|
350
506
|
const drawing = queryByLocalName(run, "drawing");
|
|
351
507
|
if (drawing) {
|
|
@@ -356,13 +512,38 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
356
512
|
if (src) {
|
|
357
513
|
const imageSize = parseDrawingSizePx(drawing);
|
|
358
514
|
const dimensionAttrs = imageDimensionAttributes(imageSize);
|
|
359
|
-
const
|
|
360
|
-
const
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
515
|
+
const anchorMeta = parseAnchorMeta(drawing);
|
|
516
|
+
const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
|
|
517
|
+
if (anchorMeta) context.features.anchorImageCount += 1;
|
|
518
|
+
result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
|
|
519
|
+
return result;
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
const chartRef = queryByLocalName(drawing, "chart");
|
|
523
|
+
const chartRid = getAttr(chartRef, "r:id") ?? getAttr(chartRef, "id");
|
|
524
|
+
if (chartRid) {
|
|
525
|
+
const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
|
|
526
|
+
if (chartXmlText) {
|
|
527
|
+
const summary = parseChartSummary(chartXmlText);
|
|
528
|
+
context.features.chartCount += 1;
|
|
529
|
+
result.push(
|
|
530
|
+
`<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
|
|
531
|
+
);
|
|
532
|
+
return result;
|
|
364
533
|
}
|
|
365
534
|
}
|
|
535
|
+
const smartArtRef = queryByLocalName(drawing, "relIds");
|
|
536
|
+
const smartArtRid = getAttr(smartArtRef, "r:dm") ?? getAttr(smartArtRef, "dm");
|
|
537
|
+
if (smartArtRid) {
|
|
538
|
+
const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
|
|
539
|
+
const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
|
|
540
|
+
context.features.smartArtCount += 1;
|
|
541
|
+
const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
|
|
542
|
+
result.push(
|
|
543
|
+
`<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
|
|
544
|
+
);
|
|
545
|
+
return result;
|
|
546
|
+
}
|
|
366
547
|
}
|
|
367
548
|
const texts = queryAllByLocalName(run, "t").map((t) => t.textContent ?? "").join("");
|
|
368
549
|
const delTexts = queryAllByLocalName(run, "delText").map((t) => t.textContent ?? "").join("");
|
|
@@ -373,40 +554,86 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
373
554
|
}).length;
|
|
374
555
|
const lineBreakCount = Math.max(0, brNodes.length - pageBreakCount);
|
|
375
556
|
const runText2 = `${escapeHtml(texts || delTexts)}${"<br/>".repeat(lineBreakCount)}`;
|
|
376
|
-
if (
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
557
|
+
if (runText2) {
|
|
558
|
+
const revisionMeta = inferRevisionMeta(run, revisionFallback);
|
|
559
|
+
if (css) {
|
|
560
|
+
const span = `<span style="${css}">${runText2}</span>`;
|
|
561
|
+
if (revisionMeta) {
|
|
562
|
+
context.features.revisionCount += 1;
|
|
563
|
+
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
564
|
+
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
|
|
565
|
+
} else {
|
|
566
|
+
result.push(span);
|
|
567
|
+
}
|
|
568
|
+
} else if (revisionMeta) {
|
|
569
|
+
context.features.revisionCount += 1;
|
|
570
|
+
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
571
|
+
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
|
|
572
|
+
} else {
|
|
573
|
+
result.push(runText2);
|
|
387
574
|
}
|
|
388
|
-
if (cursor.localName === "p") break;
|
|
389
|
-
cursor = cursor.parentElement;
|
|
390
575
|
}
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
576
|
+
for (let i = 0; i < pageBreakCount; i += 1) {
|
|
577
|
+
context.features.pageBreakCount += 1;
|
|
578
|
+
result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
579
|
+
}
|
|
580
|
+
return result;
|
|
581
|
+
}
|
|
582
|
+
async function nodeToHtml(node, revisionFallback) {
|
|
583
|
+
if (node.localName === "commentRangeStart") {
|
|
584
|
+
const id = getAttr(node, "w:id") ?? getAttr(node, "id");
|
|
585
|
+
return id ? [`<span data-word-comment-range-start="${id}"></span>`] : [];
|
|
586
|
+
}
|
|
587
|
+
if (node.localName === "commentRangeEnd") {
|
|
588
|
+
const id = getAttr(node, "w:id") ?? getAttr(node, "id");
|
|
589
|
+
return id ? [`<span data-word-comment-range-end="${id}"></span>`] : [];
|
|
590
|
+
}
|
|
591
|
+
if (node.localName === "r") {
|
|
592
|
+
return runToHtml(node, revisionFallback);
|
|
593
|
+
}
|
|
594
|
+
if (node.localName === "hyperlink") {
|
|
595
|
+
const rid = getAttr(node, "r:id") ?? getAttr(node, "id");
|
|
596
|
+
const anchor = getAttr(node, "w:anchor") ?? getAttr(node, "anchor");
|
|
597
|
+
const href = resolveHyperlinkHref(relMap, rid, anchor);
|
|
598
|
+
const nested2 = [];
|
|
599
|
+
for (const child of Array.from(node.children)) {
|
|
600
|
+
nested2.push(...await nodeToHtml(child, revisionFallback));
|
|
398
601
|
}
|
|
399
|
-
|
|
400
|
-
if (
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
602
|
+
const content2 = nested2.join("") || escapeHtml(node.textContent ?? "");
|
|
603
|
+
if (!href) return content2 ? [content2] : [];
|
|
604
|
+
context.features.hyperlinkCount += 1;
|
|
605
|
+
return [
|
|
606
|
+
`<a data-word-hyperlink="1" href="${escapeHtml(href)}" rel="noreferrer noopener" target="_blank">${content2}</a>`
|
|
607
|
+
];
|
|
608
|
+
}
|
|
609
|
+
if (node.localName === "oMath" || node.localName === "oMathPara") {
|
|
610
|
+
const linear = ommlNodeToText(node).trim();
|
|
611
|
+
if (!linear) return [];
|
|
612
|
+
context.features.ommlCount += 1;
|
|
613
|
+
return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
|
|
614
|
+
}
|
|
615
|
+
if (node.localName === "ins" || node.localName === "del") {
|
|
616
|
+
const scopedMeta = parseRevisionMeta(node, node.localName === "ins" ? "ins" : "del");
|
|
617
|
+
const nested2 = [];
|
|
618
|
+
for (const child of Array.from(node.children)) {
|
|
619
|
+
nested2.push(...await nodeToHtml(child, scopedMeta));
|
|
405
620
|
}
|
|
621
|
+
return nested2;
|
|
406
622
|
}
|
|
407
|
-
|
|
408
|
-
|
|
623
|
+
const nested = [];
|
|
624
|
+
for (const child of Array.from(node.children)) {
|
|
625
|
+
nested.push(...await nodeToHtml(child, revisionFallback));
|
|
409
626
|
}
|
|
627
|
+
return nested;
|
|
628
|
+
}
|
|
629
|
+
const parts = [];
|
|
630
|
+
const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
|
|
631
|
+
for (let i = 0; i < renderedPageBreakCount; i += 1) {
|
|
632
|
+
context.features.pageBreakCount += 1;
|
|
633
|
+
parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
634
|
+
}
|
|
635
|
+
for (const child of Array.from(paragraph.children)) {
|
|
636
|
+
parts.push(...await nodeToHtml(child, null));
|
|
410
637
|
}
|
|
411
638
|
const content = parts.join("") || "<br/>";
|
|
412
639
|
return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}>${content}</${tag}>`;
|
|
@@ -436,7 +663,102 @@ function parseTcVMerge(tc) {
|
|
|
436
663
|
const rawVal = (getAttr(vMerge, "w:val") ?? getAttr(vMerge, "val") ?? "continue").toLowerCase();
|
|
437
664
|
return rawVal === "restart" ? "restart" : "continue";
|
|
438
665
|
}
|
|
439
|
-
function tableCellHtml(cell, paragraphIndexMap) {
|
|
666
|
+
function parseTblGridWidthsPx(table) {
|
|
667
|
+
const grid = directChildrenByLocalName(table, "tblGrid")[0] ?? null;
|
|
668
|
+
if (!grid) return [];
|
|
669
|
+
return directChildrenByLocalName(grid, "gridCol").map((col) => {
|
|
670
|
+
const raw = getAttr(col, "w:w") ?? getAttr(col, "w");
|
|
671
|
+
const twip = raw ? Number.parseInt(raw, 10) : Number.NaN;
|
|
672
|
+
return Number.isFinite(twip) && twip > 0 ? twipToPx(twip) : 0;
|
|
673
|
+
}).filter((px) => px > 0);
|
|
674
|
+
}
|
|
675
|
+
function borderSizeToPx(size) {
|
|
676
|
+
return size / 6;
|
|
677
|
+
}
|
|
678
|
+
function parseBorderCss(borderNode) {
|
|
679
|
+
if (!borderNode) return null;
|
|
680
|
+
const val = (getAttr(borderNode, "w:val") ?? getAttr(borderNode, "val") ?? "").toLowerCase();
|
|
681
|
+
if (!val || val === "nil" || val === "none") return "none";
|
|
682
|
+
const color = (getAttr(borderNode, "w:color") ?? getAttr(borderNode, "color") ?? "222222").replace(/^#/, "");
|
|
683
|
+
const rawSize = getAttr(borderNode, "w:sz") ?? getAttr(borderNode, "sz");
|
|
684
|
+
const size = rawSize ? Number.parseInt(rawSize, 10) : Number.NaN;
|
|
685
|
+
const px = Number.isFinite(size) && size > 0 ? borderSizeToPx(size) : 1;
|
|
686
|
+
const style = val === "single" ? "solid" : val;
|
|
687
|
+
return `${px.toFixed(2)}px ${style} #${color}`;
|
|
688
|
+
}
|
|
689
|
+
function parseTableStyleProfile(table) {
|
|
690
|
+
const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
|
|
691
|
+
const tblBorders = tblPr ? directChildrenByLocalName(tblPr, "tblBorders")[0] ?? null : null;
|
|
692
|
+
const layout = tblPr ? directChildrenByLocalName(tblPr, "tblLayout")[0] ?? null : null;
|
|
693
|
+
const spacing = tblPr ? directChildrenByLocalName(tblPr, "tblCellSpacing")[0] ?? null : null;
|
|
694
|
+
const spacingType = (getAttr(spacing, "w:type") ?? getAttr(spacing, "type") ?? "dxa").toLowerCase();
|
|
695
|
+
const spacingRaw = getAttr(spacing, "w:w") ?? getAttr(spacing, "w");
|
|
696
|
+
const spacingVal = spacingRaw ? Number.parseFloat(spacingRaw) : Number.NaN;
|
|
697
|
+
const borderSpacingPx = spacingType === "dxa" && Number.isFinite(spacingVal) && spacingVal > 0 ? twipToPx(spacingVal) : 0;
|
|
698
|
+
const borderCollapse = borderSpacingPx > 0 ? "separate" : "collapse";
|
|
699
|
+
const tableLayout = (getAttr(layout, "w:type") ?? getAttr(layout, "type") ?? "").toLowerCase() === "autofit" ? "auto" : "fixed";
|
|
700
|
+
const top = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "top")[0] ?? null : null);
|
|
701
|
+
const bottom = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "bottom")[0] ?? null : null);
|
|
702
|
+
const left = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "left")[0] ?? null : null);
|
|
703
|
+
const right = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "right")[0] ?? null : null);
|
|
704
|
+
const insideH = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "insideH")[0] ?? null : null);
|
|
705
|
+
const insideV = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "insideV")[0] ?? null : null);
|
|
706
|
+
const borderCss = top ?? right ?? bottom ?? left ?? "1px solid #222";
|
|
707
|
+
return {
|
|
708
|
+
tableLayout,
|
|
709
|
+
borderCollapse,
|
|
710
|
+
borderSpacingPx,
|
|
711
|
+
borderCss,
|
|
712
|
+
insideHCss: insideH,
|
|
713
|
+
insideVCss: insideV
|
|
714
|
+
};
|
|
715
|
+
}
|
|
716
|
+
function parseTableWidthStyle(table, gridWidthsPx) {
|
|
717
|
+
const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
|
|
718
|
+
const tblW = tblPr ? directChildrenByLocalName(tblPr, "tblW")[0] ?? null : null;
|
|
719
|
+
const type = (getAttr(tblW, "w:type") ?? getAttr(tblW, "type") ?? "").toLowerCase();
|
|
720
|
+
const rawVal = getAttr(tblW, "w:w") ?? getAttr(tblW, "w");
|
|
721
|
+
const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
|
|
722
|
+
if (type === "dxa" && Number.isFinite(numericVal) && numericVal > 0) {
|
|
723
|
+
return `width:${twipToPx(numericVal).toFixed(2)}px`;
|
|
724
|
+
}
|
|
725
|
+
if (type === "pct" && Number.isFinite(numericVal) && numericVal > 0) {
|
|
726
|
+
return `width:${(numericVal / 50).toFixed(2)}%`;
|
|
727
|
+
}
|
|
728
|
+
const gridTotal = gridWidthsPx.reduce((sum, item) => sum + item, 0);
|
|
729
|
+
if (gridTotal > 0) return `width:${gridTotal.toFixed(2)}px;max-width:100%`;
|
|
730
|
+
return "width:100%";
|
|
731
|
+
}
|
|
732
|
+
function parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx) {
|
|
733
|
+
const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
|
|
734
|
+
const tcW = tcPr ? directChildrenByLocalName(tcPr, "tcW")[0] ?? null : null;
|
|
735
|
+
const type = (getAttr(tcW, "w:type") ?? getAttr(tcW, "type") ?? "").toLowerCase();
|
|
736
|
+
const rawVal = getAttr(tcW, "w:w") ?? getAttr(tcW, "w");
|
|
737
|
+
const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
|
|
738
|
+
if (type === "dxa" && Number.isFinite(numericVal) && numericVal > 0) {
|
|
739
|
+
return `width:${twipToPx(numericVal).toFixed(2)}px`;
|
|
740
|
+
}
|
|
741
|
+
if (type === "pct" && Number.isFinite(numericVal) && numericVal > 0) {
|
|
742
|
+
return `width:${(numericVal / 50).toFixed(2)}%`;
|
|
743
|
+
}
|
|
744
|
+
const width = gridWidthsPx.slice(colCursor, colCursor + colSpan).reduce((sum, item) => sum + item, 0);
|
|
745
|
+
if (width > 0) return `width:${width.toFixed(2)}px`;
|
|
746
|
+
return "";
|
|
747
|
+
}
|
|
748
|
+
function parseCellBorderStyle(cell, tableStyle) {
|
|
749
|
+
const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
|
|
750
|
+
const tcBorders = tcPr ? directChildrenByLocalName(tcPr, "tcBorders")[0] ?? null : null;
|
|
751
|
+
if (!tcBorders) {
|
|
752
|
+
const fallback = tableStyle.insideHCss ?? tableStyle.insideVCss ?? tableStyle.borderCss;
|
|
753
|
+
return `border:${fallback}`;
|
|
754
|
+
}
|
|
755
|
+
const top = parseBorderCss(directChildrenByLocalName(tcBorders, "top")[0] ?? null) ?? tableStyle.insideHCss ?? tableStyle.borderCss;
|
|
756
|
+
const right = parseBorderCss(directChildrenByLocalName(tcBorders, "right")[0] ?? null) ?? tableStyle.insideVCss ?? tableStyle.borderCss;
|
|
757
|
+
const bottom = parseBorderCss(directChildrenByLocalName(tcBorders, "bottom")[0] ?? null) ?? tableStyle.insideHCss ?? tableStyle.borderCss;
|
|
758
|
+
const left = parseBorderCss(directChildrenByLocalName(tcBorders, "left")[0] ?? null) ?? tableStyle.insideVCss ?? tableStyle.borderCss;
|
|
759
|
+
return `border-top:${top};border-right:${right};border-bottom:${bottom};border-left:${left}`;
|
|
760
|
+
}
|
|
761
|
+
function tableCellHtml(cell, paragraphIndexMap, context) {
|
|
440
762
|
const blocks = [];
|
|
441
763
|
for (const child of Array.from(cell.children)) {
|
|
442
764
|
if (child.localName === "tcPr") continue;
|
|
@@ -446,7 +768,7 @@ function tableCellHtml(cell, paragraphIndexMap) {
|
|
|
446
768
|
continue;
|
|
447
769
|
}
|
|
448
770
|
if (child.localName === "tbl") {
|
|
449
|
-
blocks.push(tableToHtml(child, paragraphIndexMap));
|
|
771
|
+
blocks.push(tableToHtml(child, paragraphIndexMap, context));
|
|
450
772
|
continue;
|
|
451
773
|
}
|
|
452
774
|
}
|
|
@@ -454,8 +776,11 @@ function tableCellHtml(cell, paragraphIndexMap) {
|
|
|
454
776
|
const text = queryAllByLocalName(cell, "t").map((t) => t.textContent ?? "").join("").trim();
|
|
455
777
|
return escapeHtml(text) || "<br/>";
|
|
456
778
|
}
|
|
457
|
-
function tableToHtml(table, paragraphIndexMap) {
|
|
779
|
+
function tableToHtml(table, paragraphIndexMap, context) {
|
|
780
|
+
context.features.tableCount += 1;
|
|
458
781
|
const rows = directChildrenByLocalName(table, "tr");
|
|
782
|
+
const gridWidthsPx = parseTblGridWidthsPx(table);
|
|
783
|
+
const tableStyle = parseTableStyleProfile(table);
|
|
459
784
|
const activeByCol = /* @__PURE__ */ new Map();
|
|
460
785
|
const allOrigins = [];
|
|
461
786
|
let nextOriginId = 1;
|
|
@@ -480,8 +805,10 @@ function tableToHtml(table, paragraphIndexMap) {
|
|
|
480
805
|
while (activeByCol.has(colCursor)) {
|
|
481
806
|
colCursor += 1;
|
|
482
807
|
}
|
|
483
|
-
const html = tableCellHtml(cell, paragraphIndexMap);
|
|
808
|
+
const html = tableCellHtml(cell, paragraphIndexMap, context);
|
|
484
809
|
const attrs = [];
|
|
810
|
+
const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
|
|
811
|
+
const borderStyle = parseCellBorderStyle(cell, tableStyle);
|
|
485
812
|
if (vMerge === "restart") {
|
|
486
813
|
const origin = {
|
|
487
814
|
id: `m${nextOriginId}`,
|
|
@@ -499,7 +826,7 @@ function tableToHtml(table, paragraphIndexMap) {
|
|
|
499
826
|
}
|
|
500
827
|
if (colSpan > 1) attrs.push(`colspan="${colSpan}"`);
|
|
501
828
|
emittedCells.push(
|
|
502
|
-
`<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="
|
|
829
|
+
`<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="${borderStyle};vertical-align:top;${widthStyle}">${html}</td>`
|
|
503
830
|
);
|
|
504
831
|
colCursor += colSpan;
|
|
505
832
|
}
|
|
@@ -518,9 +845,13 @@ function tableToHtml(table, paragraphIndexMap) {
|
|
|
518
845
|
const replacement = origin.rowSpan > 1 ? `rowspan="${origin.rowSpan}"` : "";
|
|
519
846
|
merged = merged.replace(marker, replacement).replace(/\s{2,}/g, " ");
|
|
520
847
|
}
|
|
521
|
-
|
|
848
|
+
const tableWidthStyle = parseTableWidthStyle(table, gridWidthsPx);
|
|
849
|
+
const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
|
|
850
|
+
return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
|
|
522
851
|
}
|
|
523
|
-
async function
|
|
852
|
+
async function parseDocxToHtmlSnapshotWithReport(file) {
|
|
853
|
+
const startedAt = Date.now();
|
|
854
|
+
const context = { features: createEmptyFeatureCounts() };
|
|
524
855
|
const maybeArrayBuffer = file.arrayBuffer;
|
|
525
856
|
const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
|
|
526
857
|
const zip = await import_jszip.default.loadAsync(buffer);
|
|
@@ -557,6 +888,7 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
557
888
|
await paragraphToHtml(
|
|
558
889
|
zip,
|
|
559
890
|
relMap,
|
|
891
|
+
context,
|
|
560
892
|
child,
|
|
561
893
|
paragraphIndex,
|
|
562
894
|
footnotesMap,
|
|
@@ -570,14 +902,24 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
570
902
|
continue;
|
|
571
903
|
}
|
|
572
904
|
if (child.localName === "tbl") {
|
|
573
|
-
blockHtml.push(tableToHtml(child, paragraphIndexMap));
|
|
905
|
+
blockHtml.push(tableToHtml(child, paragraphIndexMap, context));
|
|
574
906
|
continue;
|
|
575
907
|
}
|
|
576
908
|
}
|
|
577
909
|
blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
|
|
578
910
|
blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
|
|
579
911
|
blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
|
|
580
|
-
return
|
|
912
|
+
return {
|
|
913
|
+
htmlSnapshot: buildHtmlSnapshot(blockHtml.join("\n")),
|
|
914
|
+
report: {
|
|
915
|
+
elapsedMs: Date.now() - startedAt,
|
|
916
|
+
features: context.features
|
|
917
|
+
}
|
|
918
|
+
};
|
|
919
|
+
}
|
|
920
|
+
async function parseDocxToHtmlSnapshot(file) {
|
|
921
|
+
const result = await parseDocxToHtmlSnapshotWithReport(file);
|
|
922
|
+
return result.htmlSnapshot;
|
|
581
923
|
}
|
|
582
924
|
|
|
583
925
|
// src/lib/pastePipeline.ts
|
|
@@ -808,7 +1150,7 @@ function createFallbackWordStyleProfile(sourceFileName = "snapshot") {
|
|
|
808
1150
|
paragraphProfiles: []
|
|
809
1151
|
};
|
|
810
1152
|
}
|
|
811
|
-
function
|
|
1153
|
+
function twipToPx2(twip) {
|
|
812
1154
|
return twip / 15;
|
|
813
1155
|
}
|
|
814
1156
|
function getAttr2(node, attr) {
|
|
@@ -852,10 +1194,10 @@ function parsePageGeometry(documentXml) {
|
|
|
852
1194
|
const top = getTwipAttr(pgMar, "w:top") ?? getTwipAttr(pgMar, "top") ?? null;
|
|
853
1195
|
const bottom = getTwipAttr(pgMar, "w:bottom") ?? getTwipAttr(pgMar, "bottom") ?? null;
|
|
854
1196
|
return {
|
|
855
|
-
contentWidthPx: pageW === null ? null :
|
|
856
|
-
pageHeightPx: pageH === null ? null :
|
|
857
|
-
marginTopPx: top === null ? null :
|
|
858
|
-
marginBottomPx: bottom === null ? null :
|
|
1197
|
+
contentWidthPx: pageW === null ? null : twipToPx2(pageW - left - right),
|
|
1198
|
+
pageHeightPx: pageH === null ? null : twipToPx2(pageH),
|
|
1199
|
+
marginTopPx: top === null ? null : twipToPx2(top),
|
|
1200
|
+
marginBottomPx: bottom === null ? null : twipToPx2(bottom)
|
|
859
1201
|
};
|
|
860
1202
|
}
|
|
861
1203
|
function parseHeadingAlignFromDocument(documentXml) {
|
|
@@ -1028,15 +1370,15 @@ function parseParagraphProfiles(documentXml, numberingMap) {
|
|
|
1028
1370
|
text,
|
|
1029
1371
|
isEmpty: text.length === 0,
|
|
1030
1372
|
align: parseParagraphAlign(paragraph),
|
|
1031
|
-
beforePx: before === null ? null :
|
|
1032
|
-
afterPx: after === null ? null :
|
|
1373
|
+
beforePx: before === null ? null : twipToPx2(before),
|
|
1374
|
+
afterPx: after === null ? null : twipToPx2(after),
|
|
1033
1375
|
lineHeightRatio: line === null || lineHeightRule !== "auto" ? null : line / 240,
|
|
1034
|
-
lineHeightPx: line === null || lineHeightRule === "auto" ? null :
|
|
1376
|
+
lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx2(line),
|
|
1035
1377
|
lineHeightRule,
|
|
1036
|
-
indentLeftPx: left === null ? null :
|
|
1037
|
-
indentRightPx: right === null ? null :
|
|
1038
|
-
firstLinePx: firstLine === null ? null :
|
|
1039
|
-
hangingPx: hanging === null ? null :
|
|
1378
|
+
indentLeftPx: left === null ? null : twipToPx2(left),
|
|
1379
|
+
indentRightPx: right === null ? null : twipToPx2(right),
|
|
1380
|
+
firstLinePx: firstLine === null ? null : twipToPx2(firstLine),
|
|
1381
|
+
hangingPx: hanging === null ? null : twipToPx2(hanging),
|
|
1040
1382
|
listNumId,
|
|
1041
1383
|
listLevel,
|
|
1042
1384
|
listFormat: listSpec?.numFmt ?? null,
|
|
@@ -1071,19 +1413,19 @@ function parseTableDefaults(stylesXml) {
|
|
|
1071
1413
|
return {
|
|
1072
1414
|
topPx: (() => {
|
|
1073
1415
|
const v = getTwipAttr(top, "w:w") ?? getTwipAttr(top, "w") ?? null;
|
|
1074
|
-
return v === null ? null :
|
|
1416
|
+
return v === null ? null : twipToPx2(v);
|
|
1075
1417
|
})(),
|
|
1076
1418
|
leftPx: (() => {
|
|
1077
1419
|
const v = getTwipAttr(left, "w:w") ?? getTwipAttr(left, "w") ?? null;
|
|
1078
|
-
return v === null ? null :
|
|
1420
|
+
return v === null ? null : twipToPx2(v);
|
|
1079
1421
|
})(),
|
|
1080
1422
|
bottomPx: (() => {
|
|
1081
1423
|
const v = getTwipAttr(bottom, "w:w") ?? getTwipAttr(bottom, "w") ?? null;
|
|
1082
|
-
return v === null ? null :
|
|
1424
|
+
return v === null ? null : twipToPx2(v);
|
|
1083
1425
|
})(),
|
|
1084
1426
|
rightPx: (() => {
|
|
1085
1427
|
const v = getTwipAttr(right, "w:w") ?? getTwipAttr(right, "w") ?? null;
|
|
1086
|
-
return v === null ? null :
|
|
1428
|
+
return v === null ? null : twipToPx2(v);
|
|
1087
1429
|
})()
|
|
1088
1430
|
};
|
|
1089
1431
|
}
|
|
@@ -1181,9 +1523,9 @@ function parseDefaults(stylesXml) {
|
|
|
1181
1523
|
const rawLineRule = (getAttr2(spacing, "w:lineRule") ?? getAttr2(spacing, "lineRule") ?? "auto").toLowerCase();
|
|
1182
1524
|
const bodyLineHeightRule = rawLineRule === "exact" ? "exact" : rawLineRule === "atleast" ? "atLeast" : "auto";
|
|
1183
1525
|
const bodyLineHeightRatio = line === null || bodyLineHeightRule !== "auto" ? null : line / 240;
|
|
1184
|
-
const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null :
|
|
1526
|
+
const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx2(line);
|
|
1185
1527
|
const after = getTwipAttr(spacing, "w:after") ?? getTwipAttr(spacing, "after") ?? null;
|
|
1186
|
-
const paragraphAfterPx = after === null ? null :
|
|
1528
|
+
const paragraphAfterPx = after === null ? null : twipToPx2(after);
|
|
1187
1529
|
return { bodyFontPx, bodyLineHeightRatio, bodyLineHeightPx, bodyLineHeightRule, paragraphAfterPx };
|
|
1188
1530
|
}
|
|
1189
1531
|
function parseHeading1Style(stylesXml) {
|
|
@@ -1708,7 +2050,7 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
|
|
|
1708
2050
|
}
|
|
1709
2051
|
|
|
1710
2052
|
// src/core/DocsWordElement.ts
|
|
1711
|
-
var VERSION = "0.1.
|
|
2053
|
+
var VERSION = "0.1.5";
|
|
1712
2054
|
var MESSAGES = {
|
|
1713
2055
|
zh: {
|
|
1714
2056
|
readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
|
|
@@ -1851,15 +2193,15 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
1851
2193
|
}
|
|
1852
2194
|
async applyDocx(file) {
|
|
1853
2195
|
try {
|
|
1854
|
-
const [
|
|
1855
|
-
|
|
2196
|
+
const [parseResult, profile] = await Promise.all([
|
|
2197
|
+
parseDocxToHtmlSnapshotWithReport(file),
|
|
1856
2198
|
parseDocxStyleProfile(file)
|
|
1857
2199
|
]);
|
|
1858
2200
|
this.styleProfile = profile;
|
|
1859
|
-
this.htmlSnapshot =
|
|
2201
|
+
this.htmlSnapshot = parseResult.htmlSnapshot;
|
|
1860
2202
|
this.renderSnapshot();
|
|
1861
2203
|
this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
|
|
1862
|
-
this.emitChange("upload", profile.sourceFileName);
|
|
2204
|
+
this.emitChange("upload", profile.sourceFileName, parseResult.report);
|
|
1863
2205
|
} catch (error) {
|
|
1864
2206
|
this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
|
|
1865
2207
|
}
|
|
@@ -1919,8 +2261,10 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
1919
2261
|
renderSnapshot() {
|
|
1920
2262
|
this.frame.srcdoc = this.htmlSnapshot;
|
|
1921
2263
|
}
|
|
1922
|
-
emitChange(source, fileName) {
|
|
1923
|
-
this.dispatchEvent(
|
|
2264
|
+
emitChange(source, fileName, parseReport) {
|
|
2265
|
+
this.dispatchEvent(
|
|
2266
|
+
new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName, parseReport } })
|
|
2267
|
+
);
|
|
1924
2268
|
}
|
|
1925
2269
|
emitError(message) {
|
|
1926
2270
|
this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
|
|
@@ -1976,6 +2320,9 @@ function collectSemanticStatsFromDocument(doc) {
|
|
|
1976
2320
|
imageCount: countElements(doc, "img"),
|
|
1977
2321
|
anchorImageCount: countElements(doc, 'img[data-word-anchor="1"]'),
|
|
1978
2322
|
wrappedImageCount: countElements(doc, "img[data-word-wrap]"),
|
|
2323
|
+
ommlCount: countElements(doc, "[data-word-omml]"),
|
|
2324
|
+
chartCount: countElements(doc, "[data-word-chart]"),
|
|
2325
|
+
smartArtCount: countElements(doc, "[data-word-smartart]"),
|
|
1979
2326
|
listParagraphCount,
|
|
1980
2327
|
commentRefCount: countElements(doc, "[data-word-comment-ref]"),
|
|
1981
2328
|
revisionInsCount: countElements(doc, '[data-word-revision="ins"]'),
|
|
@@ -2006,7 +2353,7 @@ function clamp01(v) {
|
|
|
2006
2353
|
}
|
|
2007
2354
|
function calculateFidelityScore(expected, actual) {
|
|
2008
2355
|
const structure = clamp01(
|
|
2009
|
-
(ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) /
|
|
2356
|
+
(ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.ommlCount, expected.ommlCount) + ratioScore(actual.chartCount, expected.chartCount) + ratioScore(actual.smartArtCount, expected.smartArtCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) / 9
|
|
2010
2357
|
);
|
|
2011
2358
|
const styleProxy = clamp01(ratioScore(actual.textCharCount, expected.textCharCount));
|
|
2012
2359
|
const pagination = clamp01(ratioScore(actual.pageSpacerCount, expected.pageSpacerCount));
|
|
@@ -2019,6 +2366,8 @@ function calculateFidelityScore(expected, actual) {
|
|
|
2019
2366
|
calculateFidelityScore,
|
|
2020
2367
|
collectSemanticStatsFromDocument,
|
|
2021
2368
|
collectSemanticStatsFromHtml,
|
|
2022
|
-
defineDocsWordElement
|
|
2369
|
+
defineDocsWordElement,
|
|
2370
|
+
parseDocxToHtmlSnapshot,
|
|
2371
|
+
parseDocxToHtmlSnapshotWithReport
|
|
2023
2372
|
});
|
|
2024
2373
|
//# sourceMappingURL=index.cjs.map
|