@coding01/docsjs 0.1.2 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -31,8 +31,9 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
33
  DocsWordElement: () => DocsWordElement,
34
- WordFidelityEditorReact: () => WordFidelityEditorReact,
35
- WordFidelityEditorVue: () => WordFidelityEditorVue,
34
+ calculateFidelityScore: () => calculateFidelityScore,
35
+ collectSemanticStatsFromDocument: () => collectSemanticStatsFromDocument,
36
+ collectSemanticStatsFromHtml: () => collectSemanticStatsFromHtml,
36
37
  defineDocsWordElement: () => defineDocsWordElement
37
38
  });
38
39
  module.exports = __toCommonJS(index_exports);
@@ -69,6 +70,9 @@ function queryAllByLocalName(root, localName) {
69
70
/**
 * Returns the first element reported by `queryAllByLocalName` for the given
 * local name, or `null` when there is no match.
 *
 * @param {*} root - Node handed to `queryAllByLocalName`.
 * @param {string} localName - Namespace-free element name to look for.
 * @returns {*} The first match or `null`.
 */
function queryByLocalName(root, localName) {
  const [firstMatch] = queryAllByLocalName(root, localName);
  return firstMatch ?? null;
}
73
/**
 * Collects the immediate children of `node` whose `localName` equals
 * `localName`; deeper descendants are not visited.
 *
 * @param {{children: Iterable<{localName: string}>}} node - Parent node.
 * @param {string} localName - Namespace-free element name to match.
 * @returns {Array} Matching children in their original order.
 */
function directChildrenByLocalName(node, localName) {
  const matches = [];
  for (const child of Array.from(node.children)) {
    if (child.localName === localName) {
      matches.push(child);
    }
  }
  return matches;
}
72
76
  function getAttr(node, name) {
73
77
  if (!node) return null;
74
78
  return node.getAttribute(name);
@@ -76,6 +80,9 @@ function getAttr(node, name) {
76
80
/**
 * Scales an EMU measurement to pixels using the factor 96 / 914400.
 *
 * @param {number} emu - Length in EMU.
 * @returns {number} Length in pixels.
 */
function emuToPx(emu) {
  // Multiply first, then divide, so rounding matches the established output.
  const scaled = emu * 96;
  return scaled / 914400;
}
83
/**
 * Scales a twip measurement to pixels using the factor 96 / 1440.
 *
 * @param {number} twip - Length in twips.
 * @returns {number} Length in pixels.
 */
function twipToPx(twip) {
  // Multiply first, then divide, so rounding matches the established output.
  const scaled = twip * 96;
  return scaled / 1440;
}
79
86
  function parseDrawingSizePx(drawing) {
80
87
  const extentNode = queryAllByLocalName(drawing, "extent").find((node) => {
81
88
  const parent = node.parentElement;
@@ -104,6 +111,97 @@ function imageDimensionAttributes(sizePx) {
104
111
  }
105
112
  return attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
106
113
  }
114
/**
 * Reads the horizontal and vertical `posOffset` values of an anchor element
 * and converts them to pixels.
 *
 * @param {*} anchor - Anchor element whose direct `positionH` / `positionV`
 *   children are inspected.
 * @returns {{leftPx: (number|null), topPx: (number|null)}} Offsets in pixels;
 *   a component is `null` when its offset is absent or not a finite number.
 */
function parseAnchorPositionPx(anchor) {
  const readOffsetPx = (axisName) => {
    const axisNode = directChildrenByLocalName(anchor, axisName)[0] ?? null;
    const offsetNode = axisNode ? directChildrenByLocalName(axisNode, "posOffset")[0] ?? null : null;
    const rawText = offsetNode?.textContent?.trim() ?? "";
    const emu = rawText ? Number.parseFloat(rawText) : Number.NaN;
    return Number.isFinite(emu) ? emuToPx(emu) : null;
  };
  return {
    leftPx: readOffsetPx("positionH"),
    topPx: readOffsetPx("positionV")
  };
}
129
/**
 * Determines the text-wrap mode of an anchor from its direct wrap child.
 *
 * @param {*} anchor - Anchor element to inspect.
 * @returns {("square"|"tight"|"topAndBottom"|"none"|null)} The first mode
 *   found in that priority order, or `null` when no known wrap child exists.
 */
function parseAnchorWrapMode(anchor) {
  // Priority order matters when more than one wrap element is present.
  const wrapElements = [
    ["wrapSquare", "square"],
    ["wrapTight", "tight"],
    ["wrapTopAndBottom", "topAndBottom"],
    ["wrapNone", "none"]
  ];
  for (const [elementName, mode] of wrapElements) {
    if (directChildrenByLocalName(anchor, elementName)[0]) {
      return mode;
    }
  }
  return null;
}
136
/**
 * Extracts layout metadata from the direct `anchor` child of a drawing.
 *
 * @param {*} drawing - Drawing element to inspect.
 * @returns {(object|null)} `null` when the drawing has no direct `anchor`
 *   child; otherwise an object with the pixel position, wrap mode, the four
 *   `dist*` distances in pixels (or `null`), the `relativeFrom` attributes of
 *   `positionH` / `positionV`, three boolean flags and the parsed
 *   `relativeHeight` (or `null`).
 */
function parseAnchorMeta(drawing) {
  const [anchor] = directChildrenByLocalName(drawing, "anchor");
  if (!anchor) return null;

  const firstChild = (name) => directChildrenByLocalName(anchor, name)[0] ?? null;

  // Distances are accepted only when they parse to a non-negative integer.
  const distancePx = (attrName) => {
    const raw = getAttr(anchor, attrName);
    if (!raw) return null;
    const emu = Number.parseInt(raw, 10);
    return Number.isFinite(emu) && emu >= 0 ? emuToPx(emu) : null;
  };

  // Recognises 1/true/on and 0/false/off; anything else yields the fallback.
  const flag = (attrName, fallback) => {
    switch ((getAttr(anchor, attrName) ?? "").toLowerCase()) {
      case "1":
      case "true":
      case "on":
        return true;
      case "0":
      case "false":
      case "off":
        return false;
      default:
        return fallback;
    }
  };

  const relativeFromH = getAttr(firstChild("positionH"), "relativeFrom");
  const relativeFromV = getAttr(firstChild("positionV"), "relativeFrom");
  const heightText = getAttr(anchor, "relativeHeight");
  const height = heightText ? Number.parseInt(heightText, 10) : Number.NaN;

  return {
    position: parseAnchorPositionPx(anchor),
    wrapMode: parseAnchorWrapMode(anchor),
    distTPx: distancePx("distT"),
    distBPx: distancePx("distB"),
    distLPx: distancePx("distL"),
    distRPx: distancePx("distR"),
    relativeFromH,
    relativeFromV,
    behindDoc: flag("behindDoc", false),
    allowOverlap: flag("allowOverlap", true),
    layoutInCell: flag("layoutInCell", true),
    relativeHeight: Number.isFinite(height) ? height : null
  };
}
171
/**
 * Adds absolute-positioning CSS and `data-word-anchor-*` attributes to an
 * image attribute string.
 *
 * @param {string} baseAttrs - Existing attribute text (may already contain a
 *   `style="..."` attribute).
 * @param {(object|null)} anchorMeta - Metadata shaped like the result of
 *   `parseAnchorMeta`, or a falsy value.
 * @returns {string} `baseAttrs` unchanged when there is no metadata or no
 *   position at all; otherwise the attribute text with the anchor styling
 *   appended to, or merged into, the `style` attribute.
 */
function mergeImageStyle(baseAttrs, anchorMeta) {
  if (!anchorMeta) return baseAttrs;
  const { position, wrapMode } = anchorMeta;
  const hasLeft = position.leftPx !== null;
  const hasTop = position.topPx !== null;
  if (!hasLeft && !hasTop) return baseAttrs;

  const declarations = ["position:absolute"];
  if (hasLeft) declarations.push(`left:${position.leftPx.toFixed(2)}px`);
  if (hasTop) declarations.push(`top:${position.topPx.toFixed(2)}px`);
  declarations.push(`z-index:${anchorMeta.behindDoc ? 0 : anchorMeta.relativeHeight ?? 3}`);
  const margins = [
    ["margin-top", anchorMeta.distTPx],
    ["margin-bottom", anchorMeta.distBPx],
    ["margin-left", anchorMeta.distLPx],
    ["margin-right", anchorMeta.distRPx]
  ];
  for (const [property, px] of margins) {
    if (px !== null) declarations.push(`${property}:${px.toFixed(2)}px`);
  }
  if (wrapMode === "topAndBottom") {
    declarations.push("display:block", "clear:both");
  }

  const flagAttr = (name, enabled) => `${name}="${enabled ? "1" : "0"}"`;
  const dataAttrs = [`data-word-anchor="1"`];
  if (wrapMode) dataAttrs.push(`data-word-wrap="${wrapMode}"`);
  if (anchorMeta.relativeFromH) {
    dataAttrs.push(`data-word-anchor-relh="${escapeHtml(anchorMeta.relativeFromH)}"`);
  }
  if (anchorMeta.relativeFromV) {
    dataAttrs.push(`data-word-anchor-relv="${escapeHtml(anchorMeta.relativeFromV)}"`);
  }
  dataAttrs.push(
    flagAttr("data-word-anchor-behind", anchorMeta.behindDoc),
    flagAttr("data-word-anchor-overlap", anchorMeta.allowOverlap),
    flagAttr("data-word-anchor-layout-cell", anchorMeta.layoutInCell)
  );
  const anchorAttrs = dataAttrs.join(" ");

  if (!baseAttrs.includes("style=")) {
    return `${baseAttrs} style="${declarations.join(";")}" ${anchorAttrs}`;
  }
  // Fold the new declarations into the first existing style attribute.
  return baseAttrs.replace(/style="([^"]*)"/, (_match, existingStyle) => {
    const merged = [existingStyle, ...declarations].filter((part) => part.length > 0).join(";");
    return `style="${merged}" ${anchorAttrs}`;
  });
}
107
205
  function parseDocRelsMap(relsXmlText) {
108
206
  if (!relsXmlText) return {};
109
207
  const rels = parseXml(relsXmlText);
@@ -127,11 +225,16 @@ function extToMime(ext) {
127
225
  if (lower === "svg") return "image/svg+xml";
128
226
  return "application/octet-stream";
129
227
  }
228
/**
 * Maps a relationship target to a zip entry path under `word/`.
 *
 * Backslashes become forward slashes, leading slashes and leading `./`
 * segments are removed, and leading `../` segments are dropped before the
 * `word/` prefix is applied. Targets that already start with `word/` are
 * returned as they are after that clean-up.
 *
 * @param {string} relTarget - Raw `Target` value of a relationship.
 * @returns {string} Path beginning with `word/`.
 */
function normalizeWordPath(relTarget) {
  const normalized = relTarget
    .replace(/\\/g, "/")
    .replace(/^\/+/, "")
    // "./media/x.png" would otherwise become "word/./media/x.png", which is
    // not an entry name a zip lookup can match.
    .replace(/^(\.\/)+/, "");
  if (normalized.startsWith("word/")) return normalized;
  if (normalized.startsWith("../")) return `word/${normalized.replace(/^(\.\.\/)+/, "")}`;
  return `word/${normalized}`;
}
130
234
  async function imageRidToDataUrl(zip, relMap, rid) {
131
235
  const relTarget = relMap[rid];
132
236
  if (!relTarget) return null;
133
- const normalized = relTarget.replace(/^\/+/, "");
134
- const path = normalized.startsWith("word/") ? normalized : `word/${normalized}`;
237
+ const path = normalizeWordPath(relTarget);
135
238
  const file = zip.file(path);
136
239
  if (!file) return null;
137
240
  const base64 = await file.async("base64");
@@ -139,6 +242,55 @@ async function imageRidToDataUrl(zip, relMap, rid) {
139
242
  const mime = extToMime(ext);
140
243
  return `data:${mime};base64,${base64}`;
141
244
  }
245
/**
 * Loads the text of the package part referenced by a relationship id.
 *
 * @param {*} zip - Archive object exposing `file(path)`; the returned entry
 *   must expose `async("string")`.
 * @param {Object<string, string>} relMap - Relationship id to target lookup.
 * @param {string} rid - Relationship id to resolve.
 * @returns {Promise<(string|null)>} The part's text, or `null` when the id is
 *   unknown or the archive has no entry at the resolved path.
 */
async function readXmlByRid(zip, relMap, rid) {
  const relTarget = relMap[rid];
  if (!relTarget) return null;
  const entry = zip.file(normalizeWordPath(relTarget));
  if (!entry) return null;
  return entry.async("string");
}
252
/**
 * Identifies the chart type by probing the document for known `*Chart`
 * elements in a fixed order.
 *
 * @param {*} chartDoc - Parsed chart document.
 * @returns {string} The element name without its `Chart` suffix (for example
 *   `"bar"`), or `"unknown"` when none of the known elements is present.
 */
function parseChartType(chartDoc) {
  const knownElements = ["barChart", "lineChart", "pieChart", "areaChart", "scatterChart", "radarChart", "doughnutChart"];
  const present = knownElements.find((elementName) => queryByLocalName(chartDoc, elementName));
  return present ? present.replace(/Chart$/, "") : "unknown";
}
259
/**
 * Builds a short summary of a chart part.
 *
 * @param {string} chartXmlText - XML text of the chart part.
 * @returns {{title: string, type: string, seriesCount: number, pointCount: number}}
 *   `title` is the first non-blank `t` element text (or `"Chart"`), `type`
 *   comes from `parseChartType`, and the counts are the numbers of `ser` and
 *   `pt` elements found.
 */
function parseChartSummary(chartXmlText) {
  const chartDoc = parseXml(chartXmlText);

  let title = "Chart";
  for (const textNode of queryAllByLocalName(chartDoc, "t")) {
    const candidate = (textNode.textContent ?? "").trim();
    if (candidate.length > 0) {
      title = candidate;
      break;
    }
  }

  const seriesCount = queryAllByLocalName(chartDoc, "ser").length;
  const pointCount = queryAllByLocalName(chartDoc, "pt").length;
  return { title, type: parseChartType(chartDoc), seriesCount, pointCount };
}
267
/**
 * Gathers up to twelve non-blank text snippets from a diagram part.
 *
 * @param {string} diagramXmlText - XML text of the diagram data part.
 * @returns {string[]} Trimmed contents of the first twelve non-blank `t`
 *   elements, in document order.
 */
function extractSmartArtText(diagramXmlText) {
  const diagramDoc = parseXml(diagramXmlText);
  const snippets = [];
  for (const textNode of queryAllByLocalName(diagramDoc, "t")) {
    const value = (textNode.textContent ?? "").trim();
    if (value.length > 0) snippets.push(value);
  }
  return snippets.slice(0, 12);
}
271
/**
 * Renders an Office Math node as a linear plain-text expression.
 *
 * Fractions become `(num)/(den)`, superscripts `base^(sup)`, subscripts
 * `base_(sub)` and radicals `sqrt(base)`; any other element is rendered as
 * the concatenation of its children.
 *
 * Operands are taken from the node's DIRECT children only. Looking them up
 * among all descendants (NOTE(review): which `queryByLocalName` appears to
 * do) picks the wrong operand as soon as structures nest: for a fraction
 * whose numerator is itself a fraction, the first `den` in document order
 * belongs to the inner fraction, not the outer one.
 *
 * @param {{localName: string, children: Iterable, textContent: (string|null)}} node
 *   Math element to render.
 * @returns {string} Linear text form of the node.
 */
function ommlNodeToText(node) {
  const childNodes = Array.from(node.children);
  const operand = (name, fallback) => {
    const match = childNodes.find((child) => child.localName === name);
    return match ? ommlNodeToText(match) : fallback;
  };

  switch (node.localName) {
    case "t":
      return node.textContent ?? "";
    case "f":
      return `(${operand("num", "?")})/(${operand("den", "?")})`;
    case "sSup":
      return `${operand("e", "")}^(${operand("sup", "")})`;
    case "sSub":
      return `${operand("e", "")}_(${operand("sub", "")})`;
    case "rad":
      return `sqrt(${operand("e", "")})`;
    default:
      return childNodes.map((child) => ommlNodeToText(child)).join("");
  }
}
142
294
  function runStyleToCss(rPr) {
143
295
  if (!rPr) return "";
144
296
  const declarations = [];
@@ -179,18 +331,145 @@ function paragraphAlignStyle(paragraph) {
179
331
/**
 * Builds the `data-word-p-index` attribute fragment for a paragraph.
 *
 * @param {(number|null)} paragraphIndex - Paragraph index, or `null`.
 * @returns {string} An empty string for `null`; otherwise the attribute text
 *   with a leading space.
 */
function paragraphDataAttr(paragraphIndex) {
  if (paragraphIndex === null) {
    return "";
  }
  return ` data-word-p-index="${paragraphIndex}"`;
}
182
- async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex) {
334
/**
 * Builds a lookup from footnote id to footnote HTML text.
 *
 * Only footnotes whose id parses to an integer greater than zero are kept.
 * Each footnote's paragraphs are rendered with `paragraphText` and joined
 * with `<br/>`.
 *
 * The result has no prototype, so a lookup such as `map[someId]` with an id
 * like `"constructor"` or `"toString"` taken from an untrusted document
 * yields `undefined` instead of an inherited `Object.prototype` member.
 *
 * @param {(string|null|undefined)} footnotesXmlText - XML text of the
 *   footnotes part; a falsy value yields an empty map.
 * @returns {Object<string, string>} Prototype-less map keyed by the numeric
 *   id rendered as a string.
 */
function parseFootnotesMap(footnotesXmlText) {
  const map = Object.create(null);
  if (!footnotesXmlText) return map;
  const footnotesDoc = parseXml(footnotesXmlText);
  for (const footnote of queryAllByLocalName(footnotesDoc, "footnote")) {
    const idRaw = getAttr(footnote, "w:id") ?? getAttr(footnote, "id");
    const idNum = idRaw ? Number.parseInt(idRaw, 10) : Number.NaN;
    if (!Number.isFinite(idNum) || idNum <= 0) continue;
    const text = queryAllByLocalName(footnote, "p")
      .map((p) => paragraphText(p))
      .join("<br/>")
      .trim();
    if (!text) continue;
    map[String(idNum)] = text;
  }
  return map;
}
350
/**
 * Builds a lookup from comment id to `{ author, date, text }`.
 *
 * Comments without an id or without any rendered text are skipped. Each
 * comment's paragraphs are rendered with `paragraphText` and joined with
 * `<br/>`.
 *
 * Comment ids are used as keys verbatim, so the map is created without a
 * prototype: an id of `"__proto__"` becomes an ordinary entry instead of
 * replacing the map's prototype, and lookups with ids such as
 * `"constructor"` do not resolve to inherited `Object.prototype` members.
 *
 * @param {(string|null|undefined)} commentsXmlText - XML text of the comments
 *   part; a falsy value yields an empty map.
 * @returns {Object<string, {author: (string|null), date: (string|null), text: string}>}
 *   Prototype-less map keyed by the raw comment id.
 */
function parseCommentsMap(commentsXmlText) {
  const map = Object.create(null);
  if (!commentsXmlText) return map;
  const commentsDoc = parseXml(commentsXmlText);
  for (const comment of queryAllByLocalName(commentsDoc, "comment")) {
    const idRaw = getAttr(comment, "w:id") ?? getAttr(comment, "id");
    if (!idRaw) continue;
    const text = queryAllByLocalName(comment, "p")
      .map((p) => paragraphText(p))
      .join("<br/>")
      .trim();
    if (!text) continue;
    map[idRaw] = {
      author: getAttr(comment, "w:author") ?? getAttr(comment, "author"),
      date: getAttr(comment, "w:date") ?? getAttr(comment, "date"),
      text
    };
  }
  return map;
}
369
/**
 * Builds a lookup from endnote id to endnote HTML text.
 *
 * Only endnotes whose id parses to an integer greater than zero are kept.
 * Each endnote's paragraphs are rendered with `paragraphText` and joined
 * with `<br/>`.
 *
 * The result has no prototype, so a lookup such as `map[someId]` with an id
 * like `"constructor"` or `"toString"` taken from an untrusted document
 * yields `undefined` instead of an inherited `Object.prototype` member.
 *
 * @param {(string|null|undefined)} endnotesXmlText - XML text of the endnotes
 *   part; a falsy value yields an empty map.
 * @returns {Object<string, string>} Prototype-less map keyed by the numeric
 *   id rendered as a string.
 */
function parseEndnotesMap(endnotesXmlText) {
  const map = Object.create(null);
  if (!endnotesXmlText) return map;
  const endnotesDoc = parseXml(endnotesXmlText);
  for (const endnote of queryAllByLocalName(endnotesDoc, "endnote")) {
    const idRaw = getAttr(endnote, "w:id") ?? getAttr(endnote, "id");
    const idNum = idRaw ? Number.parseInt(idRaw, 10) : Number.NaN;
    if (!Number.isFinite(idNum) || idNum <= 0) continue;
    const text = queryAllByLocalName(endnote, "p")
      .map((p) => paragraphText(p))
      .join("<br/>")
      .trim();
    if (!text) continue;
    map[String(idNum)] = text;
  }
  return map;
}
385
/**
 * Renders the footnotes that were referenced in the body as an ordered list.
 *
 * Ids are de-duplicated in first-use order. An id is rendered only when the
 * map has it as an OWN, truthy entry; a plain truthiness check would also
 * accept ids such as `"constructor"` that resolve to inherited
 * `Object.prototype` members on an ordinary object.
 *
 * @param {string[]} usedIds - Footnote ids in the order they were referenced.
 * @param {Object<string, string>} footnotesMap - Footnote id to HTML text.
 * @returns {string} The `<section>` markup, or an empty string when nothing
 *   is rendered.
 */
function renderFootnotesSection(usedIds, footnotesMap) {
  const hasEntry = (id) => Object.prototype.hasOwnProperty.call(footnotesMap, id) && Boolean(footnotesMap[id]);
  const uniq = [...new Set(usedIds)].filter(hasEntry);
  if (uniq.length === 0) return "";
  const items = uniq.map((id) => `<li id="word-footnote-${id}" data-word-footnote-id="${id}">${footnotesMap[id]}</li>`).join("");
  return `<section data-word-footnotes="1"><hr/><ol>${items}</ol></section>`;
}
391
/**
 * Renders the comments that were referenced in the body as an ordered list.
 *
 * Ids are de-duplicated in first-use order. An id is rendered only when the
 * map has it as an OWN, truthy entry, so ids such as `"constructor"` no
 * longer resolve to inherited `Object.prototype` members. Comment ids are
 * arbitrary strings taken from the document, so they are HTML-escaped before
 * being written into the `id` and `data-word-comment-id` attributes.
 *
 * @param {string[]} usedIds - Comment ids in the order they were referenced.
 * @param {Object<string, {author: (string|null), date: (string|null), text: string}>} commentsMap
 *   Comment id to comment record; `text` is inserted as HTML.
 * @returns {string} The `<section>` markup, or an empty string when nothing
 *   is rendered.
 */
function renderCommentsSection(usedIds, commentsMap) {
  const hasEntry = (id) => Object.prototype.hasOwnProperty.call(commentsMap, id) && Boolean(commentsMap[id]);
  const uniq = [...new Set(usedIds)].filter(hasEntry);
  if (uniq.length === 0) return "";
  const items = uniq.map((id) => {
    const item = commentsMap[id];
    const safeId = escapeHtml(String(id));
    const meta = [item.author ?? "", item.date ?? ""].filter((x) => x.length > 0).join(" \xB7 ");
    const metaHtml = meta ? `<div data-word-comment-meta="1">${escapeHtml(meta)}</div>` : "";
    return `<li id="word-comment-${safeId}" data-word-comment-id="${safeId}">${metaHtml}<div>${item.text}</div></li>`;
  }).join("");
  return `<section data-word-comments="1"><hr/><ol>${items}</ol></section>`;
}
402
/**
 * Renders the endnotes that were referenced in the body as an ordered list.
 *
 * Ids are de-duplicated in first-use order. An id is rendered only when the
 * map has it as an OWN, truthy entry; a plain truthiness check would also
 * accept ids such as `"constructor"` that resolve to inherited
 * `Object.prototype` members on an ordinary object.
 *
 * @param {string[]} usedIds - Endnote ids in the order they were referenced.
 * @param {Object<string, string>} endnotesMap - Endnote id to HTML text.
 * @returns {string} The `<section>` markup, or an empty string when nothing
 *   is rendered.
 */
function renderEndnotesSection(usedIds, endnotesMap) {
  const hasEntry = (id) => Object.prototype.hasOwnProperty.call(endnotesMap, id) && Boolean(endnotesMap[id]);
  const uniq = [...new Set(usedIds)].filter(hasEntry);
  if (uniq.length === 0) return "";
  const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
  return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
}
408
+ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
183
409
  const tag = paragraphTag(paragraph);
184
410
  const alignStyle = paragraphAlignStyle(paragraph);
185
411
  const dataAttr = paragraphDataAttr(paragraphIndex);
186
- const runs = queryAllByLocalName(paragraph, "r");
187
- if (runs.length === 0) {
412
+ const hasRenderableNode = queryAllByLocalName(paragraph, "r").length > 0 || queryAllByLocalName(paragraph, "oMath").length > 0 || queryAllByLocalName(paragraph, "oMathPara").length > 0;
413
+ if (!hasRenderableNode) {
188
414
  return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}><br/></${tag}>`;
189
415
  }
190
- const parts = [];
191
- for (const run of runs) {
416
+ function parseRevisionMeta(node, type) {
417
+ return {
418
+ type,
419
+ id: getAttr(node, "w:id") ?? getAttr(node, "id"),
420
+ author: getAttr(node, "w:author") ?? getAttr(node, "author"),
421
+ date: getAttr(node, "w:date") ?? getAttr(node, "date")
422
+ };
423
+ }
424
+ function inferRevisionMeta(run, fallback) {
425
+ if (fallback) return fallback;
426
+ let cursor = run;
427
+ while (cursor) {
428
+ if (cursor.localName === "ins") return parseRevisionMeta(cursor, "ins");
429
+ if (cursor.localName === "del") return parseRevisionMeta(cursor, "del");
430
+ if (cursor.localName === "p") break;
431
+ cursor = cursor.parentElement;
432
+ }
433
+ return null;
434
+ }
435
+ function revisionMetaAttrs(meta) {
436
+ const attrs = [`data-word-revision="${meta.type}"`];
437
+ if (meta.id) attrs.push(`data-word-revision-id="${escapeHtml(meta.id)}"`);
438
+ if (meta.author) attrs.push(`data-word-revision-author="${escapeHtml(meta.author)}"`);
439
+ if (meta.date) attrs.push(`data-word-revision-date="${escapeHtml(meta.date)}"`);
440
+ return attrs.join(" ");
441
+ }
442
+ async function runToHtml(run, revisionFallback) {
443
+ const result = [];
192
444
  const rPr = queryByLocalName(run, "rPr");
193
445
  const css = runStyleToCss(rPr);
446
+ const footnoteRef = queryByLocalName(run, "footnoteReference");
447
+ const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
448
+ if (footnoteId && footnotesMap[footnoteId]) {
449
+ usedFootnoteIds.push(footnoteId);
450
+ result.push(
451
+ `<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
452
+ );
453
+ return result;
454
+ }
455
+ const endnoteRef = queryByLocalName(run, "endnoteReference");
456
+ const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
457
+ if (endnoteId && endnotesMap[endnoteId]) {
458
+ usedEndnoteIds.push(endnoteId);
459
+ result.push(
460
+ `<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
461
+ );
462
+ return result;
463
+ }
464
+ const commentRef = queryByLocalName(run, "commentReference");
465
+ const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
466
+ if (commentId && commentsMap[commentId]) {
467
+ usedCommentIds.push(commentId);
468
+ result.push(
469
+ `<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
470
+ );
471
+ return result;
472
+ }
194
473
  const drawing = queryByLocalName(run, "drawing");
195
474
  if (drawing) {
196
475
  const blip = queryByLocalName(drawing, "blip");
@@ -200,53 +479,318 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex) {
200
479
  if (src) {
201
480
  const imageSize = parseDrawingSizePx(drawing);
202
481
  const dimensionAttrs = imageDimensionAttributes(imageSize);
203
- parts.push(`<img src="${src}" alt="word-image"${dimensionAttrs}/>`);
204
- continue;
482
+ const anchorMeta = parseAnchorMeta(drawing);
483
+ const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
484
+ result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
485
+ return result;
205
486
  }
206
487
  }
488
+ const chartRef = queryByLocalName(drawing, "chart");
489
+ const chartRid = getAttr(chartRef, "r:id") ?? getAttr(chartRef, "id");
490
+ if (chartRid) {
491
+ const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
492
+ if (chartXmlText) {
493
+ const summary = parseChartSummary(chartXmlText);
494
+ result.push(
495
+ `<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
496
+ );
497
+ return result;
498
+ }
499
+ }
500
+ const smartArtRef = queryByLocalName(drawing, "relIds");
501
+ const smartArtRid = getAttr(smartArtRef, "r:dm") ?? getAttr(smartArtRef, "dm");
502
+ if (smartArtRid) {
503
+ const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
504
+ const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
505
+ const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
506
+ result.push(
507
+ `<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
508
+ );
509
+ return result;
510
+ }
207
511
  }
208
512
  const texts = queryAllByLocalName(run, "t").map((t) => t.textContent ?? "").join("");
209
- const brs = queryAllByLocalName(run, "br").length;
210
- const runText2 = `${escapeHtml(texts)}${"<br/>".repeat(brs)}`;
211
- if (!runText2) continue;
212
- if (css) {
213
- parts.push(`<span style="${css}">${runText2}</span>`);
214
- } else {
215
- parts.push(runText2);
513
+ const delTexts = queryAllByLocalName(run, "delText").map((t) => t.textContent ?? "").join("");
514
+ const brNodes = queryAllByLocalName(run, "br");
515
+ const pageBreakCount = brNodes.filter((node) => {
516
+ const type = (getAttr(node, "w:type") ?? getAttr(node, "type") ?? "").toLowerCase();
517
+ return type === "page";
518
+ }).length;
519
+ const lineBreakCount = Math.max(0, brNodes.length - pageBreakCount);
520
+ const runText2 = `${escapeHtml(texts || delTexts)}${"<br/>".repeat(lineBreakCount)}`;
521
+ if (runText2) {
522
+ const revisionMeta = inferRevisionMeta(run, revisionFallback);
523
+ if (css) {
524
+ const span = `<span style="${css}">${runText2}</span>`;
525
+ if (revisionMeta) {
526
+ const tagName = revisionMeta.type === "ins" ? "ins" : "del";
527
+ result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
528
+ } else {
529
+ result.push(span);
530
+ }
531
+ } else if (revisionMeta) {
532
+ const tagName = revisionMeta.type === "ins" ? "ins" : "del";
533
+ result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
534
+ } else {
535
+ result.push(runText2);
536
+ }
216
537
  }
538
+ for (let i = 0; i < pageBreakCount; i += 1) {
539
+ result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
540
+ }
541
+ return result;
542
+ }
543
+ async function nodeToHtml(node, revisionFallback) {
544
+ if (node.localName === "commentRangeStart") {
545
+ const id = getAttr(node, "w:id") ?? getAttr(node, "id");
546
+ return id ? [`<span data-word-comment-range-start="${id}"></span>`] : [];
547
+ }
548
+ if (node.localName === "commentRangeEnd") {
549
+ const id = getAttr(node, "w:id") ?? getAttr(node, "id");
550
+ return id ? [`<span data-word-comment-range-end="${id}"></span>`] : [];
551
+ }
552
+ if (node.localName === "r") {
553
+ return runToHtml(node, revisionFallback);
554
+ }
555
+ if (node.localName === "oMath" || node.localName === "oMathPara") {
556
+ const linear = ommlNodeToText(node).trim();
557
+ if (!linear) return [];
558
+ return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
559
+ }
560
+ if (node.localName === "ins" || node.localName === "del") {
561
+ const scopedMeta = parseRevisionMeta(node, node.localName === "ins" ? "ins" : "del");
562
+ const nested2 = [];
563
+ for (const child of Array.from(node.children)) {
564
+ nested2.push(...await nodeToHtml(child, scopedMeta));
565
+ }
566
+ return nested2;
567
+ }
568
+ const nested = [];
569
+ for (const child of Array.from(node.children)) {
570
+ nested.push(...await nodeToHtml(child, revisionFallback));
571
+ }
572
+ return nested;
573
+ }
574
+ const parts = [];
575
+ const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
576
+ for (let i = 0; i < renderedPageBreakCount; i += 1) {
577
+ parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
578
+ }
579
+ for (const child of Array.from(paragraph.children)) {
580
+ parts.push(...await nodeToHtml(child, null));
217
581
  }
218
582
  const content = parts.join("") || "<br/>";
219
583
  return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}>${content}</${tag}>`;
220
584
  }
221
585
/**
 * Renders a run as escaped text followed by one `<br/>` per `br` element.
 *
 * @param {*} run - Run element.
 * @returns {string} Escaped concatenation of the run's `t` texts (or, when
 *   that is empty, its `delText` texts) followed by the line breaks.
 */
function runText(run) {
  const joinedText = (localName) =>
    queryAllByLocalName(run, localName)
      .map((node) => node.textContent ?? "")
      .join("");
  const visible = joinedText("t");
  const deleted = joinedText("delText");
  const breaks = "<br/>".repeat(queryAllByLocalName(run, "br").length);
  return `${escapeHtml(visible || deleted)}${breaks}`;
}
226
591
/**
 * Renders every run found in a paragraph with `runText` and concatenates the
 * results.
 *
 * @param {*} paragraph - Paragraph element.
 * @returns {string} The combined run HTML, or `<br/>` when it is empty.
 */
function paragraphText(paragraph) {
  let content = "";
  for (const run of queryAllByLocalName(paragraph, "r")) {
    content += runText(run);
  }
  return content || "<br/>";
}
596
/**
 * Reads the `gridSpan` value from a table cell's direct `tcPr` child.
 *
 * @param {*} tc - Table cell element.
 * @returns {number} The span when it parses to a positive integer; otherwise 1.
 */
function parseTcGridSpan(tc) {
  const [tcPr] = directChildrenByLocalName(tc, "tcPr");
  const gridSpan = tcPr ? directChildrenByLocalName(tcPr, "gridSpan")[0] ?? null : null;
  const rawVal = getAttr(gridSpan, "w:val") ?? getAttr(gridSpan, "val");
  if (!rawVal) return 1;
  const span = Number.parseInt(rawVal, 10);
  if (!Number.isFinite(span) || span <= 0) return 1;
  return span;
}
603
/**
 * Classifies the vertical-merge state of a table cell from the `vMerge`
 * element in its direct `tcPr` child.
 *
 * @param {*} tc - Table cell element.
 * @returns {("none"|"restart"|"continue")} `"none"` without a `vMerge`
 *   element, `"restart"` when its value is `restart` (case-insensitive), and
 *   `"continue"` for every other or missing value.
 */
function parseTcVMerge(tc) {
  const [tcPr] = directChildrenByLocalName(tc, "tcPr");
  const vMerge = tcPr ? directChildrenByLocalName(tcPr, "vMerge")[0] ?? null : null;
  if (!vMerge) return "none";
  const declared = getAttr(vMerge, "w:val") ?? getAttr(vMerge, "val") ?? "continue";
  if (declared.toLowerCase() === "restart") {
    return "restart";
  }
  return "continue";
}
610
/**
 * Reads the column widths declared in a table's direct `tblGrid` child.
 *
 * @param {*} table - Table element.
 * @returns {number[]} Pixel widths of the `gridCol` entries whose width
 *   parses to a positive integer; other entries are omitted.
 */
function parseTblGridWidthsPx(table) {
  const [grid] = directChildrenByLocalName(table, "tblGrid");
  if (!grid) return [];
  const widthsPx = [];
  for (const col of directChildrenByLocalName(grid, "gridCol")) {
    const raw = getAttr(col, "w:w") ?? getAttr(col, "w");
    const twip = raw ? Number.parseInt(raw, 10) : Number.NaN;
    if (!Number.isFinite(twip) || twip <= 0) continue;
    const px = twipToPx(twip);
    // NOTE(review): dropping entries shifts later columns to lower indexes;
    // confirm callers that index this array by column position expect that.
    if (px > 0) widthsPx.push(px);
  }
  return widthsPx;
}
619
/**
 * Converts a border size value to pixels by dividing it by six.
 *
 * @param {number} size - Border size as read from the `sz` attribute.
 * @returns {number} Width in pixels.
 */
function borderSizeToPx(size) {
  const unitsPerPx = 6;
  return size / unitsPerPx;
}
622
/**
 * Translates a border element into a CSS border shorthand value.
 *
 * Two inputs used to produce declarations that browsers discard:
 *  - `color="auto"` (or any non-hex value) was emitted as `#auto`; the
 *    default colour `222222` is now used unless the value is six hex digits.
 *  - border styles with no CSS counterpart (`thick`, `dashSmallGap`, ...)
 *    were copied into the CSS verbatim; they are now mapped to the closest
 *    CSS `border-style`, falling back to `solid`.
 * Restricting both parts to known-safe tokens also keeps arbitrary attribute
 * text out of the generated style.
 *
 * @param {*} borderNode - Border element (for example `top` or `insideH`),
 *   or a falsy value.
 * @returns {(string|null)} `null` when no node is given, `"none"` when the
 *   border value is missing, `nil` or `none`, otherwise
 *   `"<width>px <style> #<color>"`.
 */
function parseBorderCss(borderNode) {
  if (!borderNode) return null;
  const attr = (name) => getAttr(borderNode, `w:${name}`) ?? getAttr(borderNode, name);

  const val = (attr("val") ?? "").toLowerCase();
  if (!val || val === "nil" || val === "none") return "none";

  const rawColor = (attr("color") ?? "222222").replace(/^#/, "");
  const color = /^[0-9a-f]{6}$/i.test(rawColor) ? rawColor : "222222";

  const rawSize = attr("sz");
  const size = rawSize ? Number.parseInt(rawSize, 10) : Number.NaN;
  const px = Number.isFinite(size) && size > 0 ? borderSizeToPx(size) : 1;

  // Keys are lower-cased border values; anything unlisted renders as solid.
  const cssStyles = new Map([
    ["single", "solid"],
    ["thick", "solid"],
    ["double", "double"],
    ["triple", "double"],
    ["dotted", "dotted"],
    ["dashed", "dashed"],
    ["dashsmallgap", "dashed"],
    ["dotdash", "dashed"],
    ["dotdotdash", "dashed"],
    ["inset", "inset"],
    ["outset", "outset"],
    ["threedemboss", "ridge"],
    ["threedengrave", "groove"]
  ]);
  const style = cssStyles.get(val) ?? "solid";

  return `${px.toFixed(2)}px ${style} #${color}`;
}
633
/**
 * Derives table-level CSS settings from a table's direct `tblPr` child.
 *
 * @param {*} table - Table element.
 * @returns {{tableLayout: string, borderCollapse: string, borderSpacingPx: number,
 *   borderCss: string, insideHCss: (string|null), insideVCss: (string|null)}}
 *   `tableLayout` is `"auto"` only for an `autofit` layout and `"fixed"`
 *   otherwise; a positive `dxa` cell spacing switches `borderCollapse` to
 *   `"separate"`; `borderCss` is the first available outer border (top,
 *   right, bottom, left) or `"1px solid #222"`.
 */
function parseTableStyleProfile(table) {
  const childOf = (parent, name) => (parent ? directChildrenByLocalName(parent, name)[0] ?? null : null);
  const attrOf = (node, name) => getAttr(node, `w:${name}`) ?? getAttr(node, name);

  const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
  const tblBorders = childOf(tblPr, "tblBorders");
  const layout = childOf(tblPr, "tblLayout");
  const spacing = childOf(tblPr, "tblCellSpacing");

  const spacingType = (attrOf(spacing, "type") ?? "dxa").toLowerCase();
  const spacingRaw = attrOf(spacing, "w");
  const spacingVal = spacingRaw ? Number.parseFloat(spacingRaw) : Number.NaN;
  const hasSpacing = spacingType === "dxa" && Number.isFinite(spacingVal) && spacingVal > 0;
  const borderSpacingPx = hasSpacing ? twipToPx(spacingVal) : 0;

  const layoutType = (attrOf(layout, "type") ?? "").toLowerCase();

  const side = (name) => parseBorderCss(childOf(tblBorders, name));
  const top = side("top");
  const bottom = side("bottom");
  const left = side("left");
  const right = side("right");

  return {
    tableLayout: layoutType === "autofit" ? "auto" : "fixed",
    borderCollapse: borderSpacingPx > 0 ? "separate" : "collapse",
    borderSpacingPx,
    borderCss: top ?? right ?? bottom ?? left ?? "1px solid #222",
    insideHCss: side("insideH"),
    insideVCss: side("insideV")
  };
}
660
/**
 * Produces the CSS width declaration for a table from the `tblW` element in
 * its direct `tblPr` child.
 *
 * A `dxa` width is converted with `twipToPx`. A `pct` width is normally
 * divided by 50, but a value carrying an explicit percent sign (for example
 * `"50%"`) is already a whole percentage and is used as written; dividing it
 * by 50 as well would shrink the table to a fiftieth of the intended width.
 *
 * @param {*} table - Table element.
 * @param {number[]} gridWidthsPx - Column widths in pixels, used when no
 *   usable `tblW` is present.
 * @returns {string} A `width:...` declaration; falls back to the summed grid
 *   width (capped with `max-width:100%`) and finally to `width:100%`.
 */
function parseTableWidthStyle(table, gridWidthsPx) {
  const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
  const tblW = tblPr ? directChildrenByLocalName(tblPr, "tblW")[0] ?? null : null;
  const type = (getAttr(tblW, "w:type") ?? getAttr(tblW, "type") ?? "").toLowerCase();
  const rawVal = (getAttr(tblW, "w:w") ?? getAttr(tblW, "w") ?? "").trim();
  const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
  const isPositive = Number.isFinite(numericVal) && numericVal > 0;
  if (type === "dxa" && isPositive) {
    return `width:${twipToPx(numericVal).toFixed(2)}px`;
  }
  if (type === "pct" && isPositive) {
    const percent = rawVal.endsWith("%") ? numericVal : numericVal / 50;
    return `width:${percent.toFixed(2)}%`;
  }
  const gridTotal = gridWidthsPx.reduce((sum, item) => sum + item, 0);
  if (gridTotal > 0) return `width:${gridTotal.toFixed(2)}px;max-width:100%`;
  return "width:100%";
}
676
/**
 * Produces the CSS width declaration for a table cell from the `tcW` element
 * in its direct `tcPr` child.
 *
 * A `dxa` width is converted with `twipToPx`. A `pct` width is normally
 * divided by 50, but a value carrying an explicit percent sign (for example
 * `"25%"`) is already a whole percentage and is used as written; dividing it
 * by 50 as well would shrink the cell to a fiftieth of the intended width.
 *
 * @param {*} cell - Table cell element.
 * @param {number} colCursor - Index of the first grid column the cell covers.
 * @param {number} colSpan - Number of grid columns the cell covers.
 * @param {number[]} gridWidthsPx - Column widths in pixels, used when no
 *   usable `tcW` is present.
 * @returns {string} A `width:...` declaration, or an empty string when no
 *   width can be determined.
 */
function parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx) {
  const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
  const tcW = tcPr ? directChildrenByLocalName(tcPr, "tcW")[0] ?? null : null;
  const type = (getAttr(tcW, "w:type") ?? getAttr(tcW, "type") ?? "").toLowerCase();
  const rawVal = (getAttr(tcW, "w:w") ?? getAttr(tcW, "w") ?? "").trim();
  const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
  const isPositive = Number.isFinite(numericVal) && numericVal > 0;
  if (type === "dxa" && isPositive) {
    return `width:${twipToPx(numericVal).toFixed(2)}px`;
  }
  if (type === "pct" && isPositive) {
    const percent = rawVal.endsWith("%") ? numericVal : numericVal / 50;
    return `width:${percent.toFixed(2)}%`;
  }
  const width = gridWidthsPx.slice(colCursor, colCursor + colSpan).reduce((sum, item) => sum + item, 0);
  if (width > 0) return `width:${width.toFixed(2)}px`;
  return "";
}
692
/**
 * Produces the CSS border declarations for a table cell.
 *
 * @param {*} cell - Table cell element.
 * @param {{insideHCss: (string|null), insideVCss: (string|null), borderCss: string}} tableStyle
 *   Table-level border defaults.
 * @returns {string} A single `border:` declaration when the cell has no
 *   `tcBorders`; otherwise one declaration per side, where a side without
 *   its own border falls back to the table's inside border for that
 *   direction and then to the table's outer border.
 */
function parseCellBorderStyle(cell, tableStyle) {
  const [tcPr] = directChildrenByLocalName(cell, "tcPr");
  const tcBorders = tcPr ? directChildrenByLocalName(tcPr, "tcBorders")[0] ?? null : null;
  if (!tcBorders) {
    return `border:${tableStyle.insideHCss ?? tableStyle.insideVCss ?? tableStyle.borderCss}`;
  }

  const horizontalDefault = tableStyle.insideHCss ?? tableStyle.borderCss;
  const verticalDefault = tableStyle.insideVCss ?? tableStyle.borderCss;
  const sideCss = (name, fallback) =>
    parseBorderCss(directChildrenByLocalName(tcBorders, name)[0] ?? null) ?? fallback;

  return [
    `border-top:${sideCss("top", horizontalDefault)}`,
    `border-right:${sideCss("right", verticalDefault)}`,
    `border-bottom:${sideCss("bottom", horizontalDefault)}`,
    `border-left:${sideCss("left", verticalDefault)}`
  ].join(";");
}
231
705
/**
 * Renders the content of a table cell.
 *
 * Direct `p` children become `<p>` elements and direct `tbl` children are
 * rendered through `tableToHtml`; every other direct child (including
 * `tcPr`) is ignored. When that produces nothing, the cell's combined `t`
 * text is used instead.
 *
 * @param {*} cell - Table cell element.
 * @param {Map} paragraphIndexMap - Lookup from paragraph element to its
 *   index; paragraphs missing from it are rendered without an index.
 * @returns {string} HTML for the cell body, or `<br/>` when it is empty.
 */
function tableCellHtml(cell, paragraphIndexMap) {
  const blocks = Array.from(cell.children).flatMap((child) => {
    switch (child.localName) {
      case "p": {
        const paragraphIndex = paragraphIndexMap.get(child) ?? null;
        return [`<p${paragraphDataAttr(paragraphIndex)}>${paragraphText(child)}</p>`];
      }
      case "tbl":
        return [tableToHtml(child, paragraphIndexMap)];
      default:
        return [];
    }
  });
  if (blocks.length > 0) {
    return blocks.join("");
  }

  const fallbackText = queryAllByLocalName(cell, "t")
    .map((node) => node.textContent ?? "")
    .join("")
    .trim();
  return escapeHtml(fallbackText) || "<br/>";
}
242
723
  function tableToHtml(table, paragraphIndexMap) {
243
- const rows = queryAllByLocalName(table, "tr");
244
- const htmlRows = rows.map((row) => {
245
- const cells = queryAllByLocalName(row, "tc");
246
- const htmlCells = cells.map((cell) => `<td style="border:1px solid #222;vertical-align:top;">${tableCellHtml(cell, paragraphIndexMap)}</td>`).join("");
247
- return `<tr>${htmlCells}</tr>`;
724
+ const rows = directChildrenByLocalName(table, "tr");
725
+ const gridWidthsPx = parseTblGridWidthsPx(table);
726
+ const tableStyle = parseTableStyleProfile(table);
727
+ const activeByCol = /* @__PURE__ */ new Map();
728
+ const allOrigins = [];
729
+ let nextOriginId = 1;
730
+ const htmlRows = rows.map((row, rowIndex) => {
731
+ const directCells = directChildrenByLocalName(row, "tc");
732
+ const continued = /* @__PURE__ */ new Set();
733
+ const emittedCells = [];
734
+ let colCursor = 0;
735
+ for (const cell of directCells) {
736
+ const colSpan = parseTcGridSpan(cell);
737
+ const vMerge = parseTcVMerge(cell);
738
+ if (vMerge === "continue") {
739
+ const activeOrigins = Array.from(new Set(activeByCol.values())).filter((origin2) => !continued.has(origin2)).sort((a, b) => a.startCol - b.startCol);
740
+ const origin = activeOrigins.find((item) => item.startCol >= colCursor) ?? activeOrigins[0] ?? null;
741
+ if (origin) {
742
+ origin.rowSpan += 1;
743
+ continued.add(origin);
744
+ colCursor = origin.startCol + origin.colSpan;
745
+ }
746
+ continue;
747
+ }
748
+ while (activeByCol.has(colCursor)) {
749
+ colCursor += 1;
750
+ }
751
+ const html = tableCellHtml(cell, paragraphIndexMap);
752
+ const attrs = [];
753
+ const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
754
+ const borderStyle = parseCellBorderStyle(cell, tableStyle);
755
+ if (vMerge === "restart") {
756
+ const origin = {
757
+ id: `m${nextOriginId}`,
758
+ startCol: colCursor,
759
+ colSpan,
760
+ rowSpan: 1,
761
+ startedRow: rowIndex
762
+ };
763
+ nextOriginId += 1;
764
+ allOrigins.push(origin);
765
+ for (let i = 0; i < colSpan; i += 1) {
766
+ activeByCol.set(colCursor + i, origin);
767
+ }
768
+ attrs.push(`data-word-merge-id="${origin.id}"`);
769
+ }
770
+ if (colSpan > 1) attrs.push(`colspan="${colSpan}"`);
771
+ emittedCells.push(
772
+ `<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="${borderStyle};vertical-align:top;${widthStyle}">${html}</td>`
773
+ );
774
+ colCursor += colSpan;
775
+ }
776
+ for (const origin of Array.from(new Set(activeByCol.values()))) {
777
+ if (origin.startedRow < rowIndex && !continued.has(origin)) {
778
+ for (let i = 0; i < origin.colSpan; i += 1) {
779
+ activeByCol.delete(origin.startCol + i);
780
+ }
781
+ }
782
+ }
783
+ return `<tr>${emittedCells.join("")}</tr>`;
248
784
  });
249
- return `<table style="border-collapse:collapse;table-layout:fixed;width:100%;border:1px solid #222;">${htmlRows.join("")}</table>`;
785
+ let merged = htmlRows.join("");
786
+ for (const origin of allOrigins) {
787
+ const marker = `data-word-merge-id="${origin.id}"`;
788
+ const replacement = origin.rowSpan > 1 ? `rowspan="${origin.rowSpan}"` : "";
789
+ merged = merged.replace(marker, replacement).replace(/\s{2,}/g, " ");
790
+ }
791
+ const tableWidthStyle = parseTableWidthStyle(table, gridWidthsPx);
792
+ const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
793
+ return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
250
794
  }
251
795
  async function parseDocxToHtmlSnapshot(file) {
252
796
  const maybeArrayBuffer = file.arrayBuffer;
@@ -257,7 +801,16 @@ async function parseDocxToHtmlSnapshot(file) {
257
801
  throw new Error("DOCX missing document.xml");
258
802
  }
259
803
  const relsText = await zip.file("word/_rels/document.xml.rels")?.async("string");
804
+ const footnotesText = await zip.file("word/footnotes.xml")?.async("string");
805
+ const endnotesText = await zip.file("word/endnotes.xml")?.async("string");
806
+ const commentsText = await zip.file("word/comments.xml")?.async("string");
260
807
  const relMap = parseDocRelsMap(relsText ?? null);
808
+ const footnotesMap = parseFootnotesMap(footnotesText ?? null);
809
+ const endnotesMap = parseEndnotesMap(endnotesText ?? null);
810
+ const commentsMap = parseCommentsMap(commentsText ?? null);
811
+ const usedFootnoteIds = [];
812
+ const usedEndnoteIds = [];
813
+ const usedCommentIds = [];
261
814
  const documentXml = parseXml(documentXmlText);
262
815
  const body = queryByLocalName(documentXml, "body");
263
816
  if (!body) {
@@ -272,7 +825,20 @@ async function parseDocxToHtmlSnapshot(file) {
272
825
  if (child.localName === "sectPr") continue;
273
826
  if (child.localName === "p") {
274
827
  const paragraphIndex = paragraphIndexMap.get(child) ?? null;
275
- blockHtml.push(await paragraphToHtml(zip, relMap, child, paragraphIndex));
828
+ blockHtml.push(
829
+ await paragraphToHtml(
830
+ zip,
831
+ relMap,
832
+ child,
833
+ paragraphIndex,
834
+ footnotesMap,
835
+ usedFootnoteIds,
836
+ endnotesMap,
837
+ usedEndnoteIds,
838
+ commentsMap,
839
+ usedCommentIds
840
+ )
841
+ );
276
842
  continue;
277
843
  }
278
844
  if (child.localName === "tbl") {
@@ -280,6 +846,9 @@ async function parseDocxToHtmlSnapshot(file) {
280
846
  continue;
281
847
  }
282
848
  }
849
+ blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
850
+ blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
851
+ blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
283
852
  return buildHtmlSnapshot(blockHtml.join("\n"));
284
853
  }
285
854
 
@@ -511,7 +1080,7 @@ function createFallbackWordStyleProfile(sourceFileName = "snapshot") {
511
1080
  paragraphProfiles: []
512
1081
  };
513
1082
  }
514
- function twipToPx(twip) {
1083
+ function twipToPx2(twip) {
515
1084
  return twip / 15;
516
1085
  }
517
1086
  function getAttr2(node, attr) {
@@ -555,10 +1124,10 @@ function parsePageGeometry(documentXml) {
555
1124
  const top = getTwipAttr(pgMar, "w:top") ?? getTwipAttr(pgMar, "top") ?? null;
556
1125
  const bottom = getTwipAttr(pgMar, "w:bottom") ?? getTwipAttr(pgMar, "bottom") ?? null;
557
1126
  return {
558
- contentWidthPx: pageW === null ? null : twipToPx(pageW - left - right),
559
- pageHeightPx: pageH === null ? null : twipToPx(pageH),
560
- marginTopPx: top === null ? null : twipToPx(top),
561
- marginBottomPx: bottom === null ? null : twipToPx(bottom)
1127
+ contentWidthPx: pageW === null ? null : twipToPx2(pageW - left - right),
1128
+ pageHeightPx: pageH === null ? null : twipToPx2(pageH),
1129
+ marginTopPx: top === null ? null : twipToPx2(top),
1130
+ marginBottomPx: bottom === null ? null : twipToPx2(bottom)
562
1131
  };
563
1132
  }
564
1133
  function parseHeadingAlignFromDocument(documentXml) {
@@ -672,7 +1241,28 @@ function parseNumberingMap(numberingXml) {
672
1241
  const lvlMap = abstractMap.get(absId);
673
1242
  if (!lvlMap) continue;
674
1243
  for (const [lvl, spec] of lvlMap.entries()) {
675
- levelMap.set(`${numId}:${lvl}`, spec);
1244
+ levelMap.set(`${numId}:${lvl}`, { ...spec });
1245
+ }
1246
+ const lvlOverrides = queryAllByLocalName2(num, "lvlOverride");
1247
+ for (const override of lvlOverrides) {
1248
+ const ilvl = toInt(getAttr2(override, "w:ilvl") ?? getAttr2(override, "ilvl"));
1249
+ if (ilvl === null) continue;
1250
+ const key = `${numId}:${ilvl}`;
1251
+ const base = levelMap.get(key) ?? { numFmt: null, lvlText: null, startAt: 1 };
1252
+ const overrideStart = toInt(
1253
+ getAttr2(queryAllByLocalName2(override, "startOverride")[0] ?? null, "w:val") ?? getAttr2(queryAllByLocalName2(override, "startOverride")[0] ?? null, "val")
1254
+ );
1255
+ const overrideLvl = queryAllByLocalName2(override, "lvl")[0] ?? null;
1256
+ const overrideNumFmtNode = overrideLvl ? queryAllByLocalName2(overrideLvl, "numFmt")[0] ?? null : null;
1257
+ const overrideLvlTextNode = overrideLvl ? queryAllByLocalName2(overrideLvl, "lvlText")[0] ?? null : null;
1258
+ const overrideLvlStart = toInt(
1259
+ getAttr2(queryAllByLocalName2(overrideLvl ?? override, "start")[0] ?? null, "w:val") ?? getAttr2(queryAllByLocalName2(overrideLvl ?? override, "start")[0] ?? null, "val")
1260
+ );
1261
+ levelMap.set(key, {
1262
+ numFmt: getAttr2(overrideNumFmtNode, "w:val") ?? getAttr2(overrideNumFmtNode, "val") ?? base.numFmt,
1263
+ lvlText: getAttr2(overrideLvlTextNode, "w:val") ?? getAttr2(overrideLvlTextNode, "val") ?? base.lvlText,
1264
+ startAt: overrideStart ?? overrideLvlStart ?? base.startAt
1265
+ });
676
1266
  }
677
1267
  }
678
1268
  return levelMap;
@@ -710,15 +1300,15 @@ function parseParagraphProfiles(documentXml, numberingMap) {
710
1300
  text,
711
1301
  isEmpty: text.length === 0,
712
1302
  align: parseParagraphAlign(paragraph),
713
- beforePx: before === null ? null : twipToPx(before),
714
- afterPx: after === null ? null : twipToPx(after),
1303
+ beforePx: before === null ? null : twipToPx2(before),
1304
+ afterPx: after === null ? null : twipToPx2(after),
715
1305
  lineHeightRatio: line === null || lineHeightRule !== "auto" ? null : line / 240,
716
- lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx(line),
1306
+ lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx2(line),
717
1307
  lineHeightRule,
718
- indentLeftPx: left === null ? null : twipToPx(left),
719
- indentRightPx: right === null ? null : twipToPx(right),
720
- firstLinePx: firstLine === null ? null : twipToPx(firstLine),
721
- hangingPx: hanging === null ? null : twipToPx(hanging),
1308
+ indentLeftPx: left === null ? null : twipToPx2(left),
1309
+ indentRightPx: right === null ? null : twipToPx2(right),
1310
+ firstLinePx: firstLine === null ? null : twipToPx2(firstLine),
1311
+ hangingPx: hanging === null ? null : twipToPx2(hanging),
722
1312
  listNumId,
723
1313
  listLevel,
724
1314
  listFormat: listSpec?.numFmt ?? null,
@@ -753,19 +1343,19 @@ function parseTableDefaults(stylesXml) {
753
1343
  return {
754
1344
  topPx: (() => {
755
1345
  const v = getTwipAttr(top, "w:w") ?? getTwipAttr(top, "w") ?? null;
756
- return v === null ? null : twipToPx(v);
1346
+ return v === null ? null : twipToPx2(v);
757
1347
  })(),
758
1348
  leftPx: (() => {
759
1349
  const v = getTwipAttr(left, "w:w") ?? getTwipAttr(left, "w") ?? null;
760
- return v === null ? null : twipToPx(v);
1350
+ return v === null ? null : twipToPx2(v);
761
1351
  })(),
762
1352
  bottomPx: (() => {
763
1353
  const v = getTwipAttr(bottom, "w:w") ?? getTwipAttr(bottom, "w") ?? null;
764
- return v === null ? null : twipToPx(v);
1354
+ return v === null ? null : twipToPx2(v);
765
1355
  })(),
766
1356
  rightPx: (() => {
767
1357
  const v = getTwipAttr(right, "w:w") ?? getTwipAttr(right, "w") ?? null;
768
- return v === null ? null : twipToPx(v);
1358
+ return v === null ? null : twipToPx2(v);
769
1359
  })()
770
1360
  };
771
1361
  }
@@ -863,9 +1453,9 @@ function parseDefaults(stylesXml) {
863
1453
  const rawLineRule = (getAttr2(spacing, "w:lineRule") ?? getAttr2(spacing, "lineRule") ?? "auto").toLowerCase();
864
1454
  const bodyLineHeightRule = rawLineRule === "exact" ? "exact" : rawLineRule === "atleast" ? "atLeast" : "auto";
865
1455
  const bodyLineHeightRatio = line === null || bodyLineHeightRule !== "auto" ? null : line / 240;
866
- const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx(line);
1456
+ const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx2(line);
867
1457
  const after = getTwipAttr(spacing, "w:after") ?? getTwipAttr(spacing, "after") ?? null;
868
- const paragraphAfterPx = after === null ? null : twipToPx(after);
1458
+ const paragraphAfterPx = after === null ? null : twipToPx2(after);
869
1459
  return { bodyFontPx, bodyLineHeightRatio, bodyLineHeightPx, bodyLineHeightRule, paragraphAfterPx };
870
1460
  }
871
1461
  function parseHeading1Style(stylesXml) {
@@ -918,7 +1508,8 @@ function inferTitleFontFamily(families) {
918
1508
  return FALLBACK_PROFILE.titleFontFamily;
919
1509
  }
920
1510
  async function parseDocxStyleProfile(file) {
921
- const buffer = await file.arrayBuffer();
1511
+ const maybeArrayBuffer = file.arrayBuffer;
1512
+ const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
922
1513
  const zip = await import_jszip2.default.loadAsync(buffer);
923
1514
  const documentXmlText = await zip.file("word/document.xml")?.async("string");
924
1515
  const stylesXmlText = await zip.file("word/styles.xml")?.async("string");
@@ -1280,6 +1871,7 @@ function applyParagraphProfiles(doc, styleProfile) {
1280
1871
  if (!alreadyHasMarker) {
1281
1872
  const marker = doc.createElement("span");
1282
1873
  marker.className = "__word-list-marker";
1874
+ marker.setAttribute("data-word-list-marker", "1");
1283
1875
  marker.textContent = `${markerText} `;
1284
1876
  marker.style.display = "inline-block";
1285
1877
  marker.style.minWidth = "1.8em";
@@ -1331,7 +1923,7 @@ function applyKeepPagination(doc, styleProfile, paragraphs) {
1331
1923
  for (let i = 0; i < count; i += 1) {
1332
1924
  const p = paragraphs[i];
1333
1925
  const profile = styleProfile.paragraphProfiles[i];
1334
- const h2 = paragraphHeightPx(p);
1926
+ const h = paragraphHeightPx(p);
1335
1927
  const forceBreak = profile.pageBreakBefore;
1336
1928
  if (forceBreak && used > 0) {
1337
1929
  insertPageSpacerBefore(doc, p, contentHeight - used);
@@ -1342,11 +1934,11 @@ function applyKeepPagination(doc, styleProfile, paragraphs) {
1342
1934
  insertPageSpacerBefore(doc, p, contentHeight - used);
1343
1935
  used = 0;
1344
1936
  }
1345
- if (used > 0 && used + h2 > contentHeight) {
1937
+ if (used > 0 && used + h > contentHeight) {
1346
1938
  insertPageSpacerBefore(doc, p, contentHeight - used);
1347
1939
  used = 0;
1348
1940
  }
1349
- used += h2;
1941
+ used += h;
1350
1942
  if (used >= contentHeight) {
1351
1943
  used = used % contentHeight;
1352
1944
  }
@@ -1388,6 +1980,41 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
1388
1980
  }
1389
1981
 
1390
1982
  // src/core/DocsWordElement.ts
1983
+ var VERSION = "0.1.2";
1984
+ var MESSAGES = {
1985
+ zh: {
1986
+ readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
1987
+ uploadWord: "\u4E0A\u4F20 Word",
1988
+ clear: "\u6E05\u7A7A",
1989
+ pastePlaceholder: "\u5728\u6B64\u5904\u7C98\u8D34 Word/WPS/Google Docs \u5185\u5BB9\uFF08Ctrl/Cmd+V\uFF09",
1990
+ waitImport: "\u7B49\u5F85\u5185\u5BB9\u5BFC\u5165",
1991
+ loadedHtml: "\u5DF2\u52A0\u8F7D HTML \u5FEB\u7167",
1992
+ cleared: "\u6587\u6863\u5DF2\u6E05\u7A7A",
1993
+ loadedWord: (name) => `\u5DF2\u52A0\u8F7D Word \u6587\u4EF6: ${name}`,
1994
+ importedClipboard: "\u5DF2\u5BFC\u5165\u526A\u8D34\u677F\u5185\u5BB9",
1995
+ noContent: "\u672A\u68C0\u6D4B\u5230\u53EF\u5BFC\u5165\u5185\u5BB9",
1996
+ noClipboardRead: "\u5F53\u524D\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 clipboard.read",
1997
+ parseFailed: "Word \u89E3\u6790\u5931\u8D25",
1998
+ clipboardReadFailed: "\u8BFB\u53D6\u526A\u8D34\u677F\u5931\u8D25",
1999
+ errorPrefix: "\u9519\u8BEF: "
2000
+ },
2001
+ en: {
2002
+ readClipboard: "Read clipboard",
2003
+ uploadWord: "Upload Word",
2004
+ clear: "Clear",
2005
+ pastePlaceholder: "Paste Word/WPS/Google Docs content here (Ctrl/Cmd+V)",
2006
+ waitImport: "Waiting for input",
2007
+ loadedHtml: "HTML snapshot loaded",
2008
+ cleared: "Document cleared",
2009
+ loadedWord: (name) => `Word file loaded: ${name}`,
2010
+ importedClipboard: "Clipboard content imported",
2011
+ noContent: "No importable content detected",
2012
+ noClipboardRead: "navigator.clipboard.read is not supported in this browser",
2013
+ parseFailed: "Word parse failed",
2014
+ clipboardReadFailed: "Failed to read clipboard",
2015
+ errorPrefix: "Error: "
2016
+ }
2017
+ };
1391
2018
  var BASE_CSS = `
1392
2019
  :host{display:block;border:1px solid #d8deea;border-radius:12px;background:#fff;overflow:hidden;font-family:ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto}
1393
2020
  .toolbar{display:flex;gap:8px;flex-wrap:wrap;padding:10px;border-bottom:1px solid #e8edf6;background:#f8faff}
@@ -1398,6 +2025,10 @@ iframe{width:100%;min-height:760px;border:0}
1398
2025
  `;
1399
2026
  var DocsWordElement = class extends HTMLElement {
1400
2027
  rootRef;
2028
+ toolbar;
2029
+ btnRead;
2030
+ btnUpload;
2031
+ btnClear;
1401
2032
  frame;
1402
2033
  pasteArea;
1403
2034
  fileInput;
@@ -1405,64 +2036,92 @@ var DocsWordElement = class extends HTMLElement {
1405
2036
  htmlSnapshot;
1406
2037
  styleProfile = null;
1407
2038
  frameHeight = 0;
2039
+ locale = "zh";
1408
2040
  constructor() {
1409
2041
  super();
1410
2042
  this.rootRef = this.attachShadow({ mode: "open" });
2043
+ this.locale = this.parseLocale(this.getAttribute("lang"));
1411
2044
  this.htmlSnapshot = buildHtmlSnapshot("<p><br/></p>");
1412
2045
  const style = document.createElement("style");
1413
2046
  style.textContent = BASE_CSS;
1414
- const toolbar = document.createElement("div");
1415
- toolbar.className = "toolbar";
1416
- const btnRead = document.createElement("button");
1417
- btnRead.textContent = "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6";
1418
- btnRead.onclick = () => void this.readClipboard();
1419
- const btnUpload = document.createElement("button");
1420
- btnUpload.textContent = "\u4E0A\u4F20 Word";
1421
- btnUpload.onclick = () => this.fileInput.click();
1422
- const btnClear = document.createElement("button");
1423
- btnClear.textContent = "\u6E05\u7A7A";
1424
- btnClear.onclick = () => this.clear();
2047
+ this.toolbar = document.createElement("div");
2048
+ this.toolbar.className = "toolbar";
2049
+ this.btnRead = document.createElement("button");
2050
+ this.btnRead.onclick = () => void this.loadClipboard();
2051
+ this.btnUpload = document.createElement("button");
2052
+ this.btnUpload.onclick = () => this.fileInput.click();
2053
+ this.btnClear = document.createElement("button");
2054
+ this.btnClear.onclick = () => this.clear();
1425
2055
  this.fileInput = document.createElement("input");
1426
2056
  this.fileInput.type = "file";
1427
2057
  this.fileInput.accept = ".docx";
1428
2058
  this.fileInput.style.display = "none";
1429
2059
  this.fileInput.onchange = () => void this.onUpload();
1430
- toolbar.append(btnRead, btnUpload, btnClear, this.fileInput);
2060
+ this.toolbar.append(this.btnRead, this.btnUpload, this.btnClear, this.fileInput);
1431
2061
  this.pasteArea = document.createElement("textarea");
1432
2062
  this.pasteArea.className = "paste";
1433
- this.pasteArea.placeholder = "\u5728\u6B64\u5904\u7C98\u8D34 Word/WPS/Google Docs \u5185\u5BB9\uFF08Ctrl/Cmd+V\uFF09";
2063
+ this.pasteArea.placeholder = "";
1434
2064
  this.pasteArea.onpaste = (event) => {
1435
2065
  event.preventDefault();
1436
2066
  void this.applyFromClipboardData(event.clipboardData);
1437
2067
  };
1438
2068
  this.hint = document.createElement("span");
1439
2069
  this.hint.className = "hint";
1440
- this.hint.textContent = "\u7B49\u5F85\u5185\u5BB9\u5BFC\u5165";
2070
+ this.hint.textContent = "";
1441
2071
  this.frame = document.createElement("iframe");
1442
2072
  this.frame.sandbox.add("allow-same-origin", "allow-scripts");
1443
2073
  this.frame.onload = () => this.onFrameLoad();
1444
- this.rootRef.append(style, toolbar, this.pasteArea, this.hint, this.frame);
2074
+ this.rootRef.append(style, this.toolbar, this.pasteArea, this.hint, this.frame);
2075
+ this.syncLocaleText();
2076
+ this.syncToolbarVisibility();
2077
+ }
2078
+ static get observedAttributes() {
2079
+ return ["lang", "show-toolbar"];
2080
+ }
2081
+ attributeChangedCallback(name, _, newValue) {
2082
+ if (name === "lang") {
2083
+ this.locale = this.parseLocale(newValue);
2084
+ this.syncLocaleText();
2085
+ return;
2086
+ }
2087
+ if (name === "show-toolbar") {
2088
+ this.syncToolbarVisibility();
2089
+ }
1445
2090
  }
1446
2091
  connectedCallback() {
1447
2092
  this.renderSnapshot();
2093
+ this.dispatchEvent(new CustomEvent("docsjs-ready", { detail: { version: VERSION } }));
1448
2094
  }
1449
2095
  setSnapshot(rawHtml) {
2096
+ this.loadHtml(rawHtml);
2097
+ }
2098
+ loadHtml(rawHtml) {
1450
2099
  this.styleProfile = null;
1451
2100
  this.htmlSnapshot = buildHtmlSnapshot(rawHtml);
1452
2101
  this.renderSnapshot();
1453
- this.hint.textContent = "\u5DF2\u52A0\u8F7D HTML \u5FEB\u7167";
1454
- this.emitChange();
2102
+ this.setHint(MESSAGES[this.locale].loadedHtml);
2103
+ this.emitChange("api");
2104
+ }
2105
+ getSnapshot() {
2106
+ return this.htmlSnapshot;
1455
2107
  }
1456
2108
  clear() {
1457
2109
  this.styleProfile = null;
1458
2110
  this.htmlSnapshot = buildHtmlSnapshot("<p><br/></p>");
1459
2111
  this.renderSnapshot();
1460
- this.hint.textContent = "\u6587\u6863\u5DF2\u6E05\u7A7A";
1461
- this.emitChange();
2112
+ this.setHint(MESSAGES[this.locale].cleared);
2113
+ this.emitChange("clear");
2114
+ }
2115
+ async loadDocx(file) {
2116
+ await this.applyDocx(file);
1462
2117
  }
1463
2118
  async onUpload() {
1464
2119
  const file = this.fileInput.files?.[0];
1465
2120
  if (!file) return;
2121
+ await this.applyDocx(file);
2122
+ this.fileInput.value = "";
2123
+ }
2124
+ async applyDocx(file) {
1466
2125
  try {
1467
2126
  const [snapshot, profile] = await Promise.all([
1468
2127
  parseDocxToHtmlSnapshot(file),
@@ -1471,17 +2130,15 @@ var DocsWordElement = class extends HTMLElement {
1471
2130
  this.styleProfile = profile;
1472
2131
  this.htmlSnapshot = snapshot;
1473
2132
  this.renderSnapshot();
1474
- this.hint.textContent = `\u5DF2\u52A0\u8F7D Word \u6587\u4EF6: ${profile.sourceFileName}`;
1475
- this.emitChange();
2133
+ this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
2134
+ this.emitChange("upload", profile.sourceFileName);
1476
2135
  } catch (error) {
1477
- this.emitError(error instanceof Error ? error.message : "Word \u89E3\u6790\u5931\u8D25");
1478
- } finally {
1479
- this.fileInput.value = "";
2136
+ this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
1480
2137
  }
1481
2138
  }
1482
- async readClipboard() {
2139
+ async loadClipboard() {
1483
2140
  if (!navigator.clipboard?.read) {
1484
- this.emitError("\u5F53\u524D\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 clipboard.read");
2141
+ this.emitError(MESSAGES[this.locale].noClipboardRead);
1485
2142
  return;
1486
2143
  }
1487
2144
  try {
@@ -1489,7 +2146,7 @@ var DocsWordElement = class extends HTMLElement {
1489
2146
  const payload = await extractFromClipboardItems(items);
1490
2147
  this.applyPayload(payload.html, payload.text);
1491
2148
  } catch (error) {
1492
- this.emitError(error instanceof Error ? error.message : "\u8BFB\u53D6\u526A\u8D34\u677F\u5931\u8D25");
2149
+ this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].clipboardReadFailed);
1493
2150
  }
1494
2151
  }
1495
2152
  async applyFromClipboardData(data) {
@@ -1504,12 +2161,12 @@ var DocsWordElement = class extends HTMLElement {
1504
2161
  } else if (text.trim()) {
1505
2162
  this.htmlSnapshot = buildHtmlSnapshot(`<p>${text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;")}</p>`);
1506
2163
  } else {
1507
- this.hint.textContent = "\u672A\u68C0\u6D4B\u5230\u53EF\u5BFC\u5165\u5185\u5BB9";
2164
+ this.setHint(MESSAGES[this.locale].noContent);
1508
2165
  return;
1509
2166
  }
1510
2167
  this.renderSnapshot();
1511
- this.hint.textContent = "\u5DF2\u5BFC\u5165\u526A\u8D34\u677F\u5185\u5BB9";
1512
- this.emitChange();
2168
+ this.setHint(MESSAGES[this.locale].importedClipboard);
2169
+ this.emitChange("paste");
1513
2170
  }
1514
2171
  onFrameLoad() {
1515
2172
  const doc = this.frame.contentDocument;
@@ -1534,12 +2191,33 @@ var DocsWordElement = class extends HTMLElement {
1534
2191
  renderSnapshot() {
1535
2192
  this.frame.srcdoc = this.htmlSnapshot;
1536
2193
  }
1537
- emitChange() {
1538
- this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot } }));
2194
+ emitChange(source, fileName) {
2195
+ this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName } }));
1539
2196
  }
1540
2197
  emitError(message) {
1541
2198
  this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
1542
- this.hint.textContent = `\u9519\u8BEF: ${message}`;
2199
+ this.setHint(`${MESSAGES[this.locale].errorPrefix}${message}`);
2200
+ }
2201
+ setHint(text) {
2202
+ this.hint.textContent = text;
2203
+ }
2204
+ parseLocale(value) {
2205
+ return value?.toLowerCase() === "en" ? "en" : "zh";
2206
+ }
2207
+ syncToolbarVisibility() {
2208
+ const raw = this.getAttribute("show-toolbar");
2209
+ const show = raw === null || raw === "" || raw === "1" || raw.toLowerCase() === "true";
2210
+ this.toolbar.style.display = show ? "flex" : "none";
2211
+ }
2212
+ syncLocaleText() {
2213
+ const t = MESSAGES[this.locale];
2214
+ this.btnRead.textContent = t.readClipboard;
2215
+ this.btnUpload.textContent = t.uploadWord;
2216
+ this.btnClear.textContent = t.clear;
2217
+ this.pasteArea.placeholder = t.pastePlaceholder;
2218
+ if (!this.hint.textContent || this.hint.textContent === MESSAGES.en.waitImport || this.hint.textContent === MESSAGES.zh.waitImport) {
2219
+ this.hint.textContent = t.waitImport;
2220
+ }
1543
2221
  }
1544
2222
  };
1545
2223
  function defineDocsWordElement() {
@@ -1548,62 +2226,74 @@ function defineDocsWordElement() {
1548
2226
  }
1549
2227
  }
1550
2228
 
1551
- // src/react/WordFidelityEditorReact.tsx
1552
- var import_react = __toESM(require("react"), 1);
1553
- defineDocsWordElement();
1554
- function WordFidelityEditorReact({ onChange, onError }) {
1555
- const ref2 = (0, import_react.useRef)(null);
1556
- (0, import_react.useEffect)(() => {
1557
- const node = ref2.current;
1558
- if (!node) return;
1559
- const onChangeEvent = (event) => {
1560
- const detail = event.detail;
1561
- onChange?.(detail);
1562
- };
1563
- const onErrorEvent = (event) => {
1564
- const detail = event.detail;
1565
- onError?.(detail);
1566
- };
1567
- node.addEventListener("docsjs-change", onChangeEvent);
1568
- node.addEventListener("docsjs-error", onErrorEvent);
1569
- return () => {
1570
- node.removeEventListener("docsjs-change", onChangeEvent);
1571
- node.removeEventListener("docsjs-error", onErrorEvent);
1572
- };
1573
- }, [onChange, onError]);
1574
- return import_react.default.createElement("docs-word-editor", { ref: ref2 });
2229
+ // src/lib/semanticStats.ts
2230
+ function countElements(root, selector) {
2231
+ return root.querySelectorAll(selector).length;
2232
+ }
2233
+ function isListLikeParagraph(p) {
2234
+ if (p.hasAttribute("data-word-list")) return true;
2235
+ if (p.querySelector("span.__word-list-marker")) return true;
2236
+ const style = (p.getAttribute("style") ?? "").toLowerCase();
2237
+ return style.includes("mso-list");
2238
+ }
2239
+ function collectSemanticStatsFromDocument(doc) {
2240
+ const paragraphs = Array.from(doc.querySelectorAll("p"));
2241
+ const listParagraphCount = paragraphs.filter((p) => isListLikeParagraph(p)).length;
2242
+ const textCharCount = (doc.body.textContent ?? "").replace(/\s+/g, "").length;
2243
+ return {
2244
+ paragraphCount: paragraphs.length,
2245
+ headingCount: countElements(doc, "h1,h2,h3,h4,h5,h6"),
2246
+ tableCount: countElements(doc, "table"),
2247
+ tableCellCount: countElements(doc, "td,th"),
2248
+ imageCount: countElements(doc, "img"),
2249
+ anchorImageCount: countElements(doc, 'img[data-word-anchor="1"]'),
2250
+ wrappedImageCount: countElements(doc, "img[data-word-wrap]"),
2251
+ ommlCount: countElements(doc, "[data-word-omml]"),
2252
+ chartCount: countElements(doc, "[data-word-chart]"),
2253
+ smartArtCount: countElements(doc, "[data-word-smartart]"),
2254
+ listParagraphCount,
2255
+ commentRefCount: countElements(doc, "[data-word-comment-ref]"),
2256
+ revisionInsCount: countElements(doc, '[data-word-revision="ins"]'),
2257
+ revisionDelCount: countElements(doc, '[data-word-revision="del"]'),
2258
+ pageBreakCount: countElements(doc, "[data-word-page-break='1']"),
2259
+ pageSpacerCount: countElements(doc, "[data-word-page-spacer='1']"),
2260
+ textCharCount
2261
+ };
2262
+ }
2263
+ function collectSemanticStatsFromHtml(rawHtml) {
2264
+ const parser = new DOMParser();
2265
+ const doc = parser.parseFromString(rawHtml, "text/html");
2266
+ return collectSemanticStatsFromDocument(doc);
1575
2267
  }
1576
2268
 
1577
- // src/vue/WordFidelityEditorVue.ts
1578
- var import_vue = require("vue");
1579
- defineDocsWordElement();
1580
- var WordFidelityEditorVue = (0, import_vue.defineComponent)({
1581
- name: "WordFidelityEditorVue",
1582
- emits: ["change", "error"],
1583
- setup(_, { emit }) {
1584
- const elRef = (0, import_vue.ref)(null);
1585
- const onChange = (event) => {
1586
- emit("change", event.detail);
1587
- };
1588
- const onError = (event) => {
1589
- emit("error", event.detail);
1590
- };
1591
- (0, import_vue.onMounted)(() => {
1592
- elRef.value?.addEventListener("docsjs-change", onChange);
1593
- elRef.value?.addEventListener("docsjs-error", onError);
1594
- });
1595
- (0, import_vue.onBeforeUnmount)(() => {
1596
- elRef.value?.removeEventListener("docsjs-change", onChange);
1597
- elRef.value?.removeEventListener("docsjs-error", onError);
1598
- });
1599
- return () => (0, import_vue.h)("docs-word-editor", { ref: elRef });
1600
- }
1601
- });
2269
+ // src/lib/fidelityScore.ts
2270
+ function ratioScore(actual, expected) {
2271
+ if (expected <= 0 && actual <= 0) return 1;
2272
+ if (expected <= 0 || actual < 0) return 0;
2273
+ const delta = Math.abs(actual - expected);
2274
+ const penalty = delta / expected;
2275
+ return Math.max(0, 1 - penalty);
2276
+ }
2277
+ function clamp01(v) {
2278
+ if (v < 0) return 0;
2279
+ if (v > 1) return 1;
2280
+ return v;
2281
+ }
2282
+ function calculateFidelityScore(expected, actual) {
2283
+ const structure = clamp01(
2284
+ (ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.ommlCount, expected.ommlCount) + ratioScore(actual.chartCount, expected.chartCount) + ratioScore(actual.smartArtCount, expected.smartArtCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) / 9
2285
+ );
2286
+ const styleProxy = clamp01(ratioScore(actual.textCharCount, expected.textCharCount));
2287
+ const pagination = clamp01(ratioScore(actual.pageSpacerCount, expected.pageSpacerCount));
2288
+ const overall = clamp01(structure * 0.6 + styleProxy * 0.25 + pagination * 0.15);
2289
+ return { structure, styleProxy, pagination, overall };
2290
+ }
1602
2291
  // Annotate the CommonJS export names for ESM import in node:
1603
2292
  0 && (module.exports = {
1604
2293
  DocsWordElement,
1605
- WordFidelityEditorReact,
1606
- WordFidelityEditorVue,
2294
+ calculateFidelityScore,
2295
+ collectSemanticStatsFromDocument,
2296
+ collectSemanticStatsFromHtml,
1607
2297
  defineDocsWordElement
1608
2298
  });
1609
2299
  //# sourceMappingURL=index.cjs.map