@coding01/docsjs 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -34,7 +34,9 @@ __export(index_exports, {
34
34
  calculateFidelityScore: () => calculateFidelityScore,
35
35
  collectSemanticStatsFromDocument: () => collectSemanticStatsFromDocument,
36
36
  collectSemanticStatsFromHtml: () => collectSemanticStatsFromHtml,
37
- defineDocsWordElement: () => defineDocsWordElement
37
+ defineDocsWordElement: () => defineDocsWordElement,
38
+ parseDocxToHtmlSnapshot: () => parseDocxToHtmlSnapshot,
39
+ parseDocxToHtmlSnapshotWithReport: () => parseDocxToHtmlSnapshotWithReport
38
40
  });
39
41
  module.exports = __toCommonJS(index_exports);
40
42
 
@@ -57,6 +59,21 @@ function buildHtmlSnapshot(rawHtml) {
57
59
 
58
60
  // src/lib/docxHtml.ts
59
61
  var import_jszip = __toESM(require("jszip"), 1);
62
+ function createEmptyFeatureCounts() {
63
+ return {
64
+ hyperlinkCount: 0,
65
+ anchorImageCount: 0,
66
+ chartCount: 0,
67
+ smartArtCount: 0,
68
+ ommlCount: 0,
69
+ tableCount: 0,
70
+ footnoteRefCount: 0,
71
+ endnoteRefCount: 0,
72
+ commentRefCount: 0,
73
+ revisionCount: 0,
74
+ pageBreakCount: 0
75
+ };
76
+ }
60
77
  function escapeHtml(text) {
61
78
  return text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
62
79
  }
@@ -80,6 +97,9 @@ function getAttr(node, name) {
80
97
  function emuToPx(emu) {
81
98
  return emu * 96 / 914400;
82
99
  }
100
+ function twipToPx(twip) {
101
+ return twip * 96 / 1440;
102
+ }
83
103
  function parseDrawingSizePx(drawing) {
84
104
  const extentNode = queryAllByLocalName(drawing, "extent").find((node) => {
85
105
  const parent = node.parentElement;
@@ -108,9 +128,7 @@ function imageDimensionAttributes(sizePx) {
108
128
  }
109
129
  return attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
110
130
  }
111
- function parseAnchorPositionPx(drawing) {
112
- const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
113
- if (!anchor) return { leftPx: null, topPx: null };
131
+ function parseAnchorPositionPx(anchor) {
114
132
  let leftPx = null;
115
133
  let topPx = null;
116
134
  const positionH = directChildrenByLocalName(anchor, "positionH")[0] ?? null;
@@ -125,34 +143,80 @@ function parseAnchorPositionPx(drawing) {
125
143
  if (Number.isFinite(top)) topPx = emuToPx(top);
126
144
  return { leftPx, topPx };
127
145
  }
128
- function parseAnchorWrapMode(drawing) {
129
- const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
130
- if (!anchor) return null;
146
+ function parseAnchorWrapMode(anchor) {
131
147
  if (directChildrenByLocalName(anchor, "wrapSquare")[0]) return "square";
132
148
  if (directChildrenByLocalName(anchor, "wrapTight")[0]) return "tight";
133
149
  if (directChildrenByLocalName(anchor, "wrapTopAndBottom")[0]) return "topAndBottom";
134
150
  if (directChildrenByLocalName(anchor, "wrapNone")[0]) return "none";
135
151
  return null;
136
152
  }
137
- function mergeImageStyle(baseAttrs, anchorPos, wrapMode) {
138
- if (anchorPos.leftPx === null && anchorPos.topPx === null) return baseAttrs;
153
+ function parseAnchorMeta(drawing) {
154
+ const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
155
+ if (!anchor) return null;
156
+ const positionH = directChildrenByLocalName(anchor, "positionH")[0] ?? null;
157
+ const positionV = directChildrenByLocalName(anchor, "positionV")[0] ?? null;
158
+ const relativeFromH = getAttr(positionH, "relativeFrom");
159
+ const relativeFromV = getAttr(positionV, "relativeFrom");
160
+ const parseDistPx = (name) => {
161
+ const raw = getAttr(anchor, name);
162
+ const emu = raw ? Number.parseInt(raw, 10) : Number.NaN;
163
+ return Number.isFinite(emu) && emu >= 0 ? emuToPx(emu) : null;
164
+ };
165
+ const rawHeight = getAttr(anchor, "relativeHeight");
166
+ const parsedHeight = rawHeight ? Number.parseInt(rawHeight, 10) : Number.NaN;
167
+ const boolAttr = (name, fallback) => {
168
+ const raw = (getAttr(anchor, name) ?? "").toLowerCase();
169
+ if (raw === "1" || raw === "true" || raw === "on") return true;
170
+ if (raw === "0" || raw === "false" || raw === "off") return false;
171
+ return fallback;
172
+ };
173
+ return {
174
+ position: parseAnchorPositionPx(anchor),
175
+ wrapMode: parseAnchorWrapMode(anchor),
176
+ distTPx: parseDistPx("distT"),
177
+ distBPx: parseDistPx("distB"),
178
+ distLPx: parseDistPx("distL"),
179
+ distRPx: parseDistPx("distR"),
180
+ relativeFromH,
181
+ relativeFromV,
182
+ behindDoc: boolAttr("behindDoc", false),
183
+ allowOverlap: boolAttr("allowOverlap", true),
184
+ layoutInCell: boolAttr("layoutInCell", true),
185
+ relativeHeight: Number.isFinite(parsedHeight) ? parsedHeight : null
186
+ };
187
+ }
188
+ function mergeImageStyle(baseAttrs, anchorMeta) {
189
+ if (!anchorMeta) return baseAttrs;
190
+ const { position, wrapMode } = anchorMeta;
191
+ if (position.leftPx === null && position.topPx === null) return baseAttrs;
139
192
  const styleParts = [
140
193
  "position:absolute",
141
- anchorPos.leftPx !== null ? `left:${anchorPos.leftPx.toFixed(2)}px` : "",
142
- anchorPos.topPx !== null ? `top:${anchorPos.topPx.toFixed(2)}px` : "",
143
- "z-index:3"
194
+ position.leftPx !== null ? `left:${position.leftPx.toFixed(2)}px` : "",
195
+ position.topPx !== null ? `top:${position.topPx.toFixed(2)}px` : "",
196
+ `z-index:${anchorMeta.behindDoc ? 0 : anchorMeta.relativeHeight ?? 3}`,
197
+ anchorMeta.distTPx !== null ? `margin-top:${anchorMeta.distTPx.toFixed(2)}px` : "",
198
+ anchorMeta.distBPx !== null ? `margin-bottom:${anchorMeta.distBPx.toFixed(2)}px` : "",
199
+ anchorMeta.distLPx !== null ? `margin-left:${anchorMeta.distLPx.toFixed(2)}px` : "",
200
+ anchorMeta.distRPx !== null ? `margin-right:${anchorMeta.distRPx.toFixed(2)}px` : ""
144
201
  ].filter((x) => x.length > 0);
145
202
  if (wrapMode === "topAndBottom") {
146
- styleParts.push("display:block");
147
- }
203
+ styleParts.push("display:block", "clear:both");
204
+ }
205
+ const anchorAttrs = [
206
+ `data-word-anchor="1"`,
207
+ wrapMode ? `data-word-wrap="${wrapMode}"` : "",
208
+ anchorMeta.relativeFromH ? `data-word-anchor-relh="${escapeHtml(anchorMeta.relativeFromH)}"` : "",
209
+ anchorMeta.relativeFromV ? `data-word-anchor-relv="${escapeHtml(anchorMeta.relativeFromV)}"` : "",
210
+ anchorMeta.behindDoc ? `data-word-anchor-behind="1"` : `data-word-anchor-behind="0"`,
211
+ anchorMeta.allowOverlap ? `data-word-anchor-overlap="1"` : `data-word-anchor-overlap="0"`,
212
+ anchorMeta.layoutInCell ? `data-word-anchor-layout-cell="1"` : `data-word-anchor-layout-cell="0"`
213
+ ].filter((x) => x.length > 0).join(" ");
148
214
  if (!baseAttrs.includes("style=")) {
149
- const wrapAttr = wrapMode ? ` data-word-wrap="${wrapMode}"` : "";
150
- return `${baseAttrs} style="${styleParts.join(";")}" data-word-anchor="1"${wrapAttr}`;
215
+ return `${baseAttrs} style="${styleParts.join(";")}" ${anchorAttrs}`;
151
216
  }
152
217
  return baseAttrs.replace(/style="([^"]*)"/, (_m, styleText) => {
153
218
  const merged = [styleText, ...styleParts].filter((x) => x.length > 0).join(";");
154
- const wrapAttr = wrapMode ? ` data-word-wrap="${wrapMode}"` : "";
155
- return `style="${merged}" data-word-anchor="1"${wrapAttr}`;
219
+ return `style="${merged}" ${anchorAttrs}`;
156
220
  });
157
221
  }
158
222
  function parseDocRelsMap(relsXmlText) {
@@ -178,11 +242,29 @@ function extToMime(ext) {
178
242
  if (lower === "svg") return "image/svg+xml";
179
243
  return "application/octet-stream";
180
244
  }
245
+ function normalizeWordPath(relTarget) {
246
+ const normalized = relTarget.replace(/\\/g, "/").replace(/^\/+/, "");
247
+ if (normalized.startsWith("word/")) return normalized;
248
+ if (normalized.startsWith("../")) return `word/${normalized.replace(/^(\.\.\/)+/, "")}`;
249
+ return `word/${normalized}`;
250
+ }
251
+ function resolveHyperlinkHref(relMap, rid, anchor) {
252
+ if (anchor && anchor.trim()) return `#${encodeURIComponent(anchor.trim())}`;
253
+ if (!rid) return null;
254
+ const relTarget = relMap[rid];
255
+ if (!relTarget) return null;
256
+ const trimmed = relTarget.trim();
257
+ if (!trimmed) return null;
258
+ const lower = trimmed.toLowerCase();
259
+ if (lower.startsWith("http://") || lower.startsWith("https://") || lower.startsWith("mailto:") || lower.startsWith("tel:")) {
260
+ return trimmed;
261
+ }
262
+ return trimmed.startsWith("#") ? trimmed : `#${encodeURIComponent(trimmed)}`;
263
+ }
181
264
  async function imageRidToDataUrl(zip, relMap, rid) {
182
265
  const relTarget = relMap[rid];
183
266
  if (!relTarget) return null;
184
- const normalized = relTarget.replace(/^\/+/, "");
185
- const path = normalized.startsWith("word/") ? normalized : `word/${normalized}`;
267
+ const path = normalizeWordPath(relTarget);
186
268
  const file = zip.file(path);
187
269
  if (!file) return null;
188
270
  const base64 = await file.async("base64");
@@ -190,6 +272,55 @@ async function imageRidToDataUrl(zip, relMap, rid) {
190
272
  const mime = extToMime(ext);
191
273
  return `data:${mime};base64,${base64}`;
192
274
  }
275
+ async function readXmlByRid(zip, relMap, rid) {
276
+ const relTarget = relMap[rid];
277
+ if (!relTarget) return null;
278
+ const path = normalizeWordPath(relTarget);
279
+ const file = zip.file(path);
280
+ return file ? file.async("string") : null;
281
+ }
282
+ function parseChartType(chartDoc) {
283
+ const known = ["barChart", "lineChart", "pieChart", "areaChart", "scatterChart", "radarChart", "doughnutChart"];
284
+ for (const type of known) {
285
+ if (queryByLocalName(chartDoc, type)) return type.replace(/Chart$/, "");
286
+ }
287
+ return "unknown";
288
+ }
289
+ function parseChartSummary(chartXmlText) {
290
+ const chartDoc = parseXml(chartXmlText);
291
+ const title = queryAllByLocalName(chartDoc, "t").map((n) => (n.textContent ?? "").trim()).find((v) => v.length > 0) ?? "Chart";
292
+ const seriesCount = queryAllByLocalName(chartDoc, "ser").length;
293
+ const pointCount = queryAllByLocalName(chartDoc, "pt").length;
294
+ const type = parseChartType(chartDoc);
295
+ return { title, type, seriesCount, pointCount };
296
+ }
297
+ function extractSmartArtText(diagramXmlText) {
298
+ const diagramDoc = parseXml(diagramXmlText);
299
+ return queryAllByLocalName(diagramDoc, "t").map((n) => (n.textContent ?? "").trim()).filter((v) => v.length > 0).slice(0, 12);
300
+ }
301
+ function ommlNodeToText(node) {
302
+ if (node.localName === "t") return node.textContent ?? "";
303
+ if (node.localName === "f") {
304
+ const num = queryByLocalName(node, "num");
305
+ const den = queryByLocalName(node, "den");
306
+ return `(${num ? ommlNodeToText(num) : "?"})/(${den ? ommlNodeToText(den) : "?"})`;
307
+ }
308
+ if (node.localName === "sSup") {
309
+ const e = queryByLocalName(node, "e");
310
+ const sup = queryByLocalName(node, "sup");
311
+ return `${e ? ommlNodeToText(e) : ""}^(${sup ? ommlNodeToText(sup) : ""})`;
312
+ }
313
+ if (node.localName === "sSub") {
314
+ const e = queryByLocalName(node, "e");
315
+ const sub = queryByLocalName(node, "sub");
316
+ return `${e ? ommlNodeToText(e) : ""}_(${sub ? ommlNodeToText(sub) : ""})`;
317
+ }
318
+ if (node.localName === "rad") {
319
+ const e = queryByLocalName(node, "e");
320
+ return `sqrt(${e ? ommlNodeToText(e) : ""})`;
321
+ }
322
+ return Array.from(node.children).map((child) => ommlNodeToText(child)).join("");
323
+ }
193
324
  function runStyleToCss(rPr) {
194
325
  if (!rPr) return "";
195
326
  const declarations = [];
@@ -304,48 +435,73 @@ function renderEndnotesSection(usedIds, endnotesMap) {
304
435
  const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
305
436
  return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
306
437
  }
307
- async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
438
+ async function paragraphToHtml(zip, relMap, context, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
308
439
  const tag = paragraphTag(paragraph);
309
440
  const alignStyle = paragraphAlignStyle(paragraph);
310
441
  const dataAttr = paragraphDataAttr(paragraphIndex);
311
- const runs = queryAllByLocalName(paragraph, "r");
312
- if (runs.length === 0) {
442
+ const hasRenderableNode = queryAllByLocalName(paragraph, "r").length > 0 || queryAllByLocalName(paragraph, "oMath").length > 0 || queryAllByLocalName(paragraph, "oMathPara").length > 0;
443
+ if (!hasRenderableNode) {
313
444
  return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}><br/></${tag}>`;
314
445
  }
315
- const parts = [];
316
- const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
317
- for (let i = 0; i < renderedPageBreakCount; i += 1) {
318
- parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
446
+ function parseRevisionMeta(node, type) {
447
+ return {
448
+ type,
449
+ id: getAttr(node, "w:id") ?? getAttr(node, "id"),
450
+ author: getAttr(node, "w:author") ?? getAttr(node, "author"),
451
+ date: getAttr(node, "w:date") ?? getAttr(node, "date")
452
+ };
453
+ }
454
+ function inferRevisionMeta(run, fallback) {
455
+ if (fallback) return fallback;
456
+ let cursor = run;
457
+ while (cursor) {
458
+ if (cursor.localName === "ins") return parseRevisionMeta(cursor, "ins");
459
+ if (cursor.localName === "del") return parseRevisionMeta(cursor, "del");
460
+ if (cursor.localName === "p") break;
461
+ cursor = cursor.parentElement;
462
+ }
463
+ return null;
464
+ }
465
+ function revisionMetaAttrs(meta) {
466
+ const attrs = [`data-word-revision="${meta.type}"`];
467
+ if (meta.id) attrs.push(`data-word-revision-id="${escapeHtml(meta.id)}"`);
468
+ if (meta.author) attrs.push(`data-word-revision-author="${escapeHtml(meta.author)}"`);
469
+ if (meta.date) attrs.push(`data-word-revision-date="${escapeHtml(meta.date)}"`);
470
+ return attrs.join(" ");
319
471
  }
320
- for (const run of runs) {
472
+ async function runToHtml(run, revisionFallback) {
473
+ const result = [];
321
474
  const rPr = queryByLocalName(run, "rPr");
322
475
  const css = runStyleToCss(rPr);
323
476
  const footnoteRef = queryByLocalName(run, "footnoteReference");
324
477
  const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
325
478
  if (footnoteId && footnotesMap[footnoteId]) {
479
+ context.features.footnoteRefCount += 1;
326
480
  usedFootnoteIds.push(footnoteId);
327
- parts.push(
481
+ result.push(
328
482
  `<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
329
483
  );
330
- continue;
484
+ return result;
331
485
  }
332
486
  const endnoteRef = queryByLocalName(run, "endnoteReference");
333
487
  const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
334
488
  if (endnoteId && endnotesMap[endnoteId]) {
489
+ context.features.endnoteRefCount += 1;
335
490
  usedEndnoteIds.push(endnoteId);
336
- parts.push(
491
+ result.push(
337
492
  `<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
338
493
  );
339
- continue;
494
+ return result;
340
495
  }
341
496
  const commentRef = queryByLocalName(run, "commentReference");
342
497
  const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
343
498
  if (commentId && commentsMap[commentId]) {
499
+ context.features.commentRefCount += 1;
344
500
  usedCommentIds.push(commentId);
345
- parts.push(
501
+ result.push(
346
502
  `<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
347
503
  );
348
- continue;
504
+ return result;
349
505
  }
350
506
  const drawing = queryByLocalName(run, "drawing");
351
507
  if (drawing) {
@@ -356,13 +512,38 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
356
512
  if (src) {
357
513
  const imageSize = parseDrawingSizePx(drawing);
358
514
  const dimensionAttrs = imageDimensionAttributes(imageSize);
359
- const anchorPos = parseAnchorPositionPx(drawing);
360
- const wrapMode = parseAnchorWrapMode(drawing);
361
- const attrs = mergeImageStyle(dimensionAttrs, anchorPos, wrapMode);
362
- parts.push(`<img src="${src}" alt="word-image"${attrs}/>`);
363
- continue;
515
+ const anchorMeta = parseAnchorMeta(drawing);
516
+ const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
517
+ if (anchorMeta) context.features.anchorImageCount += 1;
518
+ result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
519
+ return result;
520
+ }
521
+ }
522
+ const chartRef = queryByLocalName(drawing, "chart");
523
+ const chartRid = getAttr(chartRef, "r:id") ?? getAttr(chartRef, "id");
524
+ if (chartRid) {
525
+ const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
526
+ if (chartXmlText) {
527
+ const summary = parseChartSummary(chartXmlText);
528
+ context.features.chartCount += 1;
529
+ result.push(
530
+ `<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
531
+ );
532
+ return result;
364
533
  }
365
534
  }
535
+ const smartArtRef = queryByLocalName(drawing, "relIds");
536
+ const smartArtRid = getAttr(smartArtRef, "r:dm") ?? getAttr(smartArtRef, "dm");
537
+ if (smartArtRid) {
538
+ const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
539
+ const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
540
+ context.features.smartArtCount += 1;
541
+ const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
542
+ result.push(
543
+ `<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
544
+ );
545
+ return result;
546
+ }
366
547
  }
367
548
  const texts = queryAllByLocalName(run, "t").map((t) => t.textContent ?? "").join("");
368
549
  const delTexts = queryAllByLocalName(run, "delText").map((t) => t.textContent ?? "").join("");
@@ -373,40 +554,86 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
373
554
  }).length;
374
555
  const lineBreakCount = Math.max(0, brNodes.length - pageBreakCount);
375
556
  const runText2 = `${escapeHtml(texts || delTexts)}${"<br/>".repeat(lineBreakCount)}`;
376
- if (!runText2) continue;
377
- let revisionType = null;
378
- let cursor = run;
379
- while (cursor) {
380
- if (cursor.localName === "ins") {
381
- revisionType = "ins";
382
- break;
383
- }
384
- if (cursor.localName === "del") {
385
- revisionType = "del";
386
- break;
557
+ if (runText2) {
558
+ const revisionMeta = inferRevisionMeta(run, revisionFallback);
559
+ if (css) {
560
+ const span = `<span style="${css}">${runText2}</span>`;
561
+ if (revisionMeta) {
562
+ context.features.revisionCount += 1;
563
+ const tagName = revisionMeta.type === "ins" ? "ins" : "del";
564
+ result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
565
+ } else {
566
+ result.push(span);
567
+ }
568
+ } else if (revisionMeta) {
569
+ context.features.revisionCount += 1;
570
+ const tagName = revisionMeta.type === "ins" ? "ins" : "del";
571
+ result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
572
+ } else {
573
+ result.push(runText2);
387
574
  }
388
- if (cursor.localName === "p") break;
389
- cursor = cursor.parentElement;
390
575
  }
391
- if (css) {
392
- const span = `<span style="${css}">${runText2}</span>`;
393
- if (revisionType) {
394
- const tag2 = revisionType === "ins" ? "ins" : "del";
395
- parts.push(`<${tag2} data-word-revision="${revisionType}">${span}</${tag2}>`);
396
- } else {
397
- parts.push(span);
576
+ for (let i = 0; i < pageBreakCount; i += 1) {
577
+ context.features.pageBreakCount += 1;
578
+ result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
579
+ }
580
+ return result;
581
+ }
582
+ async function nodeToHtml(node, revisionFallback) {
583
+ if (node.localName === "commentRangeStart") {
584
+ const id = getAttr(node, "w:id") ?? getAttr(node, "id");
585
+ return id ? [`<span data-word-comment-range-start="${id}"></span>`] : [];
586
+ }
587
+ if (node.localName === "commentRangeEnd") {
588
+ const id = getAttr(node, "w:id") ?? getAttr(node, "id");
589
+ return id ? [`<span data-word-comment-range-end="${id}"></span>`] : [];
590
+ }
591
+ if (node.localName === "r") {
592
+ return runToHtml(node, revisionFallback);
593
+ }
594
+ if (node.localName === "hyperlink") {
595
+ const rid = getAttr(node, "r:id") ?? getAttr(node, "id");
596
+ const anchor = getAttr(node, "w:anchor") ?? getAttr(node, "anchor");
597
+ const href = resolveHyperlinkHref(relMap, rid, anchor);
598
+ const nested2 = [];
599
+ for (const child of Array.from(node.children)) {
600
+ nested2.push(...await nodeToHtml(child, revisionFallback));
398
601
  }
399
- } else {
400
- if (revisionType) {
401
- const tag2 = revisionType === "ins" ? "ins" : "del";
402
- parts.push(`<${tag2} data-word-revision="${revisionType}">${runText2}</${tag2}>`);
403
- } else {
404
- parts.push(runText2);
602
+ const content2 = nested2.join("") || escapeHtml(node.textContent ?? "");
603
+ if (!href) return content2 ? [content2] : [];
604
+ context.features.hyperlinkCount += 1;
605
+ return [
606
+ `<a data-word-hyperlink="1" href="${escapeHtml(href)}" rel="noreferrer noopener" target="_blank">${content2}</a>`
607
+ ];
608
+ }
609
+ if (node.localName === "oMath" || node.localName === "oMathPara") {
610
+ const linear = ommlNodeToText(node).trim();
611
+ if (!linear) return [];
612
+ context.features.ommlCount += 1;
613
+ return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
614
+ }
615
+ if (node.localName === "ins" || node.localName === "del") {
616
+ const scopedMeta = parseRevisionMeta(node, node.localName === "ins" ? "ins" : "del");
617
+ const nested2 = [];
618
+ for (const child of Array.from(node.children)) {
619
+ nested2.push(...await nodeToHtml(child, scopedMeta));
405
620
  }
621
+ return nested2;
406
622
  }
407
- for (let i = 0; i < pageBreakCount; i += 1) {
408
- parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
623
+ const nested = [];
624
+ for (const child of Array.from(node.children)) {
625
+ nested.push(...await nodeToHtml(child, revisionFallback));
409
626
  }
627
+ return nested;
628
+ }
629
+ const parts = [];
630
+ const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
631
+ for (let i = 0; i < renderedPageBreakCount; i += 1) {
632
+ context.features.pageBreakCount += 1;
633
+ parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
634
+ }
635
+ for (const child of Array.from(paragraph.children)) {
636
+ parts.push(...await nodeToHtml(child, null));
410
637
  }
411
638
  const content = parts.join("") || "<br/>";
412
639
  return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}>${content}</${tag}>`;
@@ -436,7 +663,102 @@ function parseTcVMerge(tc) {
436
663
  const rawVal = (getAttr(vMerge, "w:val") ?? getAttr(vMerge, "val") ?? "continue").toLowerCase();
437
664
  return rawVal === "restart" ? "restart" : "continue";
438
665
  }
439
- function tableCellHtml(cell, paragraphIndexMap) {
666
+ function parseTblGridWidthsPx(table) {
667
+ const grid = directChildrenByLocalName(table, "tblGrid")[0] ?? null;
668
+ if (!grid) return [];
669
+ return directChildrenByLocalName(grid, "gridCol").map((col) => {
670
+ const raw = getAttr(col, "w:w") ?? getAttr(col, "w");
671
+ const twip = raw ? Number.parseInt(raw, 10) : Number.NaN;
672
+ return Number.isFinite(twip) && twip > 0 ? twipToPx(twip) : 0;
673
+ }).filter((px) => px > 0);
674
+ }
675
+ function borderSizeToPx(size) {
676
+ return size / 6;
677
+ }
678
+ function parseBorderCss(borderNode) {
679
+ if (!borderNode) return null;
680
+ const val = (getAttr(borderNode, "w:val") ?? getAttr(borderNode, "val") ?? "").toLowerCase();
681
+ if (!val || val === "nil" || val === "none") return "none";
682
+ const color = (getAttr(borderNode, "w:color") ?? getAttr(borderNode, "color") ?? "222222").replace(/^#/, "");
683
+ const rawSize = getAttr(borderNode, "w:sz") ?? getAttr(borderNode, "sz");
684
+ const size = rawSize ? Number.parseInt(rawSize, 10) : Number.NaN;
685
+ const px = Number.isFinite(size) && size > 0 ? borderSizeToPx(size) : 1;
686
+ const style = val === "single" ? "solid" : val;
687
+ return `${px.toFixed(2)}px ${style} #${color}`;
688
+ }
689
+ function parseTableStyleProfile(table) {
690
+ const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
691
+ const tblBorders = tblPr ? directChildrenByLocalName(tblPr, "tblBorders")[0] ?? null : null;
692
+ const layout = tblPr ? directChildrenByLocalName(tblPr, "tblLayout")[0] ?? null : null;
693
+ const spacing = tblPr ? directChildrenByLocalName(tblPr, "tblCellSpacing")[0] ?? null : null;
694
+ const spacingType = (getAttr(spacing, "w:type") ?? getAttr(spacing, "type") ?? "dxa").toLowerCase();
695
+ const spacingRaw = getAttr(spacing, "w:w") ?? getAttr(spacing, "w");
696
+ const spacingVal = spacingRaw ? Number.parseFloat(spacingRaw) : Number.NaN;
697
+ const borderSpacingPx = spacingType === "dxa" && Number.isFinite(spacingVal) && spacingVal > 0 ? twipToPx(spacingVal) : 0;
698
+ const borderCollapse = borderSpacingPx > 0 ? "separate" : "collapse";
699
+ const tableLayout = (getAttr(layout, "w:type") ?? getAttr(layout, "type") ?? "").toLowerCase() === "autofit" ? "auto" : "fixed";
700
+ const top = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "top")[0] ?? null : null);
701
+ const bottom = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "bottom")[0] ?? null : null);
702
+ const left = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "left")[0] ?? null : null);
703
+ const right = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "right")[0] ?? null : null);
704
+ const insideH = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "insideH")[0] ?? null : null);
705
+ const insideV = parseBorderCss(tblBorders ? directChildrenByLocalName(tblBorders, "insideV")[0] ?? null : null);
706
+ const borderCss = top ?? right ?? bottom ?? left ?? "1px solid #222";
707
+ return {
708
+ tableLayout,
709
+ borderCollapse,
710
+ borderSpacingPx,
711
+ borderCss,
712
+ insideHCss: insideH,
713
+ insideVCss: insideV
714
+ };
715
+ }
716
+ function parseTableWidthStyle(table, gridWidthsPx) {
717
+ const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
718
+ const tblW = tblPr ? directChildrenByLocalName(tblPr, "tblW")[0] ?? null : null;
719
+ const type = (getAttr(tblW, "w:type") ?? getAttr(tblW, "type") ?? "").toLowerCase();
720
+ const rawVal = getAttr(tblW, "w:w") ?? getAttr(tblW, "w");
721
+ const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
722
+ if (type === "dxa" && Number.isFinite(numericVal) && numericVal > 0) {
723
+ return `width:${twipToPx(numericVal).toFixed(2)}px`;
724
+ }
725
+ if (type === "pct" && Number.isFinite(numericVal) && numericVal > 0) {
726
+ return `width:${(numericVal / 50).toFixed(2)}%`;
727
+ }
728
+ const gridTotal = gridWidthsPx.reduce((sum, item) => sum + item, 0);
729
+ if (gridTotal > 0) return `width:${gridTotal.toFixed(2)}px;max-width:100%`;
730
+ return "width:100%";
731
+ }
732
+ function parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx) {
733
+ const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
734
+ const tcW = tcPr ? directChildrenByLocalName(tcPr, "tcW")[0] ?? null : null;
735
+ const type = (getAttr(tcW, "w:type") ?? getAttr(tcW, "type") ?? "").toLowerCase();
736
+ const rawVal = getAttr(tcW, "w:w") ?? getAttr(tcW, "w");
737
+ const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
738
+ if (type === "dxa" && Number.isFinite(numericVal) && numericVal > 0) {
739
+ return `width:${twipToPx(numericVal).toFixed(2)}px`;
740
+ }
741
+ if (type === "pct" && Number.isFinite(numericVal) && numericVal > 0) {
742
+ return `width:${(numericVal / 50).toFixed(2)}%`;
743
+ }
744
+ const width = gridWidthsPx.slice(colCursor, colCursor + colSpan).reduce((sum, item) => sum + item, 0);
745
+ if (width > 0) return `width:${width.toFixed(2)}px`;
746
+ return "";
747
+ }
748
+ function parseCellBorderStyle(cell, tableStyle) {
749
+ const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
750
+ const tcBorders = tcPr ? directChildrenByLocalName(tcPr, "tcBorders")[0] ?? null : null;
751
+ if (!tcBorders) {
752
+ const fallback = tableStyle.insideHCss ?? tableStyle.insideVCss ?? tableStyle.borderCss;
753
+ return `border:${fallback}`;
754
+ }
755
+ const top = parseBorderCss(directChildrenByLocalName(tcBorders, "top")[0] ?? null) ?? tableStyle.insideHCss ?? tableStyle.borderCss;
756
+ const right = parseBorderCss(directChildrenByLocalName(tcBorders, "right")[0] ?? null) ?? tableStyle.insideVCss ?? tableStyle.borderCss;
757
+ const bottom = parseBorderCss(directChildrenByLocalName(tcBorders, "bottom")[0] ?? null) ?? tableStyle.insideHCss ?? tableStyle.borderCss;
758
+ const left = parseBorderCss(directChildrenByLocalName(tcBorders, "left")[0] ?? null) ?? tableStyle.insideVCss ?? tableStyle.borderCss;
759
+ return `border-top:${top};border-right:${right};border-bottom:${bottom};border-left:${left}`;
760
+ }
761
+ function tableCellHtml(cell, paragraphIndexMap, context) {
440
762
  const blocks = [];
441
763
  for (const child of Array.from(cell.children)) {
442
764
  if (child.localName === "tcPr") continue;
@@ -446,7 +768,7 @@ function tableCellHtml(cell, paragraphIndexMap) {
446
768
  continue;
447
769
  }
448
770
  if (child.localName === "tbl") {
449
- blocks.push(tableToHtml(child, paragraphIndexMap));
771
+ blocks.push(tableToHtml(child, paragraphIndexMap, context));
450
772
  continue;
451
773
  }
452
774
  }
@@ -454,8 +776,11 @@ function tableCellHtml(cell, paragraphIndexMap) {
454
776
  const text = queryAllByLocalName(cell, "t").map((t) => t.textContent ?? "").join("").trim();
455
777
  return escapeHtml(text) || "<br/>";
456
778
  }
457
- function tableToHtml(table, paragraphIndexMap) {
779
+ function tableToHtml(table, paragraphIndexMap, context) {
780
+ context.features.tableCount += 1;
458
781
  const rows = directChildrenByLocalName(table, "tr");
782
+ const gridWidthsPx = parseTblGridWidthsPx(table);
783
+ const tableStyle = parseTableStyleProfile(table);
459
784
  const activeByCol = /* @__PURE__ */ new Map();
460
785
  const allOrigins = [];
461
786
  let nextOriginId = 1;
@@ -480,8 +805,10 @@ function tableToHtml(table, paragraphIndexMap) {
480
805
  while (activeByCol.has(colCursor)) {
481
806
  colCursor += 1;
482
807
  }
483
- const html = tableCellHtml(cell, paragraphIndexMap);
808
+ const html = tableCellHtml(cell, paragraphIndexMap, context);
484
809
  const attrs = [];
810
+ const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
811
+ const borderStyle = parseCellBorderStyle(cell, tableStyle);
485
812
  if (vMerge === "restart") {
486
813
  const origin = {
487
814
  id: `m${nextOriginId}`,
@@ -499,7 +826,7 @@ function tableToHtml(table, paragraphIndexMap) {
499
826
  }
500
827
  if (colSpan > 1) attrs.push(`colspan="${colSpan}"`);
501
828
  emittedCells.push(
502
- `<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="border:1px solid #222;vertical-align:top;">${html}</td>`
829
+ `<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="${borderStyle};vertical-align:top;${widthStyle}">${html}</td>`
503
830
  );
504
831
  colCursor += colSpan;
505
832
  }
@@ -518,9 +845,13 @@ function tableToHtml(table, paragraphIndexMap) {
518
845
  const replacement = origin.rowSpan > 1 ? `rowspan="${origin.rowSpan}"` : "";
519
846
  merged = merged.replace(marker, replacement).replace(/\s{2,}/g, " ");
520
847
  }
521
- return `<table style="border-collapse:collapse;table-layout:fixed;width:100%;border:1px solid #222;">${merged}</table>`;
848
+ const tableWidthStyle = parseTableWidthStyle(table, gridWidthsPx);
849
+ const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
850
+ return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
522
851
  }
523
- async function parseDocxToHtmlSnapshot(file) {
852
+ async function parseDocxToHtmlSnapshotWithReport(file) {
853
+ const startedAt = Date.now();
854
+ const context = { features: createEmptyFeatureCounts() };
524
855
  const maybeArrayBuffer = file.arrayBuffer;
525
856
  const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
526
857
  const zip = await import_jszip.default.loadAsync(buffer);
@@ -557,6 +888,7 @@ async function parseDocxToHtmlSnapshot(file) {
557
888
  await paragraphToHtml(
558
889
  zip,
559
890
  relMap,
891
+ context,
560
892
  child,
561
893
  paragraphIndex,
562
894
  footnotesMap,
@@ -570,14 +902,24 @@ async function parseDocxToHtmlSnapshot(file) {
570
902
  continue;
571
903
  }
572
904
  if (child.localName === "tbl") {
573
- blockHtml.push(tableToHtml(child, paragraphIndexMap));
905
+ blockHtml.push(tableToHtml(child, paragraphIndexMap, context));
574
906
  continue;
575
907
  }
576
908
  }
577
909
  blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
578
910
  blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
579
911
  blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
580
- return buildHtmlSnapshot(blockHtml.join("\n"));
912
+ return {
913
+ htmlSnapshot: buildHtmlSnapshot(blockHtml.join("\n")),
914
+ report: {
915
+ elapsedMs: Date.now() - startedAt,
916
+ features: context.features
917
+ }
918
+ };
919
+ }
920
+ async function parseDocxToHtmlSnapshot(file) {
921
+ const result = await parseDocxToHtmlSnapshotWithReport(file);
922
+ return result.htmlSnapshot;
581
923
  }
582
924
 
583
925
  // src/lib/pastePipeline.ts
@@ -808,7 +1150,7 @@ function createFallbackWordStyleProfile(sourceFileName = "snapshot") {
808
1150
  paragraphProfiles: []
809
1151
  };
810
1152
  }
811
- function twipToPx(twip) {
1153
+ function twipToPx2(twip) {
812
1154
  return twip / 15;
813
1155
  }
814
1156
  function getAttr2(node, attr) {
@@ -852,10 +1194,10 @@ function parsePageGeometry(documentXml) {
852
1194
  const top = getTwipAttr(pgMar, "w:top") ?? getTwipAttr(pgMar, "top") ?? null;
853
1195
  const bottom = getTwipAttr(pgMar, "w:bottom") ?? getTwipAttr(pgMar, "bottom") ?? null;
854
1196
  return {
855
- contentWidthPx: pageW === null ? null : twipToPx(pageW - left - right),
856
- pageHeightPx: pageH === null ? null : twipToPx(pageH),
857
- marginTopPx: top === null ? null : twipToPx(top),
858
- marginBottomPx: bottom === null ? null : twipToPx(bottom)
1197
+ contentWidthPx: pageW === null ? null : twipToPx2(pageW - left - right),
1198
+ pageHeightPx: pageH === null ? null : twipToPx2(pageH),
1199
+ marginTopPx: top === null ? null : twipToPx2(top),
1200
+ marginBottomPx: bottom === null ? null : twipToPx2(bottom)
859
1201
  };
860
1202
  }
861
1203
  function parseHeadingAlignFromDocument(documentXml) {
@@ -1028,15 +1370,15 @@ function parseParagraphProfiles(documentXml, numberingMap) {
1028
1370
  text,
1029
1371
  isEmpty: text.length === 0,
1030
1372
  align: parseParagraphAlign(paragraph),
1031
- beforePx: before === null ? null : twipToPx(before),
1032
- afterPx: after === null ? null : twipToPx(after),
1373
+ beforePx: before === null ? null : twipToPx2(before),
1374
+ afterPx: after === null ? null : twipToPx2(after),
1033
1375
  lineHeightRatio: line === null || lineHeightRule !== "auto" ? null : line / 240,
1034
- lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx(line),
1376
+ lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx2(line),
1035
1377
  lineHeightRule,
1036
- indentLeftPx: left === null ? null : twipToPx(left),
1037
- indentRightPx: right === null ? null : twipToPx(right),
1038
- firstLinePx: firstLine === null ? null : twipToPx(firstLine),
1039
- hangingPx: hanging === null ? null : twipToPx(hanging),
1378
+ indentLeftPx: left === null ? null : twipToPx2(left),
1379
+ indentRightPx: right === null ? null : twipToPx2(right),
1380
+ firstLinePx: firstLine === null ? null : twipToPx2(firstLine),
1381
+ hangingPx: hanging === null ? null : twipToPx2(hanging),
1040
1382
  listNumId,
1041
1383
  listLevel,
1042
1384
  listFormat: listSpec?.numFmt ?? null,
@@ -1071,19 +1413,19 @@ function parseTableDefaults(stylesXml) {
1071
1413
  return {
1072
1414
  topPx: (() => {
1073
1415
  const v = getTwipAttr(top, "w:w") ?? getTwipAttr(top, "w") ?? null;
1074
- return v === null ? null : twipToPx(v);
1416
+ return v === null ? null : twipToPx2(v);
1075
1417
  })(),
1076
1418
  leftPx: (() => {
1077
1419
  const v = getTwipAttr(left, "w:w") ?? getTwipAttr(left, "w") ?? null;
1078
- return v === null ? null : twipToPx(v);
1420
+ return v === null ? null : twipToPx2(v);
1079
1421
  })(),
1080
1422
  bottomPx: (() => {
1081
1423
  const v = getTwipAttr(bottom, "w:w") ?? getTwipAttr(bottom, "w") ?? null;
1082
- return v === null ? null : twipToPx(v);
1424
+ return v === null ? null : twipToPx2(v);
1083
1425
  })(),
1084
1426
  rightPx: (() => {
1085
1427
  const v = getTwipAttr(right, "w:w") ?? getTwipAttr(right, "w") ?? null;
1086
- return v === null ? null : twipToPx(v);
1428
+ return v === null ? null : twipToPx2(v);
1087
1429
  })()
1088
1430
  };
1089
1431
  }
@@ -1181,9 +1523,9 @@ function parseDefaults(stylesXml) {
1181
1523
  const rawLineRule = (getAttr2(spacing, "w:lineRule") ?? getAttr2(spacing, "lineRule") ?? "auto").toLowerCase();
1182
1524
  const bodyLineHeightRule = rawLineRule === "exact" ? "exact" : rawLineRule === "atleast" ? "atLeast" : "auto";
1183
1525
  const bodyLineHeightRatio = line === null || bodyLineHeightRule !== "auto" ? null : line / 240;
1184
- const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx(line);
1526
+ const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx2(line);
1185
1527
  const after = getTwipAttr(spacing, "w:after") ?? getTwipAttr(spacing, "after") ?? null;
1186
- const paragraphAfterPx = after === null ? null : twipToPx(after);
1528
+ const paragraphAfterPx = after === null ? null : twipToPx2(after);
1187
1529
  return { bodyFontPx, bodyLineHeightRatio, bodyLineHeightPx, bodyLineHeightRule, paragraphAfterPx };
1188
1530
  }
1189
1531
  function parseHeading1Style(stylesXml) {
@@ -1708,7 +2050,7 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
1708
2050
  }
1709
2051
 
1710
2052
  // src/core/DocsWordElement.ts
1711
- var VERSION = "0.1.2";
2053
+ var VERSION = "0.1.5";
1712
2054
  var MESSAGES = {
1713
2055
  zh: {
1714
2056
  readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
@@ -1851,15 +2193,15 @@ var DocsWordElement = class extends HTMLElement {
1851
2193
  }
1852
2194
  async applyDocx(file) {
1853
2195
  try {
1854
- const [snapshot, profile] = await Promise.all([
1855
- parseDocxToHtmlSnapshot(file),
2196
+ const [parseResult, profile] = await Promise.all([
2197
+ parseDocxToHtmlSnapshotWithReport(file),
1856
2198
  parseDocxStyleProfile(file)
1857
2199
  ]);
1858
2200
  this.styleProfile = profile;
1859
- this.htmlSnapshot = snapshot;
2201
+ this.htmlSnapshot = parseResult.htmlSnapshot;
1860
2202
  this.renderSnapshot();
1861
2203
  this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
1862
- this.emitChange("upload", profile.sourceFileName);
2204
+ this.emitChange("upload", profile.sourceFileName, parseResult.report);
1863
2205
  } catch (error) {
1864
2206
  this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
1865
2207
  }
@@ -1919,8 +2261,10 @@ var DocsWordElement = class extends HTMLElement {
1919
2261
  renderSnapshot() {
1920
2262
  this.frame.srcdoc = this.htmlSnapshot;
1921
2263
  }
1922
- emitChange(source, fileName) {
1923
- this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName } }));
2264
+ emitChange(source, fileName, parseReport) {
2265
+ this.dispatchEvent(
2266
+ new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName, parseReport } })
2267
+ );
1924
2268
  }
1925
2269
  emitError(message) {
1926
2270
  this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
@@ -1976,6 +2320,9 @@ function collectSemanticStatsFromDocument(doc) {
1976
2320
  imageCount: countElements(doc, "img"),
1977
2321
  anchorImageCount: countElements(doc, 'img[data-word-anchor="1"]'),
1978
2322
  wrappedImageCount: countElements(doc, "img[data-word-wrap]"),
2323
+ ommlCount: countElements(doc, "[data-word-omml]"),
2324
+ chartCount: countElements(doc, "[data-word-chart]"),
2325
+ smartArtCount: countElements(doc, "[data-word-smartart]"),
1979
2326
  listParagraphCount,
1980
2327
  commentRefCount: countElements(doc, "[data-word-comment-ref]"),
1981
2328
  revisionInsCount: countElements(doc, '[data-word-revision="ins"]'),
@@ -2006,7 +2353,7 @@ function clamp01(v) {
2006
2353
  }
2007
2354
  function calculateFidelityScore(expected, actual) {
2008
2355
  const structure = clamp01(
2009
- (ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) / 6
2356
+ (ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.ommlCount, expected.ommlCount) + ratioScore(actual.chartCount, expected.chartCount) + ratioScore(actual.smartArtCount, expected.smartArtCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) / 9
2010
2357
  );
2011
2358
  const styleProxy = clamp01(ratioScore(actual.textCharCount, expected.textCharCount));
2012
2359
  const pagination = clamp01(ratioScore(actual.pageSpacerCount, expected.pageSpacerCount));
@@ -2019,6 +2366,8 @@ function calculateFidelityScore(expected, actual) {
2019
2366
  calculateFidelityScore,
2020
2367
  collectSemanticStatsFromDocument,
2021
2368
  collectSemanticStatsFromHtml,
2022
- defineDocsWordElement
2369
+ defineDocsWordElement,
2370
+ parseDocxToHtmlSnapshot,
2371
+ parseDocxToHtmlSnapshotWithReport
2023
2372
  });
2024
2373
  //# sourceMappingURL=index.cjs.map