@coding01/docsjs 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -31,8 +31,9 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
33
  DocsWordElement: () => DocsWordElement,
34
- WordFidelityEditorReact: () => WordFidelityEditorReact,
35
- WordFidelityEditorVue: () => WordFidelityEditorVue,
34
+ calculateFidelityScore: () => calculateFidelityScore,
35
+ collectSemanticStatsFromDocument: () => collectSemanticStatsFromDocument,
36
+ collectSemanticStatsFromHtml: () => collectSemanticStatsFromHtml,
36
37
  defineDocsWordElement: () => defineDocsWordElement
37
38
  });
38
39
  module.exports = __toCommonJS(index_exports);
@@ -69,6 +70,9 @@ function queryAllByLocalName(root, localName) {
69
70
  function queryByLocalName(root, localName) {
70
71
  return queryAllByLocalName(root, localName)[0] ?? null;
71
72
  }
73
+ function directChildrenByLocalName(node, localName) {
74
+ return Array.from(node.children).filter((child) => child.localName === localName);
75
+ }
72
76
  function getAttr(node, name) {
73
77
  if (!node) return null;
74
78
  return node.getAttribute(name);
@@ -104,6 +108,53 @@ function imageDimensionAttributes(sizePx) {
104
108
  }
105
109
  return attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
106
110
  }
111
+ function parseAnchorPositionPx(drawing) {
112
+ const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
113
+ if (!anchor) return { leftPx: null, topPx: null };
114
+ let leftPx = null;
115
+ let topPx = null;
116
+ const positionH = directChildrenByLocalName(anchor, "positionH")[0] ?? null;
117
+ const positionV = directChildrenByLocalName(anchor, "positionV")[0] ?? null;
118
+ const posH = positionH ? directChildrenByLocalName(positionH, "posOffset")[0] ?? null : null;
119
+ const posV = positionV ? directChildrenByLocalName(positionV, "posOffset")[0] ?? null : null;
120
+ const rawLeft = posH?.textContent?.trim() ?? "";
121
+ const rawTop = posV?.textContent?.trim() ?? "";
122
+ const left = rawLeft ? Number.parseFloat(rawLeft) : Number.NaN;
123
+ const top = rawTop ? Number.parseFloat(rawTop) : Number.NaN;
124
+ if (Number.isFinite(left)) leftPx = emuToPx(left);
125
+ if (Number.isFinite(top)) topPx = emuToPx(top);
126
+ return { leftPx, topPx };
127
+ }
128
+ function parseAnchorWrapMode(drawing) {
129
+ const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
130
+ if (!anchor) return null;
131
+ if (directChildrenByLocalName(anchor, "wrapSquare")[0]) return "square";
132
+ if (directChildrenByLocalName(anchor, "wrapTight")[0]) return "tight";
133
+ if (directChildrenByLocalName(anchor, "wrapTopAndBottom")[0]) return "topAndBottom";
134
+ if (directChildrenByLocalName(anchor, "wrapNone")[0]) return "none";
135
+ return null;
136
+ }
137
+ function mergeImageStyle(baseAttrs, anchorPos, wrapMode) {
138
+ if (anchorPos.leftPx === null && anchorPos.topPx === null) return baseAttrs;
139
+ const styleParts = [
140
+ "position:absolute",
141
+ anchorPos.leftPx !== null ? `left:${anchorPos.leftPx.toFixed(2)}px` : "",
142
+ anchorPos.topPx !== null ? `top:${anchorPos.topPx.toFixed(2)}px` : "",
143
+ "z-index:3"
144
+ ].filter((x) => x.length > 0);
145
+ if (wrapMode === "topAndBottom") {
146
+ styleParts.push("display:block");
147
+ }
148
+ if (!baseAttrs.includes("style=")) {
149
+ const wrapAttr = wrapMode ? ` data-word-wrap="${wrapMode}"` : "";
150
+ return `${baseAttrs} style="${styleParts.join(";")}" data-word-anchor="1"${wrapAttr}`;
151
+ }
152
+ return baseAttrs.replace(/style="([^"]*)"/, (_m, styleText) => {
153
+ const merged = [styleText, ...styleParts].filter((x) => x.length > 0).join(";");
154
+ const wrapAttr = wrapMode ? ` data-word-wrap="${wrapMode}"` : "";
155
+ return `style="${merged}" data-word-anchor="1"${wrapAttr}`;
156
+ });
157
+ }
107
158
  function parseDocRelsMap(relsXmlText) {
108
159
  if (!relsXmlText) return {};
109
160
  const rels = parseXml(relsXmlText);
@@ -179,7 +230,81 @@ function paragraphAlignStyle(paragraph) {
179
230
  function paragraphDataAttr(paragraphIndex) {
180
231
  return paragraphIndex === null ? "" : ` data-word-p-index="${paragraphIndex}"`;
181
232
  }
182
- async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex) {
233
+ function parseFootnotesMap(footnotesXmlText) {
234
+ if (!footnotesXmlText) return {};
235
+ const footnotesDoc = parseXml(footnotesXmlText);
236
+ const map = {};
237
+ const footnotes = queryAllByLocalName(footnotesDoc, "footnote");
238
+ for (const footnote of footnotes) {
239
+ const idRaw = getAttr(footnote, "w:id") ?? getAttr(footnote, "id");
240
+ const idNum = idRaw ? Number.parseInt(idRaw, 10) : Number.NaN;
241
+ if (!Number.isFinite(idNum) || idNum <= 0) continue;
242
+ const paragraphs = queryAllByLocalName(footnote, "p");
243
+ const text = paragraphs.map((p) => paragraphText(p)).join("<br/>").trim();
244
+ if (!text) continue;
245
+ map[String(idNum)] = text;
246
+ }
247
+ return map;
248
+ }
249
+ function parseCommentsMap(commentsXmlText) {
250
+ if (!commentsXmlText) return {};
251
+ const commentsDoc = parseXml(commentsXmlText);
252
+ const map = {};
253
+ const comments = queryAllByLocalName(commentsDoc, "comment");
254
+ for (const comment of comments) {
255
+ const idRaw = getAttr(comment, "w:id") ?? getAttr(comment, "id");
256
+ if (!idRaw) continue;
257
+ const paragraphs = queryAllByLocalName(comment, "p");
258
+ const text = paragraphs.map((p) => paragraphText(p)).join("<br/>").trim();
259
+ if (!text) continue;
260
+ map[idRaw] = {
261
+ author: getAttr(comment, "w:author") ?? getAttr(comment, "author"),
262
+ date: getAttr(comment, "w:date") ?? getAttr(comment, "date"),
263
+ text
264
+ };
265
+ }
266
+ return map;
267
+ }
268
+ function parseEndnotesMap(endnotesXmlText) {
269
+ if (!endnotesXmlText) return {};
270
+ const endnotesDoc = parseXml(endnotesXmlText);
271
+ const map = {};
272
+ const endnotes = queryAllByLocalName(endnotesDoc, "endnote");
273
+ for (const endnote of endnotes) {
274
+ const idRaw = getAttr(endnote, "w:id") ?? getAttr(endnote, "id");
275
+ const idNum = idRaw ? Number.parseInt(idRaw, 10) : Number.NaN;
276
+ if (!Number.isFinite(idNum) || idNum <= 0) continue;
277
+ const paragraphs = queryAllByLocalName(endnote, "p");
278
+ const text = paragraphs.map((p) => paragraphText(p)).join("<br/>").trim();
279
+ if (!text) continue;
280
+ map[String(idNum)] = text;
281
+ }
282
+ return map;
283
+ }
284
+ function renderFootnotesSection(usedIds, footnotesMap) {
285
+ const uniq = [...new Set(usedIds)].filter((id) => footnotesMap[id]);
286
+ if (uniq.length === 0) return "";
287
+ const items = uniq.map((id) => `<li id="word-footnote-${id}" data-word-footnote-id="${id}">${footnotesMap[id]}</li>`).join("");
288
+ return `<section data-word-footnotes="1"><hr/><ol>${items}</ol></section>`;
289
+ }
290
+ function renderCommentsSection(usedIds, commentsMap) {
291
+ const uniq = [...new Set(usedIds)].filter((id) => commentsMap[id]);
292
+ if (uniq.length === 0) return "";
293
+ const items = uniq.map((id) => {
294
+ const item = commentsMap[id];
295
+ const meta = [item.author ?? "", item.date ?? ""].filter((x) => x.length > 0).join(" \xB7 ");
296
+ const metaHtml = meta ? `<div data-word-comment-meta="1">${escapeHtml(meta)}</div>` : "";
297
+ return `<li id="word-comment-${id}" data-word-comment-id="${id}">${metaHtml}<div>${item.text}</div></li>`;
298
+ }).join("");
299
+ return `<section data-word-comments="1"><hr/><ol>${items}</ol></section>`;
300
+ }
301
+ function renderEndnotesSection(usedIds, endnotesMap) {
302
+ const uniq = [...new Set(usedIds)].filter((id) => endnotesMap[id]);
303
+ if (uniq.length === 0) return "";
304
+ const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
305
+ return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
306
+ }
307
+ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
183
308
  const tag = paragraphTag(paragraph);
184
309
  const alignStyle = paragraphAlignStyle(paragraph);
185
310
  const dataAttr = paragraphDataAttr(paragraphIndex);
@@ -188,9 +313,40 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex) {
188
313
  return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}><br/></${tag}>`;
189
314
  }
190
315
  const parts = [];
316
+ const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
317
+ for (let i = 0; i < renderedPageBreakCount; i += 1) {
318
+ parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
319
+ }
191
320
  for (const run of runs) {
192
321
  const rPr = queryByLocalName(run, "rPr");
193
322
  const css = runStyleToCss(rPr);
323
+ const footnoteRef = queryByLocalName(run, "footnoteReference");
324
+ const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
325
+ if (footnoteId && footnotesMap[footnoteId]) {
326
+ usedFootnoteIds.push(footnoteId);
327
+ parts.push(
328
+ `<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
329
+ );
330
+ continue;
331
+ }
332
+ const endnoteRef = queryByLocalName(run, "endnoteReference");
333
+ const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
334
+ if (endnoteId && endnotesMap[endnoteId]) {
335
+ usedEndnoteIds.push(endnoteId);
336
+ parts.push(
337
+ `<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
338
+ );
339
+ continue;
340
+ }
341
+ const commentRef = queryByLocalName(run, "commentReference");
342
+ const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
343
+ if (commentId && commentsMap[commentId]) {
344
+ usedCommentIds.push(commentId);
345
+ parts.push(
346
+ `<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
347
+ );
348
+ continue;
349
+ }
194
350
  const drawing = queryByLocalName(run, "drawing");
195
351
  if (drawing) {
196
352
  const blip = queryByLocalName(drawing, "blip");
@@ -200,19 +356,56 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex) {
200
356
  if (src) {
201
357
  const imageSize = parseDrawingSizePx(drawing);
202
358
  const dimensionAttrs = imageDimensionAttributes(imageSize);
203
- parts.push(`<img src="${src}" alt="word-image"${dimensionAttrs}/>`);
359
+ const anchorPos = parseAnchorPositionPx(drawing);
360
+ const wrapMode = parseAnchorWrapMode(drawing);
361
+ const attrs = mergeImageStyle(dimensionAttrs, anchorPos, wrapMode);
362
+ parts.push(`<img src="${src}" alt="word-image"${attrs}/>`);
204
363
  continue;
205
364
  }
206
365
  }
207
366
  }
208
367
  const texts = queryAllByLocalName(run, "t").map((t) => t.textContent ?? "").join("");
209
- const brs = queryAllByLocalName(run, "br").length;
210
- const runText2 = `${escapeHtml(texts)}${"<br/>".repeat(brs)}`;
368
+ const delTexts = queryAllByLocalName(run, "delText").map((t) => t.textContent ?? "").join("");
369
+ const brNodes = queryAllByLocalName(run, "br");
370
+ const pageBreakCount = brNodes.filter((node) => {
371
+ const type = (getAttr(node, "w:type") ?? getAttr(node, "type") ?? "").toLowerCase();
372
+ return type === "page";
373
+ }).length;
374
+ const lineBreakCount = Math.max(0, brNodes.length - pageBreakCount);
375
+ const runText2 = `${escapeHtml(texts || delTexts)}${"<br/>".repeat(lineBreakCount)}`;
211
376
  if (!runText2) continue;
377
+ let revisionType = null;
378
+ let cursor = run;
379
+ while (cursor) {
380
+ if (cursor.localName === "ins") {
381
+ revisionType = "ins";
382
+ break;
383
+ }
384
+ if (cursor.localName === "del") {
385
+ revisionType = "del";
386
+ break;
387
+ }
388
+ if (cursor.localName === "p") break;
389
+ cursor = cursor.parentElement;
390
+ }
212
391
  if (css) {
213
- parts.push(`<span style="${css}">${runText2}</span>`);
392
+ const span = `<span style="${css}">${runText2}</span>`;
393
+ if (revisionType) {
394
+ const tag2 = revisionType === "ins" ? "ins" : "del";
395
+ parts.push(`<${tag2} data-word-revision="${revisionType}">${span}</${tag2}>`);
396
+ } else {
397
+ parts.push(span);
398
+ }
214
399
  } else {
215
- parts.push(runText2);
400
+ if (revisionType) {
401
+ const tag2 = revisionType === "ins" ? "ins" : "del";
402
+ parts.push(`<${tag2} data-word-revision="${revisionType}">${runText2}</${tag2}>`);
403
+ } else {
404
+ parts.push(runText2);
405
+ }
406
+ }
407
+ for (let i = 0; i < pageBreakCount; i += 1) {
408
+ parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
216
409
  }
217
410
  }
218
411
  const content = parts.join("") || "<br/>";
@@ -220,33 +413,112 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex) {
220
413
  }
221
414
  function runText(run) {
222
415
  const text = queryAllByLocalName(run, "t").map((t) => t.textContent ?? "").join("");
416
+ const delText = queryAllByLocalName(run, "delText").map((t) => t.textContent ?? "").join("");
223
417
  const brCount = queryAllByLocalName(run, "br").length;
224
- return `${escapeHtml(text)}${"<br/>".repeat(brCount)}`;
418
+ return `${escapeHtml(text || delText)}${"<br/>".repeat(brCount)}`;
225
419
  }
226
420
  function paragraphText(paragraph) {
227
421
  const runs = queryAllByLocalName(paragraph, "r");
228
422
  const content = runs.map((run) => runText(run)).join("");
229
423
  return content || "<br/>";
230
424
  }
425
+ function parseTcGridSpan(tc) {
426
+ const tcPr = directChildrenByLocalName(tc, "tcPr")[0] ?? null;
427
+ const gridSpan = tcPr ? directChildrenByLocalName(tcPr, "gridSpan")[0] ?? null : null;
428
+ const rawVal = getAttr(gridSpan, "w:val") ?? getAttr(gridSpan, "val");
429
+ const parsed = rawVal ? Number.parseInt(rawVal, 10) : Number.NaN;
430
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : 1;
431
+ }
432
+ function parseTcVMerge(tc) {
433
+ const tcPr = directChildrenByLocalName(tc, "tcPr")[0] ?? null;
434
+ const vMerge = tcPr ? directChildrenByLocalName(tcPr, "vMerge")[0] ?? null : null;
435
+ if (!vMerge) return "none";
436
+ const rawVal = (getAttr(vMerge, "w:val") ?? getAttr(vMerge, "val") ?? "continue").toLowerCase();
437
+ return rawVal === "restart" ? "restart" : "continue";
438
+ }
231
439
  function tableCellHtml(cell, paragraphIndexMap) {
232
- const paragraphs = queryAllByLocalName(cell, "p");
233
- if (paragraphs.length === 0) {
234
- const text = queryAllByLocalName(cell, "t").map((t) => t.textContent ?? "").join("").trim();
235
- return escapeHtml(text) || "<br/>";
440
+ const blocks = [];
441
+ for (const child of Array.from(cell.children)) {
442
+ if (child.localName === "tcPr") continue;
443
+ if (child.localName === "p") {
444
+ const paragraphIndex = paragraphIndexMap.get(child) ?? null;
445
+ blocks.push(`<p${paragraphDataAttr(paragraphIndex)}>${paragraphText(child)}</p>`);
446
+ continue;
447
+ }
448
+ if (child.localName === "tbl") {
449
+ blocks.push(tableToHtml(child, paragraphIndexMap));
450
+ continue;
451
+ }
236
452
  }
237
- return paragraphs.map((p) => {
238
- const paragraphIndex = paragraphIndexMap.get(p) ?? null;
239
- return `<p${paragraphDataAttr(paragraphIndex)}>${paragraphText(p)}</p>`;
240
- }).join("");
453
+ if (blocks.length > 0) return blocks.join("");
454
+ const text = queryAllByLocalName(cell, "t").map((t) => t.textContent ?? "").join("").trim();
455
+ return escapeHtml(text) || "<br/>";
241
456
  }
242
457
  function tableToHtml(table, paragraphIndexMap) {
243
- const rows = queryAllByLocalName(table, "tr");
244
- const htmlRows = rows.map((row) => {
245
- const cells = queryAllByLocalName(row, "tc");
246
- const htmlCells = cells.map((cell) => `<td style="border:1px solid #222;vertical-align:top;">${tableCellHtml(cell, paragraphIndexMap)}</td>`).join("");
247
- return `<tr>${htmlCells}</tr>`;
458
+ const rows = directChildrenByLocalName(table, "tr");
459
+ const activeByCol = /* @__PURE__ */ new Map();
460
+ const allOrigins = [];
461
+ let nextOriginId = 1;
462
+ const htmlRows = rows.map((row, rowIndex) => {
463
+ const directCells = directChildrenByLocalName(row, "tc");
464
+ const continued = /* @__PURE__ */ new Set();
465
+ const emittedCells = [];
466
+ let colCursor = 0;
467
+ for (const cell of directCells) {
468
+ const colSpan = parseTcGridSpan(cell);
469
+ const vMerge = parseTcVMerge(cell);
470
+ if (vMerge === "continue") {
471
+ const activeOrigins = Array.from(new Set(activeByCol.values())).filter((origin2) => !continued.has(origin2)).sort((a, b) => a.startCol - b.startCol);
472
+ const origin = activeOrigins.find((item) => item.startCol >= colCursor) ?? activeOrigins[0] ?? null;
473
+ if (origin) {
474
+ origin.rowSpan += 1;
475
+ continued.add(origin);
476
+ colCursor = origin.startCol + origin.colSpan;
477
+ }
478
+ continue;
479
+ }
480
+ while (activeByCol.has(colCursor)) {
481
+ colCursor += 1;
482
+ }
483
+ const html = tableCellHtml(cell, paragraphIndexMap);
484
+ const attrs = [];
485
+ if (vMerge === "restart") {
486
+ const origin = {
487
+ id: `m${nextOriginId}`,
488
+ startCol: colCursor,
489
+ colSpan,
490
+ rowSpan: 1,
491
+ startedRow: rowIndex
492
+ };
493
+ nextOriginId += 1;
494
+ allOrigins.push(origin);
495
+ for (let i = 0; i < colSpan; i += 1) {
496
+ activeByCol.set(colCursor + i, origin);
497
+ }
498
+ attrs.push(`data-word-merge-id="${origin.id}"`);
499
+ }
500
+ if (colSpan > 1) attrs.push(`colspan="${colSpan}"`);
501
+ emittedCells.push(
502
+ `<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="border:1px solid #222;vertical-align:top;">${html}</td>`
503
+ );
504
+ colCursor += colSpan;
505
+ }
506
+ for (const origin of Array.from(new Set(activeByCol.values()))) {
507
+ if (origin.startedRow < rowIndex && !continued.has(origin)) {
508
+ for (let i = 0; i < origin.colSpan; i += 1) {
509
+ activeByCol.delete(origin.startCol + i);
510
+ }
511
+ }
512
+ }
513
+ return `<tr>${emittedCells.join("")}</tr>`;
248
514
  });
249
- return `<table style="border-collapse:collapse;table-layout:fixed;width:100%;border:1px solid #222;">${htmlRows.join("")}</table>`;
515
+ let merged = htmlRows.join("");
516
+ for (const origin of allOrigins) {
517
+ const marker = `data-word-merge-id="${origin.id}"`;
518
+ const replacement = origin.rowSpan > 1 ? `rowspan="${origin.rowSpan}"` : "";
519
+ merged = merged.replace(marker, replacement).replace(/\s{2,}/g, " ");
520
+ }
521
+ return `<table style="border-collapse:collapse;table-layout:fixed;width:100%;border:1px solid #222;">${merged}</table>`;
250
522
  }
251
523
  async function parseDocxToHtmlSnapshot(file) {
252
524
  const maybeArrayBuffer = file.arrayBuffer;
@@ -257,7 +529,16 @@ async function parseDocxToHtmlSnapshot(file) {
257
529
  throw new Error("DOCX missing document.xml");
258
530
  }
259
531
  const relsText = await zip.file("word/_rels/document.xml.rels")?.async("string");
532
+ const footnotesText = await zip.file("word/footnotes.xml")?.async("string");
533
+ const endnotesText = await zip.file("word/endnotes.xml")?.async("string");
534
+ const commentsText = await zip.file("word/comments.xml")?.async("string");
260
535
  const relMap = parseDocRelsMap(relsText ?? null);
536
+ const footnotesMap = parseFootnotesMap(footnotesText ?? null);
537
+ const endnotesMap = parseEndnotesMap(endnotesText ?? null);
538
+ const commentsMap = parseCommentsMap(commentsText ?? null);
539
+ const usedFootnoteIds = [];
540
+ const usedEndnoteIds = [];
541
+ const usedCommentIds = [];
261
542
  const documentXml = parseXml(documentXmlText);
262
543
  const body = queryByLocalName(documentXml, "body");
263
544
  if (!body) {
@@ -272,7 +553,20 @@ async function parseDocxToHtmlSnapshot(file) {
272
553
  if (child.localName === "sectPr") continue;
273
554
  if (child.localName === "p") {
274
555
  const paragraphIndex = paragraphIndexMap.get(child) ?? null;
275
- blockHtml.push(await paragraphToHtml(zip, relMap, child, paragraphIndex));
556
+ blockHtml.push(
557
+ await paragraphToHtml(
558
+ zip,
559
+ relMap,
560
+ child,
561
+ paragraphIndex,
562
+ footnotesMap,
563
+ usedFootnoteIds,
564
+ endnotesMap,
565
+ usedEndnoteIds,
566
+ commentsMap,
567
+ usedCommentIds
568
+ )
569
+ );
276
570
  continue;
277
571
  }
278
572
  if (child.localName === "tbl") {
@@ -280,6 +574,9 @@ async function parseDocxToHtmlSnapshot(file) {
280
574
  continue;
281
575
  }
282
576
  }
577
+ blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
578
+ blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
579
+ blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
283
580
  return buildHtmlSnapshot(blockHtml.join("\n"));
284
581
  }
285
582
 
@@ -672,7 +969,28 @@ function parseNumberingMap(numberingXml) {
672
969
  const lvlMap = abstractMap.get(absId);
673
970
  if (!lvlMap) continue;
674
971
  for (const [lvl, spec] of lvlMap.entries()) {
675
- levelMap.set(`${numId}:${lvl}`, spec);
972
+ levelMap.set(`${numId}:${lvl}`, { ...spec });
973
+ }
974
+ const lvlOverrides = queryAllByLocalName2(num, "lvlOverride");
975
+ for (const override of lvlOverrides) {
976
+ const ilvl = toInt(getAttr2(override, "w:ilvl") ?? getAttr2(override, "ilvl"));
977
+ if (ilvl === null) continue;
978
+ const key = `${numId}:${ilvl}`;
979
+ const base = levelMap.get(key) ?? { numFmt: null, lvlText: null, startAt: 1 };
980
+ const overrideStart = toInt(
981
+ getAttr2(queryAllByLocalName2(override, "startOverride")[0] ?? null, "w:val") ?? getAttr2(queryAllByLocalName2(override, "startOverride")[0] ?? null, "val")
982
+ );
983
+ const overrideLvl = queryAllByLocalName2(override, "lvl")[0] ?? null;
984
+ const overrideNumFmtNode = overrideLvl ? queryAllByLocalName2(overrideLvl, "numFmt")[0] ?? null : null;
985
+ const overrideLvlTextNode = overrideLvl ? queryAllByLocalName2(overrideLvl, "lvlText")[0] ?? null : null;
986
+ const overrideLvlStart = toInt(
987
+ getAttr2(queryAllByLocalName2(overrideLvl ?? override, "start")[0] ?? null, "w:val") ?? getAttr2(queryAllByLocalName2(overrideLvl ?? override, "start")[0] ?? null, "val")
988
+ );
989
+ levelMap.set(key, {
990
+ numFmt: getAttr2(overrideNumFmtNode, "w:val") ?? getAttr2(overrideNumFmtNode, "val") ?? base.numFmt,
991
+ lvlText: getAttr2(overrideLvlTextNode, "w:val") ?? getAttr2(overrideLvlTextNode, "val") ?? base.lvlText,
992
+ startAt: overrideStart ?? overrideLvlStart ?? base.startAt
993
+ });
676
994
  }
677
995
  }
678
996
  return levelMap;
@@ -918,7 +1236,8 @@ function inferTitleFontFamily(families) {
918
1236
  return FALLBACK_PROFILE.titleFontFamily;
919
1237
  }
920
1238
  async function parseDocxStyleProfile(file) {
921
- const buffer = await file.arrayBuffer();
1239
+ const maybeArrayBuffer = file.arrayBuffer;
1240
+ const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
922
1241
  const zip = await import_jszip2.default.loadAsync(buffer);
923
1242
  const documentXmlText = await zip.file("word/document.xml")?.async("string");
924
1243
  const stylesXmlText = await zip.file("word/styles.xml")?.async("string");
@@ -1280,6 +1599,7 @@ function applyParagraphProfiles(doc, styleProfile) {
1280
1599
  if (!alreadyHasMarker) {
1281
1600
  const marker = doc.createElement("span");
1282
1601
  marker.className = "__word-list-marker";
1602
+ marker.setAttribute("data-word-list-marker", "1");
1283
1603
  marker.textContent = `${markerText} `;
1284
1604
  marker.style.display = "inline-block";
1285
1605
  marker.style.minWidth = "1.8em";
@@ -1331,7 +1651,7 @@ function applyKeepPagination(doc, styleProfile, paragraphs) {
1331
1651
  for (let i = 0; i < count; i += 1) {
1332
1652
  const p = paragraphs[i];
1333
1653
  const profile = styleProfile.paragraphProfiles[i];
1334
- const h2 = paragraphHeightPx(p);
1654
+ const h = paragraphHeightPx(p);
1335
1655
  const forceBreak = profile.pageBreakBefore;
1336
1656
  if (forceBreak && used > 0) {
1337
1657
  insertPageSpacerBefore(doc, p, contentHeight - used);
@@ -1342,11 +1662,11 @@ function applyKeepPagination(doc, styleProfile, paragraphs) {
1342
1662
  insertPageSpacerBefore(doc, p, contentHeight - used);
1343
1663
  used = 0;
1344
1664
  }
1345
- if (used > 0 && used + h2 > contentHeight) {
1665
+ if (used > 0 && used + h > contentHeight) {
1346
1666
  insertPageSpacerBefore(doc, p, contentHeight - used);
1347
1667
  used = 0;
1348
1668
  }
1349
- used += h2;
1669
+ used += h;
1350
1670
  if (used >= contentHeight) {
1351
1671
  used = used % contentHeight;
1352
1672
  }
@@ -1388,6 +1708,41 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
1388
1708
  }
1389
1709
 
1390
1710
  // src/core/DocsWordElement.ts
1711
+ var VERSION = "0.1.2";
1712
+ var MESSAGES = {
1713
+ zh: {
1714
+ readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
1715
+ uploadWord: "\u4E0A\u4F20 Word",
1716
+ clear: "\u6E05\u7A7A",
1717
+ pastePlaceholder: "\u5728\u6B64\u5904\u7C98\u8D34 Word/WPS/Google Docs \u5185\u5BB9\uFF08Ctrl/Cmd+V\uFF09",
1718
+ waitImport: "\u7B49\u5F85\u5185\u5BB9\u5BFC\u5165",
1719
+ loadedHtml: "\u5DF2\u52A0\u8F7D HTML \u5FEB\u7167",
1720
+ cleared: "\u6587\u6863\u5DF2\u6E05\u7A7A",
1721
+ loadedWord: (name) => `\u5DF2\u52A0\u8F7D Word \u6587\u4EF6: ${name}`,
1722
+ importedClipboard: "\u5DF2\u5BFC\u5165\u526A\u8D34\u677F\u5185\u5BB9",
1723
+ noContent: "\u672A\u68C0\u6D4B\u5230\u53EF\u5BFC\u5165\u5185\u5BB9",
1724
+ noClipboardRead: "\u5F53\u524D\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 clipboard.read",
1725
+ parseFailed: "Word \u89E3\u6790\u5931\u8D25",
1726
+ clipboardReadFailed: "\u8BFB\u53D6\u526A\u8D34\u677F\u5931\u8D25",
1727
+ errorPrefix: "\u9519\u8BEF: "
1728
+ },
1729
+ en: {
1730
+ readClipboard: "Read clipboard",
1731
+ uploadWord: "Upload Word",
1732
+ clear: "Clear",
1733
+ pastePlaceholder: "Paste Word/WPS/Google Docs content here (Ctrl/Cmd+V)",
1734
+ waitImport: "Waiting for input",
1735
+ loadedHtml: "HTML snapshot loaded",
1736
+ cleared: "Document cleared",
1737
+ loadedWord: (name) => `Word file loaded: ${name}`,
1738
+ importedClipboard: "Clipboard content imported",
1739
+ noContent: "No importable content detected",
1740
+ noClipboardRead: "navigator.clipboard.read is not supported in this browser",
1741
+ parseFailed: "Word parse failed",
1742
+ clipboardReadFailed: "Failed to read clipboard",
1743
+ errorPrefix: "Error: "
1744
+ }
1745
+ };
1391
1746
  var BASE_CSS = `
1392
1747
  :host{display:block;border:1px solid #d8deea;border-radius:12px;background:#fff;overflow:hidden;font-family:ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto}
1393
1748
  .toolbar{display:flex;gap:8px;flex-wrap:wrap;padding:10px;border-bottom:1px solid #e8edf6;background:#f8faff}
@@ -1398,6 +1753,10 @@ iframe{width:100%;min-height:760px;border:0}
1398
1753
  `;
1399
1754
  var DocsWordElement = class extends HTMLElement {
1400
1755
  rootRef;
1756
+ toolbar;
1757
+ btnRead;
1758
+ btnUpload;
1759
+ btnClear;
1401
1760
  frame;
1402
1761
  pasteArea;
1403
1762
  fileInput;
@@ -1405,64 +1764,92 @@ var DocsWordElement = class extends HTMLElement {
1405
1764
  htmlSnapshot;
1406
1765
  styleProfile = null;
1407
1766
  frameHeight = 0;
1767
+ locale = "zh";
1408
1768
  constructor() {
1409
1769
  super();
1410
1770
  this.rootRef = this.attachShadow({ mode: "open" });
1771
+ this.locale = this.parseLocale(this.getAttribute("lang"));
1411
1772
  this.htmlSnapshot = buildHtmlSnapshot("<p><br/></p>");
1412
1773
  const style = document.createElement("style");
1413
1774
  style.textContent = BASE_CSS;
1414
- const toolbar = document.createElement("div");
1415
- toolbar.className = "toolbar";
1416
- const btnRead = document.createElement("button");
1417
- btnRead.textContent = "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6";
1418
- btnRead.onclick = () => void this.readClipboard();
1419
- const btnUpload = document.createElement("button");
1420
- btnUpload.textContent = "\u4E0A\u4F20 Word";
1421
- btnUpload.onclick = () => this.fileInput.click();
1422
- const btnClear = document.createElement("button");
1423
- btnClear.textContent = "\u6E05\u7A7A";
1424
- btnClear.onclick = () => this.clear();
1775
+ this.toolbar = document.createElement("div");
1776
+ this.toolbar.className = "toolbar";
1777
+ this.btnRead = document.createElement("button");
1778
+ this.btnRead.onclick = () => void this.loadClipboard();
1779
+ this.btnUpload = document.createElement("button");
1780
+ this.btnUpload.onclick = () => this.fileInput.click();
1781
+ this.btnClear = document.createElement("button");
1782
+ this.btnClear.onclick = () => this.clear();
1425
1783
  this.fileInput = document.createElement("input");
1426
1784
  this.fileInput.type = "file";
1427
1785
  this.fileInput.accept = ".docx";
1428
1786
  this.fileInput.style.display = "none";
1429
1787
  this.fileInput.onchange = () => void this.onUpload();
1430
- toolbar.append(btnRead, btnUpload, btnClear, this.fileInput);
1788
+ this.toolbar.append(this.btnRead, this.btnUpload, this.btnClear, this.fileInput);
1431
1789
  this.pasteArea = document.createElement("textarea");
1432
1790
  this.pasteArea.className = "paste";
1433
- this.pasteArea.placeholder = "\u5728\u6B64\u5904\u7C98\u8D34 Word/WPS/Google Docs \u5185\u5BB9\uFF08Ctrl/Cmd+V\uFF09";
1791
+ this.pasteArea.placeholder = "";
1434
1792
  this.pasteArea.onpaste = (event) => {
1435
1793
  event.preventDefault();
1436
1794
  void this.applyFromClipboardData(event.clipboardData);
1437
1795
  };
1438
1796
  this.hint = document.createElement("span");
1439
1797
  this.hint.className = "hint";
1440
- this.hint.textContent = "\u7B49\u5F85\u5185\u5BB9\u5BFC\u5165";
1798
+ this.hint.textContent = "";
1441
1799
  this.frame = document.createElement("iframe");
1442
1800
  this.frame.sandbox.add("allow-same-origin", "allow-scripts");
1443
1801
  this.frame.onload = () => this.onFrameLoad();
1444
- this.rootRef.append(style, toolbar, this.pasteArea, this.hint, this.frame);
1802
+ this.rootRef.append(style, this.toolbar, this.pasteArea, this.hint, this.frame);
1803
+ this.syncLocaleText();
1804
+ this.syncToolbarVisibility();
1805
+ }
1806
+ static get observedAttributes() {
1807
+ return ["lang", "show-toolbar"];
1808
+ }
1809
+ attributeChangedCallback(name, _, newValue) {
1810
+ if (name === "lang") {
1811
+ this.locale = this.parseLocale(newValue);
1812
+ this.syncLocaleText();
1813
+ return;
1814
+ }
1815
+ if (name === "show-toolbar") {
1816
+ this.syncToolbarVisibility();
1817
+ }
1445
1818
  }
1446
1819
  connectedCallback() {
1447
1820
  this.renderSnapshot();
1821
+ this.dispatchEvent(new CustomEvent("docsjs-ready", { detail: { version: VERSION } }));
1448
1822
  }
1449
1823
  setSnapshot(rawHtml) {
1824
+ this.loadHtml(rawHtml);
1825
+ }
1826
+ loadHtml(rawHtml) {
1450
1827
  this.styleProfile = null;
1451
1828
  this.htmlSnapshot = buildHtmlSnapshot(rawHtml);
1452
1829
  this.renderSnapshot();
1453
- this.hint.textContent = "\u5DF2\u52A0\u8F7D HTML \u5FEB\u7167";
1454
- this.emitChange();
1830
+ this.setHint(MESSAGES[this.locale].loadedHtml);
1831
+ this.emitChange("api");
1832
+ }
1833
+ getSnapshot() {
1834
+ return this.htmlSnapshot;
1455
1835
  }
1456
1836
  clear() {
1457
1837
  this.styleProfile = null;
1458
1838
  this.htmlSnapshot = buildHtmlSnapshot("<p><br/></p>");
1459
1839
  this.renderSnapshot();
1460
- this.hint.textContent = "\u6587\u6863\u5DF2\u6E05\u7A7A";
1461
- this.emitChange();
1840
+ this.setHint(MESSAGES[this.locale].cleared);
1841
+ this.emitChange("clear");
1842
+ }
1843
+ async loadDocx(file) {
1844
+ await this.applyDocx(file);
1462
1845
  }
1463
1846
  async onUpload() {
1464
1847
  const file = this.fileInput.files?.[0];
1465
1848
  if (!file) return;
1849
+ await this.applyDocx(file);
1850
+ this.fileInput.value = "";
1851
+ }
1852
+ async applyDocx(file) {
1466
1853
  try {
1467
1854
  const [snapshot, profile] = await Promise.all([
1468
1855
  parseDocxToHtmlSnapshot(file),
@@ -1471,17 +1858,15 @@ var DocsWordElement = class extends HTMLElement {
1471
1858
  this.styleProfile = profile;
1472
1859
  this.htmlSnapshot = snapshot;
1473
1860
  this.renderSnapshot();
1474
- this.hint.textContent = `\u5DF2\u52A0\u8F7D Word \u6587\u4EF6: ${profile.sourceFileName}`;
1475
- this.emitChange();
1861
+ this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
1862
+ this.emitChange("upload", profile.sourceFileName);
1476
1863
  } catch (error) {
1477
- this.emitError(error instanceof Error ? error.message : "Word \u89E3\u6790\u5931\u8D25");
1478
- } finally {
1479
- this.fileInput.value = "";
1864
+ this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
1480
1865
  }
1481
1866
  }
1482
- async readClipboard() {
1867
+ async loadClipboard() {
1483
1868
  if (!navigator.clipboard?.read) {
1484
- this.emitError("\u5F53\u524D\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 clipboard.read");
1869
+ this.emitError(MESSAGES[this.locale].noClipboardRead);
1485
1870
  return;
1486
1871
  }
1487
1872
  try {
@@ -1489,7 +1874,7 @@ var DocsWordElement = class extends HTMLElement {
1489
1874
  const payload = await extractFromClipboardItems(items);
1490
1875
  this.applyPayload(payload.html, payload.text);
1491
1876
  } catch (error) {
1492
- this.emitError(error instanceof Error ? error.message : "\u8BFB\u53D6\u526A\u8D34\u677F\u5931\u8D25");
1877
+ this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].clipboardReadFailed);
1493
1878
  }
1494
1879
  }
1495
1880
  async applyFromClipboardData(data) {
@@ -1504,12 +1889,12 @@ var DocsWordElement = class extends HTMLElement {
1504
1889
  } else if (text.trim()) {
1505
1890
  this.htmlSnapshot = buildHtmlSnapshot(`<p>${text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;")}</p>`);
1506
1891
  } else {
1507
- this.hint.textContent = "\u672A\u68C0\u6D4B\u5230\u53EF\u5BFC\u5165\u5185\u5BB9";
1892
+ this.setHint(MESSAGES[this.locale].noContent);
1508
1893
  return;
1509
1894
  }
1510
1895
  this.renderSnapshot();
1511
- this.hint.textContent = "\u5DF2\u5BFC\u5165\u526A\u8D34\u677F\u5185\u5BB9";
1512
- this.emitChange();
1896
+ this.setHint(MESSAGES[this.locale].importedClipboard);
1897
+ this.emitChange("paste");
1513
1898
  }
1514
1899
  onFrameLoad() {
1515
1900
  const doc = this.frame.contentDocument;
@@ -1534,12 +1919,33 @@ var DocsWordElement = class extends HTMLElement {
1534
1919
  renderSnapshot() {
1535
1920
  this.frame.srcdoc = this.htmlSnapshot;
1536
1921
  }
1537
- emitChange() {
1538
- this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot } }));
1922
+ emitChange(source, fileName) {
1923
+ this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName } }));
1539
1924
  }
1540
1925
  emitError(message) {
1541
1926
  this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
1542
- this.hint.textContent = `\u9519\u8BEF: ${message}`;
1927
+ this.setHint(`${MESSAGES[this.locale].errorPrefix}${message}`);
1928
+ }
1929
+ setHint(text) {
1930
+ this.hint.textContent = text;
1931
+ }
1932
+ parseLocale(value) {
1933
+ return value?.toLowerCase() === "en" ? "en" : "zh";
1934
+ }
1935
+ syncToolbarVisibility() {
1936
+ const raw = this.getAttribute("show-toolbar");
1937
+ const show = raw === null || raw === "" || raw === "1" || raw.toLowerCase() === "true";
1938
+ this.toolbar.style.display = show ? "flex" : "none";
1939
+ }
1940
+ syncLocaleText() {
1941
+ const t = MESSAGES[this.locale];
1942
+ this.btnRead.textContent = t.readClipboard;
1943
+ this.btnUpload.textContent = t.uploadWord;
1944
+ this.btnClear.textContent = t.clear;
1945
+ this.pasteArea.placeholder = t.pastePlaceholder;
1946
+ if (!this.hint.textContent || this.hint.textContent === MESSAGES.en.waitImport || this.hint.textContent === MESSAGES.zh.waitImport) {
1947
+ this.hint.textContent = t.waitImport;
1948
+ }
1543
1949
  }
1544
1950
  };
1545
1951
  function defineDocsWordElement() {
@@ -1548,62 +1954,71 @@ function defineDocsWordElement() {
1548
1954
  }
1549
1955
  }
1550
1956
 
1551
- // src/react/WordFidelityEditorReact.tsx
1552
- var import_react = __toESM(require("react"), 1);
1553
- defineDocsWordElement();
1554
- function WordFidelityEditorReact({ onChange, onError }) {
1555
- const ref2 = (0, import_react.useRef)(null);
1556
- (0, import_react.useEffect)(() => {
1557
- const node = ref2.current;
1558
- if (!node) return;
1559
- const onChangeEvent = (event) => {
1560
- const detail = event.detail;
1561
- onChange?.(detail);
1562
- };
1563
- const onErrorEvent = (event) => {
1564
- const detail = event.detail;
1565
- onError?.(detail);
1566
- };
1567
- node.addEventListener("docsjs-change", onChangeEvent);
1568
- node.addEventListener("docsjs-error", onErrorEvent);
1569
- return () => {
1570
- node.removeEventListener("docsjs-change", onChangeEvent);
1571
- node.removeEventListener("docsjs-error", onErrorEvent);
1572
- };
1573
- }, [onChange, onError]);
1574
- return import_react.default.createElement("docs-word-editor", { ref: ref2 });
1957
+ // src/lib/semanticStats.ts
1958
+ function countElements(root, selector) {
1959
+ return root.querySelectorAll(selector).length;
1960
+ }
1961
+ function isListLikeParagraph(p) {
1962
+ if (p.hasAttribute("data-word-list")) return true;
1963
+ if (p.querySelector("span.__word-list-marker")) return true;
1964
+ const style = (p.getAttribute("style") ?? "").toLowerCase();
1965
+ return style.includes("mso-list");
1966
+ }
1967
+ function collectSemanticStatsFromDocument(doc) {
1968
+ const paragraphs = Array.from(doc.querySelectorAll("p"));
1969
+ const listParagraphCount = paragraphs.filter((p) => isListLikeParagraph(p)).length;
1970
+ const textCharCount = (doc.body.textContent ?? "").replace(/\s+/g, "").length;
1971
+ return {
1972
+ paragraphCount: paragraphs.length,
1973
+ headingCount: countElements(doc, "h1,h2,h3,h4,h5,h6"),
1974
+ tableCount: countElements(doc, "table"),
1975
+ tableCellCount: countElements(doc, "td,th"),
1976
+ imageCount: countElements(doc, "img"),
1977
+ anchorImageCount: countElements(doc, 'img[data-word-anchor="1"]'),
1978
+ wrappedImageCount: countElements(doc, "img[data-word-wrap]"),
1979
+ listParagraphCount,
1980
+ commentRefCount: countElements(doc, "[data-word-comment-ref]"),
1981
+ revisionInsCount: countElements(doc, '[data-word-revision="ins"]'),
1982
+ revisionDelCount: countElements(doc, '[data-word-revision="del"]'),
1983
+ pageBreakCount: countElements(doc, "[data-word-page-break='1']"),
1984
+ pageSpacerCount: countElements(doc, "[data-word-page-spacer='1']"),
1985
+ textCharCount
1986
+ };
1987
+ }
1988
+ function collectSemanticStatsFromHtml(rawHtml) {
1989
+ const parser = new DOMParser();
1990
+ const doc = parser.parseFromString(rawHtml, "text/html");
1991
+ return collectSemanticStatsFromDocument(doc);
1575
1992
  }
1576
1993
 
1577
- // src/vue/WordFidelityEditorVue.ts
1578
- var import_vue = require("vue");
1579
- defineDocsWordElement();
1580
- var WordFidelityEditorVue = (0, import_vue.defineComponent)({
1581
- name: "WordFidelityEditorVue",
1582
- emits: ["change", "error"],
1583
- setup(_, { emit }) {
1584
- const elRef = (0, import_vue.ref)(null);
1585
- const onChange = (event) => {
1586
- emit("change", event.detail);
1587
- };
1588
- const onError = (event) => {
1589
- emit("error", event.detail);
1590
- };
1591
- (0, import_vue.onMounted)(() => {
1592
- elRef.value?.addEventListener("docsjs-change", onChange);
1593
- elRef.value?.addEventListener("docsjs-error", onError);
1594
- });
1595
- (0, import_vue.onBeforeUnmount)(() => {
1596
- elRef.value?.removeEventListener("docsjs-change", onChange);
1597
- elRef.value?.removeEventListener("docsjs-error", onError);
1598
- });
1599
- return () => (0, import_vue.h)("docs-word-editor", { ref: elRef });
1600
- }
1601
- });
1994
+ // src/lib/fidelityScore.ts
1995
+ function ratioScore(actual, expected) {
1996
+ if (expected <= 0 && actual <= 0) return 1;
1997
+ if (expected <= 0 || actual < 0) return 0;
1998
+ const delta = Math.abs(actual - expected);
1999
+ const penalty = delta / expected;
2000
+ return Math.max(0, 1 - penalty);
2001
+ }
2002
+ function clamp01(v) {
2003
+ if (v < 0) return 0;
2004
+ if (v > 1) return 1;
2005
+ return v;
2006
+ }
2007
+ function calculateFidelityScore(expected, actual) {
2008
+ const structure = clamp01(
2009
+ (ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) / 6
2010
+ );
2011
+ const styleProxy = clamp01(ratioScore(actual.textCharCount, expected.textCharCount));
2012
+ const pagination = clamp01(ratioScore(actual.pageSpacerCount, expected.pageSpacerCount));
2013
+ const overall = clamp01(structure * 0.6 + styleProxy * 0.25 + pagination * 0.15);
2014
+ return { structure, styleProxy, pagination, overall };
2015
+ }
1602
2016
  // Annotate the CommonJS export names for ESM import in node:
1603
2017
  0 && (module.exports = {
1604
2018
  DocsWordElement,
1605
- WordFidelityEditorReact,
1606
- WordFidelityEditorVue,
2019
+ calculateFidelityScore,
2020
+ collectSemanticStatsFromDocument,
2021
+ collectSemanticStatsFromHtml,
1607
2022
  defineDocsWordElement
1608
2023
  });
1609
2024
  //# sourceMappingURL=index.cjs.map