@coding01/docsjs 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -34,7 +34,9 @@ __export(index_exports, {
34
34
  calculateFidelityScore: () => calculateFidelityScore,
35
35
  collectSemanticStatsFromDocument: () => collectSemanticStatsFromDocument,
36
36
  collectSemanticStatsFromHtml: () => collectSemanticStatsFromHtml,
37
- defineDocsWordElement: () => defineDocsWordElement
37
+ defineDocsWordElement: () => defineDocsWordElement,
38
+ parseDocxToHtmlSnapshot: () => parseDocxToHtmlSnapshot,
39
+ parseDocxToHtmlSnapshotWithReport: () => parseDocxToHtmlSnapshotWithReport
38
40
  });
39
41
  module.exports = __toCommonJS(index_exports);
40
42
 
@@ -57,6 +59,21 @@ function buildHtmlSnapshot(rawHtml) {
57
59
 
58
60
  // src/lib/docxHtml.ts
59
61
  var import_jszip = __toESM(require("jszip"), 1);
62
/**
 * Creates a fresh feature-counter record with every counter set to zero.
 *
 * A new object is built on each call, so callers can increment the
 * counters without affecting any other record.
 *
 * @returns {Record<string, number>} Counters keyed by feature name, all 0.
 */
function createEmptyFeatureCounts() {
  // Order matters only for readability/serialization; it mirrors the report layout.
  const counterNames = [
    "hyperlinkCount",
    "anchorImageCount",
    "chartCount",
    "smartArtCount",
    "ommlCount",
    "tableCount",
    "footnoteRefCount",
    "endnoteRefCount",
    "commentRefCount",
    "revisionCount",
    "pageBreakCount"
  ];
  return Object.fromEntries(counterNames.map((counterName) => [counterName, 0]));
}
60
77
  function escapeHtml(text) {
61
78
  return text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
62
79
  }
@@ -231,6 +248,19 @@ function normalizeWordPath(relTarget) {
231
248
  if (normalized.startsWith("../")) return `word/${normalized.replace(/^(\.\.\/)+/, "")}`;
232
249
  return `word/${normalized}`;
233
250
  }
251
/**
 * Resolves the `href` to emit for a Word hyperlink element.
 *
 * Resolution order:
 *  1. A relationship target that is an `http://` / `https://` URL is returned
 *     as-is; when an anchor is also present and the URL has no fragment yet,
 *     the anchor is appended as `#fragment`.
 *  2. A `mailto:` / `tel:` relationship target is returned as-is.
 *  3. Otherwise a non-empty anchor yields an in-document link `#anchor`.
 *  4. Any other relationship target is treated as an in-document reference
 *     and returned as `#target` (never as a raw URL, so schemes such as
 *     `javascript:` cannot leak into the generated markup).
 *
 * An anchor must not shadow an external target: doing so turns a link to an
 * external page section into a jump inside the current document.
 *
 * @param {Record<string, string>} relMap Relationship id -> target lookup.
 * @param {string | null | undefined} rid Relationship id of the hyperlink.
 * @param {string | null | undefined} anchor Anchor (bookmark/fragment) name.
 * @returns {string | null} The href, or `null` when nothing can be resolved.
 */
function resolveHyperlinkHref(relMap, rid, anchor) {
  const fragment = typeof anchor === "string" ? anchor.trim() : "";

  // Own-property + type check: ids such as "constructor" must not resolve to
  // inherited members of the lookup object (calling .trim() on them throws).
  const hasOwnTarget = Boolean(rid) && relMap != null && Object.prototype.hasOwnProperty.call(relMap, rid);
  const rawTarget = hasOwnTarget ? relMap[rid] : null;
  const target = typeof rawTarget === "string" ? rawTarget.trim() : "";
  const lowerTarget = target.toLowerCase();

  if (lowerTarget.startsWith("http://") || lowerTarget.startsWith("https://")) {
    if (!fragment || target.includes("#")) return target;
    return `${target}#${encodeURIComponent(fragment)}`;
  }
  if (lowerTarget.startsWith("mailto:") || lowerTarget.startsWith("tel:")) {
    return target;
  }
  if (fragment) return `#${encodeURIComponent(fragment)}`;
  if (!target) return null;
  return target.startsWith("#") ? target : `#${encodeURIComponent(target)}`;
}
234
264
  async function imageRidToDataUrl(zip, relMap, rid) {
235
265
  const relTarget = relMap[rid];
236
266
  if (!relTarget) return null;
@@ -405,7 +435,7 @@ function renderEndnotesSection(usedIds, endnotesMap) {
405
435
  const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
406
436
  return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
407
437
  }
408
- async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
438
+ async function paragraphToHtml(zip, relMap, context, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
409
439
  const tag = paragraphTag(paragraph);
410
440
  const alignStyle = paragraphAlignStyle(paragraph);
411
441
  const dataAttr = paragraphDataAttr(paragraphIndex);
@@ -446,6 +476,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
446
476
  const footnoteRef = queryByLocalName(run, "footnoteReference");
447
477
  const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
448
478
  if (footnoteId && footnotesMap[footnoteId]) {
479
+ context.features.footnoteRefCount += 1;
449
480
  usedFootnoteIds.push(footnoteId);
450
481
  result.push(
451
482
  `<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
@@ -455,6 +486,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
455
486
  const endnoteRef = queryByLocalName(run, "endnoteReference");
456
487
  const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
457
488
  if (endnoteId && endnotesMap[endnoteId]) {
489
+ context.features.endnoteRefCount += 1;
458
490
  usedEndnoteIds.push(endnoteId);
459
491
  result.push(
460
492
  `<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
@@ -464,6 +496,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
464
496
  const commentRef = queryByLocalName(run, "commentReference");
465
497
  const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
466
498
  if (commentId && commentsMap[commentId]) {
499
+ context.features.commentRefCount += 1;
467
500
  usedCommentIds.push(commentId);
468
501
  result.push(
469
502
  `<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
@@ -481,6 +514,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
481
514
  const dimensionAttrs = imageDimensionAttributes(imageSize);
482
515
  const anchorMeta = parseAnchorMeta(drawing);
483
516
  const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
517
+ if (anchorMeta) context.features.anchorImageCount += 1;
484
518
  result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
485
519
  return result;
486
520
  }
@@ -491,6 +525,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
491
525
  const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
492
526
  if (chartXmlText) {
493
527
  const summary = parseChartSummary(chartXmlText);
528
+ context.features.chartCount += 1;
494
529
  result.push(
495
530
  `<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
496
531
  );
@@ -502,6 +537,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
502
537
  if (smartArtRid) {
503
538
  const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
504
539
  const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
540
+ context.features.smartArtCount += 1;
505
541
  const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
506
542
  result.push(
507
543
  `<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
@@ -523,12 +559,14 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
523
559
  if (css) {
524
560
  const span = `<span style="${css}">${runText2}</span>`;
525
561
  if (revisionMeta) {
562
+ context.features.revisionCount += 1;
526
563
  const tagName = revisionMeta.type === "ins" ? "ins" : "del";
527
564
  result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
528
565
  } else {
529
566
  result.push(span);
530
567
  }
531
568
  } else if (revisionMeta) {
569
+ context.features.revisionCount += 1;
532
570
  const tagName = revisionMeta.type === "ins" ? "ins" : "del";
533
571
  result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
534
572
  } else {
@@ -536,6 +574,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
536
574
  }
537
575
  }
538
576
  for (let i = 0; i < pageBreakCount; i += 1) {
577
+ context.features.pageBreakCount += 1;
539
578
  result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
540
579
  }
541
580
  return result;
@@ -552,9 +591,25 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
552
591
  if (node.localName === "r") {
553
592
  return runToHtml(node, revisionFallback);
554
593
  }
594
+ if (node.localName === "hyperlink") {
595
+ const rid = getAttr(node, "r:id") ?? getAttr(node, "id");
596
+ const anchor = getAttr(node, "w:anchor") ?? getAttr(node, "anchor");
597
+ const href = resolveHyperlinkHref(relMap, rid, anchor);
598
+ const nested2 = [];
599
+ for (const child of Array.from(node.children)) {
600
+ nested2.push(...await nodeToHtml(child, revisionFallback));
601
+ }
602
+ const content2 = nested2.join("") || escapeHtml(node.textContent ?? "");
603
+ if (!href) return content2 ? [content2] : [];
604
+ context.features.hyperlinkCount += 1;
605
+ return [
606
+ `<a data-word-hyperlink="1" href="${escapeHtml(href)}" rel="noreferrer noopener" target="_blank">${content2}</a>`
607
+ ];
608
+ }
555
609
  if (node.localName === "oMath" || node.localName === "oMathPara") {
556
610
  const linear = ommlNodeToText(node).trim();
557
611
  if (!linear) return [];
612
+ context.features.ommlCount += 1;
558
613
  return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
559
614
  }
560
615
  if (node.localName === "ins" || node.localName === "del") {
@@ -574,6 +629,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
574
629
  const parts = [];
575
630
  const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
576
631
  for (let i = 0; i < renderedPageBreakCount; i += 1) {
632
+ context.features.pageBreakCount += 1;
577
633
  parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
578
634
  }
579
635
  for (const child of Array.from(paragraph.children)) {
@@ -702,7 +758,7 @@ function parseCellBorderStyle(cell, tableStyle) {
702
758
  const left = parseBorderCss(directChildrenByLocalName(tcBorders, "left")[0] ?? null) ?? tableStyle.insideVCss ?? tableStyle.borderCss;
703
759
  return `border-top:${top};border-right:${right};border-bottom:${bottom};border-left:${left}`;
704
760
  }
705
- function tableCellHtml(cell, paragraphIndexMap) {
761
+ function tableCellHtml(cell, paragraphIndexMap, context) {
706
762
  const blocks = [];
707
763
  for (const child of Array.from(cell.children)) {
708
764
  if (child.localName === "tcPr") continue;
@@ -712,7 +768,7 @@ function tableCellHtml(cell, paragraphIndexMap) {
712
768
  continue;
713
769
  }
714
770
  if (child.localName === "tbl") {
715
- blocks.push(tableToHtml(child, paragraphIndexMap));
771
+ blocks.push(tableToHtml(child, paragraphIndexMap, context));
716
772
  continue;
717
773
  }
718
774
  }
@@ -720,7 +776,8 @@ function tableCellHtml(cell, paragraphIndexMap) {
720
776
  const text = queryAllByLocalName(cell, "t").map((t) => t.textContent ?? "").join("").trim();
721
777
  return escapeHtml(text) || "<br/>";
722
778
  }
723
- function tableToHtml(table, paragraphIndexMap) {
779
+ function tableToHtml(table, paragraphIndexMap, context) {
780
+ context.features.tableCount += 1;
724
781
  const rows = directChildrenByLocalName(table, "tr");
725
782
  const gridWidthsPx = parseTblGridWidthsPx(table);
726
783
  const tableStyle = parseTableStyleProfile(table);
@@ -748,7 +805,7 @@ function tableToHtml(table, paragraphIndexMap) {
748
805
  while (activeByCol.has(colCursor)) {
749
806
  colCursor += 1;
750
807
  }
751
- const html = tableCellHtml(cell, paragraphIndexMap);
808
+ const html = tableCellHtml(cell, paragraphIndexMap, context);
752
809
  const attrs = [];
753
810
  const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
754
811
  const borderStyle = parseCellBorderStyle(cell, tableStyle);
@@ -792,7 +849,9 @@ function tableToHtml(table, paragraphIndexMap) {
792
849
  const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
793
850
  return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
794
851
  }
795
- async function parseDocxToHtmlSnapshot(file) {
852
+ async function parseDocxToHtmlSnapshotWithReport(file) {
853
+ const startedAt = Date.now();
854
+ const context = { features: createEmptyFeatureCounts() };
796
855
  const maybeArrayBuffer = file.arrayBuffer;
797
856
  const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
798
857
  const zip = await import_jszip.default.loadAsync(buffer);
@@ -829,6 +888,7 @@ async function parseDocxToHtmlSnapshot(file) {
829
888
  await paragraphToHtml(
830
889
  zip,
831
890
  relMap,
891
+ context,
832
892
  child,
833
893
  paragraphIndex,
834
894
  footnotesMap,
@@ -842,14 +902,24 @@ async function parseDocxToHtmlSnapshot(file) {
842
902
  continue;
843
903
  }
844
904
  if (child.localName === "tbl") {
845
- blockHtml.push(tableToHtml(child, paragraphIndexMap));
905
+ blockHtml.push(tableToHtml(child, paragraphIndexMap, context));
846
906
  continue;
847
907
  }
848
908
  }
849
909
  blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
850
910
  blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
851
911
  blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
852
- return buildHtmlSnapshot(blockHtml.join("\n"));
912
+ return {
913
+ htmlSnapshot: buildHtmlSnapshot(blockHtml.join("\n")),
914
+ report: {
915
+ elapsedMs: Date.now() - startedAt,
916
+ features: context.features
917
+ }
918
+ };
919
+ }
920
/**
 * Parses a .docx input and resolves to the HTML snapshot only.
 *
 * Thin convenience wrapper around `parseDocxToHtmlSnapshotWithReport` that
 * discards the parse report and keeps the `htmlSnapshot` field of its result.
 *
 * @param {*} file Input forwarded unchanged to `parseDocxToHtmlSnapshotWithReport`.
 * @returns {Promise<*>} The `htmlSnapshot` value of the parse result.
 */
async function parseDocxToHtmlSnapshot(file) {
  const { htmlSnapshot } = await parseDocxToHtmlSnapshotWithReport(file);
  return htmlSnapshot;
}
854
924
 
855
925
  // src/lib/pastePipeline.ts
@@ -1980,7 +2050,7 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
1980
2050
  }
1981
2051
 
1982
2052
  // src/core/DocsWordElement.ts
1983
- var VERSION = "0.1.2";
2053
// Version string embedded in the bundle; must match the published package
// version (this build is released as 0.1.6, so "0.1.5" was one release stale).
var VERSION = "0.1.6";
1984
2054
  var MESSAGES = {
1985
2055
  zh: {
1986
2056
  readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
@@ -2123,15 +2193,15 @@ var DocsWordElement = class extends HTMLElement {
2123
2193
  }
2124
2194
  async applyDocx(file) {
2125
2195
  try {
2126
- const [snapshot, profile] = await Promise.all([
2127
- parseDocxToHtmlSnapshot(file),
2196
+ const [parseResult, profile] = await Promise.all([
2197
+ parseDocxToHtmlSnapshotWithReport(file),
2128
2198
  parseDocxStyleProfile(file)
2129
2199
  ]);
2130
2200
  this.styleProfile = profile;
2131
- this.htmlSnapshot = snapshot;
2201
+ this.htmlSnapshot = parseResult.htmlSnapshot;
2132
2202
  this.renderSnapshot();
2133
2203
  this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
2134
- this.emitChange("upload", profile.sourceFileName);
2204
+ this.emitChange("upload", profile.sourceFileName, parseResult.report);
2135
2205
  } catch (error) {
2136
2206
  this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
2137
2207
  }
@@ -2191,8 +2261,10 @@ var DocsWordElement = class extends HTMLElement {
2191
2261
  renderSnapshot() {
2192
2262
  this.frame.srcdoc = this.htmlSnapshot;
2193
2263
  }
2194
- emitChange(source, fileName) {
2195
- this.dispatchEvent(new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName } }));
2264
+ emitChange(source, fileName, parseReport) {
2265
+ this.dispatchEvent(
2266
+ new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName, parseReport } })
2267
+ );
2196
2268
  }
2197
2269
  emitError(message) {
2198
2270
  this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
@@ -2294,6 +2366,8 @@ function calculateFidelityScore(expected, actual) {
2294
2366
  calculateFidelityScore,
2295
2367
  collectSemanticStatsFromDocument,
2296
2368
  collectSemanticStatsFromHtml,
2297
- defineDocsWordElement
2369
+ defineDocsWordElement,
2370
+ parseDocxToHtmlSnapshot,
2371
+ parseDocxToHtmlSnapshotWithReport
2298
2372
  });
2299
2373
  //# sourceMappingURL=index.cjs.map