@coding01/docsjs 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -28
- package/README.zh-CN.md +56 -27
- package/dist/{chunk-IBVWD4UO.js → chunk-632UOG2B.js} +91 -17
- package/dist/chunk-632UOG2B.js.map +1 -0
- package/dist/index.cjs +91 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +7 -3
- package/dist/index.js.map +1 -1
- package/dist/react.cjs +92 -17
- package/dist/react.cjs.map +1 -1
- package/dist/react.d.cts +1 -1
- package/dist/react.d.ts +1 -1
- package/dist/react.js +12 -3
- package/dist/react.js.map +1 -1
- package/dist/types-VvdwVF0_.d.cts +44 -0
- package/dist/types-VvdwVF0_.d.ts +44 -0
- package/dist/vue.cjs +81 -15
- package/dist/vue.cjs.map +1 -1
- package/dist/vue.d.cts +1 -1
- package/dist/vue.d.ts +1 -1
- package/dist/vue.js +1 -1
- package/package.json +2 -1
- package/dist/chunk-IBVWD4UO.js.map +0 -1
- package/dist/types-DF14w1ol.d.cts +0 -20
- package/dist/types-DF14w1ol.d.ts +0 -20
package/dist/index.cjs
CHANGED
|
@@ -34,7 +34,9 @@ __export(index_exports, {
|
|
|
34
34
|
calculateFidelityScore: () => calculateFidelityScore,
|
|
35
35
|
collectSemanticStatsFromDocument: () => collectSemanticStatsFromDocument,
|
|
36
36
|
collectSemanticStatsFromHtml: () => collectSemanticStatsFromHtml,
|
|
37
|
-
defineDocsWordElement: () => defineDocsWordElement
|
|
37
|
+
defineDocsWordElement: () => defineDocsWordElement,
|
|
38
|
+
parseDocxToHtmlSnapshot: () => parseDocxToHtmlSnapshot,
|
|
39
|
+
parseDocxToHtmlSnapshotWithReport: () => parseDocxToHtmlSnapshotWithReport
|
|
38
40
|
});
|
|
39
41
|
module.exports = __toCommonJS(index_exports);
|
|
40
42
|
|
|
@@ -57,6 +59,21 @@ function buildHtmlSnapshot(rawHtml) {
|
|
|
57
59
|
|
|
58
60
|
// src/lib/docxHtml.ts
|
|
59
61
|
var import_jszip = __toESM(require("jszip"), 1);
|
|
62
|
+
function createEmptyFeatureCounts() {
|
|
63
|
+
return {
|
|
64
|
+
hyperlinkCount: 0,
|
|
65
|
+
anchorImageCount: 0,
|
|
66
|
+
chartCount: 0,
|
|
67
|
+
smartArtCount: 0,
|
|
68
|
+
ommlCount: 0,
|
|
69
|
+
tableCount: 0,
|
|
70
|
+
footnoteRefCount: 0,
|
|
71
|
+
endnoteRefCount: 0,
|
|
72
|
+
commentRefCount: 0,
|
|
73
|
+
revisionCount: 0,
|
|
74
|
+
pageBreakCount: 0
|
|
75
|
+
};
|
|
76
|
+
}
|
|
60
77
|
function escapeHtml(text) {
|
|
61
78
|
return text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
|
|
62
79
|
}
|
|
@@ -231,6 +248,19 @@ function normalizeWordPath(relTarget) {
|
|
|
231
248
|
if (normalized.startsWith("../")) return `word/${normalized.replace(/^(\.\.\/)+/, "")}`;
|
|
232
249
|
return `word/${normalized}`;
|
|
233
250
|
}
|
|
251
|
+
function resolveHyperlinkHref(relMap, rid, anchor) {
|
|
252
|
+
if (anchor && anchor.trim()) return `#${encodeURIComponent(anchor.trim())}`;
|
|
253
|
+
if (!rid) return null;
|
|
254
|
+
const relTarget = relMap[rid];
|
|
255
|
+
if (!relTarget) return null;
|
|
256
|
+
const trimmed = relTarget.trim();
|
|
257
|
+
if (!trimmed) return null;
|
|
258
|
+
const lower = trimmed.toLowerCase();
|
|
259
|
+
if (lower.startsWith("http://") || lower.startsWith("https://") || lower.startsWith("mailto:") || lower.startsWith("tel:")) {
|
|
260
|
+
return trimmed;
|
|
261
|
+
}
|
|
262
|
+
return trimmed.startsWith("#") ? trimmed : `#${encodeURIComponent(trimmed)}`;
|
|
263
|
+
}
|
|
234
264
|
async function imageRidToDataUrl(zip, relMap, rid) {
|
|
235
265
|
const relTarget = relMap[rid];
|
|
236
266
|
if (!relTarget) return null;
|
|
@@ -405,7 +435,7 @@ function renderEndnotesSection(usedIds, endnotesMap) {
|
|
|
405
435
|
const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
|
|
406
436
|
return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
|
|
407
437
|
}
|
|
408
|
-
async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
|
|
438
|
+
async function paragraphToHtml(zip, relMap, context, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
|
|
409
439
|
const tag = paragraphTag(paragraph);
|
|
410
440
|
const alignStyle = paragraphAlignStyle(paragraph);
|
|
411
441
|
const dataAttr = paragraphDataAttr(paragraphIndex);
|
|
@@ -446,6 +476,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
446
476
|
const footnoteRef = queryByLocalName(run, "footnoteReference");
|
|
447
477
|
const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
|
|
448
478
|
if (footnoteId && footnotesMap[footnoteId]) {
|
|
479
|
+
context.features.footnoteRefCount += 1;
|
|
449
480
|
usedFootnoteIds.push(footnoteId);
|
|
450
481
|
result.push(
|
|
451
482
|
`<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
|
|
@@ -455,6 +486,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
455
486
|
const endnoteRef = queryByLocalName(run, "endnoteReference");
|
|
456
487
|
const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
|
|
457
488
|
if (endnoteId && endnotesMap[endnoteId]) {
|
|
489
|
+
context.features.endnoteRefCount += 1;
|
|
458
490
|
usedEndnoteIds.push(endnoteId);
|
|
459
491
|
result.push(
|
|
460
492
|
`<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
|
|
@@ -464,6 +496,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
464
496
|
const commentRef = queryByLocalName(run, "commentReference");
|
|
465
497
|
const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
|
|
466
498
|
if (commentId && commentsMap[commentId]) {
|
|
499
|
+
context.features.commentRefCount += 1;
|
|
467
500
|
usedCommentIds.push(commentId);
|
|
468
501
|
result.push(
|
|
469
502
|
`<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
|
|
@@ -481,6 +514,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
481
514
|
const dimensionAttrs = imageDimensionAttributes(imageSize);
|
|
482
515
|
const anchorMeta = parseAnchorMeta(drawing);
|
|
483
516
|
const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
|
|
517
|
+
if (anchorMeta) context.features.anchorImageCount += 1;
|
|
484
518
|
result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
|
|
485
519
|
return result;
|
|
486
520
|
}
|
|
@@ -491,6 +525,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
491
525
|
const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
|
|
492
526
|
if (chartXmlText) {
|
|
493
527
|
const summary = parseChartSummary(chartXmlText);
|
|
528
|
+
context.features.chartCount += 1;
|
|
494
529
|
result.push(
|
|
495
530
|
`<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
|
|
496
531
|
);
|
|
@@ -502,6 +537,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
502
537
|
if (smartArtRid) {
|
|
503
538
|
const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
|
|
504
539
|
const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
|
|
540
|
+
context.features.smartArtCount += 1;
|
|
505
541
|
const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
|
|
506
542
|
result.push(
|
|
507
543
|
`<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
|
|
@@ -523,12 +559,14 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
523
559
|
if (css) {
|
|
524
560
|
const span = `<span style="${css}">${runText2}</span>`;
|
|
525
561
|
if (revisionMeta) {
|
|
562
|
+
context.features.revisionCount += 1;
|
|
526
563
|
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
527
564
|
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
|
|
528
565
|
} else {
|
|
529
566
|
result.push(span);
|
|
530
567
|
}
|
|
531
568
|
} else if (revisionMeta) {
|
|
569
|
+
context.features.revisionCount += 1;
|
|
532
570
|
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
533
571
|
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
|
|
534
572
|
} else {
|
|
@@ -536,6 +574,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
536
574
|
}
|
|
537
575
|
}
|
|
538
576
|
for (let i = 0; i < pageBreakCount; i += 1) {
|
|
577
|
+
context.features.pageBreakCount += 1;
|
|
539
578
|
result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
540
579
|
}
|
|
541
580
|
return result;
|
|
@@ -552,9 +591,25 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
552
591
|
if (node.localName === "r") {
|
|
553
592
|
return runToHtml(node, revisionFallback);
|
|
554
593
|
}
|
|
594
|
+
if (node.localName === "hyperlink") {
|
|
595
|
+
const rid = getAttr(node, "r:id") ?? getAttr(node, "id");
|
|
596
|
+
const anchor = getAttr(node, "w:anchor") ?? getAttr(node, "anchor");
|
|
597
|
+
const href = resolveHyperlinkHref(relMap, rid, anchor);
|
|
598
|
+
const nested2 = [];
|
|
599
|
+
for (const child of Array.from(node.children)) {
|
|
600
|
+
nested2.push(...await nodeToHtml(child, revisionFallback));
|
|
601
|
+
}
|
|
602
|
+
const content2 = nested2.join("") || escapeHtml(node.textContent ?? "");
|
|
603
|
+
if (!href) return content2 ? [content2] : [];
|
|
604
|
+
context.features.hyperlinkCount += 1;
|
|
605
|
+
return [
|
|
606
|
+
`<a data-word-hyperlink="1" href="${escapeHtml(href)}" rel="noreferrer noopener" target="_blank">${content2}</a>`
|
|
607
|
+
];
|
|
608
|
+
}
|
|
555
609
|
if (node.localName === "oMath" || node.localName === "oMathPara") {
|
|
556
610
|
const linear = ommlNodeToText(node).trim();
|
|
557
611
|
if (!linear) return [];
|
|
612
|
+
context.features.ommlCount += 1;
|
|
558
613
|
return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
|
|
559
614
|
}
|
|
560
615
|
if (node.localName === "ins" || node.localName === "del") {
|
|
@@ -574,6 +629,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
574
629
|
const parts = [];
|
|
575
630
|
const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
|
|
576
631
|
for (let i = 0; i < renderedPageBreakCount; i += 1) {
|
|
632
|
+
context.features.pageBreakCount += 1;
|
|
577
633
|
parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
578
634
|
}
|
|
579
635
|
for (const child of Array.from(paragraph.children)) {
|
|
@@ -702,7 +758,7 @@ function parseCellBorderStyle(cell, tableStyle) {
|
|
|
702
758
|
const left = parseBorderCss(directChildrenByLocalName(tcBorders, "left")[0] ?? null) ?? tableStyle.insideVCss ?? tableStyle.borderCss;
|
|
703
759
|
return `border-top:${top};border-right:${right};border-bottom:${bottom};border-left:${left}`;
|
|
704
760
|
}
|
|
705
|
-
function tableCellHtml(cell, paragraphIndexMap) {
|
|
761
|
+
function tableCellHtml(cell, paragraphIndexMap, context) {
|
|
706
762
|
const blocks = [];
|
|
707
763
|
for (const child of Array.from(cell.children)) {
|
|
708
764
|
if (child.localName === "tcPr") continue;
|
|
@@ -712,7 +768,7 @@ function tableCellHtml(cell, paragraphIndexMap) {
|
|
|
712
768
|
continue;
|
|
713
769
|
}
|
|
714
770
|
if (child.localName === "tbl") {
|
|
715
|
-
blocks.push(tableToHtml(child, paragraphIndexMap));
|
|
771
|
+
blocks.push(tableToHtml(child, paragraphIndexMap, context));
|
|
716
772
|
continue;
|
|
717
773
|
}
|
|
718
774
|
}
|
|
@@ -720,7 +776,8 @@ function tableCellHtml(cell, paragraphIndexMap) {
|
|
|
720
776
|
const text = queryAllByLocalName(cell, "t").map((t) => t.textContent ?? "").join("").trim();
|
|
721
777
|
return escapeHtml(text) || "<br/>";
|
|
722
778
|
}
|
|
723
|
-
function tableToHtml(table, paragraphIndexMap) {
|
|
779
|
+
function tableToHtml(table, paragraphIndexMap, context) {
|
|
780
|
+
context.features.tableCount += 1;
|
|
724
781
|
const rows = directChildrenByLocalName(table, "tr");
|
|
725
782
|
const gridWidthsPx = parseTblGridWidthsPx(table);
|
|
726
783
|
const tableStyle = parseTableStyleProfile(table);
|
|
@@ -748,7 +805,7 @@ function tableToHtml(table, paragraphIndexMap) {
|
|
|
748
805
|
while (activeByCol.has(colCursor)) {
|
|
749
806
|
colCursor += 1;
|
|
750
807
|
}
|
|
751
|
-
const html = tableCellHtml(cell, paragraphIndexMap);
|
|
808
|
+
const html = tableCellHtml(cell, paragraphIndexMap, context);
|
|
752
809
|
const attrs = [];
|
|
753
810
|
const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
|
|
754
811
|
const borderStyle = parseCellBorderStyle(cell, tableStyle);
|
|
@@ -792,7 +849,9 @@ function tableToHtml(table, paragraphIndexMap) {
|
|
|
792
849
|
const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
|
|
793
850
|
return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
|
|
794
851
|
}
|
|
795
|
-
async function parseDocxToHtmlSnapshot(file) {
|
|
852
|
+
async function parseDocxToHtmlSnapshotWithReport(file) {
|
|
853
|
+
const startedAt = Date.now();
|
|
854
|
+
const context = { features: createEmptyFeatureCounts() };
|
|
796
855
|
const maybeArrayBuffer = file.arrayBuffer;
|
|
797
856
|
const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
|
|
798
857
|
const zip = await import_jszip.default.loadAsync(buffer);
|
|
@@ -829,6 +888,7 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
829
888
|
await paragraphToHtml(
|
|
830
889
|
zip,
|
|
831
890
|
relMap,
|
|
891
|
+
context,
|
|
832
892
|
child,
|
|
833
893
|
paragraphIndex,
|
|
834
894
|
footnotesMap,
|
|
@@ -842,14 +902,24 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
842
902
|
continue;
|
|
843
903
|
}
|
|
844
904
|
if (child.localName === "tbl") {
|
|
845
|
-
blockHtml.push(tableToHtml(child, paragraphIndexMap));
|
|
905
|
+
blockHtml.push(tableToHtml(child, paragraphIndexMap, context));
|
|
846
906
|
continue;
|
|
847
907
|
}
|
|
848
908
|
}
|
|
849
909
|
blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
|
|
850
910
|
blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
|
|
851
911
|
blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
|
|
852
|
-
return buildHtmlSnapshot(blockHtml.join("\n"));
|
|
912
|
+
return {
|
|
913
|
+
htmlSnapshot: buildHtmlSnapshot(blockHtml.join("\n")),
|
|
914
|
+
report: {
|
|
915
|
+
elapsedMs: Date.now() - startedAt,
|
|
916
|
+
features: context.features
|
|
917
|
+
}
|
|
918
|
+
};
|
|
919
|
+
}
|
|
920
|
+
async function parseDocxToHtmlSnapshot(file) {
|
|
921
|
+
const result = await parseDocxToHtmlSnapshotWithReport(file);
|
|
922
|
+
return result.htmlSnapshot;
|
|
853
923
|
}
|
|
854
924
|
|
|
855
925
|
// src/lib/pastePipeline.ts
|
|
@@ -1980,7 +2050,7 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
|
|
|
1980
2050
|
}
|
|
1981
2051
|
|
|
1982
2052
|
// src/core/DocsWordElement.ts
|
|
1983
|
-
var VERSION = "0.1.
|
|
2053
|
+
var VERSION = "0.1.5";
|
|
1984
2054
|
var MESSAGES = {
|
|
1985
2055
|
zh: {
|
|
1986
2056
|
readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
|
|
@@ -2123,15 +2193,15 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
2123
2193
|
}
|
|
2124
2194
|
async applyDocx(file) {
|
|
2125
2195
|
try {
|
|
2126
|
-
const [
|
|
2127
|
-
|
|
2196
|
+
const [parseResult, profile] = await Promise.all([
|
|
2197
|
+
parseDocxToHtmlSnapshotWithReport(file),
|
|
2128
2198
|
parseDocxStyleProfile(file)
|
|
2129
2199
|
]);
|
|
2130
2200
|
this.styleProfile = profile;
|
|
2131
|
-
this.htmlSnapshot =
|
|
2201
|
+
this.htmlSnapshot = parseResult.htmlSnapshot;
|
|
2132
2202
|
this.renderSnapshot();
|
|
2133
2203
|
this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
|
|
2134
|
-
this.emitChange("upload", profile.sourceFileName);
|
|
2204
|
+
this.emitChange("upload", profile.sourceFileName, parseResult.report);
|
|
2135
2205
|
} catch (error) {
|
|
2136
2206
|
this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
|
|
2137
2207
|
}
|
|
@@ -2191,8 +2261,10 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
2191
2261
|
renderSnapshot() {
|
|
2192
2262
|
this.frame.srcdoc = this.htmlSnapshot;
|
|
2193
2263
|
}
|
|
2194
|
-
emitChange(source, fileName) {
|
|
2195
|
-
this.dispatchEvent(
|
|
2264
|
+
emitChange(source, fileName, parseReport) {
|
|
2265
|
+
this.dispatchEvent(
|
|
2266
|
+
new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName, parseReport } })
|
|
2267
|
+
);
|
|
2196
2268
|
}
|
|
2197
2269
|
emitError(message) {
|
|
2198
2270
|
this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
|
|
@@ -2294,6 +2366,8 @@ function calculateFidelityScore(expected, actual) {
|
|
|
2294
2366
|
calculateFidelityScore,
|
|
2295
2367
|
collectSemanticStatsFromDocument,
|
|
2296
2368
|
collectSemanticStatsFromHtml,
|
|
2297
|
-
defineDocsWordElement
|
|
2369
|
+
defineDocsWordElement,
|
|
2370
|
+
parseDocxToHtmlSnapshot,
|
|
2371
|
+
parseDocxToHtmlSnapshotWithReport
|
|
2298
2372
|
});
|
|
2299
2373
|
//# sourceMappingURL=index.cjs.map
|