@coding01/docsjs 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -28
- package/README.zh-CN.md +56 -27
- package/dist/{chunk-IBVWD4UO.js → chunk-632UOG2B.js} +91 -17
- package/dist/chunk-632UOG2B.js.map +1 -0
- package/dist/index.cjs +91 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +7 -3
- package/dist/index.js.map +1 -1
- package/dist/react.cjs +92 -17
- package/dist/react.cjs.map +1 -1
- package/dist/react.d.cts +1 -1
- package/dist/react.d.ts +1 -1
- package/dist/react.js +12 -3
- package/dist/react.js.map +1 -1
- package/dist/types-VvdwVF0_.d.cts +44 -0
- package/dist/types-VvdwVF0_.d.ts +44 -0
- package/dist/vue.cjs +81 -15
- package/dist/vue.cjs.map +1 -1
- package/dist/vue.d.cts +1 -1
- package/dist/vue.d.ts +1 -1
- package/dist/vue.js +1 -1
- package/package.json +2 -1
- package/dist/chunk-IBVWD4UO.js.map +0 -1
- package/dist/types-DF14w1ol.d.cts +0 -20
- package/dist/types-DF14w1ol.d.ts +0 -20
package/README.md
CHANGED
|
@@ -6,9 +6,16 @@ Import Word/WPS/Google Docs content from paste or `.docx` while preserving struc
|
|
|
6
6
|
[](https://www.npmjs.com/package/@coding01/docsjs)
|
|
7
7
|
[](https://www.npmjs.com/package/@coding01/docsjs)
|
|
8
8
|
[](https://github.com/fanly/docsjs/actions/workflows/ci.yml)
|
|
9
|
+
[](https://github.com/fanly/docsjs/actions/workflows/pages.yml)
|
|
9
10
|
|
|
10
11
|
[中文文档](./README.zh-CN.md)
|
|
11
12
|
|
|
13
|
+
## GitHub Pages
|
|
14
|
+
|
|
15
|
+
- Product page: [https://docsjs.coding01.cn/](https://docsjs.coding01.cn/)
|
|
16
|
+
- Source: `docs/index.html`
|
|
17
|
+
- Deploy workflow: `.github/workflows/pages.yml`
|
|
18
|
+
|
|
12
19
|
## What You Get
|
|
13
20
|
|
|
14
21
|
- Web Component core: `docs-word-editor`
|
|
@@ -74,7 +81,7 @@ el.addEventListener("docsjs-change", (e) => {
|
|
|
74
81
|
### Events
|
|
75
82
|
|
|
76
83
|
- `docsjs-change`
|
|
77
|
-
- payload: `{ htmlSnapshot: string; source: "paste" | "upload" | "api" | "clear"; fileName?: string }`
|
|
84
|
+
- payload: `{ htmlSnapshot: string; source: "paste" | "upload" | "api" | "clear"; fileName?: string; parseReport?: DocxParseReport }`
|
|
78
85
|
- `docsjs-error`
|
|
79
86
|
- payload: `{ message: string }`
|
|
80
87
|
- `docsjs-ready`
|
|
@@ -95,34 +102,49 @@ el.addEventListener("docsjs-change", (e) => {
|
|
|
95
102
|
|
|
96
103
|
## Feature Checklist
|
|
97
104
|
|
|
105
|
+
<!-- GENERATED:FEATURE_CHECKLIST_EN:START -->
|
|
106
|
+
### Core
|
|
107
|
+
|
|
98
108
|
- ✅ Web Component core (`docs-word-editor`)
|
|
99
|
-
- ✅ React adapter
|
|
100
|
-
- ✅
|
|
101
|
-
- ✅
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
- ✅
|
|
106
|
-
- ✅
|
|
107
|
-
- ✅
|
|
108
|
-
- ✅
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
- ✅
|
|
113
|
-
- ✅
|
|
114
|
-
- ✅
|
|
115
|
-
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
- ✅
|
|
120
|
-
- ✅
|
|
121
|
-
- ✅
|
|
122
|
-
- ✅
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
109
|
+
- ✅ React adapter + Vue adapter
|
|
110
|
+
- ✅ Events and imperative public API
|
|
111
|
+
- ✅ Strict-only parser strategy
|
|
112
|
+
|
|
113
|
+
### Import Pipeline
|
|
114
|
+
|
|
115
|
+
- ✅ Clipboard import (`text/html`, `text/plain`)
|
|
116
|
+
- ✅ `.docx` upload + relationship media mapping
|
|
117
|
+
- ✅ Clipboard image hydration (`file:/blob:/cid:`)
|
|
118
|
+
- ✅ Output as stable HTML snapshot
|
|
119
|
+
|
|
120
|
+
### Layout Fidelity
|
|
121
|
+
|
|
122
|
+
- ✅ List reconstruction (`numId`, `ilvl`, `lvlText`)
|
|
123
|
+
- ✅ Table v1 (`tblGrid/tcW`, merge, border, spacing)
|
|
124
|
+
- ✅ Floating anchors v1 (`wp:anchor` metadata)
|
|
125
|
+
- ⏳ Anchor collision parity (pixel-level wrap)
|
|
126
|
+
|
|
127
|
+
### Advanced Semantics
|
|
128
|
+
|
|
129
|
+
- ✅ Footnotes / endnotes / comments
|
|
130
|
+
- ✅ Revision markers (`ins` / `del`) + metadata
|
|
131
|
+
- ✅ Page break semantic markers
|
|
132
|
+
- ✅ DOCX hyperlink relationship + anchor mapping
|
|
133
|
+
|
|
134
|
+
### Semantic Fallback
|
|
135
|
+
|
|
136
|
+
- ✅ OMML fallback output
|
|
137
|
+
- ✅ Chart semantic extraction fallback
|
|
138
|
+
- ✅ SmartArt node fallback extraction
|
|
139
|
+
- ⏳ OMML high-fidelity render pipeline (MathML/KaTeX)
|
|
140
|
+
|
|
141
|
+
### Engineering Quality
|
|
142
|
+
|
|
143
|
+
- ✅ 50 automated tests (regression + boundary)
|
|
144
|
+
- ✅ Baseline snapshot regression framework
|
|
145
|
+
- ✅ `verify` quality gate (lint/typecheck/test/build/size)
|
|
146
|
+
- ✅ Parse report API for performance tuning
|
|
147
|
+
<!-- GENERATED:FEATURE_CHECKLIST_EN:END -->
|
|
126
148
|
|
|
127
149
|
## What's New in v0.1.3
|
|
128
150
|
|
|
@@ -166,6 +188,13 @@ npm run build
|
|
|
166
188
|
npm run benchmark:fidelity
|
|
167
189
|
```
|
|
168
190
|
|
|
191
|
+
## Engineering Modes
|
|
192
|
+
|
|
193
|
+
- Spec and conventions: [ENGINEERING_MODES.md](./ENGINEERING_MODES.md)
|
|
194
|
+
- Parse API now supports:
|
|
195
|
+
- `parseDocxToHtmlSnapshot(file)`
|
|
196
|
+
- `parseDocxToHtmlSnapshotWithReport(file)`
|
|
197
|
+
|
|
169
198
|
## Demos
|
|
170
199
|
|
|
171
200
|
### React demo
|
package/README.zh-CN.md
CHANGED
|
@@ -6,9 +6,16 @@
|
|
|
6
6
|
[](https://www.npmjs.com/package/@coding01/docsjs)
|
|
7
7
|
[](https://www.npmjs.com/package/@coding01/docsjs)
|
|
8
8
|
[](https://github.com/fanly/docsjs/actions/workflows/ci.yml)
|
|
9
|
+
[](https://github.com/fanly/docsjs/actions/workflows/pages.yml)
|
|
9
10
|
|
|
10
11
|
[English README](./README.md)
|
|
11
12
|
|
|
13
|
+
## GitHub Pages
|
|
14
|
+
|
|
15
|
+
- 产品单页: [https://docsjs.coding01.cn/](https://docsjs.coding01.cn/)
|
|
16
|
+
- 页面源码: `docs/index.html`
|
|
17
|
+
- 自动部署: `.github/workflows/pages.yml`
|
|
18
|
+
|
|
12
19
|
## 核心能力
|
|
13
20
|
|
|
14
21
|
- Web Component 内核:`docs-word-editor`
|
|
@@ -90,34 +97,49 @@ document.body.appendChild(el);
|
|
|
90
97
|
|
|
91
98
|
## 功能清单
|
|
92
99
|
|
|
100
|
+
<!-- GENERATED:FEATURE_CHECKLIST_ZH:START -->
|
|
101
|
+
### 核心
|
|
102
|
+
|
|
93
103
|
- ✅ Web Component 内核(`docs-word-editor`)
|
|
94
|
-
- ✅ React
|
|
95
|
-
- ✅
|
|
96
|
-
- ✅
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
- ✅
|
|
101
|
-
- ✅
|
|
102
|
-
- ✅
|
|
103
|
-
- ✅
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
- ✅
|
|
108
|
-
- ✅
|
|
109
|
-
- ✅
|
|
110
|
-
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
- ✅
|
|
115
|
-
- ✅
|
|
116
|
-
- ✅
|
|
117
|
-
- ✅
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
104
|
+
- ✅ React + Vue 适配层
|
|
105
|
+
- ✅ 事件体系与命令式公开 API
|
|
106
|
+
- ✅ 严格模式唯一解析策略
|
|
107
|
+
|
|
108
|
+
### 导入链路
|
|
109
|
+
|
|
110
|
+
- ✅ 剪贴板导入(`text/html`、`text/plain`)
|
|
111
|
+
- ✅ `.docx` 上传与关系媒体映射
|
|
112
|
+
- ✅ 不稳定图片 URI 修复(`file:/blob:/cid:`)
|
|
113
|
+
- ✅ 输出稳定 HTML Snapshot
|
|
114
|
+
|
|
115
|
+
### 版式保真
|
|
116
|
+
|
|
117
|
+
- ✅ 列表结构恢复(`numId`、`ilvl`、`lvlText`)
|
|
118
|
+
- ✅ 表格 v1(`tblGrid/tcW`、合并、边框、间距)
|
|
119
|
+
- ✅ 浮动锚点 v1(`wp:anchor` 元数据)
|
|
120
|
+
- ⏳ 锚点碰撞一致性(像素级绕排)
|
|
121
|
+
|
|
122
|
+
### 高级语义
|
|
123
|
+
|
|
124
|
+
- ✅ 脚注/尾注/批注
|
|
125
|
+
- ✅ 修订标记(`ins`/`del`)与元数据
|
|
126
|
+
- ✅ 分页语义标记
|
|
127
|
+
- ✅ DOCX 超链接关系与锚点映射
|
|
128
|
+
|
|
129
|
+
### 语义降级
|
|
130
|
+
|
|
131
|
+
- ✅ OMML 语义降级输出
|
|
132
|
+
- ✅ 图表语义提取降级
|
|
133
|
+
- ✅ SmartArt 节点降级提取
|
|
134
|
+
- ⏳ OMML 高保真渲染链(MathML/KaTeX)
|
|
135
|
+
|
|
136
|
+
### 工程质量
|
|
137
|
+
|
|
138
|
+
- ✅ 50 条自动化测试(回归 + 边界)
|
|
139
|
+
- ✅ 基准快照回归框架
|
|
140
|
+
- ✅ `verify` 质量门禁(lint/typecheck/test/build/size)
|
|
141
|
+
- ✅ 解析报告 API(性能调优)
|
|
142
|
+
<!-- GENERATED:FEATURE_CHECKLIST_ZH:END -->
|
|
121
143
|
|
|
122
144
|
## v0.1.3 更新内容
|
|
123
145
|
|
|
@@ -161,6 +183,13 @@ npm run build
|
|
|
161
183
|
npm run benchmark:fidelity
|
|
162
184
|
```
|
|
163
185
|
|
|
186
|
+
## 工程模式
|
|
187
|
+
|
|
188
|
+
- 规则说明: [ENGINEERING_MODES.md](./ENGINEERING_MODES.md)
|
|
189
|
+
- 解析 API 支持:
|
|
190
|
+
- `parseDocxToHtmlSnapshot(file)`
|
|
191
|
+
- `parseDocxToHtmlSnapshotWithReport(file)`
|
|
192
|
+
|
|
164
193
|
## 演示
|
|
165
194
|
|
|
166
195
|
### React demo
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
// src/lib/docxHtml.ts
|
|
2
|
+
import JSZip from "jszip";
|
|
3
|
+
|
|
1
4
|
// src/lib/htmlSnapshot.ts
|
|
2
5
|
var SNAPSHOT_SHELL_START = '<!DOCTYPE html><html><head><meta charset="utf-8"/>';
|
|
3
6
|
var SNAPSHOT_SHELL_END = "</head><body></body></html>";
|
|
@@ -16,7 +19,21 @@ function buildHtmlSnapshot(rawHtml) {
|
|
|
16
19
|
}
|
|
17
20
|
|
|
18
21
|
// src/lib/docxHtml.ts
|
|
19
|
-
|
|
22
|
+
function createEmptyFeatureCounts() {
|
|
23
|
+
return {
|
|
24
|
+
hyperlinkCount: 0,
|
|
25
|
+
anchorImageCount: 0,
|
|
26
|
+
chartCount: 0,
|
|
27
|
+
smartArtCount: 0,
|
|
28
|
+
ommlCount: 0,
|
|
29
|
+
tableCount: 0,
|
|
30
|
+
footnoteRefCount: 0,
|
|
31
|
+
endnoteRefCount: 0,
|
|
32
|
+
commentRefCount: 0,
|
|
33
|
+
revisionCount: 0,
|
|
34
|
+
pageBreakCount: 0
|
|
35
|
+
};
|
|
36
|
+
}
|
|
20
37
|
function escapeHtml(text) {
|
|
21
38
|
return text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
|
|
22
39
|
}
|
|
@@ -191,6 +208,19 @@ function normalizeWordPath(relTarget) {
|
|
|
191
208
|
if (normalized.startsWith("../")) return `word/${normalized.replace(/^(\.\.\/)+/, "")}`;
|
|
192
209
|
return `word/${normalized}`;
|
|
193
210
|
}
|
|
211
|
+
function resolveHyperlinkHref(relMap, rid, anchor) {
|
|
212
|
+
if (anchor && anchor.trim()) return `#${encodeURIComponent(anchor.trim())}`;
|
|
213
|
+
if (!rid) return null;
|
|
214
|
+
const relTarget = relMap[rid];
|
|
215
|
+
if (!relTarget) return null;
|
|
216
|
+
const trimmed = relTarget.trim();
|
|
217
|
+
if (!trimmed) return null;
|
|
218
|
+
const lower = trimmed.toLowerCase();
|
|
219
|
+
if (lower.startsWith("http://") || lower.startsWith("https://") || lower.startsWith("mailto:") || lower.startsWith("tel:")) {
|
|
220
|
+
return trimmed;
|
|
221
|
+
}
|
|
222
|
+
return trimmed.startsWith("#") ? trimmed : `#${encodeURIComponent(trimmed)}`;
|
|
223
|
+
}
|
|
194
224
|
async function imageRidToDataUrl(zip, relMap, rid) {
|
|
195
225
|
const relTarget = relMap[rid];
|
|
196
226
|
if (!relTarget) return null;
|
|
@@ -365,7 +395,7 @@ function renderEndnotesSection(usedIds, endnotesMap) {
|
|
|
365
395
|
const items = uniq.map((id) => `<li id="word-endnote-${id}" data-word-endnote-id="${id}">${endnotesMap[id]}</li>`).join("");
|
|
366
396
|
return `<section data-word-endnotes="1"><hr/><ol>${items}</ol></section>`;
|
|
367
397
|
}
|
|
368
|
-
async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
|
|
398
|
+
async function paragraphToHtml(zip, relMap, context, paragraph, paragraphIndex, footnotesMap, usedFootnoteIds, endnotesMap, usedEndnoteIds, commentsMap, usedCommentIds) {
|
|
369
399
|
const tag = paragraphTag(paragraph);
|
|
370
400
|
const alignStyle = paragraphAlignStyle(paragraph);
|
|
371
401
|
const dataAttr = paragraphDataAttr(paragraphIndex);
|
|
@@ -406,6 +436,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
406
436
|
const footnoteRef = queryByLocalName(run, "footnoteReference");
|
|
407
437
|
const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
|
|
408
438
|
if (footnoteId && footnotesMap[footnoteId]) {
|
|
439
|
+
context.features.footnoteRefCount += 1;
|
|
409
440
|
usedFootnoteIds.push(footnoteId);
|
|
410
441
|
result.push(
|
|
411
442
|
`<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
|
|
@@ -415,6 +446,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
415
446
|
const endnoteRef = queryByLocalName(run, "endnoteReference");
|
|
416
447
|
const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
|
|
417
448
|
if (endnoteId && endnotesMap[endnoteId]) {
|
|
449
|
+
context.features.endnoteRefCount += 1;
|
|
418
450
|
usedEndnoteIds.push(endnoteId);
|
|
419
451
|
result.push(
|
|
420
452
|
`<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
|
|
@@ -424,6 +456,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
424
456
|
const commentRef = queryByLocalName(run, "commentReference");
|
|
425
457
|
const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
|
|
426
458
|
if (commentId && commentsMap[commentId]) {
|
|
459
|
+
context.features.commentRefCount += 1;
|
|
427
460
|
usedCommentIds.push(commentId);
|
|
428
461
|
result.push(
|
|
429
462
|
`<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
|
|
@@ -441,6 +474,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
441
474
|
const dimensionAttrs = imageDimensionAttributes(imageSize);
|
|
442
475
|
const anchorMeta = parseAnchorMeta(drawing);
|
|
443
476
|
const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
|
|
477
|
+
if (anchorMeta) context.features.anchorImageCount += 1;
|
|
444
478
|
result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
|
|
445
479
|
return result;
|
|
446
480
|
}
|
|
@@ -451,6 +485,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
451
485
|
const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
|
|
452
486
|
if (chartXmlText) {
|
|
453
487
|
const summary = parseChartSummary(chartXmlText);
|
|
488
|
+
context.features.chartCount += 1;
|
|
454
489
|
result.push(
|
|
455
490
|
`<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
|
|
456
491
|
);
|
|
@@ -462,6 +497,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
462
497
|
if (smartArtRid) {
|
|
463
498
|
const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
|
|
464
499
|
const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
|
|
500
|
+
context.features.smartArtCount += 1;
|
|
465
501
|
const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
|
|
466
502
|
result.push(
|
|
467
503
|
`<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
|
|
@@ -483,12 +519,14 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
483
519
|
if (css) {
|
|
484
520
|
const span = `<span style="${css}">${runText2}</span>`;
|
|
485
521
|
if (revisionMeta) {
|
|
522
|
+
context.features.revisionCount += 1;
|
|
486
523
|
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
487
524
|
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
|
|
488
525
|
} else {
|
|
489
526
|
result.push(span);
|
|
490
527
|
}
|
|
491
528
|
} else if (revisionMeta) {
|
|
529
|
+
context.features.revisionCount += 1;
|
|
492
530
|
const tagName = revisionMeta.type === "ins" ? "ins" : "del";
|
|
493
531
|
result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
|
|
494
532
|
} else {
|
|
@@ -496,6 +534,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
496
534
|
}
|
|
497
535
|
}
|
|
498
536
|
for (let i = 0; i < pageBreakCount; i += 1) {
|
|
537
|
+
context.features.pageBreakCount += 1;
|
|
499
538
|
result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
500
539
|
}
|
|
501
540
|
return result;
|
|
@@ -512,9 +551,25 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
512
551
|
if (node.localName === "r") {
|
|
513
552
|
return runToHtml(node, revisionFallback);
|
|
514
553
|
}
|
|
554
|
+
if (node.localName === "hyperlink") {
|
|
555
|
+
const rid = getAttr(node, "r:id") ?? getAttr(node, "id");
|
|
556
|
+
const anchor = getAttr(node, "w:anchor") ?? getAttr(node, "anchor");
|
|
557
|
+
const href = resolveHyperlinkHref(relMap, rid, anchor);
|
|
558
|
+
const nested2 = [];
|
|
559
|
+
for (const child of Array.from(node.children)) {
|
|
560
|
+
nested2.push(...await nodeToHtml(child, revisionFallback));
|
|
561
|
+
}
|
|
562
|
+
const content2 = nested2.join("") || escapeHtml(node.textContent ?? "");
|
|
563
|
+
if (!href) return content2 ? [content2] : [];
|
|
564
|
+
context.features.hyperlinkCount += 1;
|
|
565
|
+
return [
|
|
566
|
+
`<a data-word-hyperlink="1" href="${escapeHtml(href)}" rel="noreferrer noopener" target="_blank">${content2}</a>`
|
|
567
|
+
];
|
|
568
|
+
}
|
|
515
569
|
if (node.localName === "oMath" || node.localName === "oMathPara") {
|
|
516
570
|
const linear = ommlNodeToText(node).trim();
|
|
517
571
|
if (!linear) return [];
|
|
572
|
+
context.features.ommlCount += 1;
|
|
518
573
|
return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
|
|
519
574
|
}
|
|
520
575
|
if (node.localName === "ins" || node.localName === "del") {
|
|
@@ -534,6 +589,7 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
|
|
|
534
589
|
const parts = [];
|
|
535
590
|
const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
|
|
536
591
|
for (let i = 0; i < renderedPageBreakCount; i += 1) {
|
|
592
|
+
context.features.pageBreakCount += 1;
|
|
537
593
|
parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
|
|
538
594
|
}
|
|
539
595
|
for (const child of Array.from(paragraph.children)) {
|
|
@@ -662,7 +718,7 @@ function parseCellBorderStyle(cell, tableStyle) {
|
|
|
662
718
|
const left = parseBorderCss(directChildrenByLocalName(tcBorders, "left")[0] ?? null) ?? tableStyle.insideVCss ?? tableStyle.borderCss;
|
|
663
719
|
return `border-top:${top};border-right:${right};border-bottom:${bottom};border-left:${left}`;
|
|
664
720
|
}
|
|
665
|
-
function tableCellHtml(cell, paragraphIndexMap) {
|
|
721
|
+
function tableCellHtml(cell, paragraphIndexMap, context) {
|
|
666
722
|
const blocks = [];
|
|
667
723
|
for (const child of Array.from(cell.children)) {
|
|
668
724
|
if (child.localName === "tcPr") continue;
|
|
@@ -672,7 +728,7 @@ function tableCellHtml(cell, paragraphIndexMap) {
|
|
|
672
728
|
continue;
|
|
673
729
|
}
|
|
674
730
|
if (child.localName === "tbl") {
|
|
675
|
-
blocks.push(tableToHtml(child, paragraphIndexMap));
|
|
731
|
+
blocks.push(tableToHtml(child, paragraphIndexMap, context));
|
|
676
732
|
continue;
|
|
677
733
|
}
|
|
678
734
|
}
|
|
@@ -680,7 +736,8 @@ function tableCellHtml(cell, paragraphIndexMap) {
|
|
|
680
736
|
const text = queryAllByLocalName(cell, "t").map((t) => t.textContent ?? "").join("").trim();
|
|
681
737
|
return escapeHtml(text) || "<br/>";
|
|
682
738
|
}
|
|
683
|
-
function tableToHtml(table, paragraphIndexMap) {
|
|
739
|
+
function tableToHtml(table, paragraphIndexMap, context) {
|
|
740
|
+
context.features.tableCount += 1;
|
|
684
741
|
const rows = directChildrenByLocalName(table, "tr");
|
|
685
742
|
const gridWidthsPx = parseTblGridWidthsPx(table);
|
|
686
743
|
const tableStyle = parseTableStyleProfile(table);
|
|
@@ -708,7 +765,7 @@ function tableToHtml(table, paragraphIndexMap) {
|
|
|
708
765
|
while (activeByCol.has(colCursor)) {
|
|
709
766
|
colCursor += 1;
|
|
710
767
|
}
|
|
711
|
-
const html = tableCellHtml(cell, paragraphIndexMap);
|
|
768
|
+
const html = tableCellHtml(cell, paragraphIndexMap, context);
|
|
712
769
|
const attrs = [];
|
|
713
770
|
const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
|
|
714
771
|
const borderStyle = parseCellBorderStyle(cell, tableStyle);
|
|
@@ -752,7 +809,9 @@ function tableToHtml(table, paragraphIndexMap) {
|
|
|
752
809
|
const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
|
|
753
810
|
return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
|
|
754
811
|
}
|
|
755
|
-
async function parseDocxToHtmlSnapshot(file) {
|
|
812
|
+
async function parseDocxToHtmlSnapshotWithReport(file) {
|
|
813
|
+
const startedAt = Date.now();
|
|
814
|
+
const context = { features: createEmptyFeatureCounts() };
|
|
756
815
|
const maybeArrayBuffer = file.arrayBuffer;
|
|
757
816
|
const buffer = maybeArrayBuffer ? await maybeArrayBuffer.call(file) : await new Response(file).arrayBuffer();
|
|
758
817
|
const zip = await JSZip.loadAsync(buffer);
|
|
@@ -789,6 +848,7 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
789
848
|
await paragraphToHtml(
|
|
790
849
|
zip,
|
|
791
850
|
relMap,
|
|
851
|
+
context,
|
|
792
852
|
child,
|
|
793
853
|
paragraphIndex,
|
|
794
854
|
footnotesMap,
|
|
@@ -802,14 +862,24 @@ async function parseDocxToHtmlSnapshot(file) {
|
|
|
802
862
|
continue;
|
|
803
863
|
}
|
|
804
864
|
if (child.localName === "tbl") {
|
|
805
|
-
blockHtml.push(tableToHtml(child, paragraphIndexMap));
|
|
865
|
+
blockHtml.push(tableToHtml(child, paragraphIndexMap, context));
|
|
806
866
|
continue;
|
|
807
867
|
}
|
|
808
868
|
}
|
|
809
869
|
blockHtml.push(renderFootnotesSection(usedFootnoteIds, footnotesMap));
|
|
810
870
|
blockHtml.push(renderEndnotesSection(usedEndnoteIds, endnotesMap));
|
|
811
871
|
blockHtml.push(renderCommentsSection(usedCommentIds, commentsMap));
|
|
812
|
-
return buildHtmlSnapshot(blockHtml.join("\n"));
|
|
872
|
+
return {
|
|
873
|
+
htmlSnapshot: buildHtmlSnapshot(blockHtml.join("\n")),
|
|
874
|
+
report: {
|
|
875
|
+
elapsedMs: Date.now() - startedAt,
|
|
876
|
+
features: context.features
|
|
877
|
+
}
|
|
878
|
+
};
|
|
879
|
+
}
|
|
880
|
+
async function parseDocxToHtmlSnapshot(file) {
|
|
881
|
+
const result = await parseDocxToHtmlSnapshotWithReport(file);
|
|
882
|
+
return result.htmlSnapshot;
|
|
813
883
|
}
|
|
814
884
|
|
|
815
885
|
// src/lib/pastePipeline.ts
|
|
@@ -1940,7 +2010,7 @@ function applyWordRenderModel({ doc, styleProfile, showFormattingMarks }) {
|
|
|
1940
2010
|
}
|
|
1941
2011
|
|
|
1942
2012
|
// src/core/DocsWordElement.ts
|
|
1943
|
-
var VERSION = "0.1.
|
|
2013
|
+
var VERSION = "0.1.5";
|
|
1944
2014
|
var MESSAGES = {
|
|
1945
2015
|
zh: {
|
|
1946
2016
|
readClipboard: "\u4ECE\u7CFB\u7EDF\u526A\u8D34\u677F\u8BFB\u53D6",
|
|
@@ -2083,15 +2153,15 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
2083
2153
|
}
|
|
2084
2154
|
async applyDocx(file) {
|
|
2085
2155
|
try {
|
|
2086
|
-
const [
|
|
2087
|
-
|
|
2156
|
+
const [parseResult, profile] = await Promise.all([
|
|
2157
|
+
parseDocxToHtmlSnapshotWithReport(file),
|
|
2088
2158
|
parseDocxStyleProfile(file)
|
|
2089
2159
|
]);
|
|
2090
2160
|
this.styleProfile = profile;
|
|
2091
|
-
this.htmlSnapshot =
|
|
2161
|
+
this.htmlSnapshot = parseResult.htmlSnapshot;
|
|
2092
2162
|
this.renderSnapshot();
|
|
2093
2163
|
this.setHint(MESSAGES[this.locale].loadedWord(profile.sourceFileName));
|
|
2094
|
-
this.emitChange("upload", profile.sourceFileName);
|
|
2164
|
+
this.emitChange("upload", profile.sourceFileName, parseResult.report);
|
|
2095
2165
|
} catch (error) {
|
|
2096
2166
|
this.emitError(error instanceof Error ? error.message : MESSAGES[this.locale].parseFailed);
|
|
2097
2167
|
}
|
|
@@ -2151,8 +2221,10 @@ var DocsWordElement = class extends HTMLElement {
|
|
|
2151
2221
|
renderSnapshot() {
|
|
2152
2222
|
this.frame.srcdoc = this.htmlSnapshot;
|
|
2153
2223
|
}
|
|
2154
|
-
emitChange(source, fileName) {
|
|
2155
|
-
this.dispatchEvent(
|
|
2224
|
+
emitChange(source, fileName, parseReport) {
|
|
2225
|
+
this.dispatchEvent(
|
|
2226
|
+
new CustomEvent("docsjs-change", { detail: { htmlSnapshot: this.htmlSnapshot, source, fileName, parseReport } })
|
|
2227
|
+
);
|
|
2156
2228
|
}
|
|
2157
2229
|
emitError(message) {
|
|
2158
2230
|
this.dispatchEvent(new CustomEvent("docsjs-error", { detail: { message } }));
|
|
@@ -2187,7 +2259,9 @@ function defineDocsWordElement() {
|
|
|
2187
2259
|
}
|
|
2188
2260
|
|
|
2189
2261
|
export {
|
|
2262
|
+
parseDocxToHtmlSnapshotWithReport,
|
|
2263
|
+
parseDocxToHtmlSnapshot,
|
|
2190
2264
|
DocsWordElement,
|
|
2191
2265
|
defineDocsWordElement
|
|
2192
2266
|
};
|
|
2193
|
-
//# sourceMappingURL=chunk-IBVWD4UO.js.map
|
|
2267
|
+
//# sourceMappingURL=chunk-632UOG2B.js.map
|