@courtifyai/docx-render 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,159 +0,0 @@
1
- /**
2
- * 评论扩展解析器
3
- * 解析 word/commentsExtended.xml,提取评论的父子关系
4
- */
5
-
6
- import { ICommentExtended, ICommentElement } from '../types'
7
- import { parseXmlString } from '../parser/xml-parser'
8
-
9
/**
 * Namespace URI looked up for `commentEx` elements and their attributes in
 * `word/commentsExtended.xml`; the `w15:` prefix is tried as a fallback for it.
 * Note that the URI itself carries the year 2012.
 */
const W15_NS = 'http://schemas.microsoft.com/office/word/2012/wordml'
11
-
12
/**
 * Parses the text of `word/commentsExtended.xml` into a lookup of extended
 * comment records.
 *
 * Every `commentEx` element that has a `paraId` yields one entry holding its
 * `paraId`, optional `paraIdParent` and `done` flag; elements without a
 * `paraId` are skipped. Parsing is best-effort: if anything throws, a warning
 * is logged and whatever was collected so far is returned.
 *
 * @param xmlContent Raw XML text. An empty string yields an empty map.
 * @returns Map keyed by `paraId`.
 */
export function parseCommentsExtended(xmlContent: string): Map<string, ICommentExtended> {
  const result = new Map<string, ICommentExtended>()

  if (!xmlContent) {
    return result
  }

  try {
    const root = parseXmlString(xmlContent).documentElement
    if (!root) {
      return result
    }

    // Preferred lookup: elements bound to the expected namespace URI.
    let elements = Array.from(root.getElementsByTagNameNS(W15_NS, 'commentEx'))

    // Fallback 1: any namespace, matched by local name. A plain
    // getElementsByTagName('commentEx') compares the qualified name and so
    // would miss prefixed elements such as <w15:commentEx>.
    if (elements.length === 0) {
      elements = Array.from(root.getElementsByTagNameNS('*', 'commentEx'))
    }

    // Fallback 2: un-prefixed elements, kept for parsers without namespace support.
    if (elements.length === 0) {
      elements = Array.from(root.getElementsByTagName('commentEx'))
    }

    for (const el of elements) {
      const paraId = getAttr(el, 'paraId')
      if (!paraId) continue

      result.set(paraId, {
        paraId,
        paraIdParent: getAttr(el, 'paraIdParent'),
        done: getBoolAttr(el, 'done'),
      })
    }
  } catch (e) {
    console.warn('解析 commentsExtended.xml 失败:', e)
  }

  return result
}
57
-
58
/**
 * Builds the reply tree for a flat list of comments.
 *
 * Side effects on the passed-in comments: `replies` is reset to a fresh array
 * on every comment, `done` is copied from the matching extended record, and
 * `parentId` is set on every comment that gets attached to a parent.
 *
 * A comment is attached to its parent only when the parent exists in
 * `comments` and the link would not make the comment its own ancestor
 * (self-references and cycles are ignored so no comment disappears from the
 * tree). Root detection is based on the links created here rather than on the
 * truthiness of `parentId`, so a parent whose id is `0` works correctly.
 *
 * @param comments All comments of the document.
 * @param extendedMap Extended records keyed by `paraId`.
 * @returns Top-level comments, oldest first, with replies nested in `replies`
 *          (also oldest first at every level).
 */
export function buildCommentTree(
  comments: ICommentElement[],
  extendedMap: Map<string, ICommentExtended>
): ICommentElement[] {
  // Index comments by paragraph id and start every comment with no replies.
  const paraIdToComment = new Map<string, ICommentElement>()
  for (const comment of comments) {
    comment.replies = []
    if (comment.paraId) {
      paraIdToComment.set(comment.paraId, comment)
    }
  }

  // child -> parent links established during this call.
  const parentOf = new Map<ICommentElement, ICommentElement>()

  for (const [paraId, extended] of extendedMap) {
    const comment = paraIdToComment.get(paraId)
    if (!comment) continue

    comment.done = extended.done

    if (!extended.paraIdParent) continue
    const parentComment = paraIdToComment.get(extended.paraIdParent)
    if (!parentComment) continue

    // Walk up from the prospective parent; reaching `comment` means the link
    // would close a loop (this also covers a comment naming itself as parent).
    let ancestor: ICommentElement | undefined = parentComment
    while (ancestor && ancestor !== comment) {
      ancestor = parentOf.get(ancestor)
    }
    if (ancestor === comment) continue

    parentOf.set(comment, parentComment)
    comment.parentId = parentComment.id
    const siblings = parentComment.replies ?? (parentComment.replies = [])
    siblings.push(comment)
  }

  // Roots are exactly the comments that were not attached to a parent above.
  const rootComments = comments.filter(c => !parentOf.has(c))
  rootComments.sort(compareCommentsByDate)
  sortReplies(rootComments)

  return rootComments
}

/**
 * Recursively sorts the `replies` of each given comment by date, oldest first.
 */
function sortReplies(comments: ICommentElement[]): void {
  for (const comment of comments) {
    const replies = comment.replies
    if (!replies || replies.length === 0) continue
    replies.sort(compareCommentsByDate)
    sortReplies(replies)
  }
}

/**
 * Chronological comparator (oldest first). Never returns NaN, so the sort
 * stays well-defined even when a date cannot be parsed.
 */
function compareCommentsByDate(a: ICommentElement, b: ICommentElement): number {
  const timeA = commentTimestamp(a)
  const timeB = commentTimestamp(b)
  if (timeA === timeB) return 0
  return timeA < timeB ? -1 : 1
}

/**
 * Millisecond timestamp of a comment's `date`; an unparsable date maps to
 * +Infinity so such comments sort after all dated ones.
 */
function commentTimestamp(comment: ICommentElement): number {
  const time = new Date(comment.date).getTime()
  return Number.isNaN(time) ? Number.POSITIVE_INFINITY : time
}
134
-
135
/**
 * Reads an attribute value, tolerating different namespace handling.
 *
 * Lookups are attempted in this order and the first non-empty value wins:
 *   1. the attribute in the `W15_NS` namespace,
 *   2. the attribute with the plain name,
 *   3. the attribute with the literal qualified name `w15:<name>`.
 * An empty string is treated the same as a missing attribute.
 *
 * @returns The attribute value, or `undefined` when no lookup produced one.
 */
function getAttr(el: Element, name: string): string | undefined {
  const lookups: Array<() => string | null> = [
    () => el.getAttributeNS(W15_NS, name),
    () => el.getAttribute(name),
    () => el.getAttribute(`w15:${name}`),
  ]

  for (const lookup of lookups) {
    const found = lookup()
    if (found) return found
  }

  return undefined
}
151
-
152
/**
 * Reads an on/off attribute through `getAttr`.
 *
 * `1`, `true` and `on` count as true; a missing attribute or any other value
 * is false.
 */
function getBoolAttr(el: Element, name: string): boolean {
  const value = getAttr(el, name)
  if (!value) return false
  return value === '1' || value === 'true' || value === 'on'
}
@@ -1,6 +0,0 @@
1
- /**
2
- * 评论模块
3
- * 处理评论解析和回复链构建
4
- */
5
-
6
- export { parseCommentsExtended, buildCommentTree } from './comments-parser'
@@ -1,379 +0,0 @@
1
- /**
2
- * 嵌入字体加载器
3
- * 从 DOCX 文件中加载嵌入字体,支持字体解密和 CSS @font-face 注入
4
- */
5
-
6
- import JSZip from 'jszip'
7
- import {
8
- IFontTable,
9
- IFontDeclaration,
10
- IEmbedFontRef,
11
- ILoadedEmbedFont,
12
- IRelationship,
13
- TEmbedFontType,
14
- RELATIONSHIP_TYPES,
15
- } from '../types'
16
-
17
/**
 * Options accepted by the embedded-font loader.
 */
export interface IFontLoaderOptions {
  /** Whether CSS `@font-face` rules are injected automatically after loading. */
  injectStyles?: boolean
  /** Element that receives the injected `<style>`; `document.head` when omitted. */
  styleContainer?: HTMLElement
  /** Font loading timeout, in milliseconds. */
  timeout?: number
}
28
-
29
/** Baseline loader options: styles are injected and the timeout is 10000 ms. */
const DEFAULT_OPTIONS: IFontLoaderOptions = {
  injectStyles: true,
  timeout: 10000,
}
33
-
34
/**
 * Loads every embedded font referenced by the font table.
 *
 * Fonts are processed one after another. A failure while loading one font is
 * logged as a warning and does not stop the remaining fonts. When
 * `injectStyles` is enabled and at least one font loaded, the matching
 * `@font-face` rules are injected into `styleContainer`.
 *
 * @param zip JSZip instance of the DOCX package.
 * @param fontTable Parsed font table.
 * @param fontRels Relationships of `fontTable.xml`.
 * @param options Loader options; merged over the defaults.
 * @returns The fonts that were loaded successfully.
 */
export async function loadEmbeddedFonts(
  zip: JSZip,
  fontTable: IFontTable,
  fontRels: IRelationship[],
  options: IFontLoaderOptions = {}
): Promise<ILoadedEmbedFont[]> {
  const settings = { ...DEFAULT_OPTIONS, ...options }
  // A falsy timeout (including 0) falls back to the default value.
  const timeoutMs = settings.timeout || DEFAULT_OPTIONS.timeout!
  const collected: ILoadedEmbedFont[] = []

  for (const declaration of fontTable.fonts) {
    for (const ref of declaration.embedFontRefs) {
      try {
        const loaded = await loadSingleEmbeddedFont(zip, declaration, ref, fontRels, timeoutMs)
        if (loaded) {
          collected.push(loaded)
        }
      } catch (err) {
        console.warn(`加载嵌入字体失败: ${declaration.name} (${ref.type})`, err)
      }
    }
  }

  const shouldInject = settings.injectStyles && collected.length > 0
  if (shouldInject) {
    injectFontFaceStyles(collected, settings.styleContainer)
  }

  return collected
}
81
-
82
/**
 * Loads one embedded font from the package and converts it to a data URL.
 *
 * Steps: resolve the relationship for `embedRef.id`, read the font part from
 * the zip (racing against `timeout`), de-obfuscate it when `embedRef.key` is
 * present, detect the font format and encode the bytes as a data URL.
 *
 * @param zip JSZip instance of the DOCX package.
 * @param font Font declaration the embedded file belongs to.
 * @param embedRef Reference describing which variant is embedded.
 * @param fontRels Relationships of `fontTable.xml`.
 * @param timeout Maximum time, in milliseconds, to wait for the zip read.
 * @returns The loaded font, or `null` when the relationship or the file is
 *          missing (a warning is logged in both cases).
 * @throws Error when reading the font part exceeds `timeout`.
 */
async function loadSingleEmbeddedFont(
  zip: JSZip,
  font: IFontDeclaration,
  embedRef: IEmbedFontRef,
  fontRels: IRelationship[],
  timeout: number
): Promise<ILoadedEmbedFont | null> {
  const rel = fontRels.find(r => r.id === embedRef.id)
  if (!rel) {
    console.warn(`找不到嵌入字体关系: ${embedRef.id}`)
    return null
  }

  // Targets are normally relative to the `word/` folder; a leading slash marks
  // a path that is already relative to the package root.
  const fontPath = rel.target.startsWith('/') ? rel.target.slice(1) : `word/${rel.target}`
  const fontFile = zip.file(fontPath)

  if (!fontFile) {
    console.warn(`找不到嵌入字体文件: ${fontPath}`)
    return null
  }

  // Race the read against a timer, and always clear the timer afterwards so it
  // does not linger (and keep the event loop busy) once the read has finished.
  let timer: ReturnType<typeof setTimeout> | undefined
  let fontData: ArrayBuffer
  try {
    fontData = await Promise.race([
      fontFile.async('arraybuffer'),
      new Promise<never>((_, reject) => {
        timer = setTimeout(() => reject(new Error('字体加载超时')), timeout)
      }),
    ])
  } finally {
    if (timer !== undefined) {
      clearTimeout(timer)
    }
  }

  // A key on the reference means the font part is obfuscated.
  const fontBytes = embedRef.key ? decryptEmbeddedFont(fontData, embedRef.key) : fontData

  const format = detectFontFormat(new Uint8Array(fontBytes))
  const blob = new Blob([fontBytes], { type: getFontMimeType(format) })
  const dataUrl = await blobToDataUrl(blob)

  return {
    fontName: font.name,
    type: embedRef.type,
    dataUrl,
    format,
  }
}
137
-
138
/**
 * Reverses the obfuscation applied to an embedded font part.
 *
 * The key is the GUID's 32 hex digits read as 16 byte pairs in reverse order
 * (the last pair of the GUID text becomes key byte 0). Only the first 32
 * bytes of the font are XOR-ed with that key (bytes 0-15, then 16-31); the
 * rest of the data is copied unchanged. The input buffer is never modified.
 * NOTE(review): key order follows the ECMA-376 embedded-font obfuscation
 * description ("reverse the order of the bytes of the GUID") — confirm
 * against the spec.
 *
 * @param data Obfuscated font bytes.
 * @param keyString GUID key, e.g. `{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}`.
 * @returns A new buffer with the de-obfuscated font, or `data` itself when
 *          the key is not 32 hex digits (a warning is logged).
 */
function decryptEmbeddedFont(data: ArrayBuffer, keyString: string): ArrayBuffer {
  const KEY_LENGTH = 16
  const OBFUSCATED_LENGTH = 32

  // Strip braces and hyphens, then require exactly 32 hex digits.
  const keyHex = keyString.replace(/[{}-]/g, '')
  if (!/^[0-9a-fA-F]{32}$/.test(keyHex)) {
    console.warn('无效的字体密钥格式:', keyString)
    return data
  }

  // Hex pair i of the GUID text becomes key byte (15 - i).
  const key = new Uint8Array(KEY_LENGTH)
  for (let i = 0; i < KEY_LENGTH; i++) {
    key[KEY_LENGTH - 1 - i] = parseInt(keyHex.slice(i * 2, i * 2 + 2), 16)
  }

  // Work on a copy so the caller's buffer stays intact.
  const copy = data.slice(0)
  const bytes = new Uint8Array(copy)
  const limit = Math.min(OBFUSCATED_LENGTH, bytes.length)
  for (let i = 0; i < limit; i++) {
    bytes[i] ^= key[i % KEY_LENGTH]
  }

  return copy
}
193
-
194
/**
 * Guesses the font container format from the leading bytes.
 *
 * - `OTTO` at offset 0 -> `opentype`
 * - `00 01 00 00` or `true` at offset 0 -> `truetype`
 * - bytes `4C 50` at offsets 34-35 -> `embedded-opentype`
 *   (presumably the little-endian EOT MagicNumber 0x504C; an EOT file starts
 *   with its size, so there is no fixed signature at offset 0 — confirm
 *   against the EOT format description)
 * - anything else, including data shorter than 4 bytes -> `truetype`
 */
function detectFontFormat(data: Uint8Array): 'opentype' | 'truetype' | 'embedded-opentype' {
  const startsWith = (...signature: number[]): boolean =>
    data.length >= signature.length && signature.every((byte, i) => data[i] === byte)

  if (startsWith(0x4f, 0x54, 0x54, 0x4f)) {
    return 'opentype'
  }

  if (startsWith(0x00, 0x01, 0x00, 0x00) || startsWith(0x74, 0x72, 0x75, 0x65)) {
    return 'truetype'
  }

  const EOT_MAGIC_OFFSET = 34
  if (
    data.length >= EOT_MAGIC_OFFSET + 2 &&
    data[EOT_MAGIC_OFFSET] === 0x4c &&
    data[EOT_MAGIC_OFFSET + 1] === 0x50
  ) {
    return 'embedded-opentype'
  }

  // Unknown data is assumed to be TrueType.
  return 'truetype'
}
221
-
222
/**
 * Maps a detected font format to its MIME type.
 * Unrecognised formats are treated like TrueType.
 */
function getFontMimeType(format: string): string {
  if (format === 'opentype') {
    return 'font/otf'
  }
  if (format === 'embedded-opentype') {
    return 'application/vnd.ms-fontobject'
  }
  // 'truetype' and every other value
  return 'font/ttf'
}
237
-
238
/**
 * Encodes a Blob as a data URL using `FileReader`.
 * The promise resolves with the reader's result once reading ends and rejects
 * when the reader reports an error.
 */
function blobToDataUrl(blob: Blob): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const fileReader = new FileReader()
    fileReader.onerror = reject
    fileReader.onloadend = () => {
      resolve(fileReader.result as string)
    }
    fileReader.readAsDataURL(blob)
  })
}
249
-
250
/**
 * Injects one `<style data-docx-fonts>` element containing an `@font-face`
 * rule per loaded font.
 *
 * The first previously injected style element found in the target is removed
 * before the new one is appended.
 *
 * @param fonts Fonts to expose through CSS.
 * @param container Target element; `document.head` when omitted.
 */
function injectFontFaceStyles(
  fonts: ILoadedEmbedFont[],
  container?: HTMLElement
): void {
  const css = fonts.map(font => buildFontFaceRule(font)).join('\n')

  const sheet = document.createElement('style')
  sheet.setAttribute('data-docx-fonts', 'true')
  sheet.textContent = css

  const host = container || document.head

  // Drop the style element left behind by an earlier injection, if any.
  const previous = host.querySelector('style[data-docx-fonts]')
  if (previous) {
    previous.remove()
  }

  host.appendChild(sheet)
}
273
-
274
/**
 * Builds the CSS text of a single `@font-face` rule for a loaded font.
 * Weight and style are derived from the embed type, the `format()` hint from
 * the detected font format.
 */
function buildFontFaceRule(font: ILoadedEmbedFont): string {
  const weight = getFontWeight(font.type)
  const style = getFontStyle(font.type)
  const formatHint = getFontFormatString(font.format)

  const lines = [
    '@font-face {',
    `font-family: "${escapeFontName(font.fontName)}";`,
    `src: url("${font.dataUrl}") format("${formatHint}");`,
    `font-weight: ${weight};`,
    `font-style: ${style};`,
    'font-display: swap;',
    '}',
  ]
  return lines.join('\n')
}
290
-
291
/**
 * CSS `font-weight` for an embed type: `700` for the bold variants,
 * `400` for everything else.
 */
function getFontWeight(type: TEmbedFontType): string {
  const isBold = type === 'bold' || type === 'boldItalic'
  return isBold ? '700' : '400'
}
303
-
304
/**
 * CSS `font-style` for an embed type: `italic` for the italic variants,
 * `normal` for everything else.
 */
function getFontStyle(type: TEmbedFontType): string {
  const isItalic = type === 'italic' || type === 'boldItalic'
  return isItalic ? 'italic' : 'normal'
}
316
-
317
/**
 * Value for the CSS `format()` hint.
 * `opentype` and `embedded-opentype` pass through unchanged; every other
 * input yields `truetype`.
 */
function getFontFormatString(format: string): string {
  const passesThrough = format === 'opentype' || format === 'embedded-opentype'
  return passesThrough ? format : 'truetype'
}
332
-
333
/**
 * Escapes a font name so it can be placed inside a double-quoted CSS string.
 *
 * Backslashes are escaped first (otherwise a name ending in `\` or containing
 * `\"` could terminate the string early), then double quotes, and finally
 * line breaks, which are not allowed inside a CSS string and are written as
 * the hex escape `\a ` instead.
 */
function escapeFontName(name: string): string {
  return name
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\r\n|[\n\r\f]/g, '\\a ')
}
339
-
340
/**
 * Removes the injected font style element, if one is present.
 * Only the first `style[data-docx-fonts]` element inside the target is removed.
 *
 * @param container Element to search; `document.head` when omitted.
 */
export function cleanupFontStyles(container?: HTMLElement): void {
  const host = container || document.head
  const injected = host.querySelector('style[data-docx-fonts]')
  if (!injected) {
    return
  }
  injected.remove()
}
351
-
352
/**
 * Extracts the font relationships from the XML of `fontTable.xml.rels`.
 *
 * Only `Relationship` elements whose `Type` equals `RELATIONSHIP_TYPES.FONT`
 * are returned. Missing `Id` / `Target` attributes become empty strings and
 * a missing (or empty) `TargetMode` becomes `undefined`.
 *
 * @param xmlContent Raw XML text of the relationships part.
 * @returns The font relationships in document order.
 */
export function parseFontRelationships(xmlContent: string): IRelationship[] {
  const xmlDoc = new DOMParser().parseFromString(xmlContent, 'application/xml')
  const candidates = Array.from(xmlDoc.documentElement.getElementsByTagName('Relationship'))

  const fontRelationships: IRelationship[] = []
  for (const node of candidates) {
    const relType = node.getAttribute('Type') || ''
    if (relType !== RELATIONSHIP_TYPES.FONT) {
      continue
    }

    fontRelationships.push({
      id: node.getAttribute('Id') || '',
      type: relType,
      target: node.getAttribute('Target') || '',
      targetMode: node.getAttribute('TargetMode') || undefined,
    })
  }

  return fontRelationships
}