@courtifyai/docx-render 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1606 @@
1
+ import JSZip from 'jszip'
2
+ import {
3
+ DomType,
4
+ IDocxDocument,
5
+ IDocumentElement,
6
+ IOpenXmlElement,
7
+ IParagraphElement,
8
+ IRunElement,
9
+ ITextElement,
10
+ IBreakElement,
11
+ ITableElement,
12
+ ITableRowElement,
13
+ ITableCellElement,
14
+ ITableProperties,
15
+ ITableCellProperties,
16
+ ICommentElement,
17
+ ICommentExtended,
18
+ ICommentRangeStart,
19
+ ICommentRangeEnd,
20
+ ICommentReference,
21
+ IHyperlinkElement,
22
+ IImageElement,
23
+ IDrawingElement,
24
+ IStyleDefinition,
25
+ IRelationship,
26
+ IParagraphProperties,
27
+ IRunProperties,
28
+ ISectionProperties,
29
+ IPageSize,
30
+ IPageMargins,
31
+ IColumns,
32
+ IHeaderFooterReference,
33
+ IHeaderElement,
34
+ IFooterElement,
35
+ ITabElement,
36
+ ISymbolElement,
37
+ IBorders,
38
+ IBorder,
39
+ ISimpleFieldElement,
40
+ IComplexFieldElement,
41
+ IFieldInstructionElement,
42
+ INumberingDefinition,
43
+ INumberingLevel,
44
+ IAbstractNumbering,
45
+ IParagraphNumbering,
46
+ ITheme,
47
+ IFootnoteElement,
48
+ IEndnoteElement,
49
+ IFootnoteReference,
50
+ IEndnoteReference,
51
+ IBookmarkStartElement,
52
+ IBookmarkEndElement,
53
+ IFontTable,
54
+ ILoadedEmbedFont,
55
+ TUnderlineStyle,
56
+ DOCX_PARTS,
57
+ RELATIONSHIP_TYPES,
58
+ } from '../types'
59
+ import { xmlParser, parseXmlString, LengthUsage } from './xml-parser'
60
+ import { parseTheme, resolveThemeColor } from '../theme'
61
+ import { parseFontTable, loadEmbeddedFonts, parseFontRelationships } from '../font-table'
62
+ import { parseCommentsExtended, buildCommentTree } from '../comments'
63
+
64
+ /**
65
+ * DOCX 文档解析器
66
+ * 解析 DOCX 文件,生成可渲染的文档模型
67
+ */
68
+ export class DocumentParser {
69
+ private zip: JSZip | null = null
70
+ private relationships: IRelationship[] = []
71
+ private images: Map<string, string> = new Map()
72
+ private theme: ITheme | undefined = undefined
73
+ private fontTable: IFontTable | undefined = undefined
74
+ private embeddedFonts: ILoadedEmbedFont[] = []
75
+ private bookmarks: Map<string, IBookmarkStartElement> = new Map()
76
+
77
/**
 * Parse a DOCX file into a renderable document model.
 *
 * The archive is opened with JSZip and each part is parsed in turn:
 * relationships, images, theme, styles, comments, numbering,
 * headers/footers, footnotes/endnotes, fonts, and finally the main body.
 *
 * @param file The DOCX content as a `File`, `Blob` or `ArrayBuffer`.
 * @returns The parsed document together with its lookup maps and resources.
 */
async parse(file: File | ArrayBuffer | Blob): Promise<IDocxDocument> {
  const arrayBuffer = file instanceof ArrayBuffer
    ? file
    : await (file as Blob).arrayBuffer()

  this.zip = await JSZip.loadAsync(arrayBuffer)

  // Reset all per-document state so that reusing one parser instance never
  // leaks data from a previous document. Fresh Map instances are used because
  // the previously returned document still references the old ones.
  this.relationships = []
  this.images = new Map()
  this.theme = undefined
  this.fontTable = undefined
  this.embeddedFonts = []
  this.bookmarks = new Map()

  // Relationships
  await this.parseRelationships()

  // Image resources
  await this.loadImages()

  // Theme. It has to be available before styles, comments and numbering are
  // parsed: all of them reach parseRunProperties, which resolves theme
  // colours and theme fonts from this.theme.
  this.theme = await this.parseTheme()

  // Styles
  const styles = await this.parseStyles()
  const styleMap = new Map(styles.map(s => [s.id, s]))

  // Comments
  const comments = await this.parseComments()
  const commentMap = new Map(comments.map(c => [c.id, c]))

  // Extended comment information (reply chains)
  const commentsExtendedMap = await this.parseCommentsExtended()

  // Build the comment reply tree
  const rootComments = buildCommentTree(comments, commentsExtendedMap)

  // Numbering
  const { numberings, abstractNumberings, numberingMap } = await this.parseNumberings()

  // Headers
  const headers = await this.parseHeadersFooters('header')

  // Footers
  const footers = await this.parseHeadersFooters('footer')

  // Footnotes
  const footnotes = await this.parseFootnotes()

  // Endnotes
  const endnotes = await this.parseEndnotes()

  // Font table
  this.fontTable = await this.parseFontTable()

  // Embedded fonts
  this.embeddedFonts = await this.loadEmbeddedFonts()

  // Main document body (bookmarks are collected while parsing)
  const body = await this.parseDocument()

  return {
    body,
    comments,
    commentMap,
    rootComments,
    commentsExtendedMap,
    styles,
    styleMap,
    numberings,
    numberingMap,
    abstractNumberings,
    images: this.images,
    relationships: this.relationships,
    headers,
    footers,
    theme: this.theme,
    footnotes,
    endnotes,
    fontTable: this.fontTable,
    embeddedFonts: this.embeddedFonts,
    bookmarks: this.bookmarks,
  }
}
160
+
161
/**
 * Expose the underlying ZIP archive.
 *
 * @returns The JSZip instance stored by `parse`, or `null` if none has been loaded yet.
 */
getZip(): JSZip | null {
  const { zip } = this
  return zip
}
167
+
168
/**
 * Parse the relationships part (`DOCX_PARTS.RELS`) into `this.relationships`.
 *
 * Each `Relationship` element contributes its `Id`, `Type`, `Target` and
 * `TargetMode` attributes. When the part is missing the list is left empty.
 */
private async parseRelationships(): Promise<void> {
  // Start from a clean list so a missing part cannot leave entries from a
  // previously parsed document behind.
  this.relationships = []

  const content = await this.zip?.file(DOCX_PARTS.RELS)?.async('string')
  if (!content) return

  const root = parseXmlString(content).documentElement

  this.relationships = xmlParser.elements(root, 'Relationship').map(el => ({
    id: xmlParser.attr(el, 'Id') || '',
    type: xmlParser.attr(el, 'Type') || '',
    target: xmlParser.attr(el, 'Target') || '',
    targetMode: xmlParser.attr(el, 'TargetMode'),
  }))
}
185
+
186
/**
 * Load every image referenced by an image relationship into `this.images`,
 * keyed by relationship id and stored as a data URL.
 *
 * Relationships whose target cannot be found in the archive are skipped.
 */
private async loadImages(): Promise<void> {
  for (const rel of this.relationships) {
    if (rel.type !== RELATIONSHIP_TYPES.IMAGE) continue

    // Linked (external) images point at a URL, not at an archive entry.
    if (rel.targetMode === 'External') continue

    // A target starting with "/" is an absolute part name inside the package;
    // anything else is treated as relative to the word/ folder.
    const imagePath = rel.target.startsWith('/')
      ? rel.target.slice(1)
      : `word/${rel.target}`

    const imageFile = this.zip?.file(imagePath)
    if (!imageFile) continue

    const blob = await imageFile.async('blob')
    const base64 = await this.blobToBase64(blob)
    this.images.set(rel.id, base64)
  }
}
202
+
203
/**
 * Convert a Blob into a base64 data URL using `FileReader.readAsDataURL`.
 *
 * @param blob The binary content to encode.
 * @returns A promise resolving to the `data:` URL string.
 *          It rejects when the read fails or is aborted.
 */
private blobToBase64(blob: Blob): Promise<string> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader()

    // `load` only fires on success. `loadend` also fires after an error or an
    // abort, when `result` is null, so it must not be used to resolve.
    reader.onload = () => {
      const { result } = reader
      if (typeof result === 'string') {
        resolve(result)
      } else {
        reject(new Error('FileReader did not produce a data URL'))
      }
    }
    reader.onerror = () => reject(reader.error ?? new Error('Failed to read blob'))
    reader.onabort = () => reject(new Error('Blob read was aborted'))

    reader.readAsDataURL(blob)
  })
}
214
+
215
/**
 * Parse the styles part (`DOCX_PARTS.STYLES`).
 *
 * @returns One definition per `w:style` element, or an empty array when the
 *          part is missing. Paragraph and run properties are attached when
 *          the style declares `pPr` / `rPr`.
 */
private async parseStyles(): Promise<IStyleDefinition[]> {
  const xml = await this.zip?.file(DOCX_PARTS.STYLES)?.async('string')
  if (!xml) return []

  const styleNodes = parseXmlString(xml).getElementsByTagNameNS(
    'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'style'
  )

  return Array.from(styleNodes, node => {
    const nameNode = xmlParser.element(node, 'name')
    const parentNode = xmlParser.element(node, 'basedOn')

    const definition: IStyleDefinition = {
      id: xmlParser.attr(node, 'styleId') || '',
      name: nameNode ? xmlParser.attr(nameNode, 'val') : undefined,
      // Styles without an explicit type are treated as paragraph styles
      type: (xmlParser.attr(node, 'type') as IStyleDefinition['type']) || 'paragraph',
      basedOn: parentNode ? xmlParser.attr(parentNode, 'val') : undefined,
    }

    // Paragraph-level formatting
    const paragraphNode = xmlParser.element(node, 'pPr')
    if (paragraphNode) {
      definition.paragraphProps = this.parseParagraphProperties(paragraphNode)
    }

    // Run-level formatting
    const runNode = xmlParser.element(node, 'rPr')
    if (runNode) {
      definition.runProps = this.parseRunProperties(runNode)
    }

    return definition
  })
}
259
+
260
/**
 * Parse the theme part (`DOCX_PARTS.THEME`).
 *
 * @returns The parsed theme, or `undefined` when the part is missing or
 *          parsing throws (a warning is logged in that case).
 */
private async parseTheme(): Promise<ITheme | undefined> {
  const xml = await this.zip?.file(DOCX_PARTS.THEME)?.async('string')

  if (xml) {
    try {
      const { documentElement } = parseXmlString(xml)
      return parseTheme(documentElement)
    } catch (err) {
      // Best effort: a broken theme must not abort the whole parse
      console.warn('主题解析失败:', err)
    }
  }

  return undefined
}
275
+
276
/**
 * Parse the font table part (`DOCX_PARTS.FONT_TABLE`).
 *
 * @returns The parsed font table, or `undefined` when the part is missing or
 *          parsing throws (a warning is logged in that case).
 */
private async parseFontTable(): Promise<IFontTable | undefined> {
  const xml = await this.zip?.file(DOCX_PARTS.FONT_TABLE)?.async('string')

  if (xml) {
    try {
      return parseFontTable(xml)
    } catch (err) {
      // Best effort: a broken font table must not abort the whole parse
      console.warn('字体表解析失败:', err)
    }
  }

  return undefined
}
290
+
291
/**
 * Load the fonts embedded in the document.
 *
 * Requires `this.fontTable` and `this.zip` to be set. Nothing is loaded unless
 * at least one font in the table has embed references.
 *
 * @returns The loaded fonts, or an empty array when there is nothing to load,
 *          the font-table relationships part is missing, or loading throws.
 */
private async loadEmbeddedFonts(): Promise<ILoadedEmbedFont[]> {
  const { zip, fontTable } = this
  if (!fontTable || !zip) return []

  // Skip all work when no font declares an embedded file
  const anyEmbedded = fontTable.fonts.some(font => font.embedFontRefs.length > 0)
  if (!anyEmbedded) return []

  try {
    // The font table has its own relationships part
    const relsXml = await zip.file(DOCX_PARTS.FONT_TABLE_RELS)?.async('string')

    if (relsXml) {
      const fontRelationships = parseFontRelationships(relsXml)
      // Note: this calls the imported helper, not this method
      return await loadEmbeddedFonts(zip, fontTable, fontRelationships, {
        injectStyles: true,
      })
    }

    console.warn('找不到字体表关系文件')
    return []
  } catch (err) {
    console.warn('嵌入字体加载失败:', err)
    return []
  }
}
320
+
321
/**
 * Parse the footnotes part (`DOCX_PARTS.FOOTNOTES`).
 *
 * @returns Footnotes keyed by id. The map is empty when the part is missing
 *          or parsing throws (a warning is logged in that case).
 */
private async parseFootnotes(): Promise<Map<string, IFootnoteElement>> {
  const xml = await this.zip?.file(DOCX_PARTS.FOOTNOTES)?.async('string')

  if (xml) {
    try {
      const { documentElement } = parseXmlString(xml)
      return this.parseNotes<IFootnoteElement>(documentElement, 'footnote', DomType.Footnote)
    } catch (err) {
      console.warn('脚注解析失败:', err)
    }
  }

  return new Map()
}
336
+
337
/**
 * Parse the endnotes part (`DOCX_PARTS.ENDNOTES`).
 *
 * @returns Endnotes keyed by id. The map is empty when the part is missing
 *          or parsing throws (a warning is logged in that case).
 */
private async parseEndnotes(): Promise<Map<string, IEndnoteElement>> {
  const xml = await this.zip?.file(DOCX_PARTS.ENDNOTES)?.async('string')

  if (xml) {
    try {
      const { documentElement } = parseXmlString(xml)
      return this.parseNotes<IEndnoteElement>(documentElement, 'endnote', DomType.Endnote)
    } catch (err) {
      console.warn('尾注解析失败:', err)
    }
  }

  return new Map()
}
352
+
353
/**
 * Shared parser for footnotes and endnotes.
 *
 * @param root     Root element of the notes part.
 * @param elemName Local name of the note elements to collect (`footnote` / `endnote`).
 * @param domType  DOM type assigned to each produced note.
 * @returns Notes keyed by their `id` attribute. Notes of type `separator`
 *          and `continuationSeparator` are left out.
 */
private parseNotes<T extends IFootnoteElement | IEndnoteElement>(
  root: Element,
  elemName: string,
  domType: DomType
): Map<string, T> {
  const notes = new Map<string, T>()

  const noteNodes = root.getElementsByTagNameNS(
    'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    elemName
  )

  for (const node of Array.from(noteNodes)) {
    const noteType = xmlParser.attr(node, 'type')
    const isSeparator = noteType === 'separator' || noteType === 'continuationSeparator'

    // Separator notes only describe the rule drawn above the notes area
    if (!isSeparator) {
      const id = xmlParser.attr(node, 'id') || ''
      const note = {
        type: domType,
        id,
        noteType,
        children: this.parseChildren(node),
      } as T

      notes.set(id, note)
    }
  }

  return notes
}
390
+
391
/**
 * Parse the numbering part (`DOCX_PARTS.NUMBERING`).
 *
 * Abstract numbering definitions (`abstractNum`) are collected first. Each
 * numbering instance (`num`) is then resolved to its abstract definition and
 * receives a shallow copy of that definition's levels. Instances that point
 * at an unknown abstract definition are dropped.
 *
 * @returns The numbering instances, the abstract definitions, and a map from
 *          numbering id to instance. All are empty when the part is missing.
 */
private async parseNumberings(): Promise<{
  numberings: INumberingDefinition[]
  abstractNumberings: IAbstractNumbering[]
  numberingMap: Map<string, INumberingDefinition>
}> {
  const numberings: INumberingDefinition[] = []
  const abstractNumberings: IAbstractNumbering[] = []
  const numberingMap = new Map<string, INumberingDefinition>()

  const content = await this.zip?.file(DOCX_PARTS.NUMBERING)?.async('string')
  if (!content) {
    return { numberings, abstractNumberings, numberingMap }
  }

  const root = parseXmlString(content).documentElement

  // Abstract definitions indexed by id for constant-time lookup
  const abstractById = new Map<string, IAbstractNumbering>()
  // Numbering instance id -> abstract definition id
  const numToAbstractMap = new Map<string, string>()

  // Pass 1: collect every abstractNum and num
  for (const el of xmlParser.elements(root)) {
    switch (el.localName) {
      case 'abstractNum': {
        const abstractNum = this.parseAbstractNumbering(el)
        abstractNumberings.push(abstractNum)
        // Keep the first definition when ids are duplicated
        if (!abstractById.has(abstractNum.id)) {
          abstractById.set(abstractNum.id, abstractNum)
        }
        break
      }
      case 'num': {
        const numId = xmlParser.attr(el, 'numId') || ''
        const abstractNumIdEl = xmlParser.element(el, 'abstractNumId')
        if (abstractNumIdEl) {
          const abstractNumId = xmlParser.attr(abstractNumIdEl, 'val') || ''
          numToAbstractMap.set(numId, abstractNumId)
        }
        break
      }
    }
  }

  // Pass 2: materialise each instance from its abstract definition
  for (const [numId, abstractNumId] of numToAbstractMap) {
    const abstractNum = abstractById.get(abstractNumId)
    if (!abstractNum) continue

    const numbering: INumberingDefinition = {
      id: numId,
      abstractNumId,
      // Copy the levels so instances do not share level objects
      levels: abstractNum.levels.map(level => ({ ...level })),
    }
    numberings.push(numbering)
    numberingMap.set(numId, numbering)
  }

  return { numberings, abstractNumberings, numberingMap }
}
452
+
453
/**
 * Parse a single `abstractNum` element.
 *
 * @param el The `abstractNum` element.
 * @returns The abstract numbering with its id, optional name / multi-level
 *          type / style links, and one entry per `lvl` child.
 */
private parseAbstractNumbering(el: Element): IAbstractNumbering {
  const definition: IAbstractNumbering = {
    id: xmlParser.attr(el, 'abstractNumId') || '',
    levels: [],
  }

  for (const node of xmlParser.elements(el)) {
    const tag = node.localName

    if (tag === 'lvl') {
      definition.levels.push(this.parseNumberingLevel(node))
    } else if (tag === 'name') {
      definition.name = xmlParser.attr(node, 'val')
    } else if (tag === 'multiLevelType') {
      definition.multiLevelType = xmlParser.attr(node, 'val')
    } else if (tag === 'numStyleLink') {
      definition.numberingStyleLink = xmlParser.attr(node, 'val')
    } else if (tag === 'styleLink') {
      definition.styleLink = xmlParser.attr(node, 'val')
    }
  }

  return definition
}
484
+
485
/**
 * Parse a single numbering level (`lvl`).
 *
 * Defaults used when a child element or its value is absent: format
 * `decimal`, empty text, start `1`, suffix `tab`, level index `0`.
 *
 * @param el The `lvl` element.
 * @returns The numbering level description.
 */
private parseNumberingLevel(el: Element): INumberingLevel {
  const level: INumberingLevel = {
    level: xmlParser.intAttr(el, 'ilvl') ?? 0,
    format: 'decimal',
    text: '',
    start: 1,
    suffix: 'tab',
  }

  for (const node of xmlParser.elements(el)) {
    const tag = node.localName

    if (tag === 'start') {
      level.start = xmlParser.intAttr(node, 'val') ?? 1
    } else if (tag === 'numFmt') {
      level.format = xmlParser.attr(node, 'val') || 'decimal'
    } else if (tag === 'lvlText') {
      level.text = xmlParser.attr(node, 'val') || ''
    } else if (tag === 'suff') {
      level.suffix = xmlParser.attr(node, 'val') || 'tab'
    } else if (tag === 'pStyle') {
      level.pStyleName = xmlParser.attr(node, 'val')
    } else if (tag === 'pPr') {
      level.paragraphProps = this.parseParagraphProperties(node)
    } else if (tag === 'rPr') {
      level.runProps = this.parseRunProperties(node)
    }
  }

  return level
}
525
+
526
/**
 * Parse the comments part (`DOCX_PARTS.COMMENTS`).
 *
 * @returns One entry per `w:comment` element, or an empty array when the part
 *          is missing. A missing author becomes `未知`; a missing date becomes
 *          the current time.
 */
private async parseComments(): Promise<ICommentElement[]> {
  const content = await this.zip?.file(DOCX_PARTS.COMMENTS)?.async('string')
  if (!content) return []

  const doc = parseXmlString(content)
  const comments: ICommentElement[] = []

  const commentElements = doc.getElementsByTagNameNS(
    'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'comment'
  )

  // Word 2010 namespace (carries paraId)
  const W14_NS = 'http://schemas.microsoft.com/office/word/2010/wordml'

  for (let i = 0; i < commentElements.length; i++) {
    const el = commentElements[i]

    // Plain text of the comment, kept as a fallback
    const rawText = el.textContent?.trim() || ''

    // paraId links the comment to its reply-chain entry in commentsExtended.xml.
    // Try the namespaced attribute first, then the literal prefixed name.
    // Both lookups yield null when absent, which is normalised to undefined.
    const paraId: string | undefined =
      el.getAttributeNS(W14_NS, 'paraId') ||
      el.getAttribute('w14:paraId') ||
      undefined

    const comment: ICommentElement = {
      type: DomType.Comment,
      id: xmlParser.attr(el, 'id') || '',
      author: xmlParser.attr(el, 'author') || '未知',
      date: xmlParser.attr(el, 'date') || new Date().toISOString(),
      initials: xmlParser.attr(el, 'initials'),
      children: this.parseChildren(el),
      rawText,
      paraId,
    }
    comments.push(comment)
  }

  return comments
}
575
+
576
/**
 * Parse the extended comments part (`DOCX_PARTS.COMMENTS_EXTENDED`), which
 * carries the parent/child relations between comments.
 *
 * @returns The map produced by the imported `parseCommentsExtended` helper,
 *          or an empty map when the part is missing.
 */
private async parseCommentsExtended(): Promise<Map<string, ICommentExtended>> {
  const content = await this.zip?.file(DOCX_PARTS.COMMENTS_EXTENDED)?.async('string')
  if (!content) return new Map()

  // Note: this calls the imported helper, not this method
  return parseCommentsExtended(content)
}
590
+
591
/**
 * Parse the main document part (`DOCX_PARTS.DOCUMENT`).
 *
 * @returns The document element with its body children, the body-level
 *          section properties (if any) and the page background (if any).
 *          An empty document is returned when the part or `w:body` is missing.
 */
private async parseDocument(): Promise<IDocumentElement> {
  const W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
  const emptyDocument = (): IDocumentElement => ({ type: DomType.Document, children: [] })

  const xml = await this.zip?.file(DOCX_PARTS.DOCUMENT)?.async('string')
  if (!xml) return emptyDocument()

  const dom = parseXmlString(xml)
  const bodyNode = dom.getElementsByTagNameNS(W_NS, 'body')[0]
  if (!bodyNode) return emptyDocument()

  // Body-level sectPr: the default section properties
  const sectionNode = xmlParser.element(bodyNode, 'sectPr')
  const sectionProps = sectionNode ? this.parseSectionProperties(sectionNode) : undefined

  // Page background declared on w:document
  const rootNode = dom.getElementsByTagNameNS(W_NS, 'document')[0]
  const backgroundNode = rootNode ? xmlParser.element(rootNode, 'background') : null
  const background = backgroundNode ? this.parseBackground(backgroundNode) : undefined

  const children = this.parseChildren(bodyNode)

  return {
    type: DomType.Document,
    children,
    sectionProps,
    background,
  }
}
629
+
630
/**
 * Parse every header or footer part referenced from the relationships.
 *
 * @param type Which kind of part to load.
 * @returns Parsed parts keyed by relationship id. Parts that are missing from
 *          the archive or lack the expected root element are skipped.
 */
private async parseHeadersFooters(type: 'header' | 'footer'): Promise<Map<string, IHeaderElement | IFooterElement>> {
  const isHeader = type === 'header'
  const wantedRelType = isHeader ? RELATIONSHIP_TYPES.HEADER : RELATIONSHIP_TYPES.FOOTER
  const parts = new Map<string, IHeaderElement | IFooterElement>()

  const matchingRels = this.relationships.filter(rel => rel.type === wantedRelType)

  for (const rel of matchingRels) {
    const xml = await this.zip?.file(`word/${rel.target}`)?.async('string')
    if (!xml) continue

    const rootNode = parseXmlString(xml).getElementsByTagNameNS(
      'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
      isHeader ? 'hdr' : 'ftr'
    )[0]
    if (!rootNode) continue

    const part: IHeaderElement | IFooterElement = {
      type: isHeader ? DomType.Header : DomType.Footer,
      children: this.parseChildren(rootNode),
    }
    parts.set(rel.id, part)
  }

  return parts
}
662
+
663
/**
 * Translate a `background` element into CSS declarations.
 *
 * @param el The `background` element.
 * @returns `{ 'background-color': '#RRGGBB' }` when a concrete colour is set,
 *          otherwise an empty object (no colour attribute, or `auto`).
 */
private parseBackground(el: Element): Record<string, string> {
  const fill = xmlParser.attr(el, 'color')
  const css: Record<string, string> = {}

  if (!fill || fill === 'auto') {
    return css
  }

  css['background-color'] = `#${fill}`
  return css
}
676
+
677
/**
 * Parse a `sectPr` element.
 *
 * Recognised children: `pgSz`, `pgMar`, `type`, `cols`, `pgBorders`,
 * `pgNumType`, `headerReference`, `footerReference` and `titlePg`.
 * Other children are ignored.
 *
 * @param el The `sectPr` element.
 * @returns The section properties found on the element.
 */
private parseSectionProperties(el: Element): ISectionProperties {
  const section: ISectionProperties = {}

  // Header and footer references share the same shape
  const toReference = (node: Element) => ({
    id: xmlParser.attr(node, 'id') || '',
    type: (xmlParser.attr(node, 'type') || 'default') as 'default' | 'first' | 'even',
  })

  for (const node of xmlParser.elements(el)) {
    const tag = node.localName

    if (tag === 'pgSz') {
      section.pageSize = this.parsePageSize(node)
    } else if (tag === 'pgMar') {
      section.pageMargins = this.parsePageMargins(node)
    } else if (tag === 'type') {
      section.type = xmlParser.attr(node, 'val') || 'nextPage'
    } else if (tag === 'cols') {
      section.columns = this.parseColumns(node)
    } else if (tag === 'pgBorders') {
      section.pageBorders = this.parseBorders(node)
    } else if (tag === 'pgNumType') {
      section.pageNumber = {
        start: xmlParser.intAttr(node, 'start'),
        format: xmlParser.attr(node, 'fmt'),
        chapSep: xmlParser.attr(node, 'chapSep'),
        chapStyle: xmlParser.attr(node, 'chapStyle'),
      }
    } else if (tag === 'headerReference') {
      if (!section.headerRefs) section.headerRefs = []
      section.headerRefs.push(toReference(node))
    } else if (tag === 'footerReference') {
      if (!section.footerRefs) section.footerRefs = []
      section.footerRefs.push(toReference(node))
    } else if (tag === 'titlePg') {
      // The element counts as "on" unless its val is explicitly false
      section.titlePage = xmlParser.boolAttr(node, 'val') !== false
    }
  }

  return section
}
730
+
731
/**
 * Parse a `pgSz` element.
 *
 * @param el The `pgSz` element.
 * @returns Width (`w`), height (`h`) and orientation (`orient`) of the page.
 */
private parsePageSize(el: Element): IPageSize {
  const width = xmlParser.lengthAttr(el, 'w')
  const height = xmlParser.lengthAttr(el, 'h')
  const orientation = xmlParser.attr(el, 'orient') as 'portrait' | 'landscape' | undefined

  return { width, height, orientation }
}
741
+
742
/**
 * Parse a `pgMar` element.
 *
 * @param el The `pgMar` element.
 * @returns The page margins, each read as a length attribute of the same name.
 */
private parsePageMargins(el: Element): IPageMargins {
  const margin = (name: string) => xmlParser.lengthAttr(el, name)

  return {
    top: margin('top'),
    right: margin('right'),
    bottom: margin('bottom'),
    left: margin('left'),
    header: margin('header'),
    footer: margin('footer'),
    gutter: margin('gutter'),
  }
}
756
+
757
/**
 * Parse a `cols` element.
 *
 * @param el The `cols` element.
 * @returns Column count, spacing, separator flag, equal-width flag (on unless
 *          explicitly false) and one width/space entry per `col` child.
 */
private parseColumns(el: Element): IColumns {
  const layout: IColumns = {
    numberOfColumns: xmlParser.intAttr(el, 'num'),
    space: xmlParser.lengthAttr(el, 'space'),
    separator: xmlParser.boolAttr(el, 'sep'),
    equalWidth: xmlParser.boolAttr(el, 'equalWidth') !== false,
    columns: xmlParser.elements(el, 'col').map(col => ({
      width: xmlParser.lengthAttr(col, 'w'),
      space: xmlParser.lengthAttr(col, 'space'),
    })),
  }

  return layout
}
778
+
779
/**
 * Parse a border container into per-side borders.
 *
 * `start` is treated as `left` and `end` as `right`. Children with any other
 * name are ignored. When a side occurs more than once, the last one wins.
 *
 * @param el The element whose children describe the individual sides.
 * @returns The borders that were present.
 */
private parseBorders(el: Element): IBorders {
  const sideByTag = new Map<string, 'top' | 'bottom' | 'left' | 'right'>([
    ['top', 'top'],
    ['bottom', 'bottom'],
    ['left', 'left'],
    ['start', 'left'],
    ['right', 'right'],
    ['end', 'right'],
  ])

  const result: IBorders = {}

  for (const node of xmlParser.elements(el)) {
    const side = sideByTag.get(node.localName)
    if (side !== undefined) {
      result[side] = this.parseBorder(node)
    }
  }

  return result
}
807
+
808
/**
 * Parse a single border element.
 *
 * @param el The border element.
 * @returns Style (`val`), width (`sz`, read as a border length) and colour.
 *          The colour is `#`-prefixed, and `undefined` when absent or `auto`.
 */
private parseBorder(el: Element): IBorder {
  const rawColor = xmlParser.attr(el, 'color')

  let color: string | undefined
  if (rawColor && rawColor !== 'auto') {
    color = `#${rawColor}`
  }

  return {
    style: xmlParser.attr(el, 'val'),
    width: xmlParser.lengthAttr(el, 'sz', LengthUsage.Border),
    color,
  }
}
819
+
820
/**
 * Parse every child element of `parent`.
 *
 * @param parent The element whose children are parsed.
 * @param debug  When true, logs diagnostic details if nothing could be parsed.
 * @returns The parsed children in document order. Children for which
 *          `parseElement` yields nothing are omitted.
 */
private parseChildren(parent: Element, debug = false): IOpenXmlElement[] {
  const parsed: IOpenXmlElement[] = []

  xmlParser.elements(parent).forEach(node => {
    const item = this.parseElement(node)
    if (item) parsed.push(item)
  })

  const nothingParsed = parsed.length === 0
  if (debug && nothingParsed) {
    // Diagnostics are only emitted when nothing could be parsed
    console.log('[DEBUG] parseChildren: no children parsed from', parent.localName,
      'childNodes:', parent.childNodes.length,
      'elements:', xmlParser.elements(parent).map(e => e.localName))
  }

  return parsed
}
842
+
843
/**
 * Parse a single element by dispatching on its local name.
 *
 * Property containers (`pPr`, `rPr`, `sectPr`, `tblPr`, `tblGrid`, `trPr`,
 * `tcPr`) are not content and yield `null`. Without this, the tab-stop
 * definitions in `pPr/tabs/tab` would reach the `tab` case and appear as
 * real tab characters in the paragraph.
 *
 * Unknown elements are unwrapped: a single parsed child is returned as-is,
 * several are wrapped in a run, and an element with only text becomes a
 * text element.
 *
 * @param el The element to parse.
 * @returns The parsed element, or `null` when it produces no content.
 */
private parseElement(el: Element): IOpenXmlElement | null {
  const localName = el.localName

  switch (localName) {
    // Formatting-only containers; callers read them through dedicated parsers
    case 'pPr':
    case 'rPr':
    case 'sectPr':
    case 'tblPr':
    case 'tblGrid':
    case 'trPr':
    case 'tcPr':
      return null
    case 'p':
      return this.parseParagraph(el)
    case 'r':
      return this.parseRun(el)
    case 't':
      return this.parseText(el)
    case 'br':
      return this.parseBreak(el)
    case 'tab':
      return this.parseTab()
    case 'sym':
      return this.parseSymbol(el)
    case 'lastRenderedPageBreak':
      return { type: DomType.Break, breakType: 'lastRenderedPageBreak' } as IBreakElement
    case 'fldSimple':
      return this.parseSimpleField(el)
    case 'fldChar':
      return this.parseComplexField(el)
    case 'instrText':
      return this.parseFieldInstruction(el)
    case 'tbl':
      return this.parseTable(el)
    case 'tr':
      return this.parseTableRow(el)
    case 'tc':
      return this.parseTableCell(el)
    case 'hyperlink':
      return this.parseHyperlink(el)
    case 'drawing':
      return this.parseDrawing(el)
    case 'commentRangeStart':
      return this.parseCommentRangeStart(el)
    case 'commentRangeEnd':
      return this.parseCommentRangeEnd(el)
    case 'commentReference':
      return this.parseCommentReference(el)
    case 'footnoteReference':
      return this.parseFootnoteReference(el)
    case 'endnoteReference':
      return this.parseEndnoteReference(el)
    case 'bookmarkStart':
      return this.parseBookmarkStart(el)
    case 'bookmarkEnd':
      return this.parseBookmarkEnd(el)
    default: {
      // Unknown element: try to surface whatever its children contain
      const children = this.parseChildren(el)
      if (children.length > 0) {
        return children.length === 1 ? children[0] : {
          type: DomType.Run,
          children,
        } as IRunElement
      }

      // No parsable children, but there is text: expose it as a text element
      const textContent = el.textContent?.trim()
      if (textContent) {
        return {
          type: DomType.Text,
          text: textContent,
        } as ITextElement
      }

      return null
    }
  }
}
917
+
918
/**
 * Parse a paragraph (`p`) element.
 *
 * @param el The paragraph element.
 * @returns The paragraph with its properties (when `pPr` is present) and its
 *          parsed children. Children that are themselves paragraphs are dropped.
 */
private parseParagraph(el: Element): IParagraphElement {
  const propsNode = xmlParser.element(el, 'pPr')
  const props = propsNode ? this.parseParagraphProperties(propsNode) : undefined

  // A paragraph never directly contains another paragraph
  const children = this.parseChildren(el).filter(child => child.type !== DomType.Paragraph)

  return {
    type: DomType.Paragraph,
    props,
    children,
  }
}
930
+
931
/**
 * Parse paragraph properties (`pPr`).
 *
 * Reads style id, justification, indentation, spacing, page-break-before,
 * borders, an in-paragraph section break and the numbering reference.
 *
 * The logical names `start` / `end` are accepted as aliases of `left` /
 * `right`, for justification and for indentation. This matches how
 * `parseBorders` treats the same names.
 *
 * @param el The `pPr` element.
 * @returns The properties that were present on the element.
 */
private parseParagraphProperties(el: Element): IParagraphProperties {
  const props: IParagraphProperties = {}

  // Style id
  const pStyleEl = xmlParser.element(el, 'pStyle')
  if (pStyleEl) {
    props.styleId = xmlParser.attr(pStyleEl, 'val')
  }

  // Justification
  const jcEl = xmlParser.element(el, 'jc')
  if (jcEl) {
    const val = xmlParser.attr(jcEl, 'val')
    if (val === 'left' || val === 'center' || val === 'right' || val === 'both') {
      props.justification = val
    } else if (val === 'start') {
      props.justification = 'left'
    } else if (val === 'end') {
      props.justification = 'right'
    }
  }

  // Indentation
  const indEl = xmlParser.element(el, 'ind')
  if (indEl) {
    props.indentation = {
      // NOTE(review): the start/end fallback only applies when lengthAttr
      // yields null/undefined for a missing attribute — confirm.
      left: xmlParser.lengthAttr(indEl, 'left') ?? xmlParser.lengthAttr(indEl, 'start'),
      right: xmlParser.lengthAttr(indEl, 'right') ?? xmlParser.lengthAttr(indEl, 'end'),
      firstLine: xmlParser.lengthAttr(indEl, 'firstLine'),
      hanging: xmlParser.lengthAttr(indEl, 'hanging'),
    }
  }

  // Spacing
  const spacingEl = xmlParser.element(el, 'spacing')
  if (spacingEl) {
    props.spacing = {
      before: xmlParser.lengthAttr(spacingEl, 'before'),
      after: xmlParser.lengthAttr(spacingEl, 'after'),
      // `line` is kept as the raw integer; the renderer interprets it
      // according to lineRule
      line: xmlParser.intAttr(spacingEl, 'line'),
      lineRule: xmlParser.attr(spacingEl, 'lineRule') as 'auto' | 'atLeast' | 'exact',
    }
  }

  // Page break before the paragraph (on unless val is explicitly false)
  const pageBreakBeforeEl = xmlParser.element(el, 'pageBreakBefore')
  if (pageBreakBeforeEl) {
    props.pageBreakBefore = xmlParser.boolAttr(pageBreakBeforeEl, 'val') !== false
  }

  // Paragraph borders
  const pBdrEl = xmlParser.element(el, 'pBdr')
  if (pBdrEl) {
    props.borders = this.parseBorders(pBdrEl)
  }

  // Section break carried by this paragraph
  const sectPrEl = xmlParser.element(el, 'sectPr')
  if (sectPrEl) {
    props.sectionProps = this.parseSectionProperties(sectPrEl)
  }

  // Numbering reference
  const numPrEl = xmlParser.element(el, 'numPr')
  if (numPrEl) {
    props.numbering = this.parseParagraphNumbering(numPrEl)
  }

  return props
}
1001
+
1002
/**
 * Parse a paragraph's numbering reference (`numPr`).
 *
 * @param el The `numPr` element.
 * @returns The numbering id and level (level defaults to 0), or `undefined`
 *          when no non-empty `numId` value is present.
 */
private parseParagraphNumbering(el: Element): IParagraphNumbering | undefined {
  const reference: IParagraphNumbering = { id: '', level: 0 }

  for (const node of xmlParser.elements(el)) {
    const tag = node.localName
    if (tag === 'numId') {
      reference.id = xmlParser.attr(node, 'val') || ''
    } else if (tag === 'ilvl') {
      reference.level = xmlParser.intAttr(node, 'val') ?? 0
    }
  }

  // Without a numbering id there is nothing to reference
  return reference.id ? reference : undefined
}
1029
+
1030
/**
 * Parse a run (`r`) element.
 *
 * @param el The run element.
 * @returns The run with its properties (when `rPr` is present) and its parsed children.
 */
private parseRun(el: Element): IRunElement {
  const propsNode = xmlParser.element(el, 'rPr')
  const props = propsNode ? this.parseRunProperties(propsNode) : undefined
  const children = this.parseChildren(el)

  return { type: DomType.Run, props, children }
}
1042
+
1043
/**
 * Parse run properties (`rPr`).
 *
 * Reads style id, bold, italic, underline, strike / double strike, vertical
 * alignment, colour (literal or theme), font size, font family (literal or
 * theme) and highlight.
 *
 * Toggle elements (`b`, `i`, `strike`, `dstrike`) are on when the element is
 * present, unless `val` is one of the off values `0`, `false` or `off`.
 *
 * For colours, a theme reference is resolved through `this.theme`. When that
 * is not possible, the literal `val` colour is used as a fallback.
 *
 * @param el The `rPr` element.
 * @returns The properties that were present on the element.
 */
private parseRunProperties(el: Element): IRunProperties {
  const props: IRunProperties = {}

  // A toggle element is on unless val is explicitly one of the off values
  const isToggleOn = (toggleEl: Element): boolean => {
    const val = xmlParser.attr(toggleEl, 'val')
    return val !== '0' && val !== 'false' && val !== 'off'
  }

  // Style id
  const rStyleEl = xmlParser.element(el, 'rStyle')
  if (rStyleEl) {
    props.styleId = xmlParser.attr(rStyleEl, 'val')
  }

  // Bold
  const bEl = xmlParser.element(el, 'b')
  if (bEl) {
    props.bold = isToggleOn(bEl)
  }

  // Italic
  const iEl = xmlParser.element(el, 'i')
  if (iEl) {
    props.italic = isToggleOn(iEl)
  }

  // Underline (an element without val means a single underline)
  const uEl = xmlParser.element(el, 'u')
  if (uEl) {
    props.underline = xmlParser.attr(uEl, 'val') || 'single'
  }

  // Strikethrough
  const strikeEl = xmlParser.element(el, 'strike')
  if (strikeEl) {
    props.strike = isToggleOn(strikeEl)
  }

  // Double strikethrough
  const dstrikeEl = xmlParser.element(el, 'dstrike')
  if (dstrikeEl) {
    props.dstrike = isToggleOn(dstrikeEl)
  }

  // Superscript / subscript
  const vertAlignEl = xmlParser.element(el, 'vertAlign')
  if (vertAlignEl) {
    const val = xmlParser.attr(vertAlignEl, 'val')
    if (val === 'superscript' || val === 'subscript') {
      props.vertAlign = val
    }
  }

  // Colour (literal or theme reference)
  const colorEl = xmlParser.element(el, 'color')
  if (colorEl) {
    const val = xmlParser.attr(colorEl, 'val')
    const themeColor = xmlParser.attr(colorEl, 'themeColor')

    if (themeColor) {
      const themeTint = xmlParser.attr(colorEl, 'themeTint')
      const themeShade = xmlParser.attr(colorEl, 'themeShade')

      // Tint and shade are written as hexadecimal
      props.themeColor = {
        themeColor,
        themeTint: themeTint ? parseInt(themeTint, 16) : undefined,
        themeShade: themeShade ? parseInt(themeShade, 16) : undefined,
      }

      // Resolve to a concrete colour right away when a theme is available
      if (this.theme) {
        const resolvedColor = resolveThemeColor(this.theme, props.themeColor)
        if (resolvedColor) {
          props.color = resolvedColor
        }
      }
    }

    // Literal colour: used when there is no theme reference, or when the
    // theme reference could not be resolved
    if (!props.color && val && val !== 'auto') {
      props.color = `#${val}`
    }
  }

  // Font size
  const szEl = xmlParser.element(el, 'sz')
  if (szEl) {
    props.fontSize = xmlParser.lengthAttr(szEl, 'val', LengthUsage.FontSize)
  }

  // Font family (literal or theme reference)
  const fontsEl = xmlParser.element(el, 'rFonts')
  if (fontsEl) {
    // Theme references look like majorHAnsi / minorHAnsi / majorEastAsia / ...
    const themeRef =
      xmlParser.attr(fontsEl, 'asciiTheme') ||
      xmlParser.attr(fontsEl, 'eastAsiaTheme') ||
      ''

    const scheme = themeRef.startsWith('major')
      ? 'major'
      : themeRef.startsWith('minor')
        ? 'minor'
        : undefined

    if (scheme) {
      props.themeFontFamily = scheme

      const themeFont = scheme === 'major'
        ? this.theme?.fontScheme?.majorFont
        : this.theme?.fontScheme?.minorFont

      if (themeFont) {
        const font = themeRef.includes('EastAsia') ? themeFont.ea : themeFont.latin
        if (font) props.fontFamily = font
      }
    }

    // No theme font, or it could not be resolved: use the literal font names
    if (!props.fontFamily) {
      props.fontFamily = xmlParser.attr(fontsEl, 'ascii') ||
        xmlParser.attr(fontsEl, 'eastAsia') ||
        xmlParser.attr(fontsEl, 'hAnsi')
    }
  }

  // Highlight
  const highlightEl = xmlParser.element(el, 'highlight')
  if (highlightEl) {
    props.highlight = xmlParser.attr(highlightEl, 'val')
  }

  return props
}
1176
+
1177
/**
 * Parse a text element into a text node.
 *
 * @param el - Element whose `textContent` supplies the text.
 * @returns A `DomType.Text` node; `text` is an empty string when the element
 *          has no text content.
 */
private parseText(el: Element): ITextElement {
  const text = el.textContent ?? ''
  return { type: DomType.Text, text }
}
1186
+
1187
/**
 * Parse a break element.
 *
 * @param el - Element whose `type` attribute is read.
 * @returns A `DomType.Break` node; `breakType` falls back to `'textWrapping'`
 *          when the attribute lookup yields a falsy value.
 */
private parseBreak(el: Element): IBreakElement {
  const kind = xmlParser.attr(el, 'type') || 'textWrapping'
  return {
    type: DomType.Break,
    // The attribute value is not validated here; it is only cast to the declared type.
    breakType: kind as IBreakElement['breakType'],
  }
}
1197
+
1198
/**
 * Parse a tab.
 *
 * @returns A `DomType.Tab` node; it carries no further data.
 */
private parseTab(): ITabElement {
  const tab: ITabElement = { type: DomType.Tab }
  return tab
}
1206
+
1207
/**
 * Parse a symbol character, e.g. `<w:sym w:font="Symbol" w:char="F0B7"/>`.
 * These are special characters (arrows, bullets, ...) taken from a symbol font.
 *
 * @param el - Element carrying the `font` and `char` attributes.
 * @returns A `DomType.Symbol` node with the font name and the decoded
 *          character; `char` is `undefined` when the `char` attribute is
 *          absent or `parseInt` cannot read a hexadecimal number from it.
 */
private parseSymbol(el: Element): ISymbolElement {
  const font = xmlParser.attr(el, 'font')
  const hex = xmlParser.attr(el, 'char')

  // The attribute holds a hexadecimal character code; decode it to a string.
  const codeUnit = hex ? parseInt(hex, 16) : NaN
  const char = Number.isNaN(codeUnit) ? undefined : String.fromCharCode(codeUnit)

  return { type: DomType.Symbol, font, char }
}
1230
+
1231
/**
 * Parse a simple field, e.g. `<w:fldSimple w:instr="PAGE">...</w:fldSimple>`.
 *
 * @param el - The field element.
 * @returns A `DomType.SimpleField` node holding the `instr` attribute
 *          (empty string when falsy) and the children parsed from `el`.
 */
private parseSimpleField(el: Element): ISimpleFieldElement {
  const instruction = xmlParser.attr(el, 'instr') || ''
  const children = this.parseChildren(el)
  return { type: DomType.SimpleField, instruction, children }
}
1241
+
1242
/**
 * Parse a complex field character, e.g. `<w:fldChar w:fldCharType="begin"/>`.
 *
 * @param el - The field character element.
 * @returns A `DomType.ComplexField` node whose `charType` is the
 *          `fldCharType` attribute, or an empty string when it is falsy.
 */
private parseComplexField(el: Element): IComplexFieldElement {
  const charType = xmlParser.attr(el, 'fldCharType') || ''
  return { type: DomType.ComplexField, charType }
}
1251
+
1252
/**
 * Parse a field instruction, e.g. `<w:instrText>PAGE</w:instrText>`.
 *
 * @param el - The instruction text element.
 * @returns A `DomType.FieldInstruction` node whose `text` is the element's
 *          text content, or an empty string when there is none.
 */
private parseFieldInstruction(el: Element): IFieldInstructionElement {
  const text = el.textContent ?? ''
  return { type: DomType.FieldInstruction, text }
}
1261
+
1262
/**
 * Parse a table element.
 *
 * Child elements are dispatched by local name: `tr` rows are appended to
 * `children`, `tblGrid` fills `columns`, and `tblPr` fills `props`. Any other
 * child is ignored.
 *
 * @param el - The table element.
 * @returns The assembled `DomType.Table` node.
 */
private parseTable(el: Element): ITableElement {
  const result: ITableElement = { type: DomType.Table, children: [] }

  for (const node of xmlParser.elements(el)) {
    const tag = node.localName
    if (tag === 'tr') {
      result.children.push(this.parseTableRow(node))
    } else if (tag === 'tblGrid') {
      result.columns = this.parseTableGrid(node)
    } else if (tag === 'tblPr') {
      result.props = this.parseTableProperties(node)
    }
  }

  return result
}
1287
+
1288
/**
 * Parse the table grid into per-column widths.
 *
 * @param el - The grid element; only its `gridCol` children are considered.
 * @returns One entry per `gridCol`, in document order, whose `width` is the
 *          length read from the `w` attribute.
 */
private parseTableGrid(el: Element): { width?: string }[] {
  return xmlParser
    .elements(el)
    .filter(node => node.localName === 'gridCol')
    .map(node => ({ width: xmlParser.lengthAttr(node, 'w') }))
}
1304
+
1305
/**
 * Parse table-level properties.
 *
 * Recognised children: `tblW` (width and width type), `jc` (justification),
 * `tblBorders`, `tblCellSpacing` and `tblCellMar` (default cell margins).
 * Any other child is ignored.
 *
 * @param el - The table properties element.
 * @returns The collected properties; fields stay unset for children that
 *          are absent.
 */
private parseTableProperties(el: Element): ITableProperties {
  const result: ITableProperties = {}

  for (const node of xmlParser.elements(el)) {
    const tag = node.localName

    if (tag === 'tblW') {
      // NOTE(review): the width goes through the same length conversion
      // whatever the `type` attribute says — confirm consumers handle `pct`.
      result.width = xmlParser.lengthAttr(node, 'w')
      const kind = xmlParser.attr(node, 'type')
      // Only the three recognised width types are recorded.
      if (kind === 'auto' || kind === 'dxa' || kind === 'pct') {
        result.widthType = kind
      }
    } else if (tag === 'jc') {
      result.justification = xmlParser.attr(node, 'val')
    } else if (tag === 'tblBorders') {
      result.borders = this.parseTableBorders(node)
    } else if (tag === 'tblCellSpacing') {
      result.cellSpacing = xmlParser.lengthAttr(node, 'w')
    } else if (tag === 'tblCellMar') {
      // Reads the `w` length of the named margin child. The child may be
      // missing; the lookup result is passed to lengthAttr as-is.
      const side = (name: string) =>
        xmlParser.lengthAttr(xmlParser.element(node, name), 'w')

      result.cellMargin = {
        top: side('top'),
        bottom: side('bottom'),
        // `start` / `end` are used as fallbacks for `left` / `right`.
        left: side('left') || side('start'),
        right: side('right') || side('end'),
      }
    }
  }

  return result
}
1344
+
1345
/**
 * Parse table borders, including the inner `insideH` / `insideV` borders.
 *
 * `start` is stored as the left border and `end` as the right border.
 * Children with any other name are parsed but not stored.
 *
 * @param el - The table borders element.
 * @returns The borders found, keyed by side.
 */
private parseTableBorders(el: Element): IBorders {
  const result: IBorders = {}

  for (const node of xmlParser.elements(el)) {
    const parsed = this.parseBorder(node)
    const side = node.localName

    if (side === 'top') {
      result.top = parsed
    } else if (side === 'bottom') {
      result.bottom = parsed
    } else if (side === 'left' || side === 'start') {
      result.left = parsed
    } else if (side === 'right' || side === 'end') {
      result.right = parsed
    } else if (side === 'insideH') {
      result.insideH = parsed
    } else if (side === 'insideV') {
      result.insideV = parsed
    }
  }

  return result
}
1379
+
1380
/**
 * Parse a table row.
 *
 * @param el - The row element; only its direct `tc` children are parsed.
 * @returns A `DomType.TableRow` node containing the parsed cells in
 *          document order.
 */
private parseTableRow(el: Element): ITableRowElement {
  const cells: ITableCellElement[] = []

  for (const node of xmlParser.elements(el)) {
    if (node.localName === 'tc') {
      cells.push(this.parseTableCell(node))
    }
  }

  return { type: DomType.TableRow, children: cells }
}
1391
+
1392
/**
 * Parse a table cell.
 *
 * A `tcPr` child becomes the cell's `props`; every other child is handed to
 * `parseElement` and appended to `children` when that yields a truthy result.
 *
 * @param el - The cell element.
 * @returns The assembled `DomType.TableCell` node.
 */
private parseTableCell(el: Element): ITableCellElement {
  const result: ITableCellElement = { type: DomType.TableCell, children: [] }

  for (const node of xmlParser.elements(el)) {
    if (node.localName === 'tcPr') {
      result.props = this.parseTableCellProperties(node)
      continue
    }

    const content = this.parseElement(node)
    if (content) {
      result.children.push(content)
    }
  }

  return result
}
1414
+
1415
/**
 * Parse table cell properties.
 *
 * Recognised children: `tcW` (width), `gridSpan`, `vMerge`, `vAlign`,
 * `shd` (background fill) and `tcBorders`. Any other child is ignored.
 *
 * @param el - The cell properties element.
 * @returns The collected properties; fields stay unset for children that
 *          are absent.
 */
private parseTableCellProperties(el: Element): ITableCellProperties {
  const result: ITableCellProperties = {}

  for (const node of xmlParser.elements(el)) {
    const tag = node.localName

    if (tag === 'tcW') {
      result.width = xmlParser.lengthAttr(node, 'w')
    } else if (tag === 'gridSpan') {
      result.gridSpan = xmlParser.intAttr(node, 'val')
    } else if (tag === 'vMerge') {
      // Anything other than an explicit "restart" is recorded as "continue".
      const isRestart = xmlParser.attr(node, 'val') === 'restart'
      result.verticalMerge = isRestart ? 'restart' : 'continue'
    } else if (tag === 'vAlign') {
      result.verticalAlign = xmlParser.attr(node, 'val')
    } else if (tag === 'shd') {
      // A missing or "auto" fill leaves the shading unset.
      const fillColor = xmlParser.attr(node, 'fill')
      if (fillColor && fillColor !== 'auto') {
        result.shading = `#${fillColor}`
      }
    } else if (tag === 'tcBorders') {
      result.borders = this.parseBorders(node)
    }
  }

  return result
}
1450
+
1451
/**
 * Parse a hyperlink.
 *
 * When the element has an `id` attribute, it is looked up in
 * `this.relationships` and the matching relationship's target becomes `href`.
 * The `anchor` attribute is copied through unchanged.
 *
 * @param el - The hyperlink element.
 * @returns A `DomType.Hyperlink` node; `href` is `undefined` when there is no
 *          id or no relationship matches it.
 */
private parseHyperlink(el: Element): IHyperlinkElement {
  const relId = xmlParser.attr(el, 'id')

  // NOTE(review): the target is passed through without validation here —
  // confirm the consumer restricts URL schemes before emitting a link.
  const href = relId
    ? this.relationships.find(rel => rel.id === relId)?.target
    : undefined

  const anchor = xmlParser.attr(el, 'anchor')
  const children = this.parseChildren(el)

  return { type: DomType.Hyperlink, href, anchor, children }
}
1472
+
1473
/**
 * Parse a drawing element and collect the embedded images it references.
 *
 * Every DrawingML `blip` descendant whose `r:embed` id is present in
 * `this.images` becomes an image child. All images of one drawing share the
 * size read from the drawing's first `wp:extent` descendant (`cx` / `cy`,
 * converted with `LengthUsage.Emu`).
 *
 * @param el - The drawing element.
 * @returns A `DomType.Drawing` node whose children are the resolved images
 *          (empty when no blip resolves to a loaded image).
 */
private parseDrawing(el: Element): IDrawingElement {
  const DRAWINGML_NS = 'http://schemas.openxmlformats.org/drawingml/2006/main'
  const RELATIONSHIPS_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
  const WP_DRAWING_NS = 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'

  const children: IOpenXmlElement[] = []

  // Reads width/height from the first extent element of this drawing.
  const readSize = (): { width?: string; height?: string } => {
    const extentEl = el.getElementsByTagNameNS(WP_DRAWING_NS, 'extent')[0]
    const cx = extentEl ? extentEl.getAttribute('cx') : null
    const cy = extentEl ? extentEl.getAttribute('cy') : null
    return {
      width: cx ? xmlParser.convertLength(cx, LengthUsage.Emu) : undefined,
      height: cy ? xmlParser.convertLength(cy, LengthUsage.Emu) : undefined,
    }
  }

  // The extent does not depend on the blip, so it is resolved at most once
  // (when the first image resolves) rather than once per blip.
  let size: { width?: string; height?: string } | undefined

  for (const blip of Array.from(el.getElementsByTagNameNS(DRAWINGML_NS, 'blip'))) {
    const embedId = blip.getAttributeNS(RELATIONSHIPS_NS, 'embed')

    // A single get() both tests for and fetches the image source, so no
    // non-null assertion is needed.
    const src = embedId ? this.images.get(embedId) : undefined
    if (src === undefined) continue

    if (!size) size = readSize()

    children.push({
      type: DomType.Image,
      src,
      width: size.width,
      height: size.height,
    } as IImageElement)
  }

  return { type: DomType.Drawing, children }
}
1523
+
1524
/**
 * Parse the start marker of a comment range.
 *
 * @param el - The range start element.
 * @returns A `DomType.CommentRangeStart` node whose `id` is the element's
 *          `id` attribute, or an empty string when it is falsy.
 */
private parseCommentRangeStart(el: Element): ICommentRangeStart {
  const id = xmlParser.attr(el, 'id') || ''
  return { type: DomType.CommentRangeStart, id }
}
1533
+
1534
/**
 * Parse the end marker of a comment range.
 *
 * @param el - The range end element.
 * @returns A `DomType.CommentRangeEnd` node whose `id` is the element's
 *          `id` attribute, or an empty string when it is falsy.
 */
private parseCommentRangeEnd(el: Element): ICommentRangeEnd {
  const id = xmlParser.attr(el, 'id') || ''
  return { type: DomType.CommentRangeEnd, id }
}
1543
+
1544
/**
 * Parse a comment reference.
 *
 * @param el - The comment reference element.
 * @returns A `DomType.CommentReference` node whose `id` is the element's
 *          `id` attribute, or an empty string when it is falsy.
 */
private parseCommentReference(el: Element): ICommentReference {
  const id = xmlParser.attr(el, 'id') || ''
  return { type: DomType.CommentReference, id }
}
1553
+
1554
/**
 * Parse a footnote reference.
 *
 * @param el - The footnote reference element.
 * @returns A `DomType.FootnoteReference` node whose `id` is the element's
 *          `id` attribute, or an empty string when it is falsy.
 */
private parseFootnoteReference(el: Element): IFootnoteReference {
  const id = xmlParser.attr(el, 'id') || ''
  return { type: DomType.FootnoteReference, id }
}
1563
+
1564
/**
 * Parse an endnote reference.
 *
 * @param el - The endnote reference element.
 * @returns A `DomType.EndnoteReference` node whose `id` is the element's
 *          `id` attribute, or an empty string when it is falsy.
 */
private parseEndnoteReference(el: Element): IEndnoteReference {
  const id = xmlParser.attr(el, 'id') || ''
  return { type: DomType.EndnoteReference, id }
}
1573
+
1574
/**
 * Parse a bookmark start marker, e.g.
 * `<w:bookmarkStart w:id="0" w:name="bookmark1"/>`.
 *
 * Besides returning the node, the bookmark is registered in `this.bookmarks`
 * under its name, unless the name is empty or begins with `_`.
 *
 * @param el - The bookmark start element.
 * @returns A `DomType.BookmarkStart` node with `id`, `name` (both default to
 *          an empty string) and the `colFirst` / `colLast` integer attributes.
 */
private parseBookmarkStart(el: Element): IBookmarkStartElement {
  const id = xmlParser.attr(el, 'id') || ''
  const name = xmlParser.attr(el, 'name') || ''
  const colFirst = xmlParser.intAttr(el, 'colFirst')
  const colLast = xmlParser.intAttr(el, 'colLast')

  const result: IBookmarkStartElement = {
    type: DomType.BookmarkStart,
    id,
    name,
    colFirst,
    colLast,
  }

  // Names starting with "_" are treated as built-in bookmarks (such as
  // _GoBack) and are not registered.
  // NOTE(review): this also excludes names like _Toc… / _Ref… — confirm that
  // nothing resolves anchors through this map for those.
  const isBuiltIn = name.startsWith('_')
  if (name && !isBuiltIn) {
    this.bookmarks.set(name, result)
  }

  return result
}
1595
+
1596
/**
 * Parse a bookmark end marker, e.g. `<w:bookmarkEnd w:id="0"/>`.
 *
 * @param el - The bookmark end element.
 * @returns A `DomType.BookmarkEnd` node whose `id` is the element's `id`
 *          attribute, or an empty string when it is falsy.
 */
private parseBookmarkEnd(el: Element): IBookmarkEndElement {
  const id = xmlParser.attr(el, 'id') || ''
  return { type: DomType.BookmarkEnd, id }
}
1606
+ }