@fiduswriter/document 0.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +16 -0
  3. package/jest.config.js +23 -0
  4. package/package.json +59 -0
  5. package/schema.json +1 -0
  6. package/scripts/export-schema.js +16 -0
  7. package/src/bibliography/common.js +92 -0
  8. package/src/bibliography/csl_bib.js +139 -0
  9. package/src/citations/citeproc_sys.js +42 -0
  10. package/src/citations/format.js +194 -0
  11. package/src/common/blob.js +10 -0
  12. package/src/common/file.js +25 -0
  13. package/src/common/index.js +12 -0
  14. package/src/common/network.js +79 -0
  15. package/src/common/text.js +44 -0
  16. package/src/editor/e2ee/encryptor.js +228 -0
  17. package/src/exporter/docx/citations.js +177 -0
  18. package/src/exporter/docx/comments.js +165 -0
  19. package/src/exporter/docx/footnotes.js +240 -0
  20. package/src/exporter/docx/images.js +101 -0
  21. package/src/exporter/docx/index.js +185 -0
  22. package/src/exporter/docx/lists.js +260 -0
  23. package/src/exporter/docx/math.js +46 -0
  24. package/src/exporter/docx/metadata.js +289 -0
  25. package/src/exporter/docx/rels.js +193 -0
  26. package/src/exporter/docx/render.js +941 -0
  27. package/src/exporter/docx/richtext.js +1182 -0
  28. package/src/exporter/docx/tables.js +112 -0
  29. package/src/exporter/docx/tools.js +50 -0
  30. package/src/exporter/epub/index.js +142 -0
  31. package/src/exporter/epub/templates.js +140 -0
  32. package/src/exporter/epub/tools.js +96 -0
  33. package/src/exporter/html/citations.js +121 -0
  34. package/src/exporter/html/convert.js +813 -0
  35. package/src/exporter/html/index.js +192 -0
  36. package/src/exporter/html/templates.js +34 -0
  37. package/src/exporter/html/tools.js +50 -0
  38. package/src/exporter/jats/bibliography.js +183 -0
  39. package/src/exporter/jats/citations.js +109 -0
  40. package/src/exporter/jats/convert.js +871 -0
  41. package/src/exporter/jats/index.js +92 -0
  42. package/src/exporter/jats/templates.js +35 -0
  43. package/src/exporter/jats/text.js +72 -0
  44. package/src/exporter/latex/convert.js +934 -0
  45. package/src/exporter/latex/escape_latex.js +21 -0
  46. package/src/exporter/latex/index.js +74 -0
  47. package/src/exporter/latex/readme.js +22 -0
  48. package/src/exporter/native/shrink.js +132 -0
  49. package/src/exporter/odt/citations.js +101 -0
  50. package/src/exporter/odt/footnotes.js +147 -0
  51. package/src/exporter/odt/images.js +115 -0
  52. package/src/exporter/odt/index.js +156 -0
  53. package/src/exporter/odt/math.js +57 -0
  54. package/src/exporter/odt/metadata.js +251 -0
  55. package/src/exporter/odt/render.js +806 -0
  56. package/src/exporter/odt/richtext.js +865 -0
  57. package/src/exporter/odt/styles.js +387 -0
  58. package/src/exporter/odt/track.js +68 -0
  59. package/src/exporter/pandoc/citations.js +98 -0
  60. package/src/exporter/pandoc/convert.js +1017 -0
  61. package/src/exporter/pandoc/index.js +92 -0
  62. package/src/exporter/pandoc/readme.js +8 -0
  63. package/src/exporter/pandoc/tools.js +51 -0
  64. package/src/exporter/print/index.js +177 -0
  65. package/src/exporter/tools/doc_content.js +144 -0
  66. package/src/exporter/tools/file.js +9 -0
  67. package/src/exporter/tools/json.js +73 -0
  68. package/src/exporter/tools/svg.js +29 -0
  69. package/src/exporter/tools/xml.js +531 -0
  70. package/src/exporter/tools/xml_zip.js +95 -0
  71. package/src/exporter/tools/zip.js +90 -0
  72. package/src/exporter/tools/zotero_csl.js +93 -0
  73. package/src/importer/citations.js +129 -0
  74. package/src/importer/docx/citations.js +123 -0
  75. package/src/importer/docx/convert.js +1427 -0
  76. package/src/importer/docx/helpers.js +9 -0
  77. package/src/importer/docx/omml2mathml.js +1448 -0
  78. package/src/importer/docx/parse.js +735 -0
  79. package/src/importer/native/get_images.js +76 -0
  80. package/src/importer/native/update.js +29 -0
  81. package/src/importer/odt/citations.js +87 -0
  82. package/src/importer/odt/convert.js +1855 -0
  83. package/src/importer/pandoc/convert.js +884 -0
  84. package/src/importer/pandoc/helpers.js +84 -0
  85. package/src/importer/zip_analyzer.js +102 -0
  86. package/src/index.js +1 -0
  87. package/src/mathlive/opf_includes.js +24 -0
  88. package/src/schema/common/annotate.js +76 -0
  89. package/src/schema/common/base.js +118 -0
  90. package/src/schema/common/citation.js +62 -0
  91. package/src/schema/common/equation.js +31 -0
  92. package/src/schema/common/figure.js +190 -0
  93. package/src/schema/common/heading.js +43 -0
  94. package/src/schema/common/index.js +40 -0
  95. package/src/schema/common/list.js +95 -0
  96. package/src/schema/common/reference.js +100 -0
  97. package/src/schema/common/table.js +103 -0
  98. package/src/schema/common/track.js +190 -0
  99. package/src/schema/const.js +58 -0
  100. package/src/schema/convert.js +1272 -0
  101. package/src/schema/document/content.js +187 -0
  102. package/src/schema/document/index.js +117 -0
  103. package/src/schema/document/structure.js +452 -0
  104. package/src/schema/export.js +21 -0
  105. package/src/schema/footnotes.js +126 -0
  106. package/src/schema/footnotes_convert.js +31 -0
  107. package/src/schema/i18n.js +595 -0
  108. package/src/schema/index.js +5 -0
  109. package/src/schema/mini_json.js +61 -0
  110. package/src/schema/text.js +22 -0
@@ -0,0 +1,1427 @@
1
+ import {MathMLToLaTeX} from "mathml-to-latex"
2
+ import {xmlDOM} from "../../exporter/tools/xml.js"
3
+ import {
4
+ randomCommentId,
5
+ randomFigureId,
6
+ randomHeadingId
7
+ } from "../../schema/common/index.js"
8
+ import {
9
+ isDocxBibliographyField,
10
+ isDocxCitationField,
11
+ isDocxSdtBibliography,
12
+ isDocxSdtCitation,
13
+ parseDocxFieldCitation,
14
+ parseDocxSdtCitation
15
+ } from "./citations.js"
16
+ import {normalizeText} from "./helpers.js"
17
+ import {omml2mathml} from "./omml2mathml.js"
18
+ import {DocxParser} from "./parse.js"
19
+
20
+ export class DocxConvert {
21
+ constructor(zip, importId, template, bibliography) {
22
+ this.zip = zip
23
+ this.importId = importId
24
+ this.template = template
25
+ this.bibliography = bibliography
26
+ this.images = {}
27
+ this.parser = new DocxParser(zip)
28
+ this.tracks = {}
29
+ this.currentTracks = []
30
+ this.currentFields = []
31
+ this.currentCommentIds = []
32
+ this.sourcesXml = null
33
+ }
34
+
35
+ async init() {
36
+ await this.parser.init()
37
+ // Load Word-native bibliography sources if present.
38
+ // This file is required by DocxCitationsParser for CITATION field codes.
39
+ this.sourcesXml =
40
+ (await this.zip.file("customXml/item1.xml")?.async("string")) ??
41
+ null
42
+ const body = this.parser.document.query("w:body")
43
+ if (!body) {
44
+ return {
45
+ content: {
46
+ type: "doc",
47
+ content: []
48
+ },
49
+ settings: {
50
+ import_id: this.importId,
51
+ tracked: false,
52
+ language: "en-US"
53
+ },
54
+ comments: {}
55
+ }
56
+ }
57
+ // Find all reference targets in the document for cross-references
58
+ this.referenceTargets = this.findReferenceTargets(this.parser.document)
59
+
60
+ const convertedContent = this.convertDocument(body)
61
+ // Convert document
62
+ return {
63
+ content: convertedContent,
64
+ settings: {
65
+ import_id: this.importId,
66
+ tracked: this.hasTrackedChanges(this.parser.document),
67
+ language: this.detectLanguage(this.parser.document)
68
+ },
69
+ comments: this.parser.comments
70
+ }
71
+ }
72
+
73
+ convertDocument(body) {
74
+ const templateParts = this.template.content.content.slice()
75
+ templateParts.shift() // Remove first element
76
+
77
+ const document = {
78
+ type: "doc",
79
+ attrs: {
80
+ import_id: this.importId
81
+ },
82
+ content: []
83
+ }
84
+ // Add title (required first element)
85
+ const title = this.extractTitle(body)
86
+ document.content.push({
87
+ type: "title",
88
+ content: title.content || [
89
+ {type: "text", text: gettext("Untitled")}
90
+ ]
91
+ })
92
+ title.containerNodes.forEach(node => {
93
+ node.parentElement.removeChild(node)
94
+ })
95
+ document.attrs.title =
96
+ title.content.map(node => node.textContent).join("") ||
97
+ gettext("Untitled")
98
+ // Extract metadata sections
99
+ const metadata = this.extractMetadata(body)
100
+ metadata.forEach(({type, content}) => {
101
+ const templatePart = templateParts.find(
102
+ part => part.attrs.metadata === type
103
+ )
104
+ const attrs = {}
105
+ if (templatePart.attrs.hidden) {
106
+ attrs.hidden = false
107
+ }
108
+ if (templatePart) {
109
+ document.content.push({
110
+ type: templatePart.type,
111
+ attrs: {
112
+ ...templatePart.attrs,
113
+ ...attrs
114
+ },
115
+ content: content.content
116
+ })
117
+ // Remove paragraphs from content so they are not added to body
118
+ content.containerNodes.forEach(node => {
119
+ node.parentElement?.removeChild(node)
120
+ })
121
+ }
122
+ })
123
+ // Extract main content sections
124
+ const sections = this.groupContentIntoSections(body)
125
+ // Map sections to template parts
126
+ sections.forEach(section => {
127
+ const templatePart = this.findMatchingTemplatePart(
128
+ section.title,
129
+ templateParts
130
+ )
131
+ if (templatePart) {
132
+ document.content.push({
133
+ type: "richtext_part",
134
+ attrs: {
135
+ title: templatePart.attrs.title,
136
+ id: templatePart.attrs.id,
137
+ metadata: templatePart.attrs.metadata || undefined,
138
+ marks: templatePart.attrs.marks || [
139
+ "strong",
140
+ "em",
141
+ "link"
142
+ ]
143
+ },
144
+ content: section.content
145
+ })
146
+ }
147
+ })
148
+
149
+ // Add remaining content to body section
150
+ const unassignedContent = sections
151
+ .filter(
152
+ section =>
153
+ !this.findMatchingTemplatePart(section.title, templateParts)
154
+ )
155
+ .flatMap(section => section.content)
156
+
157
+ if (unassignedContent.length) {
158
+ const bodyTemplatePart = templateParts.find(
159
+ part => !part.attrs.metadata && part.type === "richtext_part"
160
+ )
161
+
162
+ document.content.push({
163
+ type: "richtext_part",
164
+ attrs: {
165
+ title: bodyTemplatePart
166
+ ? bodyTemplatePart.attrs.title
167
+ : "Body",
168
+ id: bodyTemplatePart ? bodyTemplatePart.attrs.id : "body",
169
+ marks: ["strong", "em", "link"]
170
+ },
171
+ content: unassignedContent
172
+ })
173
+ }
174
+
175
+ return document
176
+ }
177
+
178
+ extractMetadata(body) {
179
+ const metadata = []
180
+
181
+ // Try structured contributor data from custom properties first
182
+ const contributorsByRole = this.extractContributorsFromCustomProps()
183
+ if (Object.keys(contributorsByRole).length) {
184
+ Object.entries(contributorsByRole).forEach(
185
+ ([role, contributors]) => {
186
+ metadata.push({
187
+ type: role,
188
+ content: {content: contributors, containerNodes: []}
189
+ })
190
+ }
191
+ )
192
+ } else {
193
+ // Fall back to legacy author extraction
194
+ const authors = this.extractAuthors(body)
195
+ if (authors.content.length) {
196
+ metadata.push({
197
+ type: "authors",
198
+ content: authors
199
+ })
200
+ }
201
+ }
202
+
203
+ // Extract abstract if present
204
+ const abstract = this.extractAbstract(body)
205
+ if (abstract.content.length) {
206
+ metadata.push({
207
+ type: "abstract",
208
+ content: abstract
209
+ })
210
+ }
211
+
212
+ // Extract keywords if present
213
+ const keywords = this.extractKeywords(body)
214
+ if (keywords.content.length) {
215
+ metadata.push({
216
+ type: "keywords",
217
+ content: keywords
218
+ })
219
+ }
220
+ return metadata
221
+ }
222
+
223
+ extractContributorsFromCustomProps() {
224
+ if (!this.parser.customDoc) {
225
+ return {}
226
+ }
227
+
228
+ const properties = this.parser.customDoc.queryAll("property")
229
+ const contributors = []
230
+
231
+ properties.forEach(prop => {
232
+ const name = prop.getAttribute("name")
233
+ if (!name || !name.startsWith("fidus_contributor_")) {
234
+ return
235
+ }
236
+ const match = name.match(/^fidus_contributor_(\d+)_(\w+)$/)
237
+ if (!match) {
238
+ return
239
+ }
240
+ const num = parseInt(match[1])
241
+ const field = match[2]
242
+ const lpwstr = prop.query("vt:lpwstr")
243
+ const value = lpwstr ? lpwstr.textContent : ""
244
+
245
+ if (!contributors[num - 1]) {
246
+ contributors[num - 1] = {
247
+ type: "contributor",
248
+ attrs: {
249
+ firstname: "",
250
+ lastname: "",
251
+ email: "",
252
+ institution: "",
253
+ id_type: "",
254
+ id_value: "",
255
+ role: ""
256
+ }
257
+ }
258
+ }
259
+ if (field === "role") {
260
+ contributors[num - 1].attrs.role = value
261
+ } else if (
262
+ [
263
+ "firstname",
264
+ "lastname",
265
+ "email",
266
+ "institution",
267
+ "id_type",
268
+ "id_value"
269
+ ].includes(field)
270
+ ) {
271
+ contributors[num - 1].attrs[field] = value
272
+ }
273
+ })
274
+
275
+ const byRole = {}
276
+ contributors.forEach(contributor => {
277
+ if (!contributor) {
278
+ return
279
+ }
280
+ const role = contributor.attrs.role || "authors"
281
+ if (!byRole[role]) {
282
+ byRole[role] = []
283
+ }
284
+ byRole[role].push(contributor)
285
+ })
286
+
287
+ return byRole
288
+ }
289
+
290
+ extractAuthors(body) {
291
+ const authors = []
292
+
293
+ // Try to find author information in metadata
294
+ const authorNodes = body
295
+ .queryAll("w:pStyle", {"w:val": "Author"})
296
+ .map(pStyle => pStyle.closest("w:p"))
297
+ .filter(p => p)
298
+ authorNodes.forEach(authorNode => {
299
+ const authorText = this.getTextContent(authorNode)
300
+ const [firstname = "", lastname = ""] = authorText.split(" ", 2)
301
+ authors.push({
302
+ type: "contributor",
303
+ attrs: {
304
+ firstname,
305
+ lastname,
306
+ email: "",
307
+ institution: ""
308
+ }
309
+ })
310
+ })
311
+ if (authors.length) {
312
+ return {
313
+ content: authors,
314
+ containerNodes: authorNodes
315
+ }
316
+ }
317
+ // Also check Creator in document properties
318
+ const creator = this.parser.coreDoc.query("dc:creator")?.textContent
319
+
320
+ if (creator) {
321
+ const [firstname = "", lastname = ""] = creator.split(" ", 2)
322
+
323
+ return {
324
+ content: [
325
+ {
326
+ type: "contributor",
327
+ attrs: {
328
+ firstname,
329
+ lastname,
330
+ email: "",
331
+ institution: ""
332
+ }
333
+ }
334
+ ],
335
+ containerNodes: []
336
+ }
337
+ }
338
+ return {content: [], containerNodes: []}
339
+ }
340
+
341
+ extractAbstract(body) {
342
+ // Look for section with Abstract style or heading
343
+ const abstractNodes = body
344
+ .queryAll("w:pStyle", {"w:val": "Abstract"})
345
+ .map(pStyle => pStyle.closest("w:p"))
346
+ .filter(p => p)
347
+ if (abstractNodes.length) {
348
+ return {
349
+ content: abstractNodes.map(abstractNode =>
350
+ this.convertBlock(abstractNode)
351
+ ),
352
+ containerNodes: abstractNodes
353
+ }
354
+ }
355
+ const extractedPart = this.extractPartOnTitle(body, ["Abstract"])
356
+ if (extractedPart.content.length) {
357
+ return {
358
+ content: extractedPart.content.map(abstractNode =>
359
+ this.convertBlock(abstractNode)
360
+ ),
361
+ containerNodes: extractedPart.content.concat([
362
+ extractedPart.header
363
+ ])
364
+ }
365
+ }
366
+ return {content: [], containerNodes: []}
367
+ }
368
+
369
+ extractKeywords(body) {
370
+ let extraNodes = []
371
+ // Look for keywords section or metadata
372
+ let keywordNodes = body
373
+ .queryAll("w:pStyle", {"w:val": "Keywords"})
374
+ .map(pStyle => pStyle.closest("w:p"))
375
+ .filter(p => p)
376
+
377
+ if (!keywordNodes.length) {
378
+ // If no keywords section is found, look for a title called "Keywords"
379
+ const extractedPart = this.extractPartOnTitle(
380
+ body,
381
+ ["Keywords", "Keywords:", "Keyword"],
382
+ 1
383
+ )
384
+
385
+ if (extractedPart.content.length) {
386
+ keywordNodes = extractedPart.content
387
+ extraNodes = extractedPart.header ? [extractedPart.header] : []
388
+ }
389
+ }
390
+
391
+ if (keywordNodes) {
392
+ return {
393
+ content: keywordNodes
394
+ .map(keywordsNode => this.getTextContent(keywordsNode))
395
+ .flatMap(str => str.split(/[,;|:]+/)) // Split on multiple separators
396
+ .map(keyword => keyword.trim()) // Trim whitespace
397
+ .filter(keyword => keyword.length > 0)
398
+ .map(keyword => ({
399
+ type: "tag",
400
+ attrs: {
401
+ tag: keyword
402
+ }
403
+ })),
404
+ containerNodes: keywordNodes.concat(extraNodes)
405
+ }
406
+ }
407
+
408
+ return {content: [], containerNodes: []}
409
+ }
410
+
411
+ extractPartOnTitle(body, titleWords, maxPars = false) {
412
+ // Fall back to heading starting with TITLEWORD in text
413
+ if (typeof titleWords === "string") {
414
+ titleWords = [titleWords]
415
+ }
416
+ const headingPars = body
417
+ .queryAll("w:pStyle", {
418
+ "w:val": [
419
+ "Heading1",
420
+ "Heading2",
421
+ "Heading3",
422
+ "Heading4",
423
+ "Heading5",
424
+ "Heading6",
425
+ "Heading7",
426
+ "Heading8",
427
+ "Heading9"
428
+ ]
429
+ })
430
+ .map(pStyle => pStyle.closest("w:p"))
431
+ .filter(p => p)
432
+ const header = headingPars.find(p =>
433
+ titleWords.includes(this.getTextContent(p).trim())
434
+ )
435
+ const content = []
436
+ if (header && header.nextSibling) {
437
+ //const content = []
438
+ //const containerNodes = [sectionHeader]
439
+ const headerLevel = this.getParaStyle(header).level
440
+ let searchPar = header
441
+
442
+ // Add everything to abstract until next heading with the same or lower level
443
+ while (
444
+ searchPar.nextSibling &&
445
+ (!maxPars || content.length < maxPars)
446
+ ) {
447
+ searchPar = searchPar.nextSibling
448
+ const paraStyle = this.getParaStyle(searchPar)
449
+ if (paraStyle.isHeading && paraStyle.level <= headerLevel) {
450
+ break
451
+ }
452
+ content.push(searchPar)
453
+ }
454
+ }
455
+
456
+ return {header, content}
457
+ }
458
+
459
+ groupContentIntoSections(body) {
460
+ const sections = []
461
+ let currentSection = {
462
+ title: null,
463
+ content: []
464
+ }
465
+
466
+ const skippedBlocks = []
467
+
468
+ body.children.forEach(node => {
469
+ if (skippedBlocks.includes(node)) {
470
+ return
471
+ }
472
+ if (node.tagName !== "w:p") {
473
+ return
474
+ }
475
+
476
+ const style = this.getParaStyle(node)
477
+ const title = this.getSectionTitle(node, style)
478
+ if (title && style.isHeading) {
479
+ if (currentSection.content.length) {
480
+ sections.push(currentSection)
481
+ }
482
+ currentSection = {
483
+ title,
484
+ content: []
485
+ }
486
+ }
487
+
488
+ const block = this.convertBlock(node, skippedBlocks)
489
+ if (block) {
490
+ currentSection.content.push(block)
491
+ }
492
+ })
493
+
494
+ if (currentSection.content.length) {
495
+ sections.push(currentSection)
496
+ }
497
+
498
+ return sections
499
+ }
500
+
501
+ getSectionTitle(node, style) {
502
+ if (!node || !style) {
503
+ return null
504
+ }
505
+
506
+ // For headings, use text content as section title
507
+ if (style.isHeading && style.level <= 4) {
508
+ return this.getTextContent(node)
509
+ }
510
+
511
+ // Check style name for section indicators
512
+ if (style.name) {
513
+ const name = style.name.toLowerCase()
514
+ if (name.includes("section") || name.includes("title")) {
515
+ return this.getTextContent(node)
516
+ }
517
+ }
518
+
519
+ return null
520
+ }
521
+
522
+ findMatchingTemplatePart(sectionTitle, templateParts) {
523
+ if (!sectionTitle) {
524
+ return null
525
+ }
526
+
527
+ // Try exact match first
528
+ let matchingPart = templateParts.find(
529
+ part =>
530
+ part.type === "richtext_part" &&
531
+ !part.attrs.metadata &&
532
+ part.attrs.title.toLowerCase() === sectionTitle.toLowerCase()
533
+ )
534
+
535
+ if (!matchingPart) {
536
+ // Try fuzzy matching
537
+ matchingPart = templateParts.find(
538
+ part =>
539
+ part.type === "richtext_part" &&
540
+ !part.attrs.metadata &&
541
+ this.isSimilarTitle(part.attrs.title, sectionTitle)
542
+ )
543
+ }
544
+
545
+ return matchingPart
546
+ }
547
+
548
+ isSimilarTitle(title1, title2) {
549
+ const normalized1 = normalizeText(title1)
550
+ const normalized2 = normalizeText(title2)
551
+
552
+ return (
553
+ normalized1.includes(normalized2) ||
554
+ normalized2.includes(normalized1)
555
+ )
556
+ }
557
+
558
+ getTextContent(node) {
559
+ return node
560
+ .queryAll("w:t")
561
+ .map(t => t.textContent)
562
+ .join("")
563
+ }
564
+
565
+ extractTitle(body) {
566
+ // First try to find paragraph with Title style
567
+ const titlePars = body
568
+ .queryAll("w:pStyle", {"w:val": "Title"})
569
+ .map(pStyle => pStyle.closest("w:p"))
570
+ .filter(p => p)
571
+
572
+ if (titlePars.length) {
573
+ return {
574
+ content: this.convertInline(titlePars[0]),
575
+ containerNodes: [titlePars[0]]
576
+ }
577
+ }
578
+
579
+ // Fall back to first heading
580
+ const headingPars = body
581
+ .queryAll("w:pStyle", {
582
+ "w:val": [
583
+ "Heading1",
584
+ "Heading2",
585
+ "Heading3",
586
+ "Heading4",
587
+ "Heading5",
588
+ "Heading6",
589
+ "Heading7",
590
+ "Heading8",
591
+ "Heading9"
592
+ ]
593
+ })
594
+ .map(pStyle => pStyle.closest("w:p"))
595
+ .filter(p => p)
596
+ if (headingPars.length) {
597
+ return {
598
+ content: this.convertInline(headingPars[0]),
599
+ containerNodes: [headingPars[0]]
600
+ }
601
+ }
602
+
603
+ return {
604
+ content: [
605
+ {
606
+ type: "text",
607
+ text: gettext("Untitled")
608
+ }
609
+ ],
610
+ containerNodes: []
611
+ }
612
+ }
613
+
614
+ inBibliography(node) {
615
+ // Check if we currently are in a field.
616
+ const currentField = this.currentFields[this.currentFields.length - 1]
617
+
618
+ if (
619
+ currentField &&
620
+ isDocxBibliographyField(currentField.instructions)
621
+ ) {
622
+ return true
623
+ }
624
+ // Check every SDT block inside this paragraph.
625
+ for (const sdt of node.queryAll("w:sdt")) {
626
+ if (isDocxSdtBibliography(sdt)) {
627
+ return true
628
+ }
629
+ }
630
+
631
+ return false
632
+ }
633
+
634
/**
 * Convert one paragraph element into a block node.
 *
 * Dispatches to heading, list item, figure or paragraph conversion based
 * on the paragraph style and on whether it contains a drawing/picture.
 * A caption paragraph and the adjacent figure paragraph are merged into
 * one figure; the sibling consumed that way is recorded in
 * `skippedBlocks` so the caller does not convert it again.
 *
 * @param node Element to convert; anything other than `w:p` yields null.
 * @param {Array} skippedBlocks Mutated: receives siblings consumed here.
 * @returns The converted block (with track information from
 *     wrapTrackChanges), or null for non-paragraphs, bibliography
 *     paragraphs and conversions that produce nothing.
 */
convertBlock(node, skippedBlocks = []) {
    if (node.tagName !== "w:p") {
        return null
    }
    // State before conversion: are we inside a bibliography that was
    // opened by an earlier paragraph (or by an SDT in this one)?
    const inBibliography = this.inBibliography(node)
    let converted
    const style = this.getParaStyle(node)
    if (style.isHeading) {
        converted = this.convertHeading(node, style)
    } else if (style.numbering) {
        converted = this.convertListItem(node, style)
    } else if (
        style.isCaption &&
        (node.query("w:drawing") || node.query("w:pict"))
    ) {
        // Caption and image share the same paragraph.
        converted = this.convertFigure(node, node)
    } else if (
        style.isCaption &&
        (node.nextSibling?.query("w:drawing") ||
            node.nextSibling?.query("w:pict")) &&
        !skippedBlocks.includes(node.nextSibling)
    ) {
        // Caption paragraph followed by the image paragraph.
        skippedBlocks.push(node.nextSibling)
        converted = this.convertFigure(node.nextSibling, node)
    } else if (node.query("w:drawing") || node.query("w:pict")) {
        if (
            node.nextSibling &&
            this.getParaStyle(node.nextSibling).isCaption
        ) {
            // Image paragraph followed by its caption paragraph.
            skippedBlocks.push(node.nextSibling)
            converted = this.convertFigure(node, node.nextSibling)
        } else {
            converted = this.convertFigure(node)
        }
    } else {
        converted = this.convertParagraph(node)
    }
    // Checked again after conversion: converting the inline content
    // opens and closes fields on this.currentFields, so a bibliography
    // field that starts inside this paragraph is only visible now.
    // Bibliography paragraphs are still converted above, presumably so
    // that the field stack stays in sync - TODO confirm.
    if (inBibliography || this.inBibliography(node)) {
        // We skip bibliography paragraphs
        return null
    }
    return this.wrapTrackChanges(node, converted)
}
677
+
678
+ wrapTrackChanges(node, content) {
679
+ if (!content || !node.previousSibling) {
680
+ return content
681
+ }
682
+ const track = this.getTracksFromNode(node.previousSibling)
683
+ if (!track) {
684
+ return content
685
+ }
686
+
687
+ return {
688
+ ...content,
689
+ attrs: Object.assign({}, content.attrs || {}, {track})
690
+ }
691
+ }
692
+
693
+ getTracksFromNode(node) {
694
+ const deletion = node.query("w:pPr")?.query("w:del")
695
+ const insertion = node.query("w:pPr")?.query("w:ins")
696
+
697
+ const tracks = []
698
+
699
+ if (insertion) {
700
+ const date = new Date(insertion.getAttribute("w:date"))
701
+ const date10 = Math.floor(date.getTime() / 60000) * 10
702
+ tracks.push({
703
+ type: "insertion",
704
+ user: 0, // Default user ID
705
+ username: insertion.getAttribute("w:author"),
706
+ date: date10
707
+ })
708
+ }
709
+
710
+ if (deletion) {
711
+ const date = new Date(deletion.getAttribute("w:date"))
712
+ const date10 = Math.floor(date.getTime() / 60000) * 10
713
+ tracks.push({
714
+ type: "deletion",
715
+ user: 0, // Default user ID
716
+ username: deletion.getAttribute("w:author"),
717
+ date: date10
718
+ })
719
+ }
720
+
721
+ if (tracks.length === 0) {
722
+ return null
723
+ }
724
+
725
+ return tracks
726
+ }
727
+
728
+ getParaStyle(node) {
729
+ const pStyle = node.query("w:pStyle")
730
+ const styleId = pStyle?.getAttribute("w:val")
731
+ const style = this.parser.styles[styleId] || {}
732
+
733
+ const numPr = node.query("w:numPr")
734
+ const numId = numPr?.query("w:numId")?.getAttribute("w:val")
735
+ const ilvl = parseInt(
736
+ numPr?.query("w:ilvl")?.getAttribute("w:val") || "0"
737
+ )
738
+
739
+ return {
740
+ ...style,
741
+ numbering: numId
742
+ ? {
743
+ id: numId,
744
+ level: ilvl,
745
+ definition: this.parser.numbering[numId]
746
+ }
747
+ : null
748
+ }
749
+ }
750
+
751
+ convertParagraph(node) {
752
+ const pStyle = node.query("w:pStyle")
753
+ const styleId = pStyle?.getAttribute("w:val")
754
+
755
+ // Check if this is a code block (Code style or inherited from one)
756
+ if (
757
+ styleId &&
758
+ (this.parser.isCodeStyle?.(styleId) || styleId === "Code")
759
+ ) {
760
+ return {
761
+ type: "code_block",
762
+ attrs: {
763
+ track: [],
764
+ language: "",
765
+ category: "",
766
+ title: "",
767
+ id: ""
768
+ },
769
+ content: this.convertInline(node)
770
+ }
771
+ }
772
+
773
+ return {
774
+ type: "paragraph",
775
+ content: this.convertInline(node)
776
+ }
777
+ }
778
+
779
+ convertHeading(node, style) {
780
+ return {
781
+ type: `heading${style.level}`,
782
+ attrs: {
783
+ id: randomHeadingId(),
784
+ level: style.level
785
+ },
786
+ content: this.convertInline(node)
787
+ }
788
+ }
789
+
790
+ convertListItem(node, style) {
791
+ const numbering = style.numbering
792
+ const level = numbering.definition?.levels[numbering.level]
793
+
794
+ return {
795
+ type: level?.format === "bullet" ? "bullet_list" : "ordered_list",
796
+ attrs: {
797
+ id: `L${Math.random().toString(36).slice(2)}`,
798
+ level: numbering.level,
799
+ start: level?.start || 1
800
+ },
801
+ content: [
802
+ {
803
+ type: "list_item",
804
+ content: [this.convertParagraph(node)]
805
+ }
806
+ ]
807
+ }
808
+ }
809
+
810
+ convertFigure(node, captionNode = null) {
811
+ let captionBlock, captionOrder
812
+ if (captionNode) {
813
+ captionBlock = this.convertParagraph(captionNode)
814
+ captionOrder = node.nextSibling === captionNode ? "after" : "before"
815
+ }
816
+
817
+ const drawing = node.query("w:drawing")
818
+ if (!drawing) {
819
+ return null
820
+ }
821
+
822
+ const blip = drawing.query("a:blip")
823
+ if (!blip) {
824
+ return null
825
+ }
826
+
827
+ const rId = blip.getAttribute("r:embed")
828
+ const rel = this.parser.relationships[rId]
829
+ if (!rel) {
830
+ return null
831
+ }
832
+
833
+ const imagePath = rel.target.split("/").pop()
834
+ const imageBlob = this.parser.images[imagePath]
835
+
836
+ if (!imageBlob) {
837
+ return null
838
+ }
839
+
840
+ // <a:ext cx="5753598" cy="4463556" />
841
+ //
842
+ const size = drawing.query("a:ext")
843
+ const width = parseInt(size.getAttribute("cx") || 0) / 9525 // In EMUs
844
+ const height = parseInt(size.getAttribute("cy") || 0) / 9525 // In EMUs
845
+
846
+ const imageId = Math.floor(Math.random() * 1000000)
847
+ this.images[imageId] = {
848
+ id: imageId,
849
+ title: imagePath,
850
+ image: imagePath,
851
+ file: imageBlob,
852
+
853
+ copyright: {
854
+ holder: false,
855
+ year: false,
856
+ freeToRead: true,
857
+ licenses: []
858
+ },
859
+ checksum: 0,
860
+ width,
861
+ height
862
+ }
863
+
864
+ const image = {
865
+ type: "image",
866
+ attrs: {
867
+ image: imageId
868
+ }
869
+ }
870
+
871
+ const caption = {
872
+ type: "figure_caption",
873
+ content: captionBlock?.content || []
874
+ }
875
+
876
+ const content =
877
+ captionOrder === "before" ? [caption, image] : [image, caption]
878
+
879
+ return {
880
+ type: "figure",
881
+ attrs: {
882
+ id: randomFigureId(),
883
+ aligned: "center",
884
+ width: 100,
885
+ caption: !!captionBlock
886
+ },
887
+ content
888
+ }
889
+ }
890
+
891
+ convertInline(node) {
892
+ const content = []
893
+
894
+ // We'll process all inline nodes in document order
895
+ node.children.forEach(child => {
896
+ let contentReceiver = content
897
+ const currentField =
898
+ this.currentFields[this.currentFields.length - 1]
899
+ if (currentField) {
900
+ if (currentField.status === "instruction") {
901
+ // We're currently inside the instruction part of a fieldChar
902
+ const instrText = child.query("w:instrText")
903
+ if (instrText) {
904
+ currentField.instructions += instrText.textContent
905
+ }
906
+ }
907
+ if (currentField.status === "display") {
908
+ // We're currently inside the display part of a fieldChar
909
+ contentReceiver = currentField.display
910
+ }
911
+ }
912
+ if (child.tagName === "w:r") {
913
+ // A run
914
+ const fieldChar = child.query("w:fldChar")
915
+ if (fieldChar) {
916
+ let currentField
917
+ let rendercurrentField = false
918
+ const type = fieldChar.getAttribute("w:fldCharType")
919
+ if (type === "begin") {
920
+ currentField = {
921
+ status: "instruction",
922
+ display: [],
923
+ instructions: "",
924
+ data: null
925
+ }
926
+ this.currentFields.push(currentField)
927
+ } else if (type === "separate") {
928
+ currentField =
929
+ this.currentFields[this.currentFields.length - 1]
930
+ currentField.status = "display"
931
+ contentReceiver = currentField.display
932
+ } else if (type === "end") {
933
+ currentField = this.currentFields.pop()
934
+ // If a fieldChar is closed and there was no display part,
935
+ // or it is inside another fieldChar, do nothing
936
+ if (
937
+ currentField &&
938
+ currentField.status === "display" &&
939
+ this.currentFields.length === 0
940
+ ) {
941
+ rendercurrentField = true
942
+ contentReceiver = content
943
+ }
944
+ }
945
+ // Capture base64-encoded field data (used by EndNote)
946
+ const fldDataNode = fieldChar.query("w:fldData")
947
+ if (fldDataNode && currentField) {
948
+ currentField.data = fldDataNode.textContent || null
949
+ }
950
+
951
+ if (rendercurrentField && currentField) {
952
+ this.renderField(currentField).forEach(node =>
953
+ contentReceiver.push(node)
954
+ )
955
+ }
956
+ return
957
+ }
958
+
959
+ // Process footnote references
960
+ const footnoteRef = child.query("w:footnoteReference")
961
+ if (footnoteRef) {
962
+ const footnoteId = footnoteRef.getAttribute("w:id")
963
+ if (this.parser.footnotes[footnoteId]) {
964
+ contentReceiver.push(this.convertFootnote(footnoteId))
965
+ }
966
+ return
967
+ }
968
+
969
+ // Process endnote references
970
+ const endnoteRef = child.query("w:endnoteReference")
971
+ if (endnoteRef) {
972
+ const endnoteId = endnoteRef.getAttribute("w:id")
973
+ if (this.parser.endnotes[endnoteId]) {
974
+ contentReceiver.push(
975
+ this.convertFootnote(endnoteId, true)
976
+ )
977
+ }
978
+ return
979
+ }
980
+
981
+ // Process text with formatting
982
+ const text =
983
+ child.query("w:t")?.textContent ||
984
+ child.query("w:delText")?.textContent
985
+ if (!text) {
986
+ // Process line breaks
987
+ if (child.query("w:br")) {
988
+ contentReceiver.push({type: "hard_break"})
989
+ }
990
+ return
991
+ }
992
+
993
+ const rPr = child.query("w:rPr")
994
+ const formatting = rPr
995
+ ? this.parser.extractRunProperties(rPr)
996
+ : {}
997
+ const insertion = child.closest("w:ins")
998
+ const deletion = child.closest("w:del")
999
+
1000
+ contentReceiver.push({
1001
+ type: "text",
1002
+ text,
1003
+ marks: this.getCurrentMarks(formatting, insertion, deletion)
1004
+ })
1005
+ } else if (child.tagName === "w:commentRangeStart") {
1006
+ const commentId = child.getAttribute("w:id")
1007
+ if (commentId && this.parser.comments[commentId]) {
1008
+ this.currentCommentIds.push(commentId)
1009
+ }
1010
+ return
1011
+ } else if (child.tagName === "w:commentRangeEnd") {
1012
+ const commentId = child.getAttribute("w:id")
1013
+ if (commentId) {
1014
+ const index = this.currentCommentIds.indexOf(commentId)
1015
+ if (index !== -1) {
1016
+ this.currentCommentIds.splice(index, 1)
1017
+ }
1018
+ }
1019
+ return
1020
+ } else if (
1021
+ child.tagName === "w:r" &&
1022
+ child.query("w:commentReference")
1023
+ ) {
1024
+ // Comment reference - just skip it (we already handle the range)
1025
+ return
1026
+ } else if (child.tagName === "w:hyperlink") {
1027
+ // Process hyperlink
1028
+ const rId = child.getAttribute("r:id")
1029
+ const anchor = child.getAttribute("w:anchor")
1030
+ const relationship = rId ? this.parser.relationships[rId] : null
1031
+ const href =
1032
+ relationship?.target || (anchor ? `#${anchor}` : null)
1033
+
1034
+ if (href) {
1035
+ const runs = child.queryAll("w:r")
1036
+ const text = runs
1037
+ .map(run => run.query("w:t")?.textContent || "")
1038
+ .join("")
1039
+
1040
+ if (text) {
1041
+ // Check if this is an internal link (bookmark reference) that should be a cross-reference
1042
+ if (anchor && this.referenceTargets[anchor]) {
1043
+ // If the link text is similar to the target text, treat it as a cross-reference
1044
+ const target = this.referenceTargets[anchor]
1045
+ const targetText = target.text || anchor
1046
+
1047
+ // Compare normalized versions to check if text matches target
1048
+ if (
1049
+ normalizeText(text) ===
1050
+ normalizeText(targetText) ||
1051
+ // Also check for "Figure X: " or "Table X: " style references
1052
+ text.match(
1053
+ /^(Figure|Table|Equation)\s+\d+(\.\d+)*(\:|\.)?\s*$/i
1054
+ )
1055
+ ) {
1056
+ contentReceiver.push(
1057
+ this.convertCrossReference(anchor, text)
1058
+ )
1059
+ return
1060
+ }
1061
+ }
1062
+
1063
+ // Otherwise, treat as a normal link
1064
+ const rPr = runs[0]?.query("w:rPr")
1065
+ const formatting = rPr
1066
+ ? this.parser.extractRunProperties(rPr)
1067
+ : {}
1068
+
1069
+ const marks = this.getCurrentMarks(formatting)
1070
+ marks.push({
1071
+ type: "link",
1072
+ attrs: {href, title: text}
1073
+ })
1074
+ contentReceiver.push({
1075
+ type: "text",
1076
+ text,
1077
+ marks
1078
+ })
1079
+ }
1080
+ }
1081
+ } else if (child.tagName === "m:oMath") {
1082
+ const equationNode = this.convertEquation(child)
1083
+ if (equationNode) {
1084
+ contentReceiver.push(equationNode)
1085
+ }
1086
+ } else if (child.tagName === "w:sdt") {
1087
+ if (isDocxSdtCitation(child)) {
1088
+ // Used by Mendeley Cite & Citavi
1089
+ const citationNode = parseDocxSdtCitation(
1090
+ child,
1091
+ this.bibliography
1092
+ )
1093
+ if (citationNode) {
1094
+ contentReceiver.push(citationNode)
1095
+ }
1096
+ }
1097
+ } else {
1098
+ console.warn("unhandled node", child)
1099
+ }
1100
+ })
1101
+
1102
+ return content
1103
+ }
1104
+
1105
+ // Method to help process cross-references in documents
1106
+ findReferenceTargets(document) {
1107
+ const targets = {}
1108
+
1109
+ // Find bookmarks
1110
+ document.queryAll("w:bookmarkStart").forEach(bookmark => {
1111
+ const id = bookmark.getAttribute("w:id")
1112
+ const name = bookmark.getAttribute("w:name")
1113
+ if (id && name) {
1114
+ targets[name] = {
1115
+ id: name,
1116
+ type: "bookmark"
1117
+ }
1118
+ }
1119
+ })
1120
+
1121
+ // Find headings (with styles like Heading1, Heading2, etc.)
1122
+ document.queryAll("w:pStyle").forEach(pStyle => {
1123
+ const val = pStyle.getAttribute("w:val")
1124
+ if (val && val.match(/^Heading\d+$/)) {
1125
+ const paragraph = pStyle.closest("w:p")
1126
+ if (paragraph) {
1127
+ const text = this.getTextContent(paragraph)
1128
+ // Create an ID from the heading text
1129
+ const id = text
1130
+ .trim()
1131
+ .toLowerCase()
1132
+ .replace(/[^\w\s-]/g, "")
1133
+ .replace(/\s+/g, "-")
1134
+
1135
+ targets[id] = {
1136
+ id: id,
1137
+ type: "heading",
1138
+ text: text
1139
+ }
1140
+ }
1141
+ }
1142
+ })
1143
+
1144
+ return targets
1145
+ }
1146
+
1147
+ convertFootnote(id, isEndnote = false) {
1148
+ const footnoteContent = isEndnote
1149
+ ? this.parser.endnotes[id].content
1150
+ : this.parser.footnotes[id].content
1151
+
1152
+ // Convert the footnote content to our document model
1153
+ const content = []
1154
+ footnoteContent.forEach(block => {
1155
+ if (block.type === "paragraph") {
1156
+ content.push({
1157
+ type: "paragraph",
1158
+ content: block.content.map(node => {
1159
+ if (node.type === "text") {
1160
+ return {
1161
+ type: "text",
1162
+ text: node.text,
1163
+ marks: node.marks || []
1164
+ }
1165
+ }
1166
+ return node
1167
+ })
1168
+ })
1169
+ }
1170
+ })
1171
+
1172
+ return {
1173
+ type: "footnote",
1174
+ attrs: {
1175
+ footnote: content
1176
+ }
1177
+ }
1178
+ }
1179
+
1180
+ convertEquation(oMathNode) {
1181
+ // Extract OMML content and convert to MathML
1182
+ const mmlNode = omml2mathml(oMathNode)
1183
+ const latex = MathMLToLaTeX.convert(mmlNode.outerXML)
1184
+ return {
1185
+ type: "equation",
1186
+ attrs: {
1187
+ equation: latex
1188
+ }
1189
+ }
1190
+ }
1191
+
1192
+ simplifiedOmmlToLatex(omml) {
1193
+ // This is a very basic conversion - in a real implementation you would
1194
+ // use a library like MathML-to-LaTeX or implement a more complete converter
1195
+
1196
+ // Extract text content as a fallback
1197
+ const textContent = omml
1198
+ .replace(/<[^>]+>/g, " ")
1199
+ .replace(/\s+/g, " ")
1200
+ .trim()
1201
+
1202
+ // If the OMML contains a fraction
1203
+ if (omml.includes("<m:f>")) {
1204
+ const numMatch = omml.match(/<m:num>(.*?)<\/m:num>/s)
1205
+ const denMatch = omml.match(/<m:den>(.*?)<\/m:den>/s)
1206
+
1207
+ if (numMatch && denMatch) {
1208
+ const num = numMatch[1].replace(/<[^>]+>/g, "").trim()
1209
+ const den = denMatch[1].replace(/<[^>]+>/g, "").trim()
1210
+ return `\\frac{${num}}{${den}}`
1211
+ }
1212
+ }
1213
+
1214
+ // If it contains a superscript
1215
+ if (omml.includes("<m:sup>")) {
1216
+ const baseMatch = omml.match(/<m:e>(.*?)<\/m:e>/s)
1217
+ const supMatch = omml.match(/<m:sup>(.*?)<\/m:sup>/s)
1218
+
1219
+ if (baseMatch && supMatch) {
1220
+ const base = baseMatch[1].replace(/<[^>]+>/g, "").trim()
1221
+ const sup = supMatch[1].replace(/<[^>]+>/g, "").trim()
1222
+ return `${base}^{${sup}}`
1223
+ }
1224
+ }
1225
+
1226
+ // If it contains a subscript
1227
+ if (omml.includes("<m:sub>")) {
1228
+ const baseMatch = omml.match(/<m:e>(.*?)<\/m:e>/s)
1229
+ const subMatch = omml.match(/<m:sub>(.*?)<\/m:sub>/s)
1230
+
1231
+ if (baseMatch && subMatch) {
1232
+ const base = baseMatch[1].replace(/<[^>]+>/g, "").trim()
1233
+ const sub = subMatch[1].replace(/<[^>]+>/g, "").trim()
1234
+ return `${base}_{${sub}}`
1235
+ }
1236
+ }
1237
+
1238
+ // Return a simplified representation with the text content
1239
+ return textContent || "x^2" // Default fallback
1240
+ }
1241
+
1242
+ renderField(field) {
1243
+ const instr = field.instructions.trim()
1244
+
1245
+ // Handle REF fields (cross-references)
1246
+ if (instr.startsWith("REF ")) {
1247
+ // Extract the target bookmark/heading ID
1248
+ const parts = instr.substring(4).trim().split(/\s+/)
1249
+ if (parts.length > 0) {
1250
+ const target = parts[0]
1251
+ const text = field.display.reduce(
1252
+ (accumulator, currentValue) => {
1253
+ if (currentValue.type === "text") {
1254
+ return accumulator + currentValue.text
1255
+ }
1256
+ return accumulator
1257
+ },
1258
+ ""
1259
+ )
1260
+ return [this.convertCrossReference(target, text)]
1261
+ }
1262
+ }
1263
+ // Handle SEQ fields (figure/table/equation number cross-references)
1264
+ else if (instr.startsWith("SEQ ")) {
1265
+ // This is a sequence field that generates numbers for figures/tables/equations.
1266
+ // For cross-references, we look for the text in the display part.
1267
+ const seqMatch = instr.match(/^SEQ\s+(\S+)/)
1268
+ if (seqMatch) {
1269
+ const _seqName = seqMatch[1]
1270
+ const text = field.display.reduce((acc, curr) => {
1271
+ if (curr.type === "text") {
1272
+ return acc + curr.text
1273
+ }
1274
+ return acc
1275
+ }, "")
1276
+ if (text) {
1277
+ // Return as a plain text node since we can't resolve SEQ references easily
1278
+ return [
1279
+ {
1280
+ type: "text",
1281
+ text,
1282
+ marks: []
1283
+ }
1284
+ ]
1285
+ }
1286
+ return []
1287
+ }
1288
+ }
1289
+ // Handle citation fields
1290
+ else if (isDocxCitationField(instr)) {
1291
+ return [
1292
+ parseDocxFieldCitation(
1293
+ instr,
1294
+ field.data,
1295
+ this.sourcesXml,
1296
+ this.bibliography
1297
+ )
1298
+ ]
1299
+ } else if (isDocxBibliographyField(instr)) {
1300
+ // We don't render the contents of bibliography fields
1301
+ return []
1302
+ } else {
1303
+ // We do not support this field type, so instead we return the display content.
1304
+ return field.display || []
1305
+ }
1306
+ }
1307
+
1308
+ convertCrossReference(targetId, displayText) {
1309
+ // Look up the target in our reference targets
1310
+ const target = this.referenceTargets[targetId]
1311
+
1312
+ // If we found the target, use its information
1313
+ if (target) {
1314
+ return {
1315
+ type: "cross_reference",
1316
+ attrs: {
1317
+ id: targetId,
1318
+ title: displayText || target.text || targetId
1319
+ }
1320
+ }
1321
+ }
1322
+
1323
+ // If target not found, create a reference with the display text or target ID
1324
+ return {
1325
+ type: "cross_reference",
1326
+ attrs: {
1327
+ id: targetId,
1328
+ title: displayText || targetId
1329
+ }
1330
+ }
1331
+ }
1332
+
1333
+ createMarksFromFormatting(formatting, insertion = null, deletion = null) {
1334
+ const marks = []
1335
+ if (formatting.bold) {
1336
+ marks.push({type: "strong"})
1337
+ }
1338
+ if (formatting.italic) {
1339
+ marks.push({type: "em"})
1340
+ }
1341
+ if (formatting.underline) {
1342
+ marks.push({type: "underline"})
1343
+ }
1344
+ // Handle superscript and subscript
1345
+ if (formatting.vertAlign === "superscript") {
1346
+ marks.push({type: "sup"})
1347
+ }
1348
+ if (formatting.vertAlign === "subscript") {
1349
+ marks.push({type: "sub"})
1350
+ }
1351
+ // Handle inline code (monospace fonts)
1352
+ if (formatting.fontFamily) {
1353
+ const monospacePatterns = [
1354
+ /^courier/i,
1355
+ /^consolas/i,
1356
+ /^monaco/i,
1357
+ /^menlo/i,
1358
+ /^lucida console/i,
1359
+ /^liberation mono/i,
1360
+ /^dejavu sans mono/i,
1361
+ /^bitstream vera sans mono/i,
1362
+ /^source code pro/i,
1363
+ /^fira code/i,
1364
+ /^ubuntu mono/i,
1365
+ /^droid sans mono/i
1366
+ ]
1367
+ const isMonospace = monospacePatterns.some(pattern =>
1368
+ pattern.test(formatting.fontFamily)
1369
+ )
1370
+ if (isMonospace) {
1371
+ marks.push({type: "code"})
1372
+ }
1373
+ }
1374
+ if (insertion) {
1375
+ const date = new Date(insertion.getAttribute("w:date"))
1376
+ const date10 = Math.floor(date.getTime() / 600000) * 10
1377
+ marks.push({
1378
+ type: "insertion",
1379
+ attrs: {
1380
+ user: 0,
1381
+ username: insertion.getAttribute("w:author"),
1382
+ date: date10,
1383
+ approved: false
1384
+ }
1385
+ })
1386
+ }
1387
+ if (deletion) {
1388
+ const date = new Date(deletion.getAttribute("w:date"))
1389
+ const date10 = Math.floor(date.getTime() / 600000) * 10
1390
+ marks.push({
1391
+ type: "deletion",
1392
+ attrs: {
1393
+ user: 0,
1394
+ username: deletion.getAttribute("w:author"),
1395
+ date: date10
1396
+ }
1397
+ })
1398
+ }
1399
+ return marks
1400
+ }
1401
+
1402
+ getCurrentMarks(formatting, insertion, deletion) {
1403
+ const marks = this.createMarksFromFormatting(
1404
+ formatting,
1405
+ insertion,
1406
+ deletion
1407
+ )
1408
+ // Add comment marks for any active comment IDs
1409
+ this.currentCommentIds.forEach(commentId => {
1410
+ marks.push({
1411
+ type: "comment",
1412
+ attrs: {
1413
+ id: Number.parseInt(commentId)
1414
+ }
1415
+ })
1416
+ })
1417
+ return marks
1418
+ }
1419
+
1420
+ hasTrackedChanges(doc) {
1421
+ return Boolean(doc.query("w:ins") || doc.query("w:del"))
1422
+ }
1423
+
1424
+ detectLanguage(doc) {
1425
+ return doc.query("w:lang")?.getAttribute("w:val") || "en-US"
1426
+ }
1427
+ }