@fiduswriter/document 0.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +16 -0
  3. package/jest.config.js +23 -0
  4. package/package.json +59 -0
  5. package/schema.json +1 -0
  6. package/scripts/export-schema.js +16 -0
  7. package/src/bibliography/common.js +92 -0
  8. package/src/bibliography/csl_bib.js +139 -0
  9. package/src/citations/citeproc_sys.js +42 -0
  10. package/src/citations/format.js +194 -0
  11. package/src/common/blob.js +10 -0
  12. package/src/common/file.js +25 -0
  13. package/src/common/index.js +12 -0
  14. package/src/common/network.js +79 -0
  15. package/src/common/text.js +44 -0
  16. package/src/editor/e2ee/encryptor.js +228 -0
  17. package/src/exporter/docx/citations.js +177 -0
  18. package/src/exporter/docx/comments.js +165 -0
  19. package/src/exporter/docx/footnotes.js +240 -0
  20. package/src/exporter/docx/images.js +101 -0
  21. package/src/exporter/docx/index.js +185 -0
  22. package/src/exporter/docx/lists.js +260 -0
  23. package/src/exporter/docx/math.js +46 -0
  24. package/src/exporter/docx/metadata.js +289 -0
  25. package/src/exporter/docx/rels.js +193 -0
  26. package/src/exporter/docx/render.js +941 -0
  27. package/src/exporter/docx/richtext.js +1182 -0
  28. package/src/exporter/docx/tables.js +112 -0
  29. package/src/exporter/docx/tools.js +50 -0
  30. package/src/exporter/epub/index.js +142 -0
  31. package/src/exporter/epub/templates.js +140 -0
  32. package/src/exporter/epub/tools.js +96 -0
  33. package/src/exporter/html/citations.js +121 -0
  34. package/src/exporter/html/convert.js +813 -0
  35. package/src/exporter/html/index.js +192 -0
  36. package/src/exporter/html/templates.js +34 -0
  37. package/src/exporter/html/tools.js +50 -0
  38. package/src/exporter/jats/bibliography.js +183 -0
  39. package/src/exporter/jats/citations.js +109 -0
  40. package/src/exporter/jats/convert.js +871 -0
  41. package/src/exporter/jats/index.js +92 -0
  42. package/src/exporter/jats/templates.js +35 -0
  43. package/src/exporter/jats/text.js +72 -0
  44. package/src/exporter/latex/convert.js +934 -0
  45. package/src/exporter/latex/escape_latex.js +21 -0
  46. package/src/exporter/latex/index.js +74 -0
  47. package/src/exporter/latex/readme.js +22 -0
  48. package/src/exporter/native/shrink.js +132 -0
  49. package/src/exporter/odt/citations.js +101 -0
  50. package/src/exporter/odt/footnotes.js +147 -0
  51. package/src/exporter/odt/images.js +115 -0
  52. package/src/exporter/odt/index.js +156 -0
  53. package/src/exporter/odt/math.js +57 -0
  54. package/src/exporter/odt/metadata.js +251 -0
  55. package/src/exporter/odt/render.js +806 -0
  56. package/src/exporter/odt/richtext.js +865 -0
  57. package/src/exporter/odt/styles.js +387 -0
  58. package/src/exporter/odt/track.js +68 -0
  59. package/src/exporter/pandoc/citations.js +98 -0
  60. package/src/exporter/pandoc/convert.js +1017 -0
  61. package/src/exporter/pandoc/index.js +92 -0
  62. package/src/exporter/pandoc/readme.js +8 -0
  63. package/src/exporter/pandoc/tools.js +51 -0
  64. package/src/exporter/print/index.js +177 -0
  65. package/src/exporter/tools/doc_content.js +144 -0
  66. package/src/exporter/tools/file.js +9 -0
  67. package/src/exporter/tools/json.js +73 -0
  68. package/src/exporter/tools/svg.js +29 -0
  69. package/src/exporter/tools/xml.js +531 -0
  70. package/src/exporter/tools/xml_zip.js +95 -0
  71. package/src/exporter/tools/zip.js +90 -0
  72. package/src/exporter/tools/zotero_csl.js +93 -0
  73. package/src/importer/citations.js +129 -0
  74. package/src/importer/docx/citations.js +123 -0
  75. package/src/importer/docx/convert.js +1427 -0
  76. package/src/importer/docx/helpers.js +9 -0
  77. package/src/importer/docx/omml2mathml.js +1448 -0
  78. package/src/importer/docx/parse.js +735 -0
  79. package/src/importer/native/get_images.js +76 -0
  80. package/src/importer/native/update.js +29 -0
  81. package/src/importer/odt/citations.js +87 -0
  82. package/src/importer/odt/convert.js +1855 -0
  83. package/src/importer/pandoc/convert.js +884 -0
  84. package/src/importer/pandoc/helpers.js +84 -0
  85. package/src/importer/zip_analyzer.js +102 -0
  86. package/src/index.js +1 -0
  87. package/src/mathlive/opf_includes.js +24 -0
  88. package/src/schema/common/annotate.js +76 -0
  89. package/src/schema/common/base.js +118 -0
  90. package/src/schema/common/citation.js +62 -0
  91. package/src/schema/common/equation.js +31 -0
  92. package/src/schema/common/figure.js +190 -0
  93. package/src/schema/common/heading.js +43 -0
  94. package/src/schema/common/index.js +40 -0
  95. package/src/schema/common/list.js +95 -0
  96. package/src/schema/common/reference.js +100 -0
  97. package/src/schema/common/table.js +103 -0
  98. package/src/schema/common/track.js +190 -0
  99. package/src/schema/const.js +58 -0
  100. package/src/schema/convert.js +1272 -0
  101. package/src/schema/document/content.js +187 -0
  102. package/src/schema/document/index.js +117 -0
  103. package/src/schema/document/structure.js +452 -0
  104. package/src/schema/export.js +21 -0
  105. package/src/schema/footnotes.js +126 -0
  106. package/src/schema/footnotes_convert.js +31 -0
  107. package/src/schema/i18n.js +595 -0
  108. package/src/schema/index.js +5 -0
  109. package/src/schema/mini_json.js +61 -0
  110. package/src/schema/text.js +22 -0
@@ -0,0 +1,1855 @@
1
+ import {MathMLToLaTeX} from "mathml-to-latex"
2
+
3
+ import {xmlDOM} from "../../exporter/tools/xml.js"
4
+ import {
5
+ randomCommentId,
6
+ randomFigureId,
7
+ randomHeadingId,
8
+ randomListId,
9
+ randomTableId
10
+ } from "../../schema/common/index.js"
11
+ import {parseTracks} from "../../schema/common/track.js"
12
+ import {
13
+ isOdtBibliographyReferenceMark,
14
+ isOdtBibliographySection,
15
+ isOdtCitationMark,
16
+ parseOdtBibliographyMark,
17
+ parseOdtReferenceMark
18
+ } from "./citations.js"
19
+
20
+ export class OdtConvert {
21
+ constructor(
22
+ contentXml,
23
+ stylesXml,
24
+ metaXml,
25
+ manifestXml,
26
+ importId,
27
+ template,
28
+ bibliography,
29
+ bibDb
30
+ ) {
31
+ this.importId = importId
32
+ this.template = template
33
+ this.bibliography = bibliography
34
+ this.bibDB = bibDb
35
+ this.images = {}
36
+ this.styles = {}
37
+
38
+ this.contentDoc = contentXml ? xmlDOM(contentXml) : null
39
+ this.stylesDoc = stylesXml ? xmlDOM(stylesXml) : null
40
+ this.metaDoc = metaXml ? xmlDOM(metaXml) : null
41
+ this.manifestDoc = manifestXml ? xmlDOM(manifestXml) : null
42
+
43
+ this.tracks = {}
44
+ this.comments = {}
45
+ this.currentCommentIds = []
46
+ this.currentTracks = []
47
+ this.referenceableObjects = {} // All objects that can be referenced
48
+ }
49
+
50
+ init() {
51
+ this.parseTrackedChanges()
52
+ this.parseStyles()
53
+ this.parseComments()
54
+
55
+ this.collectReferenceableObjects(this.contentDoc)
56
+ const content = this.convert()
57
+ return {
58
+ content,
59
+ settings: {
60
+ import_id: this.importId,
61
+ tracked: Object.keys(this.tracks).length > 0,
62
+ language: this.detectLanguage()
63
+ },
64
+ comments: this.comments
65
+ }
66
+ }
67
+
68
+ parseTrackedChanges() {
69
+ const trackedChangesEl = this.contentDoc.query("text:tracked-changes")
70
+ if (!trackedChangesEl) {
71
+ return
72
+ }
73
+
74
+ // Tracked deletions are stored in two different ways in FW and ODT.
75
+ // FW: The deleted content stays in place where it was before the deletion,
76
+ // and is marked with a tracked change mark. Megre only occurs after change
77
+ // has been accepted.
78
+ // ODT: The deleted content is removed from the content flow and is replaced by a marker.
79
+ // The removed content is stored in a special section of the document.
80
+ // This method takes all the deleted content and puts it back into the place where
81
+ // it was previously. That way the structure is more similar to the output FW document
82
+ // and is more easily converted.
83
+ const deletions = {}
84
+
85
+ const changedRegions = trackedChangesEl.queryAll("text:changed-region")
86
+ changedRegions.forEach(region => {
87
+ const id = region.getAttribute("text:id")
88
+
89
+ const insertion = region.query("text:insertion")
90
+ const deletion = region.query("text:deletion")
91
+ if (!insertion && !deletion) {
92
+ // Neither insertion or deletion. Must be type unknown to us
93
+ return
94
+ }
95
+ const changeInfo = region.query("office:change-info")
96
+ if (changeInfo) {
97
+ const track = {
98
+ type: insertion ? "insertion" : "deletion",
99
+ user: 1,
100
+ username: changeInfo.query("dc:creator")?.textContent || "",
101
+ date: parseInt(
102
+ new Date(
103
+ changeInfo.query("dc:date")?.textContent || ""
104
+ ).getTime() / 60000
105
+ )
106
+ }
107
+ if (insertion) {
108
+ track.approved = false
109
+ }
110
+ this.tracks[id] = track
111
+
112
+ if (deletion) {
113
+ // Store deletion content for later use
114
+ deletions[id] = deletion.children.filter(
115
+ child => child.tagName !== "office:change-info"
116
+ )
117
+ }
118
+ }
119
+ })
120
+
121
+ // Then find and replace all deletion change markers
122
+ const changeMarkers = this.contentDoc.queryAll("text:change")
123
+ changeMarkers.forEach(marker => {
124
+ const changeId = marker.getAttribute("text:change-id")
125
+ const deletion = deletions[changeId]
126
+ if (deletion) {
127
+ if (deletion.length > 0) {
128
+ // Create change-start and change-end elements
129
+ const markerIndex =
130
+ marker.parentElement.children.indexOf(marker)
131
+
132
+ marker.parentElement.insertXMLAt(
133
+ `<text:change-start text:change-id="${changeId}"/>`,
134
+ markerIndex
135
+ )
136
+ marker.parentElement.insertXMLAt(
137
+ `<text:change-end text:change-id="${changeId}"/>`,
138
+ markerIndex + 2
139
+ )
140
+
141
+ if (deletion.length === 1) {
142
+ // Single block - just insert the content
143
+ deletion[0].children.forEach(content => {
144
+ marker.parentElement.insertBefore(content, marker)
145
+ })
146
+ } else {
147
+ // Multiple blocks - need to split the paragraph/headline
148
+ const parentElement = marker.parentElement
149
+ parentElement.splitAtChildElement(
150
+ marker,
151
+ deletion[0].children
152
+ ?.map(node => node.toString())
153
+ .join("") || "", // First block content to be added to current node
154
+ deletion
155
+ .slice(1, -1)
156
+ .map(node => node.toString())
157
+ .join(""), // Middle blocks
158
+ deletion[deletion.length - 1].toString() // Last block
159
+ )
160
+ }
161
+ }
162
+ // Remove the original change marker
163
+ marker.parentElement.removeChild(marker)
164
+ }
165
+ })
166
+ }
167
+
168
+ parseStyles() {
169
+ if (!this.stylesDoc) {
170
+ return
171
+ }
172
+ const styleNodes = this.stylesDoc.queryAll("style:style")
173
+ styleNodes.forEach(node => {
174
+ const styleName = node.getAttribute("style:name")
175
+ this.styles[styleName] = this.parseStyle(node)
176
+ })
177
+ const contentStyleNodes = this.contentDoc.queryAll("style:style")
178
+ contentStyleNodes.forEach(node => {
179
+ const styleName = node.getAttribute("style:name")
180
+ this.styles[styleName] = this.parseStyle(node)
181
+ })
182
+ }
183
+
184
+ parseStyle(styleNode) {
185
+ const properties = {
186
+ // Basic style information
187
+ parentStyleName: styleNode.getAttribute("style:parent-style-name"),
188
+ isSection:
189
+ styleNode.getAttribute("style:family") === "section" ||
190
+ Boolean(styleNode.query("style:section-properties")),
191
+ title: styleNode.getAttribute("style:display-name"),
192
+
193
+ // Family and name info
194
+ family: styleNode.getAttribute("style:family"),
195
+ name: styleNode.getAttribute("style:name"),
196
+
197
+ // Heading related
198
+ isHeading:
199
+ styleNode.getAttribute("style:family") === "paragraph" &&
200
+ (styleNode
201
+ .getAttribute("style:name")
202
+ .toLowerCase()
203
+ .includes("heading") ||
204
+ styleNode
205
+ .getAttribute("style:parent-style-name")
206
+ ?.toLowerCase()
207
+ .includes("heading")),
208
+ outlineLevel: styleNode.getAttribute("text:outline-level"),
209
+
210
+ // Text properties
211
+ textProperties: {},
212
+
213
+ // Paragraph properties
214
+ paragraphProperties: {},
215
+
216
+ // Section properties
217
+ sectionProperties: {}
218
+ }
219
+
220
+ // Parse text properties
221
+ const textProperties = styleNode.query("style:text-properties")
222
+ if (textProperties) {
223
+ properties.textProperties = {
224
+ bold: textProperties.getAttribute("fo:font-weight") === "bold",
225
+ italic:
226
+ textProperties.getAttribute("fo:font-style") === "italic",
227
+ fontSize: this.convertLength(
228
+ textProperties.getAttribute("fo:font-size")
229
+ ),
230
+ fontFamily: textProperties.getAttribute("fo:font-family"),
231
+ color: textProperties.getAttribute("fo:color"),
232
+ backgroundColor: textProperties.getAttribute(
233
+ "fo:background-color"
234
+ ),
235
+ textDecoration:
236
+ textProperties.getAttribute("style:text-underline-style") ||
237
+ textProperties.getAttribute(
238
+ "style:text-line-through-style"
239
+ ),
240
+ textPosition: textProperties.getAttribute("style:text-position")
241
+ }
242
+ }
243
+
244
+ // Parse paragraph properties
245
+ const paragraphProperties = styleNode.query(
246
+ "style:paragraph-properties"
247
+ )
248
+ if (paragraphProperties) {
249
+ properties.paragraphProperties = {
250
+ marginTop: this.convertLength(
251
+ paragraphProperties.getAttribute("fo:margin-top")
252
+ ),
253
+ marginBottom: this.convertLength(
254
+ paragraphProperties.getAttribute("fo:margin-bottom")
255
+ ),
256
+ marginLeft: this.convertLength(
257
+ paragraphProperties.getAttribute("fo:margin-left")
258
+ ),
259
+ marginRight: this.convertLength(
260
+ paragraphProperties.getAttribute("fo:margin-right")
261
+ ),
262
+ textAlign: paragraphProperties.getAttribute("fo:text-align"),
263
+ lineHeight: paragraphProperties.getAttribute("fo:line-height"),
264
+ backgroundColor: paragraphProperties.getAttribute(
265
+ "fo:background-color"
266
+ ),
267
+ padding: this.convertLength(
268
+ paragraphProperties.getAttribute("fo:padding")
269
+ ),
270
+ borderStyle: paragraphProperties.getAttribute("fo:border-style")
271
+ }
272
+ }
273
+
274
+ // Parse section properties
275
+ const sectionProperties = styleNode.query("style:section-properties")
276
+ if (sectionProperties) {
277
+ properties.sectionProperties = {
278
+ columnCount: sectionProperties.getAttribute("fo:column-count"),
279
+ columnGap: this.convertLength(
280
+ sectionProperties.getAttribute("fo:column-gap")
281
+ ),
282
+ backgroundColor: sectionProperties.getAttribute(
283
+ "fo:background-color"
284
+ ),
285
+ margins: {
286
+ top: this.convertLength(
287
+ sectionProperties.getAttribute("fo:margin-top")
288
+ ),
289
+ bottom: this.convertLength(
290
+ sectionProperties.getAttribute("fo:margin-bottom")
291
+ ),
292
+ left: this.convertLength(
293
+ sectionProperties.getAttribute("fo:margin-left")
294
+ ),
295
+ right: this.convertLength(
296
+ sectionProperties.getAttribute("fo:margin-right")
297
+ )
298
+ }
299
+ }
300
+ }
301
+
302
+ // Additional table-specific properties
303
+ if (styleNode.getAttribute("style:family") === "table") {
304
+ properties.tableProperties = {
305
+ align: styleNode.getAttribute("table:align"),
306
+ width: this.convertLength(
307
+ styleNode.getAttribute("style:width")
308
+ ),
309
+ relWidth: styleNode.getAttribute("style:rel-width")
310
+ }
311
+ }
312
+
313
+ return properties
314
+ }
315
+
316
+ convertObject(node, attrs) {
317
+ const mathEl = node.query("math")
318
+ if (mathEl) {
319
+ attrs = Object.assign(
320
+ {
321
+ equation: MathMLToLaTeX.convert(mathEl.innerXML)
322
+ },
323
+ attrs
324
+ )
325
+ return {
326
+ type: "equation",
327
+ attrs
328
+ }
329
+ }
330
+ return null
331
+ }
332
+
333
+ parseComments() {
334
+ const annotations = this.contentDoc.queryAll("office:annotation")
335
+ annotations.forEach(annotation => {
336
+ const username = annotation.query("dc:creator")?.textContent || ""
337
+ const date = new Date(
338
+ annotation.query("dc:date")?.textContent || ""
339
+ ).getTime()
340
+
341
+ const id = (annotation.getAttribute("office:name") || "")
342
+ .replace(/\D/g, "")
343
+ .slice(0, 9)
344
+
345
+ if (id) {
346
+ // main comment
347
+ this.comments[id] = {
348
+ user: 0,
349
+ username,
350
+ date,
351
+ comment: annotation
352
+ .queryAll("text:p")
353
+ .map(par => this.convertBlockNode(par))
354
+ .filter(par => par)
355
+ .flat(),
356
+ answers: [],
357
+ resolved:
358
+ annotation.getAttribute("loext:resolved") === "true"
359
+ }
360
+ } else {
361
+ const parentId = (
362
+ annotation.getAttribute("loext:parent-name") || ""
363
+ )
364
+ .replace(/\D/g, "")
365
+ .slice(0, 9)
366
+ if (parentId && this.comments[parentId]) {
367
+ this.comments[parentId].answers.push({
368
+ id: randomCommentId(),
369
+ user: 0,
370
+ username,
371
+ date,
372
+ // drop the frist paragraph. It only contains "Reply to...."
373
+ answer: annotation
374
+ .queryAll("text:p")
375
+ .slice(1)
376
+ .map(par => this.convertBlockNode(par))
377
+ .filter(par => par)
378
+ .flat()
379
+ })
380
+ }
381
+ }
382
+ })
383
+ }
384
+
385
+ collectReferenceableObjects(node) {
386
+ // Handle heading bookmarks
387
+ const bookmarkStarts = node.queryAll("text:bookmark-start")
388
+ bookmarkStarts.forEach(mark => {
389
+ const refName = mark.getAttribute("text:name")
390
+ if (!refName) {
391
+ return
392
+ }
393
+
394
+ // Find the closest heading
395
+ let targetParent = mark.parentElement
396
+ while (targetParent) {
397
+ if (targetParent.tagName === "text:h") {
398
+ const id = randomHeadingId()
399
+ this.referenceableObjects[refName] = {
400
+ type: "heading",
401
+ id,
402
+ node: targetParent
403
+ }
404
+ break
405
+ }
406
+ targetParent = targetParent.parentElement
407
+ }
408
+ })
409
+
410
+ // Handle figure sequences
411
+ const sequences = node.queryAll("text:sequence")
412
+ sequences.forEach(sequence => {
413
+ const refName = sequence.getAttribute("text:ref-name")
414
+ if (!refName) {
415
+ return
416
+ }
417
+
418
+ // Find the figure container
419
+ let targetParent = sequence.parentElement
420
+ while (targetParent) {
421
+ if (targetParent.tagName === "draw:frame") {
422
+ const id = randomFigureId()
423
+ this.referenceableObjects[refName] = {
424
+ type: "figure",
425
+ id,
426
+ node: targetParent
427
+ }
428
+ break
429
+ }
430
+ targetParent = targetParent.parentElement
431
+ }
432
+ })
433
+ }
434
+
435
+ convert() {
436
+ const templateParts = this.template.content.content.slice()
437
+ templateParts.shift()
438
+
439
+ const document = {
440
+ type: "doc",
441
+ attrs: {
442
+ import_id: this.importId
443
+ },
444
+ content: []
445
+ }
446
+
447
+ // Add title (required first element)
448
+ const title = this.extractTitle()
449
+
450
+ if (title.content.length) {
451
+ document.content.push({
452
+ type: "title",
453
+ content: title.content
454
+ })
455
+ } else {
456
+ // If no title found, use default title
457
+ document.content.push({
458
+ type: "title",
459
+ content: [
460
+ {
461
+ type: "text",
462
+ text: gettext("Untitled")
463
+ }
464
+ ]
465
+ })
466
+ }
467
+ title.containerNodes.forEach(node => {
468
+ node.parentElement.removeChild(node)
469
+ })
470
+
471
+ document.attrs.title =
472
+ title.content.map(node => node.textContent).join("") ||
473
+ gettext("Untitled")
474
+
475
+ // Get all content sections from the ODT
476
+ const body = this.contentDoc.query("office:text")
477
+ if (!body) {
478
+ return document
479
+ }
480
+
481
+ // Look for metadata sections first (author, abstract, etc.)
482
+ const metadataContent = this.extractMetadata()
483
+ metadataContent.forEach(({type, attrs, content}) => {
484
+ const templatePart = templateParts.find(
485
+ part => part.attrs.metadata === type
486
+ )
487
+ if (templatePart) {
488
+ document.content.push({
489
+ type: templatePart.type,
490
+ attrs: {
491
+ ...templatePart.attrs,
492
+ ...attrs
493
+ },
494
+ content: content.content
495
+ })
496
+ // Remove paragraphs from content so they are not added to body
497
+ content.containerNodes.forEach(node => {
498
+ node.parentElement.removeChild(node)
499
+ })
500
+ }
501
+ })
502
+
503
+ // Group remaining content by sections based on style names/titles
504
+ const sections = this.groupContentIntoSections(body)
505
+
506
+ // Map ODT sections to template parts
507
+ sections.forEach(section => {
508
+ // Find matching template part
509
+ const templatePart = this.findMatchingTemplatePart(
510
+ section.title,
511
+ templateParts
512
+ )
513
+
514
+ if (templatePart) {
515
+ // If template part found, use its configuration
516
+ document.content.push({
517
+ type: "richtext_part",
518
+ attrs: {
519
+ title: templatePart.attrs.title,
520
+ id: templatePart.attrs.id,
521
+ metadata: templatePart.attrs.metadata || undefined,
522
+ marks: templatePart.attrs.marks || [
523
+ "strong",
524
+ "em",
525
+ "link"
526
+ ]
527
+ },
528
+ content: section.content
529
+ })
530
+ }
531
+ })
532
+
533
+ // Add remaining content to body section
534
+ const unassignedContent = sections
535
+ .filter(
536
+ section =>
537
+ !this.findMatchingTemplatePart(section.title, templateParts)
538
+ )
539
+ .flatMap(section => section.content)
540
+
541
+ if (unassignedContent.length) {
542
+ // Find default body template part
543
+ const bodyTemplatePart = templateParts.find(
544
+ part => !part.attrs.metadata && part.type === "richtext_part"
545
+ )
546
+
547
+ document.content.push({
548
+ type: "richtext_part",
549
+ attrs: {
550
+ title: bodyTemplatePart
551
+ ? bodyTemplatePart.attrs.title
552
+ : "Body",
553
+ id: bodyTemplatePart ? bodyTemplatePart.attrs.id : "body",
554
+ marks: ["strong", "em", "link"]
555
+ },
556
+ content: unassignedContent
557
+ })
558
+ }
559
+
560
+ return document
561
+ }
562
+
563
+ extractMetadata() {
564
+ const metadata = []
565
+
566
+ // Try structured contributor data from meta.xml first
567
+ const contributorsByRole = this.extractContributorsFromMeta()
568
+ if (Object.keys(contributorsByRole).length) {
569
+ Object.entries(contributorsByRole).forEach(
570
+ ([role, contributors]) => {
571
+ metadata.push({
572
+ type: role,
573
+ content: {content: contributors, containerNodes: []}
574
+ })
575
+ }
576
+ )
577
+ } else {
578
+ // Fall back to legacy author extraction
579
+ const authors = this.extractAuthors()
580
+ if (authors.content.length) {
581
+ metadata.push({
582
+ type: "authors",
583
+ content: authors
584
+ })
585
+ }
586
+ }
587
+
588
+ // Extract abstract if present
589
+ const abstract = this.extractAbstract()
590
+ if (abstract.content.length) {
591
+ metadata.push({
592
+ type: "abstract",
593
+ content: abstract
594
+ })
595
+ }
596
+
597
+ // Extract keywords if present
598
+ const keywords = this.extractKeywords()
599
+ if (keywords.content.length) {
600
+ metadata.push({
601
+ type: "keywords",
602
+ content: keywords
603
+ })
604
+ }
605
+
606
+ return metadata
607
+ }
608
+
609
+ extractContributorsFromMeta() {
610
+ if (!this.metaDoc) {
611
+ return {}
612
+ }
613
+
614
+ const userDefined = this.metaDoc.queryAll("meta:user-defined")
615
+ const contributors = []
616
+
617
+ userDefined.forEach(prop => {
618
+ const name = prop.getAttribute("meta:name")
619
+ if (!name || !name.startsWith("fidus_contributor_")) {
620
+ return
621
+ }
622
+ const match = name.match(/^fidus_contributor_(\d+)_(\w+)$/)
623
+ if (!match) {
624
+ return
625
+ }
626
+ const num = parseInt(match[1])
627
+ const field = match[2]
628
+ const value = prop.textContent || ""
629
+
630
+ if (!contributors[num - 1]) {
631
+ contributors[num - 1] = {
632
+ type: "contributor",
633
+ attrs: {
634
+ firstname: "",
635
+ lastname: "",
636
+ email: "",
637
+ institution: "",
638
+ id_type: "",
639
+ id_value: "",
640
+ role: ""
641
+ }
642
+ }
643
+ }
644
+ if (field === "role") {
645
+ contributors[num - 1].attrs.role = value
646
+ } else if (
647
+ [
648
+ "firstname",
649
+ "lastname",
650
+ "email",
651
+ "institution",
652
+ "id_type",
653
+ "id_value"
654
+ ].includes(field)
655
+ ) {
656
+ contributors[num - 1].attrs[field] = value
657
+ }
658
+ })
659
+
660
+ const byRole = {}
661
+ contributors.forEach(contributor => {
662
+ if (!contributor) {
663
+ return
664
+ }
665
+ const role = contributor.attrs.role || "authors"
666
+ if (!byRole[role]) {
667
+ byRole[role] = []
668
+ }
669
+ byRole[role].push(contributor)
670
+ })
671
+
672
+ return byRole
673
+ }
674
+
675
+ extractAuthors() {
676
+ const authors = []
677
+
678
+ // Try to find author information in metadata
679
+ const metaAuthors = this.contentDoc.queryAll("meta:user-defined", {
680
+ "meta:name": "author"
681
+ })
682
+ metaAuthors.forEach(authorMeta => {
683
+ const authorText = authorMeta.textContent
684
+ const [firstname = "", lastname = ""] = authorText.split(" ", 2)
685
+ authors.push({
686
+ type: "contributor",
687
+ attrs: {
688
+ firstname,
689
+ lastname,
690
+ email: "",
691
+ institution: ""
692
+ }
693
+ })
694
+ })
695
+ if (authors.length) {
696
+ return {
697
+ content: authors,
698
+ containerNodes: metaAuthors
699
+ }
700
+ }
701
+
702
+ // Also check for creator in document metadata
703
+ const creator = this.contentDoc.query("meta:creator")
704
+ if (creator) {
705
+ const [firstname = "", lastname = ""] = creator.textContent.split(
706
+ " ",
707
+ 2
708
+ )
709
+ return {
710
+ content: [
711
+ {
712
+ type: "contributor",
713
+ attrs: {
714
+ firstname,
715
+ lastname,
716
+ email: "",
717
+ institution: ""
718
+ }
719
+ }
720
+ ],
721
+ containerNodes: []
722
+ }
723
+ }
724
+
725
+ return {content: [], containerNodes: []}
726
+ }
727
+
728
+ extractAbstract() {
729
+ // Look for section titled "Abstract" or with abstract style
730
+ const abstractSection =
731
+ this.contentDoc.query("text:section", {
732
+ "text:style-name": "Abstract"
733
+ }) ||
734
+ this.contentDoc.query("text:h", {
735
+ "text:outline-level": "1"
736
+ }) // Then check content for "Abstract"
737
+
738
+ if (
739
+ abstractSection &&
740
+ (abstractSection.getAttribute("text:style-name") === "Abstract" ||
741
+ abstractSection.textContent.includes("Abstract"))
742
+ ) {
743
+ return {
744
+ content: this.convertContainer(abstractSection),
745
+ containerNodes: [abstractSection]
746
+ }
747
+ }
748
+
749
+ return {
750
+ content: [],
751
+ containerNodes: []
752
+ }
753
+ }
754
+
755
+ extractKeywords() {
756
+ // Look for keywords section or metadata
757
+ const keywordsSection =
758
+ this.contentDoc.query("text:p", {"text:style-name": "Keywords"}) ||
759
+ this.contentDoc.query("meta:user-defined", {
760
+ "meta:name": "keywords"
761
+ })
762
+
763
+ if (keywordsSection) {
764
+ return {
765
+ content: this.convertContainer(keywordsSection),
766
+ containerNodes: [keywordsSection]
767
+ }
768
+ }
769
+
770
+ return {content: [], containerNodes: []}
771
+ }
772
+
773
+ findMatchingTemplatePart(sectionTitle, templateParts) {
774
+ if (!sectionTitle) {
775
+ return null
776
+ }
777
+
778
+ // Try exact match first
779
+ let matchingPart = templateParts.find(
780
+ part =>
781
+ part.type === "richtext_part" &&
782
+ !part.attrs.metadata &&
783
+ part.attrs.title.toLowerCase() === sectionTitle.toLowerCase()
784
+ )
785
+
786
+ if (!matchingPart) {
787
+ // Try fuzzy matching if exact match fails
788
+ matchingPart = templateParts.find(
789
+ part =>
790
+ part.type === "richtext_part" &&
791
+ !part.attrs.metadata &&
792
+ this.isSimilarTitle(part.attrs.title, sectionTitle)
793
+ )
794
+ }
795
+
796
+ return matchingPart
797
+ }
798
+
799
+ isSimilarTitle(title1, title2) {
800
+ // Remove special characters and extra spaces
801
+ const normalize = str =>
802
+ str
803
+ .toLowerCase()
804
+ .replace(/[^a-z0-9]/g, "")
805
+ .trim()
806
+
807
+ const normalized1 = normalize(title1)
808
+ const normalized2 = normalize(title2)
809
+
810
+ // Check if one string contains the other
811
+ return (
812
+ normalized1.includes(normalized2) ||
813
+ normalized2.includes(normalized1)
814
+ )
815
+ }
816
+
817
+ extractTitle() {
818
+ // First try to find paragraph with Title style
819
+ const titleParagraph = this.contentDoc.query("text:p", {
820
+ "text:style-name": "Title"
821
+ })
822
+ if (titleParagraph) {
823
+ return {
824
+ content: this.convertBlockNode(titleParagraph)?.content || [],
825
+ containerNodes: [titleParagraph]
826
+ }
827
+ }
828
+
829
+ // Fall back to first heading
830
+ const titleHeading = this.contentDoc.query("text:h", {
831
+ "text:outline-level": "1"
832
+ })
833
+ if (titleHeading) {
834
+ return {
835
+ content: this.convertBlockNode(titleHeading)?.content || [],
836
+ containerNodes: [titleHeading]
837
+ }
838
+ }
839
+
840
+ // Check for other common title style names
841
+ const commonTitleStyles = [
842
+ "title",
843
+ "doctitle",
844
+ "document-title",
845
+ "heading-title"
846
+ ]
847
+ for (const styleName of commonTitleStyles) {
848
+ const titleElement = this.contentDoc.query("text:p", {
849
+ "text:style-name": styleName
850
+ })
851
+ if (titleElement) {
852
+ return {
853
+ content: this.convertBlockNode(titleElement)?.content || [],
854
+ containerNodes: [titleElement]
855
+ }
856
+ }
857
+ }
858
+
859
+ // Check style properties for title-like formatting
860
+ const firstParagraph = this.contentDoc.query("text:p")
861
+ if (firstParagraph) {
862
+ const styleName = firstParagraph.getAttribute("text:style-name")
863
+ const style = this.styles[styleName]
864
+
865
+ if (style && this.isTitleStyle(style)) {
866
+ // Remove this node from the document so it's not processed again
867
+ return {
868
+ content:
869
+ this.convertBlockNode(firstParagraph)?.content || [],
870
+ containerNodes: [firstParagraph]
871
+ }
872
+ }
873
+ }
874
+
875
+ return {
876
+ content: [],
877
+ containerNodes: []
878
+ }
879
+ }
880
+
881
+ isTitleStyle(style) {
882
+ // Check if style or its parent has characteristics of a title style
883
+ if (!style) {
884
+ return false
885
+ }
886
+
887
+ // Check style name
888
+ if (style.title?.toLowerCase().includes("title")) {
889
+ return true
890
+ }
891
+
892
+ // Check text properties for title-like formatting
893
+ const textProps = style.textProperties
894
+ if (textProps) {
895
+ // Title usually has larger font size and/or bold weight
896
+ if (textProps.fontSize > 14 || textProps.bold) {
897
+ return true
898
+ }
899
+ }
900
+
901
+ // Check paragraph properties
902
+ const paraProps = style.paragraphProperties
903
+ if (paraProps) {
904
+ // Titles are often centered and have larger margins
905
+ if (
906
+ paraProps.textAlign === "center" ||
907
+ (paraProps.marginTop > 0.5 && paraProps.marginBottom > 0.5)
908
+ ) {
909
+ return true
910
+ }
911
+ }
912
+
913
+ // Check parent style if exists
914
+ if (style.parentStyleName) {
915
+ const parentStyle = this.styles[style.parentStyleName]
916
+ return this.isTitleStyle(parentStyle)
917
+ }
918
+
919
+ return false
920
+ }
921
+
922
+ getSectionTitle(node, styleName) {
923
+ if (!node || !styleName) {
924
+ return null
925
+ }
926
+
927
+ // For headings, use the text content as section title
928
+ if (node.tagName === "text:h") {
929
+ // Get the heading level
930
+ const level = parseInt(node.getAttribute("text:outline-level")) || 1
931
+
932
+ // Only use level 1 and 2 headings as section titles
933
+ if (level <= 2) {
934
+ return node.textContent.trim()
935
+ }
936
+ }
937
+
938
+ // Check if the style indicates a section title
939
+ const style = this.styles[styleName]
940
+ if (style) {
941
+ // Check for explicit section title style
942
+ if (
943
+ style.title ||
944
+ styleName.toLowerCase().includes("section") ||
945
+ styleName.toLowerCase().includes("title")
946
+ ) {
947
+ // If it's a styled paragraph, use its content as title
948
+ if (node.tagName === "text:p") {
949
+ return node.textContent.trim()
950
+ }
951
+ }
952
+
953
+ // Check if it's a custom section style
954
+ const parentStyle = style.parentStyleName
955
+ ? this.styles[style.parentStyleName]
956
+ : null
957
+ if (parentStyle?.isSection) {
958
+ return node.textContent.trim()
959
+ }
960
+ }
961
+
962
+ // For text:section elements, check for section-name attribute
963
+ if (node.tagName === "text:section") {
964
+ const sectionName = node.getAttribute("text:name")
965
+ if (sectionName) {
966
+ return this.formatSectionName(sectionName)
967
+ }
968
+ }
969
+
970
+ return null
971
+ }
972
+
973
+ formatSectionName(name) {
974
+ // Remove common suffixes
975
+ name = name.replace(/_?(section|part|chapter)$/i, "")
976
+
977
+ // Split by underscores or hyphens
978
+ const words = name.split(/[_-]/)
979
+
980
+ // Capitalize first letter of each word and join
981
+ return words
982
+ .map(
983
+ word =>
984
+ word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()
985
+ )
986
+ .join(" ")
987
+ .trim()
988
+ }
989
+
990
+ groupContentIntoSections(body) {
991
+ const sections = []
992
+ let currentSection = {
993
+ title: null,
994
+ content: []
995
+ }
996
+
997
+ body.children.forEach(node => {
998
+ const styleName = node.getAttribute("text:style-name")
999
+ const title = this.getSectionTitle(node, styleName)
1000
+
1001
+ if (title && this.isHeadingStyle(styleName)) {
1002
+ // Start new section
1003
+ if (currentSection.content.length) {
1004
+ sections.push(currentSection)
1005
+ }
1006
+ currentSection = {
1007
+ title: title,
1008
+ content: []
1009
+ }
1010
+ }
1011
+
1012
+ const converted = [this.convertBlockNode(node)]
1013
+ .filter(node => node)
1014
+ .flat()
1015
+ converted.forEach(node => currentSection.content.push(node))
1016
+ })
1017
+
1018
+ // Add final section
1019
+ if (currentSection.content.length) {
1020
+ sections.push(currentSection)
1021
+ }
1022
+
1023
+ return sections
1024
+ }
1025
+
1026
+ isCodeBlockStyle(styleName, style) {
1027
+ if (!styleName) {
1028
+ return false
1029
+ }
1030
+
1031
+ // Check if style name contains preformatted or code indicators
1032
+ const lowerStyleName = styleName.toLowerCase()
1033
+ if (
1034
+ lowerStyleName.includes("preformatted") ||
1035
+ lowerStyleName.includes("code") ||
1036
+ styleName === "Preformatted_20_Text"
1037
+ ) {
1038
+ return true
1039
+ }
1040
+
1041
+ // Check if parent style is a code block style
1042
+ if (style?.parentStyleName) {
1043
+ const parentStyle = this.styles[style.parentStyleName]
1044
+ return this.isCodeBlockStyle(style.parentStyleName, parentStyle)
1045
+ }
1046
+
1047
+ // Check text properties for monospace fonts
1048
+ if (style?.textProperties?.fontFamily) {
1049
+ const fontFamily = style.textProperties.fontFamily.toLowerCase()
1050
+ const monospacePatterns = [
1051
+ "courier",
1052
+ "consolas",
1053
+ "monaco",
1054
+ "menlo",
1055
+ "lucida console",
1056
+ "liberation mono",
1057
+ "dejavu sans mono",
1058
+ "bitstream vera sans mono",
1059
+ "source code pro",
1060
+ "fira code"
1061
+ ]
1062
+ return monospacePatterns.some(pattern =>
1063
+ fontFamily.includes(pattern)
1064
+ )
1065
+ }
1066
+
1067
+ return false
1068
+ }
1069
+
1070
+ isHeadingStyle(styleName) {
1071
+ if (!styleName) {
1072
+ return false
1073
+ }
1074
+
1075
+ const style = this.styles[styleName]
1076
+ if (!style) {
1077
+ return false
1078
+ }
1079
+
1080
+ // Check multiple indicators that this might be a heading style
1081
+ return (
1082
+ // Direct heading indicators
1083
+ style.isHeading ||
1084
+ styleName.toLowerCase().includes("heading") ||
1085
+ styleName.toLowerCase().includes("title") ||
1086
+ // Check outline level property
1087
+ Boolean(style.outlineLevel) ||
1088
+ // Check if it's derived from a heading style
1089
+ (style.parentStyleName &&
1090
+ this.isHeadingStyle(style.parentStyleName)) ||
1091
+ // Check specific formatting that's typical for headings
1092
+ (style.paragraphProperties &&
1093
+ // Larger margins than normal paragraphs
1094
+ (style.paragraphProperties.marginTop > 0.3 ||
1095
+ style.paragraphProperties.marginBottom > 0.3 ||
1096
+ // Different alignment
1097
+ style.paragraphProperties.textAlign === "center")) ||
1098
+ // Check text properties typical for headings
1099
+ (style.textProperties &&
1100
+ // Larger font size
1101
+ (style.textProperties.fontSize > 12 ||
1102
+ // Bold text
1103
+ style.textProperties.bold ||
1104
+ // Different font family
1105
+ style.textProperties.fontFamily))
1106
+ )
1107
+ }
1108
+
1109
+ convertContainer(container) {
1110
+ return container.children
1111
+ .map(node => this.convertBlockNode(node))
1112
+ .filter(node => node)
1113
+ .flat()
1114
+ }
1115
+
1116
+ convertBlockNode(node) {
1117
+ const track = this.currentTracks.map(track => ({
1118
+ type: track.type,
1119
+ user: track.attrs.user,
1120
+ username: track.attrs.username,
1121
+ date: track.attrs.date
1122
+ }))
1123
+
1124
+ const attrs = track.length ? {track} : {}
1125
+
1126
+ switch (node.tagName) {
1127
+ case "text:p":
1128
+ if (
1129
+ node.children.length === 1 &&
1130
+ node.children[0].tagName === "draw:frame"
1131
+ ) {
1132
+ // Paragraph consists of only one figure/image.
1133
+ return this.convertImage(node.children[0], attrs)
1134
+ }
1135
+ return this.convertParagraph(node, attrs)
1136
+ case "text:h":
1137
+ return this.convertHeading(node, attrs)
1138
+ case "text:list":
1139
+ return this.convertList(node, attrs)
1140
+ case "draw:frame":
1141
+ return this.convertImage(node, attrs)
1142
+ case "draw:object":
1143
+ return this.convertObject(node, attrs)
1144
+ case "table:table":
1145
+ return this.convertTable(node, attrs)
1146
+ case "text:sequence-decls":
1147
+ case "office:forms":
1148
+ case "text:tracked-changes":
1149
+ return null
1150
+ case "text:bibliography":
1151
+ // LibreOffice native bibliography — rendered output only,
1152
+ // skip entirely in favour of Fidus Writer's own system.
1153
+ return null
1154
+ case "text:section": {
1155
+ // Skip bibliography sections inserted by citation managers
1156
+ // (Zotero: name contains "ZOTERO_BIBL"/"CSL_BIBLIOGRAPHY",
1157
+ // JabRef: name is "JR_bib" / "JR_BIB").
1158
+ const sectionName = node.getAttribute("text:name") || ""
1159
+ if (isOdtBibliographySection(sectionName)) {
1160
+ return null
1161
+ }
1162
+ // Other named sections are not bibliographies — fall through
1163
+ // to default handling (treat children as block content).
1164
+ return this.convertContainer(node)
1165
+ }
1166
+ default:
1167
+ console.warn(`Unsupported block node: ${node.tagName}`)
1168
+ return null
1169
+ }
1170
+ }
1171
+
1172
+ convertParagraph(node, attrs = {}) {
1173
+ const styleName = node.getAttribute("text:style-name")
1174
+ const style = this.styles[styleName]
1175
+
1176
+ // Check if this is a code block (preformatted text)
1177
+ if (this.isCodeBlockStyle(styleName, style)) {
1178
+ attrs = Object.assign(
1179
+ {
1180
+ track: [],
1181
+ language: "",
1182
+ category: "",
1183
+ title: "",
1184
+ id: ""
1185
+ },
1186
+ attrs
1187
+ )
1188
+ return {
1189
+ type: "code_block",
1190
+ attrs,
1191
+ content: this.convertNodeChildren(node)
1192
+ }
1193
+ }
1194
+
1195
+ // Check if this paragraph is title-like
1196
+ if (this.isTitleStyle(style)) {
1197
+ attrs = Object.assign(
1198
+ {
1199
+ id: randomHeadingId()
1200
+ },
1201
+ attrs
1202
+ )
1203
+ return {
1204
+ type: "heading1",
1205
+ attrs,
1206
+ content: this.convertNodeChildren(node)
1207
+ }
1208
+ }
1209
+
1210
+ if (this.isHeadingStyle(styleName)) {
1211
+ return this.convertHeading(node, attrs)
1212
+ }
1213
+
1214
+ return {
1215
+ type: "paragraph",
1216
+ attrs,
1217
+ content: this.convertNodeChildren(node)
1218
+ }
1219
+ }
1220
+
1221
+ convertHeading(node, attrs = {}) {
1222
+ const level =
1223
+ parseInt(node.getAttribute("text:outline-level") || 1) || 1
1224
+
1225
+ // Check for bookmark
1226
+ let id = null
1227
+ const bookmarkStart = node.query("text:bookmark-start")
1228
+ if (bookmarkStart) {
1229
+ const refName = bookmarkStart.getAttribute("text:name")
1230
+ if (refName && this.referenceableObjects[refName]) {
1231
+ id = this.referenceableObjects[refName].id
1232
+ }
1233
+ }
1234
+ attrs = Object.assign(
1235
+ {
1236
+ id: id || randomHeadingId()
1237
+ },
1238
+ attrs
1239
+ )
1240
+ return {
1241
+ type: `heading${level}`,
1242
+ attrs,
1243
+ content: this.convertNodeChildren(node)
1244
+ }
1245
+ }
1246
+
1247
+ convertNodeChildren(node, currentStyleMarks = []) {
1248
+ let insideCitationReferenceMark = false
1249
+ let insideBibliographyReferenceMark = false
1250
+
1251
+ return node.children
1252
+ .map(child => {
1253
+ if (insideBibliographyReferenceMark) {
1254
+ // Swallow all rendered bibliography content until the
1255
+ // closing mark — we have our own bibliography system.
1256
+ if (child.tagName === "text:reference-mark-end") {
1257
+ const name = child.getAttribute("text:name")
1258
+ if (name && isOdtBibliographyReferenceMark(name)) {
1259
+ insideBibliographyReferenceMark = false
1260
+ }
1261
+ }
1262
+ return null
1263
+ }
1264
+
1265
+ if (insideCitationReferenceMark) {
1266
+ if (child.tagName === "text:reference-mark-end") {
1267
+ // Process citation when we hit the end mark
1268
+ const name = child.getAttribute("text:name")
1269
+ if (name && isOdtCitationMark(name)) {
1270
+ insideCitationReferenceMark = false
1271
+ return this.convertCitation(name, currentStyleMarks)
1272
+ }
1273
+ }
1274
+ return null
1275
+ }
1276
+
1277
+ switch (child.tagName) {
1278
+ case "text:change-start": {
1279
+ const changeId = child.getAttribute("text:change-id")
1280
+ const track = this.tracks[changeId]
1281
+ if (track) {
1282
+ const trackMark = {
1283
+ type: track.type,
1284
+ attrs: {
1285
+ user: track.user,
1286
+ username: track.username,
1287
+ date: track.date
1288
+ }
1289
+ }
1290
+ if (track.type === "insertion") {
1291
+ trackMark.attrs.approved = track.approved
1292
+ }
1293
+ this.currentTracks.push(trackMark)
1294
+ }
1295
+ return null
1296
+ }
1297
+ case "text:change-end": {
1298
+ const changeId = child.getAttribute("text:change-id")
1299
+ const track = this.tracks[changeId]
1300
+ if (track) {
1301
+ this.currentTracks = this.currentTracks.filter(
1302
+ mark => mark.type !== track.type
1303
+ )
1304
+ }
1305
+ return null
1306
+ }
1307
+ case "#text":
1308
+ return this.convertText(
1309
+ String(child.textContent),
1310
+ currentStyleMarks
1311
+ )
1312
+ case "text:s": // space
1313
+ return this.convertText(" ", currentStyleMarks)
1314
+ case "text:span": {
1315
+ return this.convertSpan(child, currentStyleMarks)
1316
+ }
1317
+ case "text:a":
1318
+ return this.convertLink(child, currentStyleMarks)
1319
+ case "text:note":
1320
+ return this.convertFootnote(child, currentStyleMarks)
1321
+ case "office:annotation":
1322
+ return this.convertAnnotationStart(child)
1323
+ case "office:annotation-end":
1324
+ return this.convertAnnotationEnd(child)
1325
+ case "text:reference-mark-start": {
1326
+ const name = child.getAttribute("text:name")
1327
+ if (name && isOdtCitationMark(name)) {
1328
+ insideCitationReferenceMark = true
1329
+ } else if (
1330
+ name &&
1331
+ isOdtBibliographyReferenceMark(name)
1332
+ ) {
1333
+ insideBibliographyReferenceMark = true
1334
+ }
1335
+ return null
1336
+ }
1337
+ case "text:bibliography-mark":
1338
+ return this.convertBibliographyMark(
1339
+ child,
1340
+ currentStyleMarks
1341
+ )
1342
+ case "text:bookmark-ref":
1343
+ return this.convertHeadingReference(child)
1344
+ case "text:sequence-ref":
1345
+ return this.convertFigureReference(child)
1346
+ case "text:soft-page-break":
1347
+ return null
1348
+ default:
1349
+ console.warn(
1350
+ `Unsupported inline node: ${child.tagName}`
1351
+ )
1352
+ }
1353
+ })
1354
+ .filter(node => node)
1355
+ .flat()
1356
+ }
1357
+
1358
+ getCurrentMarks(currentStyleMarks = []) {
1359
+ const commentMarks = []
1360
+ // Add comment marks for any active comment IDs
1361
+ this.currentCommentIds.forEach(commentId => {
1362
+ commentMarks.push({
1363
+ type: "comment",
1364
+ attrs: {
1365
+ id: commentId
1366
+ }
1367
+ })
1368
+ })
1369
+ return [...currentStyleMarks, ...this.currentTracks, ...commentMarks]
1370
+ }
1371
+
1372
+ convertText(text, currentStyleMarks) {
1373
+ const textNode = {
1374
+ type: "text",
1375
+ text
1376
+ }
1377
+ const marks = this.getCurrentMarks(currentStyleMarks)
1378
+ if (marks.length) {
1379
+ textNode.marks = marks
1380
+ }
1381
+ return textNode
1382
+ }
1383
+
1384
+ convertSpan(node, currentStyleMarks) {
1385
+ const styleName = node.getAttribute("text:style-name")
1386
+ const style = this.styles[styleName]
1387
+ if (style?.textProperties?.bold) {
1388
+ currentStyleMarks = [...currentStyleMarks, {type: "strong"}]
1389
+ }
1390
+ if (style?.textProperties?.italic) {
1391
+ currentStyleMarks = [...currentStyleMarks, {type: "em"}]
1392
+ }
1393
+ // Handle superscript and subscript
1394
+ if (style?.textProperties?.textPosition) {
1395
+ const position = style.textProperties.textPosition
1396
+ if (position.includes("super")) {
1397
+ currentStyleMarks = [...currentStyleMarks, {type: "sup"}]
1398
+ } else if (position.includes("sub")) {
1399
+ currentStyleMarks = [...currentStyleMarks, {type: "sub"}]
1400
+ }
1401
+ }
1402
+ // Handle inline code (monospace fonts)
1403
+ if (style?.textProperties?.fontFamily) {
1404
+ const fontFamily = style.textProperties.fontFamily.toLowerCase()
1405
+ const monospacePatterns = [
1406
+ "courier",
1407
+ "consolas",
1408
+ "monaco",
1409
+ "menlo",
1410
+ "lucida console",
1411
+ "liberation mono",
1412
+ "dejavu sans mono",
1413
+ "bitstream vera sans mono",
1414
+ "source code pro",
1415
+ "fira code",
1416
+ "ubuntu mono",
1417
+ "droid sans mono",
1418
+ "monospace"
1419
+ ]
1420
+ const isMonospace = monospacePatterns.some(pattern =>
1421
+ fontFamily.includes(pattern)
1422
+ )
1423
+ if (isMonospace) {
1424
+ currentStyleMarks = [...currentStyleMarks, {type: "code"}]
1425
+ }
1426
+ }
1427
+ return this.convertNodeChildren(node, currentStyleMarks)
1428
+ }
1429
+
1430
+ convertFootnote(node, currentStyleMarks) {
1431
+ const noteBody = node.query("text:note-body")
1432
+ if (!noteBody) {
1433
+ return null
1434
+ }
1435
+
1436
+ // Get the first paragraph in the footnote
1437
+ const firstParagraph = noteBody.query("text:p")
1438
+ if (!firstParagraph) {
1439
+ return null
1440
+ }
1441
+
1442
+ // Check if this is a citation-only footnote
1443
+ const referenceMarkStart = firstParagraph.query(
1444
+ "text:reference-mark-start"
1445
+ )
1446
+ const referenceMarkEnd = firstParagraph.query("text:reference-mark-end")
1447
+
1448
+ const markName = referenceMarkStart?.getAttribute("text:name")
1449
+ if (
1450
+ referenceMarkStart &&
1451
+ referenceMarkEnd &&
1452
+ markName &&
1453
+ isOdtCitationMark(markName) &&
1454
+ // Check that there's no content outside the reference marks
1455
+ firstParagraph.children.every(
1456
+ child =>
1457
+ child.tagName === "text:reference-mark-start" ||
1458
+ child.tagName === "text:reference-mark-end" ||
1459
+ (child.tagName === "text:span" &&
1460
+ child.previousSibling?.tagName ===
1461
+ "text:reference-mark-start" &&
1462
+ child.nextSibling?.tagName ===
1463
+ "text:reference-mark-end")
1464
+ )
1465
+ ) {
1466
+ // If it's a citation-only footnote, convert it directly to a citation
1467
+ return this.convertCitation(markName, currentStyleMarks)
1468
+ }
1469
+
1470
+ // Otherwise, convert as regular footnote
1471
+ return {
1472
+ type: "footnote",
1473
+ attrs: {
1474
+ footnote: this.convertContainer(noteBody)
1475
+ },
1476
+ marks: this.getCurrentMarks(currentStyleMarks)
1477
+ }
1478
+ }
1479
+
1480
+ convertCitation(markName, currentStyleMarks) {
1481
+ const citationNode = parseOdtReferenceMark(
1482
+ markName,
1483
+ this.bibliography,
1484
+ this.bibDB
1485
+ )
1486
+ if (citationNode) {
1487
+ citationNode.marks = this.getCurrentMarks(currentStyleMarks)
1488
+ return citationNode
1489
+ }
1490
+ return null
1491
+ }
1492
+
1493
+ convertBibliographyMark(bibMarkNode, currentStyleMarks) {
1494
+ const citationNode = parseOdtBibliographyMark(
1495
+ bibMarkNode,
1496
+ this.bibliography
1497
+ )
1498
+ if (citationNode) {
1499
+ citationNode.marks = this.getCurrentMarks(currentStyleMarks)
1500
+ return citationNode
1501
+ }
1502
+ return null
1503
+ }
1504
+
1505
+ convertList(node, attrs) {
1506
+ const listStyle = node.getAttribute("text:style-name")
1507
+ const isOrdered = this.isOrderedList(listStyle)
1508
+
1509
+ attrs = Object.assign(
1510
+ {
1511
+ id: randomListId()
1512
+ },
1513
+ attrs
1514
+ )
1515
+
1516
+ if (isOrdered) {
1517
+ attrs.order = 1
1518
+ }
1519
+
1520
+ return {
1521
+ type: isOrdered ? "ordered_list" : "bullet_list",
1522
+ attrs,
1523
+ content: node.queryAll("text:list-item").map(item => ({
1524
+ type: "list_item",
1525
+ content: this.convertContainer(item)
1526
+ }))
1527
+ }
1528
+ }
1529
+
1530
+ convertAnnotationStart(node) {
1531
+ const commentId = (node.getAttribute("office:name") || "")
1532
+ .replace(/\D/g, "")
1533
+ .slice(0, 9)
1534
+ if (commentId && this.comments[commentId]) {
1535
+ this.currentCommentIds.push(commentId)
1536
+ }
1537
+ return null
1538
+ }
1539
+
1540
+ convertAnnotationEnd(node) {
1541
+ const commentId = (node.getAttribute("office:name") || "")
1542
+ .replace(/\D/g, "")
1543
+ .slice(0, 9)
1544
+ if (commentId) {
1545
+ const index = this.currentCommentIds.indexOf(commentId)
1546
+ if (index !== -1) {
1547
+ this.currentCommentIds.splice(index, 1)
1548
+ }
1549
+ }
1550
+ return null
1551
+ }
1552
+
1553
+ convertHeadingReference(node) {
1554
+ const refName = node.getAttribute("text:ref-name")
1555
+ if (!refName || !this.referenceableObjects[refName]) {
1556
+ return null
1557
+ }
1558
+
1559
+ const targetObject = this.referenceableObjects[refName]
1560
+ if (targetObject.type !== "heading") {
1561
+ return null
1562
+ }
1563
+
1564
+ return {
1565
+ type: "cross_reference",
1566
+ attrs: {
1567
+ id: targetObject.id,
1568
+ title: targetObject.node.textContent
1569
+ }
1570
+ }
1571
+ }
1572
+
1573
+ convertFigureReference(node) {
1574
+ const refName = node.getAttribute("text:ref-name")
1575
+ if (!refName || !this.referenceableObjects[refName]) {
1576
+ return null
1577
+ }
1578
+
1579
+ const targetObject = this.referenceableObjects[refName]
1580
+ if (targetObject.type !== "figure") {
1581
+ return null
1582
+ }
1583
+
1584
+ // Find the caption text within the figure
1585
+ const caption = targetObject.node.query("text:p")?.textContent || ""
1586
+
1587
+ return {
1588
+ type: "cross_reference",
1589
+ attrs: {
1590
+ id: targetObject.id,
1591
+ title: caption
1592
+ }
1593
+ }
1594
+ }
1595
+
1596
+ isOrderedList(styleName) {
1597
+ if (!this.stylesDoc) {
1598
+ return false
1599
+ }
1600
+ const listStyle = this.stylesDoc.query("text:list-style", {
1601
+ "style:name": styleName
1602
+ })
1603
+ return listStyle?.query("text:list-level-style-number") !== null
1604
+ }
1605
+
1606
+ convertImage(node, attrs = {}) {
1607
+ const imageElement = node.query("draw:image")
1608
+ if (!imageElement) {
1609
+ return null
1610
+ }
1611
+
1612
+ const frame = node.closest("draw:frame")
1613
+ if (!frame) {
1614
+ return null
1615
+ }
1616
+
1617
+ const href = imageElement.getAttribute("xlink:href")
1618
+ if (!href || !href.startsWith("Pictures/")) {
1619
+ return null
1620
+ }
1621
+
1622
+ const imageId = Math.floor(Math.random() * 1000000)
1623
+ const width = this.convertLength(node.getAttribute("svg:width"))
1624
+ const height = this.convertLength(node.getAttribute("svg:height"))
1625
+
1626
+ const title = href.split("/").pop()
1627
+ this.images[imageId] = {
1628
+ id: imageId,
1629
+ title,
1630
+ copyright: {
1631
+ holder: false,
1632
+ year: false,
1633
+ freeToRead: true,
1634
+ licenses: []
1635
+ },
1636
+ image: href,
1637
+ file_type: this.getImageFileType(title),
1638
+ file: null,
1639
+ width,
1640
+ height,
1641
+ checksum: 0
1642
+ }
1643
+
1644
+ // Find sequence element for figure reference
1645
+ const sequence = frame.query("text:sequence")
1646
+ let figureId = null
1647
+ if (sequence) {
1648
+ const refName = sequence.getAttribute("text:ref-name")
1649
+ if (refName && this.referenceableObjects[refName]) {
1650
+ figureId = this.referenceableObjects[refName].id
1651
+ }
1652
+ }
1653
+
1654
+ const caption = node.query("text:p")
1655
+ const captionContent = caption ? this.convertNodeChildren(caption) : []
1656
+
1657
+ attrs = Object.assign(
1658
+ {
1659
+ id: figureId || randomFigureId(),
1660
+ aligned: "center",
1661
+ width: Math.min(Math.round((width / 8.5) * 100), 100),
1662
+ caption: Boolean(captionContent.length)
1663
+ },
1664
+ attrs
1665
+ )
1666
+
1667
+ const figureCaption = {type: "figure_caption"}
1668
+ if (captionContent.length) {
1669
+ figureCaption.content = captionContent
1670
+ }
1671
+
1672
+ return {
1673
+ type: "figure",
1674
+ attrs,
1675
+ content: [
1676
+ {
1677
+ type: "image",
1678
+ attrs: {
1679
+ image: imageId
1680
+ }
1681
+ },
1682
+ figureCaption
1683
+ ]
1684
+ }
1685
+ }
1686
+
1687
+ getImageFileType(filename) {
1688
+ const ext = filename.split(".").pop().toLowerCase()
1689
+ switch (ext) {
1690
+ case "avif":
1691
+ case "avifs":
1692
+ return "image/avif"
1693
+ case "png":
1694
+ return "image/png"
1695
+ case "jpg":
1696
+ case "jpeg":
1697
+ return "image/jpeg"
1698
+ case "gif":
1699
+ return "image/gif"
1700
+ case "svg":
1701
+ return "image/svg+xml"
1702
+ case "webp":
1703
+ return "image/webp"
1704
+ default:
1705
+ return "image/png" // Default fallback
1706
+ }
1707
+ }
1708
+
1709
+ convertLength(length) {
1710
+ if (!length) {
1711
+ return 0
1712
+ }
1713
+
1714
+ // Match number and unit
1715
+ const match = length.match(/^(-?\d*\.?\d+)(pt|cm|mm|in|pc|px|%)?$/)
1716
+ if (!match) {
1717
+ return 0
1718
+ }
1719
+
1720
+ const [_, value, unit = "pt"] = match
1721
+ const numValue = parseFloat(value)
1722
+
1723
+ // Convert to inches first (as base unit)
1724
+ switch (unit) {
1725
+ case "pt": // points
1726
+ return numValue / 72
1727
+ case "pc": // picas (1 pica = 12 points)
1728
+ return (numValue * 12) / 72
1729
+ case "cm": // centimeters
1730
+ return numValue / 2.54
1731
+ case "mm": // millimeters
1732
+ return numValue / 25.4
1733
+ case "in": // inches
1734
+ return numValue
1735
+ case "px": // pixels (assuming 96 DPI)
1736
+ return numValue / 96
1737
+ case "%": // percentage (return as is)
1738
+ return numValue
1739
+ default:
1740
+ return 0
1741
+ }
1742
+ }
1743
+
1744
+ convertTable(node, attrs) {
1745
+ const width =
1746
+ node.getAttribute("style:rel-width")?.replace("%", "") || "100"
1747
+ const styleName = node.getAttribute("table:style-name")
1748
+ const style = this.styles[styleName]
1749
+ const aligned = style?.tableProperties.align || "center"
1750
+
1751
+ attrs = Object.assign(
1752
+ {
1753
+ id: randomTableId(),
1754
+ track: parseTracks(node.getAttribute("text:change-id")),
1755
+ width,
1756
+ aligned,
1757
+ layout: "fixed",
1758
+ category: "none",
1759
+ caption: false
1760
+ },
1761
+ attrs
1762
+ )
1763
+ return {
1764
+ type: "table",
1765
+ attrs,
1766
+ content: [
1767
+ {type: "table_caption"},
1768
+ {
1769
+ type: "table_body",
1770
+ content: node
1771
+ .queryAll("table:table-row")
1772
+ .map(row => this.convertTableRow(row))
1773
+ }
1774
+ ]
1775
+ }
1776
+ }
1777
+
1778
+ convertTableRow(row) {
1779
+ return {
1780
+ type: "table_row",
1781
+ content: row
1782
+ .queryAll(["table:table-cell", "table:covered-table-cell"])
1783
+ .map(cell => this.convertTableCell(cell))
1784
+ }
1785
+ }
1786
+
1787
+ convertTableCell(node) {
1788
+ if (node.tagName === "table:covered-table-cell") {
1789
+ return null
1790
+ }
1791
+ return {
1792
+ type: "table_cell",
1793
+ attrs: {
1794
+ colspan:
1795
+ parseInt(
1796
+ node.getAttribute("table:number-columns-spanned")
1797
+ ) || 1,
1798
+ rowspan:
1799
+ parseInt(node.getAttribute("table:number-rows-spanned")) ||
1800
+ 1,
1801
+ track: parseTracks(node.getAttribute("text:change-id"))
1802
+ },
1803
+ content: this.convertContainer(node)
1804
+ }
1805
+ }
1806
+
1807
+ convertLink(node, currentStyleMarks) {
1808
+ const href = node.getAttribute("xlink:href")
1809
+ currentStyleMarks = currentStyleMarks.concat([
1810
+ {type: "link", attrs: {href}}
1811
+ ])
1812
+ return this.convertNodeChildren(node, currentStyleMarks)
1813
+ }
1814
+
1815
+ detectLanguage() {
1816
+ // Try to detect document language in following order:
1817
+ // 1. From document content
1818
+ // 2. From document styles
1819
+ // 3. Default to "en-US"
1820
+
1821
+ // Check content language
1822
+ if (this.contentDoc) {
1823
+ const langAttr =
1824
+ this.contentDoc.getAttribute("office:default-language") ||
1825
+ this.contentDoc.getAttribute("dc:language")
1826
+ if (langAttr) {
1827
+ return langAttr
1828
+ }
1829
+
1830
+ const firstParagraph = this.contentDoc.query("text:p")
1831
+ if (firstParagraph) {
1832
+ const paraLang = firstParagraph.getAttribute("xml:lang")
1833
+ if (paraLang) {
1834
+ return paraLang
1835
+ }
1836
+ }
1837
+ }
1838
+
1839
+ // Check styles language
1840
+ if (this.stylesDoc) {
1841
+ const defaultStyle = this.stylesDoc.query("style:default-style")
1842
+ if (defaultStyle) {
1843
+ const styleLang =
1844
+ defaultStyle.getAttribute("fo:language") ||
1845
+ defaultStyle.getAttribute("style:language-complex")
1846
+ if (styleLang) {
1847
+ return styleLang
1848
+ }
1849
+ }
1850
+ }
1851
+
1852
+ // Default to "en-US"
1853
+ return "en-US"
1854
+ }
1855
+ }