@fiduswriter/document 0.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +16 -0
  3. package/jest.config.js +23 -0
  4. package/package.json +59 -0
  5. package/schema.json +1 -0
  6. package/scripts/export-schema.js +16 -0
  7. package/src/bibliography/common.js +92 -0
  8. package/src/bibliography/csl_bib.js +139 -0
  9. package/src/citations/citeproc_sys.js +42 -0
  10. package/src/citations/format.js +194 -0
  11. package/src/common/blob.js +10 -0
  12. package/src/common/file.js +25 -0
  13. package/src/common/index.js +12 -0
  14. package/src/common/network.js +79 -0
  15. package/src/common/text.js +44 -0
  16. package/src/editor/e2ee/encryptor.js +228 -0
  17. package/src/exporter/docx/citations.js +177 -0
  18. package/src/exporter/docx/comments.js +165 -0
  19. package/src/exporter/docx/footnotes.js +240 -0
  20. package/src/exporter/docx/images.js +101 -0
  21. package/src/exporter/docx/index.js +185 -0
  22. package/src/exporter/docx/lists.js +260 -0
  23. package/src/exporter/docx/math.js +46 -0
  24. package/src/exporter/docx/metadata.js +289 -0
  25. package/src/exporter/docx/rels.js +193 -0
  26. package/src/exporter/docx/render.js +941 -0
  27. package/src/exporter/docx/richtext.js +1182 -0
  28. package/src/exporter/docx/tables.js +112 -0
  29. package/src/exporter/docx/tools.js +50 -0
  30. package/src/exporter/epub/index.js +142 -0
  31. package/src/exporter/epub/templates.js +140 -0
  32. package/src/exporter/epub/tools.js +96 -0
  33. package/src/exporter/html/citations.js +121 -0
  34. package/src/exporter/html/convert.js +813 -0
  35. package/src/exporter/html/index.js +192 -0
  36. package/src/exporter/html/templates.js +34 -0
  37. package/src/exporter/html/tools.js +50 -0
  38. package/src/exporter/jats/bibliography.js +183 -0
  39. package/src/exporter/jats/citations.js +109 -0
  40. package/src/exporter/jats/convert.js +871 -0
  41. package/src/exporter/jats/index.js +92 -0
  42. package/src/exporter/jats/templates.js +35 -0
  43. package/src/exporter/jats/text.js +72 -0
  44. package/src/exporter/latex/convert.js +934 -0
  45. package/src/exporter/latex/escape_latex.js +21 -0
  46. package/src/exporter/latex/index.js +74 -0
  47. package/src/exporter/latex/readme.js +22 -0
  48. package/src/exporter/native/shrink.js +132 -0
  49. package/src/exporter/odt/citations.js +101 -0
  50. package/src/exporter/odt/footnotes.js +147 -0
  51. package/src/exporter/odt/images.js +115 -0
  52. package/src/exporter/odt/index.js +156 -0
  53. package/src/exporter/odt/math.js +57 -0
  54. package/src/exporter/odt/metadata.js +251 -0
  55. package/src/exporter/odt/render.js +806 -0
  56. package/src/exporter/odt/richtext.js +865 -0
  57. package/src/exporter/odt/styles.js +387 -0
  58. package/src/exporter/odt/track.js +68 -0
  59. package/src/exporter/pandoc/citations.js +98 -0
  60. package/src/exporter/pandoc/convert.js +1017 -0
  61. package/src/exporter/pandoc/index.js +92 -0
  62. package/src/exporter/pandoc/readme.js +8 -0
  63. package/src/exporter/pandoc/tools.js +51 -0
  64. package/src/exporter/print/index.js +177 -0
  65. package/src/exporter/tools/doc_content.js +144 -0
  66. package/src/exporter/tools/file.js +9 -0
  67. package/src/exporter/tools/json.js +73 -0
  68. package/src/exporter/tools/svg.js +29 -0
  69. package/src/exporter/tools/xml.js +531 -0
  70. package/src/exporter/tools/xml_zip.js +95 -0
  71. package/src/exporter/tools/zip.js +90 -0
  72. package/src/exporter/tools/zotero_csl.js +93 -0
  73. package/src/importer/citations.js +129 -0
  74. package/src/importer/docx/citations.js +123 -0
  75. package/src/importer/docx/convert.js +1427 -0
  76. package/src/importer/docx/helpers.js +9 -0
  77. package/src/importer/docx/omml2mathml.js +1448 -0
  78. package/src/importer/docx/parse.js +735 -0
  79. package/src/importer/native/get_images.js +76 -0
  80. package/src/importer/native/update.js +29 -0
  81. package/src/importer/odt/citations.js +87 -0
  82. package/src/importer/odt/convert.js +1855 -0
  83. package/src/importer/pandoc/convert.js +884 -0
  84. package/src/importer/pandoc/helpers.js +84 -0
  85. package/src/importer/zip_analyzer.js +102 -0
  86. package/src/index.js +1 -0
  87. package/src/mathlive/opf_includes.js +24 -0
  88. package/src/schema/common/annotate.js +76 -0
  89. package/src/schema/common/base.js +118 -0
  90. package/src/schema/common/citation.js +62 -0
  91. package/src/schema/common/equation.js +31 -0
  92. package/src/schema/common/figure.js +190 -0
  93. package/src/schema/common/heading.js +43 -0
  94. package/src/schema/common/index.js +40 -0
  95. package/src/schema/common/list.js +95 -0
  96. package/src/schema/common/reference.js +100 -0
  97. package/src/schema/common/table.js +103 -0
  98. package/src/schema/common/track.js +190 -0
  99. package/src/schema/const.js +58 -0
  100. package/src/schema/convert.js +1272 -0
  101. package/src/schema/document/content.js +187 -0
  102. package/src/schema/document/index.js +117 -0
  103. package/src/schema/document/structure.js +452 -0
  104. package/src/schema/export.js +21 -0
  105. package/src/schema/footnotes.js +126 -0
  106. package/src/schema/footnotes_convert.js +31 -0
  107. package/src/schema/i18n.js +595 -0
  108. package/src/schema/index.js +5 -0
  109. package/src/schema/mini_json.js +61 -0
  110. package/src/schema/text.js +22 -0
@@ -0,0 +1,884 @@
1
+ import {parseCSL} from "biblatex-csl-converter"
2
+
3
+ import {applyAnnotation, applyMarkToNodes, mergeTextNodes} from "./helpers.js"
4
+
5
/**
 * Converts a document in Pandoc's JSON AST format into the node tree used by
 * the editor (a "doc" node holding a title, optional metadata parts and a
 * body part).
 *
 * Images found during conversion are collected in `this.images`, keyed by a
 * generated numeric id. Zotero citations that are not yet part of
 * `this.bibliography` are added to it as a side effect.
 */
export class PandocConvert {
    /**
     * @param {object} doc Parsed Pandoc JSON (`pandoc-api-version`, `meta`, `blocks`).
     * @param {*} importId Identifier stored on the resulting document and settings.
     * @param {object} template Document template; `template.content.content` lists
     *     the parts (the first entry is skipped as it is the title).
     * @param {object} bibliography Map of bibliography key => entry (`entry_key`, ...).
     */
    constructor(doc, importId, template, bibliography) {
        this.doc = doc
        this.importId = importId
        this.template = template
        this.bibliography = bibliography

        // Sparse collection keyed by the generated image id.
        this.images = []

        // `lang` may be a MetaInlines ([Str "en-US"]) or a MetaString ("en-US").
        this.language =
            this._inlinesToText(this.doc?.meta?.lang?.c).trim() || "en-US"

        this.SMALL_IMAGE_THRESHOLD = 1.0 // Smaller images will be discarded (in inches)
    }

    /**
     * Validates the input and runs the conversion.
     *
     * @returns {{content: object, settings: object}} Converted document and settings.
     * @throws {Error} If the input is not a valid Pandoc JSON document.
     */
    init() {
        try {
            this.validatePandocFormat()
        } catch (error) {
            console.error("Pandoc format validation failed:", error)
            throw new Error(
                "Invalid Pandoc document format: " + error.message,
                {cause: error}
            )
        }

        return {
            content: this.convert(),
            settings: {
                import_id: this.importId,
                tracked: false,
                language: this.language
            }
        }
    }

    /**
     * Performs a shallow structural check of `this.doc`.
     *
     * @returns {true} When the document looks like Pandoc JSON.
     * @throws {Error} Describing the first structural problem found.
     */
    validatePandocFormat() {
        const doc = this.doc
        if (!doc || typeof doc !== "object") {
            throw new Error("Missing or invalid document")
        }

        // Check API version (Pandoc uses [major, minor, patch] or [major, minor, patch, tweak])
        const apiVersion = doc["pandoc-api-version"]
        if (
            !Array.isArray(apiVersion) ||
            apiVersion.length < 3 ||
            !apiVersion.every(num => typeof num === "number")
        ) {
            throw new Error("Invalid or missing Pandoc API version")
        }

        // Check for required top-level properties
        if (!Array.isArray(doc.blocks)) {
            throw new Error("Missing or invalid blocks property")
        }

        // Check meta property structure if it exists
        if (doc.meta && typeof doc.meta !== "object") {
            throw new Error("Invalid meta property")
        }

        // Blocks without arguments are serialized without a "c" field.
        const contentlessBlocks = ["Null", "HorizontalRule"]
        const isValidBlock = block =>
            block &&
            typeof block === "object" &&
            typeof block.t === "string" &&
            ("c" in block || contentlessBlocks.includes(block.t))

        if (!doc.blocks.every(isValidBlock)) {
            throw new Error("Invalid block structure")
        }

        return true
    }

    /**
     * Builds the document node: title, then subtitle / authors / abstract
     * parts when the corresponding metadata exists, then the body part.
     * Titles and ids of the parts are taken from the template when it has a
     * matching part.
     *
     * @returns {object} The "doc" node.
     */
    convert() {
        // The first template entry is the title, which is always created below.
        const templateParts = (this.template?.content?.content || []).slice(1)
        const findPartAttrs = predicate =>
            templateParts.find(part => part?.attrs && predicate(part))?.attrs
        const meta = this.doc.meta || {}

        // Create the outer document structure
        const document = {
            type: "doc",
            attrs: {
                import_id: this.importId
            },
            content: []
        }

        // Add title (required first element)
        document.content.push({
            type: "title",
            content: this.convertInlines(
                meta.title?.c
                    ? this._metaToInlines(meta.title)
                    : [{t: "Str", c: "Untitled"}]
            )
        })

        // Add subtitle if present
        if (meta.subtitle?.c) {
            const partAttrs = findPartAttrs(
                part => part.attrs.metadata === "subtitle"
            )
            document.content.push({
                type: "heading_part",
                attrs: {
                    title: partAttrs ? partAttrs.title : "Subtitle",
                    id: partAttrs ? partAttrs.id : "subtitle",
                    metadata: "subtitle"
                },
                content: [
                    {
                        type: "heading1",
                        attrs: {
                            id: this._randomHeadingId()
                        },
                        content: this.convertInlines(
                            this._metaToInlines(meta.subtitle)
                        )
                    }
                ]
            })
        }

        // Add authors if present
        if (meta.author?.c) {
            const partAttrs = findPartAttrs(
                part => part.attrs.metadata === "authors"
            )
            // A single author is a plain meta value rather than a MetaList.
            const isSingleAuthor =
                ["MetaInlines", "MetaString", "MetaBlocks"].includes(
                    meta.author.t
                ) || !Array.isArray(meta.author.c)
            const authors = isSingleAuthor ? [meta.author] : meta.author.c
            document.content.push({
                type: "contributors_part",
                attrs: {
                    title: partAttrs ? partAttrs.title : "Authors",
                    id: partAttrs ? partAttrs.id : "authors",
                    metadata: "authors"
                },
                content: authors.map(author => ({
                    type: "contributor",
                    attrs: this.convertContributor(author)
                }))
            })
        }

        // Add abstract if present
        if (meta.abstract?.c) {
            const partAttrs = findPartAttrs(
                part => part.attrs.metadata === "abstract"
            )
            // An abstract given as inlines or a string is wrapped in a paragraph.
            const abstractBlocks =
                ["MetaInlines", "MetaString"].includes(meta.abstract.t) ||
                !Array.isArray(meta.abstract.c)
                    ? [{t: "Para", c: this._metaToInlines(meta.abstract)}]
                    : meta.abstract.c
            // `gettext` is only available when a translation layer provides it.
            const defaultTitle =
                typeof gettext === "function"
                    ? gettext("Abstract")
                    : "Abstract"
            document.content.push({
                type: "richtext_part",
                attrs: {
                    title: partAttrs ? partAttrs.title : defaultTitle,
                    id: partAttrs ? partAttrs.id : "abstract",
                    metadata: "abstract"
                },
                content: this.convertBlocks(abstractBlocks)
            })
        }

        const bodyAttrs = findPartAttrs(
            part => !part.attrs.metadata && part.type === "richtext_part"
        )
        // Add main body content
        document.content.push({
            type: "richtext_part",
            attrs: {
                title: bodyAttrs ? bodyAttrs.title : "Body",
                id: bodyAttrs ? bodyAttrs.id : "body",
                marks: ["strong", "em", "link"]
            },
            content: this.convertBlocks(this.doc.blocks)
        })

        return document
    }

    /**
     * Derives contributor attributes from an author meta value. The last
     * word becomes the last name, everything before it the first name. The
     * text of the first footnote, if any, is used as the email address.
     *
     * @param {object} author Pandoc meta value describing one author.
     * @returns {{firstname: string, lastname: string, email: string, institution: string}}
     */
    convertContributor(author) {
        const attrs = {
            firstname: "",
            lastname: "",
            email: "",
            institution: ""
        }

        const inlines = this._metaToInlines(author)
        if (!inlines.length) {
            return attrs
        }

        // Extract name components
        const textParts = inlines
            .filter(part => part.t === "Str")
            .map(part => part.c)

        if (textParts.length > 1) {
            attrs.lastname = textParts.pop()
            attrs.firstname = textParts.join(" ")
        } else if (textParts.length === 1) {
            attrs.lastname = textParts[0]
        }

        // Extract email from notes if present
        const note = inlines.find(part => part.t === "Note")
        if (note) {
            attrs.email = this.convertInlines(note.c?.[0]?.c)
                .map(node => node.text)
                .join("")
        }

        return attrs
    }

    /**
     * Converts a list of Pandoc blocks, dropping blocks that produce nothing.
     *
     * @param {Array} blocks Pandoc blocks.
     * @returns {Array} Converted block nodes.
     */
    convertBlocks(blocks) {
        if (!Array.isArray(blocks)) {
            return []
        }
        return blocks
            .flatMap(block => this.convertBlock(block))
            .filter(Boolean)
    }

    /**
     * Converts one Pandoc block.
     *
     * @param {object} block Pandoc block (`{t, c}`).
     * @returns {Array|object|null} Zero or more converted nodes; callers
     *     flatten the result and drop empty entries.
     */
    convertBlock(block) {
        if (!block) {
            return []
        }
        switch (block.t) {
            case "CodeBlock": {
                const [attrs, code] = block.c
                // attrs structure: [id, classes, keyValuePairs]
                // Example: ["ref-label", ["python3"], [["caption", "The Caption"], ["linenos", ""]]]
                const id = attrs?.[0] || ""
                const language = attrs?.[1]?.[0] || "" // First class is language
                const keyValuePairs = attrs?.[2] || []

                const captionPair = keyValuePairs.find(
                    pair => pair[0] === "caption"
                )
                const categoryPair = keyValuePairs.find(
                    pair => pair[0] === "category"
                )
                const title = captionPair ? captionPair[1] : ""

                let category = ""
                if (categoryPair) {
                    category = categoryPair[1]
                } else if (title) {
                    // If there's a caption but no explicit category, default to 'listing'
                    // This makes the code block referenceable and properly numbered
                    category = "listing"
                }

                return [
                    {
                        type: "code_block",
                        attrs: {
                            track: [],
                            language,
                            category,
                            title,
                            id
                        },
                        // An empty text node is not a valid node, so an
                        // empty code block gets no content at all.
                        content: code ? [{type: "text", text: code}] : []
                    }
                ]
            }
            case "Div": {
                // Attribute-object shape that was handled here originally.
                if (block.attr?.classes?.includes("figure")) {
                    return this.convertFigure(block)
                }
                // Pandoc JSON shape: c = [[id, classes, keyValuePairs], blocks].
                // The content of a figure container is converted so that
                // images inside it become figures.
                const [divAttr, divBlocks] = Array.isArray(block.c)
                    ? block.c
                    : []
                if (
                    Array.isArray(divAttr?.[1]) &&
                    divAttr[1].includes("figure")
                ) {
                    return this.convertBlocks(divBlocks)
                }
                // Ignore otherwise. Could be bibliography
                // or other non-content block
                return []
            }
            case "Para":
            case "Plain": {
                // Process each inline, splitting into paragraphs and figures
                const blocks = []
                let currentInlines = []
                const flushParagraph = () => {
                    if (currentInlines.length > 0) {
                        blocks.push({
                            type: "paragraph",
                            content: this.convertInlines(currentInlines)
                        })
                        currentInlines = []
                    }
                }
                for (const inline of block.c || []) {
                    if (inline.t === "Image") {
                        // Images become block-level figures; a skipped image
                        // yields null, which convertBlocks filters out.
                        flushParagraph()
                        blocks.push(this.convertInline(inline))
                    } else {
                        currentInlines.push(inline)
                    }
                }
                flushParagraph()
                return blocks
            }
            case "Header": {
                const [level, headerAttr, inlines] = block.c
                const headingLevel = Number.isInteger(level)
                    ? Math.min(Math.max(level, 1), 6)
                    : level
                return [
                    {
                        type: `heading${headingLevel}`,
                        attrs: {
                            // Headers without an identifier get a generated one.
                            id: headerAttr?.[0] || this._randomHeadingId()
                        },
                        content: this.convertInlines(inlines)
                    }
                ]
            }
            case "BlockQuote":
                return [
                    {
                        type: "blockquote",
                        content: this.convertBlocks(block.c)
                    }
                ]
            case "BulletList":
                return [
                    {
                        type: "bullet_list",
                        content: block.c.map(item => ({
                            type: "list_item",
                            content: this.convertBlocks(item)
                        }))
                    }
                ]
            case "DefinitionList":
                // Each item is [term inlines, [definition blocks, ...]].
                // Rendered as a bold paragraph followed by a bullet list.
                return block.c.flatMap(item => {
                    const [term, definitions] = Array.isArray(item)
                        ? item
                        : [item.term, item.definitions]
                    const nodes = [
                        {
                            type: "paragraph",
                            content: applyMarkToNodes(
                                this.convertInlines(term),
                                "strong"
                            )
                        }
                    ]
                    if (Array.isArray(definitions) && definitions.length) {
                        nodes.push({
                            type: "bullet_list",
                            content: definitions.map(def => ({
                                type: "list_item",
                                content: this.convertBlocks(def)
                            }))
                        })
                    }
                    return nodes
                })
            case "OrderedList":
                return [
                    {
                        type: "ordered_list",
                        attrs: {
                            order: block.c[0][0]
                        },
                        content: block.c[1].map(item => ({
                            type: "list_item",
                            content: this.convertBlocks(item)
                        }))
                    }
                ]
            case "Table":
                return [this.convertTable(block)]
            case "Figure":
                return [this.convertFigure(block)]
            case "HorizontalRule":
            case "Null":
                // Nothing to import for these.
                return []
            default:
                console.warn(`Unhandled block type: ${block.t}`)
                return []
        }
    }

    /**
     * Converts a list of Pandoc inlines, removes a hard break at the very
     * start or end and merges the result with `mergeTextNodes`.
     *
     * @param {Array} inlines Pandoc inlines.
     * @returns {Array} Converted inline nodes.
     */
    convertInlines(inlines) {
        if (!Array.isArray(inlines)) {
            return []
        }
        const convertedNodes = inlines
            .map(inline => this.convertInline(inline))
            .filter(Boolean)
            .flat()

        // Remove hard breaks at start and end
        const lastIndex = convertedNodes.length - 1
        const filteredNodes = convertedNodes.filter(
            (node, index) =>
                node.type !== "hard_break" ||
                (index !== 0 && index !== lastIndex)
        )

        return mergeTextNodes(filteredNodes)
    }

    /**
     * Converts one Pandoc inline.
     *
     * @param {object} inline Pandoc inline (`{t, c}`).
     * @returns {object|Array|null} A node, a list of nodes, or null when the
     *     inline is skipped or not supported.
     */
    convertInline(inline) {
        if (!inline) {
            return null
        }

        switch (inline.t) {
            case "Cite":
                return this.convertCitation(inline)
            case "Image": {
                // c = [[id, classes, keyValuePairs], caption inlines, [url, title]]
                const [imageAttr, imageCaption, imageTarget] = inline.c
                const imagePath = imageTarget[0]
                const keyValuePairs = imageAttr?.[2] || []

                // Skip small decorative images. The threshold is in inches,
                // so it is not applied to relative (percentage) widths.
                const widthInfo = keyValuePairs.find(
                    attr => attr[0] === "width"
                )
                if (widthInfo && !String(widthInfo[1]).trim().endsWith("%")) {
                    const width = parseFloat(widthInfo[1]) // in inches
                    if (width < this.SMALL_IMAGE_THRESHOLD) {
                        console.warn(
                            `Skipping small decorative image: ${imagePath} (width: ${width}in)`
                        )
                        return null
                    }
                }

                const imageId = this._registerImage(imagePath)

                // Create a figure with optional caption. The caption is
                // sliced rather than shifted so the input AST stays intact.
                let caption = Array.isArray(imageCaption) ? imageCaption : []
                let category = "none"
                if (
                    caption.length &&
                    ["Figure", "Table", "Photo"].includes(caption[0].c)
                ) {
                    category = caption[0].c.toLowerCase()
                    // Drop category name ("Figure"), space, number ("1:"), space.
                    caption = caption.slice(4)
                }

                return {
                    type: "figure",
                    attrs: {
                        aligned: "center",
                        width: this.extractImageWidth(keyValuePairs),
                        category,
                        caption: Boolean(caption.length)
                    },
                    content: [
                        {
                            type: "image",
                            attrs: {
                                image: imageId
                            }
                        },
                        {
                            type: "figure_caption",
                            content: this.convertInlines(caption)
                        }
                    ]
                }
            }
            case "Str":
                return {
                    type: "text",
                    text: inline.c
                }
            case "Space":
                return {
                    type: "text",
                    text: " "
                }
            case "Strong":
                return mergeTextNodes(
                    applyMarkToNodes(this.convertInlines(inline.c), "strong")
                )
            case "Emph":
                return mergeTextNodes(
                    applyMarkToNodes(this.convertInlines(inline.c), "em")
                )
            case "Underline":
                return mergeTextNodes(
                    applyMarkToNodes(
                        this.convertInlines(inline.c),
                        "underline"
                    )
                )
            case "Strikeout":
                return applyAnnotation(
                    this.convertInlines(inline.c),
                    "strikeout"
                )
            case "SmallCaps":
                return applyAnnotation(
                    this.convertInlines(inline.c),
                    "smallcaps"
                )
            case "Superscript":
                return mergeTextNodes(
                    applyMarkToNodes(this.convertInlines(inline.c), "sup")
                )
            case "Subscript":
                return mergeTextNodes(
                    applyMarkToNodes(this.convertInlines(inline.c), "sub")
                )
            case "Code":
                return {
                    type: "text",
                    text: inline.c[1],
                    marks: [{type: "code"}]
                }
            case "Link":
                return mergeTextNodes(
                    applyMarkToNodes(
                        this.convertInlines(inline.c[1]),
                        "link",
                        {href: inline.c[2][0]}
                    )
                )
            case "Note": {
                const firstBlock = inline.c[0]
                if (
                    inline.c.length === 1 &&
                    firstBlock.t === "Para" &&
                    firstBlock.c.length === 2 &&
                    firstBlock.c[0].t === "Cite" &&
                    firstBlock.c[1].t === "Str" &&
                    firstBlock.c[1].c === "."
                ) {
                    // This is a citation note rendered as a footnote.
                    return this.convertInline(firstBlock.c[0])
                }

                return {
                    type: "footnote",
                    attrs: {
                        footnote: this.convertBlocks(inline.c)
                    }
                }
            }
            case "Math":
                return {
                    type: "equation",
                    attrs: {
                        equation: inline.c[1]
                    }
                }
            case "Quoted": {
                const isSingle = inline.c[0].t === "SingleQuote"
                const quoteStart = isSingle ? "‘" : "“" // U+2018, U+201C
                const quoteEnd = isSingle ? "’" : "”" // U+2019, U+201D
                return mergeTextNodes([
                    {type: "text", text: quoteStart},
                    ...this.convertInlines(inline.c[1]),
                    {type: "text", text: quoteEnd}
                ])
            }
            case "RawBlock":
            case "RawInline": {
                // Pandoc serializes raw content as c = [format, text].
                const [format, text] = Array.isArray(inline.c)
                    ? inline.c
                    : [inline.format, inline.text]
                return [
                    {
                        type: "text",
                        text: `[RAW CONTENT: ${text}]`,
                        marks: [
                            {
                                type: "annotation_tag",
                                attrs: {
                                    type: "raw",
                                    key: format,
                                    value: ""
                                }
                            }
                        ]
                    }
                ]
            }
            case "SoftBreak":
                return {type: "text", text: " "}
            case "LineBreak":
                return {type: "hard_break"}
            case "Span": {
                // Check if this is a Zotero CSL citation
                const spanId = inline.c[0][0]
                if (spanId && spanId.startsWith("ZOTERO_ITEM CSL_CITATION")) {
                    try {
                        // Extract just the JSON portion
                        const jsonStr = spanId.replace(
                            "ZOTERO_ITEM CSL_CITATION ",
                            ""
                        )
                        const lastBrace = jsonStr.lastIndexOf("}") + 1
                        const cslData = JSON.parse(
                            jsonStr.substring(0, lastBrace)
                        )

                        // Create citation references
                        const citations = cslData.citationItems.map(item => {
                            const id = String(item.itemData.id)

                            // find in bibliography
                            let bibKey = Object.entries(
                                this.bibliography
                            ).find(
                                ([, entry]) => entry?.entry_key === id
                            )?.[0]
                            if (!bibKey) {
                                // Not yet present in bibliography. We'll parse the CSL data and add it.
                                const parseData = parseCSL({
                                    [id]: item.itemData
                                })
                                const bibEntry = parseData["1"]
                                if (!bibEntry) {
                                    throw new Error(
                                        `Could not parse CSL data of item ${id}`
                                    )
                                }
                                // Never reuse a key that is already taken.
                                let nextKey =
                                    Object.keys(this.bibliography).length + 1
                                while (String(nextKey) in this.bibliography) {
                                    nextKey += 1
                                }
                                bibKey = `${nextKey}`
                                this.bibliography[bibKey] = bibEntry
                            }
                            return {
                                id: bibKey,
                                prefix: item.prefix || "",
                                locator: item.locator || ""
                            }
                        })

                        return {
                            type: "citation",
                            attrs: {
                                format: "cite",
                                references: citations
                            }
                        }
                    } catch (error) {
                        console.warn("Failed to parse CSL citation:", error)
                    }
                }
                // If not a citation or parsing failed, fall through to regular text
                return this.convertInlines(inline.c[1])
            }
            default:
                console.warn(`Unhandled inline type: ${inline.t}`)
                return null
        }
    }

    /**
     * Reads the "width" key from an attribute key-value list and returns it
     * as a percentage (at most 100). Values ending in "%" are used directly;
     * other values are read as inches relative to an 8.5 inch wide page.
     *
     * @param {Array} attrs Key-value pairs, e.g. [["width", "3in"]].
     * @returns {number} Width in percent; 100 when absent or unparsable.
     */
    extractImageWidth(attrs) {
        const widthAttr = (attrs || []).find(attr => attr[0] === "width")
        if (!widthAttr) {
            return 100 // default width
        }
        const rawWidth = String(widthAttr[1]).trim()
        const value = parseFloat(rawWidth)
        if (!Number.isFinite(value)) {
            return 100
        }
        // Convert inch measurement to percentage (assuming max width is 8.5 inches)
        const percentage = rawWidth.endsWith("%") ? value : (value / 8.5) * 100
        return Math.min(Math.round(percentage), 100)
    }

    /**
     * Converts a Pandoc Table block.
     *
     * Layout of `table.c`:
     *   [0] Attr: [identifier, classes, key-value pairs]
     *   [1] Caption: [short caption inlines or null, caption blocks]
     *   [2] [ColSpec], one per table column
     *   [3] TableHead: [Attr, [Row]]
     *   [4] [TableBody], each: [Attr, RowHeadColumns, [Row] head, [Row] body]
     *   [5] TableFoot: [Attr, [Row]]
     * Row:  [Attr, [Cell]]
     * Cell: [Attr, Alignment, RowSpan, ColSpan, [Block]]
     *
     * @param {object} table Pandoc Table block.
     * @returns {object} "table" node with a caption and a body.
     */
    convertTable(table) {
        const attrs = {
            width: 100,
            aligned: "center",
            layout: "fixed"
        }

        // Extract table attributes
        const tableAttrs = table.c[0]?.[2] || []
        tableAttrs.forEach(attr => {
            if (attr[0] === "width") {
                const width = Number.parseInt(attr[1], 10)
                if (!Number.isNaN(width)) {
                    attrs.width = width
                }
            } else if (attr[0] === "aligned") {
                attrs.aligned = attr[1]
            } else if (attr[0] === "layout") {
                attrs.layout = attr[1]
            }
        })

        // Head, body and foot rows all end up in one table body.
        const headRows = table.c[3]?.[1] || []
        const bodyRows = (table.c[4] || []).flatMap(tableBody => [
            ...(tableBody[2] || []),
            ...(tableBody[3] || [])
        ])
        const footRows = table.c[5]?.[1] || []
        const rows = [...headRows, ...bodyRows, ...footRows]

        // The full caption is the list of blocks; the short caption is only
        // used when there are no caption blocks.
        const [shortCaption, captionBlocks] = table.c[1] || []
        const captionContent =
            Array.isArray(captionBlocks) && captionBlocks.length
                ? this._captionBlocksToInlines(captionBlocks)
                : this.convertInlines(shortCaption || [])

        return {
            type: "table",
            attrs,
            content: [
                {
                    type: "table_caption",
                    content: captionContent
                },
                {
                    type: "table_body",
                    content: rows.map(row => ({
                        type: "table_row",
                        content: row[1].map(cell => {
                            const cellContent = this.convertBlocks(cell[4])
                            if (cellContent.length === 0) {
                                // A cell needs at least one block.
                                cellContent.push({type: "paragraph"})
                            }
                            return {
                                type: "table_cell",
                                attrs: {
                                    colspan: cell[3],
                                    rowspan: cell[2]
                                },
                                content: cellContent
                            }
                        })
                    }))
                }
            ]
        }
    }

    /**
     * Maps a file name to an image MIME type based on its extension.
     *
     * @param {string} filename File name or path.
     * @returns {string} MIME type; "image/png" for unknown extensions.
     */
    getImageFileType(filename) {
        const ext = String(filename).split(".").pop().toLowerCase()
        switch (ext) {
            case "avif":
            case "avifs":
                return "image/avif"
            case "png":
                return "image/png"
            case "jpg":
            case "jpeg":
                return "image/jpeg"
            case "gif":
                return "image/gif"
            case "svg":
                return "image/svg+xml"
            case "webp":
                return "image/webp"
            default:
                return "image/png" // Default fallback
        }
    }

    /**
     * Converts a Pandoc Figure block (c = [Attr, Caption, [Block]]) that
     * contains an image.
     *
     * @param {object} figure Pandoc Figure block.
     * @returns {object|null} "figure" node, or null when no image is found.
     */
    convertFigure(figure) {
        const captionBlocks = figure.c?.[1]?.[1] || []
        const attrs = {
            aligned: "center",
            width: 100,
            // Same attribute name as used for figures created from inline images.
            category: "none",
            caption: Boolean(captionBlocks.length)
        }

        // Extract figure attributes
        const figureAttrs = figure.c?.[0]?.[2] || []
        figureAttrs.forEach(attr => {
            if (attr[0] === "width") {
                const width = Number.parseInt(attr[1], 10)
                if (!Number.isNaN(width)) {
                    attrs.width = width
                }
            } else if (attr[0] === "aligned") {
                attrs.aligned = attr[1]
            } else if (attr[0] === "category") {
                attrs.category = attr[1]
            }
        })

        const image = this._findFirstImage(figure.c?.[2])
        if (!image) {
            console.warn("Skipping figure without image")
            return null
        }
        const imageId = this._registerImage(image.c[2][0])

        return {
            type: "figure",
            attrs,
            content: [
                {
                    type: "image",
                    attrs: {
                        image: imageId
                    }
                },
                {
                    type: "figure_caption",
                    content: this._captionBlocksToInlines(captionBlocks)
                }
            ]
        }
    }

    /**
     * Converts a Pandoc Cite inline into a citation node. References whose
     * `citationId` is not the `entry_key` of a bibliography entry are dropped.
     *
     * @param {object} cite Pandoc Cite inline (c = [[Citation], [Inline]]).
     * @returns {object|null} "citation" node, or null without any known reference.
     */
    convertCitation(cite) {
        const citations = Array.isArray(cite.c?.[0]) ? cite.c[0] : []
        const bibEntries = this.bibliography
            ? Object.entries(this.bibliography)
            : []

        const references = citations
            .map(ref => {
                const foundEntry = bibEntries.find(
                    ([, definition]) =>
                        definition?.entry_key === ref.citationId
                )
                if (!foundEntry) {
                    return null
                }
                return {
                    id: foundEntry[0],
                    prefix: this._inlinesToText(ref.citationPrefix).trim(),
                    locator: this._inlinesToText(ref.citationSuffix).trim()
                }
            })
            .filter(Boolean)

        if (!references.length) {
            return null
        }
        return {
            type: "citation",
            attrs: {
                format:
                    citations[0].citationMode?.t === "AuthorInText"
                        ? "textcite"
                        : "cite",
                references
            }
        }
    }

    /** Creates a random heading id ("H" followed by up to 7 characters). */
    _randomHeadingId() {
        return "H" + Math.random().toString(36).slice(2, 9)
    }

    /** Stores an image reference under a fresh random id and returns the id. */
    _registerImage(imagePath) {
        let imageId
        do {
            imageId = Math.floor(Math.random() * 1000000)
        } while (this.images[imageId]) // avoid overwriting an earlier image
        const imageTitle = String(imagePath).split("/").pop()

        this.images[imageId] = {
            id: imageId,
            title: imageTitle,
            copyright: {
                holder: false,
                year: false,
                freeToRead: true,
                licenses: []
            },
            image: imagePath,
            file_type: this.getImageFileType(imageTitle),
            file: null,
            checksum: 0
        }
        return imageId
    }

    /** Returns the first Image inline found directly inside the given blocks. */
    _findFirstImage(blocks) {
        for (const block of Array.isArray(blocks) ? blocks : []) {
            if (!Array.isArray(block?.c)) {
                continue
            }
            const image = block.c.find(inline => inline?.t === "Image")
            if (image) {
                return image
            }
        }
        return null
    }

    /** Converts caption blocks and concatenates the content of the results. */
    _captionBlocksToInlines(blocks) {
        return this.convertBlocks(blocks)
            .map(block => block.content || [])
            .flat()
    }

    /**
     * Returns the inlines of a meta value: a string is split into words,
     * the paragraphs of a MetaBlocks are joined with spaces, any other
     * array content is returned as is.
     */
    _metaToInlines(meta) {
        const value = meta?.c
        const joinWithSpace = (parts, index) =>
            index ? [{t: "Space"}, ...parts] : parts

        if (typeof value === "string") {
            return value
                .split(/\s+/)
                .filter(Boolean)
                .flatMap((word, index) =>
                    joinWithSpace([{t: "Str", c: word}], index)
                )
        }
        if (!Array.isArray(value)) {
            return []
        }
        if (meta.t !== "MetaBlocks") {
            return value
        }
        return value
            .filter(
                block =>
                    ["Para", "Plain"].includes(block?.t) &&
                    Array.isArray(block.c)
            )
            .flatMap((block, index) => joinWithSpace(block.c, index))
    }

    /** Returns the plain text of a list of inlines (or of a plain string). */
    _inlinesToText(inlines) {
        if (typeof inlines === "string") {
            return inlines
        }
        if (!Array.isArray(inlines)) {
            return ""
        }
        return inlines
            .map(inline => {
                if (!inline || typeof inline !== "object") {
                    return ""
                }
                if (["Space", "SoftBreak", "LineBreak"].includes(inline.t)) {
                    return " "
                }
                const content = inline.c
                if (typeof content === "string") {
                    return content
                }
                if (!Array.isArray(content)) {
                    return ""
                }
                if (
                    content.every(
                        child => child && typeof child.t === "string"
                    )
                ) {
                    // Plain container such as Emph or Strong.
                    return this._inlinesToText(content)
                }
                // Elements such as Code, Link or Quoted keep their text or
                // inlines in second position.
                return this._inlinesToText(content[1])
            })
            .join("")
    }
}