@fiduswriter/document 0.1.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +16 -0
- package/jest.config.js +23 -0
- package/package.json +59 -0
- package/schema.json +1 -0
- package/scripts/export-schema.js +16 -0
- package/src/bibliography/common.js +92 -0
- package/src/bibliography/csl_bib.js +139 -0
- package/src/citations/citeproc_sys.js +42 -0
- package/src/citations/format.js +194 -0
- package/src/common/blob.js +10 -0
- package/src/common/file.js +25 -0
- package/src/common/index.js +12 -0
- package/src/common/network.js +79 -0
- package/src/common/text.js +44 -0
- package/src/editor/e2ee/encryptor.js +228 -0
- package/src/exporter/docx/citations.js +177 -0
- package/src/exporter/docx/comments.js +165 -0
- package/src/exporter/docx/footnotes.js +240 -0
- package/src/exporter/docx/images.js +101 -0
- package/src/exporter/docx/index.js +185 -0
- package/src/exporter/docx/lists.js +260 -0
- package/src/exporter/docx/math.js +46 -0
- package/src/exporter/docx/metadata.js +289 -0
- package/src/exporter/docx/rels.js +193 -0
- package/src/exporter/docx/render.js +941 -0
- package/src/exporter/docx/richtext.js +1182 -0
- package/src/exporter/docx/tables.js +112 -0
- package/src/exporter/docx/tools.js +50 -0
- package/src/exporter/epub/index.js +142 -0
- package/src/exporter/epub/templates.js +140 -0
- package/src/exporter/epub/tools.js +96 -0
- package/src/exporter/html/citations.js +121 -0
- package/src/exporter/html/convert.js +813 -0
- package/src/exporter/html/index.js +192 -0
- package/src/exporter/html/templates.js +34 -0
- package/src/exporter/html/tools.js +50 -0
- package/src/exporter/jats/bibliography.js +183 -0
- package/src/exporter/jats/citations.js +109 -0
- package/src/exporter/jats/convert.js +871 -0
- package/src/exporter/jats/index.js +92 -0
- package/src/exporter/jats/templates.js +35 -0
- package/src/exporter/jats/text.js +72 -0
- package/src/exporter/latex/convert.js +934 -0
- package/src/exporter/latex/escape_latex.js +21 -0
- package/src/exporter/latex/index.js +74 -0
- package/src/exporter/latex/readme.js +22 -0
- package/src/exporter/native/shrink.js +132 -0
- package/src/exporter/odt/citations.js +101 -0
- package/src/exporter/odt/footnotes.js +147 -0
- package/src/exporter/odt/images.js +115 -0
- package/src/exporter/odt/index.js +156 -0
- package/src/exporter/odt/math.js +57 -0
- package/src/exporter/odt/metadata.js +251 -0
- package/src/exporter/odt/render.js +806 -0
- package/src/exporter/odt/richtext.js +865 -0
- package/src/exporter/odt/styles.js +387 -0
- package/src/exporter/odt/track.js +68 -0
- package/src/exporter/pandoc/citations.js +98 -0
- package/src/exporter/pandoc/convert.js +1017 -0
- package/src/exporter/pandoc/index.js +92 -0
- package/src/exporter/pandoc/readme.js +8 -0
- package/src/exporter/pandoc/tools.js +51 -0
- package/src/exporter/print/index.js +177 -0
- package/src/exporter/tools/doc_content.js +144 -0
- package/src/exporter/tools/file.js +9 -0
- package/src/exporter/tools/json.js +73 -0
- package/src/exporter/tools/svg.js +29 -0
- package/src/exporter/tools/xml.js +531 -0
- package/src/exporter/tools/xml_zip.js +95 -0
- package/src/exporter/tools/zip.js +90 -0
- package/src/exporter/tools/zotero_csl.js +93 -0
- package/src/importer/citations.js +129 -0
- package/src/importer/docx/citations.js +123 -0
- package/src/importer/docx/convert.js +1427 -0
- package/src/importer/docx/helpers.js +9 -0
- package/src/importer/docx/omml2mathml.js +1448 -0
- package/src/importer/docx/parse.js +735 -0
- package/src/importer/native/get_images.js +76 -0
- package/src/importer/native/update.js +29 -0
- package/src/importer/odt/citations.js +87 -0
- package/src/importer/odt/convert.js +1855 -0
- package/src/importer/pandoc/convert.js +884 -0
- package/src/importer/pandoc/helpers.js +84 -0
- package/src/importer/zip_analyzer.js +102 -0
- package/src/index.js +1 -0
- package/src/mathlive/opf_includes.js +24 -0
- package/src/schema/common/annotate.js +76 -0
- package/src/schema/common/base.js +118 -0
- package/src/schema/common/citation.js +62 -0
- package/src/schema/common/equation.js +31 -0
- package/src/schema/common/figure.js +190 -0
- package/src/schema/common/heading.js +43 -0
- package/src/schema/common/index.js +40 -0
- package/src/schema/common/list.js +95 -0
- package/src/schema/common/reference.js +100 -0
- package/src/schema/common/table.js +103 -0
- package/src/schema/common/track.js +190 -0
- package/src/schema/const.js +58 -0
- package/src/schema/convert.js +1272 -0
- package/src/schema/document/content.js +187 -0
- package/src/schema/document/index.js +117 -0
- package/src/schema/document/structure.js +452 -0
- package/src/schema/export.js +21 -0
- package/src/schema/footnotes.js +126 -0
- package/src/schema/footnotes_convert.js +31 -0
- package/src/schema/i18n.js +595 -0
- package/src/schema/index.js +5 -0
- package/src/schema/mini_json.js +61 -0
- package/src/schema/text.js +22 -0
|
@@ -0,0 +1,884 @@
|
|
|
1
|
+
import {parseCSL} from "biblatex-csl-converter"
|
|
2
|
+
|
|
3
|
+
import {applyAnnotation, applyMarkToNodes, mergeTextNodes} from "./helpers.js"
|
|
4
|
+
|
|
5
|
+
/**
 * Converts a parsed Pandoc JSON document into the nested
 * `{type, attrs, content}` document structure built by `convert()`.
 *
 * Typical use: construct with the parsed JSON and call `init()`.
 * Image records for every converted image are collected in `this.images`,
 * and citations found in Zotero spans may add entries to the `bibliography`
 * object that was passed in.
 */
export class PandocConvert {
    /**
     * @param {Object} doc - Parsed Pandoc JSON (`pandoc-api-version`, `meta`, `blocks`).
     * @param {*} importId - Stored in the document attrs and in the returned settings.
     * @param {Object} template - Template whose `content.content` lists the document parts.
     * @param {Object} bibliography - Entries keyed by id; each entry is matched via `entry_key`.
     */
    constructor(doc, importId, template, bibliography) {
        this.doc = doc
        this.importId = importId
        this.template = template
        this.bibliography = bibliography

        // Image records, stored under their generated numeric image id.
        this.images = []

        // Optional chaining on `doc` lets a missing document reach
        // validatePandocFormat() instead of failing here.
        this.language = this.doc?.meta?.lang?.c?.[0]?.c || "en-US"

        this.SMALL_IMAGE_THRESHOLD = 1.0 // Smaller images will be discarded (in inches)
    }

    /**
     * Validates the input and runs the conversion.
     *
     * @returns {{content: Object, settings: Object}} Converted document plus import settings.
     * @throws {Error} "Invalid Pandoc document format: …" when validation fails;
     *     the original validation error is attached as `cause`.
     */
    init() {
        try {
            this.validatePandocFormat()
        } catch (error) {
            console.error("Pandoc format validation failed:", error)
            throw new Error(
                "Invalid Pandoc document format: " + error.message,
                {cause: error}
            )
        }

        return {
            content: this.convert(),
            settings: {
                import_id: this.importId,
                tracked: false,
                language: this.language
            }
        }
    }

    /**
     * Performs basic structural checks on `this.doc`.
     *
     * @returns {boolean} `true` when all checks pass.
     * @throws {Error} Describing the first failed check.
     */
    validatePandocFormat() {
        const doc = this.doc
        if (!doc || typeof doc !== "object") {
            throw new Error("Missing or invalid document")
        }

        // Check API version (Pandoc uses [major, minor, patch] or [major, minor, patch, tweak])
        const apiVersion = doc["pandoc-api-version"]
        if (
            !Array.isArray(apiVersion) ||
            apiVersion.length < 3 ||
            !apiVersion.every(num => typeof num === "number")
        ) {
            throw new Error("Invalid or missing Pandoc API version")
        }

        // Check for required top-level properties
        if (!Array.isArray(doc.blocks)) {
            throw new Error("Missing or invalid blocks property")
        }

        // Check meta property structure if it exists
        if (doc.meta && typeof doc.meta !== "object") {
            throw new Error("Invalid meta property")
        }

        // Block constructors without arguments are serialized without a "c"
        // key, so they must not be rejected for lacking one.
        const argumentlessBlocks = ["Null", "HorizontalRule"]
        const isValidBlock = block =>
            Boolean(block) &&
            typeof block === "object" &&
            typeof block.t === "string" &&
            ("c" in block || argumentlessBlocks.includes(block.t))

        // Basic validation of block structure
        if (!doc.blocks.every(isValidBlock)) {
            throw new Error("Invalid block structure")
        }

        return true
    }

    /**
     * Builds the complete output document: title, optional subtitle, authors
     * and abstract parts (taken from `doc.meta`), followed by the body part.
     * Part titles and ids come from the matching template part when one exists.
     *
     * @returns {Object} Node of type "doc".
     */
    convert() {
        // The first template part is dropped before looking parts up.
        const templateParts = this.template.content.content.slice(1)
        const findMetadataPart = name =>
            templateParts.find(part => part.attrs?.metadata === name)
        const meta = this.doc.meta || {}

        // Create the outer document structure
        const document = {
            type: "doc",
            attrs: {
                import_id: this.importId
            },
            content: []
        }

        // Add title (required first element)
        document.content.push({
            type: "title",
            content: this.convertInlines(
                meta.title?.c
                    ? this.metaToInlines(meta.title)
                    : [{t: "Str", c: "Untitled"}]
            )
        })

        // Add subtitle if present
        if (meta.subtitle?.c) {
            const templatePart = findMetadataPart("subtitle")
            document.content.push({
                type: "heading_part",
                attrs: {
                    title: templatePart ? templatePart.attrs.title : "Subtitle",
                    id: templatePart ? templatePart.attrs.id : "subtitle",
                    metadata: "subtitle"
                },
                content: [
                    {
                        type: "heading1",
                        attrs: {
                            id: this.generateHeadingId()
                        },
                        content: this.convertInlines(
                            this.metaToInlines(meta.subtitle)
                        )
                    }
                ]
            })
        }

        // Add authors if present
        if (meta.author?.c) {
            const templatePart = findMetadataPart("authors")
            document.content.push({
                type: "contributors_part",
                attrs: {
                    title: templatePart ? templatePart.attrs.title : "Authors",
                    id: templatePart ? templatePart.attrs.id : "authors",
                    metadata: "authors"
                },
                content: this.metaAuthorList(meta.author).map(author => ({
                    type: "contributor",
                    attrs: this.convertContributor(author)
                }))
            })
        }

        // Add abstract if present
        if (meta.abstract?.c) {
            const templatePart = findMetadataPart("abstract")
            // `gettext` is not defined in this file; fall back to the plain
            // string when no such global exists.
            const defaultTitle =
                typeof gettext === "function" ? gettext("Abstract") : "Abstract"
            document.content.push({
                type: "richtext_part",
                attrs: {
                    title: templatePart ? templatePart.attrs.title : defaultTitle,
                    id: templatePart ? templatePart.attrs.id : "abstract",
                    metadata: "abstract"
                },
                content: this.convertBlocks(this.metaToBlocks(meta.abstract))
            })
        }

        const bodyPart = templateParts.find(
            part => !part.attrs?.metadata && part.type === "richtext_part"
        )
        // Add main body content
        document.content.push({
            type: "richtext_part",
            attrs: {
                title: bodyPart ? bodyPart.attrs.title : "Body",
                id: bodyPart ? bodyPart.attrs.id : "body",
                marks: ["strong", "em", "link"]
            },
            content: this.convertBlocks(this.doc.blocks)
        })

        return document
    }

    /**
     * Generates an id of the form "H" + up to seven base-36 characters.
     *
     * @returns {string}
     */
    generateHeadingId() {
        return "H" + Math.random().toString(36).slice(2, 9)
    }

    /**
     * Normalizes a metadata value to a list of Pandoc inlines.
     * A string value is split into `Str`/`Space` inlines; for "MetaBlocks" the
     * inlines of its `Para`/`Plain` blocks are joined with a `Space`; any other
     * array value is returned unchanged.
     *
     * @param {Object} metaValue - Metadata value of the form `{t, c}`.
     * @returns {Array} Pandoc inlines (empty when nothing usable is found).
     */
    metaToInlines(metaValue) {
        const value = metaValue?.c
        if (typeof value === "string") {
            return value
                .split(/\s+/)
                .filter(word => word)
                .flatMap((word, index) =>
                    index === 0
                        ? [{t: "Str", c: word}]
                        : [{t: "Space"}, {t: "Str", c: word}]
                )
        }
        if (!Array.isArray(value)) {
            return []
        }
        if (metaValue.t === "MetaBlocks") {
            return value
                .filter(
                    block =>
                        block &&
                        (block.t === "Para" || block.t === "Plain") &&
                        Array.isArray(block.c)
                )
                .flatMap((block, index) =>
                    index === 0 ? block.c : [{t: "Space"}, ...block.c]
                )
        }
        return value
    }

    /**
     * Normalizes a metadata value to a list of Pandoc blocks. Inline or string
     * values are wrapped in a single `Para`.
     *
     * @param {Object} metaValue - Metadata value of the form `{t, c}`.
     * @returns {Array} Pandoc blocks.
     */
    metaToBlocks(metaValue) {
        const type = metaValue?.t
        if (type === "MetaInlines" || type === "MetaString") {
            const inlines = this.metaToInlines(metaValue)
            return inlines.length ? [{t: "Para", c: inlines}] : []
        }
        return Array.isArray(metaValue?.c) ? metaValue.c : []
    }

    /**
     * Returns one metadata value per author. A "MetaList" contributes each of
     * its elements; a single inline, string or block value counts as one author.
     *
     * @param {Object} authorMeta - The `author` metadata value.
     * @returns {Array<Object>}
     */
    metaAuthorList(authorMeta) {
        if (authorMeta.t === "MetaList") {
            return Array.isArray(authorMeta.c) ? authorMeta.c : []
        }
        if (["MetaInlines", "MetaString", "MetaBlocks"].includes(authorMeta.t)) {
            return [authorMeta]
        }
        return Array.isArray(authorMeta.c) ? authorMeta.c : [authorMeta]
    }

    /**
     * Derives contributor attrs from one author metadata value.
     * The last `Str` inline becomes the last name and the preceding ones the
     * first name; the text of a `Note` inline, if present, becomes the email.
     *
     * @param {Object} author - Author metadata value.
     * @returns {{firstname: string, lastname: string, email: string, institution: string}}
     */
    convertContributor(author) {
        const attrs = {
            firstname: "",
            lastname: "",
            email: "",
            institution: ""
        }
        if (!author?.c) {
            return attrs
        }

        const inlines = this.metaToInlines(author)

        // Extract name components
        const nameParts = inlines
            .filter(part => part.t === "Str")
            .map(part => part.c)
        if (nameParts.length > 1) {
            attrs.lastname = nameParts.pop()
            attrs.firstname = nameParts.join(" ")
        } else if (nameParts.length === 1) {
            attrs.lastname = nameParts[0]
        }

        // Extract email from notes if present
        const note = inlines.find(part => part.t === "Note")
        if (note) {
            attrs.email = this.convertInlines(note.c?.[0]?.c)
                .map(node => node.text || "")
                .join("")
        }

        return attrs
    }

    /**
     * Converts a list of Pandoc blocks, dropping blocks that yield nothing.
     *
     * @param {Array} blocks - Pandoc blocks; anything that is not an array gives `[]`.
     * @returns {Array<Object>} Converted block nodes.
     */
    convertBlocks(blocks) {
        if (!Array.isArray(blocks)) {
            return []
        }
        return blocks
            .flatMap(block => this.convertBlock(block))
            .filter(block => block)
    }

    /**
     * Converts one Pandoc block. One input block can produce several output
     * nodes (e.g. a paragraph that is split around an image).
     *
     * @param {Object} block - Pandoc block `{t, c}`.
     * @returns {Array<Object>|Object} Converted node(s); unhandled types give `[]`.
     */
    convertBlock(block) {
        if (!block) {
            return []
        }
        switch (block.t) {
            case "CodeBlock": {
                const [attrs, code] = block.c
                // attrs structure: [id, classes, keyValuePairs]
                // Example: ["ref-label", ["python3"], [["caption", "The Caption"], ["linenos", ""]]]
                const id = attrs?.[0] || ""
                const language = attrs?.[1]?.[0] || "" // First class is language
                const keyValuePairs = attrs?.[2] || []

                // Extract caption and category from key-value pairs
                const captionPair = keyValuePairs.find(
                    pair => pair[0] === "caption"
                )
                const categoryPair = keyValuePairs.find(
                    pair => pair[0] === "category"
                )
                const title = captionPair ? captionPair[1] : ""
                let category = ""
                if (categoryPair) {
                    category = categoryPair[1]
                } else if (title) {
                    // If there's a caption but no explicit category, default to 'listing'
                    // This makes the code block referenceable and properly numbered
                    category = "listing"
                }

                return [
                    {
                        type: "code_block",
                        attrs: {
                            track: [],
                            language,
                            category,
                            title,
                            id
                        },
                        content: [{type: "text", text: code}]
                    }
                ]
            }
            case "Div":
                // Handle special figure containers
                // NOTE(review): Pandoc's JSON normally stores Div attributes in
                // `c[0]`, so `block.attr` is presumably never set for standard
                // Pandoc output - confirm before relying on this branch.
                if (block.attr?.classes?.includes("figure")) {
                    return this.convertFigure(block)
                }
                // Ignore otherwise. Could be bibliography
                // or other non-content block
                return []
            case "Para":
            case "Plain": {
                // Process each inline, splitting into paragraphs and figures
                const blocks = []
                let currentInlines = []
                const flushParagraph = () => {
                    if (currentInlines.length > 0) {
                        blocks.push({
                            type: "paragraph",
                            content: this.convertInlines(currentInlines)
                        })
                        currentInlines = []
                    }
                }
                for (const inline of block.c || []) {
                    if (inline.t === "Image") {
                        // Close the running paragraph, then add the image as
                        // a figure block. Skipped images yield null, which
                        // convertBlocks() filters out.
                        flushParagraph()
                        blocks.push(this.convertInline(inline))
                    } else {
                        currentInlines.push(inline)
                    }
                }
                // Add remaining inlines as a paragraph
                flushParagraph()
                return blocks
            }
            case "Header":
                return [
                    {
                        type: `heading${block.c[0]}`,
                        attrs: {
                            // Headers without an identifier get a generated
                            // id, like the subtitle heading in convert().
                            id: block.c[1]?.[0] || this.generateHeadingId()
                        },
                        content: this.convertInlines(block.c[2])
                    }
                ]
            case "BlockQuote":
                return [
                    {
                        type: "blockquote",
                        content: this.convertBlocks(block.c)
                    }
                ]
            case "BulletList":
                return [
                    {
                        type: "bullet_list",
                        content: block.c.map(item => ({
                            type: "list_item",
                            content: this.convertBlocks(item)
                        }))
                    }
                ]
            case "DefinitionList": {
                // Each item is a [term inlines, [definition blocks, ...]] pair.
                // Object-shaped items with `term`/`definitions` keys are still
                // accepted.
                return block.c.flatMap(item => {
                    const [term, definitions] = Array.isArray(item)
                        ? item
                        : [item.term, item.definitions]
                    return [
                        {
                            type: "paragraph",
                            content: applyMarkToNodes(
                                this.convertInlines(term),
                                "strong"
                            )
                        },
                        {
                            type: "bullet_list",
                            content: (definitions || []).map(def => ({
                                type: "list_item",
                                content: this.convertBlocks(def)
                            }))
                        }
                    ]
                })
            }
            case "OrderedList":
                return [
                    {
                        type: "ordered_list",
                        attrs: {
                            order: block.c[0][0]
                        },
                        content: block.c[1].map(item => ({
                            type: "list_item",
                            content: this.convertBlocks(item)
                        }))
                    }
                ]
            case "Table":
                return [this.convertTable(block)]
            case "Figure":
                return [this.convertFigure(block)]
            default:
                console.warn(`Unhandled block type: ${block.t}`)
                return []
        }
    }

    /**
     * Converts a list of Pandoc inlines. Results are flattened, a hard break
     * in first or last position is removed, and the nodes are passed through
     * `mergeTextNodes`.
     *
     * @param {Array} inlines - Pandoc inlines; anything that is not an array gives `[]`.
     * @returns {Array<Object>} Converted inline nodes.
     */
    convertInlines(inlines) {
        if (!Array.isArray(inlines)) {
            return []
        }
        // Convert each inline element, flatten, and merge adjacent text nodes with same marks
        const convertedNodes = inlines
            .map(inline => this.convertInline(inline))
            .filter(inline => inline)
            .flat()

        // Remove hard breaks at start and end
        const lastIndex = convertedNodes.length - 1
        const filteredNodes = convertedNodes.filter(
            (node, index) =>
                node.type !== "hard_break" ||
                (index !== 0 && index !== lastIndex)
        )

        return mergeTextNodes(filteredNodes)
    }

    /**
     * Converts one Pandoc inline.
     *
     * @param {Object} inline - Pandoc inline `{t, c}`.
     * @returns {Object|Array<Object>|null} A node, a list of nodes, or `null`
     *     when the inline is skipped or its type is not handled.
     */
    convertInline(inline) {
        if (!inline) {
            return null
        }

        switch (inline.t) {
            case "Cite":
                return this.convertCitation(inline)
            case "Image":
                return this.convertImage(inline)
            case "Str":
                return {
                    type: "text",
                    text: inline.c
                }
            case "Space":
                return {
                    type: "text",
                    text: " "
                }
            case "Strong": {
                const innerNodes = this.convertInlines(inline.c)
                return mergeTextNodes(applyMarkToNodes(innerNodes, "strong"))
            }
            case "Emph": {
                const innerNodes = this.convertInlines(inline.c)
                return mergeTextNodes(applyMarkToNodes(innerNodes, "em"))
            }
            case "Underline": {
                const innerNodes = this.convertInlines(inline.c)
                return mergeTextNodes(applyMarkToNodes(innerNodes, "underline"))
            }
            case "Strikeout": {
                const inner = this.convertInlines(inline.c)
                return applyAnnotation(inner, "strikeout")
            }
            case "SmallCaps": {
                const inner = this.convertInlines(inline.c)
                return applyAnnotation(inner, "smallcaps")
            }
            case "Superscript": {
                const innerNodes = this.convertInlines(inline.c)
                return mergeTextNodes(applyMarkToNodes(innerNodes, "sup"))
            }
            case "Subscript": {
                const innerNodes = this.convertInlines(inline.c)
                return mergeTextNodes(applyMarkToNodes(innerNodes, "sub"))
            }
            case "Code": {
                const text = inline.c[1]
                return {
                    type: "text",
                    text,
                    marks: [{type: "code"}]
                }
            }
            case "Link": {
                const innerNodes = this.convertInlines(inline.c[1])
                return mergeTextNodes(
                    applyMarkToNodes(innerNodes, "link", {href: inline.c[2][0]})
                )
            }
            case "Note": {
                const [onlyBlock] = inline.c
                if (
                    inline.c.length === 1 &&
                    onlyBlock.t === "Para" &&
                    onlyBlock.c.length === 2 &&
                    onlyBlock.c[0].t === "Cite" &&
                    onlyBlock.c[1].t === "Str" &&
                    onlyBlock.c[1].c === "."
                ) {
                    // This is a citation note rendered as a footnote.
                    return this.convertInline(onlyBlock.c[0])
                }

                return {
                    type: "footnote",
                    attrs: {
                        footnote: this.convertBlocks(inline.c)
                    }
                }
            }
            case "Math":
                return {
                    type: "equation",
                    attrs: {
                        equation: inline.c[1]
                    }
                }
            case "Quoted": {
                const isSingle = inline.c[0].t === "SingleQuote"
                const quoteStart = isSingle ? "‘" : "“" // U+2018, U+201C
                const quoteEnd = isSingle ? "’" : "”" // U+2019, U+201D
                return mergeTextNodes([
                    {type: "text", text: quoteStart},
                    ...this.convertInlines(inline.c[1]),
                    {type: "text", text: quoteEnd}
                ])
            }
            case "RawBlock":
            case "RawInline": {
                // Raw elements carry their data as c = [format, text].
                // Objects with `format`/`text` properties are still accepted.
                const [format, text] = Array.isArray(inline.c)
                    ? inline.c
                    : [inline.format, inline.text]
                return [
                    {
                        type: "text",
                        text: `[RAW CONTENT: ${text}]`,
                        marks: [
                            {
                                type: "annotation_tag",
                                attrs: {
                                    type: "raw",
                                    key: format,
                                    value: ""
                                }
                            }
                        ]
                    }
                ]
            }
            case "SoftBreak":
                return {type: "text", text: " "}
            case "LineBreak":
                return {type: "hard_break"}
            case "Span": {
                // Check if this is a Zotero CSL citation
                const spanId = inline.c?.[0]?.[0]
                if (spanId && spanId.startsWith("ZOTERO_ITEM CSL_CITATION")) {
                    try {
                        const citation = this.convertZoteroCitation(spanId)
                        if (citation) {
                            return citation
                        }
                    } catch (error) {
                        console.warn("Failed to parse CSL citation:", error)
                    }
                }
                // If not a citation or parsing failed, fall through to regular text
                return this.convertInlines(inline.c[1])
            }
            default:
                console.warn(`Unhandled inline type: ${inline.t}`)
                return null
        }
    }

    /**
     * Converts an `Image` inline into a figure node and records the image.
     * A leading "Figure"/"Table"/"Photo" caption word selects the category and
     * is removed from the caption together with the three inlines after it.
     *
     * @param {Object} inline - Pandoc `Image` inline, c = [attr, caption, target].
     * @returns {Object|null} Figure node, or `null` for images below
     *     `SMALL_IMAGE_THRESHOLD`.
     */
    convertImage(inline) {
        const imagePath = inline.c[2][0]
        const keyValues = inline.c[0]?.[2] || []

        // Skip small decorative images
        const widthInfo = keyValues.find(attr => attr[0] === "width")
        if (widthInfo) {
            // The leading number is compared as inches; a unit suffix is not
            // taken into account here.
            const width = parseFloat(widthInfo[1])
            if (width < this.SMALL_IMAGE_THRESHOLD) {
                console.warn(
                    `Skipping small decorative image: ${imagePath} (width: ${widthInfo[1]})`
                )
                return null
            }
        }

        // Store image reference
        const imageId = this.registerImage(imagePath)

        // Create a figure with optional caption. The caption is copied with
        // slice() so that the input document is not modified.
        let caption = inline.c[1] || []
        let category = "none"
        if (
            caption.length &&
            ["Figure", "Table", "Photo"].includes(caption[0].c)
        ) {
            category = caption[0].c.toLowerCase()
            // Drop: category name ("Figure"), space, number ("1:"), space
            caption = caption.slice(4)
        }

        return {
            type: "figure",
            attrs: {
                aligned: "center",
                width: this.extractImageWidth(keyValues),
                category,
                caption: Boolean(caption.length)
            },
            content: [
                {
                    type: "image",
                    attrs: {
                        image: imageId
                    }
                },
                {
                    type: "figure_caption",
                    content: this.convertInlines(caption)
                }
            ]
        }
    }

    /**
     * Stores an image record in `this.images` under a new random numeric id.
     *
     * @param {string} imagePath - Image path as found in the Pandoc document.
     * @returns {number} The id under which the record was stored.
     */
    registerImage(imagePath) {
        // Re-draw when the id is already taken so that an earlier image
        // record is never overwritten.
        let imageId
        do {
            imageId = Math.floor(Math.random() * 1000000)
        } while (this.images[imageId])

        const imageTitle = imagePath.split("/").pop()
        this.images[imageId] = {
            id: imageId,
            title: imageTitle,
            copyright: {
                holder: false,
                year: false,
                freeToRead: true,
                licenses: []
            },
            image: imagePath,
            file_type: this.getImageFileType(imageTitle),
            file: null,
            checksum: 0
        }
        return imageId
    }

    /**
     * Converts the payload of a "ZOTERO_ITEM CSL_CITATION" span into a
     * citation node. Items missing from `this.bibliography` are parsed with
     * `parseCSL` and added to it.
     *
     * @param {string} payload - Span identifier starting with the Zotero marker.
     * @returns {Object|null} Citation node, or `null` if no reference was resolved.
     * @throws {Error} When the payload is not valid JSON or lacks the expected fields.
     */
    convertZoteroCitation(payload) {
        // Extract just the JSON portion
        const jsonStr = payload.replace("ZOTERO_ITEM CSL_CITATION ", "")
        const cslData = JSON.parse(
            jsonStr.substring(0, jsonStr.lastIndexOf("}") + 1)
        )

        // Create citation references
        const references = cslData.citationItems
            .map(item => {
                const id = String(item.itemData.id)

                // find in bibliography
                const match = Object.entries(this.bibliography).find(
                    ([_key, entry]) => entry?.entry_key === id
                )
                let bibKey = match ? match[0] : undefined
                if (!bibKey) {
                    // Not yet present in bibliography. We'll parse the CSL data and add it.
                    const parseData = parseCSL({
                        [id]: item.itemData
                    })
                    const bibEntry = parseData?.["1"]
                    if (!bibEntry) {
                        // Never store an undefined entry; later lookups
                        // read `entry_key` from every stored entry.
                        return null
                    }
                    // Pick the first numeric key that is still free.
                    let nextKey = Object.keys(this.bibliography).length + 1
                    while (
                        Object.prototype.hasOwnProperty.call(
                            this.bibliography,
                            String(nextKey)
                        )
                    ) {
                        nextKey += 1
                    }
                    bibKey = `${nextKey}`
                    this.bibliography[bibKey] = bibEntry
                }
                return {
                    id: bibKey,
                    prefix: item.prefix || "",
                    locator: item.locator || ""
                }
            })
            .filter(reference => reference)

        if (!references.length) {
            return null
        }
        return {
            type: "citation",
            attrs: {
                format: "cite",
                references
            }
        }
    }

    /**
     * Computes a figure width in percent from a list of key-value attributes.
     * A "%" value is used directly. Lengths are converted to inches and scaled
     * against an assumed maximum width of 8.5 inches; values without a known
     * unit are treated as inches.
     *
     * @param {Array<Array<string>>} attrs - Key-value pairs, e.g. [["width", "3in"]].
     * @returns {number} Integer percentage, at most 100; 100 when no usable
     *     width is given.
     */
    extractImageWidth(attrs) {
        const widthAttr = (attrs || []).find(attr => attr[0] === "width")
        if (!widthAttr) {
            return 100 // default width
        }
        const rawValue = String(widthAttr[1]).trim()
        const amount = parseFloat(rawValue)
        if (!Number.isFinite(amount)) {
            return 100 // unparsable width, use the default
        }
        const unit = rawValue
            .replace(/^[-+]?[\d.]+(?:e[-+]?\d+)?/i, "")
            .trim()
            .toLowerCase()
        if (unit === "%") {
            return Math.min(Math.round(amount), 100)
        }
        const unitsPerInch = new Map([
            ["in", 1],
            ["cm", 2.54],
            ["mm", 25.4],
            ["px", 96],
            ["pt", 72]
        ])
        const widthInInches = amount / (unitsPerInch.get(unit) ?? 1)
        // Convert inch measurement to percentage (assuming max width is 8.5 inches)
        return Math.min(Math.round((widthInInches / 8.5) * 100), 100)
    }

    /**
     * Converts a Pandoc `Table` block into a table node with a caption and a
     * single body that holds the head, body and foot rows in that order.
     *
     * @param {Object} table - Pandoc `Table` block.
     * @returns {Object} Node of type "table".
     */
    convertTable(table) {
        const attrs = {
            width: 100,
            aligned: "center",
            layout: "fixed"
        }

        //c[0]: Attr
        //c[0][0]: identifier
        //c[0][1]: classes
        //c[0][2]: key-value pairs
        //c[1]: Caption
        //c[1][0]: (Maybe ShortCaption)
        //c[1][1]: [Block]
        //c[2]: [ColSpec] // per table column
        //c[3]: TableHead
        //c[3][0]: Attrs
        //c[3][1]: [Row]
        //c[4]: [TableBody]
        //c[4][X][0]: Attr
        //c[4][X][1]: RowHeadColumns
        //c[4][X][2]: [Row]
        //c[4][X][3]: [Row]
        //c[5]: TableFoot
        //c[5][0]: Attrs
        //c[5][1]: [Row]

        //Row
        //c[0]: Attrs
        //c[1]: [Cell]

        //Cell
        //c[0]: Attr
        //c[1]: Alignment
        //c[2]: RowSpan
        //c[3]: ColSpan
        //c[4]: [Block]

        // Extract table attributes
        const tableAttrs = table.c[0]?.[2] || []
        tableAttrs.forEach(([key, value]) => {
            if (key === "width") {
                const width = parseInt(value, 10)
                if (!Number.isNaN(width)) {
                    attrs.width = width
                }
            } else if (key === "aligned") {
                attrs.aligned = value
            } else if (key === "layout") {
                attrs.layout = value
            }
        })

        const rows = [
            ...table.c[3][1],
            ...table.c[4].flatMap(tableBody => [
                ...tableBody[2],
                ...tableBody[3]
            ]),
            ...table.c[5][1]
        ]

        // The caption text lives in the block list at c[1][1]; c[1][0] is the
        // optional short caption. Block contents are flattened the same way
        // convertFigure() does it.
        // NOTE(review): no `caption` flag is set on the table attrs here,
        // unlike figures - confirm whether the table node needs one.
        const captionBlocks = table.c[1]?.[1] || []

        return {
            type: "table",
            attrs,
            content: [
                {
                    type: "table_caption",
                    content: this.convertBlocks(captionBlocks).flatMap(
                        block => block.content || []
                    )
                },
                {
                    type: "table_body",
                    content: rows.map(row => ({
                        type: "table_row",
                        content: row[1].map(cell => {
                            const cellContent = this.convertBlocks(cell[4])
                            if (cellContent.length === 0) {
                                // Empty cells still get one paragraph.
                                cellContent.push({type: "paragraph"})
                            }
                            return {
                                type: "table_cell",
                                attrs: {
                                    colspan: cell[3],
                                    rowspan: cell[2]
                                },
                                content: cellContent
                            }
                        })
                    }))
                }
            ]
        }
    }

    /**
     * Maps a file name to an image MIME type based on its extension.
     *
     * @param {string} filename - File name; the part after the last "." is used.
     * @returns {string} MIME type; "image/png" for unknown extensions.
     */
    getImageFileType(filename) {
        const mimeTypes = new Map([
            ["avif", "image/avif"],
            ["avifs", "image/avif"],
            ["png", "image/png"],
            ["jpg", "image/jpeg"],
            ["jpeg", "image/jpeg"],
            ["gif", "image/gif"],
            ["svg", "image/svg+xml"],
            ["webp", "image/webp"]
        ])
        const ext = filename.split(".").pop().toLowerCase()
        return mimeTypes.get(ext) ?? "image/png" // Default fallback
    }

    /**
     * Converts a Pandoc `Figure` block into a figure node and records its image.
     *
     * @param {Object} figure - Pandoc `Figure` block, c = [attr, caption, blocks].
     * @returns {Object|null} Figure node, or `null` (after a warning) when no
     *     image is found in the figure content.
     */
    convertFigure(figure) {
        const caption = figure.c[1]?.[1] || []
        const attrs = {
            aligned: "center",
            width: 100,
            // Same attribute name as the figures created from Image inlines.
            category: "none",
            caption: Boolean(caption.length)
        }

        // Extract figure attributes
        const figureAttrs = figure.c[0]?.[2] || []
        figureAttrs.forEach(([key, value]) => {
            if (key === "width") {
                const width = parseInt(value, 10)
                if (!Number.isNaN(width)) {
                    attrs.width = width
                }
            } else if (key === "aligned") {
                attrs.aligned = value
            } else if (key === "category") {
                attrs.category = value
            }
        })

        // Use the first Image inline found directly inside a content block.
        let imagePath = null
        for (const block of figure.c[2] || []) {
            if (!Array.isArray(block?.c)) {
                continue
            }
            const image = block.c.find(inline => inline?.t === "Image")
            if (image) {
                imagePath = image.c[2][0]
                break
            }
        }
        if (!imagePath) {
            console.warn("Skipping figure without image")
            return null
        }

        // Store image reference
        const imageId = this.registerImage(imagePath)

        return {
            type: "figure",
            attrs,
            content: [
                {
                    type: "image",
                    attrs: {
                        image: imageId
                    }
                },
                {
                    type: "figure_caption",
                    content: this.convertBlocks(caption).flatMap(
                        block => block.content || []
                    )
                }
            ]
        }
    }

    /**
     * Converts a Pandoc `Cite` inline into a citation node. Only references
     * whose `citationId` matches the `entry_key` of a bibliography entry are
     * kept.
     *
     * @param {Object} cite - Pandoc `Cite` inline, c = [citations, inlines].
     * @returns {Object|null} Citation node, or `null` when nothing matches.
     */
    convertCitation(cite) {
        const citations = cite.c?.[0] || []

        // Handle empty bibliography case
        if (
            !this.bibliography ||
            Object.keys(this.bibliography).length === 0 ||
            !citations.length
        ) {
            return null
        }

        // Index the bibliography once; the first entry wins for duplicate keys.
        const idByEntryKey = new Map()
        for (const [bibId, definition] of Object.entries(this.bibliography)) {
            if (definition && !idByEntryKey.has(definition.entry_key)) {
                idByEntryKey.set(definition.entry_key, bibId)
            }
        }

        // Prefix and suffix are inline lists; converting them keeps single
        // spaces for Space inlines instead of producing doubled blanks.
        const inlinesToText = inlines =>
            this.convertInlines(inlines)
                .map(node => node.text || "")
                .join("")

        const references = citations.flatMap(ref => {
            const bibId = idByEntryKey.get(ref.citationId)
            if (!bibId) {
                return []
            }
            return [
                {
                    id: bibId,
                    prefix: inlinesToText(ref.citationPrefix),
                    locator: inlinesToText(ref.citationSuffix)
                }
            ]
        })

        if (!references.length) {
            return null
        }
        return {
            type: "citation",
            attrs: {
                format:
                    citations[0].citationMode?.t === "AuthorInText"
                        ? "textcite"
                        : "cite",
                references
            }
        }
    }
}
|