@fiduswriter/document 0.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/LICENSE +661 -0
  2. package/README.md +16 -0
  3. package/jest.config.js +23 -0
  4. package/package.json +59 -0
  5. package/schema.json +1 -0
  6. package/scripts/export-schema.js +16 -0
  7. package/src/bibliography/common.js +92 -0
  8. package/src/bibliography/csl_bib.js +139 -0
  9. package/src/citations/citeproc_sys.js +42 -0
  10. package/src/citations/format.js +194 -0
  11. package/src/common/blob.js +10 -0
  12. package/src/common/file.js +25 -0
  13. package/src/common/index.js +12 -0
  14. package/src/common/network.js +79 -0
  15. package/src/common/text.js +44 -0
  16. package/src/editor/e2ee/encryptor.js +228 -0
  17. package/src/exporter/docx/citations.js +177 -0
  18. package/src/exporter/docx/comments.js +165 -0
  19. package/src/exporter/docx/footnotes.js +240 -0
  20. package/src/exporter/docx/images.js +101 -0
  21. package/src/exporter/docx/index.js +185 -0
  22. package/src/exporter/docx/lists.js +260 -0
  23. package/src/exporter/docx/math.js +46 -0
  24. package/src/exporter/docx/metadata.js +289 -0
  25. package/src/exporter/docx/rels.js +193 -0
  26. package/src/exporter/docx/render.js +941 -0
  27. package/src/exporter/docx/richtext.js +1182 -0
  28. package/src/exporter/docx/tables.js +112 -0
  29. package/src/exporter/docx/tools.js +50 -0
  30. package/src/exporter/epub/index.js +142 -0
  31. package/src/exporter/epub/templates.js +140 -0
  32. package/src/exporter/epub/tools.js +96 -0
  33. package/src/exporter/html/citations.js +121 -0
  34. package/src/exporter/html/convert.js +813 -0
  35. package/src/exporter/html/index.js +192 -0
  36. package/src/exporter/html/templates.js +34 -0
  37. package/src/exporter/html/tools.js +50 -0
  38. package/src/exporter/jats/bibliography.js +183 -0
  39. package/src/exporter/jats/citations.js +109 -0
  40. package/src/exporter/jats/convert.js +871 -0
  41. package/src/exporter/jats/index.js +92 -0
  42. package/src/exporter/jats/templates.js +35 -0
  43. package/src/exporter/jats/text.js +72 -0
  44. package/src/exporter/latex/convert.js +934 -0
  45. package/src/exporter/latex/escape_latex.js +21 -0
  46. package/src/exporter/latex/index.js +74 -0
  47. package/src/exporter/latex/readme.js +22 -0
  48. package/src/exporter/native/shrink.js +132 -0
  49. package/src/exporter/odt/citations.js +101 -0
  50. package/src/exporter/odt/footnotes.js +147 -0
  51. package/src/exporter/odt/images.js +115 -0
  52. package/src/exporter/odt/index.js +156 -0
  53. package/src/exporter/odt/math.js +57 -0
  54. package/src/exporter/odt/metadata.js +251 -0
  55. package/src/exporter/odt/render.js +806 -0
  56. package/src/exporter/odt/richtext.js +865 -0
  57. package/src/exporter/odt/styles.js +387 -0
  58. package/src/exporter/odt/track.js +68 -0
  59. package/src/exporter/pandoc/citations.js +98 -0
  60. package/src/exporter/pandoc/convert.js +1017 -0
  61. package/src/exporter/pandoc/index.js +92 -0
  62. package/src/exporter/pandoc/readme.js +8 -0
  63. package/src/exporter/pandoc/tools.js +51 -0
  64. package/src/exporter/print/index.js +177 -0
  65. package/src/exporter/tools/doc_content.js +144 -0
  66. package/src/exporter/tools/file.js +9 -0
  67. package/src/exporter/tools/json.js +73 -0
  68. package/src/exporter/tools/svg.js +29 -0
  69. package/src/exporter/tools/xml.js +531 -0
  70. package/src/exporter/tools/xml_zip.js +95 -0
  71. package/src/exporter/tools/zip.js +90 -0
  72. package/src/exporter/tools/zotero_csl.js +93 -0
  73. package/src/importer/citations.js +129 -0
  74. package/src/importer/docx/citations.js +123 -0
  75. package/src/importer/docx/convert.js +1427 -0
  76. package/src/importer/docx/helpers.js +9 -0
  77. package/src/importer/docx/omml2mathml.js +1448 -0
  78. package/src/importer/docx/parse.js +735 -0
  79. package/src/importer/native/get_images.js +76 -0
  80. package/src/importer/native/update.js +29 -0
  81. package/src/importer/odt/citations.js +87 -0
  82. package/src/importer/odt/convert.js +1855 -0
  83. package/src/importer/pandoc/convert.js +884 -0
  84. package/src/importer/pandoc/helpers.js +84 -0
  85. package/src/importer/zip_analyzer.js +102 -0
  86. package/src/index.js +1 -0
  87. package/src/mathlive/opf_includes.js +24 -0
  88. package/src/schema/common/annotate.js +76 -0
  89. package/src/schema/common/base.js +118 -0
  90. package/src/schema/common/citation.js +62 -0
  91. package/src/schema/common/equation.js +31 -0
  92. package/src/schema/common/figure.js +190 -0
  93. package/src/schema/common/heading.js +43 -0
  94. package/src/schema/common/index.js +40 -0
  95. package/src/schema/common/list.js +95 -0
  96. package/src/schema/common/reference.js +100 -0
  97. package/src/schema/common/table.js +103 -0
  98. package/src/schema/common/track.js +190 -0
  99. package/src/schema/const.js +58 -0
  100. package/src/schema/convert.js +1272 -0
  101. package/src/schema/document/content.js +187 -0
  102. package/src/schema/document/index.js +117 -0
  103. package/src/schema/document/structure.js +452 -0
  104. package/src/schema/export.js +21 -0
  105. package/src/schema/footnotes.js +126 -0
  106. package/src/schema/footnotes_convert.js +31 -0
  107. package/src/schema/i18n.js +595 -0
  108. package/src/schema/index.js +5 -0
  109. package/src/schema/mini_json.js +61 -0
  110. package/src/schema/text.js +22 -0
@@ -0,0 +1,941 @@
1
+ import {escapeText} from "../../common/index.js"
2
+ import {BIBLIOGRAPHY_HEADERS} from "../../schema/i18n.js"
3
+ import {textContent} from "../tools/doc_content.js"
4
+ import {xmlDOM} from "../tools/xml.js"
5
+
6
+ export class DOCXExporterRender {
7
+ constructor(xml) {
8
+ this.xml = xml
9
+
10
+ this.filePath = false // "word/document.xml" or "word/document2.xml" in some cases
11
+ this.ctXML = false
12
+ this.text = false
13
+ }
14
+
15
+ init() {
16
+ return this.xml
17
+ .getXml("[Content_Types].xml")
18
+ .then(ctXML => {
19
+ this.ctXML = ctXML
20
+ const documentOverride = this.ctXML.query("Override", {
21
+ ContentType:
22
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"
23
+ })
24
+ this.filePath = documentOverride
25
+ .getAttribute("PartName")
26
+ .slice(1)
27
+ return this.xml.getXml(this.filePath)
28
+ })
29
+ .then(xml => {
30
+ this.text = xml
31
+ // Ensure we support the three latest docx feature sets:
32
+ // wp14 (drawing 2010), w14 (word 2010), w15 (word 2012)
33
+ const documentEl = this.text.query("w:document")
34
+ if (!documentEl.getAttribute("xmlns:wp14")) {
35
+ documentEl.setAttribute(
36
+ "xmlns:wp14",
37
+ "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing"
38
+ )
39
+ }
40
+ if (!documentEl.getAttribute("xmlns:w14")) {
41
+ documentEl.setAttribute(
42
+ "xmlns:w14",
43
+ "http://schemas.microsoft.com/office/word/2010/wordml"
44
+ )
45
+ }
46
+ if (!documentEl.getAttribute("xmlns:w15")) {
47
+ documentEl.setAttribute(
48
+ "xmlns:w15",
49
+ "http://schemas.microsoft.com/office/word/2012/wordml"
50
+ )
51
+ }
52
+ const ignorable = [
53
+ ...new Set(
54
+ ["w14", "wp14", "w15"].concat(
55
+ documentEl
56
+ .getAttribute("mc:Ignorable", "")
57
+ .split(" ")
58
+ .filter(item => item.length)
59
+ )
60
+ )
61
+ ]
62
+ documentEl.setAttribute("mc:Ignorable", ignorable.join(" "))
63
+ return Promise.resolve()
64
+ })
65
+ }
66
+
67
+ parseStructuredTags(block, tag) {
68
+ let blockText = block.textContent
69
+ const tagName = tag.title
70
+
71
+ // Check for BEGIN...END loops (with optional limit)
72
+ const beginStartRegex = new RegExp(
73
+ `\\{BEGIN_${tagName}(?::limit=(\\d+))?\\}`
74
+ )
75
+ const beginStartMatch = blockText.match(beginStartRegex)
76
+
77
+ if (
78
+ beginStartMatch &&
79
+ tag.content &&
80
+ Array.isArray(tag.content) &&
81
+ tag.content.length > 0
82
+ ) {
83
+ const limit = beginStartMatch[1]
84
+ ? parseInt(beginStartMatch[1])
85
+ : null
86
+ const beginStart = beginStartMatch.index
87
+ const beginEnd = beginStart + beginStartMatch[0].length
88
+
89
+ // Find matching {END_tag}
90
+ const endTag = `{END_${tagName}}`
91
+ const endPos = blockText.indexOf(endTag, beginEnd)
92
+ if (endPos === -1) {
93
+ console.warn(`Missing ${endTag} for ${tagName}`)
94
+ return
95
+ }
96
+
97
+ const templateXml = blockText.slice(beginEnd, endPos)
98
+ const replacementXml = this.processLoop(
99
+ templateXml,
100
+ tag.content,
101
+ tagName,
102
+ limit
103
+ )
104
+
105
+ const beforeText = blockText.slice(0, beginStart)
106
+ const afterText = blockText.slice(endPos + endTag.length)
107
+ const fullReplacement = beforeText + replacementXml + afterText
108
+
109
+ block.innerXML = fullReplacement
110
+ return
111
+ }
112
+
113
+ // Check for IF...ELIF...ELSE...ENDIF conditionals
114
+ blockText = this.processConditionals(blockText, {
115
+ tagName,
116
+ count: tag.content ? tag.content.length : 0,
117
+ content: tag.content || []
118
+ })
119
+
120
+ if (blockText !== block.textContent) {
121
+ block.innerXML = blockText
122
+ }
123
+ }
124
+
125
+ processLoop(templateXml, items, tagName, limit = null) {
126
+ const effectiveItems = limit !== null ? items.slice(0, limit) : items
127
+ const results = []
128
+
129
+ effectiveItems.forEach((item, index) => {
130
+ const loopCtx = {
131
+ count: items.length,
132
+ index: index,
133
+ first: index === 0,
134
+ last: index === effectiveItems.length - 1,
135
+ item: item,
136
+ content: [item],
137
+ odd: index % 2 === 1,
138
+ even: index % 2 === 0
139
+ }
140
+
141
+ let itemXml = templateXml
142
+
143
+ // Replace field placeholders
144
+ if (typeof item === "string") {
145
+ itemXml = itemXml.replace(/%tag/g, escapeText(item))
146
+ } else {
147
+ itemXml = itemXml
148
+ .replace(
149
+ /\{?%firstname\}?/g,
150
+ escapeText(item.firstname || "")
151
+ )
152
+ .replace(
153
+ /\{?%lastname\}?/g,
154
+ escapeText(item.lastname || "")
155
+ )
156
+ .replace(
157
+ /\{?%institution\}?/g,
158
+ escapeText(item.institution || "")
159
+ )
160
+ .replace(/\{?%email\}?/g, escapeText(item.email || ""))
161
+ .replace(/\{?%id_type\}?/g, escapeText(item.id_type || ""))
162
+ .replace(
163
+ /\{?%id_value\}?/g,
164
+ escapeText(item.id_value || "")
165
+ )
166
+ }
167
+
168
+ // Handle conditionals inside the loop
169
+ itemXml = this.processConditionals(itemXml, {tagName, ...loopCtx})
170
+
171
+ // Handle special delimiters for DOCX
172
+ itemXml = itemXml.replace(/\\n/g, "<w:br/>")
173
+ itemXml = itemXml.replace(/\\p/g, "</w:p><w:p>")
174
+
175
+ results.push(itemXml)
176
+ })
177
+
178
+ return results.join("")
179
+ }
180
+
181
+ processConditionals(text, ctx) {
182
+ let result = text
183
+ let changed = true
184
+ while (changed) {
185
+ changed = false
186
+ const ifStart = result.indexOf("{IF(")
187
+ if (ifStart === -1) {
188
+ break
189
+ }
190
+
191
+ let depth = 1
192
+ let pos = ifStart + 4 // skip {IF(
193
+ // Find the closing ) of the IF expression
194
+ while (pos < result.length && result[pos] !== ")") {
195
+ pos++
196
+ }
197
+ if (pos >= result.length) {
198
+ break
199
+ }
200
+ pos++ // skip )
201
+
202
+ // Now scan for matching {ENDIF}
203
+ while (pos < result.length && depth > 0) {
204
+ if (result.substr(pos, 4) === "{IF(") {
205
+ depth++
206
+ pos += 4
207
+ } else if (result.substr(pos, 7) === "{ENDIF}") {
208
+ depth--
209
+ if (depth > 0) {
210
+ pos += 7
211
+ }
212
+ } else {
213
+ pos++
214
+ }
215
+ }
216
+
217
+ if (depth === 0) {
218
+ const exprEnd = result.indexOf(")", ifStart + 4)
219
+ const ifExpr = result.slice(ifStart + 4, exprEnd)
220
+ // Skip the closing } of {IF(...)} if present
221
+ let innerStart = exprEnd + 1
222
+ if (result[innerStart] === "}") {
223
+ innerStart++
224
+ }
225
+ const innerContent = result.slice(innerStart, pos)
226
+
227
+ const conditions = []
228
+ conditions.push({expr: ifExpr, content: ""})
229
+
230
+ const remaining = innerContent
231
+ let lastIndex = 0
232
+
233
+ const elifRegex = /\{ELIF\(([^)]+)\)\}/g
234
+ let elifMatch
235
+ while ((elifMatch = elifRegex.exec(remaining)) !== null) {
236
+ conditions[conditions.length - 1].content = remaining.slice(
237
+ lastIndex,
238
+ elifMatch.index
239
+ )
240
+ conditions.push({expr: elifMatch[1], content: ""})
241
+ lastIndex = elifMatch.index + elifMatch[0].length
242
+ }
243
+
244
+ const elseMatch = remaining.slice(lastIndex).match(/\{ELSE\}/)
245
+ if (elseMatch) {
246
+ conditions[conditions.length - 1].content = remaining.slice(
247
+ lastIndex,
248
+ lastIndex + elseMatch.index
249
+ )
250
+ conditions.push({
251
+ expr: null,
252
+ content: remaining.slice(
253
+ lastIndex + elseMatch.index + elseMatch[0].length
254
+ )
255
+ })
256
+ } else {
257
+ conditions[conditions.length - 1].content =
258
+ remaining.slice(lastIndex)
259
+ }
260
+
261
+ let replacement = ""
262
+ for (const cond of conditions) {
263
+ if (
264
+ cond.expr === null ||
265
+ this.evaluateExpression(cond.expr, ctx)
266
+ ) {
267
+ replacement = cond.content
268
+ break
269
+ }
270
+ }
271
+
272
+ result =
273
+ result.slice(0, ifStart) +
274
+ replacement +
275
+ result.slice(pos + 7)
276
+ changed = true
277
+ }
278
+ }
279
+ return result
280
+ }
281
+
282
+ evaluateExpression(expr, ctx) {
283
+ try {
284
+ // Allow explicit tag name references (e.g., authors.count -> ctx.count)
285
+ if (ctx.tagName) {
286
+ const safeTagName = ctx.tagName.replace(
287
+ /[.*+?^${}()|[\]\\]/g,
288
+ "\\$&"
289
+ )
290
+ expr = expr.replace(
291
+ new RegExp(`\\b${safeTagName}\\b`, "g"),
292
+ "ctx"
293
+ )
294
+ }
295
+
296
+ // Replace ctx.property accesses with literal values
297
+ const evalExpr = expr.replace(
298
+ /ctx\.(\w+)(?:\.(\w+))?(?:\[(\d+)\])?/g,
299
+ (_match, p1, p2, p3) => {
300
+ let val = ctx[p1]
301
+ if (p2 !== undefined && val !== undefined) {
302
+ val = val[p2]
303
+ }
304
+ if (p3 !== undefined && val !== undefined) {
305
+ val = val[parseInt(p3)]
306
+ }
307
+ return JSON.stringify(val)
308
+ }
309
+ )
310
+
311
+ // Remove string literals before character check
312
+ const safeExpr = evalExpr.replace(
313
+ /"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'/g,
314
+ '""'
315
+ )
316
+
317
+ // Check for unknown identifiers
318
+ const bareIdRegex = /\b[a-zA-Z_]\w*\b/g
319
+ const allowed = ["true", "false", "null", "undefined"]
320
+ let m
321
+ while ((m = bareIdRegex.exec(safeExpr)) !== null) {
322
+ if (!allowed.includes(m[0])) {
323
+ console.warn(
324
+ "Unknown identifier in expression:",
325
+ m[0],
326
+ "expression:",
327
+ expr
328
+ )
329
+ return false
330
+ }
331
+ }
332
+
333
+ // Check for unsafe characters
334
+ if (/[^ \t\n\r0-9a-zA-Z_\.\+\-*\/%==<>!&|()\[\]]/.test(safeExpr)) {
335
+ console.warn("Unsafe characters in expression:", expr)
336
+ return false
337
+ }
338
+
339
+ return new Function(`return (${evalExpr})`)()
340
+ } catch (e) {
341
+ console.warn("Error evaluating expression:", expr, e)
342
+ return false
343
+ }
344
+ }
345
+
346
+ processMultiBlockStructuredTags(blocks, tags) {
347
+ const tagMap = {}
348
+ tags.forEach(tag => {
349
+ if (tag.title) {
350
+ tagMap[tag.title] = tag
351
+ }
352
+ })
353
+
354
+ // Process from end to start to avoid index shifting issues
355
+ for (let i = blocks.length - 1; i >= 0; i--) {
356
+ const block = blocks[i]
357
+ const text = block.textContent
358
+
359
+ // Check for multi-block BEGIN...END loops
360
+ for (const tag of tags) {
361
+ if (!tag.title || !tag.content || !Array.isArray(tag.content)) {
362
+ continue
363
+ }
364
+ const tagName = tag.title
365
+ const beginRegex = new RegExp(
366
+ `\\{BEGIN_${tagName}(?::limit=(\\d+))?\\}`
367
+ )
368
+ const beginMatch = text.match(beginRegex)
369
+ if (!beginMatch) {
370
+ continue
371
+ }
372
+
373
+ // Find matching END in a later block
374
+ let endIndex = -1
375
+ for (let j = i + 1; j < blocks.length; j++) {
376
+ if (blocks[j].textContent.includes(`{END_${tagName}}`)) {
377
+ endIndex = j
378
+ break
379
+ }
380
+ }
381
+
382
+ if (endIndex === -1 || endIndex === i) {
383
+ continue
384
+ }
385
+
386
+ // Found multi-block loop - process it
387
+ const limit = beginMatch[1] ? parseInt(beginMatch[1]) : null
388
+ this._replaceMultiBlockLoop(blocks, i, endIndex, tag, limit)
389
+ // Adjust i since blocks array was modified
390
+ i = Math.min(i, blocks.length - 1)
391
+ break // Only process one loop per block per iteration
392
+ }
393
+ }
394
+
395
+ // Process multi-block conditionals from end to start
396
+ for (let i = blocks.length - 1; i >= 0; i--) {
397
+ const block = blocks[i]
398
+ const text = block.textContent
399
+ const ifMatch = text.match(/\{IF\(([^)]+)\)\}/)
400
+ if (!ifMatch) {
401
+ continue
402
+ }
403
+
404
+ // Find matching ENDIF in a later block
405
+ let endIndex = -1
406
+ for (let j = i + 1; j < blocks.length; j++) {
407
+ if (/\{ENDIF\}/.test(blocks[j].textContent)) {
408
+ endIndex = j
409
+ break
410
+ }
411
+ }
412
+
413
+ if (endIndex === -1 || endIndex === i) {
414
+ continue
415
+ }
416
+
417
+ this._replaceMultiBlockConditional(
418
+ blocks,
419
+ i,
420
+ endIndex,
421
+ ifMatch[1],
422
+ tagMap
423
+ )
424
+ i = Math.min(i, blocks.length - 1)
425
+ }
426
+ }
427
+
428
+ _replaceMultiBlockLoop(blocks, beginIndex, endIndex, tag, limit) {
429
+ const tagName = tag.title
430
+ const beginBlock = blocks[beginIndex]
431
+
432
+ // Concatenate all blocks from begin to end
433
+ let combinedXml = ""
434
+ for (let i = beginIndex; i <= endIndex; i++) {
435
+ combinedXml += blocks[i].toString()
436
+ }
437
+
438
+ // Find the BEGIN and END tags in the combined XML
439
+ const beginRegex = new RegExp(`\\{BEGIN_${tagName}(?::limit=\\d+)?\\}`)
440
+ const beginMatch = combinedXml.match(beginRegex)
441
+ const endTag = `{END_${tagName}}`
442
+ const endPos = combinedXml.indexOf(endTag)
443
+
444
+ if (!beginMatch || endPos === -1) {
445
+ return
446
+ }
447
+
448
+ const beforeXml = combinedXml.slice(0, beginMatch.index)
449
+ const templateXml = combinedXml.slice(
450
+ beginMatch.index + beginMatch[0].length,
451
+ endPos
452
+ )
453
+ const afterXml = combinedXml.slice(endPos + endTag.length)
454
+
455
+ // Decode &gt; so expressions like >= work in nested conditionals
456
+ const decodedTemplateXml = templateXml.replace(/&gt;/g, ">")
457
+ const replacementXml = this.processLoop(
458
+ decodedTemplateXml,
459
+ tag.content,
460
+ tagName,
461
+ limit
462
+ )
463
+ const fullReplacement = beforeXml + replacementXml + afterXml
464
+
465
+ // Parse replacement
466
+ const parent = beginBlock.parentElement
467
+ const dom = xmlDOM(`<root>${fullReplacement}</root>`)
468
+ const root = dom.query("root")
469
+ const newBlocks = root.children.filter(
470
+ child => child.tagName === "w:p" || child.tagName === "w:sectPr"
471
+ )
472
+
473
+ // Insert new blocks before begin block
474
+ for (let i = newBlocks.length - 1; i >= 0; i--) {
475
+ parent.insertBefore(newBlocks[i], beginBlock)
476
+ }
477
+
478
+ // Remove old blocks
479
+ for (let i = endIndex; i >= beginIndex; i--) {
480
+ parent.removeChild(blocks[i])
481
+ }
482
+
483
+ // Update blocks array
484
+ blocks.splice(beginIndex, endIndex - beginIndex + 1, ...newBlocks)
485
+ }
486
+
487
+ _replaceMultiBlockConditional(blocks, ifIndex, endIndex, expr, tagMap) {
488
+ const ifBlock = blocks[ifIndex]
489
+
490
+ // Concatenate all blocks from if to endif
491
+ let combinedXml = ""
492
+ for (let i = ifIndex; i <= endIndex; i++) {
493
+ combinedXml += blocks[i].toString()
494
+ }
495
+
496
+ // Determine which tag the expression references
497
+ let ctx = {count: 0, content: []}
498
+ for (const tagName in tagMap) {
499
+ const safeTagName = tagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
500
+ if (new RegExp(`\\b${safeTagName}\\b`).test(expr)) {
501
+ const tag = tagMap[tagName]
502
+ ctx = {
503
+ tagName: tag.title,
504
+ count: tag.content ? tag.content.length : 0,
505
+ content: tag.content || []
506
+ }
507
+ break
508
+ }
509
+ }
510
+
511
+ // Decode &gt; so expressions like >= work in conditionals
512
+ const decodedXml = combinedXml.replace(/&gt;/g, ">")
513
+ // Process conditionals on the combined XML
514
+ const processedXml = this.processConditionals(decodedXml, ctx)
515
+
516
+ if (processedXml === combinedXml) {
517
+ return
518
+ }
519
+
520
+ // Parse and replace
521
+ const parent = ifBlock.parentElement
522
+ const dom = xmlDOM(`<root>${processedXml}</root>`)
523
+ const root = dom.query("root")
524
+ const newBlocks = root.children.filter(
525
+ child => child.tagName === "w:p" || child.tagName === "w:sectPr"
526
+ )
527
+
528
+ for (let i = newBlocks.length - 1; i >= 0; i--) {
529
+ parent.insertBefore(newBlocks[i], ifBlock)
530
+ }
531
+
532
+ for (let i = endIndex; i >= ifIndex; i--) {
533
+ parent.removeChild(blocks[i])
534
+ }
535
+
536
+ blocks.splice(ifIndex, endIndex - ifIndex + 1, ...newBlocks)
537
+ }
538
+
539
+ // Define the tags that are to be looked for in the document
540
+ getTagData(docContent, pmBib, settings) {
541
+ const tags = docContent.content.map(node => {
542
+ const tag = {}
543
+ switch (node.type) {
544
+ case "title":
545
+ tag.title = "title"
546
+ tag.content = textContent(node)
547
+ break
548
+ case "heading_part":
549
+ tag.title = node.attrs.id
550
+ tag.content = textContent(node)
551
+ break
552
+ case "table_part":
553
+ case "richtext_part":
554
+ tag.title = `@${node.attrs.id}`
555
+ tag.content = node.content
556
+ break
557
+ case "contributors_part":
558
+ tag.title = node.attrs.id
559
+ // Return array of structured objects for format with delimiter support
560
+ tag.content = node.content
561
+ ? node.content.map(node => {
562
+ const c = node.attrs
563
+ return {
564
+ firstname: c.firstname || "",
565
+ lastname: c.lastname || "",
566
+ institution: c.institution || "",
567
+ email: c.email || "",
568
+ id_type: c.id_type || "",
569
+ id_value: c.id_value || ""
570
+ }
571
+ })
572
+ : []
573
+ break
574
+ case "tags_part":
575
+ tag.title = node.attrs.id
576
+ // Return array of tag strings for format with delimiter support
577
+ tag.content = node.content
578
+ ? node.content.map(node => node.attrs.tag)
579
+ : []
580
+ break
581
+ }
582
+ return tag
583
+ })
584
+
585
+ let bibliographyContent
586
+ if (pmBib && pmBib.content && pmBib.content.length > 0) {
587
+ // Add bibliography heading and mark first/last items
588
+ const firstPmBib = pmBib.content[0]
589
+ const lastPmBib = pmBib.content[pmBib.content.length - 1]
590
+ firstPmBib.attrs = firstPmBib.attrs || {}
591
+ firstPmBib.attrs.first = true
592
+ lastPmBib.attrs = lastPmBib.attrs || {}
593
+ lastPmBib.attrs.last = true
594
+ const bibliographyHeader =
595
+ settings.bibliography_header[settings.language] ||
596
+ BIBLIOGRAPHY_HEADERS[settings.language]
597
+ bibliographyContent = [
598
+ {
599
+ type: "bibliography_heading",
600
+ content: [{type: "text", text: bibliographyHeader}]
601
+ },
602
+ pmBib
603
+ ]
604
+ } else {
605
+ // No bibliography content, add a placeholder paragraph
606
+ bibliographyContent = [
607
+ {type: "paragraph", content: [{type: "text", text: " "}]}
608
+ ]
609
+ }
610
+
611
+ // Add bibliography content
612
+ tags.push({
613
+ title: "@bibliography", // The '@' triggers handling as block
614
+ content: bibliographyContent
615
+ })
616
+
617
+ tags.push({
618
+ title: "@copyright", // The '@' triggers handling as block
619
+ content:
620
+ settings.copyright && settings.copyright.holder
621
+ ? [
622
+ {
623
+ type: "paragraph",
624
+ content: [
625
+ {
626
+ type: "text",
627
+ text: `© ${settings.copyright.year ? settings.copyright.year : new Date().getFullYear()} ${settings.copyright.holder}`
628
+ }
629
+ ]
630
+ }
631
+ ]
632
+ : [
633
+ {
634
+ type: "paragraph",
635
+ content: [{type: "text", text: " "}]
636
+ }
637
+ ]
638
+ })
639
+ tags.push({
640
+ title: "@licenses", // The '@' triggers handling as block
641
+ content:
642
+ settings.copyright && settings.copyright.licenses.length
643
+ ? settings.copyright.licenses.map(license => ({
644
+ type: "paragraph",
645
+ content: [
646
+ {
647
+ type: "text",
648
+ marks: [
649
+ {
650
+ type: "link",
651
+ attrs: {
652
+ href: license.url,
653
+ title: license.title
654
+ }
655
+ }
656
+ ],
657
+ text: license.title
658
+ },
659
+ {
660
+ type: "text",
661
+ text: license.start
662
+ ? ` (${license.start})`
663
+ : ""
664
+ }
665
+ ]
666
+ }))
667
+ : [
668
+ {
669
+ type: "paragraph",
670
+ content: [{type: "text", text: " "}]
671
+ }
672
+ ]
673
+ })
674
+
675
+ return tags
676
+ }
677
+
678
+ // go through document.xml looking for tags and replace them with the given
679
+ // replacements.
680
+ render(docContent, pmBib, settings, richtext, citations) {
681
+ const tags = this.getTagData(docContent, pmBib, settings)
682
+
683
+ // Including global page definition at end
684
+ const blocks = this.text.queryAll(["w:p", "w:sectPr"])
685
+
686
+ // Process multi-block structured tags first (BEGIN...END across paragraphs)
687
+ this.processMultiBlockStructuredTags(blocks, tags)
688
+
689
+ const currentTags = []
690
+ blocks.forEach(block => {
691
+ // Assuming there is nothing outside of <w:t>...</w:t>
692
+ const text = block.textContent
693
+ tags.forEach(tag => {
694
+ const tagString = tag.title
695
+ const hasInlineTag =
696
+ text.includes(`{${tagString}}`) ||
697
+ text.includes(`{${tagString}:format=`)
698
+ const hasBeginTag = text.includes(`{BEGIN_${tagString}}`)
699
+ const hasIfTag =
700
+ text.includes(`{IF(${tagString}.`) ||
701
+ text.includes(`{IF(ctx.`)
702
+ if (hasInlineTag || hasBeginTag || hasIfTag) {
703
+ currentTags.push(tag)
704
+ tag.block = block
705
+ // We don't worry about the same tag appearing twice in the document,
706
+ // as that would make no sense.
707
+ }
708
+ })
709
+
710
+ // Parse structured tags (BEGIN...END and IF...ENDIF)
711
+ currentTags.forEach(tag => {
712
+ if (tag.block) {
713
+ this.parseStructuredTags(tag.block, tag)
714
+ }
715
+ })
716
+
717
+ const pageSize = block.query("w:pgSz")
718
+ const pageMargins = block.query("w:pgMar")
719
+ const cols = block.query("w:cols")
720
+ if (pageSize && pageMargins) {
721
+ // Not sure if these all need to come together
722
+ let width =
723
+ Number.parseInt(pageSize.getAttribute("w:w")) -
724
+ Number.parseInt(pageMargins.getAttribute("w:right")) -
725
+ Number.parseInt(pageMargins.getAttribute("w:left"))
726
+ const height =
727
+ Number.parseInt(pageSize.getAttribute("w:h")) -
728
+ Number.parseInt(pageMargins.getAttribute("w:bottom")) -
729
+ Number.parseInt(pageMargins.getAttribute("w:top")) -
730
+ Number.parseInt(pageMargins.getAttribute("w:header")) -
731
+ Number.parseInt(pageMargins.getAttribute("w:footer"))
732
+
733
+ const colCount = cols
734
+ ? Number.parseInt(cols.getAttribute("w:num"))
735
+ : 1
736
+ if (colCount > 1) {
737
+ const colSpace = Number.parseInt(
738
+ cols.getAttribute("w:space")
739
+ )
740
+ width = width - colSpace * (colCount - 1)
741
+ width = width / colCount
742
+ }
743
+ while (currentTags.length) {
744
+ const tag = currentTags.pop()
745
+ tag.dimensions = {
746
+ width: width * 635, // convert to EMU
747
+ height: height * 635 // convert to EMU
748
+ }
749
+ }
750
+ }
751
+ })
752
+ tags.forEach(tag => {
753
+ if (!tag.title) {
754
+ return
755
+ } else if (tag.title[0] === "@") {
756
+ this.blockRender(tag, citations, richtext)
757
+ } else {
758
+ this.inlineRender(tag)
759
+ }
760
+ })
761
+ }
762
+
763
+ // Render Tags that only exchange inline content
764
+ inlineRender(tag) {
765
+ if (!tag.block) {
766
+ return
767
+ }
768
+ const blockText = tag.block.textContent
769
+ const tagString = `{${tag.title}}`
770
+
771
+ if (!blockText.includes(`{${tag.title}`)) {
772
+ // No inline tag present - structured tags only
773
+ return
774
+ }
775
+
776
+ // Check for format string with delimiter: {tag:format=%firstname|; }
777
+ const formatRegex = new RegExp(
778
+ `\\{${tag.title}:format=([^|]+)\\|?([^}]*)?\\}`
779
+ )
780
+ const formatMatch = blockText.match(formatRegex)
781
+
782
+ let fullText = ""
783
+
784
+ if (formatMatch && tag.content && Array.isArray(tag.content)) {
785
+ // Find format string and delimiter
786
+ const [, format, delimiter = "; "] = formatMatch
787
+
788
+ // Process each item with the format string
789
+ const formattedItems = tag.content
790
+ .map(item => {
791
+ if (typeof item === "string") {
792
+ // For tags (simple strings)
793
+ return format.replace(/%tag/g, item)
794
+ } else {
795
+ // For contributors (objects)
796
+ return format
797
+ .replace(/%firstname/g, item.firstname || "")
798
+ .replace(/%lastname/g, item.lastname || "")
799
+ .replace(/%institution/g, item.institution || "")
800
+ .replace(/%email/g, item.email || "")
801
+ .replace(/%id_type/g, item.id_type || "")
802
+ .replace(/%id_value/g, item.id_value || "")
803
+ }
804
+ })
805
+ .filter(s => s.trim() !== "")
806
+
807
+ // Handle special delimiters
808
+ let delimiterText = delimiter
809
+ delimiterText = delimiterText.replace(/\\n/g, "\n")
810
+ delimiterText = delimiterText.replace(/\\p/g, "\n\n")
811
+
812
+ const replacement = formattedItems.join(delimiterText)
813
+ fullText = blockText.replace(formatRegex, replacement)
814
+ } else {
815
+ // Fall back to simple string replacement (backward compatible)
816
+ let contentStr = tag.content || ""
817
+ if (Array.isArray(contentStr)) {
818
+ if (contentStr.length === 0) {
819
+ contentStr = ""
820
+ } else if (typeof contentStr[0] === "string") {
821
+ contentStr = contentStr.join(", ")
822
+ } else {
823
+ // Contributors - backward compatible formatting
824
+ contentStr = contentStr
825
+ .map(item => {
826
+ const nameParts = []
827
+ let affiliation = false
828
+ if (item.firstname) {
829
+ nameParts.push(item.firstname)
830
+ }
831
+ if (item.lastname) {
832
+ nameParts.push(item.lastname)
833
+ }
834
+ if (item.institution) {
835
+ if (nameParts.length) {
836
+ affiliation = item.institution
837
+ } else {
838
+ nameParts.push(item.institution)
839
+ }
840
+ }
841
+ const parts = [nameParts.join(" ")]
842
+ if (affiliation) {
843
+ parts.push(affiliation)
844
+ }
845
+ if (item.email) {
846
+ parts.push(item.email)
847
+ }
848
+ if (item.id_type && item.id_value) {
849
+ parts.push(`${item.id_type}: ${item.id_value}`)
850
+ }
851
+ return parts.join(", ")
852
+ })
853
+ .join("; ")
854
+ }
855
+ }
856
+ const texts = blockText.split(tagString)
857
+ fullText = texts[0] + contentStr + texts[1]
858
+ }
859
+
860
+ // Apply the replacement
861
+ const rs = tag.block.queryAll("w:r").reverse()
862
+ let lastR
863
+ // Remove all <w:r> with text in them (<w:t>).
864
+ // Exclude <w:r> used for other things, like page breaks.
865
+ rs.forEach(r => {
866
+ if (r.query("w:t")) {
867
+ if (lastR) {
868
+ r.parentElement.removeChild(r)
869
+ } else {
870
+ lastR = r
871
+ }
872
+ }
873
+ })
874
+ if (!lastR) {
875
+ // This should not be possible. Error.
876
+ return
877
+ }
878
+ if (fullText.length) {
879
+ if (fullText.includes("\n")) {
880
+ // Split on newlines and create <w:t> elements separated by <w:br/>
881
+ const parts = fullText.split("\n").map(part => escapeText(part))
882
+ lastR.innerXML = parts
883
+ .map((part, index) => {
884
+ const br = index > 0 ? "<w:br/>" : ""
885
+ return `${br}<w:t xml:space="preserve">${part}</w:t>`
886
+ })
887
+ .join("")
888
+ } else {
889
+ let textAttr = ""
890
+ if (
891
+ fullText[0] === " " ||
892
+ fullText[fullText.length - 1] === " "
893
+ ) {
894
+ textAttr += 'xml:space="preserve"'
895
+ }
896
+ lastR.innerXML = `<w:t ${textAttr}>${escapeText(fullText)}</w:t>`
897
+ }
898
+ } else {
899
+ lastR.parentElement.removeChild(lastR)
900
+ }
901
+ }
902
+
903
+ // Render tags that exchange paragraphs
904
+ blockRender(tag, citations, richtext) {
905
+ if (!tag.block) {
906
+ return
907
+ }
908
+ const pStyle = tag.block.query("w:pStyle")
909
+ const options = {
910
+ dimensions: tag.dimensions,
911
+ citationType: citations.citFm.citationType,
912
+ section: pStyle ? pStyle.getAttribute("w:val") : "Normal",
913
+ tag: tag.title.slice(1)
914
+ }
915
+ const outXML = tag.content
916
+ ? tag.content
917
+ .map((content, i) =>
918
+ richtext.run(content, options, tag.content[i + 1])
919
+ )
920
+ .join("")
921
+ : ""
922
+ if (!outXML.length) {
923
+ // If there is no content, we need to put in a space to prevent the
924
+ // tag from being removed.
925
+ tag.block.innerXML = '<w:r><w:t xml:space="preserve"> </w:t></w:r>'
926
+ return
927
+ }
928
+ const parentElement = tag.block.parentElement
929
+ const dom = xmlDOM(outXML)
930
+ const domPars = dom.node["#document"]?.slice() || [dom]
931
+ domPars.forEach(node => parentElement.insertBefore(node, tag.block))
932
+ // sectPr contains information about columns, etc. We need to move this
933
+ // to the last paragraph we will be adding.
934
+ const sectPr = tag.block.query("w:sectPr")
935
+ if (sectPr) {
936
+ const pPr = tag.block.previousSibling.query("w:pPr")
937
+ pPr.appendChild(sectPr)
938
+ }
939
+ parentElement.removeChild(tag.block)
940
+ }
941
+ }