@jti/adoc2typst2pdf 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1195 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * docbook2typst.js
5
+ *
6
+ * Converts DocBook XML (as produced by asciidoctor -b docbook) to Typst markup.
7
+ *
8
+ * Usage:
9
+ * node docbook2typst.js [options] <input.xml>
10
+ *
11
+ * Options:
12
+ * -o <file> Output Typst path (default: <input>.typ)
13
+ * -h, --help Show this help and exit
14
+ */
15
+
16
+ 'use strict';
17
+
18
+ const fs = require('fs')
19
+ const path = require('path')
20
+
21
+
22
+ // ── CLI ────────────────────────────────────────────────────────────────────────
23
+
24
// Raw command-line arguments (everything after `node <script>`).
const argv = process.argv.slice(2)

// No arguments at all, or an explicit help flag anywhere: print usage and stop.
if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) {
  console.log(`
Usage: node docbook2typst.js [options] <input.xml>

Converts DocBook XML to Typst markup.

Options:
  -o, --out-file <file>  Output Typst file (default: <input>.typ)
  --no-theme             Do not use the theme helpers (#admonition, #callout-item, #ui)
  -h, --help             Show this help and exit
`)
  process.exit(0)
}

let inputFile = null
let outputFile = null
let useTheme = true

// Options may appear in any order. The last non-option argument wins as the
// input file; unrecognised options are ignored.
for (let i = 0; i < argv.length; i++) {
  const arg = argv[i]
  if ((arg === '-o' || arg === '--out-file') && argv[i + 1]) {
    outputFile = argv[++i]
  } else if (arg === '--no-theme') {
    useTheme = false
  } else if (!arg.startsWith('-')) {
    inputFile = arg
  }
}

if (!inputFile) {
  console.error('Error: no input file specified.')
  console.error('Run with --help for usage information.')
  process.exit(1)
}

if (!fs.existsSync(inputFile)) {
  console.error(`Error: file not found: ${inputFile}`)
  process.exit(1)
}

// Default output: the input path with its extension swapped for ".typ".
// path.parse() only ever looks at the final path segment, so an input without
// an extension gets ".typ" appended (instead of resolving to the input file
// itself) and a dot inside a directory name is left alone.
if (!outputFile) {
  const { root, dir, name } = path.parse(inputFile)
  outputFile = path.format({ root, dir, name, ext: '.typ' })
}

// The assembled AsciiDoc source sits alongside the XML with the same basename.
const adocSourceFile = inputFile.replace(/\.xml$/i, '.adoc')
68
+
69
+ // ── XML Parser ────────────────────────────────────────────────────────────────
70
+ //
71
// Minimal recursive-descent parser for well-formed XML (as asciidoctor produces).
//
// Node shapes produced:
//   { type: 'element', name, attrs, children }
//   { type: 'text', text }      (character data and CDATA sections)
//   { type: 'pi', target }      (processing instructions; the data is dropped)
// Comments and <!DOCTYPE ...> style declarations are skipped.
//
// Returns the first top-level element, or null when the input has none.
// Throws an Error when a comment, CDATA section, processing instruction or
// declaration is unterminated, or when an attribute value is not quoted.
function parseXml(src) {
  let pos = 0

  // The five predefined XML entities. A Map is used so that names such as
  // "constructor" are never resolved through Object.prototype.
  const NAMED_ENTITIES = new Map([
    ['lt', '<'], ['gt', '>'], ['amp', '&'], ['quot', '"'], ['apos', "'"],
  ])
  // What may legally sit between '&' and ';': a hex or decimal character
  // reference, or an entity name.
  const REFERENCE_BODY = /^(?:#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z_:][\w:.-]*)$/
  const MAX_CODE_POINT = 0x10FFFF

  function skipWs() {
    while (pos < src.length && /\s/.test(src[pos])) pos++
  }

  // Consume and return everything up to `end`; `end` itself is consumed too.
  function readUntil(end) {
    const i = src.indexOf(end, pos)
    if (i === -1) throw new Error(`Unclosed: looking for ${JSON.stringify(end)} near pos ${pos}`)
    const s = src.slice(pos, i)
    pos = i + end.length
    return s
  }

  function readName() {
    const start = pos
    while (pos < src.length && /[\w:.-]/.test(src[pos])) pos++
    return src.slice(start, pos)
  }

  // Called when src[pos] === '&'. Returns the decoded text.
  function decodeEntity() {
    pos++ // consume '&'
    const semi = src.indexOf(';', pos)
    const ref = semi === -1 ? '' : src.slice(pos, semi)
    // A bare '&' (no well-formed reference follows): keep the ampersand as
    // literal text and consume nothing else, so that a ';' much further along
    // cannot swallow the markup in between.
    if (!REFERENCE_BODY.test(ref)) return '&'
    pos = semi + 1
    if (NAMED_ENTITIES.has(ref)) return NAMED_ENTITIES.get(ref)
    if (ref[0] === '#') {
      const isHex = ref[1] === 'x' || ref[1] === 'X'
      const codePoint = parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10)
      // String.fromCodePoint() throws RangeError beyond U+10FFFF; keep such
      // references verbatim instead of aborting the whole parse.
      if (codePoint > MAX_CODE_POINT) return `&${ref};`
      return String.fromCodePoint(codePoint)
    }
    return `&${ref};` // unknown entity — keep as-is
  }

  function readAttrValue() {
    const quote = src[pos]
    if (quote !== '"' && quote !== "'")
      throw new Error(`Expected quote at pos ${pos}, got ${JSON.stringify(src[pos])}`)
    pos++
    let val = ''
    while (pos < src.length && src[pos] !== quote) {
      val += src[pos] === '&' ? decodeEntity() : src[pos++]
    }
    pos++ // closing quote
    return val
  }

  function readAttrs() {
    const attrs = {}
    while (pos < src.length) {
      skipWs()
      if (src[pos] === '>' || src[pos] === '/') break
      const name = readName()
      if (!name) break
      skipWs()
      if (src[pos] === '=') {
        pos++
        skipWs()
        attrs[name] = readAttrValue()
      } else {
        attrs[name] = true // valueless boolean attribute
      }
    }
    return attrs
  }

  // Parse one node starting at `pos`. Returns null for constructs that yield
  // no node (comments, declarations, a stray closing tag) and at end of input.
  function parseNode() {
    if (pos >= src.length) return null

    // Text node
    if (src[pos] !== '<') {
      let text = ''
      while (pos < src.length && src[pos] !== '<') {
        text += src[pos] === '&' ? decodeEntity() : src[pos++]
      }
      return { type: 'text', text }
    }

    pos++ // consume '<'

    // Processing instruction: <?target data?>
    if (src[pos] === '?') {
      pos++
      const piContent = readUntil('?>').trim()
      const spaceIdx = piContent.search(/\s/)
      const target = spaceIdx === -1 ? piContent : piContent.slice(0, spaceIdx)
      return { type: 'pi', target }
    }

    // Comments, CDATA, DOCTYPE
    if (src[pos] === '!') {
      pos++
      if (src.slice(pos, pos + 2) === '--') {
        pos += 2
        readUntil('-->')
        return null
      }
      if (src.slice(pos, pos + 7) === '[CDATA[') {
        pos += 7
        const text = readUntil(']]>')
        return { type: 'text', text }
      }
      readUntil('>') // DOCTYPE or other
      return null
    }

    // Closing tag — the element loop below consumes the ones it owns, so this
    // is only reached for a stray closing tag outside any open element.
    if (src[pos] === '/') return null

    // Opening element
    const name = readName()
    const attrs = readAttrs()
    skipWs()

    if (src[pos] === '/') {
      pos++ // self-closing />
      if (src[pos] === '>') pos++
      return { type: 'element', name, attrs, children: [] }
    }

    if (src[pos] === '>') pos++

    const children = []
    while (pos < src.length) {
      // Closing tag for this element?
      if (src[pos] === '<' && src[pos + 1] === '/') {
        pos += 2
        const closeName = readName()
        skipWs()
        if (src[pos] === '>') pos++
        if (closeName !== name) {
          // Mismatched tag — tolerated: it still closes the current element.
          console.warn(`Warning: mismatched </${closeName}>, expected </${name}> — skipping`)
        }
        break
      }
      const child = parseNode()
      if (child) children.push(child)
    }

    return { type: 'element', name, attrs, children }
  }

  // Parse the full document
  const nodes = []
  while (pos < src.length) {
    skipWs()
    if (pos >= src.length) break
    const node = parseNode()
    if (node) nodes.push(node)
  }

  return nodes.find(n => n.type === 'element') || null
}
231
+
232
+ // ── DOM Helpers ───────────────────────────────────────────────────────────────
233
+
234
// Look up an attribute on a parsed node. The plain name is tried first, then
// the "xl:"-prefixed and finally the "xml:"-prefixed spelling; the first one
// that is neither null nor undefined wins. Returns null when nothing matches
// or when the node has no attribute table.
function attr(node, name) {
  const table = node ? node.attrs : null
  if (!table) return null
  for (const prefix of ['', 'xl:', 'xml:']) {
    const value = table[prefix + name]
    if (value !== undefined && value !== null) return value
  }
  return null
}
242
+
243
// Return the element children of `node` in document order. When tag names are
// given, only children with one of those names are kept. A missing node or a
// node without children yields an empty array.
function childElements(node, ...tags) {
  if (!node || !node.children) return []
  const wanted = tags.length > 0 ? new Set(tags) : null
  const found = []
  for (const child of node.children) {
    if (child.type !== 'element') continue
    if (wanted === null || wanted.has(child.name)) found.push(child)
  }
  return found
}
248
+
249
// First element child of `node` (optionally restricted to the given tag
// names), or null when there is none.
function firstChild(node, ...tags) {
  const [head] = childElements(node, ...tags)
  return head ?? null
}
252
+
253
// Concatenate the text of `node` and all of its descendants, in document
// order. Text nodes contribute their text; every other node contributes the
// text of its children. A missing node yields ''.
function textContent(node) {
  if (!node) return ''
  if (node.type === 'text') return node.text
  let collected = ''
  for (const child of node.children ?? []) {
    const piece = textContent(child)
    if (piece !== undefined && piece !== null) collected += piece
  }
  return collected
}
259
+
260
+ // ── Typst Escaping ────────────────────────────────────────────────────────────
261
+
262
// Escape special Typst markup characters in normal text context.
//
// Each of  \ # $ @ < > [ ] * _ ` ~  is prefixed with a backslash. The tilde is
// included because an unescaped `~` is Typst's non-breaking-space shorthand and
// would not print as a tilde. In addition, a slash that is directly followed
// by another slash is escaped: an unescaped `//` starts a line comment in
// Typst markup, which would silently drop the rest of the line. (`/*` needs no
// extra handling because the `*` is already escaped.)
//
// Returns '' for null, undefined or empty input.
function esc(s) {
  if (!s) return ''
  return s
    .replace(/[\\#$@<>[\]*_`~]/g, '\\$&')
    .replace(/\/(?=\/)/g, '\\/')
}
278
+
279
// Make `s` safe for use between the double quotes of a Typst string literal:
// backslashes and double quotes each get a leading backslash. Falsy input
// yields ''.
function escStr(s) {
  if (!s) return ''
  return s.replace(/[\\"]/g, (ch) => '\\' + ch)
}
284
+
285
// Turn a percent-encoded image fileref into a plain filesystem path for Typst
// (e.g. "foo%20bar.png" → "foo bar.png"). A value that is not valid
// percent-encoding is returned unchanged; falsy input yields ''.
function decodeFileref(s) {
  if (!s) return ''
  let decoded = s
  try {
    decoded = decodeURIComponent(s)
  } catch {
    // Malformed escape sequence: keep the raw value.
  }
  return decoded
}
292
+
293
// Translate a DocBook contentwidth/contentdepth value into a Typst length.
// Accepted forms: a bare number (taken as pixels), a number with a trailing
// dot (e.g. "96."), or a number followed by one of the units
// px, pt, mm, cm, in, em, pc, % (case-insensitive, optional whitespace before
// the unit). Pixels become points at 96 dpi, picas become points (×12), all
// other units pass through. Returns null for empty, non-numeric, zero or
// otherwise unusable values.
function parseImageDim(value) {
  if (!value) return null
  const parsed = /^(\d+(?:\.\d*)?)(\s*(%|px|pt|mm|cm|in|em|pc))?$/i.exec(String(value))
  if (parsed === null) return null

  const magnitude = parseFloat(parsed[1])
  if (!(isFinite(magnitude) && magnitude > 0)) return null

  const suffix = (parsed[3] || '').toLowerCase()
  if (suffix === '' || suffix === 'px') {
    return `${(magnitude * 0.75).toFixed(2)}pt` // px → pt at 96 dpi
  }
  if (suffix === 'pc') {
    return `${magnitude * 12}pt` // 1 pica = 12 points
  }
  // %, pt, mm, cm, in and em mean the same thing in Typst.
  return `${magnitude}${suffix}`
}
317
+
318
// Locate the first line of the assembled AsciiDoc source that mentions the
// basename of `fileref`. Returns "line N: <trimmed line text>" (N is 1-based),
// or null when no fileref is given, the source file cannot be read, or no
// line mentions the image. The source is read once and its lines are cached.
let _adocLines = null
function findAdocImageLine(fileref) {
  if (!fileref) return null
  if (!_adocLines) {
    try {
      _adocLines = fs.readFileSync(adocSourceFile, 'utf-8').split('\n')
    } catch {
      return null
    }
  }
  const needle = path.basename(fileref)
  const index = _adocLines.findIndex((line) => line.includes(needle))
  if (index === -1) return null
  return `line ${index + 1}: ${_adocLines[index].trim()}`
}
337
+
338
// Produce the extra named arguments for a Typst image() call from an
// <imagedata> element: ", width: …" from contentwidth and/or ", height: …"
// from contentdepth, or '' when neither yields a usable length.
//
// Throws an Error when contentwidth is present but cannot be parsed as a
// dimension; the message names the AsciiDoc source line when it can be found,
// otherwise the image path. An unparsable contentdepth is simply omitted.
function imageDataArgs(imgdata) {
  const widthAttr = attr(imgdata, 'contentwidth')
  const depthAttr = attr(imgdata, 'contentdepth')
  const fileref = decodeFileref(attr(imgdata, 'fileref') || '')

  const width = parseImageDim(widthAttr)
  if (widthAttr && width === null) {
    const loc = findAdocImageLine(fileref)
    const where = loc ? `\n ${adocSourceFile} ${loc}` : `\n image: ${fileref}`
    throw new Error(`Invalid contentwidth value "${widthAttr}"${where}`)
  }
  const height = parseImageDim(depthAttr)

  let args = ''
  if (width) args += `, width: ${width}`
  if (height) args += `, height: ${height}`
  return args
}
363
+
364
// Sanitize a DocBook xml:id into a valid Typst label name.
//
// Only ASCII letters, digits and the characters - _ . : are kept; everything
// else becomes '-'. The slash is replaced as well: it is not a valid character
// in a Typst label literal, so `<a/b>` would not parse as a label.
// Returns '' for empty or missing ids.
function sanitizeLabel(id) {
  if (!id) return ''
  return id.replace(/[^a-zA-Z0-9\-_.:]/g, '-')
}
369
+
370
+ // ── ID → title index ──────────────────────────────────────────────────────────
371
+
372
+ // Maps xml:id values to their section/element title text, built before conversion.
373
+ const idTitles = new Map()
374
+
375
// Walk the element tree rooted at `node` and record, in idTitles, a title for
// every element that carries an id:
//   - an element with a <title> child maps to that title's trimmed text
//     (recorded even when the text is empty);
//   - a <bridgehead> without a <title> child maps to its own trimmed text,
//     but only when that text is non-empty.
// Non-element nodes are ignored.
function buildIdIndex(node) {
  if (!node || node.type !== 'element') return

  const id = attr(node, 'id')
  const titleNode = id ? firstChild(node, 'title') : null
  if (titleNode) {
    idTitles.set(id, textContent(titleNode).trim())
  } else if (id && node.name === 'bridgehead') {
    const ownText = textContent(node).trim()
    if (ownText) idTitles.set(id, ownText)
  }

  const kids = node.children ?? []
  kids.forEach((child) => buildIdIndex(child))
}
391
+
392
+ // ── Converter ─────────────────────────────────────────────────────────────────
393
+
394
// Convert a parsed DocBook tree into a complete Typst document body.
//
// The id → title index is built first, then every top-level element child of
// `root` except <info> is emitted as a block. Finally, runs of three or more
// newlines are collapsed to a single blank line and the result is terminated
// by exactly one newline.
//
// Throws a TypeError when `root` is missing (e.g. the input contained no
// element at all).
function convert(root) {
  if (!root) throw new TypeError('convert: no root element to convert')

  buildIdIndex(root)
  const parts = []

  // Skip <info> — title is provided by the preamble in asciidoc-typst-pdf.js.
  for (const child of (root.children ?? [])) {
    if (child.type !== 'element') continue
    if (child.name === 'info') continue
    emitBlock(child, parts, 0)
  }

  // Collapse blank-line runs in markup only. A raw block (a fence of three or
  // more backticks up to the same fence) is matched first and copied through
  // untouched, so consecutive empty lines inside code listings survive.
  const RAW_BLOCK_OR_BLANK_RUN = /(`{3,})[\s\S]*?\1|\n{3,}/g
  const collapsed = parts
    .join('')
    .replace(RAW_BLOCK_OR_BLANK_RUN, (match, fence) => (fence === undefined ? '\n\n' : match))

  return collapsed.trimEnd() + '\n'
}
407
+
408
+ // ── Block emitter ─────────────────────────────────────────────────────────────
409
+
410
// Emit the Typst markup for one block-level node by pushing strings onto
// `out`. `depth` is the current section nesting level; a nested
// section/appendix/preface is emitted one level deeper, and the value is
// handed on unchanged to child blocks.
//
// Text nodes are trimmed, escaped and emitted as a paragraph. Elements are
// dispatched on their tag name; unknown elements fall back to emitting their
// element children as blocks and their non-blank text children as paragraphs.
function emitBlock(node, out, depth) {
  if (!node) return

  if (node.type === 'text') {
    const trimmed = node.text.trim()
    if (trimmed) out.push(esc(trimmed) + '\n\n')
    return
  }

  const tag = node.name

  switch (tag) {
    case 'book':
    case 'article':
      childElements(node)
        .filter((c) => c.name !== 'info')
        .forEach((c) => emitBlock(c, out, depth))
      return

    case 'chapter':
    case 'part':
      emitSection(node, out, 2)
      return

    case 'section':
    case 'appendix':
    case 'preface':
      emitSection(node, out, depth + 1)
      return

    case 'simpara':
    case 'para':
      emitParagraphBlock(node, out, depth)
      return

    case 'formalpara': {
      const titleNode = firstChild(node, 'title')
      const bodyNode = firstChild(node, 'para', 'simpara')
      const pieces = []
      if (titleNode) pieces.push(`#strong[${emitInline(titleNode).trim()}]`)
      const bodyText = bodyNode ? emitInline(bodyNode).trim() : ''
      if (bodyText) pieces.push(bodyText)
      out.push(pieces.join('\n\n') + '\n\n')
      return
    }

    case 'itemizedlist':
      emitItemizedList(node, out, 0)
      out.push('\n')
      return

    case 'orderedlist':
      emitOrderedList(node, out, 0)
      out.push('\n')
      return

    case 'variablelist':
      emitVariableList(node, out)
      return

    case 'note':
    case 'tip':
    case 'important':
    case 'caution':
    case 'warning':
      emitAdmonition(node, out, admonitionKind(tag))
      return

    case 'figure':
    case 'informalfigure':
      emitFigure(node, out)
      return

    case 'informaltable':
    case 'table':
      emitTable(node, out)
      return

    case 'programlisting':
    case 'screen':
    case 'literallayout':
      emitListing(node, out)
      return

    case 'calloutlist':
      emitCalloutList(node, out)
      return

    case 'informalexample':
    case 'example':
      childElements(node).forEach((c) => emitBlock(c, out, depth))
      return

    case 'bridgehead': {
      // The heading level is the number embedded in renderas ("sect3" → 3).
      const renderas = attr(node, 'renderas') || 'sect3'
      const level = parseInt(renderas.replace(/\D+/, ''), 10) || 3
      const xmlId = attr(node, 'id')
      const labelPart = xmlId ? ` <${sanitizeLabel(xmlId)}>` : ''
      // Use #heading() to suppress TOC entry
      const title = emitInline(node).trim()
      out.push(`#heading(level: ${level}, outlined: false)[${title}]${labelPart}\n\n`)
      return
    }

    case 'anchor': {
      // A labelled, empty metadata element gives the id a label to attach to.
      const xmlId = attr(node, 'id')
      if (xmlId) out.push(`#[#metadata("") <${sanitizeLabel(xmlId)}>]\n\n`)
      return
    }

    case 'blockquote':
    case 'quote': {
      const paragraphs = childElements(node, 'para', 'simpara').map((c) => emitInline(c).trim())
      const content = paragraphs.join('\n\n')
      out.push(`#quote(block: true)[\n${content}\n]\n\n`)
      return
    }

    case 'sidebar':
    case 'abstract': {
      const title = firstChild(node, 'title')
      const inner = []
      childElements(node)
        .filter((c) => c.name !== 'title')
        .forEach((c) => emitBlock(c, inner, depth))
      const titleStr = title ? `#strong[${emitInline(title).trim()}]\n\n` : ''
      out.push(`#block(stroke: 0.5pt + gray, inset: 1em, radius: 4pt, width: 100%)[\n${titleStr}${inner.join('')}]\n\n`)
      return
    }

    // Elements that appear in block context but are really inline wrappers
    case 'title':
    case 'info':
    case 'date':
      return // handled by their parent

    default:
      // Fallback: recurse into children
      for (const c of (node.children ?? [])) {
        if (c.type === 'element') {
          emitBlock(c, out, depth)
        } else if (c.type === 'text' && c.text.trim()) {
          out.push(esc(c.text.trim()) + '\n\n')
        }
      }
  }
}

// Emit a <para>/<simpara>.
//   - A paragraph containing an <?asciidoc-pagebreak?> processing instruction
//     becomes a page break.
//   - A paragraph wrapping block-level elements (e.g. <informaltable>,
//     <figure>) is split: each block child is emitted as a block and each run
//     of inline children in between becomes its own paragraph.
//   - Anything else is emitted as one inline paragraph.
function emitParagraphBlock(node, out, depth) {
  const children = node.children ?? []

  if (children.some((c) => c.type === 'pi' && c.target === 'asciidoc-pagebreak')) {
    out.push('#pagebreak()\n\n')
    return
  }

  const blockTags = new Set([
    'table', 'informaltable', 'figure', 'informalfigure',
    'itemizedlist', 'orderedlist', 'variablelist',
    'programlisting', 'screen', 'literallayout',
    'note', 'tip', 'important', 'caution', 'warning',
  ])
  const isBlockChild = (c) => c.type === 'element' && blockTags.has(c.name)

  if (!children.some(isBlockChild)) {
    out.push(emitInline(node).trim() + '\n\n')
    return
  }

  let pendingInline = []
  const flushPending = () => {
    const text = pendingInline.join('').trim()
    if (text) out.push(text + '\n\n')
    pendingInline = []
  }

  for (const c of children) {
    if (isBlockChild(c)) {
      flushPending()
      emitBlock(c, out, depth)
    } else {
      pendingInline.push(emitInline(c))
    }
  }
  flushPending()
}
596
+
597
+ // ── Section emitter ───────────────────────────────────────────────────────────
598
+
599
// Emit a sectioning element: its heading (when it has a <title>), an optional
// label line derived from the element's id, then all remaining children.
//
// With the theme enabled the heading uses `level` equals signs; without it
// the level is reduced by one (never below 1). At most six equals signs are
// emitted. Nested <section>/<appendix> children are emitted one level deeper;
// every other child goes through emitBlock at the current level.
function emitSection(node, out, level) {
  const titleNode = firstChild(node, 'title')
  const xmlId = attr(node, 'id')

  if (titleNode) {
    const shownLevel = useTheme ? level : Math.max(1, level - 1)
    const marker = '='.repeat(Math.min(shownLevel, 6))
    out.push(`${marker} ${emitInline(titleNode).trim()}\n`)
    if (xmlId) out.push(`<${sanitizeLabel(xmlId)}>\n`)
    out.push('\n')
  }

  for (const child of childElements(node)) {
    switch (child.name) {
      case 'title':
        break // already emitted as the heading
      case 'section':
      case 'appendix':
        emitSection(child, out, level + 1)
        break
      default:
        emitBlock(child, out, level)
    }
  }
}
621
+
622
+ // ── List emitters ─────────────────────────────────────────────────────────────
623
+
624
// Push one "-"-marked entry, followed by a newline, for every <listitem>
// child of `node`.
function emitItemizedList(node, out, indent) {
  childElements(node, 'listitem').forEach((item) => {
    out.push(`${emitListItem(item, '-', indent)}\n`)
  })
}
630
+
631
// Push one "+"-marked (auto-numbered) entry, followed by a newline, for every
// <listitem> child of `node`.
function emitOrderedList(node, out, indent) {
  childElements(node, 'listitem').forEach((item) => {
    out.push(`${emitListItem(item, '+', indent)}\n`)
  })
}
637
+
638
// Render one <listitem> as a Typst list entry and return it as a string
// (no trailing newline).
//
// `marker` is the Typst list marker ('-' or '+') and `indent` the number of
// leading spaces for the marker line. The first rendered child follows the
// marker; every further child, and every continuation line, is indented one
// space more than the marker. An item without renderable children yields just
// the marker followed by a space.
//
// Handled children: para/simpara, nested itemized/ordered lists, code
// listings (programlisting, screen, literallayout), figures, tables and
// admonitions. Anything else contributes its escaped text content.
function emitListItem(item, marker, indent) {
  const pad = ' '.repeat(indent)
  const subpad = ' '.repeat(indent + 1)
  const parts = []

  // Indent every line after the first by `subpad`.
  const hang = (text) => text.replace(/\n/g, '\n' + subpad)

  // Render a nested list. The extra `subpad` is applied to *all* lines of each
  // nested entry, not just its marker line; otherwise the nested entry's
  // continuation lines end up in the same column as the nested marker and no
  // longer belong to that entry.
  const nestedList = (list, nestedMarker) =>
    childElements(list, 'listitem')
      .map((sub) => hang(subpad + emitListItem(sub, nestedMarker, indent + 1)))
      .join('\n')

  for (const c of childElements(item)) {
    switch (c.name) {
      case 'simpara':
      case 'para':
        parts.push(emitInline(c).trim())
        break
      case 'itemizedlist':
        parts.push(nestedList(c, '-'))
        break
      case 'orderedlist':
        parts.push(nestedList(c, '+'))
        break
      case 'programlisting':
      case 'screen':
      case 'literallayout': {
        // literallayout is treated like the other listings, as emitBlock does.
        const lang = attr(c, 'language') || ''
        parts.push(`\`\`\`${lang}\n${listingText(c)}\n\`\`\``)
        break
      }
      case 'figure':
      case 'informalfigure': {
        const s = figureString(c)
        if (s) parts.push(s)
        break
      }
      case 'table':
      case 'informaltable': {
        // tableString yields nothing for a table without <tgroup>; skipping it
        // keeps a non-string out of `parts` (which would crash hang()).
        const s = tableString(c)
        if (s) parts.push(s)
        break
      }
      default: {
        const adm = admonitionKind(c.name)
        if (adm) {
          parts.push(admonitionBlock(c, adm))
        } else {
          const t = textContent(c).trim()
          if (t) parts.push(esc(t))
        }
      }
    }
  }

  if (!parts.length) return `${pad}${marker} `

  const [first, ...others] = parts
  const rest = others.map((p) => subpad + hang(p)).join('\n')
  return `${pad}${marker} ${hang(first || '')}${rest ? '\n' + rest : ''}`
}
683
+
684
// Emit every <varlistentry> of a <variablelist> as a Typst term-list entry
// ("/ term: definition"). The term is the entry's first <term>; the
// definition is the inline rendering of the para/simpara children of its
// first <listitem>, joined with single spaces. Missing parts render as ''.
function emitVariableList(node, out) {
  for (const entry of childElements(node, 'varlistentry')) {
    const termNode = firstChild(entry, 'term')
    const itemNode = firstChild(entry, 'listitem')

    let term = ''
    if (termNode) term = emitInline(termNode).trim()

    let definition = ''
    if (itemNode) {
      const paragraphs = childElements(itemNode, 'simpara', 'para')
      definition = paragraphs.map((p) => emitInline(p).trim()).join(' ')
    }

    out.push(`/ ${term}: ${definition}\n\n`)
  }
}
697
+
698
+ // ── Admonition emitter ────────────────────────────────────────────────────────
699
+
700
// Build the Typst markup for an admonition element and return it without
// trailing newlines.
//
// The element's <title> is skipped. Paragraphs are rendered inline, list
// items and figures are rendered with their own helpers, and any other child
// contributes its escaped text. The pieces are separated by blank lines.
// With the theme enabled the body is wrapped in #admonition("<kind>")[...];
// otherwise in a stroked #block with the capitalised kind as a bold prefix.
function admonitionBlock(node, kind) {
  const pieces = []
  const pushItems = (list, marker) => {
    for (const item of childElements(list, 'listitem')) {
      pieces.push(emitListItem(item, marker, 0))
    }
  }

  for (const c of childElements(node)) {
    switch (c.name) {
      case 'title':
        break // styling handled by theme
      case 'simpara':
      case 'para':
        pieces.push(emitInline(c).trim())
        break
      case 'itemizedlist':
        pushItems(c, '-')
        break
      case 'orderedlist':
        pushItems(c, '+')
        break
      case 'figure':
      case 'informalfigure': {
        const fig = figureString(c)
        if (fig) pieces.push(fig)
        break
      }
      default: {
        const plain = textContent(c).trim()
        if (plain) pieces.push(esc(plain))
      }
    }
  }

  const body = pieces.join('\n\n')
  if (useTheme) return `#admonition("${kind}")[\n${body}\n]`

  const label = kind.charAt(0).toUpperCase() + kind.slice(1)
  return `#block(stroke: 0.5pt, inset: 8pt)[#strong[${esc(label)}:] ${body}]`
}
730
+
731
// Push the admonition markup for `node`, followed by a blank line.
function emitAdmonition(node, out, kind) {
  const block = admonitionBlock(node, kind)
  out.push(`${block}\n\n`)
}
734
+
735
// Map a tag name to its admonition kind. The five admonition tags (note, tip,
// important, caution, warning) map to themselves; any other tag yields null.
function admonitionKind(tag) {
  const kinds = ['note', 'tip', 'important', 'caution', 'warning']
  return kinds.includes(tag) ? tag : null
}
746
+
747
+ // ── Figure / image emitter ────────────────────────────────────────────────────
748
+
749
// Plain-text alternative description of a media object: the trimmed text of
// <textobject><phrase>, or of the whole <textobject> when it has no <phrase>.
// Returns '' when the media object or its <textobject> is missing.
function figureAlt(mo) {
  const textObject = mo ? firstChild(mo, 'textobject') : null
  if (!textObject) return ''
  const source = firstChild(textObject, 'phrase') || textObject
  return textContent(source).trim()
}
757
+
758
// The ", alt: "…"" named argument for image(), with the text escaped for a
// Typst string literal; '' when there is no alt text.
function imageAltArg(alt) {
  if (!alt) return ''
  return `, alt: "${escStr(alt)}"`
}
762
+
763
// Build the caption/alt argument lines for a #figure() call.
// `captionText` is the figure title (formal) or phrase alt text (informal) and
// is escaped as markup; an empty caption still produces "caption: []".
// `alt` is the image accessibility text; its line is only added when it is
// non-empty. `formal` is accepted but not used here.
function figureAttrs(captionText, alt, formal) {
  const lines = [` caption: [${captionText ? esc(captionText) : ''}],\n`]
  if (alt) lines.push(` alt: "${escStr(alt)}",\n`)
  return lines.join('')
}
771
+
772
// Return the Typst #figure(...) string for a <figure> or <informalfigure>
// node, or null if the node lacks a usable image reference (no media object,
// image object, <imagedata> or fileref).
//
// The caption is the <title> text for a formal <figure>, falling back to the
// image's alt text. When the node carries an id, the matching Typst label is
// appended after the figure — as tableString does for tables — so that
// references to the figure's id have a label to resolve against.
//
// May throw (via imageDataArgs) when the image's contentwidth is invalid.
function figureString(node) {
  const formal = node.name === 'figure'
  const mo = firstChild(node, 'mediaobject', 'inlinemediaobject')
  if (!mo) return null

  const io = firstChild(mo, 'imageobject')
  if (!io) return null

  const imgdata = firstChild(io, 'imagedata')
  if (!imgdata) return null

  const fileref = decodeFileref(attr(imgdata, 'fileref'))
  if (!fileref) return null

  const alt = figureAlt(mo)
  // Prefer the figure's <title> element as the caption; fall back to alt text.
  const titleNode = formal ? firstChild(node, 'title') : null
  const captionText = (titleNode ? textContent(titleNode).trim() : null) || alt

  const xmlId = attr(node, 'id')
  const labelSuffix = xmlId ? ` <${sanitizeLabel(xmlId)}>` : ''

  return (
    `#figure(\n` +
    ` image("${escStr(fileref)}"${imageDataArgs(imgdata)}${imageAltArg(alt)}),\n` +
    figureAttrs(captionText, alt, formal) +
    `)` +
    labelSuffix
  )
}
800
+
801
// Push the figure markup for `node`, followed by a blank line. Nothing is
// pushed when the node has no usable image.
function emitFigure(node, out) {
  const rendered = figureString(node)
  if (!rendered) return
  out.push(`${rendered}\n\n`)
}
805
+
806
+ // ── Table emitter ─────────────────────────────────────────────────────────────
807
+
808
// Return the Typst markup for a <table>/<informaltable>: a table(...) call
// wrapped in #figure(kind: table), followed by a label when the node has an
// id. A titled table gets its title as the caption; an untitled one gets
// "caption: none" and no numbering.
//
// Returns '' when the node has no <tgroup>, so callers can concatenate or
// skip the result without special-casing undefined.
//
// Only the first <row> of <thead> is rendered as the header.
function tableString(node) {
  const titleNode = firstChild(node, 'title')
  const tgroup = firstChild(node, 'tgroup')
  if (!tgroup) return ''

  const colspecs = childElements(tgroup, 'colspec')
  const thead = firstChild(tgroup, 'thead')
  const tbody = firstChild(tgroup, 'tbody')

  // Build column widths from colspec/@colwidth, e.g. "50*" → "50fr".
  const numCols = parseInt(attr(tgroup, 'cols') || '0', 10) || colspecs.length || 2
  const widths = colspecs.map(c => {
    const w = attr(c, 'colwidth') || '1*'
    // proportional widths ("50*") → fractional
    const match = w.match(/^([\d.]+)\*/)
    return match ? `${match[1]}fr` : w
  })
  while (widths.length < numCols) widths.push('1fr')

  const colsExpr = '(' + widths.join(', ') + ')'

  // Horizontal alignments that can be passed to table.cell() as-is. DocBook
  // also allows "justify" and "char", which are not Typst alignment values;
  // those (and anything else unexpected) fall back to left.
  const TYPST_ALIGNMENTS = new Set(['left', 'center', 'right', 'start', 'end'])
  const cellLine = (entry) => {
    const requested = String(attr(entry, 'align') || 'left').toLowerCase()
    const align = TYPST_ALIGNMENTS.has(requested) ? requested : 'left'
    return ` table.cell(align: ${align})[${emitCellContent(entry)}],`
  }

  const cells = []

  if (thead) {
    const headerRow = firstChild(thead, 'row')
    if (headerRow) {
      cells.push(' table.header(')
      for (const e of childElements(headerRow, 'entry')) cells.push(cellLine(e))
      cells.push(' ),')
    }
  }

  if (tbody) {
    for (const row of childElements(tbody, 'row')) {
      for (const e of childElements(row, 'entry')) cells.push(cellLine(e))
    }
  }

  // Build the table call without a leading `#` so it can be used either as a
  // standalone statement (#table(...)) or as an argument to #figure() where
  // the `#` is not valid (Typst code mode).
  const tableCall =
    `table(\n` +
    ` columns: ${colsExpr},\n` +
    ` stroke: (_, y) => (bottom: if y == 0 { 0.5pt + black } else { 0.5pt + luma(200) }),\n` +
    cells.join('\n') + '\n' +
    `)`

  const xmlId = attr(node, 'id')
  const labelSuffix = xmlId ? ` <${sanitizeLabel(xmlId)}>` : ''

  if (titleNode) {
    const cap = emitInline(titleNode).trim()
    return `#figure(\n ${tableCall},\n caption: [${cap}],\n kind: table,\n)${labelSuffix}`
  }
  return `#figure(\n ${tableCall},\n caption: none,\n kind: table,\n numbering: none,\n)${labelSuffix}`
}
873
+
874
// Push the table markup for `node`, followed by a blank line. Nothing is
// pushed when tableString() produces no markup (a table without <tgroup>),
// which would otherwise put the literal text "undefined" into the document.
function emitTable(node, out) {
  const rendered = tableString(node)
  if (rendered) out.push(rendered + '\n\n')
}
877
+
878
// Render what goes between the brackets of a table cell.
//
// Direct text children are trimmed and escaped. Element children are rendered
// by kind: paragraphs inline, list items one per fragment, figures, code
// listings (programlisting/screen), nested tables and admonitions through
// their helpers; any other element contributes its escaped text content.
// The fragments are joined with blank lines.
function emitCellContent(entry) {
  const fragments = []
  const listItems = (list, marker) =>
    childElements(list, 'listitem').map((item) => emitListItem(item, marker, 0))

  for (const child of entry.children ?? []) {
    if (child.type === 'text') {
      const trimmed = child.text.trim()
      if (trimmed) fragments.push(esc(trimmed))
      continue
    }
    if (child.type !== 'element') continue

    const name = child.name
    if (name === 'simpara' || name === 'para') {
      fragments.push(emitInline(child).trim())
    } else if (name === 'itemizedlist') {
      fragments.push(...listItems(child, '-'))
    } else if (name === 'orderedlist') {
      fragments.push(...listItems(child, '+'))
    } else if (name === 'figure' || name === 'informalfigure') {
      const fig = figureString(child)
      if (fig) fragments.push(fig)
    } else if (name === 'programlisting' || name === 'screen') {
      const lang = attr(child, 'language') || ''
      fragments.push(`\`\`\`${lang}\n${listingText(child)}\n\`\`\``)
    } else if (name === 'table' || name === 'informaltable') {
      fragments.push(tableString(child))
    } else {
      const kind = admonitionKind(name)
      if (kind) {
        fragments.push(admonitionBlock(child, kind))
      } else {
        const plain = textContent(child).trim()
        if (plain) fragments.push(esc(plain))
      }
    }
  }

  return fragments.join('\n\n')
}
934
+
935
+ // ── Listing emitter ───────────────────────────────────────────────────────────
936
+
937
// Callout marker for the number n: the Unicode circled digit for 1–20
// (① is U+2460, ⑳ is U+2473), otherwise the number in parentheses.
function circledNumber(n) {
  const hasGlyph = n >= 1 && n <= 20
  return hasGlyph ? String.fromCodePoint(0x2460 + n - 1) : `(${n})`
}
943
+
944
// Assemble the source text of a <programlisting> or <screen> node.
// Text children are copied verbatim. Each inline <co> child is replaced by
// the callout marker for the number at the end of its id (nothing is added
// when the id has no trailing number or the number is below 1). All other
// element children are dropped.
function listingText(node) {
  const pieces = []
  for (const child of node.children ?? []) {
    if (child.type === 'text') {
      pieces.push(child.text)
      continue
    }
    if (child.type !== 'element' || child.name !== 'co') continue

    const id = attr(child, 'id')
    const digits = id && id.match(/(\d+)$/)
    const n = digits ? parseInt(digits[1], 10) : 0
    if (n >= 1) pieces.push(circledNumber(n))
  }
  return pieces.join('')
}
961
+
962
// Push a Typst raw block for a listing node, tagged with the node's language
// attribute when it has one, followed by a blank line.
//
// The fence is at least three backticks and always one backtick longer than
// the longest run of backticks in the code itself, so code that contains ```
// cannot close the block early.
function emitListing(node, out) {
  const lang = attr(node, 'language') || ''
  const code = listingText(node)

  let longestRun = 0
  for (const run of code.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length)
  }
  const fence = '`'.repeat(Math.max(3, longestRun + 1))

  out.push(`${fence}${lang}\n${code}\n${fence}\n\n`)
}
966
+
967
+ // ── Callout list emitter ──────────────────────────────────────────────────────
968
+
969
// Emit the entries of a <calloutlist>.
//
// Each <callout> is numbered by the digits at the end of its arearefs
// attribute; callouts without such a number are skipped. The description is
// built from the callout's paragraphs and itemized/ordered lists, separated
// by blank lines. With the theme enabled each entry becomes
// #callout-item(n)[...]; otherwise a plain "n. description" paragraph.
// A list with no <callout> children emits nothing at all.
function emitCalloutList(node, out) {
  const callouts = childElements(node, 'callout')
  if (callouts.length === 0) return

  // Render a nested list into its own buffer and return it as one string.
  const renderList = (list, emitter) => {
    const buffer = []
    emitter(list, buffer, 0)
    return buffer.join('')
  }

  for (const callout of callouts) {
    const arearefs = attr(callout, 'arearefs') || ''
    const digits = arearefs.match(/(\d+)$/)
    const n = digits ? parseInt(digits[1], 10) : 0
    if (!n) continue

    const descParts = []
    for (const c of childElements(callout)) {
      switch (c.name) {
        case 'para':
        case 'simpara':
          descParts.push(emitInline(c).trim())
          break
        case 'itemizedlist':
          descParts.push(renderList(c, emitItemizedList))
          break
        case 'orderedlist':
          descParts.push(renderList(c, emitOrderedList))
          break
        default:
          break
      }
    }

    const desc = descParts.join('\n\n').trim()
    out.push(useTheme ? `#callout-item(${n})[\n${desc}\n]\n` : `${n}. ${desc}\n\n`)
  }
  out.push('\n')
}
1003
+
1004
+ // ── Inline emitter ────────────────────────────────────────────────────────────
1005
+ //
1006
+ // Processes a node and returns a Typst inline string.
1007
+
1008
// Render one parsed node as Typst inline markup.
//
// A missing node yields ''. Text nodes are escaped with esc(). Element nodes
// are dispatched on their tag name; any element without a dedicated rule
// simply renders its children in order.
function emitInline(node) {
  if (!node) return ''
  if (node.type === 'text') return esc(node.text)

  // Children rendered and concatenated; a function because several rules
  // never look at the children.
  const renderChildren = () => (node.children ?? []).map((child) => emitInline(child)).join('')

  // Children wrapped in #strong[…].
  const bold = () => `#strong[${renderChildren()}]`

  // Inline raw text: backtick-delimited, or a #raw("…") call when the text
  // itself contains a backtick and so cannot sit between backticks.
  const inlineRaw = (text) => (text.includes('`') ? `#raw("${escStr(text)}")` : `\`${text}\``)

  // A single keyboard key: theme function when available, raw text otherwise.
  const keyMarkup = (text) => (useTheme ? `#keycap("${escStr(text)}")` : inlineRaw(text))

  switch (node.name) {
    // Emphasis; role "strong" selects bold instead of italics.
    case 'emphasis': {
      const isStrong = (attr(node, 'role') || '').includes('strong')
      const body = renderChildren()
      return isStrong ? `#strong[${body}]` : `#emph[${body}]`
    }

    // <literal role="ui ui-xxxx"> marks a UI element; any other literal is
    // rendered as inline code.
    case 'literal': {
      const uiPrefix = 'ui ui-'
      const role = attr(node, 'role') || ''
      if (!role.startsWith(uiPrefix)) return inlineRaw(textContent(node))
      if (!useTheme) return bold()
      const kind = escStr(role.slice(uiPrefix.length))
      return `#ui("${kind}")[${renderChildren()}]`
    }

    // Inline code: roles and nested markup are discarded, only the text is kept.
    case 'code':
    case 'computeroutput':
    case 'filename':
    case 'systemitem':
    case 'classname':
    case 'methodname':
    case 'varname':
    case 'constant':
      return inlineRaw(textContent(node))

    // Hyperlink. Without an href only the link text survives; when the text
    // is just the URL itself the label is omitted.
    case 'link':
    case 'ulink': {
      const target = attr(node, 'href') ?? ''
      const label = renderChildren()
      if (!target) return label
      const labelIsUrl = label.trim() === target
      const call = `#link("${escStr(target)}")`
      return labelIsUrl ? call : `${call}[${label}]`
    }

    // Cross-reference, emitted as #link(<label>)[title] because @label
    // requires heading numbering.
    case 'xref': {
      const target = attr(node, 'linkend') ?? ''
      if (!target) return ''
      const label = sanitizeLabel(target)
      // Prefer the recorded title; otherwise humanise the id itself.
      let caption = idTitles.get(target)
      if (caption === undefined || caption === null) {
        caption = target.replace(/.*[-_]([^-_]+)$/, '$1').replace(/[-_]/g, ' ')
      }
      return `#link(<${label}>)[${esc(caption)}]`
    }

    // UI button: theme function when available, bold otherwise.
    case 'guibutton':
      return useTheme ? `#guibutton("${escStr(textContent(node))}")` : bold()

    // UI label / icon: bold.
    case 'guilabel':
    case 'guiicon':
      return bold()

    // Menu path: the menu parts in bold, separated by a small filled
    // right-pointing triangle. Children of other kinds are left out.
    case 'menuchoice': {
      const menuParts = ['guimenu', 'guisubmenu', 'guimenuitem', 'shortcut']
      const separator = ' #text(size: 0.75em)[\u25b6] '
      const path = childElements(node)
        .filter((part) => menuParts.includes(part.name))
        .map((part) => emitInline(part))
        .join(separator)
      return `#strong[${path}]`
    }

    // Keyboard keys; a combination joins its <keycap> children with "+".
    case 'keycap':
      return keyMarkup(textContent(node))

    case 'keycombo':
      return childElements(node, 'keycap')
        .map((key) => keyMarkup(textContent(key)))
        .join('+')

    // Inline quotation.
    case 'quote':
      return `"${renderChildren()}"`

    // Footnote: its paragraphs are flattened onto one line.
    case 'footnote': {
      const paragraphs = childElements(node, 'para', 'simpara').map((p) => emitInline(p).trim())
      return `#footnote[${paragraphs.join(' ')}]`
    }

    // Superscript / subscript.
    case 'superscript':
      return `#super[${renderChildren()}]`
    case 'subscript':
      return `#sub[${renderChildren()}]`

    // Inline image, boxed and scaled to the text height. Produces nothing
    // when no file reference can be found.
    case 'inlinemediaobject': {
      const imageObject = firstChild(node, 'imageobject')
      const imageData = imageObject ? firstChild(imageObject, 'imagedata') : null
      const source = decodeFileref(imageData ? attr(imageData, 'fileref') : null)
      if (!source) return ''
      const alt = figureAlt(node)
      return `#box(image("${escStr(source)}", height: 1em${imageAltArg(alt)}))`
    }

    // Elements that produce no inline output.
    case 'anchor':
    case 'textobject':
    case 'imageobject':
    case 'mediaobject':
      return ''

    // Inline URI.
    case 'uri':
      return `#link("${escStr(textContent(node))}")`

    // Transparent wrappers (title, paragraphs, phrases, terms, individual
    // menu parts, definition-list entries) and every unrecognised element:
    // render the children unchanged.
    case 'title':
    case 'simpara':
    case 'para':
    case 'phrase':
    case 'term':
    case 'guimenu':
    case 'guisubmenu':
    case 'guimenuitem':
    case 'varlistentry':
    default:
      return renderChildren()
  }
}
1175
+
1176
+ // ── Main ──────────────────────────────────────────────────────────────────────
1177
+
1178
// Pipeline: read the DocBook input, parse it, convert the tree to Typst and
// write the result. Every expected failure (unreadable input, malformed XML,
// empty document, unwritable output) is reported on stderr as a one-line
// message and ends the process with exit status 1.

// The existence check done earlier does not rule out every read failure
// (the path may be a directory, or unreadable), so the read is guarded too.
let src
try {
  src = fs.readFileSync(inputFile, 'utf-8')
} catch (err) {
  console.error(`Error: cannot read ${inputFile}: ${err.message}`)
  process.exit(1)
}

let root
try {
  root = parseXml(src)
} catch (err) {
  console.error(`XML parse error: ${err.message}`)
  process.exit(1)
}

if (!root) {
  console.error('Error: no root element found in XML.')
  process.exit(1)
}

const typst = convert(root)

// The output path may point into a missing or read-only directory.
try {
  fs.writeFileSync(outputFile, typst, 'utf-8')
} catch (err) {
  console.error(`Error: cannot write ${outputFile}: ${err.message}`)
  process.exit(1)
}
console.log(`Wrote ${outputFile}`)