djot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,642 @@
1
+ local match = require("djot.match")
2
+ local unpack = unpack or table.unpack
3
+
4
+ local find, lower, sub, gsub, rep, format =
5
+ string.find, string.lower, string.sub, string.gsub, string.rep, string.format
6
+
7
+ local unpack_match, get_length, matches_pattern =
8
+ match.unpack_match, match.get_length, match.matches_pattern
9
+
10
--- Collect the plain text held beneath a node.
-- Children are scanned from index 2 onward; scanning stops at the first
-- child that is not a table (for example the text payload of a leaf).
-- "str" and "nbsp" children contribute their second element, "softbreak"
-- contributes a newline, and every other child is flattened recursively.
-- @param node AST node (array whose first element is the tag)
-- @return the concatenated text as a string
local function get_string_content(node)
  local parts, count = {}, 0
  for pos = 2, #node do
    local child = node[pos]
    if type(child) ~= "table" then
      break
    end
    local kind = child[1]
    local text
    if kind == "softbreak" then
      text = "\n"
    elseif kind == "str" or kind == "nbsp" then
      text = child[2]
    else
      text = get_string_content(child)
    end
    if text ~= nil then
      count = count + 1
      parts[count] = text
    end
  end
  return table.concat(parts)
end
26
+
27
-- Value of each roman numeral digit, keyed by its lowercase letter.
local roman_digits = {
  i = 1,
  v = 5,
  x = 10,
  l = 50,
  c = 100,
  d = 500,
  m = 1000 }

--- Convert a roman numeral to a number.
-- Letter case is ignored. The digits are read from right to left: a digit
-- smaller than the one to its right is subtracted (e.g. the "i" in "ix"),
-- otherwise it is added.
-- @param s string consisting of roman digits
-- @return the numeric value (0 for the empty string)
-- Raises an error naming the numeral if s contains a character that is not
-- a roman digit.
local function roman_to_number(s)
  local total = 0
  local prevdigit = 0
  for i = #s, 1, -1 do
    local n = roman_digits[s:sub(i, i):lower()]
    -- validate before comparing, so that a bad character produces this
    -- message instead of a "compare nil with number" error
    assert(n ~= nil, "Encountered bad character in roman numeral " .. s)
    if n < prevdigit then -- e.g. ix
      total = total - n
    else
      total = total + n
    end
    prevdigit = n
  end
  return total
end
55
+
56
--- Compute the starting number of an ordered list.
-- Punctuation is stripped from both arguments; what remains of the style
-- selects the numbering scheme ("1", "A", "a", "I" or "i") and what remains
-- of the marker is the first item's enumerator.
-- @param marker marker text of the first list item, e.g. "3." or "(c)"
-- @param style list style, e.g. "1." or "(a)"
-- @return the start number, or nil when the style carries no numbering
local function get_list_start(marker, style)
  local scheme = (string.gsub(style, "%p", ""))
  local enumerator = (string.gsub(marker, "%p", ""))
  if scheme == "1" then
    return tonumber(enumerator)
  end
  if scheme == "A" or scheme == "a" then
    -- position in the alphabet, relative to the scheme's own first letter
    return (string.byte(enumerator) - string.byte(scheme) + 1)
  end
  if scheme == "I" or scheme == "i" then
    return roman_to_number(enumerator)
  end
  if scheme == "" then
    return nil
  end
end
73
+
74
-- Set of annotation names (with any leading "+" or "-" removed) that the
-- AST builder steps over without creating a node.
local ignorable = {
  blankline = true,
  escape = true,
  image_marker = true,
  checkbox_checked = true,
  checkbox_unchecked = true,
}
81
+
82
--- Decide whether a list item is "tight", i.e. has no blank line between
-- its blocks.
-- A blank line is disregarded when the next match opens a list item, or
-- when the next match closes a list item and either this is the last item
-- of its list or the match after that closes a list item as well.
-- @param matches array of matches
-- @param startidx index of the first match belonging to the item
-- @param endidx index of the last match belonging to the item
-- @param is_last_item true when the item is the final one of its list
-- @return true if no relevant blank line was found, false otherwise
local function is_tight(matches, startidx, endidx, is_last_item)
  local tight = true
  for pos = startidx, endidx do
    local _, _, annot = unpack_match(matches[pos])
    if annot == "blankline" then
      local following = matches[pos + 1]
      -- blank lines right before a list starts do not count
      if not matches_pattern(following, "%+list_item") then
        -- neither do blank lines at the end of a nested list
        -- or at the end of the last item
        local at_end = matches_pattern(following, "%-list_item") and
          (is_last_item or matches_pattern(matches[pos + 2], "%-list_item"))
        if not at_end then
          tight = false
        end
      end
    end
  end
  return tight
end
102
+
103
--- Set one attribute in an attribute table.
-- attr._keys (created on demand) lists the attribute names in order of
-- first insertion so that output can be produced deterministically.
-- A "class" value is appended to an existing class, separated by a space;
-- every other key (including "id") overwrites any previous value.
-- The key is always recorded in attr._keys if it is not there yet, also
-- when attr already carried a class that had not been recorded.
-- @param attr attribute table to modify
-- @param key attribute name
-- @param val attribute value
local function insert_attribute(attr, key, val)
  local keys = attr._keys
  if not keys then
    keys = {}
    attr._keys = keys
  end
  if key == "class" and attr.class then
    attr.class = attr.class .. " " .. val
  else
    attr[key] = val
  end
  -- record the key once, preserving the position of its first insertion
  for i = 1, #keys do
    if keys[i] == key then
      return
    end
  end
  keys[#keys + 1] = key
end
133
+
134
--- Copy every attribute of source into target using insert_attribute.
-- Attributes listed in source._keys are copied first, in that order; any
-- remaining attributes follow sorted by name. This keeps the insertion
-- order recorded in the target independent of the unspecified traversal
-- order of pairs. The "_keys" bookkeeping entry itself is never copied.
-- @param target attribute table receiving the attributes
-- @param source attribute table to copy from; nil or false is a no-op
local function copy_attributes(target, source)
  if not source then
    return
  end
  local copied = {}
  local ordered = source._keys
  if type(ordered) == "table" then
    for i = 1, #ordered do
      local k = ordered[i]
      if k ~= "_keys" and not copied[k] and source[k] ~= nil then
        copied[k] = true
        insert_attribute(target, k, source[k])
      end
    end
  end
  -- attributes present in source but not listed in source._keys
  local rest = {}
  for k in pairs(source) do
    if k ~= "_keys" and not copied[k] then
      rest[#rest + 1] = k
    end
  end
  table.sort(rest, function(a, b) return tostring(a) < tostring(b) end)
  for i = 1, #rest do
    insert_attribute(target, rest[i], source[rest[i]])
  end
end
143
+
144
--- Apply a parsed attribute node to a target node.
-- attrnode is an array whose entries from index 2 onward are pairs
-- {kind, text}. "id" and "class" entries are inserted directly. A "key"
-- entry takes its value from the entry that follows it when that entry is
-- a "value"; backslash escapes before punctuation are resolved in the
-- value. A "key" that is not followed by a "value" is dropped without
-- consuming the following entry.
-- targetnode.attr is created if it does not exist yet.
-- @param targetnode node receiving the attributes
-- @param attrnode parsed attributes
local function insert_attributes(targetnode, attrnode)
  targetnode.attr = targetnode.attr or {_keys = {}}
  local attr = targetnode.attr
  local i = 2
  while i <= #attrnode do
    local entry = attrnode[i]
    local x, y = entry[1], entry[2]
    if x == "id" or x == "class" then
      insert_attribute(attr, x, y)
    elseif x == "key" then
      local valnode = attrnode[i + 1]
      if valnode and valnode[1] == "value" then
        -- resolve backslash escapes
        local value = valnode[2]:gsub("\\(%p)", "%1")
        insert_attribute(attr, y, value)
        i = i + 1 -- the value entry has been consumed
      end
    end
    i = i + 1
  end
end
162
+
163
--- Turn a "list_item" node, in place, into a "definition_list_item".
-- A leading "para" child becomes the "term"; when the first child is not
-- a "para" an empty "term" is inserted. All children after the term are
-- moved into a single "definition" node.
-- @param result list item node; its tag must be "list_item"
-- Raises an error ("sanity check") for any other tag.
local function make_definition_list_item(result)
  -- compare the tag itself: indexing the tag string (result[1][1]) always
  -- yields nil and would make the check pass for every node
  assert(result[1] == "list_item", "sanity check")
  result[1] = "definition_list_item"
  if result[2] and result[2][1] == "para" then
    result[2][1] = "term"
  else
    table.insert(result, 2, {"term"})
  end
  if result[3] then
    local defn = {"definition"}
    for i = 3, #result do
      defn[#defn + 1] = result[i]
      result[i] = nil
    end
    result[3] = defn
  end
end
180
+
181
--- Create an abstract syntax tree based on an event stream.
-- @param subject the source text; node text is taken from it by position
-- @param matches array of matches; each is decoded with unpack_match into
--   start position, end position and annotation. An annotation starting
--   with "+" opens a container, one starting with "-" closes it, and any
--   other annotation is a leaf.
-- @param options optional table; when options.sourcepos is set, nodes get
--   a pos field {startpos, endpos}
-- @return the "doc" node, with the collected reference definitions in
--   doc.references and the footnotes in doc.footnotes
local function to_ast(subject, matches, options)
  if not options then
    options = {}
  end
  local idx = 1
  local matcheslen = #matches
  local sourcepos = options.sourcepos
  local references = {}
  local footnotes = {}
  local identifiers = {} -- identifiers used (to ensure uniqueness)

  -- generate auto identifier for heading
  local function get_identifier(s)
    local base = s:gsub("[][~!@#$%^&*(){}`,.<>\\|=+/?]","")
                  :gsub("^%s+",""):gsub("%s+$","")
                  :gsub("%s+","-")
    local i = 0
    local ident = base
    -- generate unique id
    while identifiers[ident] do
      i = i + 1
      ident = base .. tostring(i)
    end
    identifiers[ident] = true
    return ident
  end

  local function set_checkbox(node, startidx)
    -- determine if checked or unchecked
    local _,_,ann = unpack_match(matches[startidx + 1])
    if ann == "checkbox_checked" then
      node.checkbox = "checked"
    elseif ann == "checkbox_unchecked" then
      node.checkbox = "unchecked"
    end
  end

  -- Consume matches starting at idx until the match that closes maintag,
  -- and return the node {maintag, child1, child2, ...}.
  local function get_node(maintag)
    local nodes = {maintag}
    local stopper
    local block_attributes = nil
    if maintag then
      -- strip off data (e.g. for list_items)
      stopper = "^%-" .. gsub(maintag, "%[.*$", "")
    end
    while idx <= matcheslen do
      local match = matches[idx]
      local startpos, endpos, annot = unpack_match(match)
      if stopper and find(annot, stopper) then
        idx = idx + 1
        return nodes
      else
        local mod, tag = string.match(annot, "^([-+]?)(.*)")
        if ignorable[tag] then
          idx = idx + 1 -- skip
        elseif mod == "+" then -- open
          local startidx = idx
          idx = idx + 1
          local result = get_node(tag)
          if tag == "list_item[X]" then
            set_checkbox(result, startidx)
          end
          local _, finalpos = unpack_match(matches[idx - 1])
          if sourcepos then
            result.pos = {startpos, finalpos}
          end
          if block_attributes and tag ~= "block_attributes" then
            for i=1,#block_attributes do
              insert_attributes(result, block_attributes[i])
            end
            if result.attr and result.attr.id then
              identifiers[result.attr.id] = true
            end
            block_attributes = nil
          end
          if tag == "verbatim" then
            local s = get_string_content(result)
            -- trim space next to ` at beginning or end
            if find(s, "^ +`") then
              s = s:sub(2)
            end
            if find(s, "` +$") then
              s = s:sub(1, #s - 1)
            end
            result = {"verbatim", {"str", s}}
            -- check for raw_format, which makes this a raw node
            local sp,ep,ann = unpack_match(matches[idx])
            if ann == "raw_format" then
              local s = get_string_content(result)
              result = {"raw_inline", s}
              result.format = sub(subject, sp + 2, ep - 1)
              idx = idx + 1 -- skip the raw_format
            end
          elseif tag == "caption" then
            if nodes[#nodes][1] == "table" then
              -- move caption in table node
              table.insert(nodes[#nodes], 2, result)
              result = nil
            end
          elseif tag == "reference_definition" then
            local dest = ""
            local key
            for i=2,#result do
              if result[i][1] == "reference_key" then
                key = result[i][2]
              end
              if result[i][1] == "reference_value" then
                dest = dest .. result[i][2]
              end
            end
            -- a definition without a key cannot be stored (nil is not a
            -- valid table index)
            if key then
              references[key] = { destination = dest,
                                  attributes = result.attr }
            end
          elseif tag == "footnote" then
            local label
            if result[2] and result[2][1] == "note_label" then
              label = result[2][2]
            end
            if label then
              table.remove(result,2)
              footnotes[label] = result
            end
            result = nil
          elseif tag == "inline_math" then
            result[1] = "math"
            result.attr = {class = "math inline", _keys={"class"}}
          elseif tag == "display_math" then
            result[1] = "math"
            result.attr = {class = "math display", _keys={"class"}}
          elseif tag == "url" then
            result[1] = "link"
            result.destination = get_string_content(result)
          elseif tag == "email" then
            result[1] = "link"
            result.destination = "mailto:" .. get_string_content(result)
          elseif tag == "imagetext" or tag == "linktext" then
            -- gobble destination or reference
            local nextmatch = matches[idx]
            local _, _, nextannot = unpack_match(nextmatch)
            if nextannot == "+destination" then
              idx = idx + 1
              local dest = get_node("destination")
              result.destination = get_string_content(dest):gsub("\r?\n", "")
            elseif nextannot == "+reference" then
              idx = idx + 1
              local ref = get_node("reference")
              if #ref == 1 then -- []
                result.reference = get_string_content(result):gsub("\r?\n", " ")
              else
                result.reference = get_string_content(ref):gsub("\r?\n", " ")
              end
            end
            result[1] = result[1]:gsub("text","")
          elseif tag == "heading" then
            result.level = get_length(match)
            local heading_str = get_string_content(result)
                                 :gsub("^%s+",""):gsub("%s+$","")
            if not (result.attr and result.attr.id) then
              local ident = get_identifier(heading_str)
              -- the first slot is only a placeholder tag; insert_attributes
              -- reads entries from index 2 on (a nil there would leave the
              -- length of the table unspecified)
              insert_attributes(result, {"attributes", {"id", ident}})
            end
            -- insert into references unless there's a same-named one already:
            if not references[heading_str] then
              references[heading_str] =
                {destination = "#" .. result.attr.id, attributes = {_keys={}}}
            end
          elseif tag == "table" then
            -- look for a separator line
            -- if found, make the preceding rows headings
            -- and set attributes for column alignments on the table
            local i=2
            local aligns = {}
            while i <= #result do
              local found, align
              if result[i][1] == "row" then
                local row = result[i]
                for j=2,#row do
                  found, _, align = find(row[j][1], "^separator_(.*)")
                  if not found then
                    break
                  end
                  aligns[j - 1] = align
                end
                if found and #aligns > 0 then
                  -- set previous row to head and adjust aligns
                  local prevrow = result[i - 1]
                  if prevrow[1] == "row" then
                    prevrow.head = true
                    for k=2,#prevrow do
                      -- set head on cells too
                      prevrow[k].head = true
                      if aligns[k - 1] ~= "default" then
                        prevrow[k].align = aligns[k - 1]
                      end
                    end
                  end
                  table.remove(result,i) -- remove sep line
                  -- we don't need to increment i because we removed ith elt
                else
                  if #aligns > 0 then
                    for l=2,#result[i] do
                      if aligns[l - 1] ~= "default" then
                        result[i][l].align = aligns[l - 1]
                      end
                    end
                  end
                  i = i + 1
                end
              end
            end
            result.level = get_length(match)
          elseif tag == "div" then
            if result[2] and result[2][1] == "class" then
              result.attr = result.attr or {_keys = {}}
              insert_attribute(result.attr, "class", result[2][2])
              table.remove(result, 2)
            end
          elseif tag == "code_block" then
            if result[2] then
              if result[2][1] == "code_language" then
                result.lang = result[2][2]
                table.remove(result, 2)
              elseif result[2][1] == "raw_format" then
                local fmt = result[2][2]:sub(2)
                local s = get_string_content(result)
                result = {"raw_block", s}
                result.format = fmt
              end
            end
          elseif tag == "block_attributes" then
            -- keep them until the next block is built
            if block_attributes then
              block_attributes[#block_attributes + 1] = result
            else
              block_attributes = {result}
            end
            result = nil
          elseif tag == "attributes" then
            -- parse attributes, add to last node
            local prevnode = nodes[#nodes]
            local endswithspace = false
            if type(prevnode) == "table" then
              if prevnode[1] == "str" then
                -- split off last consecutive word of string
                -- to which to attach attributes
                local lastwordpos = string.find(prevnode[2], "%w+$")
                if not lastwordpos then
                  endswithspace = true
                elseif lastwordpos > 1 then
                  local newnode = {"str", sub(prevnode[2], lastwordpos, -1)}
                  prevnode[2] = sub(prevnode[2], 1, lastwordpos - 1)
                  nodes[#nodes + 1] = newnode
                  prevnode = newnode
                end
              end
              if not endswithspace then
                insert_attributes(prevnode, result)
              end
            end
            result = nil
          elseif find(tag, "^list_item") then
            local marker = string.match(subject, "^%S+", startpos)
            local styles = {}
            gsub(tag, "%[([^]]*)%]", function(x) styles[#styles + 1] = x end)
            -- create a list node with the consecutive list items
            -- of the same kind
            local list = {"list", result}
            -- put the attributes from the first item on the list itself:
            list.attr = result.attr
            result.attr = nil
            result[1] = "list_item"
            if marker == ":" then
              make_definition_list_item(result)
            end
            if sourcepos then
              list.pos = {result.pos[1], result.pos[2]}
            end
            -- now get remaining items
            local nextitem = matches[idx]
            while nextitem do
              local sp, _, ann = unpack_match(nextitem)
              if not find(ann, "^%+list_item") then
                break
              end
              -- check which of the styles this item matches
              local newstyles = {}
              gsub(ann, "%[([^]]*)%]",
                function(x) newstyles[x] = true end)
              local matched_styles = {}
              for _,x in ipairs(styles) do
                if newstyles[x] then
                  matched_styles[#matched_styles + 1] = x
                end
              end
              if #styles > 0 and #matched_styles == 0 then
                break -- does not match any styles
              end
              styles = matched_styles
              -- at this point styles contains the styles that match all items
              -- in the list so far...

              list[#list].tight = is_tight(matches, startidx, idx - 1, false)
              startidx = idx
              idx = idx + 1
              local item = get_node(tag)
              if tag == "list_item[X]" then
                set_checkbox(item, startidx)
              end
              item[1] = "list_item"
              if sourcepos then
                -- end position of the match that closed this item (not the
                -- end of the first item of the list)
                local _, itemend = unpack_match(matches[idx - 1])
                item.pos = {sp, itemend}
                list.pos[2] = item.pos[2]
              end
              if marker == ":" then
                make_definition_list_item(item)
              end
              list[#list + 1] = item
              nextitem = matches[idx]
            end
            list[#list].tight = is_tight(matches, startidx, idx - 1, true)
            -- the list is tight only if every one of its items is
            local tight = true
            for i=2,#list do
              tight = tight and list[i].tight
              list[i].tight = nil
            end
            list.list_style = styles[1] -- resolve, if still ambiguous
            list.tight = tight
            list.start = get_list_start(marker, list.list_style)
            result = list
          end
          nodes[#nodes + 1] = result
        elseif mod == "-" then -- close
          -- a closer that does not match the current stopper
          assert(false, "unmatched " .. annot .. " encountered at byte " ..
                   startpos)
          idx = idx + 1
          return nil
        elseif tag == "reference_key" then
          local key = sub(subject, startpos + 1, endpos - 1)
          local result = {tag, key}
          idx = idx + 1
          nodes[#nodes + 1] = result
        elseif tag == "reference_value" then
          local val = sub(subject, startpos, endpos)
          local result = {tag, val}
          idx = idx + 1
          nodes[#nodes + 1] = result
        else -- leaf
          local result
          if tag == "softbreak" then
            result = {tag}
          elseif tag == "footnote_reference" then
            result = {tag, sub(subject, startpos + 2, endpos - 1)}
          else
            result = {tag, sub(subject, startpos, endpos)}
          end
          if sourcepos then
            result.pos = {startpos, endpos}
          end
          if block_attributes then
            for i=1,#block_attributes do
              insert_attributes(result, block_attributes[i])
            end
            block_attributes = nil
          end
          idx = idx + 1
          nodes[#nodes + 1] = result
        end
      end
    end
    return nodes
  end

  local doc = get_node("doc")
  doc.references = references
  doc.footnotes = footnotes
  return doc
end
561
+
562
--- Write an indented, line-per-node dump of nodes[init..#nodes] to handle.
-- A string entry is written quoted. A table entry is written as its tag,
-- then its source position if it has a pos field, then its other
-- string-keyed fields sorted by name (so that the dump is reproducible),
-- then its attributes in the order recorded in attr._keys. Children
-- (entries from index 2 on) are dumped recursively, two columns deeper.
-- @param nodes array of nodes, typically a node whose children start at 2
-- @param handle object with a write method (e.g. an open file)
-- @param init first index to render (default 1)
-- @param indent number of leading spaces (default 0)
local function render_nodes(nodes, handle, init, indent)
  indent = indent or 0
  init = init or 1
  for i = init, #nodes do
    local node = nodes[i]
    handle:write(string.rep(" ", indent))
    if type(node) == "string" then
      handle:write(string.format("%q", node))
    else
      handle:write(node[1])
      if node.pos then
        handle:write(string.format(" (%d-%d)", node.pos[1], node.pos[2]))
      end
      -- pairs has no defined order, so collect and sort the field names
      local fields = {}
      for k in pairs(node) do
        if type(k) == "string" and k ~= "pos" and k ~= "attr" then
          fields[#fields + 1] = k
        end
      end
      table.sort(fields)
      for j = 1, #fields do
        local k = fields[j]
        handle:write(string.format(" %s=%q", k, tostring(node[k])))
      end
      if node.attr then
        -- an attribute table without _keys has no recorded attributes
        local keys = node.attr._keys or {}
        for j = 1, #keys do
          local k = keys[j]
          handle:write(string.format(" %s=%q", k, node.attr[k]))
        end
      end
    end
    handle:write("\n")
    if node[2] then -- children
      render_nodes(node, handle, 2, indent + 2)
    end
  end
end
594
+
595
--- Write a textual dump of a document to handle: first the node tree,
-- then the reference definitions and the footnotes, each listed in sorted
-- key order so that the dump is reproducible.
-- @param doc document node; doc.references and doc.footnotes are optional
-- @param handle object with a write method (e.g. an open file)
local function render(doc, handle)
  -- pairs has no defined order; dump tables by sorted key instead
  local function sorted_keys(t)
    local keys = {}
    for k in pairs(t) do
      keys[#keys + 1] = k
    end
    table.sort(keys, function(a, b) return tostring(a) < tostring(b) end)
    return keys
  end

  render_nodes(doc, handle, 2, 0)
  if doc.references then
    handle:write("references = {\n")
    local labels = sorted_keys(doc.references)
    for i = 1, #labels do
      local k = labels[i]
      handle:write(string.format(" [%q] = %q,\n", k,
                                 doc.references[k].destination))
    end
    handle:write("}\n")
  end
  if doc.footnotes then
    handle:write("footnotes = {\n")
    local labels = sorted_keys(doc.footnotes)
    for i = 1, #labels do
      local k = labels[i]
      handle:write(string.format(" [%q] =\n", k))
      render_nodes(doc.footnotes[k], handle, 2, 4)
    end
    handle:write("}\n")
  end
end
613
+
614
-- Functions made available to users of this module.
local exports = {
  to_ast = to_ast,
  render = render,
  insert_attribute = insert_attribute,
  copy_attributes = copy_attributes,
}

return exports
618
+
619
+
620
+ --[[
621
+ Copyright (C) 2022 John MacFarlane
622
+
623
+ Permission is hereby granted, free of charge, to any person obtaining
624
+ a copy of this software and associated documentation files (the
625
+ "Software"), to deal in the Software without restriction, including
626
+ without limitation the rights to use, copy, modify, merge, publish,
627
+ distribute, sublicense, and/or sell copies of the Software, and to
628
+ permit persons to whom the Software is furnished to do so, subject to
629
+ the following conditions:
630
+
631
+ The above copyright notice and this permission notice shall be included
632
+ in all copies or substantial portions of the Software.
633
+
634
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
635
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
636
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
637
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
638
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
639
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
640
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
641
+
642
+ ]]