djot 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,642 @@
1
+ local match = require("djot.match")
2
+ local unpack = unpack or table.unpack
3
+
4
+ local find, lower, sub, gsub, rep, format =
5
+ string.find, string.lower, string.sub, string.gsub, string.rep, string.format
6
+
7
+ local unpack_match, get_length, matches_pattern =
8
+ match.unpack_match, match.get_length, match.matches_pattern
9
+
10
-- Flatten the textual content of a node into a single string.
-- Children are read from index 2 onward (index 1 holds the node tag).
-- "str" and "nbsp" children contribute their second element, "softbreak"
-- contributes a newline, and any other table child is flattened
-- recursively.  Scanning stops at the first child that is not a table.
local function get_string_content(node)
  local pieces = {}
  for position = 2, #node do
    local child = node[position]
    if type(child) ~= "table" then
      break
    end
    local kind = child[1]
    local text
    if kind == "softbreak" then
      text = "\n"
    elseif kind == "str" or kind == "nbsp" then
      text = child[2]
    else
      text = get_string_content(child)
    end
    pieces[#pieces + 1] = text
  end
  return table.concat(pieces)
end
26
+
27
-- Value of each (lowercase) roman numeral digit.
local roman_digits = {
  ["i"] = 1,    ["v"] = 5,
  ["x"] = 10,   ["l"] = 50,
  ["c"] = 100,  ["d"] = 500,
  ["m"] = 1000,
}
35
+
36
-- Convert a roman numeral (upper or lower case) to a number.
--
-- The digits are scanned right to left: a digit smaller than the digit
-- to its right is subtracted (e.g. the "i" in "ix"), otherwise added.
--
-- s: string containing only roman digits (see roman_digits).
-- Returns the numeric value; the empty string yields 0.
-- Raises an error naming the numeral if s contains a character that is
-- not a roman digit.
local function roman_to_number(s)
  local total = 0
  local prevdigit = 0
  for i = #s, 1, -1 do
    local n = roman_digits[lower(sub(s, i, i))]
    -- validate before comparing: comparing nil with a number would raise
    -- an unrelated error and hide this message
    assert(n ~= nil, "Encountered bad character in roman numeral " .. s)
    if n < prevdigit then -- e.g. ix
      total = total - n
    else
      total = total + n
    end
    prevdigit = n
  end
  return total
end
55
+
56
-- Compute the starting number of an ordered list.
--
-- marker: the list marker as written in the source (e.g. "3.", "(c)").
-- style:  the list style (e.g. "1.", "(a)", "i)"); punctuation is
--         stripped from both arguments before they are interpreted.
-- Returns the start number for decimal ("1"), alphabetic ("A"/"a") and
-- roman ("I"/"i") styles.  Returns nil for any other style, and also
-- when marker or style is missing or the marker has no usable character.
local function get_list_start(marker, style)
  if marker == nil or style == nil then
    return nil
  end
  -- parentheses drop gsub's second result (the substitution count)
  local numtype = (string.gsub(style, "%p", ""))
  local s = (string.gsub(marker, "%p", ""))
  if numtype == "1" then
    return tonumber(s)
  elseif numtype == "A" or numtype == "a" then
    local code = string.byte(s)
    if code == nil then -- marker was pure punctuation
      return nil
    end
    return code - string.byte(numtype) + 1
  elseif numtype == "I" or numtype == "i" then
    return roman_to_number(s)
  end
  return nil
end
73
+
74
-- Set of annotation tags that get_node skips without producing a node.
local ignorable = {}
for _, tagname in ipairs({
  "image_marker",
  "escape",
  "blankline",
  "checkbox_checked",
  "checkbox_unchecked",
}) do
  ignorable[tagname] = true
end
81
+
82
-- Decide whether a list item is "tight", i.e. has no blank line between
-- its blocks.
--
-- matches:            the match array being converted
-- startidx, endidx:   inclusive range of matches belonging to the item
-- is_last_item:       true when the item is the last one of its list
--
-- A blank line is disregarded when it is directly followed by the
-- opening of a list item, or when it is directly followed by the closing
-- of a list item and either this is the last item or the match after
-- that also closes a list item (end of a nested list).
-- Returns true when no other blank line occurs in the range.
local function is_tight(matches, startidx, endidx, is_last_item)
  local saw_inner_blank = false
  for pos = startidx, endidx do
    local _, _, annotation = unpack_match(matches[pos])
    if annotation == "blankline" then
      local following = matches[pos + 1]
      local skip
      if matches_pattern(following, "%+list_item") then
        -- blank line before a (nested) list starts
        skip = true
      elseif matches_pattern(following, "%-list_item") then
        -- blank line at the end of a nested list or of the last item
        skip = is_last_item
          or matches_pattern(matches[pos + 2], "%-list_item")
      else
        skip = false
      end
      if not skip then
        saw_inner_blank = true
      end
    end
  end
  return not saw_inner_blank
end
102
+
103
-- Store a single attribute in an attribute table.
--
-- attr: attribute table; gets a `_keys` array (created on demand) that
--       records the order in which keys were first inserted, so that
--       output can be produced deterministically.
-- key:  attribute name.
-- val:  attribute value.
--
-- A "class" value is appended (space separated) to an existing class;
-- every other key, including "id", overwrites any previous value.
local function insert_attribute(attr, key, val)
  if not attr._keys then
    attr._keys = {}
  end

  -- append name to the insertion-order list unless already recorded
  local function remember(name)
    local order = attr._keys
    local known = false
    for _, existing in ipairs(order) do
      if existing == name then
        known = true
        break
      end
    end
    if not known then
      order[#order + 1] = name
    end
  end

  if key == "class" and attr.class then
    -- accumulate classes; the key order is left as it is
    attr.class = attr.class .. " " .. val
    return
  end

  attr[key] = val
  remember(key)
end
133
+
134
-- Copy every attribute of `source` into `target` via insert_attribute.
--
-- target: attribute table to receive the attributes.
-- source: attribute table to copy from; nil (or false) is a no-op.
--
-- Attributes are copied in the order recorded in source._keys, so the
-- insertion order of the target stays deterministic.  Attributes present
-- in source but not listed in _keys are copied afterwards, sorted by
-- name, instead of in the unspecified order of pairs().
local function copy_attributes(target, source)
  if not source then
    return
  end
  local copied = {}
  local ordered = source._keys
  if ordered then
    for i = 1, #ordered do
      local k = ordered[i]
      if not copied[k] and source[k] ~= nil then
        copied[k] = true
        insert_attribute(target, k, source[k])
      end
    end
  end
  local extras = {}
  for k in pairs(source) do
    if k ~= "_keys" and not copied[k] then
      extras[#extras + 1] = k
    end
  end
  -- compare via tostring so that non-string keys cannot break the sort
  table.sort(extras, function(a, b) return tostring(a) < tostring(b) end)
  for i = 1, #extras do
    insert_attribute(target, extras[i], source[extras[i]])
  end
end
143
+
144
-- Apply the attributes described by a parsed attribute node to a node.
--
-- targetnode: node that receives the attributes; its `attr` table is
--             created if absent.
-- attrnode:   array whose entries from index 2 onward are pairs
--             {kind, text}.  Kinds "id" and "class" are inserted
--             directly; a "key" entry names an attribute whose value is
--             taken from the entry that follows it when that entry is of
--             kind "value".  Other kinds are ignored.
--
-- Backslash escapes of punctuation in values are resolved.  A "key"
-- that is the last entry (no value follows) is ignored.
local function insert_attributes(targetnode, attrnode)
  targetnode.attr = targetnode.attr or {_keys = {}}
  local attr = targetnode.attr
  local i = 2
  while i <= #attrnode do
    local entry = attrnode[i]
    local kind, text = entry[1], entry[2]
    if kind == "id" or kind == "class" then
      insert_attribute(attr, kind, text)
    elseif kind == "key" then
      local valnode = attrnode[i + 1]
      if valnode and valnode[1] == "value" then
        -- resolve backslash escapes; the parentheses drop gsub's
        -- substitution count so only the string is passed on
        insert_attribute(attr, text, (valnode[2]:gsub("\\(%p)", "%1")))
      end
      i = i + 1 -- skip the entry following the key
    end
    i = i + 1
  end
end
162
+
163
-- Turn a "list_item" node (in place) into a "definition_list_item".
--
-- If the first child is a "para" it becomes the "term"; otherwise an
-- empty "term" node is inserted as first child.  All remaining children
-- are moved into a single "definition" node placed after the term.
--
-- result: node whose tag (result[1]) must be "list_item".
-- Raises an error ("sanity check") when given any other kind of node.
local function make_definition_list_item(result)
  -- compare the tag itself: indexing the tag string (result[1][1]) is
  -- always nil, which would make this check impossible to fail
  assert(result[1] == "list_item", "sanity check")
  result[1] = "definition_list_item"
  local first = result[2]
  if type(first) == "table" and first[1] == "para" then
    first[1] = "term"
  else
    table.insert(result, 2, {"term"})
  end
  if result[3] then
    local defn = {"definition"}
    for i = 3, #result do
      defn[#defn + 1] = result[i]
      result[i] = nil
    end
    result[3] = defn
  end
end
180
+
181
-- Create an abstract syntax tree based on an event stream.
--
-- subject: the source text; substrings are taken from it by position.
-- matches: array of match events.  Each is decoded with unpack_match
--          into (startpos, endpos, annotation).  An annotation starting
--          with "+" opens a container, one starting with "-" closes it,
--          anything else is a leaf.
-- options: optional table.  If options.sourcepos is truthy, nodes get a
--          `pos = {startpos, endpos}` field.
--
-- Returns the "doc" node.  Nodes are arrays whose first element is the
-- tag and whose further elements are children (or a string payload).
-- doc.references maps reference labels to {destination=, attributes=},
-- doc.footnotes maps footnote labels to footnote nodes.
--
-- Raises an error when a closing annotation has no matching opener.
local function to_ast(subject, matches, options)
  options = options or {}
  local idx = 1
  local matcheslen = #matches
  local sourcepos = options.sourcepos
  local references = {}
  local footnotes = {}
  local identifiers = {} -- identifiers used (to ensure uniqueness)

  -- generate auto identifier for heading: strip punctuation, trim,
  -- replace whitespace runs by "-", then append a counter until unique
  local function get_identifier(s)
    local base = s:gsub("[][~!@#$%^&*(){}`,.<>\\|=+/?]", "")
                  :gsub("^%s+", ""):gsub("%s+$", "")
                  :gsub("%s+", "-")
    local i = 0
    local ident = base
    while identifiers[ident] do
      i = i + 1
      ident = base .. tostring(i)
    end
    identifiers[ident] = true
    return ident
  end

  -- determine if checked or unchecked from the match that follows the
  -- opening of the list item
  local function set_checkbox(node, startidx)
    local _, _, ann = unpack_match(matches[startidx + 1])
    if ann == "checkbox_checked" then
      node.checkbox = "checked"
    elseif ann == "checkbox_unchecked" then
      node.checkbox = "unchecked"
    end
  end

  -- build a replacement for `old`, keeping the source position and
  -- attributes that were already attached to it
  local function replace_node(old, new)
    new.pos = old.pos
    new.attr = old.attr
    return new
  end

  -- Consume matches starting at idx and collect them as children of a
  -- node tagged `maintag`, until the matching close annotation (which
  -- is consumed) or the end of the stream.
  local function get_node(maintag)
    local nodes = {maintag}
    local stopper
    local block_attributes = nil
    if maintag then
      -- strip off data (e.g. for list_items)
      stopper = "^%-" .. gsub(maintag, "%[.*$", "")
    end
    while idx <= matcheslen do
      local current = matches[idx]
      local startpos, endpos, annot = unpack_match(current)
      if stopper and find(annot, stopper) then
        idx = idx + 1
        return nodes
      end

      local mod, tag = string.match(annot, "^([-+]?)(.*)")
      if ignorable[tag] then
        idx = idx + 1 -- skip
      elseif mod == "+" then -- open
        local startidx = idx
        idx = idx + 1
        local result = get_node(tag)
        if tag == "list_item[X]" then
          set_checkbox(result, startidx)
        end
        local _, finalpos = unpack_match(matches[idx - 1])
        if sourcepos then
          result.pos = {startpos, finalpos}
        end
        if block_attributes and tag ~= "block_attributes" then
          for i = 1, #block_attributes do
            insert_attributes(result, block_attributes[i])
          end
          if result.attr and result.attr.id then
            identifiers[result.attr.id] = true
          end
          block_attributes = nil
        end

        if tag == "verbatim" then
          local s = get_string_content(result)
          -- trim space next to ` at beginning or end
          if find(s, "^ +`") then
            s = s:sub(2)
          end
          if find(s, "` +$") then
            s = s:sub(1, #s - 1)
          end
          result = replace_node(result, {"verbatim", {"str", s}})
          -- check for raw_format, which makes this a raw node
          local nextmatch = matches[idx]
          if nextmatch then -- nil when verbatim ends the stream
            local sp, ep, ann = unpack_match(nextmatch)
            if ann == "raw_format" then
              result = replace_node(result, {"raw_inline", s})
              result.format = sub(subject, sp + 2, ep - 1)
              idx = idx + 1 -- skip the raw_format
            end
          end

        elseif tag == "caption" then
          local prev = nodes[#nodes]
          if type(prev) == "table" and prev[1] == "table" then
            -- move caption in table node
            table.insert(prev, 2, result)
            result = nil
          end

        elseif tag == "reference_definition" then
          local dest = ""
          local key
          for i = 2, #result do
            if result[i][1] == "reference_key" then
              key = result[i][2]
            end
            if result[i][1] == "reference_value" then
              dest = dest .. result[i][2]
            end
          end
          -- a nil key cannot be used as a table index
          if key ~= nil then
            references[key] = { destination = dest,
                                attributes = result.attr }
          end

        elseif tag == "footnote" then
          local label
          local first = result[2]
          if first and first[1] == "note_label" then
            label = first[2]
          end
          if label then
            table.remove(result, 2)
            footnotes[label] = result
          end
          result = nil

        elseif tag == "inline_math" then
          result[1] = "math"
          result.attr = {class = "math inline", _keys = {"class"}}

        elseif tag == "display_math" then
          result[1] = "math"
          result.attr = {class = "math display", _keys = {"class"}}

        elseif tag == "url" then
          result[1] = "link"
          result.destination = get_string_content(result)

        elseif tag == "email" then
          result[1] = "link"
          result.destination = "mailto:" .. get_string_content(result)

        elseif tag == "imagetext" or tag == "linktext" then
          -- gobble destination or reference
          local nextmatch = matches[idx]
          local nextannot
          if nextmatch then
            local _, _, ann = unpack_match(nextmatch)
            nextannot = ann
          end
          if nextannot == "+destination" then
            idx = idx + 1
            local dest = get_node("destination")
            result.destination =
              (get_string_content(dest):gsub("\r?\n", ""))
          elseif nextannot == "+reference" then
            idx = idx + 1
            local ref = get_node("reference")
            if #ref == 1 then -- []
              result.reference =
                (get_string_content(result):gsub("\r?\n", " "))
            else
              result.reference =
                (get_string_content(ref):gsub("\r?\n", " "))
            end
          end
          result[1] = (result[1]:gsub("text", ""))

        elseif tag == "heading" then
          result.level = get_length(current)
          local heading_str = get_string_content(result)
                                :gsub("^%s+", ""):gsub("%s+$", "")
          if not (result.attr and result.attr.id) then
            local ident = get_identifier(heading_str)
            -- index 1 is a placeholder tag (insert_attributes reads from
            -- index 2); a nil there would leave a hole and make # unreliable
            insert_attributes(result, {"attributes", {"id", ident}})
          end
          -- insert into references unless there's a same-named one already:
          if not references[heading_str] then
            references[heading_str] =
              {destination = "#" .. result.attr.id, attributes = {_keys = {}}}
          end

        elseif tag == "table" then
          -- look for a separator line
          -- if found, make the preceding rows headings
          -- and set attributes for column alignments on the table
          local i = 2
          local aligns = {}
          while i <= #result do
            local row = result[i]
            if row[1] == "row" then
              local found
              for j = 2, #row do
                local first, _, align = find(row[j][1], "^separator_(.*)")
                found = first
                if not found then
                  break
                end
                aligns[j - 1] = align
              end
              if found and #aligns > 0 then
                -- set previous row to head and adjust aligns
                local prevrow = result[i - 1]
                if prevrow[1] == "row" then
                  prevrow.head = true
                  for k = 2, #prevrow do
                    -- set head on cells too
                    prevrow[k].head = true
                    if aligns[k - 1] ~= "default" then
                      prevrow[k].align = aligns[k - 1]
                    end
                  end
                end
                table.remove(result, i) -- remove sep line
                -- we don't need to increment i because we removed ith elt
              else
                if #aligns > 0 then
                  for l = 2, #row do
                    if aligns[l - 1] ~= "default" then
                      row[l].align = aligns[l - 1]
                    end
                  end
                end
                i = i + 1
              end
            else
              -- not a row: step over it (otherwise the loop never ends)
              i = i + 1
            end
          end
          -- NOTE(review): tables also get a `level` computed from the
          -- opening match; kept because render output includes it.
          result.level = get_length(current)

        elseif tag == "div" then
          if result[2] and result[2][1] == "class" then
            result.attr = result.attr or {_keys = {}}
            insert_attribute(result.attr, "class", result[2][2])
            table.remove(result, 2)
          end

        elseif tag == "code_block" then
          if result[2] then
            if result[2][1] == "code_language" then
              result.lang = result[2][2]
              table.remove(result, 2)
            elseif result[2][1] == "raw_format" then
              local fmt = result[2][2]:sub(2)
              local s = get_string_content(result)
              result = replace_node(result, {"raw_block", s})
              result.format = fmt
            end
          end

        elseif tag == "block_attributes" then
          -- remember them; they are applied to the next node
          if block_attributes then
            block_attributes[#block_attributes + 1] = result
          else
            block_attributes = {result}
          end
          result = nil

        elseif tag == "attributes" then
          -- parse attributes, add to last node
          local prevnode = nodes[#nodes]
          local endswithspace = false
          if type(prevnode) == "table" then
            if prevnode[1] == "str" then
              -- split off last consecutive word of string
              -- to which to attach attributes
              local lastwordpos = string.find(prevnode[2], "%w+$")
              if not lastwordpos then
                endswithspace = true
              elseif lastwordpos > 1 then
                local newnode = {"str", sub(prevnode[2], lastwordpos, -1)}
                prevnode[2] = sub(prevnode[2], 1, lastwordpos - 1)
                nodes[#nodes + 1] = newnode
                prevnode = newnode
              end
            end
            if not endswithspace then
              insert_attributes(prevnode, result)
            end
          end
          result = nil

        elseif find(tag, "^list_item") then
          local marker = string.match(subject, "^%S+", startpos)
          local styles = {}
          gsub(tag, "%[([^]]*)%]", function(x) styles[#styles + 1] = x end)
          -- create a list node with the consecutive list items
          -- of the same kind
          local list = {"list", result}
          -- put the attributes from the first item on the list itself:
          list.attr = result.attr
          result.attr = nil
          result[1] = "list_item"
          if marker == ":" then
            make_definition_list_item(result)
          end
          if sourcepos then
            list.pos = {result.pos[1], result.pos[2]}
          end
          -- now get remaining items
          local nextitem = matches[idx]
          while nextitem do
            local sp, _, ann = unpack_match(nextitem)
            if not find(ann, "^%+list_item") then
              break
            end
            -- check which of the styles this item matches
            local newstyles = {}
            gsub(ann, "%[([^]]*)%]",
                 function(x) newstyles[x] = true end)
            local matched_styles = {}
            for _, x in ipairs(styles) do
              if newstyles[x] then
                matched_styles[#matched_styles + 1] = x
              end
            end
            if #styles > 0 and #matched_styles == 0 then
              break -- does not match any styles
            end
            styles = matched_styles
            -- at this point styles contains the styles that match all items
            -- in the list so far...

            list[#list].tight = is_tight(matches, startidx, idx - 1, false)
            startidx = idx
            idx = idx + 1
            local item = get_node(tag)
            if tag == "list_item[X]" then
              set_checkbox(item, startidx)
            end
            item[1] = "list_item"
            if sourcepos then
              -- end position of THIS item (its closing match), not the
              -- end position recorded for the first item of the list
              local _, itemend = unpack_match(matches[idx - 1])
              item.pos = {sp, itemend}
              list.pos[2] = itemend
            end
            if marker == ":" then
              make_definition_list_item(item)
            end
            list[#list + 1] = item
            nextitem = matches[idx]
          end
          list[#list].tight = is_tight(matches, startidx, idx - 1, true)
          local tight = true
          for i = 2, #list do
            tight = tight and list[i].tight
            list[i].tight = nil
          end
          list.list_style = styles[1] -- resolve, if still ambiguous
          list.tight = tight
          list.start = get_list_start(marker, list.list_style)
          result = list
        end

        -- result is nil when the node was consumed above (no-op append)
        nodes[#nodes + 1] = result

      elseif mod == "-" then -- close without a matching open
        -- level 0 keeps the message free of position information
        error("unmatched " .. annot .. " encountered at byte " .. startpos, 0)

      elseif tag == "reference_key" then
        -- key text without its first and last character
        nodes[#nodes + 1] = {tag, sub(subject, startpos + 1, endpos - 1)}
        idx = idx + 1

      elseif tag == "reference_value" then
        nodes[#nodes + 1] = {tag, sub(subject, startpos, endpos)}
        idx = idx + 1

      else -- leaf
        local result
        if tag == "softbreak" then
          result = {tag}
        elseif tag == "footnote_reference" then
          result = {tag, sub(subject, startpos + 2, endpos - 1)}
        else
          result = {tag, sub(subject, startpos, endpos)}
        end
        if sourcepos then
          result.pos = {startpos, endpos}
        end
        if block_attributes then
          for i = 1, #block_attributes do
            insert_attributes(result, block_attributes[i])
          end
          block_attributes = nil
        end
        idx = idx + 1
        nodes[#nodes + 1] = result
      end
    end
    return nodes
  end

  local doc = get_node("doc")
  doc.references = references
  doc.footnotes = footnotes
  return doc
end
561
+
562
-- Write an indented, line-per-node dump of nodes[init..#nodes].
--
-- nodes:  array of nodes; a string entry is written quoted (%q), a table
--         entry is written as its tag followed by its source position
--         (if it has `pos`), its other string-keyed fields, and its
--         attributes.
-- handle: object with a `write` method (called as handle:write(str)).
-- init:   first index to render (default 1).
-- indent: number of leading spaces (default 0); children are rendered
--         recursively with two more.
--
-- String-keyed fields are written sorted by name and attributes in
-- attr._keys order (sorted by name if _keys is absent), so the output
-- does not depend on the iteration order of pairs().
local function render_nodes(nodes, handle, init, indent)
  indent = indent or 0
  init = init or 1
  for i = init, #nodes do
    local node = nodes[i]
    handle:write(rep(" ", indent))
    if type(node) == "string" then
      handle:write(format("%q", node))
    else
      handle:write(node[1])
      if node.pos then
        handle:write(format(" (%d-%d)", node.pos[1], node.pos[2]))
      end
      local fields = {}
      for k in pairs(node) do
        if type(k) == "string" and k ~= "pos" and k ~= "attr" then
          fields[#fields + 1] = k
        end
      end
      table.sort(fields)
      for j = 1, #fields do
        local k = fields[j]
        handle:write(format(" %s=%q", k, tostring(node[k])))
      end
      if node.attr then
        local keys = node.attr._keys
        if not keys then
          -- attribute table built without an order list
          keys = {}
          for k in pairs(node.attr) do
            keys[#keys + 1] = k
          end
          table.sort(keys, function(a, b) return tostring(a) < tostring(b) end)
        end
        for j = 1, #keys do
          local k = keys[j]
          handle:write(format(" %s=%q", k, node.attr[k]))
        end
      end
    end
    handle:write("\n")
    if node[2] then -- children
      render_nodes(node, handle, 2, indent + 2)
    end
  end
end
594
+
595
-- Write a textual dump of a document to `handle`.
--
-- doc:    document node; its children (from index 2) are rendered with
--         render_nodes, followed by doc.references and doc.footnotes
--         when those fields are present.
-- handle: object with a `write` method (called as handle:write(str)).
--
-- References and footnotes are written sorted by label so that the
-- output does not depend on the iteration order of pairs().
local function render(doc, handle)
  -- keys of tbl in a stable order
  local function sorted_keys(tbl)
    local keys = {}
    for k in pairs(tbl) do
      keys[#keys + 1] = k
    end
    table.sort(keys, function(a, b) return tostring(a) < tostring(b) end)
    return keys
  end

  render_nodes(doc, handle, 2, 0)
  if doc.references then
    handle:write("references = {\n")
    local labels = sorted_keys(doc.references)
    for i = 1, #labels do
      local k = labels[i]
      handle:write(format(" [%q] = %q,\n", k, doc.references[k].destination))
    end
    handle:write("}\n")
  end
  if doc.footnotes then
    handle:write("footnotes = {\n")
    local labels = sorted_keys(doc.footnotes)
    for i = 1, #labels do
      local k = labels[i]
      handle:write(format(" [%q] =\n", k))
      render_nodes(doc.footnotes[k], handle, 2, 4)
    end
    handle:write("}\n")
  end
end
613
+
614
-- Public interface of the module.
local exports = {}
exports.to_ast = to_ast
exports.render = render
exports.insert_attribute = insert_attribute
exports.copy_attributes = copy_attributes
return exports
618
+
619
+
620
+ --[[
621
+ Copyright (C) 2022 John MacFarlane
622
+
623
+ Permission is hereby granted, free of charge, to any person obtaining
624
+ a copy of this software and associated documentation files (the
625
+ "Software"), to deal in the Software without restriction, including
626
+ without limitation the rights to use, copy, modify, merge, publish,
627
+ distribute, sublicense, and/or sell copies of the Software, and to
628
+ permit persons to whom the Software is furnished to do so, subject to
629
+ the following conditions:
630
+
631
+ The above copyright notice and this permission notice shall be included
632
+ in all copies or substantial portions of the Software.
633
+
634
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
635
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
636
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
637
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
638
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
639
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
640
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
641
+
642
+ ]]