djot 0.0.5 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lua/djot/ast.lua DELETED
@@ -1,642 +0,0 @@
1
- local match = require("djot.match")
2
- local unpack = unpack or table.unpack
3
-
4
- local find, lower, sub, gsub, rep, format =
5
- string.find, string.lower, string.sub, string.gsub, string.rep, string.format
6
-
7
- local unpack_match, get_length, matches_pattern =
8
- match.unpack_match, match.get_length, match.matches_pattern
9
-
10
--- Flatten the textual content of a node into a single string.
-- Children are read starting at index 2 (index 1 holds the node's tag).
-- "str" and "nbsp" children contribute their second element, "softbreak"
-- contributes a newline, and any other table child is flattened recursively.
-- Scanning stops at the first child that is not a table.
-- @param node table whose first element is a tag and the rest are children
-- @return the concatenated string content
local function get_string_content(node)
  local pieces = {}
  local pos = 2
  while pos <= #node do
    local child = node[pos]
    if type(child) ~= "table" then
      break
    end
    local kind = child[1]
    local text
    if kind == "softbreak" then
      text = "\n"
    elseif kind == "str" or kind == "nbsp" then
      text = child[2]
    else
      text = get_string_content(child)
    end
    pieces[#pieces + 1] = text
    pos = pos + 1
  end
  return table.concat(pieces)
end
26
-
27
-- Value of each roman numeral digit, keyed by its lowercase letter.
local roman_digits = {
  i = 1,
  v = 5,
  x = 10,
  l = 50,
  c = 100,
  d = 500,
  m = 1000 }

--- Convert a roman numeral (upper or lower case) to a number.
-- The digits are scanned from right to left; a digit smaller than the one
-- to its right is subtracted (e.g. the "i" in "ix"), otherwise it is added.
-- @param s string containing the roman numeral
-- @return the numeric value (0 for the empty string)
-- Raises an error naming the numeral if it contains a character that is
-- not a roman digit.
local function roman_to_number(s)
  local total = 0
  local prevdigit = 0
  -- go backwards through the digits
  for i = #s, 1, -1 do
    local n = roman_digits[s:sub(i, i):lower()]
    -- Validate before comparing: comparing nil with a number would raise
    -- an unrelated error and hide this message.
    assert(n ~= nil, "Encountered bad character in roman numeral " .. s)
    if n < prevdigit then -- e.g. ix
      total = total - n
    else
      total = total + n
    end
    prevdigit = n
  end
  return total
end
55
-
56
--- Compute the starting number of a list from its first marker.
-- Punctuation is stripped from both the style and the marker before they
-- are interpreted.
-- @param marker the literal marker text of the first item (e.g. "3." or "(c)")
-- @param style the list style (e.g. "1.", "(a)", "I)"); a style made only
--   of punctuation yields nil
-- @return the start number, or nil when the style carries no numbering
local function get_list_start(marker, style)
  local kind = string.gsub(style, "%p", "")
  local digits = string.gsub(marker, "%p", "")
  if kind == "1" then
    return tonumber(digits)
  end
  if kind == "A" or kind == "a" then
    -- alphabetic markers count from the first letter of their alphabet
    return (string.byte(digits) - string.byte(kind) + 1)
  end
  if kind == "I" or kind == "i" then
    return roman_to_number(digits)
  end
  if kind == "" then
    return nil
  end
end
73
-
74
-- Set of annotation tags that are skipped when building the tree:
-- a match whose tag is a key of this table is not turned into a node.
local ignorable = {}
for _, tagname in ipairs({
  "image_marker",
  "escape",
  "blankline",
  "checkbox_checked",
  "checkbox_unchecked",
}) do
  ignorable[tagname] = true
end
81
-
82
--- Decide whether a list item is "tight", i.e. has no blank lines
-- between its blocks.
-- Blank lines are not counted when they come directly before the opening
-- of a list item, or directly before the closing of a list item that is
-- either the last item or is itself followed by another list-item close
-- (the end of a nested list).
-- @param matches the array of match events
-- @param startidx index of the first match to inspect
-- @param endidx index of the last match to inspect
-- @param is_last_item true when the item is the last one of its list
-- @return true if no significant blank line was found
local function is_tight(matches, startidx, endidx, is_last_item)
  local tight = true
  local pos = startidx
  while pos <= endidx do
    local _, _, annot = unpack_match(matches[pos])
    if annot == "blankline" then
      local following = matches[pos + 1]
      local skip
      if matches_pattern(following, "%+list_item") then
        -- blank line right before a list starts
        skip = true
      elseif matches_pattern(following, "%-list_item") then
        -- blank line at the end of the last item or of a nested list
        skip = is_last_item or
          matches_pattern(matches[pos + 2], "%-list_item")
      else
        skip = false
      end
      if not skip then
        tight = false
      end
    end
    pos = pos + 1
  end
  return tight
end
102
-
103
--- Set one attribute on an attribute table.
-- attr._keys lists keys in order of first insertion so that output can be
-- deterministic. A "class" value is appended (space separated) to an
-- existing class; every other key, including "id", overwrites any
-- previous value.
-- @param attr the attribute table to modify (gets a _keys list if missing)
-- @param key attribute name
-- @param val attribute value
local function insert_attribute(attr, key, val)
  attr._keys = attr._keys or {}

  -- record the key unless it has been recorded before
  local function remember(name)
    local order = attr._keys
    for _, seen in ipairs(order) do
      if seen == name then
        return
      end
    end
    order[#order + 1] = name
  end

  if key == "class" and attr.class then
    -- classes accumulate; the key is already recorded
    attr.class = attr.class .. " " .. val
    return
  end
  attr[key] = val
  remember(key)
end
133
-
134
--- Copy every attribute of `source` into `target` via insert_attribute.
-- Attributes are copied in the order listed in source._keys, so the
-- insertion order recorded on the target is deterministic. Attributes
-- present in `source` but missing from source._keys are copied afterwards
-- in sorted key order. The "_keys" bookkeeping entry itself is never copied.
-- @param target attribute table to receive the attributes
-- @param source attribute table to copy from; nil (or false) is a no-op
local function copy_attributes(target, source)
  if not source then
    return
  end
  local copied = {}
  local order = source._keys
  if type(order) == "table" then
    for i = 1, #order do
      local k = order[i]
      if k ~= "_keys" and not copied[k] and source[k] ~= nil then
        copied[k] = true
        insert_attribute(target, k, source[k])
      end
    end
  end
  -- anything not listed in _keys: pairs() order is unspecified, so sort
  local rest = {}
  for k in pairs(source) do
    if k ~= "_keys" and not copied[k] then
      rest[#rest + 1] = k
    end
  end
  table.sort(rest, function(a, b) return tostring(a) < tostring(b) end)
  for i = 1, #rest do
    insert_attribute(target, rest[i], source[rest[i]])
  end
end
143
-
144
--- Apply the contents of a parsed attribute node to a target node.
-- Entries of `attrnode` are read from index 2 onwards. An "id" or "class"
-- entry is inserted directly; a "key" entry is paired with the entry that
-- follows it, which is inserted (with backslash escapes before punctuation
-- resolved) when it is a "value" entry.
-- @param targetnode node whose attr table is filled in (created if absent)
-- @param attrnode node whose children are {kind, text} pairs
local function insert_attributes(targetnode, attrnode)
  if not targetnode.attr then
    targetnode.attr = {_keys = {}}
  end
  local attrs = targetnode.attr
  local pos = 2
  while pos <= #attrnode do
    local kind, text = unpack(attrnode[pos])
    local step = 1
    if kind == "key" then
      local valnode = attrnode[pos + 1]
      if valnode[1] == "value" then
        -- resolve backslash escapes
        local unescaped = valnode[2]:gsub("\\(%p)", "%1")
        insert_attribute(attrs, text, unescaped)
      end
      -- the entry after a key has been consumed as well
      step = 2
    elseif kind == "id" or kind == "class" then
      insert_attribute(attrs, kind, text)
    end
    pos = pos + step
  end
end
162
-
163
--- Turn a "list_item" node into a "definition_list_item" node, in place.
-- The first child becomes the term: a leading "para" is retagged "term",
-- otherwise an empty "term" node is inserted. All remaining children are
-- moved into a single "definition" child.
-- @param result node tagged "list_item"; modified in place
-- Raises an error if the node is not tagged "list_item".
local function make_definition_list_item(result)
  -- result[1] is the tag string; compare it directly (indexing the string
  -- would always yield nil and make the check vacuous)
  assert(result[1] == "list_item", "sanity check")
  result[1] = "definition_list_item"
  if result[2] and result[2][1] == "para" then
    result[2][1] = "term"
  else
    table.insert(result, 2, {"term"})
  end
  if result[3] then
    local defn = {"definition"}
    for i = 3, #result do
      defn[#defn + 1] = result[i]
      result[i] = nil
    end
    result[3] = defn
  end
end
180
-
181
--- Create an abstract syntax tree based on an event stream.
-- Each element of `matches` is unpacked with unpack_match into a start
-- position, an end position and an annotation. An annotation starting
-- with "+" opens a container, one starting with "-" closes it, and any
-- other annotation is a leaf whose text is taken from `subject`.
-- Reference definitions and headings populate the reference table, and
-- footnotes are collected into the footnote table instead of the tree.
-- @param subject the source text the match positions refer to
-- @param matches array of match events
-- @param options optional table; if options.sourcepos is truthy, nodes
--   get a `pos` field holding {startpos, endpos}
-- @return the "doc" node, with `references` and `footnotes` fields set
local function to_ast(subject, matches, options)
  if not options then
    options = {}
  end
  local idx = 1
  local matcheslen = #matches
  local sourcepos = options.sourcepos
  local references = {}
  local footnotes = {}
  local identifiers = {} -- identifiers used (to ensure uniqueness)

  -- generate auto identifier for heading
  local function get_identifier(s)
    local base = s:gsub("[][~!@#$%^&*(){}`,.<>\\|=+/?]","")
                  :gsub("^%s+",""):gsub("%s+$","")
                  :gsub("%s+","-")
    local i = 0
    local ident = base
    -- generate unique id
    while identifiers[ident] do
      i = i + 1
      ident = base .. tostring(i)
    end
    identifiers[ident] = true
    return ident
  end

  local function set_checkbox(node, startidx)
    -- determine if checked or unchecked
    local _,_,ann = unpack_match(matches[startidx + 1])
    if ann == "checkbox_checked" then
      node.checkbox = "checked"
    elseif ann == "checkbox_unchecked" then
      node.checkbox = "unchecked"
    end
  end

  -- Consume matches from the shared cursor `idx` until the closer for
  -- `maintag` is found, returning {maintag, child1, child2, ...}.
  local function get_node(maintag)
    local nodes = {maintag}
    local stopper
    local block_attributes = nil
    if maintag then
      -- strip off data (e.g. for list_items)
      stopper = "^%-" .. gsub(maintag, "%[.*$", "")
    end
    while idx <= matcheslen do
      local match = matches[idx]
      local startpos, endpos, annot = unpack_match(match)
      if stopper and find(annot, stopper) then
        idx = idx + 1
        return nodes
      else
        local mod, tag = string.match(annot, "^([-+]?)(.*)")
        if ignorable[tag] then
          idx = idx + 1 -- skip
        elseif mod == "+" then -- open
          local startidx = idx
          idx = idx + 1
          local result = get_node(tag)
          if tag == "list_item[X]" then
            set_checkbox(result, startidx)
          end
          local _, finalpos = unpack_match(matches[idx - 1])
          if sourcepos then
            result.pos = {startpos, finalpos}
          end
          if block_attributes and tag ~= "block_attributes" then
            for i=1,#block_attributes do
              insert_attributes(result, block_attributes[i])
            end
            if result.attr and result.attr.id then
              identifiers[result.attr.id] = true
            end
            block_attributes = nil
          end
          if tag == "verbatim" then
            local s = get_string_content(result)
            -- trim space next to ` at beginning or end
            if find(s, "^ +`") then
              s = s:sub(2)
            end
            if find(s, "` +$") then
              s = s:sub(1, #s - 1)
            end
            result = {"verbatim", {"str", s}}
            -- check for raw_format, which makes this a raw node
            local sp,ep,ann = unpack_match(matches[idx])
            if ann == "raw_format" then
              -- the content of the verbatim node just built is exactly s
              result = {"raw_inline", s}
              result.format = sub(subject, sp + 2, ep - 1)
              idx = idx + 1 -- skip the raw_format
            end
          elseif tag == "caption" then
            if nodes[#nodes][1] == "table" then
              -- move caption in table node
              table.insert(nodes[#nodes], 2, result)
              result = nil
            end
          elseif tag == "reference_definition" then
            local dest = ""
            local key
            for i=2,#result do
              if result[i][1] == "reference_key" then
                key = result[i][2]
              end
              if result[i][1] == "reference_value" then
                dest = dest .. result[i][2]
              end
            end
            -- a nil key cannot be used as a table index
            if key then
              references[key] = { destination = dest,
                                  attributes = result.attr }
            end
          elseif tag == "footnote" then
            local label
            if result[2] and result[2][1] == "note_label" then
              label = result[2][2]
            end
            if label then
              table.remove(result,2)
              footnotes[label] = result
            end
            result = nil
          elseif tag == "inline_math" then
            result[1] = "math"
            result.attr = {class = "math inline", _keys={"class"}}
          elseif tag == "display_math" then
            result[1] = "math"
            result.attr = {class = "math display", _keys={"class"}}
          elseif tag == "url" then
            result[1] = "link"
            result.destination = get_string_content(result)
          elseif tag == "email" then
            result[1] = "link"
            result.destination = "mailto:" .. get_string_content(result)
          elseif tag == "imagetext" or tag == "linktext" then
            -- gobble destination or reference
            local nextmatch = matches[idx]
            local _, _, nextannot = unpack_match(nextmatch)
            if nextannot == "+destination" then
              idx = idx + 1
              local dest = get_node("destination")
              result.destination = get_string_content(dest):gsub("\r?\n", "")
            elseif nextannot == "+reference" then
              idx = idx + 1
              local ref = get_node("reference")
              if #ref == 1 then -- []
                result.reference = get_string_content(result):gsub("\r?\n", " ")
              else
                result.reference = get_string_content(ref):gsub("\r?\n", " ")
              end
            end
            result[1] = result[1]:gsub("text","")
          elseif tag == "heading" then
            result.level = get_length(match)
            local heading_str = get_string_content(result)
                                 :gsub("^%s+",""):gsub("%s+$","")
            if not (result.attr and result.attr.id) then
              local ident = get_identifier(heading_str)
              -- insert_attributes reads entries from index 2 on; use a real
              -- first element so the table has no nil hole
              insert_attributes(result, {"attributes", {"id", ident}})
            end
            -- insert into references unless there's a same-named one already:
            if not references[heading_str] then
              references[heading_str] =
                {destination = "#" .. result.attr.id, attributes = {_keys={}}}
            end
          elseif tag == "table" then
            -- look for a separator line
            -- if found, make the preceding rows headings
            -- and set attributes for column alignments on the table
            local i=2
            local aligns = {}
            while i <= #result do
              local found, align
              if result[i][1] == "row" then
                local row = result[i]
                for j=2,#row do
                  found, _, align = find(row[j][1], "^separator_(.*)")
                  if not found then
                    break
                  end
                  aligns[j - 1] = align
                end
                if found and #aligns > 0 then
                  -- set previous row to head and adjust aligns
                  local prevrow = result[i - 1]
                  if prevrow[1] == "row" then
                    prevrow.head = true
                    for k=2,#prevrow do
                      -- set head on cells too
                      prevrow[k].head = true
                      if aligns[k - 1] ~= "default" then
                        prevrow[k].align = aligns[k - 1]
                      end
                    end
                  end
                  table.remove(result,i) -- remove sep line
                  -- we don't need to increment i because we removed ith elt
                else
                  if #aligns > 0 then
                    for l=2,#result[i] do
                      if aligns[l - 1] ~= "default" then
                        result[i][l].align = aligns[l - 1]
                      end
                    end
                  end
                  i = i + 1
                end
              end
            end
            result.level = get_length(match)
          elseif tag == "div" then
            if result[2] and result[2][1] == "class" then
              result.attr = result.attr or {_keys = {}}
              insert_attribute(result.attr, "class", result[2][2])
              table.remove(result, 2)
            end
          elseif tag == "code_block" then
            if result[2] then
              if result[2][1] == "code_language" then
                result.lang = result[2][2]
                table.remove(result, 2)
              elseif result[2][1] == "raw_format" then
                local fmt = result[2][2]:sub(2)
                local s = get_string_content(result)
                result = {"raw_block", s}
                result.format = fmt
              end
            end
          elseif tag == "block_attributes" then
            if block_attributes then
              block_attributes[#block_attributes + 1] = result
            else
              block_attributes = {result}
            end
            result = nil
          elseif tag == "attributes" then
            -- parse attributes, add to last node
            local prevnode = nodes[#nodes]
            local endswithspace = false
            if type(prevnode) == "table" then
              if prevnode[1] == "str" then
                -- split off last consecutive word of string
                -- to which to attach attributes
                local lastwordpos = string.find(prevnode[2], "%w+$")
                if not lastwordpos then
                  endswithspace = true
                elseif lastwordpos > 1 then
                  local newnode = {"str", sub(prevnode[2], lastwordpos, -1)}
                  prevnode[2] = sub(prevnode[2], 1, lastwordpos - 1)
                  nodes[#nodes + 1] = newnode
                  prevnode = newnode
                end
              end
              if not endswithspace then
                insert_attributes(prevnode, result)
              end
            end
            result = nil
          elseif find(tag, "^list_item") then
            local marker = string.match(subject, "^%S+", startpos)
            local styles = {}
            gsub(tag, "%[([^]]*)%]", function(x) styles[#styles + 1] = x end)
            -- create a list node with the consecutive list items
            -- of the same kind
            local list = {"list", result}
            -- put the attributes from the first item on the list itself:
            list.attr = result.attr
            result.attr = nil
            result[1] = "list_item"
            if marker == ":" then
              make_definition_list_item(result)
            end
            if sourcepos then
              list.pos = {result.pos[1], result.pos[2]}
            end
            -- now get remaining items
            local nextitem = matches[idx]
            while nextitem do
              local sp, _, ann = unpack_match(nextitem)
              if not find(ann, "^%+list_item") then
                break
              end
              -- check which of the styles this item matches
              local newstyles = {}
              gsub(ann, "%[([^]]*)%]",
                function(x) newstyles[x] = true end)
              local matched_styles = {}
              for _,x in ipairs(styles) do
                if newstyles[x] then
                  matched_styles[#matched_styles + 1] = x
                end
              end
              if #styles > 0 and #matched_styles == 0 then
                break  -- does not match any styles
              end
              styles = matched_styles
              -- at this point styles contains the styles that match all items
              -- in the list so far...

              list[#list].tight = is_tight(matches, startidx, idx - 1, false)
              startidx = idx
              idx = idx + 1
              local item = get_node(tag)
              if tag == "list_item[X]" then
                set_checkbox(item, startidx)
              end
              item[1] = "list_item"
              if sourcepos then
                -- end position of THIS item: the match just consumed by
                -- get_node, not the end of the first item of the list
                local _, itemend = unpack_match(matches[idx - 1])
                item.pos = {sp, itemend}
                list.pos[2] = item.pos[2]
              end
              if marker == ":" then
                make_definition_list_item(item)
              end
              list[#list + 1] = item
              nextitem = matches[idx]
            end
            list[#list].tight = is_tight(matches, startidx, idx - 1, true)
            local tight = true
            for i=2,#list do
              tight = tight and list[i].tight
              list[i].tight = nil
            end
            list.list_style = styles[1] -- resolve, if still ambiguous
            list.tight = tight
            list.start = get_list_start(marker, list.list_style)
            result = list
          end
          nodes[#nodes + 1] = result
        elseif mod == "-" then -- close
          -- a closer that does not match the current container always
          -- aborts; nothing after this assert can run
          assert(false, "unmatched " .. annot .. " encountered at byte " ..
                    startpos)
        elseif tag == "reference_key" then
          local key = sub(subject, startpos + 1, endpos - 1)
          local result = {tag, key}
          idx = idx + 1
          nodes[#nodes + 1] = result
        elseif tag == "reference_value" then
          local val = sub(subject, startpos, endpos)
          local result = {tag, val}
          idx = idx + 1
          nodes[#nodes + 1] = result
        else -- leaf
          local result
          if tag == "softbreak" then
            result = {tag}
          elseif tag == "footnote_reference" then
            result = {tag, sub(subject, startpos + 2, endpos - 1)}
          else
            result = {tag, sub(subject, startpos, endpos)}
          end
          if sourcepos then
            result.pos = {startpos, endpos}
          end
          if block_attributes then
            for i=1,#block_attributes do
              insert_attributes(result, block_attributes[i])
            end
            block_attributes = nil
          end
          idx = idx + 1
          nodes[#nodes + 1] = result
        end
      end
    end
    return nodes
  end

  local doc = get_node("doc")
  doc.references = references
  doc.footnotes = footnotes
  return doc
end
561
-
562
--- Write an indented, human-readable dump of a list of nodes.
-- A string node is written quoted. A table node is written as its tag,
-- followed by its source position (if any), its other string-keyed fields
-- in sorted key order, and its attributes in the order of attr._keys.
-- Children (index 2 onwards) are dumped recursively, indented two further
-- columns.
-- @param nodes array of nodes
-- @param handle object with a write method (e.g. a file handle)
-- @param init index of the first node to render (default 1)
-- @param indent number of spaces of indentation (default 0)
local function render_nodes(nodes, handle, init, indent)
  indent = indent or 0
  init = init or 1
  for i = init, #nodes do
    local node = nodes[i]
    handle:write(string.rep(" ", indent))
    if type(node) == "string" then
      handle:write(string.format("%q", node))
    else
      handle:write(node[1])
      if node.pos then
        handle:write(string.format(" (%d-%d)", node.pos[1], node.pos[2]))
      end
      -- pairs() order is unspecified; sort the field names so that the
      -- same tree always produces the same output
      local fields = {}
      for k in pairs(node) do
        if type(k) == "string" and k ~= "pos" and k ~= "attr" then
          fields[#fields + 1] = k
        end
      end
      table.sort(fields)
      for j = 1, #fields do
        local k = fields[j]
        handle:write(string.format(" %s=%q", k, tostring(node[k])))
      end
      if node.attr then
        local keys = node.attr._keys or {}
        for j = 1, #keys do
          local k = keys[j]
          handle:write(string.format(" %s=%q", k, node.attr[k]))
        end
      end
    end
    handle:write("\n")
    if node[2] then -- children
      render_nodes(node, handle, 2, indent + 2)
    end
  end
end
594
-
595
--- Write a human-readable dump of a document.
-- The document's children are rendered with render_nodes, followed by the
-- reference table (key and destination) and the footnote table (label and
-- rendered content) when present. References and footnotes are written in
-- sorted key order so that the output is deterministic.
-- @param doc the document node, as returned by to_ast
-- @param handle object with a write method (e.g. a file handle)
local function render(doc, handle)
  -- keys of t in a stable order (pairs() order is unspecified)
  local function sorted_keys(t)
    local keys = {}
    for k in pairs(t) do
      keys[#keys + 1] = k
    end
    table.sort(keys, function(a, b) return tostring(a) < tostring(b) end)
    return keys
  end

  render_nodes(doc, handle, 2, 0)
  if doc.references then
    handle:write("references = {\n")
    for _, k in ipairs(sorted_keys(doc.references)) do
      handle:write(string.format(" [%q] = %q,\n", k,
                                 doc.references[k].destination))
    end
    handle:write("}\n")
  end
  if doc.footnotes then
    handle:write("footnotes = {\n")
    for _, k in ipairs(sorted_keys(doc.footnotes)) do
      handle:write(string.format(" [%q] =\n", k))
      render_nodes(doc.footnotes[k], handle, 2, 4)
    end
    handle:write("}\n")
  end
end
613
-
614
-- Public interface of the module: tree construction, tree rendering and
-- the two attribute helpers.
local exports = {}
exports.to_ast = to_ast
exports.render = render
exports.insert_attribute = insert_attribute
exports.copy_attributes = copy_attributes
return exports
618
-
619
-
620
- --[[
621
- Copyright (C) 2022 John MacFarlane
622
-
623
- Permission is hereby granted, free of charge, to any person obtaining
624
- a copy of this software and associated documentation files (the
625
- "Software"), to deal in the Software without restriction, including
626
- without limitation the rights to use, copy, modify, merge, publish,
627
- distribute, sublicense, and/or sell copies of the Software, and to
628
- permit persons to whom the Software is furnished to do so, subject to
629
- the following conditions:
630
-
631
- The above copyright notice and this permission notice shall be included
632
- in all copies or substantial portions of the Software.
633
-
634
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
635
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
636
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
637
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
638
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
639
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
640
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
641
-
642
- ]]