djot 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rubocop.yml +20 -0
- data/CHANGELOG.md +7 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +21 -0
- data/README.md +54 -0
- data/Rakefile +40 -0
- data/Steepfile +10 -0
- data/djot.gemspec +33 -0
- data/lib/djot/version.rb +5 -0
- data/lib/djot.rb +42 -0
- data/lib/lua/djot/ast.lua +642 -0
- data/lib/lua/djot/attributes.lua +273 -0
- data/lib/lua/djot/block.lua +807 -0
- data/lib/lua/djot/emoji.lua +1880 -0
- data/lib/lua/djot/html.lua +557 -0
- data/lib/lua/djot/inline.lua +641 -0
- data/lib/lua/djot/match.lua +75 -0
- data/lib/lua/djot.lua +107 -0
- data/sig/djot.rbs +6 -0
- metadata +81 -0
@@ -0,0 +1,641 @@
|
|
1
|
+
-- this allows the code to work with both lua and luajit:
|
2
|
+
local unpack = unpack or table.unpack
|
3
|
+
local match = require("djot.match")
|
4
|
+
local attributes = require("djot.attributes")
|
5
|
+
local make_match, unpack_match, matches_pattern =
|
6
|
+
match.make_match, match.unpack_match, match.matches_pattern
|
7
|
+
local find, byte = string.find, string.byte
|
8
|
+
|
9
|
+
-- allow up to 3 captures...
|
10
|
+
-- Search `subj` for `patt` starting at `startpos`, but reject any match
-- that ends after `endpos`.
-- Returns the match start, the match end and up to three captures;
-- returns nothing when there is no match or the match overruns `endpos`.
local function bounded_find(subj, patt, startpos, endpos)
  local first, last, cap1, cap2, cap3 = find(subj, patt, startpos)
  if not last or last > endpos then
    return
  end
  return first, last, cap1, cap2, cap3
end
|
16
|
+
|
17
|
+
local Parser = {}
|
18
|
+
|
19
|
+
-- Create a new inline parser over `subject`.
-- `opts` is an optional options table (defaults to an empty table).
-- The returned object uses `self` (normally `Parser`) as its metatable,
-- so methods are looked up on it through `__index`.
function Parser:new(subject, opts)
  self.__index = self
  return setmetatable({
    opts = opts or {},        -- options
    subject = subject,        -- the string being parsed
    matches = {},             -- table pos : (endpos, annotation)
    warnings = {},            -- array of {pos, string} arrays
    openers = {},             -- map from closer_type to array of (pos, data)
                              -- in reverse order
    verbatim = 0,             -- parsing verbatim span to be ended by
                              -- n backticks
    verbatim_type = nil,      -- whether verbatim is math or regular
    destination = false,      -- parsing link destination in ()
    firstpos = 0,             -- position of first slice
    lastpos = 0,              -- position of last slice
    allow_attributes = true,  -- allow parsing of attributes
    attribute_parser = nil,   -- attribute parser
    attribute_start = nil,    -- start of potential attribute
    attribute_slices = nil,   -- slices we've tried to parse as attributes
  }, self)
end
|
40
|
+
|
41
|
+
-- Record a match covering startpos..endpos with the given annotation.
-- Matches are stored keyed by their start position, so a later match
-- with the same start position replaces the earlier one.
function Parser:add_match(startpos, endpos, annotation)
  local m = make_match(startpos, endpos, annotation)
  self.matches[startpos] = m
end
|
44
|
+
|
45
|
+
-- Push a potential opener onto the stack registered under `name`.
-- The varargs are stored as one array; the slots used elsewhere in this
-- file are:
--   1 = startpos, 2 = endpos, 3 = annotation,
--   4 = substartpos, 5 = subendpos
function Parser:add_opener(name, ...)
  local stack = self.openers[name]
  if not stack then
    stack = {}
    self.openers[name] = stack
  end
  table.insert(stack, {...})
end
|
52
|
+
|
53
|
+
-- Discard openers that fall between startpos and endpos.
-- Every opener stack is walked from its top downwards:
--   * an opener lying entirely inside the range is removed;
--   * an opener whose secondary span (slots 4 and 5) lies inside the range
--     keeps its position but loses slots 3, 4 and 5;
--   * the walk over a stack stops at the first opener that is neither.
function Parser:clear_openers(startpos, endpos)
  for _, stack in pairs(self.openers) do
    local idx = #stack
    while stack[idx] do
      local entry = stack[idx]
      local open_sp, open_ep, _, sub_sp, sub_ep = unpack(entry)
      if open_sp >= startpos and open_ep <= endpos then
        stack[idx] = nil
      elseif (sub_sp and sub_sp >= startpos)
         and (sub_ep and sub_ep <= endpos) then
        entry[3] = nil
        entry[4] = nil
        entry[5] = nil
      else
        break
      end
      idx = idx - 1
    end
  end
end
|
72
|
+
|
73
|
+
-- Downgrade every match that starts in startpos..endpos to a plain "str"
-- match, keeping its extent. Matches already annotated "str" or "escape"
-- are left alone.
function Parser:str_matches(startpos, endpos)
  local matches = self.matches
  for at = startpos, endpos do
    local m = matches[at]
    if m then
      local sp, ep, annot = unpack_match(m)
      if not (annot == "str" or annot == "escape") then
        matches[at] = make_match(sp, ep, "str")
      end
    end
  end
end
|
84
|
+
|
85
|
+
-- Build a matcher function for a delimiter that both opens and closes a
-- span (e.g. `_`, `*`, `~`).
--   c            key under which openers for this delimiter are stacked
--   annotation   base annotation; matches are emitted as "+annotation"
--                and "-annotation"
--   defaultmatch annotation used when the delimiter is not paired
--                (defaults to "str")
--   opentest     optional function(self, pos); when given, the delimiter
--                may open only if it also returns a truthy value
-- The returned function takes (self, pos) and returns the position at
-- which parsing should resume.
function Parser.between_matched(c, annotation, defaultmatch, opentest)
  return function(self, pos)
    local fallback = defaultmatch or "str"
    local subject = self.subject
    -- may open if followed by non-space, may close if preceded by non-space
    local can_open = find(subject, "^%S", pos + 1)
    local can_close = find(subject, "^%S", pos - 1)
    local has_open_marker =
      matches_pattern(self.matches[pos - 1], "^open%_marker")
    local has_close_marker = byte(subject, pos + 1) == 125 -- }
    local startopener, endcloser = pos, pos

    if type(opentest) == "function" then
      can_open = can_open and opentest(self, pos)
    end

    -- allow explicit open/close markers to override; an open marker takes
    -- precedence over a close marker
    if has_open_marker then
      can_open, can_close = true, false
      startopener = pos - 1
    elseif has_close_marker then
      can_open, can_close = false, true
      endcloser = pos + 1
    end

    -- an explicit marker also decides the direction of a fallback
    -- annotation that starts with "left" or "right"
    if has_open_marker and fallback:match("^right") then
      fallback = fallback:gsub("^right", "left")
    elseif has_close_marker and fallback:match("^left") then
      fallback = fallback:gsub("^left", "right")
    end

    local stack = self.openers[c]
    if can_close and stack and #stack > 0 then
      -- check the most recent opener for a match
      local openpos, openposend = unpack(stack[#stack])
      if openposend ~= pos - 1 then -- exclude empty emph
        self:clear_openers(openpos, pos)
        self:add_match(openpos, openposend, "+" .. annotation)
        self:add_match(pos, endcloser, "-" .. annotation)
        return endcloser + 1
      end
    end

    -- if we get here, we didn't match an opener
    if not can_open then
      self:add_match(pos, endcloser, fallback)
      return endcloser + 1
    end
    self:add_opener(c, startopener, pos)
    self:add_match(startopener, pos, fallback)
    return pos + 1
  end
end
|
141
|
+
|
142
|
+
-- Open test shared by the `+` and `=` matchers: the delimiter may open
-- only when it is preceded by `{` or followed by `}`.
local function beside_brace(self, pos)
  return find(self.subject, "^%{", pos - 1) or
         find(self.subject, "^%}", pos + 1)
end

-- Table of matcher functions keyed by the byte value of the character
-- that triggers them. Each matcher is called as (self, pos, endpos) and
-- returns the position at which parsing should resume, or nil when it did
-- not handle the character (the caller then treats it as a single
-- "str" character).
Parser.matchers = {
  -- 96 = `
  -- Opens a verbatim span; a preceding `$` or `$$` turns it into inline
  -- or display math and is absorbed into the opening match.
  [96] = function(self, pos, endpos)
    local subject = self.subject
    local _, endchar = bounded_find(subject, "^`*", pos, endpos)
    if not endchar then
      return nil
    end
    local matches = self.matches
    local opener_start, kind
    if find(subject, "^%$%$", pos - 2) then
      matches[pos - 2] = nil
      matches[pos - 1] = nil
      opener_start, kind = pos - 2, "display_math"
    elseif find(subject, "^%$", pos - 1) then
      matches[pos - 1] = nil
      opener_start, kind = pos - 1, "inline_math"
    else
      opener_start, kind = pos, "verbatim"
    end
    self:add_match(opener_start, endchar, "+" .. kind)
    self.verbatim_type = kind
    -- number of backticks that must close this span
    self.verbatim = endchar - pos + 1
    return endchar + 1
  end,

  -- 92 = \
  -- Backslash before a line ending is a hard break; before punctuation
  -- or a space it is an escape; otherwise it is literal text.
  [92] = function(self, pos, endpos)
    local subject = self.subject
    local _, endchar = bounded_find(subject, "^[ \t]*\r?\n", pos + 1, endpos)
    self:add_match(pos, pos, "escape")
    if endchar then
      -- see if there were preceding spaces
      -- NOTE(review): self.matches is keyed by start position, so `#` may
      -- not identify the most recently added match -- confirm intent.
      local last_idx = #self.matches
      if last_idx > 0 then
        local sp, ep, annot = unpack_match(self.matches[last_idx])
        if annot == "str" then
          local ch = byte(subject, ep)
          while ch == 32 or ch == 9 do
            ep = ep - 1
            ch = byte(subject, ep)
          end
          if sp == ep then
            self.matches[last_idx] = nil
          else
            self:add_match(sp, ep, "str")
          end
        end
      end
      self:add_match(pos + 1, endchar, "hardbreak")
      return endchar + 1
    end

    local _, ec = bounded_find(subject, "^[%p ]", pos + 1, endpos)
    if not ec then
      -- nothing escapable follows: the backslash is literal
      self:add_match(pos, pos, "str")
      return pos + 1
    end
    self:add_match(pos, pos, "escape")
    if find(subject, "^ ", pos + 1) then
      self:add_match(pos + 1, ec, "nbsp")
    else
      self:add_match(pos + 1, ec, "str")
    end
    return ec + 1
  end,

  -- 60 = <
  -- Autolinks: <scheme:...> becomes a url, <...@...> an email.
  [60] = function(self, pos, endpos)
    local subject = self.subject
    local starturl, endurl =
      bounded_find(subject, "^%<[^<>%s]+%>", pos, endpos)
    if not starturl then
      return nil
    end
    local is_url = bounded_find(subject, "^%a+:", pos + 1, endurl)
    local is_email = bounded_find(subject, "^[^:]+%@", pos + 1, endurl)
    local kind
    if is_email then
      kind = "email"
    elseif is_url then
      kind = "url"
    else
      return nil
    end
    self:add_match(starturl, starturl, "+" .. kind)
    self:add_match(starturl + 1, endurl - 1, "str")
    self:add_match(endurl, endurl, "-" .. kind)
    return endurl + 1
  end,

  -- 126 = ~
  [126] = Parser.between_matched('~', 'subscript'),

  -- 94 = ^
  [94] = Parser.between_matched('^', 'superscript'),

  -- 91 = [
  -- Either a complete footnote reference `[^label]` or a bracket opener.
  [91] = function(self, pos, endpos)
    local sp, ep = bounded_find(self.subject, "^%^([^]]+)%]", pos + 1, endpos)
    if sp then -- footnote ref
      self:add_match(pos, ep, "footnote_reference")
      return ep + 1
    end
    self:add_opener("[", pos, pos)
    self:add_match(pos, pos, "str")
    return pos + 1
  end,

  -- 93 = ]
  -- Closes a reference link, or marks the most recent `[` opener as the
  -- text of a reference link, an explicit link or a bracketed span,
  -- depending on the character that follows.
  [93] = function(self, pos, endpos)
    local brackets = self.openers["["]
    if not (brackets and #brackets > 0) then
      return nil
    end
    local subject = self.subject
    local opener = brackets[#brackets]

    if opener[3] == "reference_link" then
      -- found a reference link; add the matches
      local text_start = opener[1]
      -- an unescaped `!` right before the opener makes it an image
      local is_image = bounded_find(subject, "^!", text_start - 1, endpos)
        and not bounded_find(subject, "^[\\]", text_start - 2, endpos)
      local text_kind = "linktext"
      if is_image then
        self:add_match(text_start - 1, text_start - 1, "image_marker")
        text_kind = "imagetext"
      end
      self:add_match(text_start, opener[2], "+" .. text_kind)
      self:add_match(opener[4], opener[5], "-" .. text_kind)
      self:add_match(opener[5], opener[5], "+reference")
      self:add_match(pos, pos, "-reference")
      -- convert all matches to str
      self:str_matches(opener[5] + 1, pos - 1)
      -- remove from openers
      self:clear_openers(text_start, pos)
      return pos + 1
    end

    if bounded_find(subject, "^%[", pos + 1, endpos) then
      opener[3] = "reference_link"
      opener[4] = pos      -- intermediate ]
      opener[5] = pos + 1  -- intermediate [
      self:add_match(pos, pos + 1, "str")
      return pos + 2
    end

    if bounded_find(subject, "^%(", pos + 1, endpos) then
      self.openers["("] = {} -- clear ( openers
      opener[3] = "explicit_link"
      opener[4] = pos      -- intermediate ]
      opener[5] = pos + 1  -- intermediate (
      self.destination = true
      self:add_match(pos, pos + 1, "str")
      return pos + 2
    end

    if bounded_find(subject, "^%{", pos + 1, endpos) then
      -- assume this is attributes, bracketed span
      self:add_match(opener[1], opener[2], "+span")
      self:add_match(pos, pos, "-span")
      self:clear_openers(opener[1], pos)
      return pos + 1
    end

    return nil
  end,


  -- 40 = (
  -- Only significant inside a link destination, where parentheses nest.
  [40] = function(self, pos)
    if not self.destination then
      return nil
    end
    self:add_opener("(", pos, pos)
    self:add_match(pos, pos, "str")
    return pos + 1
  end,

  -- 41 = )
  -- Inside a link destination: closes a nested `(` if one is open,
  -- otherwise ends the destination of an explicit link.
  [41] = function(self, pos, endpos)
    if not self.destination then
      return nil
    end
    local parens = self.openers["("]
    if parens and #parens > 0 and parens[#parens][1] then
      parens[#parens] = nil -- clear opener
      self:add_match(pos, pos, "str")
      return pos + 1
    end

    local brackets = self.openers["["]
    if not (brackets and #brackets > 0
            and brackets[#brackets][3] == "explicit_link") then
      return nil
    end

    -- we have inline link
    local subject = self.subject
    local opener = brackets[#brackets]
    local text_start = opener[1]
    local is_image = bounded_find(subject, "^!", text_start - 1, endpos)
      and not bounded_find(subject, "^[\\]", text_start - 2, endpos)
    local text_kind = "linktext"
    if is_image then
      self:add_match(text_start - 1, text_start - 1, "image_marker")
      text_kind = "imagetext"
    end
    self:add_match(text_start, opener[2], "+" .. text_kind)
    self:add_match(opener[4], opener[4], "-" .. text_kind)
    self:add_match(opener[5], opener[5], "+destination")
    self:add_match(pos, pos, "-destination")
    self.destination = false
    -- convert all matches to str
    self:str_matches(opener[5] + 1, pos - 1)
    -- remove from openers
    self:clear_openers(opener[2], pos)
    return pos + 1
  end,

  -- 95 = _
  [95] = Parser.between_matched('_', 'emph'),

  -- 42 = *
  [42] = Parser.between_matched('*', 'strong'),

  -- 123 = {
  -- Either an explicit open marker for the delimiter that follows, or
  -- the start of an attribute block (when attributes are allowed).
  [123] = function(self, pos, endpos)
    if bounded_find(self.subject, "^[_*~^+='\"-]", pos + 1, endpos) then
      self:add_match(pos, pos, "open_marker")
      return pos + 1
    end
    if self.allow_attributes then
      -- hand over to the attribute parser; pos is returned unchanged so
      -- that the `{` itself is fed to it
      self.attribute_parser = attributes.AttributeParser:new(self.subject)
      self.attribute_start = pos
      self.attribute_slices = {}
      return pos
    end
    self:add_match(pos, pos, "str")
    return pos + 1
  end,

  -- 58 = :
  -- `:name:` is an emoji; a lone colon is literal text.
  [58] = function(self, pos, endpos)
    local sp, ep = bounded_find(self.subject, "^%:[%w_+-]+%:", pos, endpos)
    if not sp then
      self:add_match(pos, pos, "str")
      return pos + 1
    end
    self:add_match(sp, ep, "emoji")
    return ep + 1
  end,

  -- 43 = +
  [43] = Parser.between_matched("+", "insert", "str", beside_brace),

  -- 61 = =
  [61] = Parser.between_matched("=", "mark", "str", beside_brace),

  -- 39 = '
  [39] = Parser.between_matched("'", "single_quoted", "right_single_quote",
    function(self, pos) -- test to open
      -- NOTE(review): inside the set, `'-(` is a character range, so a
      -- literal hyphen is not matched here -- confirm this is intended.
      return pos == 1 or
        find(self.subject, "^[%s\"'-([]", pos - 1)
    end),

  -- 34 = "
  [34] = Parser.between_matched('"', "double_quoted", "left_double_quote"),

  -- 45 = -
  -- A hyphen next to a brace is a delete delimiter; otherwise a run of
  -- hyphens is split into em dashes (3), en dashes (2) and literal
  -- hyphens (1).
  [45] = function(self, pos, endpos)
    local subject = self.subject
    local _, ep = find(subject, "^%-*", pos)
    -- count only the hyphens that lie inside the current slice
    local limit = ep
    if endpos < ep then
      limit = endpos
    end
    local hyphens = 1 + limit - pos
    if byte(subject, ep + 1) == 125 then -- }
      hyphens = hyphens - 1 -- last hyphen is close del
    end
    if byte(subject, pos - 1) == 123 or byte(subject, pos + 1) == 125 then
      return Parser.between_matched("-", "delete")(self, pos, endpos)
    end
    -- Try to construct a homogeneous sequence of dashes
    local all_em = hyphens % 3 == 0
    local all_en = hyphens % 2 == 0
    while hyphens > 0 do
      local use_em = all_em or
        (not all_en and hyphens >= 3
         and (hyphens % 2 ~= 0 or hyphens > 4))
      local use_en = not use_em and (all_en or hyphens >= 2)
      local width, annot
      if use_em then
        width, annot = 3, "em_dash"
      elseif use_en then
        width, annot = 2, "en_dash"
      else
        width, annot = 1, "str"
      end
      self:add_match(pos, pos + width - 1, annot)
      pos = pos + width
      hyphens = hyphens - width
    end
    return pos
  end,

  -- 46 = .
  -- Three consecutive periods form an ellipsis.
  [46] = function(self, pos, endpos)
    if not bounded_find(self.subject, "^%.%.", pos + 1, endpos) then
      return nil
    end
    self:add_match(pos, pos + 2, "ellipses")
    return pos + 3
  end
}
|
456
|
+
|
457
|
+
-- Fallback matcher: record the character at `pos` as plain text and
-- return the position just after it.
function Parser:single_char(pos)
  local next_pos = pos + 1
  self:add_match(pos, pos, "str")
  return next_pos
end
|
461
|
+
|
462
|
+
-- Feed a slice to the parser, updating state.
|
463
|
+
-- Feed the slice spos..endpos of the subject to the parser, updating
-- its state (matches, openers, verbatim and attribute-parsing state).
-- May be called repeatedly with successive slices; firstpos/lastpos
-- track the overall extent that has been fed.
function Parser:feed(spos, endpos)
  local special = "[][\\`{}_*()!<>~^:=+$\r\n'\".-]"
  local subject = self.subject
  local matchers = self.matchers
  local pos
  if self.firstpos == 0 or spos < self.firstpos then
    self.firstpos = spos
  end
  if self.lastpos == 0 or endpos > self.lastpos then
    self.lastpos = endpos
  end
  pos = spos
  while pos <= endpos do
    if self.attribute_parser then
      -- an attribute block is in progress: hand the text up to the next
      -- special character (or the end of the slice) to the attribute parser
      local sp = pos
      local ep2 = bounded_find(subject, special, pos, endpos) or endpos
      local status, ep = self.attribute_parser:feed(sp, ep2)
      if status == "done" then
        local attribute_start = self.attribute_start
        -- add the delimiters of the attribute block
        self:add_match(attribute_start, attribute_start, "+attributes")
        self:add_match(ep, ep, "-attributes")
        -- add the matches collected by the attribute parser
        local attr_matches = self.attribute_parser:get_matches()
        for i = 1, #attr_matches do
          self:add_match(unpack_match(attr_matches[i]))
        end
        -- restore state to prior to adding attribute parser:
        self.attribute_parser = nil
        self.attribute_start = nil
        self.attribute_slices = nil
        pos = ep + 1
      elseif status == "fail" then
        -- backtrack: re-feed the slices consumed so far as ordinary
        -- inline content, with attribute parsing switched off
        local slices = self.attribute_slices
        self.allow_attributes = false
        self.attribute_parser = nil
        self.attribute_start = nil
        for i = 1, #slices do
          self:feed(unpack(slices[i]))
        end
        self.allow_attributes = true
        -- drop the stale slice list (the field is attribute_slices)
        self.attribute_slices = nil
        pos = sp
      elseif status == "continue" then
        -- remember the slice so that it can be re-fed on failure
        self.attribute_slices[#self.attribute_slices + 1] = {sp, ep}
        pos = ep + 1
      end
    else
      -- find next interesting character:
      local newpos = bounded_find(subject, special, pos, endpos) or endpos + 1
      if newpos > pos then
        self:add_match(pos, newpos - 1, "str")
        pos = newpos
        if pos > endpos then
          break -- otherwise, fall through:
        end
      end
      -- if we get here, then newpos = pos,
      -- i.e. we have something interesting at pos
      local c = byte(subject, pos)

      if c == 13 or c == 10 then -- cr or lf
        -- treat CR LF as a single softbreak; "%n" is not a Lua character
        -- class (it matches the letter n), so test for a literal newline
        if c == 13 and bounded_find(subject, "^\n", pos + 1, endpos) then
          self:add_match(pos, pos + 1, "softbreak")
          pos = pos + 2
        else
          self:add_match(pos, pos, "softbreak")
          pos = pos + 1
        end
      elseif self.verbatim > 0 then
        -- inside a verbatim span only a backtick run of the opening
        -- length is significant; everything else is literal
        if c == 96 then
          local _, endchar = bounded_find(subject, "^`+", pos, endpos)
          if endchar and endchar - pos + 1 == self.verbatim then
            -- check for raw attribute
            local sp, ep =
              bounded_find(subject, "^%{%=[^%s{}`]+%}", endchar + 1, endpos)
            if sp and self.verbatim_type == "verbatim" then -- raw
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              self:add_match(sp, ep, "raw_format")
              pos = ep + 1
            else
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              pos = endchar + 1
            end
            self.verbatim = 0
            self.verbatim_type = nil
          else
            -- backtick run of a different length: literal text
            endchar = endchar or endpos
            self:add_match(pos, endchar, "str")
            pos = endchar + 1
          end
        else
          self:add_match(pos, pos, "str")
          pos = pos + 1
        end
      else
        -- dispatch on the character; a matcher returning nil means
        -- "not handled", in which case the character is plain text
        pos = (matchers[c] and matchers[c](self, pos, endpos))
          or self:single_char(pos)
      end
    end
  end
end
|
566
|
+
|
567
|
+
-- Return true if we're parsing verbatim content.
|
568
|
+
-- Report whether the parser is currently inside an unclosed verbatim
-- span, i.e. whether a closing backtick run is still awaited.
function Parser:in_verbatim()
  local pending_backticks = self.verbatim
  return pending_backticks > 0
end
|
571
|
+
|
572
|
+
-- Return parse results and any warnings.
|
573
|
+
-- Return the matches in position order together with any warnings.
-- Adjacent "str" matches are merged, a final softbreak is dropped,
-- trailing spaces are trimmed from a final "str" match, and an unclosed
-- verbatim span is closed with a warning.
-- Returns: sorted (array of matches), warnings (array of {pos, string}).
function Parser:get_matches()
  local sorted = {}
  local subject = self.subject
  local lastsp, lastep, lastannot
  for i = self.firstpos, self.lastpos do
    if self.matches[i] then
      local sp, ep, annot = unpack_match(self.matches[i])
      if annot == "str" and lastannot == "str" and lastep + 1 == sp then
        -- consolidate adjacent strs
        sorted[#sorted] = make_match(lastsp, ep, annot)
        lastsp, lastep, lastannot = lastsp, ep, annot
      else
        sorted[#sorted + 1] = self.matches[i]
        lastsp, lastep, lastannot = sp, ep, annot
      end
    end
  end
  if #sorted > 0 then
    local last = sorted[#sorted]
    local startpos, endpos, annot = unpack_match(last)
    -- remove final softbreak
    if annot == "softbreak" then
      sorted[#sorted] = nil
      last = sorted[#sorted]
      if not last then
        -- the softbreak was the only match: nothing left to trim
        return sorted, self.warnings
      end
      startpos, endpos, annot = unpack_match(last)
    end
    -- remove trailing spaces
    if annot == "str" and byte(subject, endpos) == 32 then
      while endpos > startpos and byte(subject, endpos) == 32 do
        endpos = endpos - 1
      end
      sorted[#sorted] = make_match(startpos, endpos, annot)
    end
    if self.verbatim > 0 then -- unclosed verbatim
      self.warnings[#self.warnings + 1] =
        {startpos, "Unclosed verbatim"}
      -- close the span at the last match so that the output is balanced
      sorted[#sorted + 1] = make_match(startpos, endpos,
                                       "-" .. self.verbatim_type)
    end
  end
  return sorted, self.warnings
end
|
615
|
+
|
616
|
+
return { Parser = Parser }
|
617
|
+
|
618
|
+
|
619
|
+
--[[
|
620
|
+
Copyright (C) 2022 John MacFarlane
|
621
|
+
|
622
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
623
|
+
a copy of this software and associated documentation files (the
|
624
|
+
"Software"), to deal in the Software without restriction, including
|
625
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
626
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
627
|
+
permit persons to whom the Software is furnished to do so, subject to
|
628
|
+
the following conditions:
|
629
|
+
|
630
|
+
The above copyright notice and this permission notice shall be included
|
631
|
+
in all copies or substantial portions of the Software.
|
632
|
+
|
633
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
634
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
635
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
636
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
637
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
638
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
639
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
640
|
+
|
641
|
+
]]
|