djot 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -0
- data/.rubocop_todo.yml +7 -15
- data/CHANGELOG.md +21 -0
- data/Dockerfile +10 -0
- data/README.md +3 -6
- data/Rakefile +1 -17
- data/djot.gemspec +2 -2
- data/lib/djot/javascript.rb +4 -2
- data/lib/djot/pure.rb +4 -0
- data/lib/djot/version.rb +1 -1
- data/lib/djot.rb +6 -10
- data/manifest.scm +2 -0
- data/sig/djot.gen.rbs +3 -13
- data/sig/djot.rbs +0 -3
- metadata +13 -32
- data/lib/djot/lua.rb +0 -52
- data/lib/lua/djot/ast.lua +0 -642
- data/lib/lua/djot/attributes.lua +0 -273
- data/lib/lua/djot/block.lua +0 -807
- data/lib/lua/djot/emoji.lua +0 -1880
- data/lib/lua/djot/html.lua +0 -557
- data/lib/lua/djot/inline.lua +0 -641
- data/lib/lua/djot/match.lua +0 -75
- data/lib/lua/djot.lua +0 -107
data/lib/lua/djot/inline.lua
DELETED
@@ -1,641 +0,0 @@
|
|
1
|
-
-- this allows the code to work with both lua and luajit:
|
2
|
-
local unpack = unpack or table.unpack
|
3
|
-
local match = require("djot.match")
|
4
|
-
local attributes = require("djot.attributes")
|
5
|
-
local make_match, unpack_match, matches_pattern =
|
6
|
-
match.make_match, match.unpack_match, match.matches_pattern
|
7
|
-
local find, byte = string.find, string.byte
|
8
|
-
|
9
|
-
--- Like string.find, but a match only counts when it ends at or before
-- `endpos`.  At most three captures are passed through.
-- @param subj     string to search
-- @param patt     Lua pattern
-- @param startpos position at which the search starts
-- @param endpos   last position the match is allowed to reach
-- @return start, end and up to three captures; nothing when there is no
--         match inside the window
local function bounded_find(subj, patt, startpos, endpos)
  local first, last, cap1, cap2, cap3 = string.find(subj, patt, startpos)
  if not last or last > endpos then
    return
  end
  return first, last, cap1, cap2, cap3
end
|
16
|
-
|
17
|
-
-- Prototype table for inline parsers; instances come from Parser:new.
local Parser = {}

--- Create a fresh parser state for `subject`.
-- @param subject string whose inline content will be parsed
-- @param opts    optional options table (an empty table when omitted)
-- @return new state table whose metatable is the receiver
function Parser:new(subject, opts)
  self.__index = self
  return setmetatable({
    opts = opts or {},        -- options
    subject = subject,
    matches = {},             -- table pos : (endpos, annotation)
    warnings = {},            -- array of {pos, string} arrays
    openers = {},             -- closer_type -> array of (pos, data)
    verbatim = 0,             -- > 0 while inside a verbatim span closed by
                              -- that many backticks
    verbatim_type = nil,      -- "verbatim", "inline_math" or "display_math"
    destination = false,      -- true while parsing a link destination in ()
    firstpos = 0,             -- start of the earliest slice fed so far
    lastpos = 0,              -- end of the latest slice fed so far
    allow_attributes = true,  -- whether "{" may start an attribute parse
    attribute_parser = nil,   -- active attribute parser, if any
    attribute_start = nil,    -- start of the potential attribute
    attribute_slices = nil,   -- slices fed to the attribute parser so far
  }, self)
end
|
40
|
-
|
41
|
-
--- Record a match, keyed by its start position.
-- A match already stored at `startpos` is replaced.
function Parser:add_match(startpos, endpos, annotation)
  local m = make_match(startpos, endpos, annotation)
  self.matches[startpos] = m
end
|
44
|
-
|
45
|
-
--- Append an opener record to the list kept for `name`.
-- The varargs become the record's fields, in order:
-- 1 = startpos, 2 = endpos, 3 = annotation, 4 = substartpos, 5 = subendpos
function Parser:add_opener(name, ...)
  local stack = self.openers[name]
  if not stack then
    stack = {}
    self.openers[name] = stack
  end
  stack[#stack + 1] = {...}
end
|
52
|
-
|
53
|
-
--- Drop openers that lie between two matched delimiters.
-- Every opener list is walked from its newest entry backwards.  An entry
-- whose own span is inside startpos..endpos is removed; an entry whose
-- sub-span (fields 4 and 5) is inside the range only loses fields 3-5.
-- The walk over a list stops at the first entry that is neither.
function Parser:clear_openers(startpos, endpos)
  for _, stack in pairs(self.openers) do
    local idx = #stack
    local entry = stack[idx]
    while entry do
      local sp, ep = entry[1], entry[2]
      local sub_sp, sub_ep = entry[4], entry[5]
      if sp >= startpos and ep <= endpos then
        stack[idx] = nil
      elseif sub_sp and sub_ep
             and sub_sp >= startpos and sub_ep <= endpos then
        entry[3], entry[4], entry[5] = nil, nil, nil
      else
        break
      end
      idx = idx - 1
      entry = stack[idx]
    end
  end
end
|
72
|
-
|
73
|
-
--- Turn every match that starts in startpos..endpos into a plain "str"
-- match covering the same span.  Matches already annotated "str" or
-- "escape" are left as they are.
function Parser:str_matches(startpos, endpos)
  local matches = self.matches
  for at = startpos, endpos do
    local m = matches[at]
    if m then
      local sp, ep, annot = unpack_match(m)
      if not (annot == "str" or annot == "escape") then
        matches[at] = make_match(sp, ep, "str")
      end
    end
  end
end
|
84
|
-
|
85
|
-
--- Build a matcher for a delimiter character that encloses inline content.
-- @param c            delimiter; also the key used in self.openers
-- @param annotation   base annotation; the opening match gets "+" and the
--                     closing match "-" prepended
-- @param defaultmatch annotation used when the delimiter is not paired
--                     (defaults to "str")
-- @param opentest     optional function(self, pos); when given, its result
--                     must also be truthy for the delimiter to open
-- @return function(self, pos) that records matches and returns the position
--         following the consumed text
function Parser.between_matched(c, annotation, defaultmatch, opentest)
  local fallback = defaultmatch or "str"
  local has_opentest = type(opentest) == "function"

  return function(self, pos)
    local subject = self.subject
    local default = fallback
    local opens = find(subject, "^%S", pos + 1)
    local closes = find(subject, "^%S", pos - 1)
    local open_marker =
      matches_pattern(self.matches[pos - 1], "^open%_marker")
    local close_marker = byte(subject, pos + 1) == 125 -- }
    local opener_start, closer_end = pos, pos

    if has_opentest then
      opens = opens and opentest(self, pos)
    end

    -- an explicit "{" before or "}" after the delimiter overrides the
    -- whitespace-based decision; the open marker wins when both are present
    if open_marker then
      opens, closes = true, false
      opener_start = pos - 1
    elseif close_marker then
      opens, closes = false, true
      closer_end = pos + 1
    end

    -- flip a directional default annotation to agree with the marker
    if open_marker and default:match("^right") then
      default = (default:gsub("^right", "left"))
    elseif close_marker and default:match("^left") then
      default = (default:gsub("^left", "right"))
    end

    local stack = self.openers[c]
    if closes and stack and #stack > 0 then
      local top = stack[#stack]
      local openpos, openposend = top[1], top[2]
      if openposend ~= pos - 1 then -- exclude empty emph
        self:clear_openers(openpos, pos)
        self:add_match(openpos, openposend, "+" .. annotation)
        self:add_match(pos, closer_end, "-" .. annotation)
        return closer_end + 1
      end
    end

    -- no opener was matched
    if opens then
      self:add_opener(c, opener_start, pos)
      self:add_match(opener_start, pos, default)
      return pos + 1
    end
    self:add_match(pos, closer_end, default)
    return closer_end + 1
  end
end
|
141
|
-
|
142
|
-
Parser.matchers = {
|
143
|
-
-- 96 = `
-- Opens a verbatim span.  One or two "$" directly before the backticks
-- make it inline or display math; the matches previously recorded for
-- those "$" are dropped and the opener match starts at the first "$".
[96] = function(self, pos, endpos)
  local subject = self.subject
  local _, run_end = bounded_find(subject, "^`*", pos, endpos)
  if not run_end then
    return nil
  end
  local matches = self.matches
  local start, open_annot, kind = pos, "+verbatim", "verbatim"
  if find(subject, "^%$%$", pos - 2) then
    start, open_annot, kind = pos - 2, "+display_math", "display_math"
    matches[pos - 2] = nil
    matches[pos - 1] = nil
  elseif find(subject, "^%$", pos - 1) then
    start, open_annot, kind = pos - 1, "+inline_math", "inline_math"
    matches[pos - 1] = nil
  end
  self:add_match(start, run_end, open_annot)
  self.verbatim_type = kind
  -- number of backticks that will close the span
  self.verbatim = run_end - pos + 1
  return run_end + 1
end,
|
166
|
-
|
167
|
-
-- 92 = \
-- Backslash: a hard break when followed by optional spaces/tabs and a line
-- ending, an escape when followed by punctuation or a space (an escaped
-- space becomes "nbsp"), and a literal backslash otherwise.
[92] = function(self, pos, endpos)
  local subject = self.subject
  local matches = self.matches
  local _, endchar = bounded_find(subject, "^[ \t]*\r?\n", pos + 1, endpos)
  self:add_match(pos, pos, "escape")
  if endchar then
    -- Trim trailing spaces/tabs from a "str" match that ends right before
    -- the backslash.  self.matches is keyed by start position, so it is
    -- not a sequence and `#` cannot locate the previous match; walk back
    -- to the nearest recorded start position instead.
    local prev = pos - 1
    while prev >= 1 and not matches[prev] do
      prev = prev - 1
    end
    if prev >= 1 then
      local sp, ep, annot = unpack_match(matches[prev])
      if annot == "str" and ep == pos - 1 then
        -- never trim past the start of the match itself
        while ep >= sp
              and (byte(subject, ep) == 32 or byte(subject, ep) == 9) do
          ep = ep - 1
        end
        if ep < sp then
          matches[sp] = nil -- the match was whitespace only
        else
          self:add_match(sp, ep, "str")
        end
      end
    end
    self:add_match(pos + 1, endchar, "hardbreak")
    return endchar + 1
  end

  local _, ec = bounded_find(subject, "^[%p ]", pos + 1, endpos)
  if not ec then
    -- nothing escapable follows: replace the escape with a literal "\"
    self:add_match(pos, pos, "str")
    return pos + 1
  end
  -- the "escape" match at pos recorded above stays in place
  if find(subject, "^ ", pos + 1) then
    self:add_match(pos + 1, ec, "nbsp")
  else
    self:add_match(pos + 1, ec, "str")
  end
  return ec + 1
end,
|
205
|
-
|
206
|
-
-- 60 = <
-- Autolink: "<...>" without spaces or nested angle brackets.  Content with
-- "@" before any ":" is an email; otherwise content starting with letters
-- and ":" is a url.  Anything else is left to the caller's fallback.
[60] = function(self, pos, endpos)
  local subject = self.subject
  local open_at, close_at =
    bounded_find(subject, "^%<[^<>%s]+%>", pos, endpos)
  if not open_at then
    return
  end
  local open_annot, close_annot
  if bounded_find(subject, "^[^:]+%@", pos + 1, close_at) then
    open_annot, close_annot = "+email", "-email"
  elseif bounded_find(subject, "^%a+:", pos + 1, close_at) then
    open_annot, close_annot = "+url", "-url"
  else
    return
  end
  self:add_match(open_at, open_at, open_annot)
  self:add_match(open_at + 1, close_at - 1, "str")
  self:add_match(close_at, close_at, close_annot)
  return close_at + 1
end,
|
227
|
-
|
228
|
-
-- 126 = ~ : paired delimiter producing "+subscript" / "-subscript"
[126] = Parser.between_matched("~", "subscript"),

-- 94 = ^ : paired delimiter producing "+superscript" / "-superscript"
[94] = Parser.between_matched("^", "superscript"),
|
233
|
-
|
234
|
-
-- 91 = [
-- "[^label]" is consumed whole as a footnote reference; any other "[" is
-- recorded as a potential opener and provisionally matched as "str".
[91] = function(self, pos, endpos)
  local found, ref_end =
    bounded_find(self.subject, "^%^([^]]+)%]", pos + 1, endpos)
  if found then
    self:add_match(pos, ref_end, "footnote_reference")
    return ref_end + 1
  end
  self:add_opener("[", pos, pos)
  self:add_match(pos, pos, "str")
  return pos + 1
end,
|
246
|
-
|
247
|
-
-- 93 = ]
-- Acts on the most recent "[" opener, if there is one:
--   * opener already marked "reference_link": this "]" ends the reference;
--   * followed by "[": marks the opener as a reference link;
--   * followed by "(": marks it as an explicit link, starts a destination;
--   * followed by "{": closes a bracketed span (attributes are expected).
-- In every other case nothing is returned.
[93] = function(self, pos, endpos)
  local stack = self.openers["["]
  if not (stack and #stack > 0) then
    return
  end
  local subject = self.subject
  local opener = stack[#stack]

  if opener[3] == "reference_link" then
    local open_sp, open_ep = opener[1], opener[2]
    local mid_sp, mid_ep = opener[4], opener[5]
    -- an image is "![" where the "!" is not itself backslash-escaped
    local is_image = bounded_find(subject, "^!", open_sp - 1, endpos)
      and not bounded_find(subject, "^[\\]", open_sp - 2, endpos)
    if is_image then
      self:add_match(open_sp - 1, open_sp - 1, "image_marker")
      self:add_match(open_sp, open_ep, "+imagetext")
      self:add_match(mid_sp, mid_ep, "-imagetext")
    else
      self:add_match(open_sp, open_ep, "+linktext")
      self:add_match(mid_sp, mid_ep, "-linktext")
    end
    self:add_match(mid_ep, mid_ep, "+reference")
    self:add_match(pos, pos, "-reference")
    -- the reference label is plain text
    self:str_matches(mid_ep + 1, pos - 1)
    self:clear_openers(open_sp, pos)
    return pos + 1
  end

  if bounded_find(subject, "^%[", pos + 1, endpos) then
    opener[3] = "reference_link"
    opener[4] = pos      -- intermediate ]
    opener[5] = pos + 1  -- intermediate [
    self:add_match(pos, pos + 1, "str")
    return pos + 2
  end

  if bounded_find(subject, "^%(", pos + 1, endpos) then
    self.openers["("] = {} -- clear ( openers
    opener[3] = "explicit_link"
    opener[4] = pos      -- intermediate ]
    opener[5] = pos + 1  -- intermediate (
    self.destination = true
    self:add_match(pos, pos + 1, "str")
    return pos + 2
  end

  if bounded_find(subject, "^%{", pos + 1, endpos) then
    self:add_match(opener[1], opener[2], "+span")
    self:add_match(pos, pos, "-span")
    self:clear_openers(opener[1], pos)
    return pos + 1
  end
end,
|
297
|
-
|
298
|
-
|
299
|
-
-- 40 = (
-- Only tracked while a link destination is being parsed, so that nested
-- parentheses inside the destination can be balanced.
[40] = function(self, pos)
  if self.destination then
    self:add_opener("(", pos, pos)
    self:add_match(pos, pos, "str")
    return pos + 1
  end
  return nil
end,
|
306
|
-
|
307
|
-
-- 41 = )
-- Inside a link destination: closes a pending "(" if there is one;
-- otherwise, when the latest "[" opener is an explicit link, finishes the
-- link or image.  Returns nil outside of a destination.
[41] = function(self, pos, endpos)
  if not self.destination then
    return nil
  end

  local parens = self.openers["("]
  if parens and #parens > 0 and parens[#parens][1] then
    parens[#parens] = nil -- balanced with an inner "("
    self:add_match(pos, pos, "str")
    return pos + 1
  end

  local brackets = self.openers["["]
  if not (brackets and #brackets > 0) then
    return
  end
  local opener = brackets[#brackets]
  if opener[3] ~= "explicit_link" then
    return
  end

  -- we have an inline link
  local subject = self.subject
  local open_sp, open_ep = opener[1], opener[2]
  local text_end, dest_start = opener[4], opener[5]
  local is_image = bounded_find(subject, "^!", open_sp - 1, endpos)
    and not bounded_find(subject, "^[\\]", open_sp - 2, endpos)
  if is_image then
    self:add_match(open_sp - 1, open_sp - 1, "image_marker")
    self:add_match(open_sp, open_ep, "+imagetext")
    self:add_match(text_end, text_end, "-imagetext")
  else
    self:add_match(open_sp, open_ep, "+linktext")
    self:add_match(text_end, text_end, "-linktext")
  end
  self:add_match(dest_start, dest_start, "+destination")
  self:add_match(pos, pos, "-destination")
  self.destination = false
  -- the destination itself is plain text
  self:str_matches(dest_start + 1, pos - 1)
  self:clear_openers(open_ep, pos)
  return pos + 1
end,
|
344
|
-
|
345
|
-
-- 95 = _ : paired delimiter producing "+emph" / "-emph"
[95] = Parser.between_matched("_", "emph"),

-- 42 = * : paired delimiter producing "+strong" / "-strong"
[42] = Parser.between_matched("*", "strong"),
|
350
|
-
|
351
|
-
-- 123 = {
-- "{" directly before one of _ * ~ ^ + = ' " - is an explicit open marker.
-- Otherwise, when attributes are allowed, an attribute parse is started at
-- this position (pos is returned unchanged so the "{" is fed to it).
-- Failing both, the brace is plain text.
[123] = function(self, pos, endpos)
  if bounded_find(self.subject, "^[_*~^+='\"-]", pos + 1, endpos) then
    self:add_match(pos, pos, "open_marker")
    return pos + 1
  end
  if not self.allow_attributes then
    self:add_match(pos, pos, "str")
    return pos + 1
  end
  self.attribute_parser = attributes.AttributeParser:new(self.subject)
  self.attribute_start = pos
  self.attribute_slices = {}
  return pos
end,
|
366
|
-
|
367
|
-
-- 58 = :
-- ":name:" (letters, digits, "_", "+", "-") is an emoji; a lone colon is
-- plain text.
[58] = function(self, pos, endpos)
  local first, last =
    bounded_find(self.subject, "^%:[%w_+-]+%:", pos, endpos)
  if first then
    self:add_match(first, last, "emoji")
    return last + 1
  end
  self:add_match(pos, pos, "str")
  return pos + 1
end,
|
378
|
-
|
379
|
-
-- 43 = +
-- "insert" delimiter; may only open when a brace is adjacent
-- ("{" right before or "}" right after).
[43] = Parser.between_matched("+", "insert", "str",
  function(self, pos)
    local s = self.subject
    return find(s, "^%{", pos - 1) or find(s, "^%}", pos + 1)
  end),

-- 61 = =
-- "mark" delimiter; same adjacency requirement as "+".
[61] = Parser.between_matched("=", "mark", "str",
  function(self, pos)
    local s = self.subject
    return find(s, "^%{", pos - 1) or find(s, "^%}", pos + 1)
  end),
|
392
|
-
|
393
|
-
-- 39 = '
-- Single quote.  Unpaired quotes default to "right_single_quote".
[39] = Parser.between_matched("'", "single_quoted", "right_single_quote",
  function(self, pos) -- test to open
    -- May open at position 1 or right after whitespace, a quote character,
    -- a hyphen, "(" or "[".  The hyphen is written as "%-": unescaped
    -- between "'" and "(" it would form the range '-( and a literal "-"
    -- would never be matched.
    return pos == 1 or
      find(self.subject, "^[%s\"'%-([]", pos - 1)
  end),

-- 34 = "
-- Double quote.  Unpaired quotes default to "left_double_quote".
[34] = Parser.between_matched('"', "double_quoted", "left_double_quote"),
|
402
|
-
|
403
|
-
-- 45 = -
-- A hyphen next to a brace ("{-" or "-}") is a "delete" delimiter.  Any
-- other run of hyphens is converted to em dashes (3 hyphens), en dashes
-- (2 hyphens) and literal hyphens.
-- Returns the position following the consumed hyphens.
[45] = function(self, pos, endpos)
  local subject = self.subject
  if byte(subject, pos - 1) == 123 or byte(subject, pos + 1) == 125 then
    return Parser.between_matched("-", "delete")(self, pos, endpos)
  end

  local _, ep = find(subject, "^%-*", pos)
  -- Never look past the slice being fed.  Without this clamp a run that
  -- continues beyond endpos and is followed by "}" could leave zero
  -- hyphens to consume, returning pos unchanged and stalling the caller.
  if ep > endpos then
    ep = endpos
  end
  local hyphens = 1 + ep - pos
  if byte(subject, ep + 1) == 125 then -- }
    hyphens = hyphens - 1 -- last hyphen is close del
  end

  -- Try to construct a homogeneous sequence of dashes
  local all_em = hyphens % 3 == 0
  local all_en = hyphens % 2 == 0
  while hyphens > 0 do
    if all_em then
      self:add_match(pos, pos + 2, "em_dash")
      pos = pos + 3
      hyphens = hyphens - 3
    elseif all_en then
      self:add_match(pos, pos + 1, "en_dash")
      pos = pos + 2
      hyphens = hyphens - 2
    elseif hyphens >= 3 and (hyphens % 2 ~= 0 or hyphens > 4) then
      self:add_match(pos, pos + 2, "em_dash")
      pos = pos + 3
      hyphens = hyphens - 3
    elseif hyphens >= 2 then
      self:add_match(pos, pos + 1, "en_dash")
      pos = pos + 2
      hyphens = hyphens - 2
    else
      self:add_match(pos, pos, "str")
      pos = pos + 1
      hyphens = hyphens - 1
    end
  end
  return pos
end,
|
447
|
-
|
448
|
-
-- 46 = .
-- Three consecutive periods become a single "ellipses" match; otherwise
-- nothing is returned and the caller's fallback handles the period.
[46] = function(self, pos, endpos)
  if not bounded_find(self.subject, "^%.%.", pos + 1, endpos) then
    return
  end
  self:add_match(pos, pos + 2, "ellipses")
  return pos + 3
end
|
455
|
-
}
|
456
|
-
|
457
|
-
--- Fallback matcher: record the character at `pos` as a one-character
-- "str" match and return the next position.
function Parser:single_char(pos)
  local next_pos = pos + 1
  self:add_match(pos, pos, "str")
  return next_pos
end
|
461
|
-
|
462
|
-
--- Feed a slice of the subject to the parser, updating state.
-- The slice spos..endpos is scanned left to right.  While an attribute
-- parser is active the text is handed to it chunk by chunk; otherwise runs
-- of ordinary characters become "str" matches and each special character
-- is dispatched to its entry in self.matchers (or to single_char).
-- @param spos   first position of the slice
-- @param endpos last position of the slice
function Parser:feed(spos, endpos)
  local special = "[][\\`{}_*()!<>~^:=+$\r\n'\".-]"
  local subject = self.subject
  local matchers = self.matchers
  local pos
  if self.firstpos == 0 or spos < self.firstpos then
    self.firstpos = spos
  end
  if self.lastpos == 0 or endpos > self.lastpos then
    self.lastpos = endpos
  end
  pos = spos
  while pos <= endpos do
    if self.attribute_parser then
      local sp = pos
      -- feed the attribute parser up to the next special character
      local ep2 = bounded_find(subject, special, pos, endpos) or endpos
      local status, ep = self.attribute_parser:feed(sp, ep2)
      if status == "done" then
        local attribute_start = self.attribute_start
        -- add attribute delimiters
        self:add_match(attribute_start, attribute_start, "+attributes")
        self:add_match(ep, ep, "-attributes")
        local attr_matches = self.attribute_parser:get_matches()
        -- add the matches found inside the attribute
        for i = 1, #attr_matches do
          self:add_match(unpack_match(attr_matches[i]))
        end
        -- restore state to prior to adding attribute parser:
        self.attribute_parser = nil
        self.attribute_start = nil
        self.attribute_slices = nil
        pos = ep + 1
      elseif status == "fail" then
        -- backtrack: re-feed the slices already consumed by the attribute
        -- parser, this time with attribute parsing switched off
        local slices = self.attribute_slices
        self.allow_attributes = false
        self.attribute_parser = nil
        self.attribute_start = nil
        for i = 1, #slices do
          self:feed(unpack(slices[i]))
        end
        self.allow_attributes = true
        self.attribute_slices = nil
        pos = sp
      elseif status == "continue" then
        self.attribute_slices[#self.attribute_slices + 1] = {sp, ep}
        pos = ep + 1
      end
    else
      -- find next interesting character:
      local newpos = bounded_find(subject, special, pos, endpos) or endpos + 1
      if newpos > pos then
        self:add_match(pos, newpos - 1, "str")
        pos = newpos
        if pos > endpos then
          break -- otherwise, fall through:
        end
      end
      -- if we get here, then newpos = pos,
      -- i.e. we have something interesting at pos
      local c = byte(subject, pos)

      if c == 13 or c == 10 then -- cr or lf
        -- CR LF is a single softbreak.  The pattern must be a literal
        -- newline: "%n" is not a character class and matches the letter n.
        if c == 13 and bounded_find(subject, "^\n", pos + 1, endpos) then
          self:add_match(pos, pos + 1, "softbreak")
          pos = pos + 2
        else
          self:add_match(pos, pos, "softbreak")
          pos = pos + 1
        end
      elseif self.verbatim > 0 then
        if c == 96 then
          local _, endchar = bounded_find(subject, "^`+", pos, endpos)
          if endchar and endchar - pos + 1 == self.verbatim then
            -- check for raw attribute
            local sp, ep =
              bounded_find(subject, "^%{%=[^%s{}`]+%}", endchar + 1, endpos)
            if sp and self.verbatim_type == "verbatim" then -- raw
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              self:add_match(sp, ep, "raw_format")
              pos = ep + 1
            else
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              pos = endchar + 1
            end
            self.verbatim = 0
            self.verbatim_type = nil
          else
            -- backtick run of the wrong length: literal text
            endchar = endchar or endpos
            self:add_match(pos, endchar, "str")
            pos = endchar + 1
          end
        else
          self:add_match(pos, pos, "str")
          pos = pos + 1
        end
      else
        pos = (matchers[c] and matchers[c](self, pos, endpos))
               or self:single_char(pos)
      end
    end
  end
end
|
566
|
-
|
567
|
-
--- Report whether the parser is currently inside a verbatim span.
-- @return true when self.verbatim is greater than zero, false otherwise
function Parser:in_verbatim()
  local open_ticks = self.verbatim
  return open_ticks > 0
end
|
571
|
-
|
572
|
-
--- Return parse results and any warnings.
-- Matches are collected in position order from firstpos to lastpos, with
-- adjacent "str" matches merged.  A final softbreak is dropped, trailing
-- spaces are trimmed from a final "str", and an unclosed verbatim span is
-- closed with a warning.
-- @return array of matches, array of {pos, message} warnings
function Parser:get_matches()
  local sorted = {}
  local subject = self.subject
  local lastsp, lastep, lastannot
  for i = self.firstpos, self.lastpos do
    if self.matches[i] then
      local sp, ep, annot = unpack_match(self.matches[i])
      if annot == "str" and lastannot == "str" and lastep + 1 == sp then
        -- consolidate adjacent strs
        sorted[#sorted] = make_match(lastsp, ep, annot)
        lastep = ep
      else
        sorted[#sorted + 1] = self.matches[i]
        lastsp, lastep, lastannot = sp, ep, annot
      end
    end
  end
  if #sorted > 0 then
    local last = sorted[#sorted]
    local startpos, endpos, annot = unpack_match(last)
    -- remove final softbreak
    if annot == "softbreak" then
      sorted[#sorted] = nil
      last = sorted[#sorted]
      if not last then
        -- the softbreak was the only match; nothing is left to unpack
        return sorted, self.warnings
      end
      startpos, endpos, annot = unpack_match(last)
    end
    -- remove trailing spaces
    if annot == "str" and byte(subject, endpos) == 32 then
      while endpos > startpos and byte(subject, endpos) == 32 do
        endpos = endpos - 1
      end
      sorted[#sorted] = make_match(startpos, endpos, annot)
    end
    if self.verbatim > 0 then -- unclosed verbatim
      self.warnings[#self.warnings + 1] =
        {startpos, "Unclosed verbatim"}
      sorted[#sorted + 1] = make_match(startpos, endpos,
                                       "-" .. self.verbatim_type)
    end
  end
  return sorted, self.warnings
end
|
615
|
-
|
616
|
-
-- Module export: the inline parser prototype.
return {
  Parser = Parser,
}
|
617
|
-
|
618
|
-
|
619
|
-
--[[
|
620
|
-
Copyright (C) 2022 John MacFarlane
|
621
|
-
|
622
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
623
|
-
a copy of this software and associated documentation files (the
|
624
|
-
"Software"), to deal in the Software without restriction, including
|
625
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
626
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
627
|
-
permit persons to whom the Software is furnished to do so, subject to
|
628
|
-
the following conditions:
|
629
|
-
|
630
|
-
The above copyright notice and this permission notice shall be included
|
631
|
-
in all copies or substantial portions of the Software.
|
632
|
-
|
633
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
634
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
635
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
636
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
637
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
638
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
639
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
640
|
-
|
641
|
-
]]
|