djot 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +20 -0
- data/CHANGELOG.md +7 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +21 -0
- data/README.md +54 -0
- data/Rakefile +40 -0
- data/Steepfile +10 -0
- data/djot.gemspec +33 -0
- data/lib/djot/version.rb +5 -0
- data/lib/djot.rb +42 -0
- data/lib/lua/djot/ast.lua +642 -0
- data/lib/lua/djot/attributes.lua +273 -0
- data/lib/lua/djot/block.lua +807 -0
- data/lib/lua/djot/emoji.lua +1880 -0
- data/lib/lua/djot/html.lua +557 -0
- data/lib/lua/djot/inline.lua +641 -0
- data/lib/lua/djot/match.lua +75 -0
- data/lib/lua/djot.lua +107 -0
- data/sig/djot.rbs +6 -0
- metadata +81 -0
@@ -0,0 +1,641 @@
|
|
1
|
+
-- this allows the code to work with both lua and luajit:
|
2
|
+
local unpack = unpack or table.unpack
|
3
|
+
local match = require("djot.match")
|
4
|
+
local attributes = require("djot.attributes")
|
5
|
+
local make_match, unpack_match, matches_pattern =
|
6
|
+
match.make_match, match.unpack_match, match.matches_pattern
|
7
|
+
local find, byte = string.find, string.byte
|
8
|
+
|
9
|
+
-- string.find restricted to a window: a match only counts when it ends at or
-- before `endpos`.  At most three captures are passed through.
-- Returns startpos, endpos, cap1, cap2, cap3 on success and no values at all
-- when there is no match or the match runs past `endpos`.
local function bounded_find(subj, patt, startpos, endpos)
  local first, last, cap1, cap2, cap3 = find(subj, patt, startpos)
  if not last or last > endpos then
    return
  end
  return first, last, cap1, cap2, cap3
end
|
16
|
+
|
17
|
+
-- Prototype table for inline parser objects.  Parser:new installs it as the
-- metatable (and __index) of each state table it creates, so the methods and
-- the `matchers` table defined on it are shared by all instances.
local Parser = {}
|
18
|
+
|
19
|
+
-- Create a parser state for `subject` (the string that positions index
-- into).  `opts` is an optional options table; an empty table is used when
-- it is omitted.  Field lookups that miss on the returned table fall back
-- to the table `new` was called on.
function Parser:new(subject, opts)
  self.__index = self
  local parser = setmetatable({}, self)
  parser.opts = opts or {}       -- options
  parser.subject = subject
  parser.matches = {}            -- table pos : (endpos, annotation)
  parser.warnings = {}           -- array of {pos, string} arrays
  parser.openers = {}            -- map from closer_type to array of
                                 -- (pos, data) in reverse order
  parser.verbatim = 0            -- parsing verbatim span to be ended by
                                 -- n backticks
  parser.verbatim_type = nil     -- whether verbatim is math or regular
  parser.destination = false     -- parsing link destination in ()
  parser.firstpos = 0            -- position of first slice
  parser.lastpos = 0             -- position of last slice
  parser.allow_attributes = true -- allow parsing of attributes
  parser.attribute_parser = nil  -- attribute parser
  parser.attribute_start = nil   -- start of potential attribute
  parser.attribute_slices = nil  -- slices we've tried to parse as attributes
  return parser
end
|
40
|
+
|
41
|
+
-- Record a match covering startpos..endpos with the given annotation.
-- Matches are keyed by their start position, so a later match that starts
-- at the same position replaces the earlier one.
function Parser:add_match(startpos, endpos, annotation)
  local record = make_match(startpos, endpos, annotation)
  self.matches[startpos] = record
end
|
44
|
+
|
45
|
+
-- Push an opener onto the stack kept for delimiter `name`, creating the
-- stack on first use.  The extra arguments are stored positionally:
-- 1 = startpos, 2 = endpos, 3 = annotation, 4 = substartpos, 5 = endpos
function Parser:add_opener(name, ...)
  local stack = self.openers[name]
  if not stack then
    stack = {}
    self.openers[name] = stack
  end
  table.insert(stack, {...})
end
|
52
|
+
|
53
|
+
-- Discard openers that lie between startpos and endpos (inclusive).
-- Every opener stack is walked from its top downward:
--   * an opener whose own span is inside the range is removed;
--   * otherwise, if its sub-span (fields 4 and 5) is inside the range, the
--     opener stays but fields 3..5 are cleared;
--   * the first opener matching neither case stops the walk of that stack.
function Parser:clear_openers(startpos, endpos)
  for _, stack in pairs(self.openers) do
    local idx = #stack
    local entry = stack[idx]
    while entry do
      local open_sp, open_ep, _, sub_sp, sub_ep = unpack(entry)
      if open_sp >= startpos and open_ep <= endpos then
        stack[idx] = nil
      elseif (sub_sp and sub_sp >= startpos)
             and (sub_ep and sub_ep <= endpos) then
        entry[3], entry[4], entry[5] = nil, nil, nil
      else
        break
      end
      idx = idx - 1
      entry = stack[idx]
    end
  end
end
|
72
|
+
|
73
|
+
-- Turn every match that starts in startpos..endpos into a plain "str"
-- match with the same span.  Matches already annotated "str" or "escape"
-- are left as they are.
function Parser:str_matches(startpos, endpos)
  local matches = self.matches
  for position = startpos, endpos do
    local existing = matches[position]
    if existing then
      local sp, ep, annot = unpack_match(existing)
      local keep = annot == "str" or annot == "escape"
      if not keep then
        matches[position] = make_match(sp, ep, "str")
      end
    end
  end
end
|
84
|
+
|
85
|
+
-- Build a matcher for a delimiter that wraps inline content.
--   c            delimiter character; also the key of its opener stack
--   annotation   name used for the "+name" / "-name" matches of a pair
--   defaultmatch annotation for an unpaired delimiter (default "str")
--   opentest     optional function(self, pos) adding a condition for the
--                delimiter to count as an opener
-- The returned function(self, pos) records the appropriate matches and
-- returns the position at which scanning should resume.
function Parser.between_matched(c, annotation, defaultmatch, opentest)
  local has_opentest = type(opentest) == "function"

  return function(self, pos)
    local subject = self.subject
    local fallback = defaultmatch or "str"
    -- may open when followed by non-space, may close when preceded by one
    local opens = find(subject, "^%S", pos + 1)
    local closes = find(subject, "^%S", pos - 1)
    local open_marker =
      matches_pattern(self.matches[pos - 1], "^open%_marker")
    local close_marker = byte(subject, pos + 1) == 125 -- }
    local opener_start, closer_end = pos, pos

    if has_opentest then
      opens = opens and opentest(self, pos)
    end

    -- explicit markers override the whitespace heuristics; an open marker
    -- before the delimiter takes precedence over a "}" after it
    if open_marker then
      opens, closes = true, false
      opener_start = pos - 1
    end
    if not open_marker and close_marker then
      opens, closes = false, true
      closer_end = pos + 1
    end

    -- with an explicit marker, flip a left/right default to the marked side
    if open_marker and fallback:match("^right") then
      fallback = (fallback:gsub("^right", "left"))
    elseif close_marker and fallback:match("^left") then
      fallback = (fallback:gsub("^left", "right"))
    end

    local stack = self.openers[c]
    if closes and stack and #stack > 0 then
      -- pair with the most recent opener unless nothing lies in between
      local openpos, openposend = unpack(stack[#stack])
      if openposend ~= pos - 1 then -- exclude empty emph
        self:clear_openers(openpos, pos)
        self:add_match(openpos, openposend, "+" .. annotation)
        self:add_match(pos, closer_end, "-" .. annotation)
        return closer_end + 1
      end
    end

    -- no opener was matched
    if not opens then
      self:add_match(pos, closer_end, fallback)
      return closer_end + 1
    end
    self:add_opener(c, opener_start, pos)
    self:add_match(opener_start, pos, fallback)
    return pos + 1
  end
end
|
141
|
+
|
142
|
+
-- Dispatch table used by Parser:feed: keys are byte values of special
-- characters, values are functions (self, pos, endpos) that record matches
-- for the construct starting at `pos` and return the position where
-- scanning should resume.  A matcher that returns nil makes feed fall back
-- to Parser:single_char, which records the character as a plain "str".
Parser.matchers = {
  -- 96 = `
  -- Opens a verbatim span.  A "$" or "$$" directly before the backticks
  -- turns it into inline or display math and is absorbed into the opener.
  [96] = function(self, pos, endpos)
    local subject = self.subject
    local _, endchar = bounded_find(subject, "^`*", pos, endpos)
    if not endchar then
      return nil
    end
    if find(subject, "^%$%$", pos - 2) then
      self.matches[pos - 2] = nil
      self.matches[pos - 1] = nil
      self:add_match(pos - 2, endchar, "+display_math")
      self.verbatim_type = "display_math"
    elseif find(subject, "^%$", pos - 1) then
      self.matches[pos - 1] = nil
      self:add_match(pos - 1, endchar, "+inline_math")
      self.verbatim_type = "inline_math"
    else
      self:add_match(pos, endchar, "+verbatim")
      self.verbatim_type = "verbatim"
    end
    -- remember how many backticks must close the span
    self.verbatim = endchar - pos + 1
    return endchar + 1
  end,

  -- 92 = \
  -- Backslash before (optional blanks and) a newline is a hard break;
  -- before punctuation or a space it is an escape; otherwise it is literal.
  [92] = function(self, pos, endpos)
    local subject = self.subject
    local _, endchar = bounded_find(subject, "^[ \t]*\r?\n", pos + 1, endpos)
    self:add_match(pos, pos, "escape")
    if endchar then
      -- see if there were preceding spaces
      -- NOTE(review): self.matches is keyed by start position, so it is not
      -- a sequence and `#self.matches` is not guaranteed to identify the
      -- match just before `pos` -- confirm the intended lookup.
      if #self.matches > 0 then
        local sp, ep, annot = unpack_match(self.matches[#self.matches])
        if annot == "str" then
          -- strip trailing blanks, but never walk back past the start of
          -- the match itself
          while ep >= sp
                and (subject:byte(ep) == 32 or subject:byte(ep) == 9) do
            ep = ep - 1
          end
          if ep < sp then
            -- the match was nothing but blanks: drop it entirely
            self.matches[#self.matches] = nil
          else
            -- keep the non-blank prefix (even if it is a single character)
            self:add_match(sp, ep, "str")
          end
        end
      end
      self:add_match(pos + 1, endchar, "hardbreak")
      return endchar + 1
    else
      local _, ec = bounded_find(subject, "^[%p ]", pos + 1, endpos)
      if not ec then
        -- nothing escapable follows: the backslash is literal text
        self:add_match(pos, pos, "str")
        return pos + 1
      else
        -- the "escape" match for the backslash was already recorded above
        if find(subject, "^ ", pos + 1) then
          self:add_match(pos + 1, ec, "nbsp")
        else
          self:add_match(pos + 1, ec, "str")
        end
        return ec + 1
      end
    end
  end,

  -- 60 = <
  -- Autolinks: <scheme:...> is a url, <...@...> (no ":" before the "@") is
  -- an email.  Anything else returns nil.
  [60] = function(self, pos, endpos)
    local subject = self.subject
    local starturl, endurl =
      bounded_find(subject, "^%<[^<>%s]+%>", pos, endpos)
    if starturl then
      local is_url = bounded_find(subject, "^%a+:", pos + 1, endurl)
      local is_email = bounded_find(subject, "^[^:]+%@", pos + 1, endurl)
      if is_email then
        self:add_match(starturl, starturl, "+email")
        self:add_match(starturl + 1, endurl - 1, "str")
        self:add_match(endurl, endurl, "-email")
        return endurl + 1
      elseif is_url then
        self:add_match(starturl, starturl, "+url")
        self:add_match(starturl + 1, endurl - 1, "str")
        self:add_match(endurl, endurl, "-url")
        return endurl + 1
      end
    end
  end,

  -- 126 = ~
  [126] = Parser.between_matched('~', 'subscript'),

  -- 94 = ^
  [94] = Parser.between_matched('^', 'superscript'),

  -- 91 = [
  -- Either a complete footnote reference "[^label]" or a potential opener
  -- of a link, image or span.
  [91] = function(self, pos, endpos)
    local sp, ep = bounded_find(self.subject, "^%^([^]]+)%]", pos + 1, endpos)
    if sp then -- footnote ref
      self:add_match(pos, ep, "footnote_reference")
      return ep + 1
    else
      self:add_opener("[", pos, pos)
      self:add_match(pos, pos, "str")
      return pos + 1
    end
  end,

  -- 93 = ]
  -- Closes the innermost "[" opener.  Depending on that opener's state and
  -- on the next character this finishes a reference link, starts the
  -- reference or destination part of a link, or closes a bracketed span.
  [93] = function(self, pos, endpos)
    local openers = self.openers["["]
    local subject = self.subject
    if openers and #openers > 0 then
      local opener = openers[#openers]
      if opener[3] == "reference_link" then
        -- found a reference link
        -- an unescaped "!" right before the "[" makes it an image
        local is_image = bounded_find(subject, "^!", opener[1] - 1, endpos)
                and not bounded_find(subject, "^[\\]", opener[1] - 2, endpos)
        -- The text closer covers only the intermediate "]" (opener[4]),
        -- as in the explicit-link case of the ")" matcher; the following
        -- "[" (opener[5]) belongs to the "+reference" match.
        if is_image then
          self:add_match(opener[1] - 1, opener[1] - 1, "image_marker")
          self:add_match(opener[1], opener[2], "+imagetext")
          self:add_match(opener[4], opener[4], "-imagetext")
        else
          self:add_match(opener[1], opener[2], "+linktext")
          self:add_match(opener[4], opener[4], "-linktext")
        end
        self:add_match(opener[5], opener[5], "+reference")
        self:add_match(pos, pos, "-reference")
        -- convert all matches to str
        self:str_matches(opener[5] + 1, pos - 1)
        -- remove from openers
        self:clear_openers(opener[1], pos)
        return pos + 1
      elseif bounded_find(subject, "^%[", pos + 1, endpos) then
        opener[3] = "reference_link"
        opener[4] = pos  -- intermediate ]
        opener[5] = pos + 1  -- intermediate [
        self:add_match(pos, pos + 1, "str")
        return pos + 2
      elseif bounded_find(subject, "^%(", pos + 1, endpos) then
        self.openers["("] = {} -- clear ( openers
        opener[3] = "explicit_link"
        opener[4] = pos  -- intermediate ]
        opener[5] = pos + 1  -- intermediate (
        self.destination = true
        self:add_match(pos, pos + 1, "str")
        return pos + 2
      elseif bounded_find(subject, "^%{", pos + 1, endpos) then
        -- assume this is attributes, bracketed span
        self:add_match(opener[1], opener[2], "+span")
        self:add_match(pos, pos, "-span")
        self:clear_openers(opener[1], pos)
        return pos + 1
      end
    end
  end,


  -- 40 = (
  -- Only meaningful inside a link destination, where nested parentheses
  -- are tracked so that the matching ")" can be told from the final one.
  [40] = function(self, pos)
    if not self.destination then return nil end
    self:add_opener("(", pos, pos)
    self:add_match(pos, pos, "str")
    return pos + 1
  end,

  -- 41 = )
  -- Inside a link destination: closes a nested "(" if one is open,
  -- otherwise ends the destination of the pending explicit link.
  [41] = function(self, pos, endpos)
    if not self.destination then return nil end
    local parens = self.openers["("]
    if parens and #parens > 0 and parens[#parens][1] then
      parens[#parens] = nil -- clear opener
      self:add_match(pos, pos, "str")
      return pos + 1
    else
      local subject = self.subject
      local openers = self.openers["["]
      if openers and #openers > 0
          and openers[#openers][3] == "explicit_link" then
        local opener = openers[#openers]
        local startdest, enddest = opener[5], pos
        -- we have inline link
        local is_image = bounded_find(subject, "^!", opener[1] - 1, endpos)
                 and not bounded_find(subject, "^[\\]", opener[1] - 2, endpos)
        if is_image then
          self:add_match(opener[1] - 1, opener[1] - 1, "image_marker")
          self:add_match(opener[1], opener[2], "+imagetext")
          self:add_match(opener[4], opener[4], "-imagetext")
        else
          self:add_match(opener[1], opener[2], "+linktext")
          self:add_match(opener[4], opener[4], "-linktext")
        end
        self:add_match(startdest, startdest, "+destination")
        self:add_match(enddest, enddest, "-destination")
        self.destination = false
        -- convert all matches to str
        self:str_matches(opener[5] + 1, pos - 1)
        -- remove from openers
        self:clear_openers(opener[2], pos)
        return enddest + 1
      end
    end
  end,

  -- 95 = _
  [95] = Parser.between_matched('_', 'emph'),

  -- 42 = *
  [42] = Parser.between_matched('*', 'strong'),

  -- 123 = {
  -- "{" directly before a delimiter character is an explicit open marker;
  -- otherwise it may start an attribute block, which is handed to a
  -- dedicated attribute parser (feed then drives that parser from `pos`).
  [123] = function(self, pos, endpos)
    if bounded_find(self.subject, "^[_*~^+='\"-]", pos + 1, endpos) then
      self:add_match(pos, pos, "open_marker")
      return pos + 1
    elseif self.allow_attributes then
      self.attribute_parser = attributes.AttributeParser:new(self.subject)
      self.attribute_start = pos
      self.attribute_slices = {}
      return pos
    else
      self:add_match(pos, pos, "str")
      return pos + 1
    end
  end,

  -- 58 = :
  -- ":name:" is an emoji; a lone colon is plain text.
  [58] = function(self, pos, endpos)
    local sp, ep = bounded_find(self.subject, "^%:[%w_+-]+%:", pos, endpos)
    if sp then
      self:add_match(sp, ep, "emoji")
      return ep + 1
    else
      self:add_match(pos, pos, "str")
      return pos + 1
    end
  end,

  -- 43 = +
  -- Can open only when written with braces: "{+" or "+}".
  [43] = Parser.between_matched("+", "insert", "str",
                           function(self, pos)
                             return find(self.subject, "^%{", pos - 1) or
                                    find(self.subject, "^%}", pos + 1)
                           end),

  -- 61 = =
  -- Can open only when written with braces: "{=" or "=}".
  [61] = Parser.between_matched("=", "mark", "str",
                           function(self, pos)
                             return find(self.subject, "^%{", pos - 1) or
                                    find(self.subject, "^%}", pos + 1)
                           end),

  -- 39 = '
  -- NOTE(review): inside the set below, `'-(` is read by Lua as the range
  -- from "'" to "(", so a literal hyphen is not among the characters that
  -- allow a quote to open -- confirm whether `%-` was intended.
  [39] = Parser.between_matched("'", "single_quoted", "right_single_quote",
                           function(self, pos) -- test to open
                             return pos == 1 or
                               find(self.subject, "^[%s\"'-([]", pos - 1)
                             end),

  -- 34 = "
  [34] = Parser.between_matched('"', "double_quoted", "left_double_quote"),

  -- 45 = -
  -- A hyphen next to a brace is a delete delimiter ("{-" / "-}").
  -- Otherwise the run of hyphens is split into em dashes (3), en dashes
  -- (2) and literal hyphens.
  [45] = function(self, pos, endpos)
    local subject = self.subject
    local _, ep = find(subject, "^%-*", pos)
    local hyphens
    -- count only the hyphens that fall inside the current slice
    if endpos < ep then
      hyphens = 1 + endpos - pos
    else
      hyphens = 1 + ep - pos
    end
    if byte(subject, ep + 1) == 125 then -- }
      hyphens = hyphens - 1 -- last hyphen is close del
    end
    if byte(subject, pos - 1) == 123 or byte(subject, pos + 1) == 125 then
      return Parser.between_matched("-", "delete")(self, pos, endpos)
    end
    -- Try to construct a homogeneous sequence of dashes
    local all_em = hyphens % 3 == 0
    local all_en = hyphens % 2 == 0
    while hyphens > 0 do
      if all_em then
        self:add_match(pos, pos + 2, "em_dash")
        pos = pos + 3
        hyphens = hyphens - 3
      elseif all_en then
        self:add_match(pos, pos + 1, "en_dash")
        pos = pos + 2
        hyphens = hyphens - 2
      elseif hyphens >= 3 and (hyphens % 2 ~= 0 or hyphens > 4) then
        self:add_match(pos, pos + 2, "em_dash")
        pos = pos + 3
        hyphens = hyphens - 3
      elseif hyphens >= 2 then
        self:add_match(pos, pos + 1, "en_dash")
        pos = pos + 2
        hyphens = hyphens - 2
      else
        self:add_match(pos, pos, "str")
        pos = pos + 1
        hyphens = hyphens - 1
      end
    end
    return pos
  end,

  -- 46 = .
  -- Three periods in a row form an ellipsis; otherwise returns nil.
  [46] = function(self, pos, endpos)
    if bounded_find(self.subject, "^%.%.", pos + 1, endpos) then
      self:add_match(pos, pos +2, "ellipses")
      return pos + 3
    end
  end
}
|
456
|
+
|
457
|
+
-- Fallback for a character without special meaning here: record it as a
-- one-character "str" match and return the position after it.
function Parser:single_char(pos)
  local next_pos = pos + 1
  self:add_match(pos, pos, "str")
  return next_pos
end
|
461
|
+
|
462
|
+
-- Feed a slice to the parser, updating state.
-- `spos` and `endpos` are inclusive positions into self.subject.  The slice
-- is scanned left to right; matches are recorded through add_match, and
-- firstpos/lastpos are widened to cover every slice fed so far.
--
-- While an attribute parser is active (set up by the "{" matcher), input is
-- handed to it piece by piece.  If it reports "fail", the pieces it had
-- accepted are re-fed as ordinary inline text with attributes disabled.
function Parser:feed(spos, endpos)
  local special = "[][\\`{}_*()!<>~^:=+$\r\n'\".-]"
  local subject = self.subject
  local matchers = self.matchers
  local pos
  if self.firstpos == 0 or spos < self.firstpos then
    self.firstpos = spos
  end
  if self.lastpos == 0 or endpos > self.lastpos then
    self.lastpos = endpos
  end
  pos = spos
  while pos <= endpos do
    if self.attribute_parser then
      local sp = pos
      -- hand over text up to the next special character (or the slice end)
      local ep2 = bounded_find(subject, special, pos, endpos) or endpos
      local status, ep = self.attribute_parser:feed(sp, ep2)
      if status == "done" then
        local attribute_start = self.attribute_start
        -- bracket the attribute block
        self:add_match(attribute_start, attribute_start, "+attributes")
        self:add_match(ep, ep, "-attributes")
        local attr_matches = self.attribute_parser:get_matches()
        -- copy over the matches found inside the attribute block
        for i=1,#attr_matches do
          self:add_match(unpack_match(attr_matches[i]))
        end
        -- restore state to prior to adding attribute parser:
        self.attribute_parser = nil
        self.attribute_start = nil
        self.attribute_slices = nil
        pos = ep + 1
      elseif status == "fail" then
        -- backtrack: re-parse the accepted pieces as plain inline content
        local slices = self.attribute_slices
        self.allow_attributes = false
        self.attribute_parser = nil
        self.attribute_start = nil
        for i=1,#slices do
          self:feed(unpack(slices[i]))
        end
        self.allow_attributes = true
        -- drop the saved pieces (the field is attribute_slices)
        self.attribute_slices = nil
        -- the failed piece was never saved, so scan it again from its start
        pos = sp
      elseif status == "continue" then
        self.attribute_slices[#self.attribute_slices + 1] = {sp,ep}
        pos = ep + 1
      end
    else
      -- find next interesting character:
      local newpos = bounded_find(subject, special, pos, endpos) or endpos + 1
      if newpos > pos then
        self:add_match(pos, newpos - 1, "str")
        pos = newpos
        if pos > endpos then
          break -- otherwise, fall through:
        end
      end
      -- if we get here, then newpos = pos,
      -- i.e. we have something interesting at pos
      local c = byte(subject, pos)

      if c == 13 or c == 10 then -- cr or lf
        -- treat CR LF as one break; the pattern must be a literal newline
        -- ("%n" is not a character class and would match the letter "n")
        if c == 13 and bounded_find(subject, "^\n", pos + 1, endpos) then
          self:add_match(pos, pos + 1, "softbreak")
          pos = pos + 2
        else
          self:add_match(pos, pos, "softbreak")
          pos = pos + 1
        end
      elseif self.verbatim > 0 then
        -- inside a verbatim span only a backtick run of the opening length
        -- is significant; everything else is literal
        if c == 96 then
          local _, endchar = bounded_find(subject, "^`+", pos, endpos)
          if endchar and endchar - pos + 1 == self.verbatim then
            -- check for raw attribute
            local sp, ep =
              bounded_find(subject, "^%{%=[^%s{}`]+%}", endchar + 1, endpos)
            if sp and self.verbatim_type == "verbatim" then -- raw
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              self:add_match(sp, ep, "raw_format")
              pos = ep + 1
            else
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              pos = endchar + 1
            end
            self.verbatim = 0
            self.verbatim_type = nil
          else
            -- backtick run of a different length: literal text
            endchar = endchar or endpos
            self:add_match(pos, endchar, "str")
            pos = endchar + 1
          end
        else
          self:add_match(pos, pos, "str")
          pos = pos + 1
        end
      else
        -- dispatch on the character; a matcher returning nil means
        -- "not special here", so record a plain character instead
        pos = (matchers[c] and matchers[c](self, pos, endpos))
               or self:single_char(pos)
      end
    end
  end
end
|
566
|
+
|
567
|
+
-- Report whether a verbatim span is currently open, i.e. whether the
-- recorded count of closing backticks still to be seen is positive.
function Parser:in_verbatim()
  local pending_backticks = self.verbatim
  return pending_backticks > 0
end
|
571
|
+
|
572
|
+
-- Return parse results and any warnings.
-- Collects the recorded matches in position order (firstpos..lastpos),
-- merging adjacent "str" matches, then tidies the end of the list: a final
-- softbreak is dropped, trailing spaces of a final "str" are trimmed, and
-- an unclosed verbatim span gets a warning plus a synthetic closing match.
-- Returns the array of matches and the warnings array.
function Parser:get_matches()
  local sorted = {}
  local subject = self.subject
  local lastsp, lastep, lastannot
  for i=self.firstpos, self.lastpos do
    if self.matches[i] then
      local sp, ep, annot = unpack_match(self.matches[i])
      if annot == "str" and lastannot == "str" and lastep + 1 == sp then
        -- consolidate adjacent strs
        sorted[#sorted] = make_match(lastsp, ep, annot)
        lastsp, lastep, lastannot = lastsp, ep, annot
      else
        sorted[#sorted + 1] = self.matches[i]
        lastsp, lastep, lastannot = sp, ep, annot
      end
    end
  end
  if #sorted > 0 then
    local last = sorted[#sorted]
    local startpos, endpos, annot = unpack_match(last)
    -- remove final softbreak
    if annot == "softbreak" then
      sorted[#sorted] = nil
      last = sorted[#sorted]
      if not last then
        -- the softbreak was the only match: nothing is left to tidy, and
        -- unpack_match must not be handed nil
        return sorted, self.warnings
      end
      startpos, endpos, annot = unpack_match(last)
    end
    -- remove trailing spaces
    if annot == "str" and byte(subject, endpos) == 32 then
      while endpos > startpos and byte(subject, endpos) == 32 do
        endpos = endpos - 1
      end
      sorted[#sorted] = make_match(startpos, endpos, annot)
    end
    if self.verbatim > 0 then -- unclosed verbatim
      self.warnings[#self.warnings + 1] =
        {startpos, "Unclosed verbatim"}
      -- close the span implicitly, reusing the span of the last match
      sorted[#sorted + 1] = make_match(startpos, endpos,
                                       "-" .. self.verbatim_type)
    end
  end
  return sorted, self.warnings
end
|
615
|
+
|
616
|
+
-- Module export: a table whose only field is the Parser prototype.
return { Parser = Parser }
|
617
|
+
|
618
|
+
|
619
|
+
--[[
|
620
|
+
Copyright (C) 2022 John MacFarlane
|
621
|
+
|
622
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
623
|
+
a copy of this software and associated documentation files (the
|
624
|
+
"Software"), to deal in the Software without restriction, including
|
625
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
626
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
627
|
+
permit persons to whom the Software is furnished to do so, subject to
|
628
|
+
the following conditions:
|
629
|
+
|
630
|
+
The above copyright notice and this permission notice shall be included
|
631
|
+
in all copies or substantial portions of the Software.
|
632
|
+
|
633
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
634
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
635
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
636
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
637
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
638
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
639
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
640
|
+
|
641
|
+
]]
|