djot 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,273 @@
1
+ local find, sub = string.find, string.sub
2
+ local match = require("djot.match")
3
+ local make_match = match.make_match
4
+
5
+ -- parser for attributes
6
+ -- attributes { id = "foo", class = "bar baz",
7
+ -- key1 = "val1", key2 = "val2" }
8
+ -- syntax:
9
+ --
10
+ -- attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}'
11
+ -- attribute <- identifier | class | keyval
12
+ -- identifier <- '#' name
13
+ -- class <- '.' name
14
+ -- name <- (nonspace, nonpunctuation other than ':', '_', '-')+
15
+ -- keyval <- key '=' val
16
+ -- key <- (ASCII_ALPHANUM | ':' | '_' | '-')+
17
+ -- val <- bareval | quotedval
18
+ -- bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+
19
+ -- quotedval <- '"' ([^"] | '\"')* '"'
20
+
21
-- Parser states. Each value indexes the `handlers` table below; the
-- handler for the current state is called once per input character and
-- returns the next state.
local SCANNING              = 0   -- between attributes, inside the braces
local SCANNING_ID           = 1   -- after '#'
local SCANNING_CLASS        = 2   -- after '.'
local SCANNING_KEY          = 3   -- inside a key, before '='
local SCANNING_VALUE        = 4   -- just after '=', value not yet started
local SCANNING_BARE_VALUE   = 5   -- inside an unquoted value
local SCANNING_QUOTED_VALUE = 6   -- inside a double-quoted value
local SCANNING_ESCAPED      = 7   -- just after a backslash in a quoted value
local SCANNING_COMMENT      = 8   -- between a pair of '%' characters
local FAIL                  = 9   -- input rejected
local DONE                  = 10  -- closing '}' reached
local START                 = 11  -- nothing consumed yet

-- Prototype table for parser objects (see AttributeParser:new).
local AttributeParser = {}

-- State -> function(self, pos) returning the next state.
local handlers = {}
38
+
39
-- START: the very first character must be the opening brace; anything
-- else rejects the input immediately.
handlers[START] = function(self, pos)
  local opens_block = find(self.subject, "^{", pos) ~= nil
  if not opens_block then
    return FAIL
  end
  return SCANNING
end
46
+
47
-- FAIL is absorbing: whatever character is offered, the state stays FAIL.
handlers[FAIL] = function(_self, _pos) return FAIL end
50
+
51
-- DONE is absorbing: whatever character is offered, the state stays DONE.
handlers[DONE] = function(_self, _pos) return DONE end
54
+
55
-- SCANNING: between attributes. Skips blanks, finishes on '}', and
-- otherwise dispatches on the first character of the next item,
-- recording its position in self.begin.
handlers[SCANNING] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch == "}" then
    return DONE
  end
  if ch == " " or ch == "\t" or ch == "\n" or ch == "\r" then
    return SCANNING
  end

  -- Decide which kind of item starts here.
  local next_state
  if ch == "#" then
    next_state = SCANNING_ID
  elseif ch == "%" then
    next_state = SCANNING_COMMENT
  elseif ch == "." then
    next_state = SCANNING_CLASS
  elseif find(ch, "^[%a%d_:-]") then
    next_state = SCANNING_KEY
  end

  if next_state == nil then
    return FAIL
  end

  -- begin marks the item's first character (for '#', '.' and '%' that
  -- is the sigil itself).
  self.begin = pos
  return next_state
end
77
+
78
-- SCANNING_COMMENT: swallow every character until the next '%', which
-- ends the comment and resumes normal scanning.
handlers[SCANNING_COMMENT] = function(self, pos)
  local closes_comment = sub(self.subject, pos, pos) == "%"
  if closes_comment then
    return SCANNING
  end
  return SCANNING_COMMENT
end
85
+
86
-- SCANNING_ID: inside an identifier introduced by '#'. Name characters
-- extend it; whitespace or '}' ends it and emits an "id" match covering
-- the text after the '#'. Any other character is an error.
handlers[SCANNING_ID] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch == "_" or ch == "-" or ch == ":" or find(ch, "^[^%s%p]") then
    return SCANNING_ID
  end

  local next_state
  if ch == "}" then
    next_state = DONE
  elseif find(ch, "^%s") then
    next_state = SCANNING
  else
    return FAIL
  end

  -- self.begin is the '#' itself, so a bare '#' (nothing consumed after
  -- it) produces no match.
  if self.lastpos > self.begin then
    self:add_match(self.begin + 1, self.lastpos, "id")
  end
  self.begin = nil
  return next_state
end
106
+
107
-- SCANNING_CLASS: inside a class name introduced by '.'. Name characters
-- extend it; whitespace or '}' ends it and emits a "class" match covering
-- the text after the '.'. Any other character is an error.
handlers[SCANNING_CLASS] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch == "_" or ch == "-" or ch == ":" or find(ch, "^[^%s%p]") then
    return SCANNING_CLASS
  end

  local next_state
  if ch == "}" then
    next_state = DONE
  elseif find(ch, "^%s") then
    next_state = SCANNING
  else
    return FAIL
  end

  -- self.begin is the '.' itself, so a bare '.' (nothing consumed after
  -- it) produces no match.
  if self.lastpos > self.begin then
    self:add_match(self.begin + 1, self.lastpos, "class")
  end
  self.begin = nil
  return next_state
end
127
+
128
-- SCANNING_KEY: inside a key. Key characters extend it; '=' ends it,
-- emits a "key" match and moves on to the value. Anything else fails.
handlers[SCANNING_KEY] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch ~= "=" then
    if find(ch, "^[%a%d_:-]") then
      return SCANNING_KEY
    end
    return FAIL
  end

  -- The key spans from its first character up to the one before '='.
  self:add_match(self.begin, self.lastpos, "key")
  self.begin = nil
  return SCANNING_VALUE
end
140
+
141
-- SCANNING_VALUE: first character after '='. A double quote starts a
-- quoted value, a key-like character starts a bare value, and anything
-- else fails. The start position is stored in self.begin.
handlers[SCANNING_VALUE] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  local next_state
  if ch == '"' then
    next_state = SCANNING_QUOTED_VALUE
  elseif find(ch, "^[%a%d_:-]") then
    next_state = SCANNING_BARE_VALUE
  else
    return FAIL
  end

  self.begin = pos
  return next_state
end
153
+
154
-- SCANNING_BARE_VALUE: inside an unquoted value. Value characters extend
-- it; whitespace or '}' ends it and emits a "value" match. Any other
-- character is an error.
handlers[SCANNING_BARE_VALUE] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if find(ch, "^[%a%d_:-]") then
    return SCANNING_BARE_VALUE
  end

  local next_state
  if ch == "}" then
    next_state = DONE
  elseif find(ch, "^%s") then
    next_state = SCANNING
  else
    return FAIL
  end

  -- Unlike ids and classes there is no sigil to skip: the match starts
  -- at self.begin.
  self:add_match(self.begin, self.lastpos, "value")
  self.begin = nil
  return next_state
end
170
+
171
-- SCANNING_ESCAPED: the character following a backslash is accepted
-- unconditionally, then scanning of the quoted value resumes.
handlers[SCANNING_ESCAPED] = function(_self, _pos) return SCANNING_QUOTED_VALUE end
174
+
175
-- SCANNING_QUOTED_VALUE: inside a double-quoted value. The closing quote
-- emits a "value" match for the text between the quotes; a backslash
-- escapes the next character; braces are rejected; a newline emits the
-- text seen so far and keeps scanning.
handlers[SCANNING_QUOTED_VALUE] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch == '"' then
    -- self.begin is the opening quote, hence the + 1.
    self:add_match(self.begin + 1, self.lastpos, "value")
    self.begin = nil
    return SCANNING
  end

  if ch == "\\" then
    return SCANNING_ESCAPED
  end

  if ch == "{" or ch == "}" then
    return FAIL
  end

  if ch == "\n" then
    -- NOTE(review): self.begin is not advanced here, so the match
    -- emitted at the closing quote covers this line's text again --
    -- confirm that consumers of the matches expect that.
    self:add_match(self.begin + 1, self.lastpos, "value")
  end

  return SCANNING_QUOTED_VALUE
end
192
+
193
-- Create a parser over `subject`. The new object starts in the START
-- state with an empty match list; `begin`, `lastpos` and `failed` are
-- left unset (nil) until the handlers or feed assign them.
function AttributeParser:new(subject)
  -- Method lookups on the instance fall back to the prototype.
  self.__index = self
  return setmetatable({
    subject = subject,
    state = START,
    matches = {},
  }, self)
end
206
+
207
-- Append a match built by make_match(sp, ep, tag) to the end of this
-- parser's match list.
function AttributeParser:add_match(sp, ep, tag)
  local matches = self.matches
  matches[#matches + 1] = make_match(sp, ep, tag)
end
210
+
211
-- Return the list of matches accumulated so far (the parser's own
-- table, not a copy).
function AttributeParser:get_matches()
  local collected = self.matches
  return collected
end
214
+
215
-- Run the state machine over subject[startpos..endpos] (inclusive),
-- one character at a time. Returns a status string and a position:
--   "done"     -- position of the closing '}'
--   "fail"     -- position of the first character that could not be parsed
--   "continue" -- endpos; the slice was consumed without finishing
-- The parser keeps its state between calls, so a later slice can be fed
-- after a "continue".
function AttributeParser:feed(startpos, endpos)
  for pos = startpos, endpos do
    local state = handlers[self.state](self, pos)
    self.state = state

    if state == DONE then
      -- lastpos is deliberately left at the previous character.
      return "done", pos
    end

    self.lastpos = pos
    if state == FAIL then
      return "fail", pos
    end
  end
  return "continue", endpos
end
237
+
238
+ --[[
239
+ local test = function()
240
+ local parser = AttributeParser:new("{a=b #ident\n.class\nkey=val1\n .class key2=\"val two \\\" ok\" x")
241
+ local x,y,z = parser:feed(1,56)
242
+ print(require'inspect'(parser:get_matches{}))
243
+ end
244
+
245
+ test()
246
+ --]]
247
+
248
-- Module exports: only the parser prototype is public.
local exports = {
  AttributeParser = AttributeParser,
}
return exports
249
+
250
+
251
+ --[[
252
+ Copyright (C) 2022 John MacFarlane
253
+
254
+ Permission is hereby granted, free of charge, to any person obtaining
255
+ a copy of this software and associated documentation files (the
256
+ "Software"), to deal in the Software without restriction, including
257
+ without limitation the rights to use, copy, modify, merge, publish,
258
+ distribute, sublicense, and/or sell copies of the Software, and to
259
+ permit persons to whom the Software is furnished to do so, subject to
260
+ the following conditions:
261
+
262
+ The above copyright notice and this permission notice shall be included
263
+ in all copies or substantial portions of the Software.
264
+
265
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
266
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
267
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
268
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
269
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
270
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
271
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
272
+
273
+ ]]