kdl 1.0.6 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +8 -1
- data/.gitignore +1 -0
- data/.gitmodules +4 -0
- data/Gemfile +6 -1
- data/README.md +67 -7
- data/Rakefile +6 -1
- data/bin/kdl +1 -1
- data/kdl.gemspec +2 -2
- data/lib/kdl/document.rb +60 -2
- data/lib/kdl/error.rb +24 -0
- data/lib/kdl/kdl.tab.rb +305 -231
- data/lib/kdl/kdl.yy +57 -49
- data/lib/kdl/node.rb +116 -13
- data/lib/kdl/parser_common.rb +28 -0
- data/lib/kdl/string_dumper.rb +32 -33
- data/lib/kdl/tokenizer.rb +387 -136
- data/lib/kdl/types/base64.rb +3 -1
- data/lib/kdl/types/country/iso3166_countries.rb +3 -1
- data/lib/kdl/types/country/iso3166_subdivisions.rb +3 -1
- data/lib/kdl/types/country.rb +4 -2
- data/lib/kdl/types/currency/iso4217_currencies.rb +3 -1
- data/lib/kdl/types/currency.rb +3 -1
- data/lib/kdl/types/date_time.rb +5 -3
- data/lib/kdl/types/decimal.rb +3 -1
- data/lib/kdl/types/duration/iso8601_parser.rb +3 -1
- data/lib/kdl/types/duration.rb +3 -1
- data/lib/kdl/types/email/parser.rb +10 -8
- data/lib/kdl/types/email.rb +3 -1
- data/lib/kdl/types/hostname/validator.rb +3 -1
- data/lib/kdl/types/hostname.rb +3 -1
- data/lib/kdl/types/ip.rb +3 -1
- data/lib/kdl/types/irl/parser.rb +10 -8
- data/lib/kdl/types/irl.rb +3 -1
- data/lib/kdl/types/regex.rb +3 -1
- data/lib/kdl/types/url.rb +3 -1
- data/lib/kdl/types/url_template.rb +6 -4
- data/lib/kdl/types/uuid.rb +3 -1
- data/lib/kdl/types.rb +2 -0
- data/lib/kdl/v1/document.rb +19 -0
- data/lib/kdl/v1/kdl.tab.rb +594 -0
- data/lib/kdl/v1/kdl.yy +89 -0
- data/lib/kdl/v1/node.rb +32 -0
- data/lib/kdl/v1/string_dumper.rb +30 -0
- data/lib/kdl/v1/tokenizer.rb +298 -0
- data/lib/kdl/v1/value.rb +91 -0
- data/lib/kdl/v1.rb +13 -0
- data/lib/kdl/value.rb +87 -15
- data/lib/kdl/version.rb +3 -1
- data/lib/kdl.rb +47 -1
- metadata +14 -7
data/lib/kdl/tokenizer.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'bigdecimal'
|
2
4
|
|
3
5
|
module KDL
|
4
6
|
class Tokenizer
|
5
|
-
class Error <
|
7
|
+
class Error < ::KDL::Error
|
6
8
|
def initialize(message, line, column)
|
7
9
|
super("#{message} (#{line}:#{column})")
|
8
10
|
end
|
@@ -36,32 +38,47 @@ module KDL
|
|
36
38
|
SYMBOLS = {
|
37
39
|
'{' => :LBRACE,
|
38
40
|
'}' => :RBRACE,
|
39
|
-
'
|
40
|
-
'
|
41
|
-
';' => :SEMICOLON
|
41
|
+
';' => :SEMICOLON,
|
42
|
+
'=' => :EQUALS
|
42
43
|
}
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
WHITESPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
|
46
|
+
"\u2000", "\u2001", "\u2002", "\u2003",
|
47
|
+
"\u2004", "\u2005", "\u2006", "\u2007",
|
48
|
+
"\u2008", "\u2009", "\u200A", "\u202F",
|
49
|
+
"\u205F", "\u3000"]
|
50
|
+
WS = "[#{Regexp.escape(WHITESPACE.join)}]"
|
51
|
+
WS_STAR = /\A#{WS}*\z/
|
52
|
+
WS_PLUS = /\A#{WS}+\z/
|
53
|
+
|
54
|
+
NEWLINES = ["\u000A", "\u0085", "\u000B", "\u000C", "\u2028", "\u2029"]
|
55
|
+
NEWLINES_PATTERN = Regexp.new("(#{NEWLINES.map{Regexp.escape(_1)}.join('|')}|\r\n?)", Regexp::MULTILINE)
|
49
56
|
|
50
|
-
|
57
|
+
OTHER_NON_IDENTIFIER_CHARS = ("\x0".."\x20").to_a - WHITESPACE
|
51
58
|
|
52
|
-
NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join
|
53
|
-
IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}
|
54
|
-
INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9
|
59
|
+
NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join}()[]/\\\"##{WHITESPACE.join}#{OTHER_NON_IDENTIFIER_CHARS.join}"
|
60
|
+
IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}]/
|
61
|
+
INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9]/
|
55
62
|
|
56
|
-
|
57
|
-
|
63
|
+
FORBIDDEN = [
|
64
|
+
*"\u0000".."\u0008",
|
65
|
+
*"\u000E".."\u001F",
|
66
|
+
"\u007F",
|
67
|
+
*"\u200E".."\u200F",
|
68
|
+
*"\u202A".."\u202E",
|
69
|
+
*"\u2066".."\u2069",
|
70
|
+
"\uFEFF"
|
71
|
+
]
|
72
|
+
|
73
|
+
# Matches a leading `/- kdl-version N` directive at the very start of the
# document and captures N. The directive line may end with any newline the
# tokenizer accepts: CR and CRLF are matched explicitly because "\r" is not
# a member of NEWLINES.
VERSION_PATTERN = /\A\/-[#{WHITESPACE.join}]*kdl-version[#{WHITESPACE.join}]+(\d+)[#{WHITESPACE.join}]*(?:\r\n?|[#{NEWLINES.join}])/
|
58
74
|
|
59
75
|
def initialize(str, start = 0)
|
60
|
-
@str = str
|
76
|
+
@str = debom(str)
|
61
77
|
@context = nil
|
62
78
|
@rawstring_hashes = nil
|
79
|
+
@start = start
|
63
80
|
@index = start
|
64
|
-
@buffer = ""
|
81
|
+
@buffer = +""
|
65
82
|
@done = false
|
66
83
|
@previous_context = nil
|
67
84
|
@line = 1
|
@@ -70,122 +87,175 @@ module KDL
|
|
70
87
|
@last_token = nil
|
71
88
|
end
|
72
89
|
|
90
|
+
# Returns the version number announced by a `/- kdl-version N` directive at
# the start of the input as an Integer, or nil when VERSION_PATTERN does not
# match.
def version_directive
  @str[VERSION_PATTERN, 1]&.to_i
end
|
95
|
+
|
96
|
+
# True once the tokenizer has emitted its EOF token (the @done flag is set
# by next_token when it reaches the end of input).
def done?
  @done
end
|
99
|
+
|
100
|
+
# Reads the character at index +i+ of the input (nil past the end).
# Raises a tokenizer error if that character is in FORBIDDEN.
def [](i)
  char = @str[i]
  raise_error "Forbidden character: #{char.inspect}" if FORBIDDEN.include?(char)
  char
end
|
105
|
+
|
106
|
+
# Drains the tokenizer: calls next_token until done? is true and returns
# every token produced, in order.
def tokens
  collected = []
  collected << next_token until done?
  collected
end
|
113
|
+
|
73
114
|
def next_token
|
74
115
|
@context = nil
|
75
116
|
@previous_context = nil
|
76
117
|
@line_at_start = @line
|
77
118
|
@column_at_start = @column
|
78
119
|
loop do
|
79
|
-
c =
|
120
|
+
c = self[@index]
|
80
121
|
case @context
|
81
122
|
when nil
|
82
123
|
case c
|
83
124
|
when '"'
|
84
|
-
self
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
@
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
@
|
97
|
-
|
125
|
+
if self[@index + 1] == '"' && self[@index + 2] == '"'
|
126
|
+
nl = expect_newline(@index + 3)
|
127
|
+
self.context = :multiline_string
|
128
|
+
@buffer = +''
|
129
|
+
traverse(3 + nl.length)
|
130
|
+
else
|
131
|
+
self.context = :string
|
132
|
+
@buffer = +''
|
133
|
+
traverse(1)
|
134
|
+
end
|
135
|
+
when '#'
|
136
|
+
if self[@index + 1] == '"'
|
137
|
+
if self[@index + 2] == '"' && self[@index + 3] == '"'
|
138
|
+
nl = expect_newline(@index + 4)
|
139
|
+
self.context = :multiline_rawstring
|
140
|
+
@rawstring_hashes = 1
|
141
|
+
@buffer = +''
|
142
|
+
traverse(4 + nl.length)
|
143
|
+
next
|
144
|
+
else
|
145
|
+
self.context = :rawstring
|
146
|
+
traverse(2)
|
147
|
+
@rawstring_hashes = 1
|
148
|
+
@buffer = +''
|
149
|
+
next
|
150
|
+
end
|
151
|
+
elsif self[@index + 1] == '#'
|
152
|
+
i = @index + 2
|
153
|
+
@rawstring_hashes = 2
|
154
|
+
while self[i] == '#'
|
98
155
|
@rawstring_hashes += 1
|
99
156
|
i += 1
|
100
157
|
end
|
101
|
-
if
|
102
|
-
self
|
103
|
-
|
104
|
-
|
105
|
-
|
158
|
+
if self[i] == '"'
|
159
|
+
if self[i + 1] == '"' && self[i + 2] == '"'
|
160
|
+
nl = expect_newline(i + 3)
|
161
|
+
self.context = :multiline_rawstring
|
162
|
+
traverse(@rawstring_hashes + 3 + nl.length)
|
163
|
+
@buffer = +''
|
164
|
+
next
|
165
|
+
else
|
166
|
+
self.context = :rawstring
|
167
|
+
traverse(@rawstring_hashes + 1)
|
168
|
+
@buffer = +''
|
169
|
+
next
|
170
|
+
end
|
106
171
|
end
|
107
172
|
end
|
108
|
-
self.context = :
|
109
|
-
@buffer = c
|
173
|
+
self.context = :keyword
|
174
|
+
@buffer = +c
|
110
175
|
traverse(1)
|
111
|
-
when
|
112
|
-
n =
|
176
|
+
when '-'
|
177
|
+
n = self[@index + 1]
|
178
|
+
if n =~ /[0-9]/
|
179
|
+
n2 = self[@index + 2]
|
180
|
+
if n == '0' && n2 =~ /[box]/
|
181
|
+
self.context = integer_context(n2)
|
182
|
+
traverse(3)
|
183
|
+
else
|
184
|
+
self.context = :decimal
|
185
|
+
traverse(1)
|
186
|
+
end
|
187
|
+
else
|
188
|
+
self.context = :ident
|
189
|
+
traverse(1)
|
190
|
+
end
|
191
|
+
@buffer = +c
|
192
|
+
when /[0-9+]/
|
193
|
+
n = self[@index + 1]
|
113
194
|
if c == '0' && n =~ /[box]/
|
114
195
|
traverse(2)
|
115
|
-
@buffer = ''
|
196
|
+
@buffer = +''
|
116
197
|
self.context = integer_context(n)
|
117
|
-
elsif c == '-' && n == '0' && (n2 = @str[@index + 2]) =~ /[box]/
|
118
|
-
traverse(3)
|
119
|
-
@buffer = '-'
|
120
|
-
self.context = integer_context(n2)
|
121
198
|
else
|
122
199
|
self.context = :decimal
|
123
|
-
@buffer = c
|
200
|
+
@buffer = +c
|
124
201
|
traverse(1)
|
125
202
|
end
|
126
203
|
when '\\'
|
127
204
|
t = Tokenizer.new(@str, @index + 1)
|
128
205
|
la = t.next_token
|
129
206
|
if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
|
130
|
-
|
131
|
-
|
132
|
-
|
207
|
+
traverse_to(t.index)
|
208
|
+
@buffer = "#{c}#{la[1].value}"
|
209
|
+
@buffer << "\n" if lan == :NEWLINE
|
210
|
+
self.context = :whitespace
|
133
211
|
else
|
134
212
|
raise_error "Unexpected '\\' (#{la[0]})"
|
135
213
|
end
|
214
|
+
when '='
|
215
|
+
self.context = :equals
|
216
|
+
@buffer = +c
|
217
|
+
traverse(1)
|
136
218
|
when *SYMBOLS.keys
|
137
|
-
return token(SYMBOLS[c], c).tap { traverse(1) }
|
138
|
-
when "\r"
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
traverse(2)
|
143
|
-
new_line
|
144
|
-
end
|
145
|
-
else
|
146
|
-
return token(:NEWLINE, c).tap do
|
147
|
-
traverse(1)
|
148
|
-
new_line
|
149
|
-
end
|
150
|
-
end
|
151
|
-
when *NEWLINES
|
152
|
-
return token(:NEWLINE, c).tap do
|
153
|
-
traverse(1)
|
154
|
-
new_line
|
219
|
+
return token(SYMBOLS[c], -c).tap { traverse(1) }
|
220
|
+
when *NEWLINES, "\r"
|
221
|
+
nl = expect_newline
|
222
|
+
return token(:NEWLINE, -nl).tap do
|
223
|
+
traverse(nl.length)
|
155
224
|
end
|
156
225
|
when "/"
|
157
|
-
if
|
226
|
+
if self[@index + 1] == '/'
|
158
227
|
self.context = :single_line_comment
|
159
228
|
traverse(2)
|
160
|
-
elsif
|
229
|
+
elsif self[@index + 1] == '*'
|
161
230
|
self.context = :multi_line_comment
|
162
231
|
@comment_nesting = 1
|
163
232
|
traverse(2)
|
164
|
-
elsif
|
233
|
+
elsif self[@index + 1] == '-'
|
165
234
|
return token(:SLASHDASH, '/-').tap { traverse(2) }
|
166
235
|
else
|
167
236
|
self.context = :ident
|
168
|
-
@buffer = c
|
237
|
+
@buffer = +c
|
169
238
|
traverse(1)
|
170
239
|
end
|
171
|
-
when *
|
240
|
+
when *WHITESPACE
|
172
241
|
self.context = :whitespace
|
173
|
-
@buffer = c
|
242
|
+
@buffer = +c
|
174
243
|
traverse(1)
|
175
244
|
when nil
|
176
245
|
return [false, token(:EOF, :EOF)[1]] if @done
|
246
|
+
|
177
247
|
@done = true
|
178
248
|
return token(:EOF, :EOF)
|
179
249
|
when INITIAL_IDENTIFIER_CHARS
|
180
250
|
self.context = :ident
|
181
|
-
@buffer = c
|
251
|
+
@buffer = +c
|
182
252
|
traverse(1)
|
183
253
|
when '('
|
184
254
|
@type_context = true
|
185
|
-
return token(:LPAREN, c).tap { traverse(1) }
|
255
|
+
return token(:LPAREN, -c).tap { traverse(1) }
|
186
256
|
when ')'
|
187
257
|
@type_context = false
|
188
|
-
return token(:RPAREN, c).tap { traverse(1) }
|
258
|
+
return token(:RPAREN, -c).tap { traverse(1) }
|
189
259
|
else
|
190
260
|
raise_error "Unexpected character #{c.inspect}"
|
191
261
|
end
|
@@ -193,49 +263,111 @@ module KDL
|
|
193
263
|
case c
|
194
264
|
when IDENTIFIER_CHARS
|
195
265
|
traverse(1)
|
196
|
-
@buffer
|
266
|
+
@buffer << c
|
267
|
+
else
|
268
|
+
case @buffer
|
269
|
+
when 'true', 'false', 'null', 'inf', '-inf', 'nan'
|
270
|
+
raise_error "Identifier cannot be a literal"
|
271
|
+
when /\A\.\d/
|
272
|
+
raise_error "Identifier cannot look like an illegal float"
|
273
|
+
else
|
274
|
+
return token(:IDENT, -@buffer)
|
275
|
+
end
|
276
|
+
end
|
277
|
+
when :keyword
|
278
|
+
case c
|
279
|
+
when /[a-z\-]/
|
280
|
+
traverse(1)
|
281
|
+
@buffer << c
|
197
282
|
else
|
198
283
|
case @buffer
|
199
|
-
when 'true' then return token(:TRUE, true)
|
200
|
-
when 'false' then return token(:FALSE, false)
|
201
|
-
when 'null' then return token(:NULL, nil)
|
202
|
-
|
284
|
+
when '#true' then return token(:TRUE, true)
|
285
|
+
when '#false' then return token(:FALSE, false)
|
286
|
+
when '#null' then return token(:NULL, nil)
|
287
|
+
when '#inf' then return token(:FLOAT, Float::INFINITY)
|
288
|
+
when '#-inf' then return token(:FLOAT, -Float::INFINITY)
|
289
|
+
when '#nan' then return token(:FLOAT, Float::NAN)
|
290
|
+
else raise_error "Unknown keyword #{@buffer.inspect}"
|
203
291
|
end
|
204
292
|
end
|
205
293
|
when :string
|
206
294
|
case c
|
207
295
|
when '\\'
|
208
|
-
@buffer
|
209
|
-
|
210
|
-
|
296
|
+
@buffer << c
|
297
|
+
c2 = self[@index + 1]
|
298
|
+
@buffer << c2
|
299
|
+
if c2.match?(NEWLINES_PATTERN)
|
300
|
+
i = 2
|
301
|
+
while self[@index + i]&.match?(NEWLINES_PATTERN)
|
302
|
+
@buffer << self[@index + i]
|
303
|
+
i+=1
|
304
|
+
end
|
305
|
+
traverse(i)
|
306
|
+
else
|
307
|
+
traverse(2)
|
308
|
+
end
|
211
309
|
when '"'
|
212
|
-
return token(:STRING,
|
310
|
+
return token(:STRING, -unescape(@buffer)).tap { traverse(1) }
|
311
|
+
when *NEWLINES, "\r"
|
312
|
+
raise_error "Unexpected NEWLINE in string literal"
|
213
313
|
when nil
|
214
314
|
raise_error "Unterminated string literal"
|
215
315
|
else
|
216
|
-
@buffer
|
316
|
+
@buffer << c
|
317
|
+
traverse(1)
|
318
|
+
end
|
319
|
+
when :multiline_string
|
320
|
+
case c
|
321
|
+
when '\\'
|
322
|
+
@buffer << c
|
323
|
+
@buffer << self[@index + 1]
|
324
|
+
traverse(2)
|
325
|
+
when '"'
|
326
|
+
if self[@index + 1] == '"' && self[@index + 2] == '"'
|
327
|
+
return token(:STRING, -unescape_non_ws(dedent(unescape_ws(@buffer)))).tap { traverse(3) }
|
328
|
+
end
|
329
|
+
@buffer << c
|
330
|
+
traverse(1)
|
331
|
+
when nil
|
332
|
+
raise_error "Unterminated multi-line string literal"
|
333
|
+
else
|
334
|
+
@buffer << c
|
217
335
|
traverse(1)
|
218
336
|
end
|
219
337
|
when :rawstring
|
220
338
|
raise_error "Unterminated rawstring literal" if c.nil?
|
221
339
|
|
222
|
-
|
340
|
+
case c
|
341
|
+
when '"'
|
223
342
|
h = 0
|
224
|
-
while
|
225
|
-
|
343
|
+
h += 1 while self[@index + 1 + h] == '#' && h < @rawstring_hashes
|
344
|
+
if h == @rawstring_hashes
|
345
|
+
return token(:RAWSTRING, -@buffer).tap { traverse(1 + h) }
|
226
346
|
end
|
347
|
+
when *NEWLINES, "\r"
|
348
|
+
raise_error "Unexpected NEWLINE in rawstring literal"
|
349
|
+
end
|
350
|
+
|
351
|
+
@buffer << c
|
352
|
+
traverse(1)
|
353
|
+
when :multiline_rawstring
|
354
|
+
raise_error "Unterminated multi-line rawstring literal" if c.nil?
|
355
|
+
|
356
|
+
if c == '"' && self[@index + 1] == '"' && self[@index + 2] == '"' && self[@index + 3] == '#'
|
357
|
+
h = 1
|
358
|
+
h += 1 while self[@index + 3 + h] == '#' && h < @rawstring_hashes
|
227
359
|
if h == @rawstring_hashes
|
228
|
-
return token(:RAWSTRING, @buffer).tap { traverse(
|
360
|
+
return token(:RAWSTRING, -dedent(@buffer)).tap { traverse(3 + h) }
|
229
361
|
end
|
230
362
|
end
|
231
363
|
|
232
|
-
@buffer
|
364
|
+
@buffer << c
|
233
365
|
traverse(1)
|
234
366
|
when :decimal
|
235
367
|
case c
|
236
368
|
when /[0-9.\-+_eE]/
|
237
369
|
traverse(1)
|
238
|
-
@buffer
|
370
|
+
@buffer << c
|
239
371
|
else
|
240
372
|
return parse_decimal(@buffer)
|
241
373
|
end
|
@@ -243,7 +375,7 @@ module KDL
|
|
243
375
|
case c
|
244
376
|
when /[0-9a-fA-F_]/
|
245
377
|
traverse(1)
|
246
|
-
@buffer
|
378
|
+
@buffer << c
|
247
379
|
else
|
248
380
|
return parse_hexadecimal(@buffer)
|
249
381
|
end
|
@@ -251,7 +383,7 @@ module KDL
|
|
251
383
|
case c
|
252
384
|
when /[0-7_]/
|
253
385
|
traverse(1)
|
254
|
-
@buffer
|
386
|
+
@buffer << c
|
255
387
|
else
|
256
388
|
return parse_octal(@buffer)
|
257
389
|
end
|
@@ -259,26 +391,27 @@ module KDL
|
|
259
391
|
case c
|
260
392
|
when /[01_]/
|
261
393
|
traverse(1)
|
262
|
-
@buffer
|
394
|
+
@buffer << c
|
263
395
|
else
|
264
396
|
return parse_binary(@buffer)
|
265
397
|
end
|
266
398
|
when :single_line_comment
|
267
|
-
|
399
|
+
case c
|
400
|
+
when *NEWLINES, "\r"
|
268
401
|
self.context = nil
|
269
402
|
@column_at_start = @column
|
270
403
|
next
|
271
|
-
|
404
|
+
when nil
|
272
405
|
@done = true
|
273
406
|
return token(:EOF, :EOF)
|
274
407
|
else
|
275
408
|
traverse(1)
|
276
409
|
end
|
277
410
|
when :multi_line_comment
|
278
|
-
if c == '/' &&
|
411
|
+
if c == '/' && self[@index + 1] == '*'
|
279
412
|
@comment_nesting += 1
|
280
413
|
traverse(2)
|
281
|
-
elsif c == '*' &&
|
414
|
+
elsif c == '*' && self[@index + 1] == '/'
|
282
415
|
@comment_nesting -= 1
|
283
416
|
traverse(2)
|
284
417
|
if @comment_nesting == 0
|
@@ -288,16 +421,42 @@ module KDL
|
|
288
421
|
traverse(1)
|
289
422
|
end
|
290
423
|
when :whitespace
|
291
|
-
if
|
424
|
+
if WHITESPACE.include?(c)
|
292
425
|
traverse(1)
|
293
|
-
@buffer
|
294
|
-
elsif c ==
|
426
|
+
@buffer << c
|
427
|
+
elsif c == '='
|
428
|
+
self.context = :equals
|
429
|
+
@buffer << c
|
430
|
+
traverse(1)
|
431
|
+
elsif c == "/" && self[@index + 1] == '*'
|
295
432
|
self.context = :multi_line_comment
|
296
433
|
@comment_nesting = 1
|
297
434
|
traverse(2)
|
435
|
+
elsif c == "\\"
|
436
|
+
t = Tokenizer.new(@str, @index + 1)
|
437
|
+
la = t.next_token
|
438
|
+
if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
|
439
|
+
traverse_to(t.index)
|
440
|
+
@buffer << "#{c}#{la[1].value}"
|
441
|
+
@buffer << "\n" if lan == :NEWLINE
|
442
|
+
else
|
443
|
+
raise_error "Unexpected '\\' (#{la[0]})"
|
444
|
+
end
|
298
445
|
else
|
299
|
-
return token(:WS,
|
446
|
+
return token(:WS, -@buffer)
|
300
447
|
end
|
448
|
+
when :equals
|
449
|
+
t = Tokenizer.new(@str, @index)
|
450
|
+
la = t.next_token
|
451
|
+
if la[0] == :WS
|
452
|
+
@buffer << la[1].value
|
453
|
+
traverse_to(t.index)
|
454
|
+
end
|
455
|
+
return token(:EQUALS, -@buffer)
|
456
|
+
else
|
457
|
+
# :nocov:
|
458
|
+
raise_error "Unknown context `#{@context}'"
|
459
|
+
# :nocov:
|
301
460
|
end
|
302
461
|
end
|
303
462
|
end
|
@@ -309,43 +468,69 @@ module KDL
|
|
309
468
|
end
|
310
469
|
|
311
470
|
# Advances the cursor by +n+ characters, keeping @line and @column in sync
# with every character that is stepped over.
#
# Each character is read through #[], so a forbidden character inside the
# traversed span raises a tokenizer error.
def traverse(n = 1)
  n.times do |offset|
    position = @index + offset
    case self[position]
    when "\r"
      # CR immediately followed by LF is a single newline: the LF branch
      # below counts the line. A lone CR is a newline by itself (see
      # expect_newline), so it has to bump the line counter here.
      @line += 1 unless @str[position + 1] == "\n"
      @column = 1
    when *NEWLINES
      @line += 1
      @column = 1
    else
      @column += 1
    end
  end
  @index += n
end
|
315
484
|
|
316
|
-
def
|
317
|
-
|
485
|
+
# Moves the cursor forward to absolute index +i+ by traversing the distance
# from the current @index, so line/column bookkeeping stays in traverse.
def traverse_to(i)
  distance = i - @index
  traverse(distance)
end
|
319
488
|
|
320
|
-
def
|
321
|
-
|
322
|
-
@line
|
489
|
+
# Raises a tokenizer Error for +error+.
#
# - An Error instance is re-raised unchanged.
# - A String becomes the message of a new Error at the current line/column.
# - Anything else contributes its #message to a new Error at the current
#   line/column.
def raise_error(error)
  raise error if error.is_a?(Error)

  message = error.is_a?(String) ? error : error.message
  raise Error.new(message, @line, @column)
end
|
324
496
|
|
325
497
|
# Switches the tokenizer into context +val+, remembering the current context
# in @previous_context.
#
# Raises a tokenizer error when +val+ is not permitted inside a type
# annotation (@type_context is set), or when the previous token was RPAREN
# and +val+ is not permitted directly after a type annotation.
def context=(val)
  raise_error "#{val} context not allowed in type declaration" if @type_context && !allowed_in_type?(val)

  follows_type = @last_token && @last_token[0] == :RPAREN
  raise_error 'Comments are not allowed after a type declaration' if follows_type && !allowed_after_type?(val)

  @previous_context = @context
  @context = val
end
|
334
506
|
|
507
|
+
# Whether context +val+ may be entered while inside a type annotation.
def allowed_in_type?(val)
  case val
  when :ident, :string, :rawstring, :multi_line_comment, :whitespace then true
  else false
  end
end
|
510
|
+
|
511
|
+
# Whether context +val+ may directly follow a type annotation: everything
# except a single-line comment.
def allowed_after_type?(val)
  forbidden = :single_line_comment
  !forbidden.equal?(val)
end
|
514
|
+
|
335
515
|
# Restores the context saved by the last context= call and clears the saved
# value. Returns nil.
def revert_context
  @context, @previous_context = @previous_context, nil
  nil
end
|
339
519
|
|
340
|
-
def
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
520
|
+
# Returns the newline sequence starting at index +i+ (defaults to the
# cursor): "\r\n" for CRLF, otherwise the single CR or NEWLINES character.
# Raises a tokenizer error when the character at +i+ is not a newline.
def expect_newline(i = @index)
  first = self[i]
  if first == "\r"
    follower = self[i + 1]
    return follower == "\n" ? "#{first}#{follower}" : first
  end
  return first if NEWLINES.include?(first)

  raise_error "Expected NEWLINE, found '#{first}'"
end
|
351
536
|
|
@@ -357,6 +542,18 @@ module KDL
|
|
357
542
|
end
|
358
543
|
end
|
359
544
|
|
545
|
+
# Turns the text collected in decimal context into a token.
#
# Text containing '.', 'e' or 'E' is handed to parse_float; anything else is
# parsed as a base-10 integer. If parsing fails but the text is made up of
# identifier characters (and does not start with a digit), it is emitted as
# an IDENT token instead; otherwise the failure is re-raised through
# raise_error.
def parse_decimal(s)
  if s =~ /[.E]/i
    parse_float(s)
  else
    token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
  end
rescue => failure
  identifier_like = s[0] =~ INITIAL_IDENTIFIER_CHARS &&
                    s[1..-1].each_char.all? { |ch| ch =~ IDENTIFIER_CHARS }
  raise_error(failure) unless identifier_like

  token(:IDENT, -s)
end
|
556
|
+
|
360
557
|
def parse_float(s)
|
361
558
|
match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.([\d_]+))?(?:[eE]([-+]?[\d_]+))?$/)
|
362
559
|
raise_error "Invalid floating point value #{s}" if match.nil?
|
@@ -375,40 +572,94 @@ module KDL
|
|
375
572
|
|
376
573
|
# Builds an INTEGER token from the base-16 digits in +s+ (underscores are
# normalised by munch_underscores). Invalid digits are reported through
# raise_error.
def parse_hexadecimal(s)
  digits = munch_underscores(s)
  token(:INTEGER, Integer(digits, 16))
rescue ArgumentError => failure
  raise_error(failure)
end
|
379
578
|
|
380
579
|
# Builds an INTEGER token from the base-8 digits in +s+ (underscores are
# normalised by munch_underscores). Invalid digits are reported through
# raise_error.
def parse_octal(s)
  digits = munch_underscores(s)
  token(:INTEGER, Integer(digits, 8))
rescue ArgumentError => failure
  raise_error(failure)
end
|
383
584
|
|
384
585
|
# Builds an INTEGER token from the base-2 digits in +s+ (underscores are
# normalised by munch_underscores). Invalid digits are reported through
# raise_error.
def parse_binary(s)
  digits = munch_underscores(s)
  token(:INTEGER, Integer(digits, 2))
rescue ArgumentError => failure
  raise_error(failure)
end
|
387
590
|
|
388
591
|
# Normalises digit-group underscores so the text can be passed to Integer():
# runs of underscores collapse to one and any trailing underscore is removed.
#
# Squeezing happens first so that a trailing run of several underscores
# ("1__") is removed completely; chomping first would leave "1_", which
# Integer() rejects.
def munch_underscores(s)
  s.squeeze('_').chomp('_')
end
|
391
594
|
|
392
|
-
def
|
393
|
-
string.gsub(/\\
|
595
|
+
# Removes whitespace escapes (a backslash followed by a run of whitespace
# and/or newlines) from +string+, leaving every other escape untouched.
# An escaped backslash is consumed as a pair and kept verbatim so that its
# second backslash is never mistaken for the start of a whitespace escape.
#
# The whitespace set is the tokenizer's own WHITESPACE/NEWLINES (plus CR),
# matching UNESCAPE; Ruby's \s would only cover ASCII whitespace.
def unescape_ws(string)
  string.gsub(/\\(\\|[#{WHITESPACE.join}#{NEWLINES.join}\r]+)/o) do |m|
    m == '\\\\' ? m : ''
  end
end
|
603
|
+
|
604
|
+
UNESCAPE = /\\(?:[#{WHITESPACE.join}#{NEWLINES.join}\r]+|[^u])/
|
605
|
+
UNESCAPE_NON_WS = /\\(?:[^u])/
|
606
|
+
|
607
|
+
# Resolves every escape except whitespace escapes, by running #unescape with
# UNESCAPE_NON_WS. The multi-line string path calls this after unescape_ws
# and dedent have already run.
def unescape_non_ws(string)
  unescape(string, UNESCAPE_NON_WS)
end

# Resolves the escape sequences in +string+ and returns the result.
#
# +rgx+ selects the non-unicode escapes to resolve (each match is passed to
# replace_esc). `\u{...}` escapes are resolved in the SAME pass, so text
# produced by one escape is never re-read as the start of another: an
# escaped backslash followed by `u{41}` stays a literal backslash + `u{41}`.
#
# Raises a tokenizer error for a malformed unicode escape (`\u` without a
# braced group of 1-6 hex digits) and for code points above U+10FFFF or in
# the surrogate range.
def unescape(string, rgx = UNESCAPE)
  # Combined patterns are cached per +rgx+ so they are compiled once.
  @unescape_patterns ||= {}
  pattern = (@unescape_patterns[rgx] ||= Regexp.union(rgx, /\\u(?:\{[0-9a-fA-F]{0,6}\})?/))

  string.gsub(pattern) do |m|
    next replace_esc(m) unless m.start_with?('\u')

    # m is "\u" alone (no valid braced group followed) or "\u{HEX}".
    hex = m[3..-2]
    raise_error "Unexpected escape #{m.inspect}" if hex.nil? || hex.empty?

    code = hex.to_i(16)
    if code > 0x10FFFF || (0xD800..0xDFFF).cover?(code)
      raise_error "Invalid code point #{m}"
    end
    code.chr(Encoding::UTF_8)
  end
end
|
622
|
+
|
623
|
+
# Maps one matched escape sequence +m+ (backslash included) to the text it
# stands for. A backslash followed by a run of whitespace/newlines is a
# whitespace escape and yields the empty string. Any other sequence raises a
# tokenizer error.
def replace_esc(m)
  case m
  when '\n' then "\n"
  when '\r' then "\r"
  when '\t' then "\t"
  when '\\\\' then "\\"
  when '\"' then "\""
  when '\b' then "\b"
  when '\f' then "\f"
  when '\s' then ' '
  # CR is included so that a backslash before a CRLF (or lone CR) line ending
  # is accepted, consistent with UNESCAPE, which matches CR in its run.
  when /\A\\[#{WHITESPACE.join}#{NEWLINES.join}\r]+\z/ then ''
  else raise_error "Unexpected escape #{m.inspect}"
  end
end
|
637
|
+
|
638
|
+
# Strips the common indentation from the body of a multi-line string.
#
# +string+ is the text between the newline after the opening quotes and the
# closing quotes. Its last line (the one holding the closing quotes) defines
# the indent: it must be whitespace-only, and every non-blank content line
# must start with exactly that prefix, which is removed. Whitespace-only
# content lines become empty. Lines are re-joined with "\n".
#
# Raises a tokenizer error when the final line contains anything other than
# whitespace or when a content line does not start with the indent.
def dedent(string)
  # Splitting on a capturing pattern yields [line, newline, line, ...].
  split = string.split(NEWLINES_PATTERN)
  # An empty body splits to [] and has neither content nor indentation.
  return "" if split.empty?

  lines = split.each_slice(2).map(&:first)
  if split.last.match?(NEWLINES_PATTERN)
    # The body ends with a newline, so the closing line is empty.
    indent = ""
  else
    *lines, indent = lines
  end

  # Validate the closing line before the early return below; otherwise text
  # sitting on the closing line of an otherwise empty body would be dropped
  # silently instead of being reported.
  raise_error "Invalid multiline string final line" unless indent.match?(WS_STAR)
  return "" if lines.empty?

  lines.map do |line|
    if line.match?(WS_STAR)
      ""
    elsif line.start_with?(indent)
      line.delete_prefix(indent)
    else
      raise_error "Invalid multiline string indentation"
    end
  end.join("\n")
end
|
658
|
+
|
659
|
+
# Returns +str+ without a leading byte-order mark (U+FEFF). Input that does
# not start with a BOM is returned as-is.
def debom(str)
  if str.start_with?("\uFEFF")
    str[1..]
  else
    str
  end
end
|
413
664
|
end
|
414
665
|
end
|