kdl 1.0.6 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +8 -1
- data/.gitignore +1 -0
- data/.gitmodules +4 -0
- data/Gemfile +6 -1
- data/README.md +67 -7
- data/Rakefile +6 -1
- data/bin/kdl +1 -1
- data/kdl.gemspec +2 -2
- data/lib/kdl/document.rb +60 -2
- data/lib/kdl/error.rb +24 -0
- data/lib/kdl/kdl.tab.rb +305 -231
- data/lib/kdl/kdl.yy +57 -49
- data/lib/kdl/node.rb +116 -13
- data/lib/kdl/parser_common.rb +28 -0
- data/lib/kdl/string_dumper.rb +32 -33
- data/lib/kdl/tokenizer.rb +387 -136
- data/lib/kdl/types/base64.rb +3 -1
- data/lib/kdl/types/country/iso3166_countries.rb +3 -1
- data/lib/kdl/types/country/iso3166_subdivisions.rb +3 -1
- data/lib/kdl/types/country.rb +4 -2
- data/lib/kdl/types/currency/iso4217_currencies.rb +3 -1
- data/lib/kdl/types/currency.rb +3 -1
- data/lib/kdl/types/date_time.rb +5 -3
- data/lib/kdl/types/decimal.rb +3 -1
- data/lib/kdl/types/duration/iso8601_parser.rb +3 -1
- data/lib/kdl/types/duration.rb +3 -1
- data/lib/kdl/types/email/parser.rb +10 -8
- data/lib/kdl/types/email.rb +3 -1
- data/lib/kdl/types/hostname/validator.rb +3 -1
- data/lib/kdl/types/hostname.rb +3 -1
- data/lib/kdl/types/ip.rb +3 -1
- data/lib/kdl/types/irl/parser.rb +10 -8
- data/lib/kdl/types/irl.rb +3 -1
- data/lib/kdl/types/regex.rb +3 -1
- data/lib/kdl/types/url.rb +3 -1
- data/lib/kdl/types/url_template.rb +6 -4
- data/lib/kdl/types/uuid.rb +3 -1
- data/lib/kdl/types.rb +2 -0
- data/lib/kdl/v1/document.rb +19 -0
- data/lib/kdl/v1/kdl.tab.rb +594 -0
- data/lib/kdl/v1/kdl.yy +89 -0
- data/lib/kdl/v1/node.rb +32 -0
- data/lib/kdl/v1/string_dumper.rb +30 -0
- data/lib/kdl/v1/tokenizer.rb +298 -0
- data/lib/kdl/v1/value.rb +91 -0
- data/lib/kdl/v1.rb +13 -0
- data/lib/kdl/value.rb +87 -15
- data/lib/kdl/version.rb +3 -1
- data/lib/kdl.rb +47 -1
- metadata +14 -7
data/lib/kdl/tokenizer.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'bigdecimal'
|
2
4
|
|
3
5
|
module KDL
|
4
6
|
class Tokenizer
|
5
|
-
class Error <
|
7
|
+
class Error < ::KDL::Error
|
6
8
|
def initialize(message, line, column)
|
7
9
|
super("#{message} (#{line}:#{column})")
|
8
10
|
end
|
@@ -36,32 +38,47 @@ module KDL
|
|
36
38
|
SYMBOLS = {
|
37
39
|
'{' => :LBRACE,
|
38
40
|
'}' => :RBRACE,
|
39
|
-
'
|
40
|
-
'
|
41
|
-
';' => :SEMICOLON
|
41
|
+
';' => :SEMICOLON,
|
42
|
+
'=' => :EQUALS
|
42
43
|
}
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
WHITESPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
|
46
|
+
"\u2000", "\u2001", "\u2002", "\u2003",
|
47
|
+
"\u2004", "\u2005", "\u2006", "\u2007",
|
48
|
+
"\u2008", "\u2009", "\u200A", "\u202F",
|
49
|
+
"\u205F", "\u3000"]
|
50
|
+
WS = "[#{Regexp.escape(WHITESPACE.join)}]"
|
51
|
+
WS_STAR = /\A#{WS}*\z/
|
52
|
+
WS_PLUS = /\A#{WS}+\z/
|
53
|
+
|
54
|
+
NEWLINES = ["\u000A", "\u0085", "\u000B", "\u000C", "\u2028", "\u2029"]
|
55
|
+
NEWLINES_PATTERN = Regexp.new("(#{NEWLINES.map{Regexp.escape(_1)}.join('|')}|\r\n?)", Regexp::MULTILINE)
|
49
56
|
|
50
|
-
|
57
|
+
OTHER_NON_IDENTIFIER_CHARS = ("\x0".."\x20").to_a - WHITESPACE
|
51
58
|
|
52
|
-
NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join
|
53
|
-
IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}
|
54
|
-
INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9
|
59
|
+
NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join}()[]/\\\"##{WHITESPACE.join}#{OTHER_NON_IDENTIFIER_CHARS.join}"
|
60
|
+
IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}]/
|
61
|
+
INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9]/
|
55
62
|
|
56
|
-
|
57
|
-
|
63
|
+
FORBIDDEN = [
|
64
|
+
*"\u0000".."\u0008",
|
65
|
+
*"\u000E".."\u001F",
|
66
|
+
"\u007F",
|
67
|
+
*"\u200E".."\u200F",
|
68
|
+
*"\u202A".."\u202E",
|
69
|
+
*"\u2066".."\u2069",
|
70
|
+
"\uFEFF"
|
71
|
+
]
|
72
|
+
|
73
|
+
VERSION_PATTERN = /\A\/-[#{WHITESPACE.join}]*kdl-version[#{WHITESPACE.join}]+(\d+)[#{WHITESPACE.join}]*[#{NEWLINES.join}]/
|
58
74
|
|
59
75
|
def initialize(str, start = 0)
|
60
|
-
@str = str
|
76
|
+
@str = debom(str)
|
61
77
|
@context = nil
|
62
78
|
@rawstring_hashes = nil
|
79
|
+
@start = start
|
63
80
|
@index = start
|
64
|
-
@buffer = ""
|
81
|
+
@buffer = +""
|
65
82
|
@done = false
|
66
83
|
@previous_context = nil
|
67
84
|
@line = 1
|
@@ -70,122 +87,175 @@ module KDL
|
|
70
87
|
@last_token = nil
|
71
88
|
end
|
72
89
|
|
90
|
+
def version_directive
|
91
|
+
if m = @str.match(VERSION_PATTERN)
|
92
|
+
m[1].to_i
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def done?
|
97
|
+
@done
|
98
|
+
end
|
99
|
+
|
100
|
+
def [](i)
|
101
|
+
@str[i].tap do |c|
|
102
|
+
raise_error "Forbidden character: #{c.inspect}" if FORBIDDEN.include?(c)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def tokens
|
107
|
+
a = []
|
108
|
+
while !done?
|
109
|
+
a << next_token
|
110
|
+
end
|
111
|
+
a
|
112
|
+
end
|
113
|
+
|
73
114
|
def next_token
|
74
115
|
@context = nil
|
75
116
|
@previous_context = nil
|
76
117
|
@line_at_start = @line
|
77
118
|
@column_at_start = @column
|
78
119
|
loop do
|
79
|
-
c =
|
120
|
+
c = self[@index]
|
80
121
|
case @context
|
81
122
|
when nil
|
82
123
|
case c
|
83
124
|
when '"'
|
84
|
-
self
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
@
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
@
|
97
|
-
|
125
|
+
if self[@index + 1] == '"' && self[@index + 2] == '"'
|
126
|
+
nl = expect_newline(@index + 3)
|
127
|
+
self.context = :multiline_string
|
128
|
+
@buffer = +''
|
129
|
+
traverse(3 + nl.length)
|
130
|
+
else
|
131
|
+
self.context = :string
|
132
|
+
@buffer = +''
|
133
|
+
traverse(1)
|
134
|
+
end
|
135
|
+
when '#'
|
136
|
+
if self[@index + 1] == '"'
|
137
|
+
if self[@index + 2] == '"' && self[@index + 3] == '"'
|
138
|
+
nl = expect_newline(@index + 4)
|
139
|
+
self.context = :multiline_rawstring
|
140
|
+
@rawstring_hashes = 1
|
141
|
+
@buffer = +''
|
142
|
+
traverse(4 + nl.length)
|
143
|
+
next
|
144
|
+
else
|
145
|
+
self.context = :rawstring
|
146
|
+
traverse(2)
|
147
|
+
@rawstring_hashes = 1
|
148
|
+
@buffer = +''
|
149
|
+
next
|
150
|
+
end
|
151
|
+
elsif self[@index + 1] == '#'
|
152
|
+
i = @index + 2
|
153
|
+
@rawstring_hashes = 2
|
154
|
+
while self[i] == '#'
|
98
155
|
@rawstring_hashes += 1
|
99
156
|
i += 1
|
100
157
|
end
|
101
|
-
if
|
102
|
-
self
|
103
|
-
|
104
|
-
|
105
|
-
|
158
|
+
if self[i] == '"'
|
159
|
+
if self[i + 1] == '"' && self[i + 2] == '"'
|
160
|
+
nl = expect_newline(i + 3)
|
161
|
+
self.context = :multiline_rawstring
|
162
|
+
traverse(@rawstring_hashes + 3 + nl.length)
|
163
|
+
@buffer = +''
|
164
|
+
next
|
165
|
+
else
|
166
|
+
self.context = :rawstring
|
167
|
+
traverse(@rawstring_hashes + 1)
|
168
|
+
@buffer = +''
|
169
|
+
next
|
170
|
+
end
|
106
171
|
end
|
107
172
|
end
|
108
|
-
self.context = :
|
109
|
-
@buffer = c
|
173
|
+
self.context = :keyword
|
174
|
+
@buffer = +c
|
110
175
|
traverse(1)
|
111
|
-
when
|
112
|
-
n =
|
176
|
+
when '-'
|
177
|
+
n = self[@index + 1]
|
178
|
+
if n =~ /[0-9]/
|
179
|
+
n2 = self[@index + 2]
|
180
|
+
if n == '0' && n2 =~ /[box]/
|
181
|
+
self.context = integer_context(n2)
|
182
|
+
traverse(3)
|
183
|
+
else
|
184
|
+
self.context = :decimal
|
185
|
+
traverse(1)
|
186
|
+
end
|
187
|
+
else
|
188
|
+
self.context = :ident
|
189
|
+
traverse(1)
|
190
|
+
end
|
191
|
+
@buffer = +c
|
192
|
+
when /[0-9+]/
|
193
|
+
n = self[@index + 1]
|
113
194
|
if c == '0' && n =~ /[box]/
|
114
195
|
traverse(2)
|
115
|
-
@buffer = ''
|
196
|
+
@buffer = +''
|
116
197
|
self.context = integer_context(n)
|
117
|
-
elsif c == '-' && n == '0' && (n2 = @str[@index + 2]) =~ /[box]/
|
118
|
-
traverse(3)
|
119
|
-
@buffer = '-'
|
120
|
-
self.context = integer_context(n2)
|
121
198
|
else
|
122
199
|
self.context = :decimal
|
123
|
-
@buffer = c
|
200
|
+
@buffer = +c
|
124
201
|
traverse(1)
|
125
202
|
end
|
126
203
|
when '\\'
|
127
204
|
t = Tokenizer.new(@str, @index + 1)
|
128
205
|
la = t.next_token
|
129
206
|
if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
|
130
|
-
|
131
|
-
|
132
|
-
|
207
|
+
traverse_to(t.index)
|
208
|
+
@buffer = "#{c}#{la[1].value}"
|
209
|
+
@buffer << "\n" if lan == :NEWLINE
|
210
|
+
self.context = :whitespace
|
133
211
|
else
|
134
212
|
raise_error "Unexpected '\\' (#{la[0]})"
|
135
213
|
end
|
214
|
+
when '='
|
215
|
+
self.context = :equals
|
216
|
+
@buffer = +c
|
217
|
+
traverse(1)
|
136
218
|
when *SYMBOLS.keys
|
137
|
-
return token(SYMBOLS[c], c).tap { traverse(1) }
|
138
|
-
when "\r"
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
traverse(2)
|
143
|
-
new_line
|
144
|
-
end
|
145
|
-
else
|
146
|
-
return token(:NEWLINE, c).tap do
|
147
|
-
traverse(1)
|
148
|
-
new_line
|
149
|
-
end
|
150
|
-
end
|
151
|
-
when *NEWLINES
|
152
|
-
return token(:NEWLINE, c).tap do
|
153
|
-
traverse(1)
|
154
|
-
new_line
|
219
|
+
return token(SYMBOLS[c], -c).tap { traverse(1) }
|
220
|
+
when *NEWLINES, "\r"
|
221
|
+
nl = expect_newline
|
222
|
+
return token(:NEWLINE, -nl).tap do
|
223
|
+
traverse(nl.length)
|
155
224
|
end
|
156
225
|
when "/"
|
157
|
-
if
|
226
|
+
if self[@index + 1] == '/'
|
158
227
|
self.context = :single_line_comment
|
159
228
|
traverse(2)
|
160
|
-
elsif
|
229
|
+
elsif self[@index + 1] == '*'
|
161
230
|
self.context = :multi_line_comment
|
162
231
|
@comment_nesting = 1
|
163
232
|
traverse(2)
|
164
|
-
elsif
|
233
|
+
elsif self[@index + 1] == '-'
|
165
234
|
return token(:SLASHDASH, '/-').tap { traverse(2) }
|
166
235
|
else
|
167
236
|
self.context = :ident
|
168
|
-
@buffer = c
|
237
|
+
@buffer = +c
|
169
238
|
traverse(1)
|
170
239
|
end
|
171
|
-
when *
|
240
|
+
when *WHITESPACE
|
172
241
|
self.context = :whitespace
|
173
|
-
@buffer = c
|
242
|
+
@buffer = +c
|
174
243
|
traverse(1)
|
175
244
|
when nil
|
176
245
|
return [false, token(:EOF, :EOF)[1]] if @done
|
246
|
+
|
177
247
|
@done = true
|
178
248
|
return token(:EOF, :EOF)
|
179
249
|
when INITIAL_IDENTIFIER_CHARS
|
180
250
|
self.context = :ident
|
181
|
-
@buffer = c
|
251
|
+
@buffer = +c
|
182
252
|
traverse(1)
|
183
253
|
when '('
|
184
254
|
@type_context = true
|
185
|
-
return token(:LPAREN, c).tap { traverse(1) }
|
255
|
+
return token(:LPAREN, -c).tap { traverse(1) }
|
186
256
|
when ')'
|
187
257
|
@type_context = false
|
188
|
-
return token(:RPAREN, c).tap { traverse(1) }
|
258
|
+
return token(:RPAREN, -c).tap { traverse(1) }
|
189
259
|
else
|
190
260
|
raise_error "Unexpected character #{c.inspect}"
|
191
261
|
end
|
@@ -193,49 +263,111 @@ module KDL
|
|
193
263
|
case c
|
194
264
|
when IDENTIFIER_CHARS
|
195
265
|
traverse(1)
|
196
|
-
@buffer
|
266
|
+
@buffer << c
|
267
|
+
else
|
268
|
+
case @buffer
|
269
|
+
when 'true', 'false', 'null', 'inf', '-inf', 'nan'
|
270
|
+
raise_error "Identifier cannot be a literal"
|
271
|
+
when /\A\.\d/
|
272
|
+
raise_error "Identifier cannot look like an illegal float"
|
273
|
+
else
|
274
|
+
return token(:IDENT, -@buffer)
|
275
|
+
end
|
276
|
+
end
|
277
|
+
when :keyword
|
278
|
+
case c
|
279
|
+
when /[a-z\-]/
|
280
|
+
traverse(1)
|
281
|
+
@buffer << c
|
197
282
|
else
|
198
283
|
case @buffer
|
199
|
-
when 'true' then return token(:TRUE, true)
|
200
|
-
when 'false' then return token(:FALSE, false)
|
201
|
-
when 'null' then return token(:NULL, nil)
|
202
|
-
|
284
|
+
when '#true' then return token(:TRUE, true)
|
285
|
+
when '#false' then return token(:FALSE, false)
|
286
|
+
when '#null' then return token(:NULL, nil)
|
287
|
+
when '#inf' then return token(:FLOAT, Float::INFINITY)
|
288
|
+
when '#-inf' then return token(:FLOAT, -Float::INFINITY)
|
289
|
+
when '#nan' then return token(:FLOAT, Float::NAN)
|
290
|
+
else raise_error "Unknown keyword #{@buffer.inspect}"
|
203
291
|
end
|
204
292
|
end
|
205
293
|
when :string
|
206
294
|
case c
|
207
295
|
when '\\'
|
208
|
-
@buffer
|
209
|
-
|
210
|
-
|
296
|
+
@buffer << c
|
297
|
+
c2 = self[@index + 1]
|
298
|
+
@buffer << c2
|
299
|
+
if c2.match?(NEWLINES_PATTERN)
|
300
|
+
i = 2
|
301
|
+
while self[@index + i]&.match?(NEWLINES_PATTERN)
|
302
|
+
@buffer << self[@index + i]
|
303
|
+
i+=1
|
304
|
+
end
|
305
|
+
traverse(i)
|
306
|
+
else
|
307
|
+
traverse(2)
|
308
|
+
end
|
211
309
|
when '"'
|
212
|
-
return token(:STRING,
|
310
|
+
return token(:STRING, -unescape(@buffer)).tap { traverse(1) }
|
311
|
+
when *NEWLINES, "\r"
|
312
|
+
raise_error "Unexpected NEWLINE in string literal"
|
213
313
|
when nil
|
214
314
|
raise_error "Unterminated string literal"
|
215
315
|
else
|
216
|
-
@buffer
|
316
|
+
@buffer << c
|
317
|
+
traverse(1)
|
318
|
+
end
|
319
|
+
when :multiline_string
|
320
|
+
case c
|
321
|
+
when '\\'
|
322
|
+
@buffer << c
|
323
|
+
@buffer << self[@index + 1]
|
324
|
+
traverse(2)
|
325
|
+
when '"'
|
326
|
+
if self[@index + 1] == '"' && self[@index + 2] == '"'
|
327
|
+
return token(:STRING, -unescape_non_ws(dedent(unescape_ws(@buffer)))).tap { traverse(3) }
|
328
|
+
end
|
329
|
+
@buffer << c
|
330
|
+
traverse(1)
|
331
|
+
when nil
|
332
|
+
raise_error "Unterminated multi-line string literal"
|
333
|
+
else
|
334
|
+
@buffer << c
|
217
335
|
traverse(1)
|
218
336
|
end
|
219
337
|
when :rawstring
|
220
338
|
raise_error "Unterminated rawstring literal" if c.nil?
|
221
339
|
|
222
|
-
|
340
|
+
case c
|
341
|
+
when '"'
|
223
342
|
h = 0
|
224
|
-
while
|
225
|
-
|
343
|
+
h += 1 while self[@index + 1 + h] == '#' && h < @rawstring_hashes
|
344
|
+
if h == @rawstring_hashes
|
345
|
+
return token(:RAWSTRING, -@buffer).tap { traverse(1 + h) }
|
226
346
|
end
|
347
|
+
when *NEWLINES, "\r"
|
348
|
+
raise_error "Unexpected NEWLINE in rawstring literal"
|
349
|
+
end
|
350
|
+
|
351
|
+
@buffer << c
|
352
|
+
traverse(1)
|
353
|
+
when :multiline_rawstring
|
354
|
+
raise_error "Unterminated multi-line rawstring literal" if c.nil?
|
355
|
+
|
356
|
+
if c == '"' && self[@index + 1] == '"' && self[@index + 2] == '"' && self[@index + 3] == '#'
|
357
|
+
h = 1
|
358
|
+
h += 1 while self[@index + 3 + h] == '#' && h < @rawstring_hashes
|
227
359
|
if h == @rawstring_hashes
|
228
|
-
return token(:RAWSTRING, @buffer).tap { traverse(
|
360
|
+
return token(:RAWSTRING, -dedent(@buffer)).tap { traverse(3 + h) }
|
229
361
|
end
|
230
362
|
end
|
231
363
|
|
232
|
-
@buffer
|
364
|
+
@buffer << c
|
233
365
|
traverse(1)
|
234
366
|
when :decimal
|
235
367
|
case c
|
236
368
|
when /[0-9.\-+_eE]/
|
237
369
|
traverse(1)
|
238
|
-
@buffer
|
370
|
+
@buffer << c
|
239
371
|
else
|
240
372
|
return parse_decimal(@buffer)
|
241
373
|
end
|
@@ -243,7 +375,7 @@ module KDL
|
|
243
375
|
case c
|
244
376
|
when /[0-9a-fA-F_]/
|
245
377
|
traverse(1)
|
246
|
-
@buffer
|
378
|
+
@buffer << c
|
247
379
|
else
|
248
380
|
return parse_hexadecimal(@buffer)
|
249
381
|
end
|
@@ -251,7 +383,7 @@ module KDL
|
|
251
383
|
case c
|
252
384
|
when /[0-7_]/
|
253
385
|
traverse(1)
|
254
|
-
@buffer
|
386
|
+
@buffer << c
|
255
387
|
else
|
256
388
|
return parse_octal(@buffer)
|
257
389
|
end
|
@@ -259,26 +391,27 @@ module KDL
|
|
259
391
|
case c
|
260
392
|
when /[01_]/
|
261
393
|
traverse(1)
|
262
|
-
@buffer
|
394
|
+
@buffer << c
|
263
395
|
else
|
264
396
|
return parse_binary(@buffer)
|
265
397
|
end
|
266
398
|
when :single_line_comment
|
267
|
-
|
399
|
+
case c
|
400
|
+
when *NEWLINES, "\r"
|
268
401
|
self.context = nil
|
269
402
|
@column_at_start = @column
|
270
403
|
next
|
271
|
-
|
404
|
+
when nil
|
272
405
|
@done = true
|
273
406
|
return token(:EOF, :EOF)
|
274
407
|
else
|
275
408
|
traverse(1)
|
276
409
|
end
|
277
410
|
when :multi_line_comment
|
278
|
-
if c == '/' &&
|
411
|
+
if c == '/' && self[@index + 1] == '*'
|
279
412
|
@comment_nesting += 1
|
280
413
|
traverse(2)
|
281
|
-
elsif c == '*' &&
|
414
|
+
elsif c == '*' && self[@index + 1] == '/'
|
282
415
|
@comment_nesting -= 1
|
283
416
|
traverse(2)
|
284
417
|
if @comment_nesting == 0
|
@@ -288,16 +421,42 @@ module KDL
|
|
288
421
|
traverse(1)
|
289
422
|
end
|
290
423
|
when :whitespace
|
291
|
-
if
|
424
|
+
if WHITESPACE.include?(c)
|
292
425
|
traverse(1)
|
293
|
-
@buffer
|
294
|
-
elsif c ==
|
426
|
+
@buffer << c
|
427
|
+
elsif c == '='
|
428
|
+
self.context = :equals
|
429
|
+
@buffer << c
|
430
|
+
traverse(1)
|
431
|
+
elsif c == "/" && self[@index + 1] == '*'
|
295
432
|
self.context = :multi_line_comment
|
296
433
|
@comment_nesting = 1
|
297
434
|
traverse(2)
|
435
|
+
elsif c == "\\"
|
436
|
+
t = Tokenizer.new(@str, @index + 1)
|
437
|
+
la = t.next_token
|
438
|
+
if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
|
439
|
+
traverse_to(t.index)
|
440
|
+
@buffer << "#{c}#{la[1].value}"
|
441
|
+
@buffer << "\n" if lan == :NEWLINE
|
442
|
+
else
|
443
|
+
raise_error "Unexpected '\\' (#{la[0]})"
|
444
|
+
end
|
298
445
|
else
|
299
|
-
return token(:WS,
|
446
|
+
return token(:WS, -@buffer)
|
300
447
|
end
|
448
|
+
when :equals
|
449
|
+
t = Tokenizer.new(@str, @index)
|
450
|
+
la = t.next_token
|
451
|
+
if la[0] == :WS
|
452
|
+
@buffer << la[1].value
|
453
|
+
traverse_to(t.index)
|
454
|
+
end
|
455
|
+
return token(:EQUALS, -@buffer)
|
456
|
+
else
|
457
|
+
# :nocov:
|
458
|
+
raise_error "Unknown context `#{@context}'"
|
459
|
+
# :nocov:
|
301
460
|
end
|
302
461
|
end
|
303
462
|
end
|
@@ -309,43 +468,69 @@ module KDL
|
|
309
468
|
end
|
310
469
|
|
311
470
|
def traverse(n = 1)
|
312
|
-
|
471
|
+
n.times do |i|
|
472
|
+
case self[@index + i]
|
473
|
+
when "\r"
|
474
|
+
@column = 1
|
475
|
+
when *NEWLINES
|
476
|
+
@line += 1
|
477
|
+
@column = 1
|
478
|
+
else
|
479
|
+
@column += 1
|
480
|
+
end
|
481
|
+
end
|
313
482
|
@index += n
|
314
483
|
end
|
315
484
|
|
316
|
-
def
|
317
|
-
|
485
|
+
def traverse_to(i)
|
486
|
+
traverse(i - @index)
|
318
487
|
end
|
319
488
|
|
320
|
-
def
|
321
|
-
|
322
|
-
@line
|
489
|
+
def raise_error(error)
|
490
|
+
case error
|
491
|
+
when String then raise Error.new(error, @line, @column)
|
492
|
+
when Error then raise error
|
493
|
+
else raise Error.new(error.message, @line, @column)
|
494
|
+
end
|
323
495
|
end
|
324
496
|
|
325
497
|
def context=(val)
|
326
|
-
if @type_context && !
|
498
|
+
if @type_context && !allowed_in_type?(val)
|
327
499
|
raise_error "#{val} context not allowed in type declaration"
|
328
|
-
elsif @last_token && @last_token[0] == :RPAREN &&
|
500
|
+
elsif @last_token && @last_token[0] == :RPAREN && !allowed_after_type?(val)
|
329
501
|
raise_error 'Comments are not allowed after a type declaration'
|
330
502
|
end
|
331
503
|
@previous_context = @context
|
332
504
|
@context = val
|
333
505
|
end
|
334
506
|
|
507
|
+
def allowed_in_type?(val)
|
508
|
+
%i[ident string rawstring multi_line_comment whitespace].include?(val)
|
509
|
+
end
|
510
|
+
|
511
|
+
def allowed_after_type?(val)
|
512
|
+
!%i[single_line_comment].include?(val)
|
513
|
+
end
|
514
|
+
|
335
515
|
def revert_context
|
336
516
|
@context = @previous_context
|
337
517
|
@previous_context = nil
|
338
518
|
end
|
339
519
|
|
340
|
-
def
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
520
|
+
def expect_newline(i = @index)
|
521
|
+
c = self[i]
|
522
|
+
case c
|
523
|
+
when "\r"
|
524
|
+
n = self[i + 1]
|
525
|
+
if n == "\n"
|
526
|
+
"#{c}#{n}"
|
527
|
+
else
|
528
|
+
c
|
529
|
+
end
|
530
|
+
when *NEWLINES
|
531
|
+
c
|
347
532
|
else
|
348
|
-
|
533
|
+
raise_error "Expected NEWLINE, found '#{c}'"
|
349
534
|
end
|
350
535
|
end
|
351
536
|
|
@@ -357,6 +542,18 @@ module KDL
|
|
357
542
|
end
|
358
543
|
end
|
359
544
|
|
545
|
+
def parse_decimal(s)
|
546
|
+
return parse_float(s) if s =~ /[.E]/i
|
547
|
+
|
548
|
+
token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
|
549
|
+
rescue => e
|
550
|
+
if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
|
551
|
+
token(:IDENT, -s)
|
552
|
+
else
|
553
|
+
raise_error(e)
|
554
|
+
end
|
555
|
+
end
|
556
|
+
|
360
557
|
def parse_float(s)
|
361
558
|
match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.([\d_]+))?(?:[eE]([-+]?[\d_]+))?$/)
|
362
559
|
raise_error "Invalid floating point value #{s}" if match.nil?
|
@@ -375,40 +572,94 @@ module KDL
|
|
375
572
|
|
376
573
|
def parse_hexadecimal(s)
|
377
574
|
token(:INTEGER, Integer(munch_underscores(s), 16))
|
575
|
+
rescue ArgumentError => e
|
576
|
+
raise_error(e)
|
378
577
|
end
|
379
578
|
|
380
579
|
def parse_octal(s)
|
381
580
|
token(:INTEGER, Integer(munch_underscores(s), 8))
|
581
|
+
rescue ArgumentError => e
|
582
|
+
raise_error(e)
|
382
583
|
end
|
383
584
|
|
384
585
|
def parse_binary(s)
|
385
586
|
token(:INTEGER, Integer(munch_underscores(s), 2))
|
587
|
+
rescue ArgumentError => e
|
588
|
+
raise_error(e)
|
386
589
|
end
|
387
590
|
|
388
591
|
def munch_underscores(s)
|
389
592
|
s.chomp('_').squeeze('_')
|
390
593
|
end
|
391
594
|
|
392
|
-
def
|
393
|
-
string.gsub(/\\
|
595
|
+
def unescape_ws(string)
|
596
|
+
string.gsub(/\\(\\|\s+)/) do |m|
|
394
597
|
case m
|
395
|
-
when '
|
396
|
-
|
397
|
-
when '\t' then "\t"
|
398
|
-
when '\\\\' then "\\"
|
399
|
-
when '\"' then "\""
|
400
|
-
when '\b' then "\b"
|
401
|
-
when '\f' then "\f"
|
402
|
-
when '\/' then "/"
|
403
|
-
else raise_error "Unexpected escape #{m.inspect}"
|
598
|
+
when '\\\\' then '\\\\'
|
599
|
+
else ''
|
404
600
|
end
|
405
|
-
end
|
406
|
-
|
407
|
-
|
408
|
-
|
601
|
+
end
|
602
|
+
end
|
603
|
+
|
604
|
+
UNESCAPE = /\\(?:[#{WHITESPACE.join}#{NEWLINES.join}\r]+|[^u])/
|
605
|
+
UNESCAPE_NON_WS = /\\(?:[^u])/
|
606
|
+
|
607
|
+
def unescape_non_ws(string)
|
608
|
+
unescape(string, UNESCAPE_NON_WS)
|
609
|
+
end
|
610
|
+
|
611
|
+
def unescape(string, rgx = UNESCAPE)
|
612
|
+
string
|
613
|
+
.gsub(rgx) { |m| replace_esc(m) }
|
614
|
+
.gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
|
615
|
+
i = Integer(m[3..-2], 16)
|
616
|
+
if i < 0 || i > 0x10FFFF || (0xD800..0xDFFF).include?(i)
|
617
|
+
raise_error "Invalid code point #{m}"
|
618
|
+
end
|
619
|
+
i.chr(Encoding::UTF_8)
|
409
620
|
end
|
410
|
-
|
621
|
+
end
|
622
|
+
|
623
|
+
def replace_esc(m)
|
624
|
+
case m
|
625
|
+
when '\n' then "\n"
|
626
|
+
when '\r' then "\r"
|
627
|
+
when '\t' then "\t"
|
628
|
+
when '\\\\' then "\\"
|
629
|
+
when '\"' then "\""
|
630
|
+
when '\b' then "\b"
|
631
|
+
when '\f' then "\f"
|
632
|
+
when '\s' then ' '
|
633
|
+
when /\\[#{WHITESPACE.join}#{NEWLINES.join}]+/ then ''
|
634
|
+
else raise_error "Unexpected escape #{m.inspect}"
|
411
635
|
end
|
412
636
|
end
|
637
|
+
|
638
|
+
def dedent(string)
|
639
|
+
split = string.split(NEWLINES_PATTERN)
|
640
|
+
lines = split.partition.with_index { |_, i| i.even? }.first
|
641
|
+
if split.last.match?(NEWLINES_PATTERN)
|
642
|
+
indent = ""
|
643
|
+
else
|
644
|
+
*lines, indent = lines
|
645
|
+
end
|
646
|
+
return "" if lines.empty?
|
647
|
+
raise_error "Invalid multiline string final line" unless indent.match?(WS_STAR)
|
648
|
+
valid = /\A#{Regexp.escape(indent)}(.*)/
|
649
|
+
|
650
|
+
lines.map do |line|
|
651
|
+
case line
|
652
|
+
when WS_STAR then ""
|
653
|
+
when valid then $1
|
654
|
+
else raise_error "Invalid multiline string indentation"
|
655
|
+
end
|
656
|
+
end.join("\n")
|
657
|
+
end
|
658
|
+
|
659
|
+
def debom(str)
|
660
|
+
return str unless str.start_with?("\uFEFF")
|
661
|
+
|
662
|
+
str[1..]
|
663
|
+
end
|
413
664
|
end
|
414
665
|
end
|