kdl 1.0.6 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +8 -1
  3. data/.gitignore +1 -0
  4. data/.gitmodules +4 -0
  5. data/Gemfile +6 -1
  6. data/README.md +67 -7
  7. data/Rakefile +6 -1
  8. data/bin/kdl +1 -1
  9. data/kdl.gemspec +2 -2
  10. data/lib/kdl/document.rb +60 -2
  11. data/lib/kdl/error.rb +24 -0
  12. data/lib/kdl/kdl.tab.rb +305 -231
  13. data/lib/kdl/kdl.yy +57 -49
  14. data/lib/kdl/node.rb +116 -13
  15. data/lib/kdl/parser_common.rb +28 -0
  16. data/lib/kdl/string_dumper.rb +32 -33
  17. data/lib/kdl/tokenizer.rb +387 -136
  18. data/lib/kdl/types/base64.rb +3 -1
  19. data/lib/kdl/types/country/iso3166_countries.rb +3 -1
  20. data/lib/kdl/types/country/iso3166_subdivisions.rb +3 -1
  21. data/lib/kdl/types/country.rb +4 -2
  22. data/lib/kdl/types/currency/iso4217_currencies.rb +3 -1
  23. data/lib/kdl/types/currency.rb +3 -1
  24. data/lib/kdl/types/date_time.rb +5 -3
  25. data/lib/kdl/types/decimal.rb +3 -1
  26. data/lib/kdl/types/duration/iso8601_parser.rb +3 -1
  27. data/lib/kdl/types/duration.rb +3 -1
  28. data/lib/kdl/types/email/parser.rb +10 -8
  29. data/lib/kdl/types/email.rb +3 -1
  30. data/lib/kdl/types/hostname/validator.rb +3 -1
  31. data/lib/kdl/types/hostname.rb +3 -1
  32. data/lib/kdl/types/ip.rb +3 -1
  33. data/lib/kdl/types/irl/parser.rb +10 -8
  34. data/lib/kdl/types/irl.rb +3 -1
  35. data/lib/kdl/types/regex.rb +3 -1
  36. data/lib/kdl/types/url.rb +3 -1
  37. data/lib/kdl/types/url_template.rb +6 -4
  38. data/lib/kdl/types/uuid.rb +3 -1
  39. data/lib/kdl/types.rb +2 -0
  40. data/lib/kdl/v1/document.rb +19 -0
  41. data/lib/kdl/v1/kdl.tab.rb +594 -0
  42. data/lib/kdl/v1/kdl.yy +89 -0
  43. data/lib/kdl/v1/node.rb +32 -0
  44. data/lib/kdl/v1/string_dumper.rb +30 -0
  45. data/lib/kdl/v1/tokenizer.rb +298 -0
  46. data/lib/kdl/v1/value.rb +91 -0
  47. data/lib/kdl/v1.rb +13 -0
  48. data/lib/kdl/value.rb +87 -15
  49. data/lib/kdl/version.rb +3 -1
  50. data/lib/kdl.rb +47 -1
  51. metadata +14 -7
data/lib/kdl/tokenizer.rb CHANGED
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bigdecimal'
2
4
 
3
5
  module KDL
4
6
  class Tokenizer
5
- class Error < StandardError
7
+ class Error < ::KDL::Error
6
8
  def initialize(message, line, column)
7
9
  super("#{message} (#{line}:#{column})")
8
10
  end
@@ -36,32 +38,47 @@ module KDL
36
38
  SYMBOLS = {
37
39
  '{' => :LBRACE,
38
40
  '}' => :RBRACE,
39
- '=' => :EQUALS,
40
- '' => :EQUALS,
41
- ';' => :SEMICOLON
41
+ ';' => :SEMICOLON,
42
+ '=' => :EQUALS
42
43
  }
43
44
 
44
- WHITEPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
45
- "\u2000", "\u2001", "\u2002", "\u2003",
46
- "\u2004", "\u2005", "\u2006", "\u2007",
47
- "\u2008", "\u2009", "\u200A", "\u202F",
48
- "\u205F", "\u3000" ]
45
+ WHITESPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
46
+ "\u2000", "\u2001", "\u2002", "\u2003",
47
+ "\u2004", "\u2005", "\u2006", "\u2007",
48
+ "\u2008", "\u2009", "\u200A", "\u202F",
49
+ "\u205F", "\u3000"]
50
+ WS = "[#{Regexp.escape(WHITESPACE.join)}]"
51
+ WS_STAR = /\A#{WS}*\z/
52
+ WS_PLUS = /\A#{WS}+\z/
53
+
54
+ NEWLINES = ["\u000A", "\u0085", "\u000B", "\u000C", "\u2028", "\u2029"]
55
+ NEWLINES_PATTERN = Regexp.new("(#{NEWLINES.map{Regexp.escape(_1)}.join('|')}|\r\n?)", Regexp::MULTILINE)
49
56
 
50
- NEWLINES = ["\u000A", "\u0085", "\u000C", "\u2028", "\u2029"]
57
+ OTHER_NON_IDENTIFIER_CHARS = ("\x0".."\x20").to_a - WHITESPACE
51
58
 
52
- NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join('')}()/\\<>[]\","
53
- IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}\x0-\x20]/
54
- INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9\x0-\x20]/
59
+ NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join}()[]/\\\"##{WHITESPACE.join}#{OTHER_NON_IDENTIFIER_CHARS.join}"
60
+ IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}]/
61
+ INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9]/
55
62
 
56
- ALLOWED_IN_TYPE = [:ident, :string, :rawstring]
57
- NOT_ALLOWED_AFTER_TYPE = [:single_line_comment, :multi_line_comment]
63
+ FORBIDDEN = [
64
+ *"\u0000".."\u0008",
65
+ *"\u000E".."\u001F",
66
+ "\u007F",
67
+ *"\u200E".."\u200F",
68
+ *"\u202A".."\u202E",
69
+ *"\u2066".."\u2069",
70
+ "\uFEFF"
71
+ ]
72
+
73
+ VERSION_PATTERN = /\A\/-[#{WHITESPACE.join}]*kdl-version[#{WHITESPACE.join}]+(\d+)[#{WHITESPACE.join}]*[#{NEWLINES.join}]/
58
74
 
59
75
  def initialize(str, start = 0)
60
- @str = str
76
+ @str = debom(str)
61
77
  @context = nil
62
78
  @rawstring_hashes = nil
79
+ @start = start
63
80
  @index = start
64
- @buffer = ""
81
+ @buffer = +""
65
82
  @done = false
66
83
  @previous_context = nil
67
84
  @line = 1
@@ -70,122 +87,175 @@ module KDL
70
87
  @last_token = nil
71
88
  end
72
89
 
90
+ def version_directive
91
+ if m = @str.match(VERSION_PATTERN)
92
+ m[1].to_i
93
+ end
94
+ end
95
+
96
+ def done?
97
+ @done
98
+ end
99
+
100
+ def [](i)
101
+ @str[i].tap do |c|
102
+ raise_error "Forbidden character: #{c.inspect}" if FORBIDDEN.include?(c)
103
+ end
104
+ end
105
+
106
+ def tokens
107
+ a = []
108
+ while !done?
109
+ a << next_token
110
+ end
111
+ a
112
+ end
113
+
73
114
  def next_token
74
115
  @context = nil
75
116
  @previous_context = nil
76
117
  @line_at_start = @line
77
118
  @column_at_start = @column
78
119
  loop do
79
- c = @str[@index]
120
+ c = self[@index]
80
121
  case @context
81
122
  when nil
82
123
  case c
83
124
  when '"'
84
- self.context = :string
85
- @buffer = ''
86
- traverse(1)
87
- when 'r'
88
- if @str[@index + 1] == '"'
89
- self.context = :rawstring
90
- traverse(2)
91
- @rawstring_hashes = 0
92
- @buffer = ''
93
- next
94
- elsif @str[@index + 1] == '#'
95
- i = @index + 1
96
- @rawstring_hashes = 0
97
- while @str[i] == '#'
125
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
126
+ nl = expect_newline(@index + 3)
127
+ self.context = :multiline_string
128
+ @buffer = +''
129
+ traverse(3 + nl.length)
130
+ else
131
+ self.context = :string
132
+ @buffer = +''
133
+ traverse(1)
134
+ end
135
+ when '#'
136
+ if self[@index + 1] == '"'
137
+ if self[@index + 2] == '"' && self[@index + 3] == '"'
138
+ nl = expect_newline(@index + 4)
139
+ self.context = :multiline_rawstring
140
+ @rawstring_hashes = 1
141
+ @buffer = +''
142
+ traverse(4 + nl.length)
143
+ next
144
+ else
145
+ self.context = :rawstring
146
+ traverse(2)
147
+ @rawstring_hashes = 1
148
+ @buffer = +''
149
+ next
150
+ end
151
+ elsif self[@index + 1] == '#'
152
+ i = @index + 2
153
+ @rawstring_hashes = 2
154
+ while self[i] == '#'
98
155
  @rawstring_hashes += 1
99
156
  i += 1
100
157
  end
101
- if @str[i] == '"'
102
- self.context = :rawstring
103
- @index = i + 1
104
- @buffer = ''
105
- next
158
+ if self[i] == '"'
159
+ if self[i + 1] == '"' && self[i + 2] == '"'
160
+ nl = expect_newline(i + 3)
161
+ self.context = :multiline_rawstring
162
+ traverse(@rawstring_hashes + 3 + nl.length)
163
+ @buffer = +''
164
+ next
165
+ else
166
+ self.context = :rawstring
167
+ traverse(@rawstring_hashes + 1)
168
+ @buffer = +''
169
+ next
170
+ end
106
171
  end
107
172
  end
108
- self.context = :ident
109
- @buffer = c
173
+ self.context = :keyword
174
+ @buffer = +c
110
175
  traverse(1)
111
- when /[0-9\-+]/
112
- n = @str[@index + 1]
176
+ when '-'
177
+ n = self[@index + 1]
178
+ if n =~ /[0-9]/
179
+ n2 = self[@index + 2]
180
+ if n == '0' && n2 =~ /[box]/
181
+ self.context = integer_context(n2)
182
+ traverse(3)
183
+ else
184
+ self.context = :decimal
185
+ traverse(1)
186
+ end
187
+ else
188
+ self.context = :ident
189
+ traverse(1)
190
+ end
191
+ @buffer = +c
192
+ when /[0-9+]/
193
+ n = self[@index + 1]
113
194
  if c == '0' && n =~ /[box]/
114
195
  traverse(2)
115
- @buffer = ''
196
+ @buffer = +''
116
197
  self.context = integer_context(n)
117
- elsif c == '-' && n == '0' && (n2 = @str[@index + 2]) =~ /[box]/
118
- traverse(3)
119
- @buffer = '-'
120
- self.context = integer_context(n2)
121
198
  else
122
199
  self.context = :decimal
123
- @buffer = c
200
+ @buffer = +c
124
201
  traverse(1)
125
202
  end
126
203
  when '\\'
127
204
  t = Tokenizer.new(@str, @index + 1)
128
205
  la = t.next_token
129
206
  if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
130
- @index = t.index
131
- new_line
132
- return token(:ESCLINE, "\\#{la[1].value}")
207
+ traverse_to(t.index)
208
+ @buffer = "#{c}#{la[1].value}"
209
+ @buffer << "\n" if lan == :NEWLINE
210
+ self.context = :whitespace
133
211
  else
134
212
  raise_error "Unexpected '\\' (#{la[0]})"
135
213
  end
214
+ when '='
215
+ self.context = :equals
216
+ @buffer = +c
217
+ traverse(1)
136
218
  when *SYMBOLS.keys
137
- return token(SYMBOLS[c], c).tap { traverse(1) }
138
- when "\r"
139
- n = @str[@index + 1]
140
- if n == "\n"
141
- return token(:NEWLINE, "#{c}#{n}").tap do
142
- traverse(2)
143
- new_line
144
- end
145
- else
146
- return token(:NEWLINE, c).tap do
147
- traverse(1)
148
- new_line
149
- end
150
- end
151
- when *NEWLINES
152
- return token(:NEWLINE, c).tap do
153
- traverse(1)
154
- new_line
219
+ return token(SYMBOLS[c], -c).tap { traverse(1) }
220
+ when *NEWLINES, "\r"
221
+ nl = expect_newline
222
+ return token(:NEWLINE, -nl).tap do
223
+ traverse(nl.length)
155
224
  end
156
225
  when "/"
157
- if @str[@index + 1] == '/'
226
+ if self[@index + 1] == '/'
158
227
  self.context = :single_line_comment
159
228
  traverse(2)
160
- elsif @str[@index + 1] == '*'
229
+ elsif self[@index + 1] == '*'
161
230
  self.context = :multi_line_comment
162
231
  @comment_nesting = 1
163
232
  traverse(2)
164
- elsif @str[@index + 1] == '-'
233
+ elsif self[@index + 1] == '-'
165
234
  return token(:SLASHDASH, '/-').tap { traverse(2) }
166
235
  else
167
236
  self.context = :ident
168
- @buffer = c
237
+ @buffer = +c
169
238
  traverse(1)
170
239
  end
171
- when *WHITEPACE
240
+ when *WHITESPACE
172
241
  self.context = :whitespace
173
- @buffer = c
242
+ @buffer = +c
174
243
  traverse(1)
175
244
  when nil
176
245
  return [false, token(:EOF, :EOF)[1]] if @done
246
+
177
247
  @done = true
178
248
  return token(:EOF, :EOF)
179
249
  when INITIAL_IDENTIFIER_CHARS
180
250
  self.context = :ident
181
- @buffer = c
251
+ @buffer = +c
182
252
  traverse(1)
183
253
  when '('
184
254
  @type_context = true
185
- return token(:LPAREN, c).tap { traverse(1) }
255
+ return token(:LPAREN, -c).tap { traverse(1) }
186
256
  when ')'
187
257
  @type_context = false
188
- return token(:RPAREN, c).tap { traverse(1) }
258
+ return token(:RPAREN, -c).tap { traverse(1) }
189
259
  else
190
260
  raise_error "Unexpected character #{c.inspect}"
191
261
  end
@@ -193,49 +263,111 @@ module KDL
193
263
  case c
194
264
  when IDENTIFIER_CHARS
195
265
  traverse(1)
196
- @buffer += c
266
+ @buffer << c
267
+ else
268
+ case @buffer
269
+ when 'true', 'false', 'null', 'inf', '-inf', 'nan'
270
+ raise_error "Identifier cannot be a literal"
271
+ when /\A\.\d/
272
+ raise_error "Identifier cannot look like an illegal float"
273
+ else
274
+ return token(:IDENT, -@buffer)
275
+ end
276
+ end
277
+ when :keyword
278
+ case c
279
+ when /[a-z\-]/
280
+ traverse(1)
281
+ @buffer << c
197
282
  else
198
283
  case @buffer
199
- when 'true' then return token(:TRUE, true)
200
- when 'false' then return token(:FALSE, false)
201
- when 'null' then return token(:NULL, nil)
202
- else return token(:IDENT, @buffer)
284
+ when '#true' then return token(:TRUE, true)
285
+ when '#false' then return token(:FALSE, false)
286
+ when '#null' then return token(:NULL, nil)
287
+ when '#inf' then return token(:FLOAT, Float::INFINITY)
288
+ when '#-inf' then return token(:FLOAT, -Float::INFINITY)
289
+ when '#nan' then return token(:FLOAT, Float::NAN)
290
+ else raise_error "Unknown keyword #{@buffer.inspect}"
203
291
  end
204
292
  end
205
293
  when :string
206
294
  case c
207
295
  when '\\'
208
- @buffer += c
209
- @buffer += @str[@index + 1]
210
- traverse(2)
296
+ @buffer << c
297
+ c2 = self[@index + 1]
298
+ @buffer << c2
299
+ if c2.match?(NEWLINES_PATTERN)
300
+ i = 2
301
+ while self[@index + i]&.match?(NEWLINES_PATTERN)
302
+ @buffer << self[@index + i]
303
+ i+=1
304
+ end
305
+ traverse(i)
306
+ else
307
+ traverse(2)
308
+ end
211
309
  when '"'
212
- return token(:STRING, convert_escapes(@buffer)).tap { traverse(1) }
310
+ return token(:STRING, -unescape(@buffer)).tap { traverse(1) }
311
+ when *NEWLINES, "\r"
312
+ raise_error "Unexpected NEWLINE in string literal"
213
313
  when nil
214
314
  raise_error "Unterminated string literal"
215
315
  else
216
- @buffer += c
316
+ @buffer << c
317
+ traverse(1)
318
+ end
319
+ when :multiline_string
320
+ case c
321
+ when '\\'
322
+ @buffer << c
323
+ @buffer << self[@index + 1]
324
+ traverse(2)
325
+ when '"'
326
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
327
+ return token(:STRING, -unescape_non_ws(dedent(unescape_ws(@buffer)))).tap { traverse(3) }
328
+ end
329
+ @buffer << c
330
+ traverse(1)
331
+ when nil
332
+ raise_error "Unterminated multi-line string literal"
333
+ else
334
+ @buffer << c
217
335
  traverse(1)
218
336
  end
219
337
  when :rawstring
220
338
  raise_error "Unterminated rawstring literal" if c.nil?
221
339
 
222
- if c == '"'
340
+ case c
341
+ when '"'
223
342
  h = 0
224
- while @str[@index + 1 + h] == '#' && h < @rawstring_hashes
225
- h += 1
343
+ h += 1 while self[@index + 1 + h] == '#' && h < @rawstring_hashes
344
+ if h == @rawstring_hashes
345
+ return token(:RAWSTRING, -@buffer).tap { traverse(1 + h) }
226
346
  end
347
+ when *NEWLINES, "\r"
348
+ raise_error "Unexpected NEWLINE in rawstring literal"
349
+ end
350
+
351
+ @buffer << c
352
+ traverse(1)
353
+ when :multiline_rawstring
354
+ raise_error "Unterminated multi-line rawstring literal" if c.nil?
355
+
356
+ if c == '"' && self[@index + 1] == '"' && self[@index + 2] == '"' && self[@index + 3] == '#'
357
+ h = 1
358
+ h += 1 while self[@index + 3 + h] == '#' && h < @rawstring_hashes
227
359
  if h == @rawstring_hashes
228
- return token(:RAWSTRING, @buffer).tap { traverse(1 + h) }
360
+ return token(:RAWSTRING, -dedent(@buffer)).tap { traverse(3 + h) }
229
361
  end
230
362
  end
231
363
 
232
- @buffer += c
364
+ @buffer << c
233
365
  traverse(1)
234
366
  when :decimal
235
367
  case c
236
368
  when /[0-9.\-+_eE]/
237
369
  traverse(1)
238
- @buffer += c
370
+ @buffer << c
239
371
  else
240
372
  return parse_decimal(@buffer)
241
373
  end
@@ -243,7 +375,7 @@ module KDL
243
375
  case c
244
376
  when /[0-9a-fA-F_]/
245
377
  traverse(1)
246
- @buffer += c
378
+ @buffer << c
247
379
  else
248
380
  return parse_hexadecimal(@buffer)
249
381
  end
@@ -251,7 +383,7 @@ module KDL
251
383
  case c
252
384
  when /[0-7_]/
253
385
  traverse(1)
254
- @buffer += c
386
+ @buffer << c
255
387
  else
256
388
  return parse_octal(@buffer)
257
389
  end
@@ -259,26 +391,27 @@ module KDL
259
391
  case c
260
392
  when /[01_]/
261
393
  traverse(1)
262
- @buffer += c
394
+ @buffer << c
263
395
  else
264
396
  return parse_binary(@buffer)
265
397
  end
266
398
  when :single_line_comment
267
- if NEWLINES.include?(c) || c == "\r"
399
+ case c
400
+ when *NEWLINES, "\r"
268
401
  self.context = nil
269
402
  @column_at_start = @column
270
403
  next
271
- elsif c.nil?
404
+ when nil
272
405
  @done = true
273
406
  return token(:EOF, :EOF)
274
407
  else
275
408
  traverse(1)
276
409
  end
277
410
  when :multi_line_comment
278
- if c == '/' && @str[@index + 1] == '*'
411
+ if c == '/' && self[@index + 1] == '*'
279
412
  @comment_nesting += 1
280
413
  traverse(2)
281
- elsif c == '*' && @str[@index + 1] == '/'
414
+ elsif c == '*' && self[@index + 1] == '/'
282
415
  @comment_nesting -= 1
283
416
  traverse(2)
284
417
  if @comment_nesting == 0
@@ -288,16 +421,42 @@ module KDL
288
421
  traverse(1)
289
422
  end
290
423
  when :whitespace
291
- if WHITEPACE.include?(c)
424
+ if WHITESPACE.include?(c)
292
425
  traverse(1)
293
- @buffer += c
294
- elsif c == "/" && @str[@index + 1] == '*'
426
+ @buffer << c
427
+ elsif c == '='
428
+ self.context = :equals
429
+ @buffer << c
430
+ traverse(1)
431
+ elsif c == "/" && self[@index + 1] == '*'
295
432
  self.context = :multi_line_comment
296
433
  @comment_nesting = 1
297
434
  traverse(2)
435
+ elsif c == "\\"
436
+ t = Tokenizer.new(@str, @index + 1)
437
+ la = t.next_token
438
+ if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
439
+ traverse_to(t.index)
440
+ @buffer << "#{c}#{la[1].value}"
441
+ @buffer << "\n" if lan == :NEWLINE
442
+ else
443
+ raise_error "Unexpected '\\' (#{la[0]})"
444
+ end
298
445
  else
299
- return token(:WS, @buffer)
446
+ return token(:WS, -@buffer)
300
447
  end
448
+ when :equals
449
+ t = Tokenizer.new(@str, @index)
450
+ la = t.next_token
451
+ if la[0] == :WS
452
+ @buffer << la[1].value
453
+ traverse_to(t.index)
454
+ end
455
+ return token(:EQUALS, -@buffer)
456
+ else
457
+ # :nocov:
458
+ raise_error "Unknown context `#{@context}'"
459
+ # :nocov:
301
460
  end
302
461
  end
303
462
  end
@@ -309,43 +468,69 @@ module KDL
309
468
  end
310
469
 
311
470
  def traverse(n = 1)
312
- @column += n
471
+ n.times do |i|
472
+ case self[@index + i]
473
+ when "\r"
474
+ @column = 1
475
+ when *NEWLINES
476
+ @line += 1
477
+ @column = 1
478
+ else
479
+ @column += 1
480
+ end
481
+ end
313
482
  @index += n
314
483
  end
315
484
 
316
- def raise_error(message)
317
- raise Error.new(message, @line, @column)
485
+ def traverse_to(i)
486
+ traverse(i - @index)
318
487
  end
319
488
 
320
- def new_line
321
- @column = 1
322
- @line += 1
489
+ def raise_error(error)
490
+ case error
491
+ when String then raise Error.new(error, @line, @column)
492
+ when Error then raise error
493
+ else raise Error.new(error.message, @line, @column)
494
+ end
323
495
  end
324
496
 
325
497
  def context=(val)
326
- if @type_context && !ALLOWED_IN_TYPE.include?(val)
498
+ if @type_context && !allowed_in_type?(val)
327
499
  raise_error "#{val} context not allowed in type declaration"
328
- elsif @last_token && @last_token[0] == :RPAREN && NOT_ALLOWED_AFTER_TYPE.include?(val)
500
+ elsif @last_token && @last_token[0] == :RPAREN && !allowed_after_type?(val)
329
501
  raise_error 'Comments are not allowed after a type declaration'
330
502
  end
331
503
  @previous_context = @context
332
504
  @context = val
333
505
  end
334
506
 
507
+ def allowed_in_type?(val)
508
+ %i[ident string rawstring multi_line_comment whitespace].include?(val)
509
+ end
510
+
511
+ def allowed_after_type?(val)
512
+ !%i[single_line_comment].include?(val)
513
+ end
514
+
335
515
  def revert_context
336
516
  @context = @previous_context
337
517
  @previous_context = nil
338
518
  end
339
519
 
340
- def parse_decimal(s)
341
- return parse_float(s) if s =~ /[.E]/i
342
-
343
- token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
344
- rescue
345
- if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
346
- token(:IDENT, s)
520
+ def expect_newline(i = @index)
521
+ c = self[i]
522
+ case c
523
+ when "\r"
524
+ n = self[i + 1]
525
+ if n == "\n"
526
+ "#{c}#{n}"
527
+ else
528
+ c
529
+ end
530
+ when *NEWLINES
531
+ c
347
532
  else
348
- raise
533
+ raise_error "Expected NEWLINE, found '#{c}'"
349
534
  end
350
535
  end
351
536
 
@@ -357,6 +542,18 @@ module KDL
357
542
  end
358
543
  end
359
544
 
545
+ def parse_decimal(s)
546
+ return parse_float(s) if s =~ /[.E]/i
547
+
548
+ token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
549
+ rescue => e
550
+ if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
551
+ token(:IDENT, -s)
552
+ else
553
+ raise_error(e)
554
+ end
555
+ end
556
+
360
557
  def parse_float(s)
361
558
  match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.([\d_]+))?(?:[eE]([-+]?[\d_]+))?$/)
362
559
  raise_error "Invalid floating point value #{s}" if match.nil?
@@ -375,40 +572,94 @@ module KDL
375
572
 
376
573
  def parse_hexadecimal(s)
377
574
  token(:INTEGER, Integer(munch_underscores(s), 16))
575
+ rescue ArgumentError => e
576
+ raise_error(e)
378
577
  end
379
578
 
380
579
  def parse_octal(s)
381
580
  token(:INTEGER, Integer(munch_underscores(s), 8))
581
+ rescue ArgumentError => e
582
+ raise_error(e)
382
583
  end
383
584
 
384
585
  def parse_binary(s)
385
586
  token(:INTEGER, Integer(munch_underscores(s), 2))
587
+ rescue ArgumentError => e
588
+ raise_error(e)
386
589
  end
387
590
 
388
591
  def munch_underscores(s)
389
592
  s.chomp('_').squeeze('_')
390
593
  end
391
594
 
392
- def convert_escapes(string)
393
- string.gsub(/\\[^u]/) do |m|
595
+ def unescape_ws(string)
596
+ string.gsub(/\\(\\|\s+)/) do |m|
394
597
  case m
395
- when '\n' then "\n"
396
- when '\r' then "\r"
397
- when '\t' then "\t"
398
- when '\\\\' then "\\"
399
- when '\"' then "\""
400
- when '\b' then "\b"
401
- when '\f' then "\f"
402
- when '\/' then "/"
403
- else raise_error "Unexpected escape #{m.inspect}"
598
+ when '\\\\' then '\\\\'
599
+ else ''
404
600
  end
405
- end.gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
406
- i = Integer(m[3..-2], 16)
407
- if i < 0 || i > 0x10FFFF
408
- raise_error "Invalid code point #{u}"
601
+ end
602
+ end
603
+
604
+ UNESCAPE = /\\(?:[#{WHITESPACE.join}#{NEWLINES.join}\r]+|[^u])/
605
+ UNESCAPE_NON_WS = /\\(?:[^u])/
606
+
607
+ def unescape_non_ws(string)
608
+ unescape(string, UNESCAPE_NON_WS)
609
+ end
610
+
611
+ def unescape(string, rgx = UNESCAPE)
612
+ string
613
+ .gsub(rgx) { |m| replace_esc(m) }
614
+ .gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
615
+ i = Integer(m[3..-2], 16)
616
+ if i < 0 || i > 0x10FFFF || (0xD800..0xDFFF).include?(i)
617
+ raise_error "Invalid code point #{m}"
618
+ end
619
+ i.chr(Encoding::UTF_8)
409
620
  end
410
- i.chr(Encoding::UTF_8)
621
+ end
622
+
623
+ def replace_esc(m)
624
+ case m
625
+ when '\n' then "\n"
626
+ when '\r' then "\r"
627
+ when '\t' then "\t"
628
+ when '\\\\' then "\\"
629
+ when '\"' then "\""
630
+ when '\b' then "\b"
631
+ when '\f' then "\f"
632
+ when '\s' then ' '
633
+ when /\\[#{WHITESPACE.join}#{NEWLINES.join}]+/ then ''
634
+ else raise_error "Unexpected escape #{m.inspect}"
411
635
  end
412
636
  end
637
+
638
+ def dedent(string)
639
+ split = string.split(NEWLINES_PATTERN)
640
+ lines = split.partition.with_index { |_, i| i.even? }.first
641
+ if split.last.match?(NEWLINES_PATTERN)
642
+ indent = ""
643
+ else
644
+ *lines, indent = lines
645
+ end
646
+ return "" if lines.empty?
647
+ raise_error "Invalid multiline string final line" unless indent.match?(WS_STAR)
648
+ valid = /\A#{Regexp.escape(indent)}(.*)/
649
+
650
+ lines.map do |line|
651
+ case line
652
+ when WS_STAR then ""
653
+ when valid then $1
654
+ else raise_error "Invalid multiline string indentation"
655
+ end
656
+ end.join("\n")
657
+ end
658
+
659
+ def debom(str)
660
+ return str unless str.start_with?("\uFEFF")
661
+
662
+ str[1..]
663
+ end
413
664
  end
414
665
  end