kdl 1.0.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +8 -1
  3. data/.gitignore +1 -0
  4. data/.gitmodules +4 -0
  5. data/Gemfile +6 -1
  6. data/README.md +67 -7
  7. data/Rakefile +6 -1
  8. data/bin/kdl +1 -1
  9. data/kdl.gemspec +2 -2
  10. data/lib/kdl/document.rb +60 -2
  11. data/lib/kdl/error.rb +24 -0
  12. data/lib/kdl/kdl.tab.rb +305 -231
  13. data/lib/kdl/kdl.yy +57 -49
  14. data/lib/kdl/node.rb +116 -13
  15. data/lib/kdl/parser_common.rb +28 -0
  16. data/lib/kdl/string_dumper.rb +32 -33
  17. data/lib/kdl/tokenizer.rb +387 -136
  18. data/lib/kdl/types/base64.rb +3 -1
  19. data/lib/kdl/types/country/iso3166_countries.rb +3 -1
  20. data/lib/kdl/types/country/iso3166_subdivisions.rb +3 -1
  21. data/lib/kdl/types/country.rb +4 -2
  22. data/lib/kdl/types/currency/iso4217_currencies.rb +3 -1
  23. data/lib/kdl/types/currency.rb +3 -1
  24. data/lib/kdl/types/date_time.rb +5 -3
  25. data/lib/kdl/types/decimal.rb +3 -1
  26. data/lib/kdl/types/duration/iso8601_parser.rb +3 -1
  27. data/lib/kdl/types/duration.rb +3 -1
  28. data/lib/kdl/types/email/parser.rb +10 -8
  29. data/lib/kdl/types/email.rb +3 -1
  30. data/lib/kdl/types/hostname/validator.rb +3 -1
  31. data/lib/kdl/types/hostname.rb +3 -1
  32. data/lib/kdl/types/ip.rb +3 -1
  33. data/lib/kdl/types/irl/parser.rb +10 -8
  34. data/lib/kdl/types/irl.rb +3 -1
  35. data/lib/kdl/types/regex.rb +3 -1
  36. data/lib/kdl/types/url.rb +3 -1
  37. data/lib/kdl/types/url_template.rb +6 -4
  38. data/lib/kdl/types/uuid.rb +3 -1
  39. data/lib/kdl/types.rb +2 -0
  40. data/lib/kdl/v1/document.rb +19 -0
  41. data/lib/kdl/v1/kdl.tab.rb +594 -0
  42. data/lib/kdl/v1/kdl.yy +89 -0
  43. data/lib/kdl/v1/node.rb +32 -0
  44. data/lib/kdl/v1/string_dumper.rb +30 -0
  45. data/lib/kdl/v1/tokenizer.rb +298 -0
  46. data/lib/kdl/v1/value.rb +91 -0
  47. data/lib/kdl/v1.rb +13 -0
  48. data/lib/kdl/value.rb +87 -15
  49. data/lib/kdl/version.rb +3 -1
  50. data/lib/kdl.rb +47 -1
  51. metadata +14 -7
data/lib/kdl/tokenizer.rb CHANGED
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bigdecimal'
2
4
 
3
5
  module KDL
4
6
  class Tokenizer
5
- class Error < StandardError
7
+ class Error < ::KDL::Error
6
8
  def initialize(message, line, column)
7
9
  super("#{message} (#{line}:#{column})")
8
10
  end
@@ -36,32 +38,47 @@ module KDL
36
38
  SYMBOLS = {
37
39
  '{' => :LBRACE,
38
40
  '}' => :RBRACE,
39
- '=' => :EQUALS,
40
- '' => :EQUALS,
41
- ';' => :SEMICOLON
41
+ ';' => :SEMICOLON,
42
+ '=' => :EQUALS
42
43
  }
43
44
 
44
- WHITEPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
45
- "\u2000", "\u2001", "\u2002", "\u2003",
46
- "\u2004", "\u2005", "\u2006", "\u2007",
47
- "\u2008", "\u2009", "\u200A", "\u202F",
48
- "\u205F", "\u3000" ]
45
+ WHITESPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
46
+ "\u2000", "\u2001", "\u2002", "\u2003",
47
+ "\u2004", "\u2005", "\u2006", "\u2007",
48
+ "\u2008", "\u2009", "\u200A", "\u202F",
49
+ "\u205F", "\u3000"]
50
+ WS = "[#{Regexp.escape(WHITESPACE.join)}]"
51
+ WS_STAR = /\A#{WS}*\z/
52
+ WS_PLUS = /\A#{WS}+\z/
53
+
54
+ NEWLINES = ["\u000A", "\u0085", "\u000B", "\u000C", "\u2028", "\u2029"]
55
+ NEWLINES_PATTERN = Regexp.new("(#{NEWLINES.map{Regexp.escape(_1)}.join('|')}|\r\n?)", Regexp::MULTILINE)
49
56
 
50
- NEWLINES = ["\u000A", "\u0085", "\u000C", "\u2028", "\u2029"]
57
+ OTHER_NON_IDENTIFIER_CHARS = ("\x0".."\x20").to_a - WHITESPACE
51
58
 
52
- NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join('')}()/\\<>[]\","
53
- IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}\x0-\x20]/
54
- INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9\x0-\x20]/
59
+ NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join}()[]/\\\"##{WHITESPACE.join}#{OTHER_NON_IDENTIFIER_CHARS.join}"
60
+ IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}]/
61
+ INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9]/
55
62
 
56
- ALLOWED_IN_TYPE = [:ident, :string, :rawstring]
57
- NOT_ALLOWED_AFTER_TYPE = [:single_line_comment, :multi_line_comment]
63
+ FORBIDDEN = [
64
+ *"\u0000".."\u0008",
65
+ *"\u000E".."\u001F",
66
+ "\u007F",
67
+ *"\u200E".."\u200F",
68
+ *"\u202A".."\u202E",
69
+ *"\u2066".."\u2069",
70
+ "\uFEFF"
71
+ ]
72
+
73
+ VERSION_PATTERN = /\A\/-[#{WHITESPACE.join}]*kdl-version[#{WHITESPACE.join}]+(\d+)[#{WHITESPACE.join}]*[#{NEWLINES.join}]/
58
74
 
59
75
  def initialize(str, start = 0)
60
- @str = str
76
+ @str = debom(str)
61
77
  @context = nil
62
78
  @rawstring_hashes = nil
79
+ @start = start
63
80
  @index = start
64
- @buffer = ""
81
+ @buffer = +""
65
82
  @done = false
66
83
  @previous_context = nil
67
84
  @line = 1
@@ -70,122 +87,175 @@ module KDL
70
87
  @last_token = nil
71
88
  end
72
89
 
90
+ def version_directive
91
+ if m = @str.match(VERSION_PATTERN)
92
+ m[1].to_i
93
+ end
94
+ end
95
+
96
+ def done?
97
+ @done
98
+ end
99
+
100
+ def [](i)
101
+ @str[i].tap do |c|
102
+ raise_error "Forbidden character: #{c.inspect}" if FORBIDDEN.include?(c)
103
+ end
104
+ end
105
+
106
+ def tokens
107
+ a = []
108
+ while !done?
109
+ a << next_token
110
+ end
111
+ a
112
+ end
113
+
73
114
  def next_token
74
115
  @context = nil
75
116
  @previous_context = nil
76
117
  @line_at_start = @line
77
118
  @column_at_start = @column
78
119
  loop do
79
- c = @str[@index]
120
+ c = self[@index]
80
121
  case @context
81
122
  when nil
82
123
  case c
83
124
  when '"'
84
- self.context = :string
85
- @buffer = ''
86
- traverse(1)
87
- when 'r'
88
- if @str[@index + 1] == '"'
89
- self.context = :rawstring
90
- traverse(2)
91
- @rawstring_hashes = 0
92
- @buffer = ''
93
- next
94
- elsif @str[@index + 1] == '#'
95
- i = @index + 1
96
- @rawstring_hashes = 0
97
- while @str[i] == '#'
125
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
126
+ nl = expect_newline(@index + 3)
127
+ self.context = :multiline_string
128
+ @buffer = +''
129
+ traverse(3 + nl.length)
130
+ else
131
+ self.context = :string
132
+ @buffer = +''
133
+ traverse(1)
134
+ end
135
+ when '#'
136
+ if self[@index + 1] == '"'
137
+ if self[@index + 2] == '"' && self[@index + 3] == '"'
138
+ nl = expect_newline(@index + 4)
139
+ self.context = :multiline_rawstring
140
+ @rawstring_hashes = 1
141
+ @buffer = +''
142
+ traverse(4 + nl.length)
143
+ next
144
+ else
145
+ self.context = :rawstring
146
+ traverse(2)
147
+ @rawstring_hashes = 1
148
+ @buffer = +''
149
+ next
150
+ end
151
+ elsif self[@index + 1] == '#'
152
+ i = @index + 2
153
+ @rawstring_hashes = 2
154
+ while self[i] == '#'
98
155
  @rawstring_hashes += 1
99
156
  i += 1
100
157
  end
101
- if @str[i] == '"'
102
- self.context = :rawstring
103
- @index = i + 1
104
- @buffer = ''
105
- next
158
+ if self[i] == '"'
159
+ if self[i + 1] == '"' && self[i + 2] == '"'
160
+ nl = expect_newline(i + 3)
161
+ self.context = :multiline_rawstring
162
+ traverse(@rawstring_hashes + 3 + nl.length)
163
+ @buffer = +''
164
+ next
165
+ else
166
+ self.context = :rawstring
167
+ traverse(@rawstring_hashes + 1)
168
+ @buffer = +''
169
+ next
170
+ end
106
171
  end
107
172
  end
108
- self.context = :ident
109
- @buffer = c
173
+ self.context = :keyword
174
+ @buffer = +c
110
175
  traverse(1)
111
- when /[0-9\-+]/
112
- n = @str[@index + 1]
176
+ when '-'
177
+ n = self[@index + 1]
178
+ if n =~ /[0-9]/
179
+ n2 = self[@index + 2]
180
+ if n == '0' && n2 =~ /[box]/
181
+ self.context = integer_context(n2)
182
+ traverse(3)
183
+ else
184
+ self.context = :decimal
185
+ traverse(1)
186
+ end
187
+ else
188
+ self.context = :ident
189
+ traverse(1)
190
+ end
191
+ @buffer = +c
192
+ when /[0-9+]/
193
+ n = self[@index + 1]
113
194
  if c == '0' && n =~ /[box]/
114
195
  traverse(2)
115
- @buffer = ''
196
+ @buffer = +''
116
197
  self.context = integer_context(n)
117
- elsif c == '-' && n == '0' && (n2 = @str[@index + 2]) =~ /[box]/
118
- traverse(3)
119
- @buffer = '-'
120
- self.context = integer_context(n2)
121
198
  else
122
199
  self.context = :decimal
123
- @buffer = c
200
+ @buffer = +c
124
201
  traverse(1)
125
202
  end
126
203
  when '\\'
127
204
  t = Tokenizer.new(@str, @index + 1)
128
205
  la = t.next_token
129
206
  if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
130
- @index = t.index
131
- new_line
132
- return token(:ESCLINE, "\\#{la[1].value}")
207
+ traverse_to(t.index)
208
+ @buffer = "#{c}#{la[1].value}"
209
+ @buffer << "\n" if lan == :NEWLINE
210
+ self.context = :whitespace
133
211
  else
134
212
  raise_error "Unexpected '\\' (#{la[0]})"
135
213
  end
214
+ when '='
215
+ self.context = :equals
216
+ @buffer = +c
217
+ traverse(1)
136
218
  when *SYMBOLS.keys
137
- return token(SYMBOLS[c], c).tap { traverse(1) }
138
- when "\r"
139
- n = @str[@index + 1]
140
- if n == "\n"
141
- return token(:NEWLINE, "#{c}#{n}").tap do
142
- traverse(2)
143
- new_line
144
- end
145
- else
146
- return token(:NEWLINE, c).tap do
147
- traverse(1)
148
- new_line
149
- end
150
- end
151
- when *NEWLINES
152
- return token(:NEWLINE, c).tap do
153
- traverse(1)
154
- new_line
219
+ return token(SYMBOLS[c], -c).tap { traverse(1) }
220
+ when *NEWLINES, "\r"
221
+ nl = expect_newline
222
+ return token(:NEWLINE, -nl).tap do
223
+ traverse(nl.length)
155
224
  end
156
225
  when "/"
157
- if @str[@index + 1] == '/'
226
+ if self[@index + 1] == '/'
158
227
  self.context = :single_line_comment
159
228
  traverse(2)
160
- elsif @str[@index + 1] == '*'
229
+ elsif self[@index + 1] == '*'
161
230
  self.context = :multi_line_comment
162
231
  @comment_nesting = 1
163
232
  traverse(2)
164
- elsif @str[@index + 1] == '-'
233
+ elsif self[@index + 1] == '-'
165
234
  return token(:SLASHDASH, '/-').tap { traverse(2) }
166
235
  else
167
236
  self.context = :ident
168
- @buffer = c
237
+ @buffer = +c
169
238
  traverse(1)
170
239
  end
171
- when *WHITEPACE
240
+ when *WHITESPACE
172
241
  self.context = :whitespace
173
- @buffer = c
242
+ @buffer = +c
174
243
  traverse(1)
175
244
  when nil
176
245
  return [false, token(:EOF, :EOF)[1]] if @done
246
+
177
247
  @done = true
178
248
  return token(:EOF, :EOF)
179
249
  when INITIAL_IDENTIFIER_CHARS
180
250
  self.context = :ident
181
- @buffer = c
251
+ @buffer = +c
182
252
  traverse(1)
183
253
  when '('
184
254
  @type_context = true
185
- return token(:LPAREN, c).tap { traverse(1) }
255
+ return token(:LPAREN, -c).tap { traverse(1) }
186
256
  when ')'
187
257
  @type_context = false
188
- return token(:RPAREN, c).tap { traverse(1) }
258
+ return token(:RPAREN, -c).tap { traverse(1) }
189
259
  else
190
260
  raise_error "Unexpected character #{c.inspect}"
191
261
  end
@@ -193,49 +263,111 @@ module KDL
193
263
  case c
194
264
  when IDENTIFIER_CHARS
195
265
  traverse(1)
196
- @buffer += c
266
+ @buffer << c
267
+ else
268
+ case @buffer
269
+ when 'true', 'false', 'null', 'inf', '-inf', 'nan'
270
+ raise_error "Identifier cannot be a literal"
271
+ when /\A\.\d/
272
+ raise_error "Identifier cannot look like an illegal float"
273
+ else
274
+ return token(:IDENT, -@buffer)
275
+ end
276
+ end
277
+ when :keyword
278
+ case c
279
+ when /[a-z\-]/
280
+ traverse(1)
281
+ @buffer << c
197
282
  else
198
283
  case @buffer
199
- when 'true' then return token(:TRUE, true)
200
- when 'false' then return token(:FALSE, false)
201
- when 'null' then return token(:NULL, nil)
202
- else return token(:IDENT, @buffer)
284
+ when '#true' then return token(:TRUE, true)
285
+ when '#false' then return token(:FALSE, false)
286
+ when '#null' then return token(:NULL, nil)
287
+ when '#inf' then return token(:FLOAT, Float::INFINITY)
288
+ when '#-inf' then return token(:FLOAT, -Float::INFINITY)
289
+ when '#nan' then return token(:FLOAT, Float::NAN)
290
+ else raise_error "Unknown keyword #{@buffer.inspect}"
203
291
  end
204
292
  end
205
293
  when :string
206
294
  case c
207
295
  when '\\'
208
- @buffer += c
209
- @buffer += @str[@index + 1]
210
- traverse(2)
296
+ @buffer << c
297
+ c2 = self[@index + 1]
298
+ @buffer << c2
299
+ if c2.match?(NEWLINES_PATTERN)
300
+ i = 2
301
+ while self[@index + i]&.match?(NEWLINES_PATTERN)
302
+ @buffer << self[@index + i]
303
+ i+=1
304
+ end
305
+ traverse(i)
306
+ else
307
+ traverse(2)
308
+ end
211
309
  when '"'
212
- return token(:STRING, convert_escapes(@buffer)).tap { traverse(1) }
310
+ return token(:STRING, -unescape(@buffer)).tap { traverse(1) }
311
+ when *NEWLINES, "\r"
312
+ raise_error "Unexpected NEWLINE in string literal"
213
313
  when nil
214
314
  raise_error "Unterminated string literal"
215
315
  else
216
- @buffer += c
316
+ @buffer << c
317
+ traverse(1)
318
+ end
319
+ when :multiline_string
320
+ case c
321
+ when '\\'
322
+ @buffer << c
323
+ @buffer << self[@index + 1]
324
+ traverse(2)
325
+ when '"'
326
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
327
+ return token(:STRING, -unescape_non_ws(dedent(unescape_ws(@buffer)))).tap { traverse(3) }
328
+ end
329
+ @buffer << c
330
+ traverse(1)
331
+ when nil
332
+ raise_error "Unterminated multi-line string literal"
333
+ else
334
+ @buffer << c
217
335
  traverse(1)
218
336
  end
219
337
  when :rawstring
220
338
  raise_error "Unterminated rawstring literal" if c.nil?
221
339
 
222
- if c == '"'
340
+ case c
341
+ when '"'
223
342
  h = 0
224
- while @str[@index + 1 + h] == '#' && h < @rawstring_hashes
225
- h += 1
343
+ h += 1 while self[@index + 1 + h] == '#' && h < @rawstring_hashes
344
+ if h == @rawstring_hashes
345
+ return token(:RAWSTRING, -@buffer).tap { traverse(1 + h) }
226
346
  end
347
+ when *NEWLINES, "\r"
348
+ raise_error "Unexpected NEWLINE in rawstring literal"
349
+ end
350
+
351
+ @buffer << c
352
+ traverse(1)
353
+ when :multiline_rawstring
354
+ raise_error "Unterminated multi-line rawstring literal" if c.nil?
355
+
356
+ if c == '"' && self[@index + 1] == '"' && self[@index + 2] == '"' && self[@index + 3] == '#'
357
+ h = 1
358
+ h += 1 while self[@index + 3 + h] == '#' && h < @rawstring_hashes
227
359
  if h == @rawstring_hashes
228
- return token(:RAWSTRING, @buffer).tap { traverse(1 + h) }
360
+ return token(:RAWSTRING, -dedent(@buffer)).tap { traverse(3 + h) }
229
361
  end
230
362
  end
231
363
 
232
- @buffer += c
364
+ @buffer << c
233
365
  traverse(1)
234
366
  when :decimal
235
367
  case c
236
368
  when /[0-9.\-+_eE]/
237
369
  traverse(1)
238
- @buffer += c
370
+ @buffer << c
239
371
  else
240
372
  return parse_decimal(@buffer)
241
373
  end
@@ -243,7 +375,7 @@ module KDL
243
375
  case c
244
376
  when /[0-9a-fA-F_]/
245
377
  traverse(1)
246
- @buffer += c
378
+ @buffer << c
247
379
  else
248
380
  return parse_hexadecimal(@buffer)
249
381
  end
@@ -251,7 +383,7 @@ module KDL
251
383
  case c
252
384
  when /[0-7_]/
253
385
  traverse(1)
254
- @buffer += c
386
+ @buffer << c
255
387
  else
256
388
  return parse_octal(@buffer)
257
389
  end
@@ -259,26 +391,27 @@ module KDL
259
391
  case c
260
392
  when /[01_]/
261
393
  traverse(1)
262
- @buffer += c
394
+ @buffer << c
263
395
  else
264
396
  return parse_binary(@buffer)
265
397
  end
266
398
  when :single_line_comment
267
- if NEWLINES.include?(c) || c == "\r"
399
+ case c
400
+ when *NEWLINES, "\r"
268
401
  self.context = nil
269
402
  @column_at_start = @column
270
403
  next
271
- elsif c.nil?
404
+ when nil
272
405
  @done = true
273
406
  return token(:EOF, :EOF)
274
407
  else
275
408
  traverse(1)
276
409
  end
277
410
  when :multi_line_comment
278
- if c == '/' && @str[@index + 1] == '*'
411
+ if c == '/' && self[@index + 1] == '*'
279
412
  @comment_nesting += 1
280
413
  traverse(2)
281
- elsif c == '*' && @str[@index + 1] == '/'
414
+ elsif c == '*' && self[@index + 1] == '/'
282
415
  @comment_nesting -= 1
283
416
  traverse(2)
284
417
  if @comment_nesting == 0
@@ -288,16 +421,42 @@ module KDL
288
421
  traverse(1)
289
422
  end
290
423
  when :whitespace
291
- if WHITEPACE.include?(c)
424
+ if WHITESPACE.include?(c)
292
425
  traverse(1)
293
- @buffer += c
294
- elsif c == "/" && @str[@index + 1] == '*'
426
+ @buffer << c
427
+ elsif c == '='
428
+ self.context = :equals
429
+ @buffer << c
430
+ traverse(1)
431
+ elsif c == "/" && self[@index + 1] == '*'
295
432
  self.context = :multi_line_comment
296
433
  @comment_nesting = 1
297
434
  traverse(2)
435
+ elsif c == "\\"
436
+ t = Tokenizer.new(@str, @index + 1)
437
+ la = t.next_token
438
+ if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
439
+ traverse_to(t.index)
440
+ @buffer << "#{c}#{la[1].value}"
441
+ @buffer << "\n" if lan == :NEWLINE
442
+ else
443
+ raise_error "Unexpected '\\' (#{la[0]})"
444
+ end
298
445
  else
299
- return token(:WS, @buffer)
446
+ return token(:WS, -@buffer)
300
447
  end
448
+ when :equals
449
+ t = Tokenizer.new(@str, @index)
450
+ la = t.next_token
451
+ if la[0] == :WS
452
+ @buffer << la[1].value
453
+ traverse_to(t.index)
454
+ end
455
+ return token(:EQUALS, -@buffer)
456
+ else
457
+ # :nocov:
458
+ raise_error "Unknown context `#{@context}'"
459
+ # :nocov:
301
460
  end
302
461
  end
303
462
  end
@@ -309,43 +468,69 @@ module KDL
309
468
  end
310
469
 
311
470
  def traverse(n = 1)
312
- @column += n
471
+ n.times do |i|
472
+ case self[@index + i]
473
+ when "\r"
474
+ @column = 1
475
+ when *NEWLINES
476
+ @line += 1
477
+ @column = 1
478
+ else
479
+ @column += 1
480
+ end
481
+ end
313
482
  @index += n
314
483
  end
315
484
 
316
- def raise_error(message)
317
- raise Error.new(message, @line, @column)
485
+ def traverse_to(i)
486
+ traverse(i - @index)
318
487
  end
319
488
 
320
- def new_line
321
- @column = 1
322
- @line += 1
489
+ def raise_error(error)
490
+ case error
491
+ when String then raise Error.new(error, @line, @column)
492
+ when Error then raise error
493
+ else raise Error.new(error.message, @line, @column)
494
+ end
323
495
  end
324
496
 
325
497
  def context=(val)
326
- if @type_context && !ALLOWED_IN_TYPE.include?(val)
498
+ if @type_context && !allowed_in_type?(val)
327
499
  raise_error "#{val} context not allowed in type declaration"
328
- elsif @last_token && @last_token[0] == :RPAREN && NOT_ALLOWED_AFTER_TYPE.include?(val)
500
+ elsif @last_token && @last_token[0] == :RPAREN && !allowed_after_type?(val)
329
501
  raise_error 'Comments are not allowed after a type declaration'
330
502
  end
331
503
  @previous_context = @context
332
504
  @context = val
333
505
  end
334
506
 
507
+ def allowed_in_type?(val)
508
+ %i[ident string rawstring multi_line_comment whitespace].include?(val)
509
+ end
510
+
511
+ def allowed_after_type?(val)
512
+ !%i[single_line_comment].include?(val)
513
+ end
514
+
335
515
  def revert_context
336
516
  @context = @previous_context
337
517
  @previous_context = nil
338
518
  end
339
519
 
340
- def parse_decimal(s)
341
- return parse_float(s) if s =~ /[.E]/i
342
-
343
- token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
344
- rescue
345
- if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
346
- token(:IDENT, s)
520
+ def expect_newline(i = @index)
521
+ c = self[i]
522
+ case c
523
+ when "\r"
524
+ n = self[i + 1]
525
+ if n == "\n"
526
+ "#{c}#{n}"
527
+ else
528
+ c
529
+ end
530
+ when *NEWLINES
531
+ c
347
532
  else
348
- raise
533
+ raise_error "Expected NEWLINE, found '#{c}'"
349
534
  end
350
535
  end
351
536
 
@@ -357,6 +542,18 @@ module KDL
357
542
  end
358
543
  end
359
544
 
545
+ def parse_decimal(s)
546
+ return parse_float(s) if s =~ /[.E]/i
547
+
548
+ token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
549
+ rescue => e
550
+ if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
551
+ token(:IDENT, -s)
552
+ else
553
+ raise_error(e)
554
+ end
555
+ end
556
+
360
557
  def parse_float(s)
361
558
  match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.([\d_]+))?(?:[eE]([-+]?[\d_]+))?$/)
362
559
  raise_error "Invalid floating point value #{s}" if match.nil?
@@ -375,40 +572,94 @@ module KDL
375
572
 
376
573
  def parse_hexadecimal(s)
377
574
  token(:INTEGER, Integer(munch_underscores(s), 16))
575
+ rescue ArgumentError => e
576
+ raise_error(e)
378
577
  end
379
578
 
380
579
  def parse_octal(s)
381
580
  token(:INTEGER, Integer(munch_underscores(s), 8))
581
+ rescue ArgumentError => e
582
+ raise_error(e)
382
583
  end
383
584
 
384
585
  def parse_binary(s)
385
586
  token(:INTEGER, Integer(munch_underscores(s), 2))
587
+ rescue ArgumentError => e
588
+ raise_error(e)
386
589
  end
387
590
 
388
591
  def munch_underscores(s)
389
592
  s.chomp('_').squeeze('_')
390
593
  end
391
594
 
392
- def convert_escapes(string)
393
- string.gsub(/\\[^u]/) do |m|
595
+ def unescape_ws(string)
596
+ string.gsub(/\\(\\|\s+)/) do |m|
394
597
  case m
395
- when '\n' then "\n"
396
- when '\r' then "\r"
397
- when '\t' then "\t"
398
- when '\\\\' then "\\"
399
- when '\"' then "\""
400
- when '\b' then "\b"
401
- when '\f' then "\f"
402
- when '\/' then "/"
403
- else raise_error "Unexpected escape #{m.inspect}"
598
+ when '\\\\' then '\\\\'
599
+ else ''
404
600
  end
405
- end.gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
406
- i = Integer(m[3..-2], 16)
407
- if i < 0 || i > 0x10FFFF
408
- raise_error "Invalid code point #{u}"
601
+ end
602
+ end
603
+
604
+ UNESCAPE = /\\(?:[#{WHITESPACE.join}#{NEWLINES.join}\r]+|[^u])/
605
+ UNESCAPE_NON_WS = /\\(?:[^u])/
606
+
607
+ def unescape_non_ws(string)
608
+ unescape(string, UNESCAPE_NON_WS)
609
+ end
610
+
611
+ def unescape(string, rgx = UNESCAPE)
612
+ string
613
+ .gsub(rgx) { |m| replace_esc(m) }
614
+ .gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
615
+ i = Integer(m[3..-2], 16)
616
+ if i < 0 || i > 0x10FFFF || (0xD800..0xDFFF).include?(i)
617
+ raise_error "Invalid code point #{m}"
618
+ end
619
+ i.chr(Encoding::UTF_8)
409
620
  end
410
- i.chr(Encoding::UTF_8)
621
+ end
622
+
623
+ def replace_esc(m)
624
+ case m
625
+ when '\n' then "\n"
626
+ when '\r' then "\r"
627
+ when '\t' then "\t"
628
+ when '\\\\' then "\\"
629
+ when '\"' then "\""
630
+ when '\b' then "\b"
631
+ when '\f' then "\f"
632
+ when '\s' then ' '
633
+ when /\\[#{WHITESPACE.join}#{NEWLINES.join}]+/ then ''
634
+ else raise_error "Unexpected escape #{m.inspect}"
411
635
  end
412
636
  end
637
+
638
+ def dedent(string)
639
+ split = string.split(NEWLINES_PATTERN)
640
+ lines = split.partition.with_index { |_, i| i.even? }.first
641
+ if split.last.match?(NEWLINES_PATTERN)
642
+ indent = ""
643
+ else
644
+ *lines, indent = lines
645
+ end
646
+ return "" if lines.empty?
647
+ raise_error "Invalid multiline string final line" unless indent.match?(WS_STAR)
648
+ valid = /\A#{Regexp.escape(indent)}(.*)/
649
+
650
+ lines.map do |line|
651
+ case line
652
+ when WS_STAR then ""
653
+ when valid then $1
654
+ else raise_error "Invalid multiline string indentation"
655
+ end
656
+ end.join("\n")
657
+ end
658
+
659
+ def debom(str)
660
+ return str unless str.start_with?("\uFEFF")
661
+
662
+ str[1..]
663
+ end
413
664
  end
414
665
  end