kdl 1.0.6 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +7 -1
  3. data/.gitignore +1 -0
  4. data/.gitmodules +4 -0
  5. data/Gemfile +6 -1
  6. data/README.md +51 -7
  7. data/Rakefile +6 -1
  8. data/bin/kdl +1 -1
  9. data/kdl.gemspec +2 -2
  10. data/lib/kdl/document.rb +58 -2
  11. data/lib/kdl/kdl.tab.rb +303 -228
  12. data/lib/kdl/kdl.yy +57 -49
  13. data/lib/kdl/node.rb +113 -12
  14. data/lib/kdl/parser_common.rb +26 -0
  15. data/lib/kdl/string_dumper.rb +30 -33
  16. data/lib/kdl/tokenizer.rb +350 -113
  17. data/lib/kdl/types/base64.rb +1 -1
  18. data/lib/kdl/types/country/iso3166_countries.rb +1 -1
  19. data/lib/kdl/types/country/iso3166_subdivisions.rb +1 -1
  20. data/lib/kdl/types/country.rb +2 -2
  21. data/lib/kdl/types/currency/iso4217_currencies.rb +1 -1
  22. data/lib/kdl/types/currency.rb +1 -1
  23. data/lib/kdl/types/date_time.rb +3 -3
  24. data/lib/kdl/types/decimal.rb +1 -1
  25. data/lib/kdl/types/duration/iso8601_parser.rb +1 -1
  26. data/lib/kdl/types/duration.rb +1 -1
  27. data/lib/kdl/types/email/parser.rb +1 -1
  28. data/lib/kdl/types/email.rb +1 -1
  29. data/lib/kdl/types/hostname/validator.rb +1 -1
  30. data/lib/kdl/types/hostname.rb +1 -1
  31. data/lib/kdl/types/ip.rb +1 -1
  32. data/lib/kdl/types/irl/parser.rb +1 -1
  33. data/lib/kdl/types/irl.rb +1 -1
  34. data/lib/kdl/types/regex.rb +1 -1
  35. data/lib/kdl/types/url.rb +1 -1
  36. data/lib/kdl/types/url_template.rb +1 -1
  37. data/lib/kdl/types/uuid.rb +1 -1
  38. data/lib/kdl/v1/document.rb +17 -0
  39. data/lib/kdl/v1/kdl.tab.rb +594 -0
  40. data/lib/kdl/v1/kdl.yy +89 -0
  41. data/lib/kdl/v1/node.rb +30 -0
  42. data/lib/kdl/v1/string_dumper.rb +28 -0
  43. data/lib/kdl/v1/tokenizer.rb +296 -0
  44. data/lib/kdl/v1/value.rb +89 -0
  45. data/lib/kdl/v1.rb +11 -0
  46. data/lib/kdl/value.rb +81 -12
  47. data/lib/kdl/version.rb +1 -1
  48. data/lib/kdl.rb +40 -1
  49. metadata +13 -4
data/lib/kdl/tokenizer.rb CHANGED
@@ -36,30 +36,43 @@ module KDL
36
36
  SYMBOLS = {
37
37
  '{' => :LBRACE,
38
38
  '}' => :RBRACE,
39
- '=' => :EQUALS,
40
- '' => :EQUALS,
41
- ';' => :SEMICOLON
39
+ ';' => :SEMICOLON,
40
+ '=' => :EQUALS
42
41
  }
43
42
 
44
- WHITEPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
45
- "\u2000", "\u2001", "\u2002", "\u2003",
46
- "\u2004", "\u2005", "\u2006", "\u2007",
47
- "\u2008", "\u2009", "\u200A", "\u202F",
48
- "\u205F", "\u3000" ]
43
+ WHITESPACE = ["\u0009", "\u000B", "\u0020", "\u00A0",
44
+ "\u1680", "\u2000", "\u2001", "\u2002",
45
+ "\u2003", "\u2004", "\u2005", "\u2006",
46
+ "\u2007", "\u2008", "\u2009", "\u200A",
47
+ "\u202F", "\u205F", "\u3000" ]
48
+ WS = "[#{Regexp.escape(WHITESPACE.join)}]"
49
+ WS_STAR = /\A#{WS}*\z/
50
+ WS_PLUS = /\A#{WS}+\z/
49
51
 
50
52
  NEWLINES = ["\u000A", "\u0085", "\u000C", "\u2028", "\u2029"]
53
+ NEWLINES_PATTERN = Regexp.new("(#{NEWLINES.map{Regexp.escape(_1)}.join('|')}|\r\n?)", Regexp::MULTILINE)
51
54
 
52
- NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join('')}()/\\<>[]\","
53
- IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}\x0-\x20]/
54
- INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9\x0-\x20]/
55
+ OTHER_NON_IDENTIFIER_CHARS = ("\x0".."\x20").to_a - WHITESPACE
55
56
 
56
- ALLOWED_IN_TYPE = [:ident, :string, :rawstring]
57
- NOT_ALLOWED_AFTER_TYPE = [:single_line_comment, :multi_line_comment]
57
+ NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join}()[]/\\\"##{WHITESPACE.join}#{OTHER_NON_IDENTIFIER_CHARS.join}"
58
+ IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}]/
59
+ INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9]/
60
+
61
+ FORBIDDEN = [
62
+ *"\u0000".."\u0008",
63
+ *"\u000E".."\u001F",
64
+ "\u007F",
65
+ *"\u200E".."\u200F",
66
+ *"\u202A".."\u202E",
67
+ *"\u2066".."\u2069",
68
+ "\uFEFF"
69
+ ]
58
70
 
59
71
  def initialize(str, start = 0)
60
- @str = str
72
+ @str = debom(str)
61
73
  @context = nil
62
74
  @rawstring_hashes = nil
75
+ @start = start
63
76
  @index = start
64
77
  @buffer = ""
65
78
  @done = false
@@ -70,54 +83,114 @@ module KDL
70
83
  @last_token = nil
71
84
  end
72
85
 
86
+ def version_directive
87
+ if m = @str.match(/\A\/-[#{WHITESPACE.join}]*kdl-version[#{WHITESPACE.join}]+(\d+)\s*[#{NEWLINES.join}]/)
88
+ m[1].to_i
89
+ end
90
+ end
91
+
92
+ def done?
93
+ @done
94
+ end
95
+
96
+ def [](i)
97
+ @str[i].tap do |c|
98
+ raise_error "Forbidden character: #{c.inspect}" if FORBIDDEN.include?(c)
99
+ end
100
+ end
101
+
102
+ def tokens
103
+ a = []
104
+ while !done?
105
+ a << next_token
106
+ end
107
+ a
108
+ end
109
+
73
110
  def next_token
74
111
  @context = nil
75
112
  @previous_context = nil
76
113
  @line_at_start = @line
77
114
  @column_at_start = @column
78
115
  loop do
79
- c = @str[@index]
116
+ c = self[@index]
80
117
  case @context
81
118
  when nil
82
119
  case c
83
120
  when '"'
84
- self.context = :string
85
- @buffer = ''
86
- traverse(1)
87
- when 'r'
88
- if @str[@index + 1] == '"'
89
- self.context = :rawstring
90
- traverse(2)
91
- @rawstring_hashes = 0
121
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
122
+ nl = expect_newline(@index + 3)
123
+ self.context = :multiline_string
92
124
  @buffer = ''
93
- next
94
- elsif @str[@index + 1] == '#'
95
- i = @index + 1
96
- @rawstring_hashes = 0
97
- while @str[i] == '#'
98
- @rawstring_hashes += 1
99
- i += 1
100
- end
101
- if @str[i] == '"'
125
+ traverse(3 + nl.length)
126
+ else
127
+ self.context = :string
128
+ @buffer = ''
129
+ traverse(1)
130
+ end
131
+ when '#'
132
+ if self[@index + 1] == '"'
133
+ if self[@index + 2] == '"' && self[@index + 3] == '"'
134
+ nl = expect_newline(@index + 4)
135
+ self.context = :multiline_rawstring
136
+ @rawstring_hashes = 1
137
+ @buffer = ''
138
+ traverse(4 + nl.length)
139
+ next
140
+ else
102
141
  self.context = :rawstring
103
- @index = i + 1
142
+ traverse(2)
143
+ @rawstring_hashes = 1
104
144
  @buffer = ''
105
145
  next
106
146
  end
147
+ elsif self[@index + 1] == '#'
148
+ i = @index + 2
149
+ @rawstring_hashes = 2
150
+ while self[i] == '#'
151
+ @rawstring_hashes += 1
152
+ i += 1
153
+ end
154
+ if self[i] == '"'
155
+ if self[i + 1] == '"' && self[i + 2] == '"'
156
+ nl = expect_newline(i + 3)
157
+ self.context = :multiline_rawstring
158
+ traverse(@rawstring_hashes + 3 + nl.length)
159
+ @buffer = ''
160
+ next
161
+ else
162
+ self.context = :rawstring
163
+ traverse(@rawstring_hashes + 1)
164
+ @buffer = ''
165
+ next
166
+ end
167
+ end
107
168
  end
108
- self.context = :ident
169
+ self.context = :keyword
109
170
  @buffer = c
110
171
  traverse(1)
111
- when /[0-9\-+]/
112
- n = @str[@index + 1]
172
+ when '-'
173
+ n = self[@index + 1]
174
+ if n =~ /[0-9]/
175
+ n2 = self[@index + 2]
176
+ if n == '0' && n2 =~ /[box]/
177
+ self.context = integer_context(n2)
178
+ traverse(3)
179
+ else
180
+ self.context = :decimal
181
+ traverse(1)
182
+ end
183
+ else
184
+ self.context = :ident
185
+ traverse(1)
186
+ end
187
+ @buffer = c
188
+ when /[0-9+]/
189
+ n = self[@index + 1]
113
190
  if c == '0' && n =~ /[box]/
114
191
  traverse(2)
115
192
  @buffer = ''
116
193
  self.context = integer_context(n)
117
- elsif c == '-' && n == '0' && (n2 = @str[@index + 2]) =~ /[box]/
118
- traverse(3)
119
- @buffer = '-'
120
- self.context = integer_context(n2)
121
194
  else
122
195
  self.context = :decimal
123
196
  @buffer = c
@@ -127,53 +200,46 @@ module KDL
127
200
  t = Tokenizer.new(@str, @index + 1)
128
201
  la = t.next_token
129
202
  if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
130
- @index = t.index
131
- new_line
132
- return token(:ESCLINE, "\\#{la[1].value}")
203
+ traverse_to(t.index)
204
+ @buffer = "#{c}#{la[1].value}"
205
+ @buffer += "\n" if lan == :NEWLINE
206
+ self.context = :whitespace
133
207
  else
134
208
  raise_error "Unexpected '\\' (#{la[0]})"
135
209
  end
210
+ when '='
211
+ self.context = :equals
212
+ @buffer = c
213
+ traverse(1)
136
214
  when *SYMBOLS.keys
137
215
  return token(SYMBOLS[c], c).tap { traverse(1) }
138
- when "\r"
139
- n = @str[@index + 1]
140
- if n == "\n"
141
- return token(:NEWLINE, "#{c}#{n}").tap do
142
- traverse(2)
143
- new_line
144
- end
145
- else
146
- return token(:NEWLINE, c).tap do
147
- traverse(1)
148
- new_line
149
- end
150
- end
151
- when *NEWLINES
152
- return token(:NEWLINE, c).tap do
153
- traverse(1)
154
- new_line
216
+ when *NEWLINES, "\r"
217
+ nl = expect_newline
218
+ return token(:NEWLINE, nl).tap do
219
+ traverse(nl.length)
155
220
  end
156
221
  when "/"
157
- if @str[@index + 1] == '/'
222
+ if self[@index + 1] == '/'
158
223
  self.context = :single_line_comment
159
224
  traverse(2)
160
- elsif @str[@index + 1] == '*'
225
+ elsif self[@index + 1] == '*'
161
226
  self.context = :multi_line_comment
162
227
  @comment_nesting = 1
163
228
  traverse(2)
164
- elsif @str[@index + 1] == '-'
229
+ elsif self[@index + 1] == '-'
165
230
  return token(:SLASHDASH, '/-').tap { traverse(2) }
166
231
  else
167
232
  self.context = :ident
168
233
  @buffer = c
169
234
  traverse(1)
170
235
  end
171
- when *WHITEPACE
236
+ when *WHITESPACE
172
237
  self.context = :whitespace
173
238
  @buffer = c
174
239
  traverse(1)
175
240
  when nil
176
241
  return [false, token(:EOF, :EOF)[1]] if @done
242
+
177
243
  @done = true
178
244
  return token(:EOF, :EOF)
179
245
  when INITIAL_IDENTIFIER_CHARS
@@ -196,37 +262,99 @@ module KDL
196
262
  @buffer += c
197
263
  else
198
264
  case @buffer
199
- when 'true' then return token(:TRUE, true)
200
- when 'false' then return token(:FALSE, false)
201
- when 'null' then return token(:NULL, nil)
202
- else return token(:IDENT, @buffer)
265
+ when 'true', 'false', 'null', 'inf', '-inf', 'nan'
266
+ raise_error "Identifier cannot be a literal"
267
+ when /\A\.\d/
268
+ raise_error "Identifier cannot look like an illegal float"
269
+ else
270
+ return token(:IDENT, @buffer)
271
+ end
272
+ end
273
+ when :keyword
274
+ case c
275
+ when /[a-z\-]/
276
+ traverse(1)
277
+ @buffer += c
278
+ else
279
+ case @buffer
280
+ when '#true' then return token(:TRUE, true)
281
+ when '#false' then return token(:FALSE, false)
282
+ when '#null' then return token(:NULL, nil)
283
+ when '#inf' then return token(:FLOAT, Float::INFINITY)
284
+ when '#-inf' then return token(:FLOAT, -Float::INFINITY)
285
+ when '#nan' then return token(:FLOAT, Float::NAN)
286
+ else raise_error "Unknown keyword #{@buffer.inspect}"
203
287
  end
204
288
  end
205
289
  when :string
206
290
  case c
207
291
  when '\\'
208
292
  @buffer += c
209
- @buffer += @str[@index + 1]
210
- traverse(2)
293
+ c2 = self[@index + 1]
294
+ @buffer += c2
295
+ if c2.match?(NEWLINES_PATTERN)
296
+ i = 2
297
+ while self[@index + i]&.match?(NEWLINES_PATTERN)
298
+ @buffer += self[@index + i]
299
+ i+=1
300
+ end
301
+ traverse(i)
302
+ else
303
+ traverse(2)
304
+ end
211
305
  when '"'
212
- return token(:STRING, convert_escapes(@buffer)).tap { traverse(1) }
306
+ return token(:STRING, unescape(@buffer)).tap { traverse(1) }
307
+ when *NEWLINES, "\r"
308
+ raise_error "Unexpected NEWLINE in string literal"
213
309
  when nil
214
310
  raise_error "Unterminated string literal"
215
311
  else
216
312
  @buffer += c
217
313
  traverse(1)
218
314
  end
315
+ when :multiline_string
316
+ case c
317
+ when '\\'
318
+ @buffer += c
319
+ @buffer += self[@index + 1]
320
+ traverse(2)
321
+ when '"'
322
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
323
+ return token(:STRING, unescape_non_ws(dedent(unescape_ws(@buffer)))).tap { traverse(3) }
324
+ end
325
+ @buffer += c
326
+ traverse(1)
327
+ when nil
328
+ raise_error "Unterminated multi-line string literal"
329
+ else
330
+ @buffer += c
331
+ traverse(1)
332
+ end
219
333
  when :rawstring
220
334
  raise_error "Unterminated rawstring literal" if c.nil?
221
335
 
222
- if c == '"'
336
+ case c
337
+ when '"'
223
338
  h = 0
224
- while @str[@index + 1 + h] == '#' && h < @rawstring_hashes
225
- h += 1
226
- end
339
+ h += 1 while self[@index + 1 + h] == '#' && h < @rawstring_hashes
227
340
  if h == @rawstring_hashes
228
341
  return token(:RAWSTRING, @buffer).tap { traverse(1 + h) }
229
342
  end
343
+ when *NEWLINES, "\r"
344
+ raise_error "Unexpected NEWLINE in rawstring literal"
345
+ end
346
+
347
+ @buffer += c
348
+ traverse(1)
349
+ when :multiline_rawstring
350
+ raise_error "Unterminated multi-line rawstring literal" if c.nil?
351
+
352
+ if c == '"' && self[@index + 1] == '"' && self[@index + 2] == '"' && self[@index + 3] == '#'
353
+ h = 1
354
+ h += 1 while self[@index + 3 + h] == '#' && h < @rawstring_hashes
355
+ if h == @rawstring_hashes
356
+ return token(:RAWSTRING, dedent(@buffer)).tap { traverse(3 + h) }
357
+ end
230
358
  end
231
359
 
232
360
  @buffer += c
@@ -264,21 +392,22 @@ module KDL
264
392
  return parse_binary(@buffer)
265
393
  end
266
394
  when :single_line_comment
267
- if NEWLINES.include?(c) || c == "\r"
395
+ case c
396
+ when *NEWLINES, "\r"
268
397
  self.context = nil
269
398
  @column_at_start = @column
270
399
  next
271
- elsif c.nil?
400
+ when nil
272
401
  @done = true
273
402
  return token(:EOF, :EOF)
274
403
  else
275
404
  traverse(1)
276
405
  end
277
406
  when :multi_line_comment
278
- if c == '/' && @str[@index + 1] == '*'
407
+ if c == '/' && self[@index + 1] == '*'
279
408
  @comment_nesting += 1
280
409
  traverse(2)
281
- elsif c == '*' && @str[@index + 1] == '/'
410
+ elsif c == '*' && self[@index + 1] == '/'
282
411
  @comment_nesting -= 1
283
412
  traverse(2)
284
413
  if @comment_nesting == 0
@@ -288,16 +417,42 @@ module KDL
288
417
  traverse(1)
289
418
  end
290
419
  when :whitespace
291
- if WHITEPACE.include?(c)
420
+ if WHITESPACE.include?(c)
292
421
  traverse(1)
293
422
  @buffer += c
294
- elsif c == "/" && @str[@index + 1] == '*'
423
+ elsif c == '='
424
+ self.context = :equals
425
+ @buffer += c
426
+ traverse(1)
427
+ elsif c == "/" && self[@index + 1] == '*'
295
428
  self.context = :multi_line_comment
296
429
  @comment_nesting = 1
297
430
  traverse(2)
431
+ elsif c == "\\"
432
+ t = Tokenizer.new(@str, @index + 1)
433
+ la = t.next_token
434
+ if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
435
+ traverse_to(t.index)
436
+ @buffer += "#{c}#{la[1].value}"
437
+ @buffer += "\n" if lan == :NEWLINE
438
+ else
439
+ raise_error "Unexpected '\\' (#{la[0]})"
440
+ end
298
441
  else
299
442
  return token(:WS, @buffer)
300
443
  end
444
+ when :equals
445
+ t = Tokenizer.new(@str, @index)
446
+ la = t.next_token
447
+ if la[0] == :WS
448
+ @buffer += la[1].value
449
+ traverse_to(t.index)
450
+ end
451
+ return token(:EQUALS, @buffer)
452
+ else
453
+ # :nocov:
454
+ raise_error "Unknown context `#{@context}'"
455
+ # :nocov:
301
456
  end
302
457
  end
303
458
  end
@@ -309,43 +464,65 @@ module KDL
309
464
  end
310
465
 
311
466
  def traverse(n = 1)
312
- @column += n
467
+ n.times do |i|
468
+ case self[@index + i]
469
+ when "\r"
470
+ @column = 1
471
+ when *NEWLINES
472
+ @line += 1
473
+ @column = 1
474
+ else
475
+ @column += 1
476
+ end
477
+ end
313
478
  @index += n
314
479
  end
315
480
 
316
- def raise_error(message)
317
- raise Error.new(message, @line, @column)
481
+ def traverse_to(i)
482
+ traverse(i - @index)
318
483
  end
319
484
 
320
- def new_line
321
- @column = 1
322
- @line += 1
485
+ def raise_error(message)
486
+ raise Error.new(message, @line, @column)
323
487
  end
324
488
 
325
489
  def context=(val)
326
- if @type_context && !ALLOWED_IN_TYPE.include?(val)
490
+ if @type_context && !allowed_in_type?(val)
327
491
  raise_error "#{val} context not allowed in type declaration"
328
- elsif @last_token && @last_token[0] == :RPAREN && NOT_ALLOWED_AFTER_TYPE.include?(val)
492
+ elsif @last_token && @last_token[0] == :RPAREN && !allowed_after_type?(val)
329
493
  raise_error 'Comments are not allowed after a type declaration'
330
494
  end
331
495
  @previous_context = @context
332
496
  @context = val
333
497
  end
334
498
 
499
+ def allowed_in_type?(val)
500
+ %i[ident string rawstring multi_line_comment whitespace].include?(val)
501
+ end
502
+
503
+ def allowed_after_type?(val)
504
+ !%i[single_line_comment].include?(val)
505
+ end
506
+
335
507
  def revert_context
336
508
  @context = @previous_context
337
509
  @previous_context = nil
338
510
  end
339
511
 
340
- def parse_decimal(s)
341
- return parse_float(s) if s =~ /[.E]/i
342
-
343
- token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
344
- rescue
345
- if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
346
- token(:IDENT, s)
512
+ def expect_newline(i = @index)
513
+ c = self[i]
514
+ case c
515
+ when "\r"
516
+ n = self[i + 1]
517
+ if n == "\n"
518
+ "#{c}#{n}"
519
+ else
520
+ c
521
+ end
522
+ when *NEWLINES
523
+ c
347
524
  else
348
- raise
525
+ raise_error "Expected NEWLINE, found '#{c}'"
349
526
  end
350
527
  end
351
528
 
@@ -357,6 +534,18 @@ module KDL
357
534
  end
358
535
  end
359
536
 
537
+ def parse_decimal(s)
538
+ return parse_float(s) if s =~ /[.E]/i
539
+
540
+ token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
541
+ rescue
542
+ if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
543
+ token(:IDENT, s)
544
+ else
545
+ raise
546
+ end
547
+ end
548
+
360
549
  def parse_float(s)
361
550
  match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.([\d_]+))?(?:[eE]([-+]?[\d_]+))?$/)
362
551
  raise_error "Invalid floating point value #{s}" if match.nil?
@@ -389,26 +578,74 @@ module KDL
389
578
  s.chomp('_').squeeze('_')
390
579
  end
391
580
 
392
- def convert_escapes(string)
393
- string.gsub(/\\[^u]/) do |m|
581
+ def unescape_ws(string)
582
+ string.gsub(/\\(\\|\s+)/) do |m|
394
583
  case m
395
- when '\n' then "\n"
396
- when '\r' then "\r"
397
- when '\t' then "\t"
398
- when '\\\\' then "\\"
399
- when '\"' then "\""
400
- when '\b' then "\b"
401
- when '\f' then "\f"
402
- when '\/' then "/"
403
- else raise_error "Unexpected escape #{m.inspect}"
584
+ when '\\\\' then '\\\\'
585
+ else ''
404
586
  end
405
- end.gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
406
- i = Integer(m[3..-2], 16)
407
- if i < 0 || i > 0x10FFFF
408
- raise_error "Invalid code point #{u}"
587
+ end
588
+ end
589
+
590
+ UNESCAPE = /\\(?:[#{WHITESPACE.join}#{NEWLINES.join}\r]+|[^u])/
591
+ UNESCAPE_NON_WS = /\\(?:[^u])/
592
+
593
+ def unescape_non_ws(string)
594
+ unescape(string, UNESCAPE_NON_WS)
595
+ end
596
+
597
+ def unescape(string, rgx = UNESCAPE)
598
+ string
599
+ .gsub(rgx) { |m| replace_esc(m) }
600
+ .gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
601
+ i = Integer(m[3..-2], 16)
602
+ if i < 0 || i > 0x10FFFF
603
+ raise_error "Invalid code point #{u}"
604
+ end
605
+ i.chr(Encoding::UTF_8)
409
606
  end
410
- i.chr(Encoding::UTF_8)
607
+ end
608
+
609
+ def replace_esc(m)
610
+ case m
611
+ when '\n' then "\n"
612
+ when '\r' then "\r"
613
+ when '\t' then "\t"
614
+ when '\\\\' then "\\"
615
+ when '\"' then "\""
616
+ when '\b' then "\b"
617
+ when '\f' then "\f"
618
+ when '\s' then ' '
619
+ when /\\[#{WHITESPACE.join}#{NEWLINES.join}]+/ then ''
620
+ else raise_error "Unexpected escape #{m.inspect}"
411
621
  end
412
622
  end
623
+
624
+ def dedent(string)
625
+ split = string.split(NEWLINES_PATTERN)
626
+ lines = split.partition.with_index { |_, i| i.even? }.first
627
+ if split.last.match?(NEWLINES_PATTERN)
628
+ indent = ""
629
+ else
630
+ *lines, indent = lines
631
+ end
632
+ return "" if lines.empty?
633
+ raise_error "Invalid multiline string final line" unless indent.match?(WS_STAR)
634
+ valid = /\A(?:#{Regexp.escape(indent)})(.*)/
635
+
636
+ lines.map! do |line|
637
+ case line
638
+ when WS_STAR then ""
639
+ when valid then $1
640
+ else raise_error "Invalid multiline string indentation"
641
+ end
642
+ end.join("\n")
643
+ end
644
+
645
+ def debom(str)
646
+ return str unless str.start_with?("\uFEFF")
647
+
648
+ str[1..]
649
+ end
413
650
  end
414
651
  end
data/lib/kdl/types/base64.rb CHANGED
@@ -2,7 +2,7 @@ require 'base64'
2
2
 
3
3
  module KDL
4
4
  module Types
5
- class Base64 < Value
5
+ class Base64 < Value::Custom
6
6
  RGX = /^[A-Za-z0-9+\/=]+$/.freeze
7
7
 
8
8
  def self.call(value, type = 'base64')
data/lib/kdl/types/country/iso3166_countries.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module KDL
2
2
  module Types
3
- class Country < Value
3
+ class Country < Value::Custom
4
4
  # From: https://en.wikipedia.org/wiki/ISO_3166-1#Current_codes
5
5
  COUNTRIES3 = {
6
6
  'AFG' => { alpha3: 'AFG', alpha2: 'AF', numeric_code: 4, name: 'Afghanistan' }.freeze,
data/lib/kdl/types/country/iso3166_subdivisions.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module KDL
2
2
  module Types
3
- class CountrySubdivision < Value
3
+ class CountrySubdivision < Value::Custom
4
4
  # From: https://en.wikipedia.org/wiki/ISO_3166-2#Current_codes
5
5
  COUNTRY_SUBDIVISIONS = {
6
6
  "AD" => {
data/lib/kdl/types/country.rb CHANGED
@@ -3,7 +3,7 @@ require 'kdl/types/country/iso3166_subdivisions'
3
3
 
4
4
  module KDL
5
5
  module Types
6
- class Country < Value
6
+ class Country < Value::Custom
7
7
  attr_reader :name, :alpha2, :alpha3, :numeric_code
8
8
 
9
9
  def initialize(value, format: nil, type: 'country-3')
@@ -42,7 +42,7 @@ module KDL
42
42
  end
43
43
  MAPPING['country-2'] = Country2
44
44
 
45
- class CountrySubdivision < Value
45
+ class CountrySubdivision < Value::Custom
46
46
  attr_reader :country, :name
47
47
 
48
48
  def initialize(value, type: 'country-subdivision', country:, name:, **kwargs)