kdl 1.0.5 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +7 -1
  3. data/.gitignore +1 -0
  4. data/.gitmodules +4 -0
  5. data/Gemfile +6 -1
  6. data/README.md +51 -7
  7. data/Rakefile +6 -1
  8. data/bin/kdl +1 -1
  9. data/kdl.gemspec +2 -2
  10. data/lib/kdl/document.rb +58 -2
  11. data/lib/kdl/kdl.tab.rb +303 -228
  12. data/lib/kdl/kdl.yy +57 -49
  13. data/lib/kdl/node.rb +113 -12
  14. data/lib/kdl/parser_common.rb +26 -0
  15. data/lib/kdl/string_dumper.rb +30 -33
  16. data/lib/kdl/tokenizer.rb +350 -113
  17. data/lib/kdl/types/base64.rb +1 -1
  18. data/lib/kdl/types/country/iso3166_countries.rb +1 -1
  19. data/lib/kdl/types/country/iso3166_subdivisions.rb +1 -1
  20. data/lib/kdl/types/country.rb +2 -2
  21. data/lib/kdl/types/currency/iso4217_currencies.rb +1 -1
  22. data/lib/kdl/types/currency.rb +1 -1
  23. data/lib/kdl/types/date_time.rb +3 -3
  24. data/lib/kdl/types/decimal.rb +1 -1
  25. data/lib/kdl/types/duration/iso8601_parser.rb +1 -1
  26. data/lib/kdl/types/duration.rb +1 -1
  27. data/lib/kdl/types/email/parser.rb +2 -2
  28. data/lib/kdl/types/email.rb +1 -1
  29. data/lib/kdl/types/hostname/validator.rb +1 -1
  30. data/lib/kdl/types/hostname.rb +1 -1
  31. data/lib/kdl/types/ip.rb +1 -1
  32. data/lib/kdl/types/irl/parser.rb +1 -1
  33. data/lib/kdl/types/irl.rb +1 -1
  34. data/lib/kdl/types/regex.rb +1 -1
  35. data/lib/kdl/types/url.rb +1 -1
  36. data/lib/kdl/types/url_template.rb +1 -1
  37. data/lib/kdl/types/uuid.rb +1 -1
  38. data/lib/kdl/v1/document.rb +17 -0
  39. data/lib/kdl/v1/kdl.tab.rb +594 -0
  40. data/lib/kdl/v1/kdl.yy +89 -0
  41. data/lib/kdl/v1/node.rb +30 -0
  42. data/lib/kdl/v1/string_dumper.rb +28 -0
  43. data/lib/kdl/v1/tokenizer.rb +296 -0
  44. data/lib/kdl/v1/value.rb +89 -0
  45. data/lib/kdl/v1.rb +11 -0
  46. data/lib/kdl/value.rb +81 -12
  47. data/lib/kdl/version.rb +1 -1
  48. data/lib/kdl.rb +40 -1
  49. metadata +13 -4
data/lib/kdl/tokenizer.rb CHANGED
@@ -36,30 +36,43 @@ module KDL
36
36
  SYMBOLS = {
37
37
  '{' => :LBRACE,
38
38
  '}' => :RBRACE,
39
- '=' => :EQUALS,
40
- '' => :EQUALS,
41
- ';' => :SEMICOLON
39
+ ';' => :SEMICOLON,
40
+ '=' => :EQUALS
42
41
  }
43
42
 
44
- WHITEPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
45
- "\u2000", "\u2001", "\u2002", "\u2003",
46
- "\u2004", "\u2005", "\u2006", "\u2007",
47
- "\u2008", "\u2009", "\u200A", "\u202F",
48
- "\u205F", "\u3000" ]
43
+ WHITESPACE = ["\u0009", "\u000B", "\u0020", "\u00A0",
44
+ "\u1680", "\u2000", "\u2001", "\u2002",
45
+ "\u2003", "\u2004", "\u2005", "\u2006",
46
+ "\u2007", "\u2008", "\u2009", "\u200A",
47
+ "\u202F", "\u205F", "\u3000" ]
48
+ WS = "[#{Regexp.escape(WHITESPACE.join)}]"
49
+ WS_STAR = /\A#{WS}*\z/
50
+ WS_PLUS = /\A#{WS}+\z/
49
51
 
50
52
  NEWLINES = ["\u000A", "\u0085", "\u000C", "\u2028", "\u2029"]
53
+ NEWLINES_PATTERN = Regexp.new("(#{NEWLINES.map{Regexp.escape(_1)}.join('|')}|\r\n?)", Regexp::MULTILINE)
51
54
 
52
- NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join('')}()/\\<>[]\","
53
- IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}\x0-\x20]/
54
- INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9\x0-\x20]/
55
+ OTHER_NON_IDENTIFIER_CHARS = ("\x0".."\x20").to_a - WHITESPACE
55
56
 
56
- ALLOWED_IN_TYPE = [:ident, :string, :rawstring]
57
- NOT_ALLOWED_AFTER_TYPE = [:single_line_comment, :multi_line_comment]
57
+ NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join}()[]/\\\"##{WHITESPACE.join}#{OTHER_NON_IDENTIFIER_CHARS.join}"
58
+ IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}]/
59
+ INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9]/
60
+
61
+ FORBIDDEN = [
62
+ *"\u0000".."\u0008",
63
+ *"\u000E".."\u001F",
64
+ "\u007F",
65
+ *"\u200E".."\u200F",
66
+ *"\u202A".."\u202E",
67
+ *"\u2066".."\u2069",
68
+ "\uFEFF"
69
+ ]
58
70
 
59
71
  def initialize(str, start = 0)
60
- @str = str
72
+ @str = debom(str)
61
73
  @context = nil
62
74
  @rawstring_hashes = nil
75
+ @start = start
63
76
  @index = start
64
77
  @buffer = ""
65
78
  @done = false
@@ -70,54 +83,114 @@ module KDL
70
83
  @last_token = nil
71
84
  end
72
85
 
86
+ def version_directive
87
+ if m = @str.match(/\A\/-[#{WHITESPACE.join}]*kdl-version[#{WHITESPACE.join}]+(\d+)\s*[#{NEWLINES.join}]/)
88
+ m[1].to_i
89
+ end
90
+ end
91
+
92
+ def done?
93
+ @done
94
+ end
95
+
96
+ def [](i)
97
+ @str[i].tap do |c|
98
+ raise_error "Forbidden character: #{c.inspect}" if FORBIDDEN.include?(c)
99
+ end
100
+ end
101
+
102
+ def tokens
103
+ a = []
104
+ while !done?
105
+ a << next_token
106
+ end
107
+ a
108
+ end
109
+
73
110
  def next_token
74
111
  @context = nil
75
112
  @previous_context = nil
76
113
  @line_at_start = @line
77
114
  @column_at_start = @column
78
115
  loop do
79
- c = @str[@index]
116
+ c = self[@index]
80
117
  case @context
81
118
  when nil
82
119
  case c
83
120
  when '"'
84
- self.context = :string
85
- @buffer = ''
86
- traverse(1)
87
- when 'r'
88
- if @str[@index + 1] == '"'
89
- self.context = :rawstring
90
- traverse(2)
91
- @rawstring_hashes = 0
121
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
122
+ nl = expect_newline(@index + 3)
123
+ self.context = :multiline_string
92
124
  @buffer = ''
93
- next
94
- elsif @str[@index + 1] == '#'
95
- i = @index + 1
96
- @rawstring_hashes = 0
97
- while @str[i] == '#'
98
- @rawstring_hashes += 1
99
- i += 1
100
- end
101
- if @str[i] == '"'
125
+ traverse(3 + nl.length)
126
+ else
127
+ self.context = :string
128
+ @buffer = ''
129
+ traverse(1)
130
+ end
131
+ when '#'
132
+ if self[@index + 1] == '"'
133
+ if self[@index + 2] == '"' && self[@index + 3] == '"'
134
+ nl = expect_newline(@index + 4)
135
+ self.context = :multiline_rawstring
136
+ @rawstring_hashes = 1
137
+ @buffer = ''
138
+ traverse(4 + nl.length)
139
+ next
140
+ else
102
141
  self.context = :rawstring
103
- @index = i + 1
142
+ traverse(2)
143
+ @rawstring_hashes = 1
104
144
  @buffer = ''
105
145
  next
106
146
  end
147
+ elsif self[@index + 1] == '#'
148
+ i = @index + 2
149
+ @rawstring_hashes = 2
150
+ while self[i] == '#'
151
+ @rawstring_hashes += 1
152
+ i += 1
153
+ end
154
+ if self[i] == '"'
155
+ if self[i + 1] == '"' && self[i + 2] == '"'
156
+ nl = expect_newline(i + 3)
157
+ self.context = :multiline_rawstring
158
+ traverse(@rawstring_hashes + 3 + nl.length)
159
+ @buffer = ''
160
+ next
161
+ else
162
+ self.context = :rawstring
163
+ traverse(@rawstring_hashes + 1)
164
+ @buffer = ''
165
+ next
166
+ end
167
+ end
107
168
  end
108
- self.context = :ident
169
+ self.context = :keyword
109
170
  @buffer = c
110
171
  traverse(1)
111
- when /[0-9\-+]/
112
- n = @str[@index + 1]
172
+ when '-'
173
+ n = self[@index + 1]
174
+ if n =~ /[0-9]/
175
+ n2 = self[@index + 2]
176
+ if n == '0' && n2 =~ /[box]/
177
+ self.context = integer_context(n2)
178
+ traverse(3)
179
+ else
180
+ self.context = :decimal
181
+ traverse(1)
182
+ end
183
+ else
184
+ self.context = :ident
185
+ traverse(1)
186
+ end
187
+ @buffer = c
188
+ when /[0-9+]/
189
+ n = self[@index + 1]
113
190
  if c == '0' && n =~ /[box]/
114
191
  traverse(2)
115
192
  @buffer = ''
116
193
  self.context = integer_context(n)
117
- elsif c == '-' && n == '0' && (n2 = @str[@index + 2]) =~ /[box]/
118
- traverse(3)
119
- @buffer = '-'
120
- self.context = integer_context(n2)
121
194
  else
122
195
  self.context = :decimal
123
196
  @buffer = c
@@ -127,53 +200,46 @@ module KDL
127
200
  t = Tokenizer.new(@str, @index + 1)
128
201
  la = t.next_token
129
202
  if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
130
- @index = t.index
131
- new_line
132
- return token(:ESCLINE, "\\#{la[1].value}")
203
+ traverse_to(t.index)
204
+ @buffer = "#{c}#{la[1].value}"
205
+ @buffer += "\n" if lan == :NEWLINE
206
+ self.context = :whitespace
133
207
  else
134
208
  raise_error "Unexpected '\\' (#{la[0]})"
135
209
  end
210
+ when '='
211
+ self.context = :equals
212
+ @buffer = c
213
+ traverse(1)
136
214
  when *SYMBOLS.keys
137
215
  return token(SYMBOLS[c], c).tap { traverse(1) }
138
- when "\r"
139
- n = @str[@index + 1]
140
- if n == "\n"
141
- return token(:NEWLINE, "#{c}#{n}").tap do
142
- traverse(2)
143
- new_line
144
- end
145
- else
146
- return token(:NEWLINE, c).tap do
147
- traverse(1)
148
- new_line
149
- end
150
- end
151
- when *NEWLINES
152
- return token(:NEWLINE, c).tap do
153
- traverse(1)
154
- new_line
216
+ when *NEWLINES, "\r"
217
+ nl = expect_newline
218
+ return token(:NEWLINE, nl).tap do
219
+ traverse(nl.length)
155
220
  end
156
221
  when "/"
157
- if @str[@index + 1] == '/'
222
+ if self[@index + 1] == '/'
158
223
  self.context = :single_line_comment
159
224
  traverse(2)
160
- elsif @str[@index + 1] == '*'
225
+ elsif self[@index + 1] == '*'
161
226
  self.context = :multi_line_comment
162
227
  @comment_nesting = 1
163
228
  traverse(2)
164
- elsif @str[@index + 1] == '-'
229
+ elsif self[@index + 1] == '-'
165
230
  return token(:SLASHDASH, '/-').tap { traverse(2) }
166
231
  else
167
232
  self.context = :ident
168
233
  @buffer = c
169
234
  traverse(1)
170
235
  end
171
- when *WHITEPACE
236
+ when *WHITESPACE
172
237
  self.context = :whitespace
173
238
  @buffer = c
174
239
  traverse(1)
175
240
  when nil
176
241
  return [false, token(:EOF, :EOF)[1]] if @done
242
+
177
243
  @done = true
178
244
  return token(:EOF, :EOF)
179
245
  when INITIAL_IDENTIFIER_CHARS
@@ -196,37 +262,99 @@ module KDL
196
262
  @buffer += c
197
263
  else
198
264
  case @buffer
199
- when 'true' then return token(:TRUE, true)
200
- when 'false' then return token(:FALSE, false)
201
- when 'null' then return token(:NULL, nil)
202
- else return token(:IDENT, @buffer)
265
+ when 'true', 'false', 'null', 'inf', '-inf', 'nan'
266
+ raise_error "Identifier cannot be a literal"
267
+ when /\A\.\d/
268
+ raise_error "Identifier cannot look like an illegal float"
269
+ else
270
+ return token(:IDENT, @buffer)
271
+ end
272
+ end
273
+ when :keyword
274
+ case c
275
+ when /[a-z\-]/
276
+ traverse(1)
277
+ @buffer += c
278
+ else
279
+ case @buffer
280
+ when '#true' then return token(:TRUE, true)
281
+ when '#false' then return token(:FALSE, false)
282
+ when '#null' then return token(:NULL, nil)
283
+ when '#inf' then return token(:FLOAT, Float::INFINITY)
284
+ when '#-inf' then return token(:FLOAT, -Float::INFINITY)
285
+ when '#nan' then return token(:FLOAT, Float::NAN)
286
+ else raise_error "Unknown keyword #{@buffer.inspect}"
203
287
  end
204
288
  end
205
289
  when :string
206
290
  case c
207
291
  when '\\'
208
292
  @buffer += c
209
- @buffer += @str[@index + 1]
210
- traverse(2)
293
+ c2 = self[@index + 1]
294
+ @buffer += c2
295
+ if c2.match?(NEWLINES_PATTERN)
296
+ i = 2
297
+ while self[@index + i]&.match?(NEWLINES_PATTERN)
298
+ @buffer += self[@index + i]
299
+ i+=1
300
+ end
301
+ traverse(i)
302
+ else
303
+ traverse(2)
304
+ end
211
305
  when '"'
212
- return token(:STRING, convert_escapes(@buffer)).tap { traverse(1) }
306
+ return token(:STRING, unescape(@buffer)).tap { traverse(1) }
307
+ when *NEWLINES, "\r"
308
+ raise_error "Unexpected NEWLINE in string literal"
213
309
  when nil
214
310
  raise_error "Unterminated string literal"
215
311
  else
216
312
  @buffer += c
217
313
  traverse(1)
218
314
  end
315
+ when :multiline_string
316
+ case c
317
+ when '\\'
318
+ @buffer += c
319
+ @buffer += self[@index + 1]
320
+ traverse(2)
321
+ when '"'
322
+ if self[@index + 1] == '"' && self[@index + 2] == '"'
323
+ return token(:STRING, unescape_non_ws(dedent(unescape_ws(@buffer)))).tap { traverse(3) }
324
+ end
325
+ @buffer += c
326
+ traverse(1)
327
+ when nil
328
+ raise_error "Unterminated multi-line string literal"
329
+ else
330
+ @buffer += c
331
+ traverse(1)
332
+ end
219
333
  when :rawstring
220
334
  raise_error "Unterminated rawstring literal" if c.nil?
221
335
 
222
- if c == '"'
336
+ case c
337
+ when '"'
223
338
  h = 0
224
- while @str[@index + 1 + h] == '#' && h < @rawstring_hashes
225
- h += 1
226
- end
339
+ h += 1 while self[@index + 1 + h] == '#' && h < @rawstring_hashes
227
340
  if h == @rawstring_hashes
228
341
  return token(:RAWSTRING, @buffer).tap { traverse(1 + h) }
229
342
  end
343
+ when *NEWLINES, "\r"
344
+ raise_error "Unexpected NEWLINE in rawstring literal"
345
+ end
346
+
347
+ @buffer += c
348
+ traverse(1)
349
+ when :multiline_rawstring
350
+ raise_error "Unterminated multi-line rawstring literal" if c.nil?
351
+
352
+ if c == '"' && self[@index + 1] == '"' && self[@index + 2] == '"' && self[@index + 3] == '#'
353
+ h = 1
354
+ h += 1 while self[@index + 3 + h] == '#' && h < @rawstring_hashes
355
+ if h == @rawstring_hashes
356
+ return token(:RAWSTRING, dedent(@buffer)).tap { traverse(3 + h) }
357
+ end
230
358
  end
231
359
 
232
360
  @buffer += c
@@ -264,21 +392,22 @@ module KDL
264
392
  return parse_binary(@buffer)
265
393
  end
266
394
  when :single_line_comment
267
- if NEWLINES.include?(c) || c == "\r"
395
+ case c
396
+ when *NEWLINES, "\r"
268
397
  self.context = nil
269
398
  @column_at_start = @column
270
399
  next
271
- elsif c.nil?
400
+ when nil
272
401
  @done = true
273
402
  return token(:EOF, :EOF)
274
403
  else
275
404
  traverse(1)
276
405
  end
277
406
  when :multi_line_comment
278
- if c == '/' && @str[@index + 1] == '*'
407
+ if c == '/' && self[@index + 1] == '*'
279
408
  @comment_nesting += 1
280
409
  traverse(2)
281
- elsif c == '*' && @str[@index + 1] == '/'
410
+ elsif c == '*' && self[@index + 1] == '/'
282
411
  @comment_nesting -= 1
283
412
  traverse(2)
284
413
  if @comment_nesting == 0
@@ -288,16 +417,42 @@ module KDL
288
417
  traverse(1)
289
418
  end
290
419
  when :whitespace
291
- if WHITEPACE.include?(c)
420
+ if WHITESPACE.include?(c)
292
421
  traverse(1)
293
422
  @buffer += c
294
- elsif c == "/" && @str[@index + 1] == '*'
423
+ elsif c == '='
424
+ self.context = :equals
425
+ @buffer += c
426
+ traverse(1)
427
+ elsif c == "/" && self[@index + 1] == '*'
295
428
  self.context = :multi_line_comment
296
429
  @comment_nesting = 1
297
430
  traverse(2)
431
+ elsif c == "\\"
432
+ t = Tokenizer.new(@str, @index + 1)
433
+ la = t.next_token
434
+ if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
435
+ traverse_to(t.index)
436
+ @buffer += "#{c}#{la[1].value}"
437
+ @buffer += "\n" if lan == :NEWLINE
438
+ else
439
+ raise_error "Unexpected '\\' (#{la[0]})"
440
+ end
298
441
  else
299
442
  return token(:WS, @buffer)
300
443
  end
444
+ when :equals
445
+ t = Tokenizer.new(@str, @index)
446
+ la = t.next_token
447
+ if la[0] == :WS
448
+ @buffer += la[1].value
449
+ traverse_to(t.index)
450
+ end
451
+ return token(:EQUALS, @buffer)
452
+ else
453
+ # :nocov:
454
+ raise_error "Unknown context `#{@context}'"
455
+ # :nocov:
301
456
  end
302
457
  end
303
458
  end
@@ -309,43 +464,65 @@ module KDL
309
464
  end
310
465
 
311
466
  def traverse(n = 1)
312
- @column += n
467
+ n.times do |i|
468
+ case self[@index + i]
469
+ when "\r"
470
+ @column = 1
471
+ when *NEWLINES
472
+ @line += 1
473
+ @column = 1
474
+ else
475
+ @column += 1
476
+ end
477
+ end
313
478
  @index += n
314
479
  end
315
480
 
316
- def raise_error(message)
317
- raise Error.new(message, @line, @column)
481
+ def traverse_to(i)
482
+ traverse(i - @index)
318
483
  end
319
484
 
320
- def new_line
321
- @column = 1
322
- @line += 1
485
+ def raise_error(message)
486
+ raise Error.new(message, @line, @column)
323
487
  end
324
488
 
325
489
  def context=(val)
326
- if @type_context && !ALLOWED_IN_TYPE.include?(val)
490
+ if @type_context && !allowed_in_type?(val)
327
491
  raise_error "#{val} context not allowed in type declaration"
328
- elsif @last_token && @last_token[0] == :RPAREN && NOT_ALLOWED_AFTER_TYPE.include?(val)
492
+ elsif @last_token && @last_token[0] == :RPAREN && !allowed_after_type?(val)
329
493
  raise_error 'Comments are not allowed after a type declaration'
330
494
  end
331
495
  @previous_context = @context
332
496
  @context = val
333
497
  end
334
498
 
499
+ def allowed_in_type?(val)
500
+ %i[ident string rawstring multi_line_comment whitespace].include?(val)
501
+ end
502
+
503
+ def allowed_after_type?(val)
504
+ !%i[single_line_comment].include?(val)
505
+ end
506
+
335
507
  def revert_context
336
508
  @context = @previous_context
337
509
  @previous_context = nil
338
510
  end
339
511
 
340
- def parse_decimal(s)
341
- return parse_float(s) if s =~ /[.E]/i
342
-
343
- token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
344
- rescue
345
- if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
346
- token(:IDENT, s)
512
+ def expect_newline(i = @index)
513
+ c = self[i]
514
+ case c
515
+ when "\r"
516
+ n = self[i + 1]
517
+ if n == "\n"
518
+ "#{c}#{n}"
519
+ else
520
+ c
521
+ end
522
+ when *NEWLINES
523
+ c
347
524
  else
348
- raise
525
+ raise_error "Expected NEWLINE, found '#{c}'"
349
526
  end
350
527
  end
351
528
 
@@ -357,6 +534,18 @@ module KDL
357
534
  end
358
535
  end
359
536
 
537
+ def parse_decimal(s)
538
+ return parse_float(s) if s =~ /[.E]/i
539
+
540
+ token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
541
+ rescue
542
+ if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
543
+ token(:IDENT, s)
544
+ else
545
+ raise
546
+ end
547
+ end
548
+
360
549
  def parse_float(s)
361
550
  match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.([\d_]+))?(?:[eE]([-+]?[\d_]+))?$/)
362
551
  raise_error "Invalid floating point value #{s}" if match.nil?
@@ -389,26 +578,74 @@ module KDL
389
578
  s.chomp('_').squeeze('_')
390
579
  end
391
580
 
392
- def convert_escapes(string)
393
- string.gsub(/\\[^u]/) do |m|
581
+ def unescape_ws(string)
582
+ string.gsub(/\\(\\|\s+)/) do |m|
394
583
  case m
395
- when '\n' then "\n"
396
- when '\r' then "\r"
397
- when '\t' then "\t"
398
- when '\\\\' then "\\"
399
- when '\"' then "\""
400
- when '\b' then "\b"
401
- when '\f' then "\f"
402
- when '\/' then "/"
403
- else raise_error "Unexpected escape #{m.inspect}"
584
+ when '\\\\' then '\\\\'
585
+ else ''
404
586
  end
405
- end.gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
406
- i = Integer(m[3..-2], 16)
407
- if i < 0 || i > 0x10FFFF
408
- raise_error "Invalid code point #{u}"
587
+ end
588
+ end
589
+
590
+ UNESCAPE = /\\(?:[#{WHITESPACE.join}#{NEWLINES.join}\r]+|[^u])/
591
+ UNESCAPE_NON_WS = /\\(?:[^u])/
592
+
593
+ def unescape_non_ws(string)
594
+ unescape(string, UNESCAPE_NON_WS)
595
+ end
596
+
597
+ def unescape(string, rgx = UNESCAPE)
598
+ string
599
+ .gsub(rgx) { |m| replace_esc(m) }
600
+ .gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
601
+ i = Integer(m[3..-2], 16)
602
+ if i < 0 || i > 0x10FFFF
603
+ raise_error "Invalid code point #{u}"
604
+ end
605
+ i.chr(Encoding::UTF_8)
409
606
  end
410
- i.chr(Encoding::UTF_8)
607
+ end
608
+
609
+ def replace_esc(m)
610
+ case m
611
+ when '\n' then "\n"
612
+ when '\r' then "\r"
613
+ when '\t' then "\t"
614
+ when '\\\\' then "\\"
615
+ when '\"' then "\""
616
+ when '\b' then "\b"
617
+ when '\f' then "\f"
618
+ when '\s' then ' '
619
+ when /\\[#{WHITESPACE.join}#{NEWLINES.join}]+/ then ''
620
+ else raise_error "Unexpected escape #{m.inspect}"
411
621
  end
412
622
  end
623
+
624
+ def dedent(string)
625
+ split = string.split(NEWLINES_PATTERN)
626
+ lines = split.partition.with_index { |_, i| i.even? }.first
627
+ if split.last.match?(NEWLINES_PATTERN)
628
+ indent = ""
629
+ else
630
+ *lines, indent = lines
631
+ end
632
+ return "" if lines.empty?
633
+ raise_error "Invalid multiline string final line" unless indent.match?(WS_STAR)
634
+ valid = /\A(?:#{Regexp.escape(indent)})(.*)/
635
+
636
+ lines.map! do |line|
637
+ case line
638
+ when WS_STAR then ""
639
+ when valid then $1
640
+ else raise_error "Invalid multiline string indentation"
641
+ end
642
+ end.join("\n")
643
+ end
644
+
645
+ def debom(str)
646
+ return str unless str.start_with?("\uFEFF")
647
+
648
+ str[1..]
649
+ end
413
650
  end
414
651
  end
data/lib/kdl/types/base64.rb CHANGED
@@ -2,7 +2,7 @@ require 'base64'
2
2
 
3
3
  module KDL
4
4
  module Types
5
- class Base64 < Value
5
+ class Base64 < Value::Custom
6
6
  RGX = /^[A-Za-z0-9+\/=]+$/.freeze
7
7
 
8
8
  def self.call(value, type = 'base64')
data/lib/kdl/types/country/iso3166_countries.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module KDL
2
2
  module Types
3
- class Country < Value
3
+ class Country < Value::Custom
4
4
  # From: https://en.wikipedia.org/wiki/ISO_3166-1#Current_codes
5
5
  COUNTRIES3 = {
6
6
  'AFG' => { alpha3: 'AFG', alpha2: 'AF', numeric_code: 4, name: 'Afghanistan' }.freeze,
data/lib/kdl/types/country/iso3166_subdivisions.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module KDL
2
2
  module Types
3
- class CountrySubdivision < Value
3
+ class CountrySubdivision < Value::Custom
4
4
  # From: https://en.wikipedia.org/wiki/ISO_3166-2#Current_codes
5
5
  COUNTRY_SUBDIVISIONS = {
6
6
  "AD" => {
data/lib/kdl/types/country.rb CHANGED
@@ -3,7 +3,7 @@ require 'kdl/types/country/iso3166_subdivisions'
3
3
 
4
4
  module KDL
5
5
  module Types
6
- class Country < Value
6
+ class Country < Value::Custom
7
7
  attr_reader :name, :alpha2, :alpha3, :numeric_code
8
8
 
9
9
  def initialize(value, format: nil, type: 'country-3')
@@ -42,7 +42,7 @@ module KDL
42
42
  end
43
43
  MAPPING['country-2'] = Country2
44
44
 
45
- class CountrySubdivision < Value
45
+ class CountrySubdivision < Value::Custom
46
46
  attr_reader :country, :name
47
47
 
48
48
  def initialize(value, type: 'country-subdivision', country:, name:, **kwargs)