RubyGems - kdl - Versions diffs - 1.0.6 → 2.0.1 - Mend

kdl 1.0.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

checksums.yaml +4 -4
data/.github/workflows/ruby.yml +8 -1
data/.gitignore +1 -0
data/.gitmodules +4 -0
data/Gemfile +6 -1
data/README.md +67 -7
data/Rakefile +6 -1
data/bin/kdl +1 -1
data/kdl.gemspec +2 -2
data/lib/kdl/document.rb +60 -2
data/lib/kdl/error.rb +24 -0
data/lib/kdl/kdl.tab.rb +305 -231
data/lib/kdl/kdl.yy +57 -49
data/lib/kdl/node.rb +116 -13
data/lib/kdl/parser_common.rb +28 -0
data/lib/kdl/string_dumper.rb +32 -33
data/lib/kdl/tokenizer.rb +387 -136
data/lib/kdl/types/base64.rb +3 -1
data/lib/kdl/types/country/iso3166_countries.rb +3 -1
data/lib/kdl/types/country/iso3166_subdivisions.rb +3 -1
data/lib/kdl/types/country.rb +4 -2
data/lib/kdl/types/currency/iso4217_currencies.rb +3 -1
data/lib/kdl/types/currency.rb +3 -1
data/lib/kdl/types/date_time.rb +5 -3
data/lib/kdl/types/decimal.rb +3 -1
data/lib/kdl/types/duration/iso8601_parser.rb +3 -1
data/lib/kdl/types/duration.rb +3 -1
data/lib/kdl/types/email/parser.rb +10 -8
data/lib/kdl/types/email.rb +3 -1
data/lib/kdl/types/hostname/validator.rb +3 -1
data/lib/kdl/types/hostname.rb +3 -1
data/lib/kdl/types/ip.rb +3 -1
data/lib/kdl/types/irl/parser.rb +10 -8
data/lib/kdl/types/irl.rb +3 -1
data/lib/kdl/types/regex.rb +3 -1
data/lib/kdl/types/url.rb +3 -1
data/lib/kdl/types/url_template.rb +6 -4
data/lib/kdl/types/uuid.rb +3 -1
data/lib/kdl/types.rb +2 -0
data/lib/kdl/v1/document.rb +19 -0
data/lib/kdl/v1/kdl.tab.rb +594 -0
data/lib/kdl/v1/kdl.yy +89 -0
data/lib/kdl/v1/node.rb +32 -0
data/lib/kdl/v1/string_dumper.rb +30 -0
data/lib/kdl/v1/tokenizer.rb +298 -0
data/lib/kdl/v1/value.rb +91 -0
data/lib/kdl/v1.rb +13 -0
data/lib/kdl/value.rb +87 -15
data/lib/kdl/version.rb +3 -1
data/lib/kdl.rb +47 -1
metadata +14 -7

data/lib/kdl/tokenizer.rb CHANGED

@@ -1,8 +1,10 @@
+# frozen_string_literal: true
 require 'bigdecimal'
 module KDL
   class Tokenizer
-    class Error < StandardError
+    class Error < ::KDL::Error
       def initialize(message, line, column)
         super("#{message} (#{line}:#{column})")
       end
@@ -36,32 +38,47 @@ module KDL
     SYMBOLS = {
       '{' => :LBRACE,
       '}' => :RBRACE,
-      '=' => :EQUALS,
-      '＝' => :EQUALS,
-      ';' => :SEMICOLON
+      ';' => :SEMICOLON,
+      '=' => :EQUALS
     }
-    WHITEPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
-                 "\u2000", "\u2001", "\u2002", "\u2003",
-                 "\u2004", "\u2005", "\u2006", "\u2007",
-                 "\u2008", "\u2009", "\u200A", "\u202F",
-                 "\u205F", "\u3000" ]
+    WHITESPACE = ["\u0009", "\u0020", "\u00A0", "\u1680",
+                  "\u2000", "\u2001", "\u2002", "\u2003",
+                  "\u2004", "\u2005", "\u2006", "\u2007",
+                  "\u2008", "\u2009", "\u200A", "\u202F",
+                  "\u205F", "\u3000"]
+    WS = "[#{Regexp.escape(WHITESPACE.join)}]"
+    WS_STAR = /\A#{WS}*\z/
+    WS_PLUS = /\A#{WS}+\z/
+    NEWLINES = ["\u000A", "\u0085", "\u000B", "\u000C", "\u2028", "\u2029"]
+    NEWLINES_PATTERN = Regexp.new("(#{NEWLINES.map{Regexp.escape(_1)}.join('|')}|\r\n?)", Regexp::MULTILINE)
-    NEWLINES = ["\u000A", "\u0085", "\u000C", "\u2028", "\u2029"]
+    OTHER_NON_IDENTIFIER_CHARS = ("\x0".."\x20").to_a - WHITESPACE
-    NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join('')}()/\\<>[]\","
-    IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}\x0-\x20]/
-    INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9\x0-\x20]/
+    NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join}()[]/\\\"##{WHITESPACE.join}#{OTHER_NON_IDENTIFIER_CHARS.join}"
+    IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}]/
+    INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9]/
-    ALLOWED_IN_TYPE = [:ident, :string, :rawstring]
-    NOT_ALLOWED_AFTER_TYPE = [:single_line_comment, :multi_line_comment]
+    FORBIDDEN = [
+      *"\u0000".."\u0008",
+      *"\u000E".."\u001F",
+      "\u007F",
+      *"\u200E".."\u200F",
+      *"\u202A".."\u202E",
+      *"\u2066".."\u2069",
+      "\uFEFF"
+    ]
+    VERSION_PATTERN = /\A\/-[#{WHITESPACE.join}]*kdl-version[#{WHITESPACE.join}]+(\d+)[#{WHITESPACE.join}]*[#{NEWLINES.join}]/
     def initialize(str, start = 0)
-      @str = str
+      @str = debom(str)
       @context = nil
       @rawstring_hashes = nil
+      @start = start
       @index = start
-      @buffer = ""
+      @buffer = +""
       @done = false
       @previous_context = nil
       @line = 1
@@ -70,122 +87,175 @@ module KDL
       @last_token = nil
     end
+    def version_directive
+      if m = @str.match(VERSION_PATTERN)
+        m[1].to_i
+      end
+    end
+    def done?
+      @done
+    end
+    def [](i)
+      @str[i].tap do |c|
+        raise_error "Forbidden character: #{c.inspect}" if FORBIDDEN.include?(c)
+      end
+    end
+    def tokens
+      a = []
+      while !done?
+        a << next_token
+      end
+      a
+    end
     def next_token
       @context = nil
       @previous_context = nil
       @line_at_start = @line
       @column_at_start = @column
       loop do
-        c = @str[@index]
+        c = self[@index]
         case @context
         when nil
           case c
           when '"'
-            self.context = :string
-            @buffer = ''
-            traverse(1)
-          when 'r'
-            if @str[@index + 1] == '"'
-              self.context = :rawstring
-              traverse(2)
-              @rawstring_hashes = 0
-              @buffer = ''
-              next
-            elsif @str[@index + 1] == '#'
-              i = @index + 1
-              @rawstring_hashes = 0
-              while @str[i] == '#'
+            if self[@index + 1] == '"' && self[@index + 2] == '"'
+              nl = expect_newline(@index + 3)
+              self.context = :multiline_string
+              @buffer = +''
+              traverse(3 + nl.length)
+            else
+              self.context = :string
+              @buffer = +''
+              traverse(1)
+            end
+          when '#'
+            if self[@index + 1] == '"'
+              if self[@index + 2] == '"' && self[@index + 3] == '"'
+                nl = expect_newline(@index + 4)
+                self.context = :multiline_rawstring
+                @rawstring_hashes = 1
+                @buffer = +''
+                traverse(4 + nl.length)
+                next
+              else
+                self.context = :rawstring
+                traverse(2)
+                @rawstring_hashes = 1
+                @buffer = +''
+                next
+              end
+            elsif self[@index + 1] == '#'
+              i = @index + 2
+              @rawstring_hashes = 2
+              while self[i] == '#'
                 @rawstring_hashes += 1
                 i += 1
               end
-              if @str[i] == '"'
-                self.context = :rawstring
-                @index = i + 1
-                @buffer = ''
-                next
+              if self[i] == '"'
+                if self[i + 1] == '"' && self[i + 2] == '"'
+                  nl = expect_newline(i + 3)
+                  self.context = :multiline_rawstring
+                  traverse(@rawstring_hashes + 3 + nl.length)
+                  @buffer = +''
+                  next
+                else
+                  self.context = :rawstring
+                  traverse(@rawstring_hashes + 1)
+                  @buffer = +''
+                  next
+                end
               end
             end
-            self.context = :ident
-            @buffer = c
+            self.context = :keyword
+            @buffer = +c
             traverse(1)
-          when /[0-9\-+]/
-            n = @str[@index + 1]
+          when '-'
+            n = self[@index + 1]
+            if n =~ /[0-9]/
+              n2 = self[@index + 2]
+              if n == '0' && n2 =~ /[box]/
+                self.context = integer_context(n2)
+                traverse(3)
+              else
+                self.context = :decimal
+                traverse(1)
+              end
+            else
+              self.context = :ident
+              traverse(1)
+            end
+            @buffer = +c
+          when /[0-9+]/
+            n = self[@index + 1]
             if c == '0' && n =~ /[box]/
               traverse(2)
-              @buffer = ''
+              @buffer = +''
               self.context = integer_context(n)
-            elsif c == '-' && n == '0' && (n2 = @str[@index + 2]) =~ /[box]/
-              traverse(3)
-              @buffer = '-'
-              self.context = integer_context(n2)
             else
               self.context = :decimal
-              @buffer = c
+              @buffer = +c
               traverse(1)
             end
           when '\\'
             t = Tokenizer.new(@str, @index + 1)
             la = t.next_token
             if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
-              @index = t.index
-              new_line
-              return token(:ESCLINE, "\\#{la[1].value}")
+              traverse_to(t.index)
+              @buffer = "#{c}#{la[1].value}"
+              @buffer << "\n" if lan == :NEWLINE
+              self.context = :whitespace
             else
               raise_error "Unexpected '\\' (#{la[0]})"
             end
+          when '='
+            self.context = :equals
+            @buffer = +c
+            traverse(1)
           when *SYMBOLS.keys
-            return token(SYMBOLS[c], c).tap { traverse(1) }
-          when "\r"
-            n = @str[@index + 1]
-            if n == "\n"
-              return token(:NEWLINE, "#{c}#{n}").tap do
-                traverse(2)
-                new_line
-              end
-            else
-              return token(:NEWLINE, c).tap do
-                traverse(1)
-                new_line
-              end
-            end
-          when *NEWLINES
-            return token(:NEWLINE, c).tap do
-              traverse(1)
-              new_line
+            return token(SYMBOLS[c], -c).tap { traverse(1) }
+          when *NEWLINES, "\r"
+            nl = expect_newline
+            return token(:NEWLINE, -nl).tap do
+              traverse(nl.length)
             end
           when "/"
-            if @str[@index + 1] == '/'
+            if self[@index + 1] == '/'
               self.context = :single_line_comment
               traverse(2)
-            elsif @str[@index + 1] == '*'
+            elsif self[@index + 1] == '*'
               self.context = :multi_line_comment
               @comment_nesting = 1
               traverse(2)
-            elsif @str[@index + 1] == '-'
+            elsif self[@index + 1] == '-'
               return token(:SLASHDASH, '/-').tap { traverse(2) }
             else
               self.context = :ident
-              @buffer = c
+              @buffer = +c
               traverse(1)
             end
-          when *WHITEPACE
+          when *WHITESPACE
             self.context = :whitespace
-            @buffer = c
+            @buffer = +c
             traverse(1)
           when nil
             return [false, token(:EOF, :EOF)[1]] if @done
             @done = true
             return token(:EOF, :EOF)
           when INITIAL_IDENTIFIER_CHARS
             self.context = :ident
-            @buffer = c
+            @buffer = +c
             traverse(1)
           when '('
             @type_context = true
-            return token(:LPAREN, c).tap { traverse(1) }
+            return token(:LPAREN, -c).tap { traverse(1) }
           when ')'
             @type_context = false
-            return token(:RPAREN, c).tap { traverse(1) }
+            return token(:RPAREN, -c).tap { traverse(1) }
           else
             raise_error "Unexpected character #{c.inspect}"
           end
@@ -193,49 +263,111 @@ module KDL
           case c
           when IDENTIFIER_CHARS
             traverse(1)
-            @buffer += c
+            @buffer << c
+          else
+            case @buffer
+            when 'true', 'false', 'null', 'inf', '-inf', 'nan'
+              raise_error "Identifier cannot be a literal"
+            when /\A\.\d/
+              raise_error "Identifier cannot look like an illegal float"
+            else
+              return token(:IDENT, -@buffer)
+            end
+          end
+        when :keyword
+          case c
+          when /[a-z\-]/
+            traverse(1)
+            @buffer << c
           else
             case @buffer
-            when 'true'  then return token(:TRUE, true)
-            when 'false' then return token(:FALSE, false)
-            when 'null'  then return token(:NULL, nil)
-            else return token(:IDENT, @buffer)
+            when '#true'  then return token(:TRUE, true)
+            when '#false' then return token(:FALSE, false)
+            when '#null'  then return token(:NULL, nil)
+            when '#inf'   then return token(:FLOAT, Float::INFINITY)
+            when '#-inf'  then return token(:FLOAT, -Float::INFINITY)
+            when '#nan'   then return token(:FLOAT, Float::NAN)
+            else raise_error "Unknown keyword #{@buffer.inspect}"
             end
           end
         when :string
           case c
           when '\\'
-            @buffer += c
-            @buffer += @str[@index + 1]
-            traverse(2)
+            @buffer << c
+            c2 = self[@index + 1]
+            @buffer << c2
+            if c2.match?(NEWLINES_PATTERN)
+              i = 2
+              while self[@index + i]&.match?(NEWLINES_PATTERN)
+                @buffer << self[@index + i]
+                i+=1
+              end
+              traverse(i)
+            else
+              traverse(2)
+            end
           when '"'
-            return token(:STRING, convert_escapes(@buffer)).tap { traverse(1) }
+            return token(:STRING, -unescape(@buffer)).tap { traverse(1) }
+          when *NEWLINES, "\r"
+            raise_error "Unexpected NEWLINE in string literal"
           when nil
             raise_error "Unterminated string literal"
           else
-            @buffer += c
+            @buffer << c
+            traverse(1)
+          end
+        when :multiline_string
+          case c
+          when '\\'
+            @buffer << c
+            @buffer << self[@index + 1]
+            traverse(2)
+          when '"'
+            if self[@index + 1] == '"' && self[@index + 2] == '"'
+              return token(:STRING, -unescape_non_ws(dedent(unescape_ws(@buffer)))).tap { traverse(3) }
+            end
+            @buffer << c
+            traverse(1)
+          when nil
+            raise_error "Unterminated multi-line string literal"
+          else
+            @buffer << c
             traverse(1)
           end
         when :rawstring
           raise_error "Unterminated rawstring literal" if c.nil?
-          if c == '"'
+          case c
+          when '"'
             h = 0
-            while @str[@index + 1 + h] == '#' && h < @rawstring_hashes
-              h += 1
+            h += 1 while self[@index + 1 + h] == '#' && h < @rawstring_hashes
+            if h == @rawstring_hashes
+              return token(:RAWSTRING, -@buffer).tap { traverse(1 + h) }
             end
+          when *NEWLINES, "\r"
+            raise_error "Unexpected NEWLINE in rawstring literal"
+          end
+          @buffer << c
+          traverse(1)
+        when :multiline_rawstring
+          raise_error "Unterminated multi-line rawstring literal" if c.nil?
+          if c == '"' && self[@index + 1] == '"' && self[@index + 2] == '"' && self[@index + 3] == '#'
+            h = 1
+            h += 1 while self[@index + 3 + h] == '#' && h < @rawstring_hashes
             if h == @rawstring_hashes
-              return token(:RAWSTRING, @buffer).tap { traverse(1 + h) }
+              return token(:RAWSTRING, -dedent(@buffer)).tap { traverse(3 + h) }
             end
           end
-          @buffer += c
+          @buffer << c
           traverse(1)
         when :decimal
           case c
           when /[0-9.\-+_eE]/
             traverse(1)
-            @buffer += c
+            @buffer << c
           else
             return parse_decimal(@buffer)
           end
@@ -243,7 +375,7 @@ module KDL
           case c
           when /[0-9a-fA-F_]/
             traverse(1)
-            @buffer += c
+            @buffer << c
           else
             return parse_hexadecimal(@buffer)
           end
@@ -251,7 +383,7 @@ module KDL
           case c
           when /[0-7_]/
             traverse(1)
-            @buffer += c
+            @buffer << c
           else
             return parse_octal(@buffer)
           end
@@ -259,26 +391,27 @@ module KDL
           case c
           when /[01_]/
             traverse(1)
-            @buffer += c
+            @buffer << c
           else
             return parse_binary(@buffer)
           end
         when :single_line_comment
-          if NEWLINES.include?(c) || c == "\r"
+          case c
+          when *NEWLINES, "\r"
             self.context = nil
             @column_at_start = @column
             next
-          elsif c.nil?
+          when nil
             @done = true
             return token(:EOF, :EOF)
           else
             traverse(1)
           end
         when :multi_line_comment
-          if c == '/' && @str[@index + 1] == '*'
+          if c == '/' && self[@index + 1] == '*'
             @comment_nesting += 1
             traverse(2)
-          elsif c == '*' && @str[@index + 1] == '/'
+          elsif c == '*' && self[@index + 1] == '/'
             @comment_nesting -= 1
             traverse(2)
             if @comment_nesting == 0
@@ -288,16 +421,42 @@ module KDL
             traverse(1)
           end
         when :whitespace
-          if WHITEPACE.include?(c)
+          if WHITESPACE.include?(c)
             traverse(1)
-            @buffer += c
-          elsif c == "/" && @str[@index + 1] == '*'
+            @buffer << c
+          elsif c == '='
+            self.context = :equals
+            @buffer << c
+            traverse(1)
+          elsif c == "/" && self[@index + 1] == '*'
             self.context = :multi_line_comment
             @comment_nesting = 1
             traverse(2)
+          elsif c == "\\"
+            t = Tokenizer.new(@str, @index + 1)
+            la = t.next_token
+            if la[0] == :NEWLINE || la[0] == :EOF || (la[0] == :WS && (lan = t.next_token[0]) == :NEWLINE || lan == :EOF)
+              traverse_to(t.index)
+              @buffer << "#{c}#{la[1].value}"
+              @buffer << "\n" if lan == :NEWLINE
+            else
+              raise_error "Unexpected '\\' (#{la[0]})"
+            end
           else
-            return token(:WS, @buffer)
+            return token(:WS, -@buffer)
           end
+        when :equals
+          t = Tokenizer.new(@str, @index)
+          la = t.next_token
+          if la[0] == :WS
+            @buffer << la[1].value
+            traverse_to(t.index)
+          end
+          return token(:EQUALS, -@buffer)
+        else
+          # :nocov:
+          raise_error "Unknown context `#{@context}'"
+          # :nocov:
         end
       end
     end
@@ -309,43 +468,69 @@ module KDL
     end
     def traverse(n = 1)
-      @column += n
+      n.times do |i|
+        case self[@index + i]
+        when "\r"
+          @column = 1
+        when *NEWLINES
+          @line += 1
+          @column = 1
+        else
+          @column += 1
+        end
+      end
       @index += n
     end
-    def raise_error(message)
-      raise Error.new(message, @line, @column)
+    def traverse_to(i)
+      traverse(i - @index)
     end
-    def new_line
-      @column = 1
-      @line += 1
+    def raise_error(error)
+      case error
+      when String then raise Error.new(error, @line, @column)
+      when Error then raise error
+      else raise Error.new(error.message, @line, @column)
+      end
     end
     def context=(val)
-      if @type_context && !ALLOWED_IN_TYPE.include?(val)
+      if @type_context && !allowed_in_type?(val)
         raise_error "#{val} context not allowed in type declaration"
-      elsif @last_token && @last_token[0] == :RPAREN && NOT_ALLOWED_AFTER_TYPE.include?(val)
+      elsif @last_token && @last_token[0] == :RPAREN && !allowed_after_type?(val)
         raise_error 'Comments are not allowed after a type declaration'
       end
       @previous_context = @context
       @context = val
     end
+    def allowed_in_type?(val)
+      %i[ident string rawstring multi_line_comment whitespace].include?(val)
+    end
+    def allowed_after_type?(val)
+      !%i[single_line_comment].include?(val)
+    end
     def revert_context
       @context = @previous_context
       @previous_context = nil
     end
-    def parse_decimal(s)
-      return parse_float(s) if s =~ /[.E]/i
-      token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
-    rescue
-      if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
-        token(:IDENT, s)
+    def expect_newline(i = @index)
+      c = self[i]
+      case c
+      when "\r"
+        n = self[i + 1]
+        if n == "\n"
+          "#{c}#{n}"
+        else
+          c
+        end
+      when *NEWLINES
+        c
       else
-        raise
+        raise_error "Expected NEWLINE, found '#{c}'"
       end
     end
@@ -357,6 +542,18 @@ module KDL
       end
     end
+    def parse_decimal(s)
+      return parse_float(s) if s =~ /[.E]/i
+      token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
+    rescue => e
+      if s[0] =~ INITIAL_IDENTIFIER_CHARS && s[1..-1].each_char.all? { |c| c =~ IDENTIFIER_CHARS }
+        token(:IDENT, -s)
+      else
+        raise_error(e)
+      end
+    end
     def parse_float(s)
       match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.([\d_]+))?(?:[eE]([-+]?[\d_]+))?$/)
       raise_error "Invalid floating point value #{s}" if match.nil?
@@ -375,40 +572,94 @@ module KDL
     def parse_hexadecimal(s)
       token(:INTEGER, Integer(munch_underscores(s), 16))
+    rescue ArgumentError => e
+      raise_error(e)
     end
     def parse_octal(s)
       token(:INTEGER, Integer(munch_underscores(s), 8))
+    rescue ArgumentError => e
+      raise_error(e)
     end
     def parse_binary(s)
       token(:INTEGER, Integer(munch_underscores(s), 2))
+    rescue ArgumentError => e
+      raise_error(e)
     end
     def munch_underscores(s)
       s.chomp('_').squeeze('_')
     end
-    def convert_escapes(string)
-      string.gsub(/\\[^u]/) do |m|
+    def unescape_ws(string)
+      string.gsub(/\\(\\|\s+)/) do |m|
         case m
-        when '\n' then "\n"
-        when '\r' then "\r"
-        when '\t' then "\t"
-        when '\\\\' then "\\"
-        when '\"' then "\""
-        when '\b' then "\b"
-        when '\f' then "\f"
-        when '\/' then "/"
-        else raise_error "Unexpected escape #{m.inspect}"
+        when '\\\\' then '\\\\'
+        else ''
         end
-      end.gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
-        i = Integer(m[3..-2], 16)
-        if i < 0 || i > 0x10FFFF
-          raise_error "Invalid code point #{u}"
+      end
+    end
+    UNESCAPE        = /\\(?:[#{WHITESPACE.join}#{NEWLINES.join}\r]+|[^u])/
+    UNESCAPE_NON_WS = /\\(?:[^u])/
+    def unescape_non_ws(string)
+      unescape(string, UNESCAPE_NON_WS)
+    end
+    def unescape(string, rgx = UNESCAPE)
+      string
+        .gsub(rgx) { |m| replace_esc(m) }
+        .gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
+          i = Integer(m[3..-2], 16)
+          if i < 0 || i > 0x10FFFF || (0xD800..0xDFFF).include?(i)
+            raise_error "Invalid code point #{m}"
+          end
+          i.chr(Encoding::UTF_8)
         end
-        i.chr(Encoding::UTF_8)
+    end
+    def replace_esc(m)
+      case m
+      when '\n'   then "\n"
+      when '\r'   then "\r"
+      when '\t'   then "\t"
+      when '\\\\' then "\\"
+      when '\"'   then "\""
+      when '\b'   then "\b"
+      when '\f'   then "\f"
+      when '\s'   then ' '
+      when /\\[#{WHITESPACE.join}#{NEWLINES.join}]+/ then ''
+      else raise_error "Unexpected escape #{m.inspect}"
       end
     end
+    def dedent(string)
+      split = string.split(NEWLINES_PATTERN)
+      lines = split.partition.with_index { |_, i| i.even? }.first
+      if split.last.match?(NEWLINES_PATTERN)
+        indent = ""
+      else
+        *lines, indent = lines
+      end
+      return "" if lines.empty?
+      raise_error "Invalid multiline string final line" unless indent.match?(WS_STAR)
+      valid = /\A#{Regexp.escape(indent)}(.*)/
+      lines.map do |line|
+        case line
+        when WS_STAR then ""
+        when valid then $1
+        else raise_error "Invalid multiline string indentation"
+        end
+      end.join("\n")
+    end
+    def debom(str)
+      return str unless str.start_with?("\uFEFF")
+      str[1..]
+    end
   end
 end