yarp 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
| @@ -0,0 +1,204 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module YARP
         | 
| 4 | 
            +
              class DesugarVisitor < MutationVisitor
         | 
| 5 | 
            +
                # @@foo &&= bar
         | 
| 6 | 
            +
                #
         | 
| 7 | 
            +
                # becomes
         | 
| 8 | 
            +
                #
         | 
| 9 | 
            +
                # @@foo && @@foo = bar
         | 
| 10 | 
            +
                def visit_class_variable_and_write_node(node)
         | 
| 11 | 
            +
                  desugar_and_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, arguments: [node.name])
         | 
| 12 | 
            +
                end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                # @@foo ||= bar
         | 
| 15 | 
            +
                #
         | 
| 16 | 
            +
                # becomes
         | 
| 17 | 
            +
                #
         | 
| 18 | 
            +
                # defined?(@@foo) ? @@foo : @@foo = bar
         | 
| 19 | 
            +
                def visit_class_variable_or_write_node(node)
         | 
| 20 | 
            +
                  desugar_or_write_defined_node(node, ClassVariableReadNode, ClassVariableWriteNode, arguments: [node.name])
         | 
| 21 | 
            +
                end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                # @@foo += bar
         | 
| 24 | 
            +
                #
         | 
| 25 | 
            +
                # becomes
         | 
| 26 | 
            +
                #
         | 
| 27 | 
            +
                # @@foo = @@foo + bar
         | 
| 28 | 
            +
                def visit_class_variable_operator_write_node(node)
         | 
| 29 | 
            +
                  desugar_operator_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, arguments: [node.name])
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                # Foo &&= bar
         | 
| 33 | 
            +
                #
         | 
| 34 | 
            +
                # becomes
         | 
| 35 | 
            +
                #
         | 
| 36 | 
            +
                # Foo && Foo = bar
         | 
| 37 | 
            +
                def visit_constant_and_write_node(node)
         | 
| 38 | 
            +
                  desugar_and_write_node(node, ConstantReadNode, ConstantWriteNode)
         | 
| 39 | 
            +
                end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                # Foo ||= bar
         | 
| 42 | 
            +
                #
         | 
| 43 | 
            +
                # becomes
         | 
| 44 | 
            +
                #
         | 
| 45 | 
            +
                # defined?(Foo) ? Foo : Foo = bar
         | 
| 46 | 
            +
                def visit_constant_or_write_node(node)
         | 
| 47 | 
            +
                  desugar_or_write_defined_node(node, ConstantReadNode, ConstantWriteNode)
         | 
| 48 | 
            +
                end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                # Foo += bar
         | 
| 51 | 
            +
                #
         | 
| 52 | 
            +
                # becomes
         | 
| 53 | 
            +
                #
         | 
| 54 | 
            +
                # Foo = Foo + bar
         | 
| 55 | 
            +
                def visit_constant_operator_write_node(node)
         | 
| 56 | 
            +
                  desugar_operator_write_node(node, ConstantReadNode, ConstantWriteNode)
         | 
| 57 | 
            +
                end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                # $foo &&= bar
         | 
| 60 | 
            +
                #
         | 
| 61 | 
            +
                # becomes
         | 
| 62 | 
            +
                #
         | 
| 63 | 
            +
                # $foo && $foo = bar
         | 
| 64 | 
            +
                def visit_global_variable_and_write_node(node)
         | 
| 65 | 
            +
                  desugar_and_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode)
         | 
| 66 | 
            +
                end
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                # $foo ||= bar
         | 
| 69 | 
            +
                #
         | 
| 70 | 
            +
                # becomes
         | 
| 71 | 
            +
                #
         | 
| 72 | 
            +
                # defined?($foo) ? $foo : $foo = bar
         | 
| 73 | 
            +
                def visit_global_variable_or_write_node(node)
         | 
| 74 | 
            +
                  desugar_or_write_defined_node(node, GlobalVariableReadNode, GlobalVariableWriteNode)
         | 
| 75 | 
            +
                end
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                # $foo += bar
         | 
| 78 | 
            +
                #
         | 
| 79 | 
            +
                # becomes
         | 
| 80 | 
            +
                #
         | 
| 81 | 
            +
                # $foo = $foo + bar
         | 
| 82 | 
            +
                def visit_global_variable_operator_write_node(node)
         | 
| 83 | 
            +
                  desugar_operator_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode)
         | 
| 84 | 
            +
                end
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                # @foo &&= bar
         | 
| 87 | 
            +
                #
         | 
| 88 | 
            +
                # becomes
         | 
| 89 | 
            +
                #
         | 
| 90 | 
            +
                # @foo && @foo = bar
         | 
| 91 | 
            +
                def visit_instance_variable_and_write_node(node)
         | 
| 92 | 
            +
                  desugar_and_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, arguments: [node.name])
         | 
| 93 | 
            +
                end
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                # @foo ||= bar
         | 
| 96 | 
            +
                #
         | 
| 97 | 
            +
                # becomes
         | 
| 98 | 
            +
                #
         | 
| 99 | 
            +
                # @foo || @foo = bar
         | 
| 100 | 
            +
                def visit_instance_variable_or_write_node(node)
         | 
| 101 | 
            +
                  desugar_or_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, arguments: [node.name])
         | 
| 102 | 
            +
                end
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                # @foo += bar
         | 
| 105 | 
            +
                #
         | 
| 106 | 
            +
                # becomes
         | 
| 107 | 
            +
                #
         | 
| 108 | 
            +
                # @foo = @foo + bar
         | 
| 109 | 
            +
                def visit_instance_variable_operator_write_node(node)
         | 
| 110 | 
            +
                  desugar_operator_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, arguments: [node.name])
         | 
| 111 | 
            +
                end
         | 
| 112 | 
            +
             | 
| 113 | 
            +
                # foo &&= bar
         | 
| 114 | 
            +
                #
         | 
| 115 | 
            +
                # becomes
         | 
| 116 | 
            +
                #
         | 
| 117 | 
            +
                # foo && foo = bar
         | 
| 118 | 
            +
                def visit_local_variable_and_write_node(node)
         | 
| 119 | 
            +
                  desugar_and_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, arguments: [node.name, node.depth])
         | 
| 120 | 
            +
                end
         | 
| 121 | 
            +
             | 
| 122 | 
            +
                # foo ||= bar
         | 
| 123 | 
            +
                #
         | 
| 124 | 
            +
                # becomes
         | 
| 125 | 
            +
                #
         | 
| 126 | 
            +
                # foo || foo = bar
         | 
| 127 | 
            +
                def visit_local_variable_or_write_node(node)
         | 
| 128 | 
            +
                  desugar_or_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, arguments: [node.name, node.depth])
         | 
| 129 | 
            +
                end
         | 
| 130 | 
            +
             | 
| 131 | 
            +
                # foo += bar
         | 
| 132 | 
            +
                #
         | 
| 133 | 
            +
                # becomes
         | 
| 134 | 
            +
                #
         | 
| 135 | 
            +
                # foo = foo + bar
         | 
| 136 | 
            +
                def visit_local_variable_operator_write_node(node)
         | 
| 137 | 
            +
                  desugar_operator_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, arguments: [node.name, node.depth])
         | 
| 138 | 
            +
                end
         | 
| 139 | 
            +
             | 
| 140 | 
            +
                private
         | 
| 141 | 
            +
             | 
| 142 | 
            +
                # Desugar `x &&= y` to `x && x = y`
         | 
| 143 | 
            +
                def desugar_and_write_node(node, read_class, write_class, arguments: [])
         | 
| 144 | 
            +
                  AndNode.new(
         | 
| 145 | 
            +
                    read_class.new(*arguments, node.name_loc),
         | 
| 146 | 
            +
                    write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location),
         | 
| 147 | 
            +
                    node.operator_loc,
         | 
| 148 | 
            +
                    node.location
         | 
| 149 | 
            +
                  )
         | 
| 150 | 
            +
                end
         | 
| 151 | 
            +
             | 
| 152 | 
            +
                # Desugar `x += y` to `x = x + y`
         | 
| 153 | 
            +
                def desugar_operator_write_node(node, read_class, write_class, arguments: [])
         | 
| 154 | 
            +
                  write_class.new(
         | 
| 155 | 
            +
                    *arguments,
         | 
| 156 | 
            +
                    node.name_loc,
         | 
| 157 | 
            +
                    CallNode.new(
         | 
| 158 | 
            +
                      read_class.new(*arguments, node.name_loc),
         | 
| 159 | 
            +
                      nil,
         | 
| 160 | 
            +
                      node.operator_loc.copy(length: node.operator_loc.length - 1),
         | 
| 161 | 
            +
                      nil,
         | 
| 162 | 
            +
                      ArgumentsNode.new([node.value], node.value.location),
         | 
| 163 | 
            +
                      nil,
         | 
| 164 | 
            +
                      nil,
         | 
| 165 | 
            +
                      0,
         | 
| 166 | 
            +
                      node.operator_loc.slice.chomp("="),
         | 
| 167 | 
            +
                      node.location
         | 
| 168 | 
            +
                    ),
         | 
| 169 | 
            +
                    node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1),
         | 
| 170 | 
            +
                    node.location
         | 
| 171 | 
            +
                  )
         | 
| 172 | 
            +
                end
         | 
| 173 | 
            +
             | 
| 174 | 
            +
                # Desugar `x ||= y` to `x || x = y`
         | 
| 175 | 
            +
                def desugar_or_write_node(node, read_class, write_class, arguments: [])
         | 
| 176 | 
            +
                  OrNode.new(
         | 
| 177 | 
            +
                    read_class.new(*arguments, node.name_loc),
         | 
| 178 | 
            +
                    write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location),
         | 
| 179 | 
            +
                    node.operator_loc,
         | 
| 180 | 
            +
                    node.location
         | 
| 181 | 
            +
                  )
         | 
| 182 | 
            +
                end
         | 
| 183 | 
            +
             | 
| 184 | 
            +
                # Desugar `x ||= y` to `defined?(x) ? x : x = y`
         | 
| 185 | 
            +
                def desugar_or_write_defined_node(node, read_class, write_class, arguments: [])
         | 
| 186 | 
            +
                  IfNode.new(
         | 
| 187 | 
            +
                    node.operator_loc,
         | 
| 188 | 
            +
                    DefinedNode.new(nil, read_class.new(*arguments, node.name_loc), nil, node.operator_loc, node.name_loc),
         | 
| 189 | 
            +
                    StatementsNode.new([read_class.new(*arguments, node.name_loc)], node.location),
         | 
| 190 | 
            +
                    ElseNode.new(
         | 
| 191 | 
            +
                      node.operator_loc,
         | 
| 192 | 
            +
                      StatementsNode.new(
         | 
| 193 | 
            +
                        [write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location)],
         | 
| 194 | 
            +
                        node.location
         | 
| 195 | 
            +
                      ),
         | 
| 196 | 
            +
                      node.operator_loc,
         | 
| 197 | 
            +
                      node.location
         | 
| 198 | 
            +
                    ),
         | 
| 199 | 
            +
                    node.operator_loc,
         | 
| 200 | 
            +
                    node.location
         | 
| 201 | 
            +
                  )
         | 
| 202 | 
            +
                end
         | 
| 203 | 
            +
              end
         | 
| 204 | 
            +
            end
         | 
    
        data/lib/yarp/ffi.rb
    CHANGED
    
    | @@ -70,7 +70,8 @@ module YARP | |
| 70 70 | 
             
                  "yarp.h",
         | 
| 71 71 | 
             
                  "yp_version",
         | 
| 72 72 | 
             
                  "yp_parse_serialize",
         | 
| 73 | 
            -
                  "yp_lex_serialize"
         | 
| 73 | 
            +
                  "yp_lex_serialize",
         | 
| 74 | 
            +
                  "yp_parse_lex_serialize"
         | 
| 74 75 | 
             
                )
         | 
| 75 76 |  | 
| 76 77 | 
             
                load_exported_functions_from(
         | 
| @@ -223,4 +224,29 @@ module YARP | |
| 223 224 | 
             
                  parse(string.read, filepath)
         | 
| 224 225 | 
             
                end
         | 
| 225 226 | 
             
              end
         | 
| 227 | 
            +
             | 
| 228 | 
            +
              # Mirror the YARP.parse_lex API by using the serialization API.
         | 
| 229 | 
            +
              def self.parse_lex(code, filepath = nil)
         | 
| 230 | 
            +
                LibRubyParser::YPBuffer.with do |buffer|
         | 
| 231 | 
            +
                  metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
         | 
| 232 | 
            +
                  LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata)
         | 
| 233 | 
            +
             | 
| 234 | 
            +
                  source = Source.new(code)
         | 
| 235 | 
            +
                  loader = Serialize::Loader.new(source, buffer.read)
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                  tokens = loader.load_tokens
         | 
| 238 | 
            +
                  node, comments, errors, warnings = loader.load_nodes
         | 
| 239 | 
            +
             | 
| 240 | 
            +
                  tokens.each { |token,| token.value.force_encoding(loader.encoding) }
         | 
| 241 | 
            +
             | 
| 242 | 
            +
                  ParseResult.new([node, tokens], comments, errors, warnings, source)
         | 
| 243 | 
            +
                end
         | 
| 244 | 
            +
              end
         | 
| 245 | 
            +
             | 
| 246 | 
            +
              # Mirror the YARP.parse_lex_file API by using the serialization API.
         | 
| 247 | 
            +
              def self.parse_lex_file(filepath)
         | 
| 248 | 
            +
                LibRubyParser::YPString.with(filepath) do |string|
         | 
| 249 | 
            +
                  parse_lex(string.read, filepath)
         | 
| 250 | 
            +
                end
         | 
| 251 | 
            +
              end
         | 
| 226 252 | 
             
            end
         | 
    
        data/lib/yarp/lex_compat.rb
    CHANGED
    
    | @@ -208,18 +208,9 @@ module YARP | |
| 208 208 | 
             
                  end
         | 
| 209 209 | 
             
                end
         | 
| 210 210 |  | 
| 211 | 
            -
                #  | 
| 212 | 
            -
                #  | 
| 213 | 
            -
                 | 
| 214 | 
            -
                class CommentToken < Token
         | 
| 215 | 
            -
                  def ==(other)
         | 
| 216 | 
            -
                    self[0...-1] == other[0...-1]
         | 
| 217 | 
            -
                  end
         | 
| 218 | 
            -
                end
         | 
| 219 | 
            -
             | 
| 220 | 
            -
                # Heredoc end tokens are emitted in an odd order, so we don't compare the
         | 
| 221 | 
            -
                # state on them.
         | 
| 222 | 
            -
                class HeredocEndToken < Token
         | 
| 211 | 
            +
                # Tokens where state should be ignored
         | 
| 212 | 
            +
                # used for :on_comment, :on_heredoc_end, :on_embexpr_end
         | 
| 213 | 
            +
                class IgnoreStateToken < Token
         | 
| 223 214 | 
             
                  def ==(other)
         | 
| 224 215 | 
             
                    self[0...-1] == other[0...-1]
         | 
| 225 216 | 
             
                  end
         | 
| @@ -252,6 +243,23 @@ module YARP | |
| 252 243 | 
             
                  end
         | 
| 253 244 | 
             
                end
         | 
| 254 245 |  | 
| 246 | 
            +
                # If we have an identifier that follows a method name like:
         | 
| 247 | 
            +
                #
         | 
| 248 | 
            +
                #     def foo bar
         | 
| 249 | 
            +
                #
         | 
| 250 | 
            +
                # then Ripper will mark bar as END|LABEL if there is a local in a parent
         | 
| 251 | 
            +
                # scope named bar because it hasn't pushed the local table yet. We do this
         | 
| 252 | 
            +
                # more accurately, so we need to allow comparing against both END and
         | 
| 253 | 
            +
                # END|LABEL.
         | 
| 254 | 
            +
                class ParamToken < Token
         | 
| 255 | 
            +
                  def ==(other)
         | 
| 256 | 
            +
                    (self[0...-1] == other[0...-1]) && (
         | 
| 257 | 
            +
                      (other[3] == Ripper::EXPR_END) ||
         | 
| 258 | 
            +
                      (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
         | 
| 259 | 
            +
                    )
         | 
| 260 | 
            +
                  end
         | 
| 261 | 
            +
                end
         | 
| 262 | 
            +
             | 
| 255 263 | 
             
                # A heredoc in this case is a list of tokens that belong to the body of the
         | 
| 256 264 | 
             
                # heredoc that should be appended onto the list of tokens when the heredoc
         | 
| 257 265 | 
             
                # closes.
         | 
| @@ -558,18 +566,45 @@ module YARP | |
| 558 566 | 
             
                  result_value = result.value
         | 
| 559 567 | 
             
                  previous_state = nil
         | 
| 560 568 |  | 
| 561 | 
            -
                  #  | 
| 562 | 
            -
                  #  | 
| 563 | 
            -
                  #  | 
| 564 | 
            -
                   | 
| 565 | 
            -
                   | 
| 566 | 
            -
                  bom = source.bytes[0..2] == [0xEF, 0xBB, 0xBF]
         | 
| 567 | 
            -
                  result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
         | 
| 569 | 
            +
                  # In previous versions of Ruby, Ripper wouldn't flush the bom before the
         | 
| 570 | 
            +
                  # first token, so we had to have a hack in place to account for that. This
         | 
| 571 | 
            +
                  # checks for that behavior.
         | 
| 572 | 
            +
                  bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
         | 
| 573 | 
            +
                  bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
         | 
| 568 574 |  | 
| 569 575 | 
             
                  result_value.each_with_index do |(token, lex_state), index|
         | 
| 570 576 | 
             
                    lineno = token.location.start_line
         | 
| 571 577 | 
             
                    column = token.location.start_column
         | 
| 572 | 
            -
             | 
| 578 | 
            +
             | 
| 579 | 
            +
                    # If there's a UTF-8 byte-order mark at the start of the file, then for
         | 
| 580 | 
            +
                    # certain tokens ripper sets the first token back by 3 bytes. It also
         | 
| 581 | 
            +
                    # keeps the byte order mark in the first token's value. This is weird,
         | 
| 582 | 
            +
                    # and I don't want to mirror that in our parser. So instead, we'll match
         | 
| 583 | 
            +
                    # up the columns and values here.
         | 
| 584 | 
            +
                    if bom && lineno == 1
         | 
| 585 | 
            +
                      column -= 3
         | 
| 586 | 
            +
             | 
| 587 | 
            +
                      if index == 0 && column == 0 && !bom_flushed
         | 
| 588 | 
            +
                        flushed =
         | 
| 589 | 
            +
                          case token.type
         | 
| 590 | 
            +
                          when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
         | 
| 591 | 
            +
                              :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
         | 
| 592 | 
            +
                              :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
         | 
| 593 | 
            +
                              :PERCENT_UPPER_W, :STRING_BEGIN
         | 
| 594 | 
            +
                            true
         | 
| 595 | 
            +
                          when :REGEXP_BEGIN, :SYMBOL_BEGIN
         | 
| 596 | 
            +
                            token.value.start_with?("%")
         | 
| 597 | 
            +
                          else
         | 
| 598 | 
            +
                            false
         | 
| 599 | 
            +
                          end
         | 
| 600 | 
            +
             | 
| 601 | 
            +
                        unless flushed
         | 
| 602 | 
            +
                          column -= 3
         | 
| 603 | 
            +
                          value = token.value
         | 
| 604 | 
            +
                          value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
         | 
| 605 | 
            +
                        end
         | 
| 606 | 
            +
                      end
         | 
| 607 | 
            +
                    end
         | 
| 573 608 |  | 
| 574 609 | 
             
                    event = RIPPER.fetch(token.type)
         | 
| 575 610 | 
             
                    value = token.value
         | 
| @@ -580,13 +615,23 @@ module YARP | |
| 580 615 | 
             
                      when :on___end__
         | 
| 581 616 | 
             
                        EndContentToken.new([[lineno, column], event, value, lex_state])
         | 
| 582 617 | 
             
                      when :on_comment
         | 
| 583 | 
            -
                         | 
| 618 | 
            +
                        IgnoreStateToken.new([[lineno, column], event, value, lex_state])
         | 
| 584 619 | 
             
                      when :on_heredoc_end
         | 
| 585 620 | 
             
                        # Heredoc end tokens can be emitted in an odd order, so we don't
         | 
| 586 621 | 
             
                        # want to bother comparing the state on them.
         | 
| 587 | 
            -
                         | 
| 588 | 
            -
                      when : | 
| 589 | 
            -
                        if lex_state == Ripper::EXPR_END | 
| 622 | 
            +
                        IgnoreStateToken.new([[lineno, column], event, value, lex_state])
         | 
| 623 | 
            +
                      when :on_ident
         | 
| 624 | 
            +
                        if lex_state == Ripper::EXPR_END
         | 
| 625 | 
            +
                          # If we have an identifier that follows a method name like:
         | 
| 626 | 
            +
                          #
         | 
| 627 | 
            +
                          #     def foo bar
         | 
| 628 | 
            +
                          #
         | 
| 629 | 
            +
                          # then Ripper will mark bar as END|LABEL if there is a local in a
         | 
| 630 | 
            +
                          # parent scope named bar because it hasn't pushed the local table
         | 
| 631 | 
            +
                          # yet. We do this more accurately, so we need to allow comparing
         | 
| 632 | 
            +
                          # against both END and END|LABEL.
         | 
| 633 | 
            +
                          ParamToken.new([[lineno, column], event, value, lex_state])
         | 
| 634 | 
            +
                        elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
         | 
| 590 635 | 
             
                          # In the event that we're comparing identifiers, we're going to
         | 
| 591 636 | 
             
                          # allow a little divergence. Ripper doesn't account for local
         | 
| 592 637 | 
             
                          # variables introduced through named captures in regexes, and we
         | 
| @@ -595,6 +640,8 @@ module YARP | |
| 595 640 | 
             
                        else
         | 
| 596 641 | 
             
                          Token.new([[lineno, column], event, value, lex_state])
         | 
| 597 642 | 
             
                        end
         | 
| 643 | 
            +
                      when :on_embexpr_end
         | 
| 644 | 
            +
                        IgnoreStateToken.new([[lineno, column], event, value, lex_state])
         | 
| 598 645 | 
             
                      when :on_ignored_nl
         | 
| 599 646 | 
             
                        # Ignored newlines can occasionally have a LABEL state attached to
         | 
| 600 647 | 
             
                        # them which doesn't actually impact anything. We don't mirror that
         | 
| @@ -629,6 +676,26 @@ module YARP | |
| 629 676 | 
             
                            previous_state
         | 
| 630 677 | 
             
                          end
         | 
| 631 678 |  | 
| 679 | 
            +
                        Token.new([[lineno, column], event, value, lex_state])
         | 
| 680 | 
            +
                      when :on_eof
         | 
| 681 | 
            +
                        previous_token = result_value[index - 1][0]
         | 
| 682 | 
            +
             | 
| 683 | 
            +
                        # If we're at the end of the file and the previous token was a
         | 
| 684 | 
            +
                        # comment and there is still whitespace after the comment, then
         | 
| 685 | 
            +
                        # Ripper will append an on_nl token (even though there isn't
         | 
| 686 | 
            +
                        # necessarily a newline). We mirror that here.
         | 
| 687 | 
            +
                        start_offset = previous_token.location.end_offset
         | 
| 688 | 
            +
                        end_offset = token.location.start_offset
         | 
| 689 | 
            +
             | 
| 690 | 
            +
                        if previous_token.type == :COMMENT && start_offset < end_offset
         | 
| 691 | 
            +
                          if bom
         | 
| 692 | 
            +
                            start_offset += 3
         | 
| 693 | 
            +
                            end_offset += 3
         | 
| 694 | 
            +
                          end
         | 
| 695 | 
            +
             | 
| 696 | 
            +
                          tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
         | 
| 697 | 
            +
                        end
         | 
| 698 | 
            +
             | 
| 632 699 | 
             
                        Token.new([[lineno, column], event, value, lex_state])
         | 
| 633 700 | 
             
                      else
         | 
| 634 701 | 
             
                        Token.new([[lineno, column], event, value, lex_state])
         | 
| @@ -713,7 +780,8 @@ module YARP | |
| 713 780 | 
             
                    end
         | 
| 714 781 | 
             
                  end
         | 
| 715 782 |  | 
| 716 | 
            -
                   | 
| 783 | 
            +
                  # Drop the EOF token from the list
         | 
| 784 | 
            +
                  tokens = tokens[0...-1]
         | 
| 717 785 |  | 
| 718 786 | 
             
                  # We sort by location to compare against Ripper's output
         | 
| 719 787 | 
             
                  tokens.sort_by!(&:location)
         |