yarp 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
+ module YARP
4
+ class DesugarVisitor < MutationVisitor
5
+ # @@foo &&= bar
6
+ #
7
+ # becomes
8
+ #
9
+ # @@foo && @@foo = bar
10
+ def visit_class_variable_and_write_node(node)
11
+ AndNode.new(
12
+ ClassVariableReadNode.new(node.name_loc),
13
+ ClassVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
14
+ node.operator_loc,
15
+ node.location
16
+ )
17
+ end
18
+
19
+ # @@foo ||= bar
20
+ #
21
+ # becomes
22
+ #
23
+ # @@foo || @@foo = bar
24
+ def visit_class_variable_or_write_node(node)
25
+ OrNode.new(
26
+ ClassVariableReadNode.new(node.name_loc),
27
+ ClassVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
28
+ node.operator_loc,
29
+ node.location
30
+ )
31
+ end
32
+
33
+ # @@foo += bar
34
+ #
35
+ # becomes
36
+ #
37
+ # @@foo = @@foo + bar
38
+ def visit_class_variable_operator_write_node(node)
39
+ desugar_operator_write_node(node, ClassVariableWriteNode, ClassVariableReadNode)
40
+ end
41
+
42
+ # Foo &&= bar
43
+ #
44
+ # becomes
45
+ #
46
+ # Foo && Foo = bar
47
+ def visit_constant_and_write_node(node)
48
+ AndNode.new(
49
+ ConstantReadNode.new(node.name_loc),
50
+ ConstantWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
51
+ node.operator_loc,
52
+ node.location
53
+ )
54
+ end
55
+
56
+ # Foo ||= bar
57
+ #
58
+ # becomes
59
+ #
60
+ # Foo || Foo = bar
61
+ def visit_constant_or_write_node(node)
62
+ OrNode.new(
63
+ ConstantReadNode.new(node.name_loc),
64
+ ConstantWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
65
+ node.operator_loc,
66
+ node.location
67
+ )
68
+ end
69
+
70
+ # Foo += bar
71
+ #
72
+ # becomes
73
+ #
74
+ # Foo = Foo + bar
75
+ def visit_constant_operator_write_node(node)
76
+ desugar_operator_write_node(node, ConstantWriteNode, ConstantReadNode)
77
+ end
78
+
79
+ # Foo::Bar &&= baz
80
+ #
81
+ # becomes
82
+ #
83
+ # Foo::Bar && Foo::Bar = baz
84
+ def visit_constant_path_and_write_node(node)
85
+ AndNode.new(
86
+ node.target,
87
+ ConstantPathWriteNode.new(node.target, node.value, node.operator_loc, node.location),
88
+ node.operator_loc,
89
+ node.location
90
+ )
91
+ end
92
+
93
+ # Foo::Bar ||= baz
94
+ #
95
+ # becomes
96
+ #
97
+ # Foo::Bar || Foo::Bar = baz
98
+ def visit_constant_path_or_write_node(node)
99
+ OrNode.new(
100
+ node.target,
101
+ ConstantPathWriteNode.new(node.target, node.value, node.operator_loc, node.location),
102
+ node.operator_loc,
103
+ node.location
104
+ )
105
+ end
106
+
107
+ # Foo::Bar += baz
108
+ #
109
+ # becomes
110
+ #
111
+ # Foo::Bar = Foo::Bar + baz
112
+ def visit_constant_path_operator_write_node(node)
113
+ ConstantPathWriteNode.new(
114
+ node.target,
115
+ CallNode.new(
116
+ node.target,
117
+ nil,
118
+ node.operator_loc.copy(length: node.operator_loc.length - 1),
119
+ nil,
120
+ ArgumentsNode.new([node.value], node.value.location),
121
+ nil,
122
+ nil,
123
+ 0,
124
+ node.operator_loc.slice.chomp("="),
125
+ node.location
126
+ ),
127
+ node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1),
128
+ node.location
129
+ )
130
+ end
131
+
132
+ # $foo &&= bar
133
+ #
134
+ # becomes
135
+ #
136
+ # $foo && $foo = bar
137
+ def visit_global_variable_and_write_node(node)
138
+ AndNode.new(
139
+ GlobalVariableReadNode.new(node.name_loc),
140
+ GlobalVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
141
+ node.operator_loc,
142
+ node.location
143
+ )
144
+ end
145
+
146
+ # $foo ||= bar
147
+ #
148
+ # becomes
149
+ #
150
+ # $foo || $foo = bar
151
+ def visit_global_variable_or_write_node(node)
152
+ OrNode.new(
153
+ GlobalVariableReadNode.new(node.name_loc),
154
+ GlobalVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
155
+ node.operator_loc,
156
+ node.location
157
+ )
158
+ end
159
+
160
+ # $foo += bar
161
+ #
162
+ # becomes
163
+ #
164
+ # $foo = $foo + bar
165
+ def visit_global_variable_operator_write_node(node)
166
+ desugar_operator_write_node(node, GlobalVariableWriteNode, GlobalVariableReadNode)
167
+ end
168
+
169
+ # @foo &&= bar
170
+ #
171
+ # becomes
172
+ #
173
+ # @foo && @foo = bar
174
+ def visit_instance_variable_and_write_node(node)
175
+ AndNode.new(
176
+ InstanceVariableReadNode.new(node.name_loc),
177
+ InstanceVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
178
+ node.operator_loc,
179
+ node.location
180
+ )
181
+ end
182
+
183
+ # @foo ||= bar
184
+ #
185
+ # becomes
186
+ #
187
+ # @foo || @foo = bar
188
+ def visit_instance_variable_or_write_node(node)
189
+ OrNode.new(
190
+ InstanceVariableReadNode.new(node.name_loc),
191
+ InstanceVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
192
+ node.operator_loc,
193
+ node.location
194
+ )
195
+ end
196
+
197
+ # @foo += bar
198
+ #
199
+ # becomes
200
+ #
201
+ # @foo = @foo + bar
202
+ def visit_instance_variable_operator_write_node(node)
203
+ desugar_operator_write_node(node, InstanceVariableWriteNode, InstanceVariableReadNode)
204
+ end
205
+
206
+ # foo &&= bar
207
+ #
208
+ # becomes
209
+ #
210
+ # foo && foo = bar
211
+ def visit_local_variable_and_write_node(node)
212
+ AndNode.new(
213
+ LocalVariableReadNode.new(node.constant_id, node.depth, node.name_loc),
214
+ LocalVariableWriteNode.new(node.constant_id, node.depth, node.name_loc, node.value, node.operator_loc, node.location),
215
+ node.operator_loc,
216
+ node.location
217
+ )
218
+ end
219
+
220
+ # foo ||= bar
221
+ #
222
+ # becomes
223
+ #
224
+ # foo || foo = bar
225
+ def visit_local_variable_or_write_node(node)
226
+ OrNode.new(
227
+ LocalVariableReadNode.new(node.constant_id, node.depth, node.name_loc),
228
+ LocalVariableWriteNode.new(node.constant_id, node.depth, node.name_loc, node.value, node.operator_loc, node.location),
229
+ node.operator_loc,
230
+ node.location
231
+ )
232
+ end
233
+
234
+ # foo += bar
235
+ #
236
+ # becomes
237
+ #
238
+ # foo = foo + bar
239
+ def visit_local_variable_operator_write_node(node)
240
+ desugar_operator_write_node(node, LocalVariableWriteNode, LocalVariableReadNode, arguments: [node.constant_id, node.depth])
241
+ end
242
+
243
+ private
244
+
245
+ # Desugar `x += y` to `x = x + y`
246
+ def desugar_operator_write_node(node, write_class, read_class, arguments: [])
247
+ write_class.new(
248
+ *arguments,
249
+ node.name_loc,
250
+ CallNode.new(
251
+ read_class.new(*arguments, node.name_loc),
252
+ nil,
253
+ node.operator_loc.copy(length: node.operator_loc.length - 1),
254
+ nil,
255
+ ArgumentsNode.new([node.value], node.value.location),
256
+ nil,
257
+ nil,
258
+ 0,
259
+ node.operator_loc.slice.chomp("="),
260
+ node.location
261
+ ),
262
+ node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1),
263
+ node.location
264
+ )
265
+ end
266
+ end
267
+ end
data/lib/yarp/ffi.rb CHANGED
@@ -70,7 +70,8 @@ module YARP
70
70
  "yarp.h",
71
71
  "yp_version",
72
72
  "yp_parse_serialize",
73
- "yp_lex_serialize"
73
+ "yp_lex_serialize",
74
+ "yp_parse_lex_serialize"
74
75
  )
75
76
 
76
77
  load_exported_functions_from(
@@ -223,4 +224,29 @@ module YARP
223
224
  parse(string.read, filepath)
224
225
  end
225
226
  end
227
+
228
+ # Mirror the YARP.parse_lex API by using the serialization API.
229
+ def self.parse_lex(code, filepath = nil)
230
+ LibRubyParser::YPBuffer.with do |buffer|
231
+ metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
232
+ LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata)
233
+
234
+ source = Source.new(code)
235
+ loader = Serialize::Loader.new(source, buffer.read)
236
+
237
+ tokens = loader.load_tokens
238
+ node, comments, errors, warnings = loader.load_nodes
239
+
240
+ tokens.each { |token,| token.value.force_encoding(loader.encoding) }
241
+
242
+ ParseResult.new([node, tokens], comments, errors, warnings, source)
243
+ end
244
+ end
245
+
246
+ # Mirror the YARP.parse_lex_file API by using the serialization API.
247
+ def self.parse_lex_file(filepath)
248
+ LibRubyParser::YPString.with(filepath) do |string|
249
+ parse_lex(string.read, filepath)
250
+ end
251
+ end
226
252
  end
@@ -208,18 +208,9 @@ module YARP
208
208
  end
209
209
  end
210
210
 
211
- # It is extremely non obvious which state the parser is in when comments get
212
- # dispatched. Because of this we don't both comparing state when comparing
213
- # against other comment tokens.
214
- class CommentToken < Token
215
- def ==(other)
216
- self[0...-1] == other[0...-1]
217
- end
218
- end
219
-
220
- # Heredoc end tokens are emitted in an odd order, so we don't compare the
221
- # state on them.
222
- class HeredocEndToken < Token
211
+ # Tokens where state should be ignored
212
+ # used for :on_comment, :on_heredoc_end, :on_embexpr_end
213
+ class IgnoreStateToken < Token
223
214
  def ==(other)
224
215
  self[0...-1] == other[0...-1]
225
216
  end
@@ -252,6 +243,23 @@ module YARP
252
243
  end
253
244
  end
254
245
 
246
+ # If we have an identifier that follows a method name like:
247
+ #
248
+ # def foo bar
249
+ #
250
+ # then Ripper will mark bar as END|LABEL if there is a local in a parent
251
+ # scope named bar because it hasn't pushed the local table yet. We do this
252
+ # more accurately, so we need to allow comparing against both END and
253
+ # END|LABEL.
254
+ class ParamToken < Token
255
+ def ==(other)
256
+ (self[0...-1] == other[0...-1]) && (
257
+ (other[3] == Ripper::EXPR_END) ||
258
+ (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
259
+ )
260
+ end
261
+ end
262
+
255
263
  # A heredoc in this case is a list of tokens that belong to the body of the
256
264
  # heredoc that should be appended onto the list of tokens when the heredoc
257
265
  # closes.
@@ -558,18 +566,45 @@ module YARP
558
566
  result_value = result.value
559
567
  previous_state = nil
560
568
 
561
- # If there's a UTF-8 byte-order mark as the start of the file, then ripper
562
- # sets every token's on the first line back by 6 bytes. It also keeps the
563
- # byte order mark in the first token's value. This is weird, and I don't
564
- # want to mirror that in our parser. So instead, we'll match up the values
565
- # here, and then match up the locations as we process the tokens.
566
- bom = source.bytes[0..2] == [0xEF, 0xBB, 0xBF]
567
- result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
569
+ # In previous versions of Ruby, Ripper wouldn't flush the bom before the
570
+ # first token, so we had to have a hack in place to account for that. This
571
+ # checks for that behavior.
572
+ bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
573
+ bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
568
574
 
569
575
  result_value.each_with_index do |(token, lex_state), index|
570
576
  lineno = token.location.start_line
571
577
  column = token.location.start_column
572
- column -= index == 0 ? 6 : 3 if bom && lineno == 1
578
+
579
+ # If there's a UTF-8 byte-order mark as the start of the file, then for
580
+ # certain tokens ripper sets the first token back by 3 bytes. It also
581
+ # keeps the byte order mark in the first token's value. This is weird,
582
+ # and I don't want to mirror that in our parser. So instead, we'll match
583
+ # up the columns and values here.
584
+ if bom && lineno == 1
585
+ column -= 3
586
+
587
+ if index == 0 && column == 0 && !bom_flushed
588
+ flushed =
589
+ case token.type
590
+ when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
591
+ :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
592
+ :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
593
+ :PERCENT_UPPER_W, :STRING_BEGIN
594
+ true
595
+ when :REGEXP_BEGIN, :SYMBOL_BEGIN
596
+ token.value.start_with?("%")
597
+ else
598
+ false
599
+ end
600
+
601
+ unless flushed
602
+ column -= 3
603
+ value = token.value
604
+ value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
605
+ end
606
+ end
607
+ end
573
608
 
574
609
  event = RIPPER.fetch(token.type)
575
610
  value = token.value
@@ -580,13 +615,23 @@ module YARP
580
615
  when :on___end__
581
616
  EndContentToken.new([[lineno, column], event, value, lex_state])
582
617
  when :on_comment
583
- CommentToken.new([[lineno, column], event, value, lex_state])
618
+ IgnoreStateToken.new([[lineno, column], event, value, lex_state])
584
619
  when :on_heredoc_end
585
620
  # Heredoc end tokens can be emitted in an odd order, so we don't
586
621
  # want to bother comparing the state on them.
587
- HeredocEndToken.new([[lineno, column], event, value, lex_state])
588
- when :on_embexpr_end, :on_ident
589
- if lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
622
+ IgnoreStateToken.new([[lineno, column], event, value, lex_state])
623
+ when :on_ident
624
+ if lex_state == Ripper::EXPR_END
625
+ # If we have an identifier that follows a method name like:
626
+ #
627
+ # def foo bar
628
+ #
629
+ # then Ripper will mark bar as END|LABEL if there is a local in a
630
+ # parent scope named bar because it hasn't pushed the local table
631
+ # yet. We do this more accurately, so we need to allow comparing
632
+ # against both END and END|LABEL.
633
+ ParamToken.new([[lineno, column], event, value, lex_state])
634
+ elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
590
635
  # In the event that we're comparing identifiers, we're going to
591
636
  # allow a little divergence. Ripper doesn't account for local
592
637
  # variables introduced through named captures in regexes, and we
@@ -595,6 +640,8 @@ module YARP
595
640
  else
596
641
  Token.new([[lineno, column], event, value, lex_state])
597
642
  end
643
+ when :on_embexpr_end
644
+ IgnoreStateToken.new([[lineno, column], event, value, lex_state])
598
645
  when :on_ignored_nl
599
646
  # Ignored newlines can occasionally have a LABEL state attached to
600
647
  # them which doesn't actually impact anything. We don't mirror that
@@ -629,6 +676,26 @@ module YARP
629
676
  previous_state
630
677
  end
631
678
 
679
+ Token.new([[lineno, column], event, value, lex_state])
680
+ when :on_eof
681
+ previous_token = result_value[index - 1][0]
682
+
683
+ # If we're at the end of the file and the previous token was a
684
+ # comment and there is still whitespace after the comment, then
685
+ # Ripper will append a on_nl token (even though there isn't
686
+ # necessarily a newline). We mirror that here.
687
+ start_offset = previous_token.location.end_offset
688
+ end_offset = token.location.start_offset
689
+
690
+ if previous_token.type == :COMMENT && start_offset < end_offset
691
+ if bom
692
+ start_offset += 3
693
+ end_offset += 3
694
+ end
695
+
696
+ tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
697
+ end
698
+
632
699
  Token.new([[lineno, column], event, value, lex_state])
633
700
  else
634
701
  Token.new([[lineno, column], event, value, lex_state])
@@ -713,7 +780,8 @@ module YARP
713
780
  end
714
781
  end
715
782
 
716
- tokens.reject! { |t| t.event == :on_eof }
783
+ # Drop the EOF token from the list
784
+ tokens = tokens[0...-1]
717
785
 
718
786
  # We sort by location to compare against Ripper's output
719
787
  tokens.sort_by!(&:location)