syntax_tree 5.3.0 → 6.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +78 -1
  4. data/Gemfile.lock +7 -7
  5. data/README.md +33 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +158 -59
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +245 -123
  23. data/lib/syntax_tree/parser.rb +332 -119
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3107 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -1,55 +1,54 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SyntaxTree
4
- class Visitor
5
- # This is the parent class of a lot of built-in visitors for Syntax Tree. It
6
- # reflects visiting each of the fields on every node in turn. It itself does
7
- # not do anything with these fields, it leaves that behavior up to the
8
- # subclass to implement.
9
- #
10
- # In order to properly use this class, you will need to subclass it and
11
- # implement #comments, #field, #list, #node, #pairs, and #text. Those are
12
- # documented here.
13
- #
14
- # == comments(node)
15
- #
16
- # This accepts the node that is being visited and does something depending
17
- # on the comments attached to the node.
18
- #
19
- # == field(name, value)
20
- #
21
- # This accepts the name of the field being visited as a string (like
22
- # "value") and the actual value of that field. The value can be a subclass
23
- # of Node or any other type that can be held within the tree.
24
- #
25
- # == list(name, values)
26
- #
27
- # This accepts the name of the field being visited as well as a list of
28
- # values. This is used, for example, when visiting something like the body
29
- # of a Statements node.
30
- #
31
- # == node(name, node)
32
- #
33
- # This is the parent serialization method for each node. It is called with
34
- # the node itself, as well as the type of the node as a string. The type
35
- # is an internally used value that usually resembles the name of the
36
- # ripper event that generated the node. The method should yield to the
37
- # given block which then calls through to visit each of the fields on the
38
- # node.
39
- #
40
- # == text(name, value)
41
- #
42
- # This accepts the name of the field being visited as well as a string
43
- # value representing the value of the field.
44
- #
45
- # == pairs(name, values)
46
- #
47
- # This accepts the name of the field being visited as well as a list of
48
- # pairs that represent the value of the field. It is used only in a couple
49
- # of circumstances, like when visiting the list of optional parameters
50
- # defined on a method.
51
- #
52
- class FieldVisitor < BasicVisitor
4
+ # This is the parent class of a lot of built-in visitors for Syntax Tree. It
5
+ # reflects visiting each of the fields on every node in turn. It itself does
6
+ # not do anything with these fields, it leaves that behavior up to the
7
+ # subclass to implement.
8
+ #
9
+ # In order to properly use this class, you will need to subclass it and
10
+ # implement #comments, #field, #list, #node, #pairs, and #text. Those are
11
+ # documented here.
12
+ #
13
+ # == comments(node)
14
+ #
15
+ # This accepts the node that is being visited and does something depending on
16
+ # the comments attached to the node.
17
+ #
18
+ # == field(name, value)
19
+ #
20
+ # This accepts the name of the field being visited as a string (like "value")
21
+ # and the actual value of that field. The value can be a subclass of Node or
22
+ # any other type that can be held within the tree.
23
+ #
24
+ # == list(name, values)
25
+ #
26
+ # This accepts the name of the field being visited as well as a list of
27
+ # values. This is used, for example, when visiting something like the body of
28
+ # a Statements node.
29
+ #
30
+ # == node(name, node)
31
+ #
32
+ # This is the parent serialization method for each node. It is called with the
33
+ # node itself, as well as the type of the node as a string. The type is an
34
+ # internally used value that usually resembles the name of the ripper event
35
+ # that generated the node. The method should yield to the given block which
36
+ # then calls through to visit each of the fields on the node.
37
+ #
38
+ # == text(name, value)
39
+ #
40
+ # This accepts the name of the field being visited as well as a string value
41
+ # representing the value of the field.
42
+ #
43
+ # == pairs(name, values)
44
+ #
45
+ # This accepts the name of the field being visited as well as a list of pairs
46
+ # that represent the value of the field. It is used only in a couple of
47
+ # circumstances, like when visiting the list of optional parameters defined on
48
+ # a method.
49
+ #
50
+ class FieldVisitor < BasicVisitor
51
+ visit_methods do
53
52
  def visit_aref(node)
54
53
  node(node, "aref") do
55
54
  field("collection", node.collection)
@@ -1017,14 +1016,14 @@ module SyntaxTree
1017
1016
  def visit___end__(node)
1018
1017
  visit_token(node, "__end__")
1019
1018
  end
1019
+ end
1020
1020
 
1021
- private
1021
+ private
1022
1022
 
1023
- def visit_token(node, type)
1024
- node(node, type) do
1025
- field("value", node.value)
1026
- comments(node)
1027
- end
1023
+ def visit_token(node, type)
1024
+ node(node, type) do
1025
+ field("value", node.value)
1026
+ comments(node)
1028
1027
  end
1029
1028
  end
1030
1029
  end
@@ -138,7 +138,7 @@ module SyntaxTree
138
138
  # going to just print out the node as it was seen in the source.
139
139
  doc =
140
140
  if last_leading&.ignore?
141
- range = source[node.location.start_char...node.location.end_char]
141
+ range = source[node.start_char...node.end_char]
142
142
  first = true
143
143
 
144
144
  range.each_line(chomp: true) do |line|
@@ -20,11 +20,12 @@ module SyntaxTree
20
20
 
21
21
  # This entry represents a class definition using the class keyword.
22
22
  class ClassDefinition
23
- attr_reader :nesting, :name, :location, :comments
23
+ attr_reader :nesting, :name, :superclass, :location, :comments
24
24
 
25
- def initialize(nesting, name, location, comments)
25
+ def initialize(nesting, name, superclass, location, comments)
26
26
  @nesting = nesting
27
27
  @name = name
28
+ @superclass = superclass
28
29
  @location = location
29
30
  @comments = comments
30
31
  end
@@ -176,30 +177,101 @@ module SyntaxTree
176
177
  Location.new(code_location[0], code_location[1])
177
178
  end
178
179
 
180
+ def find_constant_path(insns, index)
181
+ index -= 1 while insns[index].is_a?(Integer)
182
+ insn = insns[index]
183
+
184
+ if insn.is_a?(Array) && insn[0] == :opt_getconstant_path
185
+ # In this case we're on Ruby 3.2+ and we have an opt_getconstant_path
186
+ # instruction, so we already know all of the symbols in the nesting.
187
+ [index - 1, insn[1]]
188
+ elsif insn.is_a?(Symbol) && insn.match?(/\Alabel_\d+/)
189
+ # Otherwise, if we have a label then this is very likely the
190
+ # destination of an opt_getinlinecache instruction, in which case
191
+ # we'll walk backwards to grab up all of the constants.
192
+ names = []
193
+
194
+ index -= 1
195
+ until insns[index].is_a?(Array) &&
196
+ insns[index][0] == :opt_getinlinecache
197
+ if insns[index].is_a?(Array) && insns[index][0] == :getconstant
198
+ names.unshift(insns[index][1])
199
+ end
200
+
201
+ index -= 1
202
+ end
203
+
204
+ [index - 1, names]
205
+ else
206
+ [index, []]
207
+ end
208
+ end
209
+
179
210
  def index_iseq(iseq, file_comments)
180
211
  results = []
181
212
  queue = [[iseq, []]]
182
213
 
183
214
  while (current_iseq, current_nesting = queue.shift)
184
- current_iseq[13].each_with_index do |insn, index|
185
- next unless insn.is_a?(Array)
215
+ line = current_iseq[8]
216
+ insns = current_iseq[13]
217
+
218
+ insns.each_with_index do |insn, index|
219
+ case insn
220
+ when Integer
221
+ line = insn
222
+ next
223
+ when Array
224
+ # continue on
225
+ else
226
+ # skip everything else
227
+ next
228
+ end
186
229
 
187
230
  case insn[0]
188
231
  when :defineclass
189
232
  _, name, class_iseq, flags = insn
233
+ next_nesting = current_nesting.dup
234
+
235
+ # This is the index we're going to search for the nested constant
236
+ # path within the declaration name.
237
+ constant_index = index - 2
238
+
239
+ # This is the superclass of the class being defined.
240
+ superclass = []
241
+
242
+ # If there is a superclass, then we're going to find it here and
243
+ # then update the constant_index as necessary.
244
+ if flags & VM_DEFINECLASS_FLAG_HAS_SUPERCLASS > 0
245
+ constant_index, superclass =
246
+ find_constant_path(insns, index - 1)
247
+
248
+ if superclass.empty?
249
+ raise NotImplementedError,
250
+ "superclass with non constant path on line #{line}"
251
+ end
252
+ end
253
+
254
+ if (_, nesting = find_constant_path(insns, constant_index))
255
+ # If there is a constant path in the class name, then we need to
256
+ # handle that by updating the nesting.
257
+ next_nesting << (nesting << name)
258
+ else
259
+ # Otherwise we'll add the class name to the nesting.
260
+ next_nesting << [name]
261
+ end
190
262
 
191
263
  if flags == VM_DEFINECLASS_TYPE_SINGLETON_CLASS
192
264
  # At the moment, we don't support singletons that aren't
193
265
  # defined on self. We could, but it would require more
194
266
  # emulation.
195
- if current_iseq[13][index - 2] != [:putself]
267
+ if insns[index - 2] != [:putself]
196
268
  raise NotImplementedError,
197
269
  "singleton class with non-self receiver"
198
270
  end
199
271
  elsif flags & VM_DEFINECLASS_TYPE_MODULE > 0
200
272
  location = location_for(class_iseq)
201
273
  results << ModuleDefinition.new(
202
- current_nesting,
274
+ next_nesting,
203
275
  name,
204
276
  location,
205
277
  EntryComments.new(file_comments, location)
@@ -207,14 +279,15 @@ module SyntaxTree
207
279
  else
208
280
  location = location_for(class_iseq)
209
281
  results << ClassDefinition.new(
210
- current_nesting,
282
+ next_nesting,
211
283
  name,
284
+ superclass,
212
285
  location,
213
286
  EntryComments.new(file_comments, location)
214
287
  )
215
288
  end
216
289
 
217
- queue << [class_iseq, current_nesting + [name]]
290
+ queue << [class_iseq, next_nesting]
218
291
  when :definemethod
219
292
  location = location_for(insn[2])
220
293
  results << MethodDefinition.new(
@@ -257,74 +330,100 @@ module SyntaxTree
257
330
  @statements = nil
258
331
  end
259
332
 
260
- def visit_class(node)
261
- name = visit(node.constant).to_sym
262
- location =
263
- Location.new(node.location.start_line, node.location.start_column)
333
+ visit_methods do
334
+ def visit_class(node)
335
+ names = visit(node.constant)
336
+ nesting << names
264
337
 
265
- results << ClassDefinition.new(
266
- nesting.dup,
267
- name,
268
- location,
269
- comments_for(node)
270
- )
338
+ location =
339
+ Location.new(node.location.start_line, node.location.start_column)
271
340
 
272
- nesting << name
273
- super
274
- nesting.pop
275
- end
341
+ superclass =
342
+ if node.superclass
343
+ visited = visit(node.superclass)
276
344
 
277
- def visit_const_ref(node)
278
- node.constant.value
279
- end
345
+ if visited == [[]]
346
+ raise NotImplementedError, "superclass with non constant path"
347
+ end
280
348
 
281
- def visit_def(node)
282
- name = node.name.value.to_sym
283
- location =
284
- Location.new(node.location.start_line, node.location.start_column)
349
+ visited
350
+ else
351
+ []
352
+ end
285
353
 
286
- results << if node.target.nil?
287
- MethodDefinition.new(
354
+ results << ClassDefinition.new(
288
355
  nesting.dup,
289
- name,
356
+ names.last,
357
+ superclass,
290
358
  location,
291
359
  comments_for(node)
292
360
  )
293
- else
294
- SingletonMethodDefinition.new(
361
+
362
+ super
363
+ nesting.pop
364
+ end
365
+
366
+ def visit_const_ref(node)
367
+ [node.constant.value.to_sym]
368
+ end
369
+
370
+ def visit_const_path_ref(node)
371
+ visit(node.parent) << node.constant.value.to_sym
372
+ end
373
+
374
+ def visit_def(node)
375
+ name = node.name.value.to_sym
376
+ location =
377
+ Location.new(node.location.start_line, node.location.start_column)
378
+
379
+ results << if node.target.nil?
380
+ MethodDefinition.new(
381
+ nesting.dup,
382
+ name,
383
+ location,
384
+ comments_for(node)
385
+ )
386
+ else
387
+ SingletonMethodDefinition.new(
388
+ nesting.dup,
389
+ name,
390
+ location,
391
+ comments_for(node)
392
+ )
393
+ end
394
+ end
395
+
396
+ def visit_module(node)
397
+ names = visit(node.constant)
398
+ nesting << names
399
+
400
+ location =
401
+ Location.new(node.location.start_line, node.location.start_column)
402
+
403
+ results << ModuleDefinition.new(
295
404
  nesting.dup,
296
- name,
405
+ names.last,
297
406
  location,
298
407
  comments_for(node)
299
408
  )
300
- end
301
- end
302
409
 
303
- def visit_module(node)
304
- name = visit(node.constant).to_sym
305
- location =
306
- Location.new(node.location.start_line, node.location.start_column)
307
-
308
- results << ModuleDefinition.new(
309
- nesting.dup,
310
- name,
311
- location,
312
- comments_for(node)
313
- )
410
+ super
411
+ nesting.pop
412
+ end
314
413
 
315
- nesting << name
316
- super
317
- nesting.pop
318
- end
414
+ def visit_program(node)
415
+ super
416
+ results
417
+ end
319
418
 
320
- def visit_program(node)
321
- super
322
- results
323
- end
419
+ def visit_statements(node)
420
+ @statements = node
421
+ super
422
+ end
324
423
 
325
- def visit_statements(node)
326
- @statements = node
327
- super
424
+ def visit_var_ref(node)
425
+ [node.value.value.to_sym]
426
+ end
328
427
  end
329
428
 
330
429
  private
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module SyntaxTree
6
+ # This visitor transforms the AST into a hash that contains only primitives
7
+ # that can be easily serialized into JSON.
8
+ class JSONVisitor < FieldVisitor
9
+ attr_reader :target
10
+
11
+ def initialize
12
+ @target = nil
13
+ end
14
+
15
+ private
16
+
17
+ def comments(node)
18
+ target[:comments] = visit_all(node.comments)
19
+ end
20
+
21
+ def field(name, value)
22
+ target[name] = value.is_a?(Node) ? visit(value) : value
23
+ end
24
+
25
+ def list(name, values)
26
+ target[name] = visit_all(values)
27
+ end
28
+
29
+ def node(node, type)
30
+ previous = @target
31
+ @target = { type: type, location: visit_location(node.location) }
32
+ yield
33
+ @target
34
+ ensure
35
+ @target = previous
36
+ end
37
+
38
+ def pairs(name, values)
39
+ target[name] = values.map { |(key, value)| [visit(key), visit(value)] }
40
+ end
41
+
42
+ def text(name, value)
43
+ target[name] = value
44
+ end
45
+
46
+ def visit_location(location)
47
+ [
48
+ location.start_line,
49
+ location.start_char,
50
+ location.end_line,
51
+ location.end_char
52
+ ]
53
+ end
54
+ end
55
+ end
@@ -2,10 +2,9 @@
2
2
 
3
3
  require "cgi"
4
4
  require "json"
5
+ require "pp"
5
6
  require "uri"
6
7
 
7
- require_relative "language_server/inlay_hints"
8
-
9
8
  module SyntaxTree
10
9
  # Syntax Tree additionally ships with a language server conforming to the
11
10
  # language server protocol. It can be invoked through the CLI by running:
@@ -13,6 +12,162 @@ module SyntaxTree
13
12
  # stree lsp
14
13
  #
15
14
  class LanguageServer
15
+ # This class provides inlay hints for the language server. For more
16
+ # information, see the spec here:
17
+ # https://github.com/microsoft/language-server-protocol/issues/956.
18
+ class InlayHints < Visitor
19
+ # This represents a hint that is going to be displayed in the editor.
20
+ class Hint
21
+ attr_reader :line, :character, :label
22
+
23
+ def initialize(line:, character:, label:)
24
+ @line = line
25
+ @character = character
26
+ @label = label
27
+ end
28
+
29
+ # This is the shape that the LSP expects.
30
+ def to_json(*opts)
31
+ {
32
+ position: {
33
+ line: line,
34
+ character: character
35
+ },
36
+ label: label
37
+ }.to_json(*opts)
38
+ end
39
+ end
40
+
41
+ attr_reader :stack, :hints
42
+
43
+ def initialize
44
+ @stack = []
45
+ @hints = []
46
+ end
47
+
48
+ def visit(node)
49
+ stack << node
50
+ result = super
51
+ stack.pop
52
+ result
53
+ end
54
+
55
+ visit_methods do
56
+ # Adds parentheses around assignments contained within the default
57
+ # values of parameters. For example,
58
+ #
59
+ # def foo(a = b = c)
60
+ # end
61
+ #
62
+ # becomes
63
+ #
64
+ # def foo(a = ₍b = c₎)
65
+ # end
66
+ #
67
+ def visit_assign(node)
68
+ parentheses(node.location) if stack[-2].is_a?(Params)
69
+ super
70
+ end
71
+
72
+ # Adds parentheses around binary expressions to make it clear which
73
+ # subexpression will be evaluated first. For example,
74
+ #
75
+ # a + b * c
76
+ #
77
+ # becomes
78
+ #
79
+ # a + ₍b * c₎
80
+ #
81
+ def visit_binary(node)
82
+ case stack[-2]
83
+ when Assign, OpAssign
84
+ parentheses(node.location)
85
+ when Binary
86
+ parentheses(node.location) if stack[-2].operator != node.operator
87
+ end
88
+
89
+ super
90
+ end
91
+
92
+ # Adds parentheses around ternary operators contained within certain
93
+ # expressions where it could be confusing which subexpression will get
94
+ # evaluated first. For example,
95
+ #
96
+ # a ? b : c ? d : e
97
+ #
98
+ # becomes
99
+ #
100
+ # a ? b : ₍c ? d : e₎
101
+ #
102
+ def visit_if_op(node)
103
+ case stack[-2]
104
+ when Assign, Binary, IfOp, OpAssign
105
+ parentheses(node.location)
106
+ end
107
+
108
+ super
109
+ end
110
+
111
+ # Adds the implicitly rescued StandardError into a bare rescue clause.
112
+ # For example,
113
+ #
114
+ # begin
115
+ # rescue
116
+ # end
117
+ #
118
+ # becomes
119
+ #
120
+ # begin
121
+ # rescue StandardError
122
+ # end
123
+ #
124
+ def visit_rescue(node)
125
+ if node.exception.nil?
126
+ hints << Hint.new(
127
+ line: node.location.start_line - 1,
128
+ character: node.location.start_column + "rescue".length,
129
+ label: " StandardError"
130
+ )
131
+ end
132
+
133
+ super
134
+ end
135
+
136
+ # Adds parentheses around unary statements using the - operator that are
137
+ # contained within Binary nodes. For example,
138
+ #
139
+ # -a + b
140
+ #
141
+ # becomes
142
+ #
143
+ # ₍-a₎ + b
144
+ #
145
+ def visit_unary(node)
146
+ if stack[-2].is_a?(Binary) && (node.operator == "-")
147
+ parentheses(node.location)
148
+ end
149
+
150
+ super
151
+ end
152
+ end
153
+
154
+ private
155
+
156
+ def parentheses(location)
157
+ hints << Hint.new(
158
+ line: location.start_line - 1,
159
+ character: location.start_column,
160
+ label: "₍"
161
+ )
162
+
163
+ hints << Hint.new(
164
+ line: location.end_line - 1,
165
+ character: location.end_column,
166
+ label: "₎"
167
+ )
168
+ end
169
+ end
170
+
16
171
  # This is a small module that effectively mirrors pattern matching. We're
17
172
  # using it so that we can support truffleruby without having to ignore the
18
173
  # language server.