syntax_tree 5.3.0 → 6.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +78 -1
  4. data/Gemfile.lock +7 -7
  5. data/README.md +33 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +158 -59
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +245 -123
  23. data/lib/syntax_tree/parser.rb +332 -119
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3107 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -1,55 +1,54 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SyntaxTree
4
- class Visitor
5
- # This is the parent class of a lot of built-in visitors for Syntax Tree. It
6
- # reflects visiting each of the fields on every node in turn. It itself does
7
- # not do anything with these fields, it leaves that behavior up to the
8
- # subclass to implement.
9
- #
10
- # In order to properly use this class, you will need to subclass it and
11
- # implement #comments, #field, #list, #node, #pairs, and #text. Those are
12
- # documented here.
13
- #
14
- # == comments(node)
15
- #
16
- # This accepts the node that is being visited and does something depending
17
- # on the comments attached to the node.
18
- #
19
- # == field(name, value)
20
- #
21
- # This accepts the name of the field being visited as a string (like
22
- # "value") and the actual value of that field. The value can be a subclass
23
- # of Node or any other type that can be held within the tree.
24
- #
25
- # == list(name, values)
26
- #
27
- # This accepts the name of the field being visited as well as a list of
28
- # values. This is used, for example, when visiting something like the body
29
- # of a Statements node.
30
- #
31
- # == node(name, node)
32
- #
33
- # This is the parent serialization method for each node. It is called with
34
- # the node itself, as well as the type of the node as a string. The type
35
- # is an internally used value that usually resembles the name of the
36
- # ripper event that generated the node. The method should yield to the
37
- # given block which then calls through to visit each of the fields on the
38
- # node.
39
- #
40
- # == text(name, value)
41
- #
42
- # This accepts the name of the field being visited as well as a string
43
- # value representing the value of the field.
44
- #
45
- # == pairs(name, values)
46
- #
47
- # This accepts the name of the field being visited as well as a list of
48
- # pairs that represent the value of the field. It is used only in a couple
49
- # of circumstances, like when visiting the list of optional parameters
50
- # defined on a method.
51
- #
52
- class FieldVisitor < BasicVisitor
4
+ # This is the parent class of a lot of built-in visitors for Syntax Tree. It
5
+ # reflects visiting each of the fields on every node in turn. It itself does
6
+ # not do anything with these fields, it leaves that behavior up to the
7
+ # subclass to implement.
8
+ #
9
+ # In order to properly use this class, you will need to subclass it and
10
+ # implement #comments, #field, #list, #node, #pairs, and #text. Those are
11
+ # documented here.
12
+ #
13
+ # == comments(node)
14
+ #
15
+ # This accepts the node that is being visited and does something depending on
16
+ # the comments attached to the node.
17
+ #
18
+ # == field(name, value)
19
+ #
20
+ # This accepts the name of the field being visited as a string (like "value")
21
+ # and the actual value of that field. The value can be a subclass of Node or
22
+ # any other type that can be held within the tree.
23
+ #
24
+ # == list(name, values)
25
+ #
26
+ # This accepts the name of the field being visited as well as a list of
27
+ # values. This is used, for example, when visiting something like the body of
28
+ # a Statements node.
29
+ #
30
+ # == node(node, type)
31
+ #
32
+ # This is the parent serialization method for each node. It is called with the
33
+ # node itself, as well as the type of the node as a string. The type is an
34
+ # internally used value that usually resembles the name of the ripper event
35
+ # that generated the node. The method should yield to the given block which
36
+ # then calls through to visit each of the fields on the node.
37
+ #
38
+ # == text(name, value)
39
+ #
40
+ # This accepts the name of the field being visited as well as a string value
41
+ # representing the value of the field.
42
+ #
43
+ # == pairs(name, values)
44
+ #
45
+ # This accepts the name of the field being visited as well as a list of pairs
46
+ # that represent the value of the field. It is used only in a couple of
47
+ # circumstances, like when visiting the list of optional parameters defined on
48
+ # a method.
49
+ #
50
+ class FieldVisitor < BasicVisitor
51
+ visit_methods do
53
52
  def visit_aref(node)
54
53
  node(node, "aref") do
55
54
  field("collection", node.collection)
@@ -1017,14 +1016,14 @@ module SyntaxTree
1017
1016
  def visit___end__(node)
1018
1017
  visit_token(node, "__end__")
1019
1018
  end
1019
+ end
1020
1020
 
1021
- private
1021
+ private
1022
1022
 
1023
- def visit_token(node, type)
1024
- node(node, type) do
1025
- field("value", node.value)
1026
- comments(node)
1027
- end
1023
+ def visit_token(node, type)
1024
+ node(node, type) do
1025
+ field("value", node.value)
1026
+ comments(node)
1028
1027
  end
1029
1028
  end
1030
1029
  end
@@ -138,7 +138,7 @@ module SyntaxTree
138
138
  # going to just print out the node as it was seen in the source.
139
139
  doc =
140
140
  if last_leading&.ignore?
141
- range = source[node.location.start_char...node.location.end_char]
141
+ range = source[node.start_char...node.end_char]
142
142
  first = true
143
143
 
144
144
  range.each_line(chomp: true) do |line|
@@ -20,11 +20,12 @@ module SyntaxTree
20
20
 
21
21
  # This entry represents a class definition using the class keyword.
22
22
  class ClassDefinition
23
- attr_reader :nesting, :name, :location, :comments
23
+ attr_reader :nesting, :name, :superclass, :location, :comments
24
24
 
25
- def initialize(nesting, name, location, comments)
25
+ def initialize(nesting, name, superclass, location, comments)
26
26
  @nesting = nesting
27
27
  @name = name
28
+ @superclass = superclass
28
29
  @location = location
29
30
  @comments = comments
30
31
  end
@@ -176,30 +177,101 @@ module SyntaxTree
176
177
  Location.new(code_location[0], code_location[1])
177
178
  end
178
179
 
180
+ def find_constant_path(insns, index)
181
+ index -= 1 while insns[index].is_a?(Integer)
182
+ insn = insns[index]
183
+
184
+ if insn.is_a?(Array) && insn[0] == :opt_getconstant_path
185
+ # In this case we're on Ruby 3.2+ and we have an opt_getconstant_path
186
+ # instruction, so we already know all of the symbols in the nesting.
187
+ [index - 1, insn[1]]
188
+ elsif insn.is_a?(Symbol) && insn.match?(/\Alabel_\d+/)
189
+ # Otherwise, if we have a label then this is very likely the
190
+ # destination of an opt_getinlinecache instruction, in which case
191
+ # we'll walk backwards to grab up all of the constants.
192
+ names = []
193
+
194
+ index -= 1
195
+ until insns[index].is_a?(Array) &&
196
+ insns[index][0] == :opt_getinlinecache
197
+ if insns[index].is_a?(Array) && insns[index][0] == :getconstant
198
+ names.unshift(insns[index][1])
199
+ end
200
+
201
+ index -= 1
202
+ end
203
+
204
+ [index - 1, names]
205
+ else
206
+ [index, []]
207
+ end
208
+ end
209
+
179
210
  def index_iseq(iseq, file_comments)
180
211
  results = []
181
212
  queue = [[iseq, []]]
182
213
 
183
214
  while (current_iseq, current_nesting = queue.shift)
184
- current_iseq[13].each_with_index do |insn, index|
185
- next unless insn.is_a?(Array)
215
+ line = current_iseq[8]
216
+ insns = current_iseq[13]
217
+
218
+ insns.each_with_index do |insn, index|
219
+ case insn
220
+ when Integer
221
+ line = insn
222
+ next
223
+ when Array
224
+ # continue on
225
+ else
226
+ # skip everything else
227
+ next
228
+ end
186
229
 
187
230
  case insn[0]
188
231
  when :defineclass
189
232
  _, name, class_iseq, flags = insn
233
+ next_nesting = current_nesting.dup
234
+
235
+ # This is the index we're going to search for the nested constant
236
+ # path within the declaration name.
237
+ constant_index = index - 2
238
+
239
+ # This is the superclass of the class being defined.
240
+ superclass = []
241
+
242
+ # If there is a superclass, then we're going to find it here and
243
+ # then update the constant_index as necessary.
244
+ if flags & VM_DEFINECLASS_FLAG_HAS_SUPERCLASS > 0
245
+ constant_index, superclass =
246
+ find_constant_path(insns, index - 1)
247
+
248
+ if superclass.empty?
249
+ raise NotImplementedError,
250
+ "superclass with non constant path on line #{line}"
251
+ end
252
+ end
253
+
254
+ if (_, nesting = find_constant_path(insns, constant_index))
255
+ # If there is a constant path in the class name, then we need to
256
+ # handle that by updating the nesting.
257
+ next_nesting << (nesting << name)
258
+ else
259
+ # Otherwise we'll add the class name to the nesting.
260
+ next_nesting << [name]
261
+ end
190
262
 
191
263
  if flags == VM_DEFINECLASS_TYPE_SINGLETON_CLASS
192
264
  # At the moment, we don't support singletons that aren't
193
265
  # defined on self. We could, but it would require more
194
266
  # emulation.
195
- if current_iseq[13][index - 2] != [:putself]
267
+ if insns[index - 2] != [:putself]
196
268
  raise NotImplementedError,
197
269
  "singleton class with non-self receiver"
198
270
  end
199
271
  elsif flags & VM_DEFINECLASS_TYPE_MODULE > 0
200
272
  location = location_for(class_iseq)
201
273
  results << ModuleDefinition.new(
202
- current_nesting,
274
+ next_nesting,
203
275
  name,
204
276
  location,
205
277
  EntryComments.new(file_comments, location)
@@ -207,14 +279,15 @@ module SyntaxTree
207
279
  else
208
280
  location = location_for(class_iseq)
209
281
  results << ClassDefinition.new(
210
- current_nesting,
282
+ next_nesting,
211
283
  name,
284
+ superclass,
212
285
  location,
213
286
  EntryComments.new(file_comments, location)
214
287
  )
215
288
  end
216
289
 
217
- queue << [class_iseq, current_nesting + [name]]
290
+ queue << [class_iseq, next_nesting]
218
291
  when :definemethod
219
292
  location = location_for(insn[2])
220
293
  results << MethodDefinition.new(
@@ -257,74 +330,100 @@ module SyntaxTree
257
330
  @statements = nil
258
331
  end
259
332
 
260
- def visit_class(node)
261
- name = visit(node.constant).to_sym
262
- location =
263
- Location.new(node.location.start_line, node.location.start_column)
333
+ visit_methods do
334
+ def visit_class(node)
335
+ names = visit(node.constant)
336
+ nesting << names
264
337
 
265
- results << ClassDefinition.new(
266
- nesting.dup,
267
- name,
268
- location,
269
- comments_for(node)
270
- )
338
+ location =
339
+ Location.new(node.location.start_line, node.location.start_column)
271
340
 
272
- nesting << name
273
- super
274
- nesting.pop
275
- end
341
+ superclass =
342
+ if node.superclass
343
+ visited = visit(node.superclass)
276
344
 
277
- def visit_const_ref(node)
278
- node.constant.value
279
- end
345
+ if visited == [[]]
346
+ raise NotImplementedError, "superclass with non constant path"
347
+ end
280
348
 
281
- def visit_def(node)
282
- name = node.name.value.to_sym
283
- location =
284
- Location.new(node.location.start_line, node.location.start_column)
349
+ visited
350
+ else
351
+ []
352
+ end
285
353
 
286
- results << if node.target.nil?
287
- MethodDefinition.new(
354
+ results << ClassDefinition.new(
288
355
  nesting.dup,
289
- name,
356
+ names.last,
357
+ superclass,
290
358
  location,
291
359
  comments_for(node)
292
360
  )
293
- else
294
- SingletonMethodDefinition.new(
361
+
362
+ super
363
+ nesting.pop
364
+ end
365
+
366
+ def visit_const_ref(node)
367
+ [node.constant.value.to_sym]
368
+ end
369
+
370
+ def visit_const_path_ref(node)
371
+ visit(node.parent) << node.constant.value.to_sym
372
+ end
373
+
374
+ def visit_def(node)
375
+ name = node.name.value.to_sym
376
+ location =
377
+ Location.new(node.location.start_line, node.location.start_column)
378
+
379
+ results << if node.target.nil?
380
+ MethodDefinition.new(
381
+ nesting.dup,
382
+ name,
383
+ location,
384
+ comments_for(node)
385
+ )
386
+ else
387
+ SingletonMethodDefinition.new(
388
+ nesting.dup,
389
+ name,
390
+ location,
391
+ comments_for(node)
392
+ )
393
+ end
394
+ end
395
+
396
+ def visit_module(node)
397
+ names = visit(node.constant)
398
+ nesting << names
399
+
400
+ location =
401
+ Location.new(node.location.start_line, node.location.start_column)
402
+
403
+ results << ModuleDefinition.new(
295
404
  nesting.dup,
296
- name,
405
+ names.last,
297
406
  location,
298
407
  comments_for(node)
299
408
  )
300
- end
301
- end
302
409
 
303
- def visit_module(node)
304
- name = visit(node.constant).to_sym
305
- location =
306
- Location.new(node.location.start_line, node.location.start_column)
307
-
308
- results << ModuleDefinition.new(
309
- nesting.dup,
310
- name,
311
- location,
312
- comments_for(node)
313
- )
410
+ super
411
+ nesting.pop
412
+ end
314
413
 
315
- nesting << name
316
- super
317
- nesting.pop
318
- end
414
+ def visit_program(node)
415
+ super
416
+ results
417
+ end
319
418
 
320
- def visit_program(node)
321
- super
322
- results
323
- end
419
+ def visit_statements(node)
420
+ @statements = node
421
+ super
422
+ end
324
423
 
325
- def visit_statements(node)
326
- @statements = node
327
- super
424
+ def visit_var_ref(node)
425
+ [node.value.value.to_sym]
426
+ end
328
427
  end
329
428
 
330
429
  private
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module SyntaxTree
6
+ # This visitor transforms the AST into a hash that contains only primitives
7
+ # that can be easily serialized into JSON.
8
+ class JSONVisitor < FieldVisitor
9
+ attr_reader :target
10
+
11
+ def initialize
12
+ @target = nil
13
+ end
14
+
15
+ private
16
+
17
+ def comments(node)
18
+ target[:comments] = visit_all(node.comments)
19
+ end
20
+
21
+ def field(name, value)
22
+ target[name] = value.is_a?(Node) ? visit(value) : value
23
+ end
24
+
25
+ def list(name, values)
26
+ target[name] = visit_all(values)
27
+ end
28
+
29
+ def node(node, type)
30
+ previous = @target
31
+ @target = { type: type, location: visit_location(node.location) }
32
+ yield
33
+ @target
34
+ ensure
35
+ @target = previous
36
+ end
37
+
38
+ def pairs(name, values)
39
+ target[name] = values.map { |(key, value)| [visit(key), visit(value)] }
40
+ end
41
+
42
+ def text(name, value)
43
+ target[name] = value
44
+ end
45
+
46
+ def visit_location(location)
47
+ [
48
+ location.start_line,
49
+ location.start_char,
50
+ location.end_line,
51
+ location.end_char
52
+ ]
53
+ end
54
+ end
55
+ end
@@ -2,10 +2,9 @@
2
2
 
3
3
  require "cgi"
4
4
  require "json"
5
+ require "pp"
5
6
  require "uri"
6
7
 
7
- require_relative "language_server/inlay_hints"
8
-
9
8
  module SyntaxTree
10
9
  # Syntax Tree additionally ships with a language server conforming to the
11
10
  # language server protocol. It can be invoked through the CLI by running:
@@ -13,6 +12,162 @@ module SyntaxTree
13
12
  # stree lsp
14
13
  #
15
14
  class LanguageServer
15
+ # This class provides inlay hints for the language server. For more
16
+ # information, see the spec here:
17
+ # https://github.com/microsoft/language-server-protocol/issues/956.
18
+ class InlayHints < Visitor
19
+ # This represents a hint that is going to be displayed in the editor.
20
+ class Hint
21
+ attr_reader :line, :character, :label
22
+
23
+ def initialize(line:, character:, label:)
24
+ @line = line
25
+ @character = character
26
+ @label = label
27
+ end
28
+
29
+ # This is the shape that the LSP expects.
30
+ def to_json(*opts)
31
+ {
32
+ position: {
33
+ line: line,
34
+ character: character
35
+ },
36
+ label: label
37
+ }.to_json(*opts)
38
+ end
39
+ end
40
+
41
+ attr_reader :stack, :hints
42
+
43
+ def initialize
44
+ @stack = []
45
+ @hints = []
46
+ end
47
+
48
+ def visit(node)
49
+ stack << node
50
+ result = super
51
+ stack.pop
52
+ result
53
+ end
54
+
55
+ visit_methods do
56
+ # Adds parentheses around assignments contained within the default
57
+ # values of parameters. For example,
58
+ #
59
+ # def foo(a = b = c)
60
+ # end
61
+ #
62
+ # becomes
63
+ #
64
+ # def foo(a = ₍b = c₎)
65
+ # end
66
+ #
67
+ def visit_assign(node)
68
+ parentheses(node.location) if stack[-2].is_a?(Params)
69
+ super
70
+ end
71
+
72
+ # Adds parentheses around binary expressions to make it clear which
73
+ # subexpression will be evaluated first. For example,
74
+ #
75
+ # a + b * c
76
+ #
77
+ # becomes
78
+ #
79
+ # a + ₍b * c₎
80
+ #
81
+ def visit_binary(node)
82
+ case stack[-2]
83
+ when Assign, OpAssign
84
+ parentheses(node.location)
85
+ when Binary
86
+ parentheses(node.location) if stack[-2].operator != node.operator
87
+ end
88
+
89
+ super
90
+ end
91
+
92
+ # Adds parentheses around ternary operators contained within certain
93
+ # expressions where it could be confusing which subexpression will get
94
+ # evaluated first. For example,
95
+ #
96
+ # a ? b : c ? d : e
97
+ #
98
+ # becomes
99
+ #
100
+ # a ? b : ₍c ? d : e₎
101
+ #
102
+ def visit_if_op(node)
103
+ case stack[-2]
104
+ when Assign, Binary, IfOp, OpAssign
105
+ parentheses(node.location)
106
+ end
107
+
108
+ super
109
+ end
110
+
111
+ # Adds the implicitly rescued StandardError into a bare rescue clause.
112
+ # For example,
113
+ #
114
+ # begin
115
+ # rescue
116
+ # end
117
+ #
118
+ # becomes
119
+ #
120
+ # begin
121
+ # rescue StandardError
122
+ # end
123
+ #
124
+ def visit_rescue(node)
125
+ if node.exception.nil?
126
+ hints << Hint.new(
127
+ line: node.location.start_line - 1,
128
+ character: node.location.start_column + "rescue".length,
129
+ label: " StandardError"
130
+ )
131
+ end
132
+
133
+ super
134
+ end
135
+
136
+ # Adds parentheses around unary statements using the - operator that are
137
+ # contained within Binary nodes. For example,
138
+ #
139
+ # -a + b
140
+ #
141
+ # becomes
142
+ #
143
+ # ₍-a₎ + b
144
+ #
145
+ def visit_unary(node)
146
+ if stack[-2].is_a?(Binary) && (node.operator == "-")
147
+ parentheses(node.location)
148
+ end
149
+
150
+ super
151
+ end
152
+ end
153
+
154
+ private
155
+
156
+ def parentheses(location)
157
+ hints << Hint.new(
158
+ line: location.start_line - 1,
159
+ character: location.start_column,
160
+ label: "₍"
161
+ )
162
+
163
+ hints << Hint.new(
164
+ line: location.end_line - 1,
165
+ character: location.end_column,
166
+ label: "₎"
167
+ )
168
+ end
169
+ end
170
+
16
171
  # This is a small module that effectively mirrors pattern matching. We're
17
172
  # using it so that we can support truffleruby without having to ignore the
18
173
  # language server.