syntax_tree 5.3.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +64 -1
  4. data/Gemfile.lock +2 -2
  5. data/README.md +28 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +56 -54
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +198 -107
  23. data/lib/syntax_tree/parser.rb +322 -118
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3019 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -19,7 +19,7 @@ module SyntaxTree
19
19
  # @@class_variable
20
20
  # ~~~
21
21
  #
22
- class GetClassVariable
22
+ class GetClassVariable < Instruction
23
23
  attr_reader :name
24
24
 
25
25
  def initialize(name)
@@ -46,10 +46,6 @@ module SyntaxTree
46
46
  2
47
47
  end
48
48
 
49
- def pops
50
- 0
51
- end
52
-
53
49
  def pushes
54
50
  1
55
51
  end
@@ -79,7 +75,7 @@ module SyntaxTree
79
75
  # Constant
80
76
  # ~~~
81
77
  #
82
- class OptGetInlineCache
78
+ class OptGetInlineCache < Instruction
83
79
  attr_reader :label, :cache
84
80
 
85
81
  def initialize(label, cache)
@@ -111,21 +107,21 @@ module SyntaxTree
111
107
  3
112
108
  end
113
109
 
114
- def pops
115
- 0
116
- end
117
-
118
110
  def pushes
119
111
  1
120
112
  end
121
113
 
122
- def canonical
123
- self
124
- end
125
-
126
114
  def call(vm)
127
115
  vm.push(nil)
128
116
  end
117
+
118
+ def branch_targets
119
+ [label]
120
+ end
121
+
122
+ def falls_through?
123
+ true
124
+ end
129
125
  end
130
126
 
131
127
  # ### Summary
@@ -143,7 +139,7 @@ module SyntaxTree
143
139
  # Constant
144
140
  # ~~~
145
141
  #
146
- class OptSetInlineCache
142
+ class OptSetInlineCache < Instruction
147
143
  attr_reader :cache
148
144
 
149
145
  def initialize(cache)
@@ -178,10 +174,6 @@ module SyntaxTree
178
174
  1
179
175
  end
180
176
 
181
- def canonical
182
- self
183
- end
184
-
185
177
  def call(vm)
186
178
  end
187
179
  end
@@ -200,7 +192,7 @@ module SyntaxTree
200
192
  # @@class_variable = 1
201
193
  # ~~~
202
194
  #
203
- class SetClassVariable
195
+ class SetClassVariable < Instruction
204
196
  attr_reader :name
205
197
 
206
198
  def initialize(name)
@@ -231,10 +223,6 @@ module SyntaxTree
231
223
  1
232
224
  end
233
225
 
234
- def pushes
235
- 0
236
- end
237
-
238
226
  def canonical
239
227
  YARV::SetClassVariable.new(name, nil)
240
228
  end
@@ -0,0 +1,534 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ # A sea of nodes is an intermediate representation used by a compiler to
6
+ # represent both control and data flow in the same graph. The way we use it
7
+ # allows us to have the vertices of the graph represent either an
8
+ # instruction in the instruction sequence or a synthesized node that we add
9
+ # to the graph. The edges of the graph represent either control flow or data
10
+ # flow.
11
+ class SeaOfNodes
12
+ # This object represents a node in the graph that holds a YARV
13
+ # instruction.
14
+ class InsnNode
15
+ attr_reader :inputs, :outputs, :insn, :offset
16
+
17
+ def initialize(insn, offset)
18
+ @inputs = []
19
+ @outputs = []
20
+
21
+ @insn = insn
22
+ @offset = offset
23
+ end
24
+
25
+ def id
26
+ offset
27
+ end
28
+
29
+ def label
30
+ "%04d %s" % [offset, insn.disasm(Disassembler::Squished.new)]
31
+ end
32
+ end
33
+
34
+ # Phi nodes are used to represent the merging of data flow from multiple
35
+ # incoming blocks.
36
+ class PhiNode
37
+ attr_reader :inputs, :outputs, :id
38
+
39
+ def initialize(id)
40
+ @inputs = []
41
+ @outputs = []
42
+ @id = id
43
+ end
44
+
45
+ def label
46
+ "#{id} φ"
47
+ end
48
+ end
49
+
50
+ # Merge nodes are present in any block that has multiple incoming blocks.
51
+ # They provide a place for Phi nodes to attach their results.
52
+ class MergeNode
53
+ attr_reader :inputs, :outputs, :id
54
+
55
+ def initialize(id)
56
+ @inputs = []
57
+ @outputs = []
58
+ @id = id
59
+ end
60
+
61
+ def label
62
+ "#{id} ψ"
63
+ end
64
+ end
65
+
66
+ # The edge of a graph represents either control flow or data flow.
67
+ class Edge
68
+ TYPES = %i[data control info].freeze
69
+
70
+ attr_reader :from
71
+ attr_reader :to
72
+ attr_reader :type
73
+ attr_reader :label
74
+
75
+ def initialize(from, to, type, label)
76
+ raise unless TYPES.include?(type)
77
+
78
+ @from = from
79
+ @to = to
80
+ @type = type
81
+ @label = label
82
+ end
83
+ end
84
+
85
+ # A subgraph represents the local data and control flow of a single basic
86
+ # block.
87
+ class SubGraph
88
+ attr_reader :first_fixed, :last_fixed, :inputs, :outputs
89
+
90
+ def initialize(first_fixed, last_fixed, inputs, outputs)
91
+ @first_fixed = first_fixed
92
+ @last_fixed = last_fixed
93
+ @inputs = inputs
94
+ @outputs = outputs
95
+ end
96
+ end
97
+
98
+ # The compiler is responsible for taking a data flow graph and turning it
99
+ # into a sea of nodes.
100
+ class Compiler
101
+ attr_reader :dfg, :nodes
102
+
103
+ def initialize(dfg)
104
+ @dfg = dfg
105
+ @nodes = []
106
+
107
+ # We need to put a unique ID on the synthetic nodes in the graph, so
108
+ # we keep a counter that we increment any time we create a new
109
+ # synthetic node.
110
+ @id_counter = 999
111
+ end
112
+
113
+ def compile
114
+ local_graphs = {}
115
+ dfg.blocks.each do |block|
116
+ local_graphs[block.id] = create_local_graph(block)
117
+ end
118
+
119
+ connect_local_graphs_control(local_graphs)
120
+ connect_local_graphs_data(local_graphs)
121
+ cleanup_phi_nodes
122
+ cleanup_insn_nodes
123
+
124
+ SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify)
125
+ end
126
+
127
+ private
128
+
129
+ # Counter for synthetic nodes.
130
+ def id_counter
131
+ @id_counter += 1
132
+ end
133
+
134
+ # Create a sub-graph for a single basic block - basic block argument
135
+ # inputs and outputs will be left dangling, to be connected later.
136
+ def create_local_graph(block)
137
+ block_flow = dfg.block_flows.fetch(block.id)
138
+
139
+ # A map of instructions to nodes.
140
+ insn_nodes = {}
141
+
142
+ # Create a node for each instruction in the block.
143
+ block.each_with_length do |insn, offset|
144
+ node = InsnNode.new(insn, offset)
145
+ insn_nodes[offset] = node
146
+ nodes << node
147
+ end
148
+
149
+ # The first and last node in the sub-graph, and the last fixed node.
150
+ previous_fixed = nil
151
+ first_fixed = nil
152
+ last_fixed = nil
153
+
154
+ # The merge node for the phi nodes to attach to.
155
+ merge_node = nil
156
+
157
+ # If there is more than one predecessor and we have basic block
158
+ # arguments coming in, then we need a merge node for the phi nodes to
159
+ # attach to.
160
+ if block.incoming_blocks.size > 1 && !block_flow.in.empty?
161
+ merge_node = MergeNode.new(id_counter)
162
+ nodes << merge_node
163
+
164
+ previous_fixed = merge_node
165
+ first_fixed = merge_node
166
+ last_fixed = merge_node
167
+ end
168
+
169
+ # Connect local control flow (only nodes with side effects.)
170
+ block.each_with_length do |insn, length|
171
+ if insn.side_effects?
172
+ insn_node = insn_nodes[length]
173
+ connect previous_fixed, insn_node, :control if previous_fixed
174
+ previous_fixed = insn_node
175
+ first_fixed ||= insn_node
176
+ last_fixed = insn_node
177
+ end
178
+ end
179
+
180
+ # Connect basic block arguments.
181
+ inputs = {}
182
+ outputs = {}
183
+ block_flow.in.each do |arg|
184
+ # Each basic block argument gets a phi node. Even if there's only
185
+ # one predecessor! We'll tidy this up later.
186
+ phi = PhiNode.new(id_counter)
187
+ connect(phi, merge_node, :info) if merge_node
188
+ nodes << phi
189
+ inputs[arg] = phi
190
+
191
+ block.each_with_length do |_, consumer_offset|
192
+ consumer_flow = dfg.insn_flows[consumer_offset]
193
+ consumer_flow.in.each_with_index do |producer, input_index|
194
+ if producer == arg
195
+ connect(phi, insn_nodes[consumer_offset], :data, input_index)
196
+ end
197
+ end
198
+ end
199
+
200
+ block_flow.out.each { |out| outputs[out] = phi if out == arg }
201
+ end
202
+
203
+ # Connect local dataflow from consumers back to producers.
204
+ block.each_with_length do |_, consumer_offset|
205
+ consumer_flow = dfg.insn_flows.fetch(consumer_offset)
206
+ consumer_flow.in.each_with_index do |producer, input_index|
207
+ if producer.local?
208
+ connect(
209
+ insn_nodes[producer.length],
210
+ insn_nodes[consumer_offset],
211
+ :data,
212
+ input_index
213
+ )
214
+ end
215
+ end
216
+ end
217
+
218
+ # Connect dataflow from producers that leaves the block.
219
+ block.each_with_length do |_, producer_pc|
220
+ dfg
221
+ .insn_flows
222
+ .fetch(producer_pc)
223
+ .out
224
+ .each do |consumer|
225
+ unless consumer.local?
226
+ # This is an argument to the successor block - not to an
227
+ # instruction here.
228
+ outputs[consumer.name] = insn_nodes[producer_pc]
229
+ end
230
+ end
231
+ end
232
+
233
+ # A graph with only side-effect free instructions will currently have
234
+ # no fixed nodes! In that case just use the first instruction's node
235
+ # for both first and last. But it's a bug that it'll appear in the
236
+ # control flow path!
237
+ SubGraph.new(
238
+ first_fixed || insn_nodes[block.block_start],
239
+ last_fixed || insn_nodes[block.block_start],
240
+ inputs,
241
+ outputs
242
+ )
243
+ end
244
+
245
+ # Connect control flow that flows between basic blocks.
246
+ def connect_local_graphs_control(local_graphs)
247
+ dfg.blocks.each do |predecessor|
248
+ predecessor_last = local_graphs[predecessor.id].last_fixed
249
+ predecessor.outgoing_blocks.each_with_index do |successor, index|
250
+ label =
251
+ if index > 0 &&
252
+ index == (predecessor.outgoing_blocks.length - 1)
253
+ # If there are multiple outgoing blocks from this block, then
254
+ # the last one is a fallthrough. Otherwise it's a branch.
255
+ :fallthrough
256
+ else
257
+ :"branch#{index}"
258
+ end
259
+
260
+ connect(
261
+ predecessor_last,
262
+ local_graphs[successor.id].first_fixed,
263
+ :control,
264
+ label
265
+ )
266
+ end
267
+ end
268
+ end
269
+
270
+ # Connect data flow that flows between basic blocks.
271
+ def connect_local_graphs_data(local_graphs)
272
+ dfg.blocks.each do |predecessor|
273
+ arg_outs = local_graphs[predecessor.id].outputs.values
274
+ arg_outs.each_with_index do |arg_out, arg_n|
275
+ predecessor.outgoing_blocks.each do |successor|
276
+ successor_graph = local_graphs[successor.id]
277
+ arg_in = successor_graph.inputs.values[arg_n]
278
+
279
+ # We're connecting to a phi node, so we may need a special
280
+ # label.
281
+ raise unless arg_in.is_a?(PhiNode)
282
+
283
+ label =
284
+ case arg_out
285
+ when InsnNode
286
+ # Instructions that go into a phi node are labelled by the
287
+ # offset of the last instruction in the block that executed
288
+ # them. This way you know which value to use for the phi,
289
+ # based on the last instruction you executed.
290
+ dfg.blocks.find do |block|
291
+ block_start = block.block_start
292
+ block_end =
293
+ block_start + block.insns.sum(&:length) -
294
+ block.insns.last.length
295
+
296
+ if (block_start..block_end).cover?(arg_out.offset)
297
+ break block_end
298
+ end
299
+ end
300
+ when PhiNode
301
+ # Phi nodes to phi nodes are not labelled.
302
+ else
303
+ raise
304
+ end
305
+
306
+ connect(arg_out, arg_in, :data, label)
307
+ end
308
+ end
309
+ end
310
+ end
311
+
312
+ # We don't always build things in an optimal way. Go back and fix up
313
+ # some mess we left. Ideally we wouldn't create these problems in the
314
+ # first place.
315
+ def cleanup_phi_nodes
316
+ nodes.dup.each do |node| # dup because we're mutating
317
+ next unless node.is_a?(PhiNode)
318
+
319
+ if node.inputs.size == 1
320
+ # Remove phi nodes with a single input.
321
+ connect_over(node)
322
+ remove(node)
323
+ elsif node.inputs.map(&:from).uniq.size == 1
324
+ # Remove phi nodes where all inputs are the same.
325
+ producer_edge = node.inputs.first
326
+ consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) }
327
+ connect(
328
+ producer_edge.from,
329
+ consumer_edge.to,
330
+ :data,
331
+ consumer_edge.label
332
+ )
333
+ remove(node)
334
+ end
335
+ end
336
+ end
337
+
338
+ # Eliminate as many unnecessary nodes as we can.
339
+ def cleanup_insn_nodes
340
+ nodes.dup.each do |node|
341
+ next unless node.is_a?(InsnNode)
342
+
343
+ case node.insn
344
+ when AdjustStack
345
+ # If there are any inputs to the adjust stack that are immediately
346
+ # discarded, we can remove them from the input list.
347
+ number = node.insn.number
348
+
349
+ node.inputs.dup.each do |input_edge|
350
+ next if input_edge.type != :data
351
+
352
+ from = input_edge.from
353
+ next unless from.is_a?(InsnNode)
354
+
355
+ if from.inputs.empty? && from.outputs.size == 1
356
+ number -= 1
357
+ remove(input_edge.from)
358
+ elsif from.insn.is_a?(Dup)
359
+ number -= 1
360
+ connect_over(from)
361
+ remove(from)
362
+
363
+ new_edge = node.inputs.last
364
+ new_edge.from.outputs.delete(new_edge)
365
+ node.inputs.delete(new_edge)
366
+ end
367
+ end
368
+
369
+ if number == 0
370
+ connect_over(node)
371
+ remove(node)
372
+ else
373
+ next_node =
374
+ if number == 1
375
+ InsnNode.new(Pop.new, node.offset)
376
+ else
377
+ InsnNode.new(AdjustStack.new(number), node.offset)
378
+ end
379
+
380
+ next_node.inputs.concat(node.inputs)
381
+ next_node.outputs.concat(node.outputs)
382
+
383
+ # Dynamically finding the index of the node in the nodes array
384
+ # because we're mutating the array as we go.
385
+ nodes[nodes.index(node)] = next_node
386
+ end
387
+ when Jump
388
+ # When you have a jump instruction that only has one input and one
389
+ # output, you can just connect over top of it and remove it.
390
+ if node.inputs.size == 1 && node.outputs.size == 1
391
+ connect_over(node)
392
+ remove(node)
393
+ end
394
+ when Pop
395
+ from = node.inputs.find { |edge| edge.type == :data }.from
396
+ next unless from.is_a?(InsnNode)
397
+
398
+ removed =
399
+ if from.inputs.empty? && from.outputs.size == 1
400
+ remove(from)
401
+ true
402
+ elsif from.insn.is_a?(Dup)
403
+ connect_over(from)
404
+ remove(from)
405
+
406
+ new_edge = node.inputs.last
407
+ new_edge.from.outputs.delete(new_edge)
408
+ node.inputs.delete(new_edge)
409
+ true
410
+ else
411
+ false
412
+ end
413
+
414
+ if removed
415
+ connect_over(node)
416
+ remove(node)
417
+ end
418
+ end
419
+ end
420
+ end
421
+
422
+ # Connect one node to another.
423
+ def connect(from, to, type, label = nil)
424
+ raise if from == to
425
+ raise if !to.is_a?(PhiNode) && type == :data && label.nil?
426
+
427
+ edge = Edge.new(from, to, type, label)
428
+ from.outputs << edge
429
+ to.inputs << edge
430
+ end
431
+
432
+ # Connect all of the inputs to all of the outputs of a node.
433
+ def connect_over(node)
434
+ node.inputs.each do |producer_edge|
435
+ node.outputs.each do |consumer_edge|
436
+ connect(
437
+ producer_edge.from,
438
+ consumer_edge.to,
439
+ producer_edge.type,
440
+ producer_edge.label
441
+ )
442
+ end
443
+ end
444
+ end
445
+
446
+ # Remove a node from the graph.
447
+ def remove(node)
448
+ node.inputs.each do |producer_edge|
449
+ producer_edge.from.outputs.reject! { |edge| edge.to == node }
450
+ end
451
+
452
+ node.outputs.each do |consumer_edge|
453
+ consumer_edge.to.inputs.reject! { |edge| edge.from == node }
454
+ end
455
+
456
+ nodes.delete(node)
457
+ end
458
+ end
459
+
460
+ attr_reader :dfg, :nodes, :local_graphs
461
+
462
+ def initialize(dfg, nodes, local_graphs)
463
+ @dfg = dfg
464
+ @nodes = nodes
465
+ @local_graphs = local_graphs
466
+ end
467
+
468
+ def to_mermaid
469
+ Mermaid.flowchart do |flowchart|
470
+ nodes.each do |node|
471
+ flowchart.node("node_#{node.id}", node.label, shape: :rounded)
472
+ end
473
+
474
+ nodes.each do |producer|
475
+ producer.outputs.each do |consumer_edge|
476
+ label =
477
+ if !consumer_edge.label
478
+ # No label.
479
+ elsif consumer_edge.to.is_a?(PhiNode)
480
+ # Edges into phi nodes are labelled by the offset of the
481
+ # instruction going into the merge.
482
+ "%04d" % consumer_edge.label
483
+ else
484
+ consumer_edge.label.to_s
485
+ end
486
+
487
+ flowchart.link(
488
+ flowchart.fetch("node_#{producer.id}"),
489
+ flowchart.fetch("node_#{consumer_edge.to.id}"),
490
+ label,
491
+ type: consumer_edge.type == :info ? :dotted : :directed,
492
+ color: { data: :green, control: :red }[consumer_edge.type]
493
+ )
494
+ end
495
+ end
496
+ end
497
+ end
498
+
499
+ def verify
500
+ # Verify edge labels.
501
+ nodes.each do |node|
502
+ # Not talking about phi nodes right now.
503
+ next if node.is_a?(PhiNode)
504
+
505
+ if node.is_a?(InsnNode) && node.insn.branch_targets.any? &&
506
+ !node.insn.is_a?(Leave)
507
+ # A branching node must have at least one branch edge and
508
+ # potentially a fallthrough edge coming out.
509
+
510
+ labels = node.outputs.map(&:label).sort
511
+ raise if labels[0] != :branch0
512
+ raise if labels[1] != :fallthrough && labels.size > 2
513
+ else
514
+ labels = node.inputs.filter { |e| e.type == :data }.map(&:label)
515
+ next if labels.empty?
516
+
517
+ # No nil labels
518
+ raise if labels.any?(&:nil?)
519
+
520
+ # Labels should start at zero.
521
+ raise unless labels.min.zero?
522
+
523
+ # Labels should be contiguous.
524
+ raise unless labels.sort == (labels.min..labels.max).to_a
525
+ end
526
+ end
527
+ end
528
+
529
+ def self.compile(dfg)
530
+ Compiler.new(dfg).compile
531
+ end
532
+ end
533
+ end
534
+ end
@@ -1,5 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "stringio"
4
+
5
+ require_relative "yarv/basic_block"
6
+ require_relative "yarv/bf"
7
+ require_relative "yarv/calldata"
8
+ require_relative "yarv/compiler"
9
+ require_relative "yarv/control_flow_graph"
10
+ require_relative "yarv/data_flow_graph"
11
+ require_relative "yarv/decompiler"
12
+ require_relative "yarv/disassembler"
13
+ require_relative "yarv/instruction_sequence"
14
+ require_relative "yarv/instructions"
15
+ require_relative "yarv/legacy"
16
+ require_relative "yarv/local_table"
17
+ require_relative "yarv/sea_of_nodes"
18
+ require_relative "yarv/assembler"
19
+ require_relative "yarv/vm"
20
+
3
21
  module SyntaxTree
4
22
  # This module provides an object representation of the YARV bytecode.
5
23
  module YARV