syntax_tree 5.3.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +64 -1
  4. data/Gemfile.lock +2 -2
  5. data/README.md +28 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +56 -54
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +198 -107
  23. data/lib/syntax_tree/parser.rb +322 -118
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3019 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -0,0 +1,257 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ # This class represents a control flow graph of a YARV instruction sequence.
6
+ # It constructs a graph of basic blocks that hold subsets of the list of
7
+ # instructions from the instruction sequence.
8
+ #
9
+ # You can use this class by calling the ::compile method and passing it a
10
+ # YARV instruction sequence. It will return a control flow graph object.
11
+ #
12
+ # iseq = RubyVM::InstructionSequence.compile("1 + 2")
13
+ # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
14
+ # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
15
+ #
16
+ class ControlFlowGraph
17
+ # This class is responsible for creating a control flow graph from the
18
+ # given instruction sequence.
19
+ class Compiler
20
+ # This is the instruction sequence that is being compiled.
21
+ attr_reader :iseq
22
+
23
+ # This is a hash of indices in the YARV instruction sequence that point
24
+ # to their corresponding instruction.
25
+ attr_reader :insns
26
+
27
+ # This is a hash of labels that point to their corresponding index into
28
+ # the YARV instruction sequence. Note that this is not the same as the
29
+ # index into the list of instructions on the instruction sequence
30
+ # object. Instead, this is the index into the C array, so it includes
31
+ # operands.
32
+ attr_reader :labels
33
+
34
+ def initialize(iseq)
35
+ @iseq = iseq
36
+
37
+ @insns = {}
38
+ @labels = {}
39
+
40
+ length = 0
41
+ iseq.insns.each do |insn|
42
+ case insn
43
+ when Instruction
44
+ @insns[length] = insn
45
+ length += insn.length
46
+ when InstructionSequence::Label
47
+ @labels[insn] = length
48
+ end
49
+ end
50
+ end
51
+
52
+ # This method is used to compile the instruction sequence into a control
53
+ # flow graph. It returns an instance of ControlFlowGraph.
54
+ def compile
55
+ blocks = build_basic_blocks
56
+
57
+ connect_basic_blocks(blocks)
58
+ prune_basic_blocks(blocks)
59
+
60
+ ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify)
61
+ end
62
+
63
+ private
64
+
65
+ # Finds the indices of the instructions that start a basic block because
66
+ # they're either:
67
+ #
68
+ # * the start of an instruction sequence
69
+ # * the target of a branch
70
+ # * fallen through to from a branch
71
+ #
72
+ def find_basic_block_starts
73
+ block_starts = Set.new([0])
74
+
75
+ insns.each do |index, insn|
76
+ branch_targets = insn.branch_targets
77
+
78
+ if branch_targets.any?
79
+ branch_targets.each do |branch_target|
80
+ block_starts.add(labels[branch_target])
81
+ end
82
+
83
+ block_starts.add(index + insn.length) if insn.falls_through?
84
+ end
85
+ end
86
+
87
+ block_starts.to_a.sort
88
+ end
89
+
90
+ # Builds up a set of basic blocks by iterating over the starts of each
91
+ # block. They are keyed by the index of their first instruction.
92
+ def build_basic_blocks
93
+ block_starts = find_basic_block_starts
94
+
95
+ length = 0
96
+ blocks =
97
+ iseq
98
+ .insns
99
+ .grep(Instruction)
100
+ .slice_after do |insn|
101
+ length += insn.length
102
+ block_starts.include?(length)
103
+ end
104
+
105
+ block_starts
106
+ .zip(blocks)
107
+ .to_h do |block_start, insns|
108
+ # It's possible that we have not detected a block start but still
109
+ # have branching instructions inside of a basic block. This can
110
+ # happen if you have an unconditional jump which is followed by
111
+ # instructions that are unreachable. As of Ruby 3.2, this is
112
+ # possible with something as simple as "1 => a". In this case we
113
+ # can discard all instructions that follow branching instructions.
114
+ block_insns =
115
+ insns.slice_after { |insn| insn.branch_targets.any? }.first
116
+
117
+ [block_start, BasicBlock.new(block_start, block_insns)]
118
+ end
119
+ end
120
+
121
+ # Connect the blocks by letting them know which blocks are incoming and
122
+ # outgoing from each block.
123
+ def connect_basic_blocks(blocks)
124
+ blocks.each do |block_start, block|
125
+ insn = block.insns.last
126
+
127
+ insn.branch_targets.each do |branch_target|
128
+ block.outgoing_blocks << blocks.fetch(labels[branch_target])
129
+ end
130
+
131
+ if (insn.branch_targets.empty? && !insn.leaves?) ||
132
+ insn.falls_through?
133
+ fall_through_start = block_start + block.insns.sum(&:length)
134
+ block.outgoing_blocks << blocks.fetch(fall_through_start)
135
+ end
136
+
137
+ block.outgoing_blocks.each do |outgoing_block|
138
+ outgoing_block.incoming_blocks << block
139
+ end
140
+ end
141
+ end
142
+
143
+ # If there are blocks that are unreachable, we can remove them from the
144
+ # graph entirely at this point.
145
+ def prune_basic_blocks(blocks)
146
+ visited = Set.new
147
+ queue = [blocks.fetch(0)]
148
+
149
+ until queue.empty?
150
+ current_block = queue.shift
151
+ next if visited.include?(current_block)
152
+
153
+ visited << current_block
154
+ queue.concat(current_block.outgoing_blocks)
155
+ end
156
+
157
+ blocks.select! { |_, block| visited.include?(block) }
158
+ end
159
+ end
160
+
161
+ # This is the instruction sequence that this control flow graph
162
+ # corresponds to.
163
+ attr_reader :iseq
164
+
165
+ # This is the list of instructions that this control flow graph contains.
166
+ # It is effectively the same as the list of instructions in the
167
+ # instruction sequence but with line numbers and events filtered out.
168
+ attr_reader :insns
169
+
170
+ # This is the set of basic blocks that this control-flow graph contains.
171
+ attr_reader :blocks
172
+
173
+ def initialize(iseq, insns, blocks)
174
+ @iseq = iseq
175
+ @insns = insns
176
+ @blocks = blocks
177
+ end
178
+
179
+ def disasm
180
+ fmt = Disassembler.new(iseq)
181
+ fmt.puts("== cfg: #{iseq.inspect}")
182
+
183
+ blocks.each do |block|
184
+ fmt.puts(block.id)
185
+ fmt.with_prefix(" ") do |prefix|
186
+ unless block.incoming_blocks.empty?
187
+ from = block.incoming_blocks.map(&:id)
188
+ fmt.puts("#{prefix}== from: #{from.join(", ")}")
189
+ end
190
+
191
+ fmt.format_insns!(block.insns, block.block_start)
192
+
193
+ to = block.outgoing_blocks.map(&:id)
194
+ to << "leaves" if block.insns.last.leaves?
195
+ fmt.puts("#{prefix}== to: #{to.join(", ")}")
196
+ end
197
+ end
198
+
199
+ fmt.string
200
+ end
201
+
202
+ def to_dfg
203
+ DataFlowGraph.compile(self)
204
+ end
205
+
206
+ def to_son
207
+ to_dfg.to_son
208
+ end
209
+
210
+ def to_mermaid
211
+ Mermaid.flowchart do |flowchart|
212
+ disasm = Disassembler::Squished.new
213
+
214
+ blocks.each do |block|
215
+ flowchart.subgraph(block.id) do
216
+ previous = nil
217
+
218
+ block.each_with_length do |insn, length|
219
+ node =
220
+ flowchart.node(
221
+ "node_#{length}",
222
+ "%04d %s" % [length, insn.disasm(disasm)]
223
+ )
224
+
225
+ flowchart.link(previous, node) if previous
226
+ previous = node
227
+ end
228
+ end
229
+ end
230
+
231
+ blocks.each do |block|
232
+ block.outgoing_blocks.each do |outgoing|
233
+ offset =
234
+ block.block_start + block.insns.sum(&:length) -
235
+ block.insns.last.length
236
+
237
+ from = flowchart.fetch("node_#{offset}")
238
+ to = flowchart.fetch("node_#{outgoing.block_start}")
239
+ flowchart.link(from, to)
240
+ end
241
+ end
242
+ end
243
+ end
244
+
245
+ # This method is used to verify that the control flow graph is well
246
+ # formed. It does this by checking that each basic block is itself well
247
+ # formed.
248
+ def verify
249
+ blocks.each(&:verify)
250
+ end
251
+
252
+ def self.compile(iseq)
253
+ Compiler.new(iseq).compile
254
+ end
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,338 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ # Constructs a data-flow-graph of a YARV instruction sequence, via a
6
+ # control-flow-graph. Data flow is discovered locally and then globally. The
7
+ # graph only considers data flow through the stack - local variables and
8
+ # objects are considered fully escaped in this analysis.
9
+ #
10
+ # You can use this class by calling the ::compile method and passing it a
11
+ # control flow graph. It will return a data flow graph object.
12
+ #
13
+ # iseq = RubyVM::InstructionSequence.compile("1 + 2")
14
+ # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
15
+ # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
16
+ # dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg)
17
+ #
18
+ class DataFlowGraph
19
+ # This object represents the flow of data between instructions.
20
+ class DataFlow
21
+ attr_reader :in
22
+ attr_reader :out
23
+
24
+ def initialize
25
+ @in = []
26
+ @out = []
27
+ end
28
+ end
29
+
30
+ # This represents an object that goes on the stack that is passed between
31
+ # basic blocks.
32
+ class BlockArgument
33
+ attr_reader :name
34
+
35
+ def initialize(name)
36
+ @name = name
37
+ end
38
+
39
+ def local?
40
+ false
41
+ end
42
+
43
+ def to_str
44
+ name.to_s
45
+ end
46
+ end
47
+
48
+ # This represents an object that goes on the stack that is passed between
49
+ # instructions within a basic block.
50
+ class LocalArgument
51
+ attr_reader :name, :length
52
+
53
+ def initialize(length)
54
+ @length = length
55
+ end
56
+
57
+ def local?
58
+ true
59
+ end
60
+
61
+ def to_str
62
+ length.to_s
63
+ end
64
+ end
65
+
66
+ attr_reader :cfg, :insn_flows, :block_flows
67
+
68
+ def initialize(cfg, insn_flows, block_flows)
69
+ @cfg = cfg
70
+ @insn_flows = insn_flows
71
+ @block_flows = block_flows
72
+ end
73
+
74
+ def blocks
75
+ cfg.blocks
76
+ end
77
+
78
+ def disasm
79
+ fmt = Disassembler.new(cfg.iseq)
80
+ fmt.puts("== dfg: #{cfg.iseq.inspect}")
81
+
82
+ blocks.each do |block|
83
+ fmt.puts(block.id)
84
+ fmt.with_prefix(" ") do |prefix|
85
+ unless block.incoming_blocks.empty?
86
+ from = block.incoming_blocks.map(&:id)
87
+ fmt.puts("#{prefix}== from: #{from.join(", ")}")
88
+ end
89
+
90
+ block_flow = block_flows.fetch(block.id)
91
+ unless block_flow.in.empty?
92
+ fmt.puts("#{prefix}== in: #{block_flow.in.join(", ")}")
93
+ end
94
+
95
+ fmt.format_insns!(block.insns, block.block_start) do |_, length|
96
+ insn_flow = insn_flows[length]
97
+ next if insn_flow.in.empty? && insn_flow.out.empty?
98
+
99
+ fmt.print(" # ")
100
+ unless insn_flow.in.empty?
101
+ fmt.print("in: #{insn_flow.in.join(", ")}")
102
+ fmt.print("; ") unless insn_flow.out.empty?
103
+ end
104
+
105
+ unless insn_flow.out.empty?
106
+ fmt.print("out: #{insn_flow.out.join(", ")}")
107
+ end
108
+ end
109
+
110
+ to = block.outgoing_blocks.map(&:id)
111
+ to << "leaves" if block.insns.last.leaves?
112
+ fmt.puts("#{prefix}== to: #{to.join(", ")}")
113
+
114
+ unless block_flow.out.empty?
115
+ fmt.puts("#{prefix}== out: #{block_flow.out.join(", ")}")
116
+ end
117
+ end
118
+ end
119
+
120
+ fmt.string
121
+ end
122
+
123
+ def to_son
124
+ SeaOfNodes.compile(self)
125
+ end
126
+
127
+ def to_mermaid
128
+ Mermaid.flowchart do |flowchart|
129
+ disasm = Disassembler::Squished.new
130
+
131
+ blocks.each do |block|
132
+ block_flow = block_flows.fetch(block.id)
133
+ graph_name =
134
+ if block_flow.in.any?
135
+ "#{block.id} #{block_flows[block.id].in.join(", ")}"
136
+ else
137
+ block.id
138
+ end
139
+
140
+ flowchart.subgraph(graph_name) do
141
+ previous = nil
142
+
143
+ block.each_with_length do |insn, length|
144
+ node =
145
+ flowchart.node(
146
+ "node_#{length}",
147
+ "%04d %s" % [length, insn.disasm(disasm)],
148
+ shape: :rounded
149
+ )
150
+
151
+ flowchart.link(previous, node, color: :red) if previous
152
+ insn_flows[length].in.each do |input|
153
+ if input.is_a?(LocalArgument)
154
+ from = flowchart.fetch("node_#{input.length}")
155
+ flowchart.link(from, node, color: :green)
156
+ end
157
+ end
158
+
159
+ previous = node
160
+ end
161
+ end
162
+ end
163
+
164
+ blocks.each do |block|
165
+ block.outgoing_blocks.each do |outgoing|
166
+ offset =
167
+ block.block_start + block.insns.sum(&:length) -
168
+ block.insns.last.length
169
+
170
+ from = flowchart.fetch("node_#{offset}")
171
+ to = flowchart.fetch("node_#{outgoing.block_start}")
172
+ flowchart.link(from, to, color: :red)
173
+ end
174
+ end
175
+ end
176
+ end
177
+
178
+ # Verify that we constructed the data flow graph correctly.
179
+ def verify
180
+ # Check that the first block has no arguments.
181
+ raise unless block_flows.fetch(blocks.first.id).in.empty?
182
+
183
+ # Check all control flow edges between blocks pass the right number of
184
+ # arguments.
185
+ blocks.each do |block|
186
+ block_flow = block_flows.fetch(block.id)
187
+
188
+ if block.outgoing_blocks.empty?
189
+ # With no outgoing blocks, there should be no output arguments.
190
+ raise unless block_flow.out.empty?
191
+ else
192
+ # Check with outgoing blocks...
193
+ block.outgoing_blocks.each do |outgoing_block|
194
+ outgoing_flow = block_flows.fetch(outgoing_block.id)
195
+
196
+ # The block should have as many output arguments as the
197
+ # outgoing block has input arguments.
198
+ raise unless block_flow.out.size == outgoing_flow.in.size
199
+ end
200
+ end
201
+ end
202
+ end
203
+
204
+ def self.compile(cfg)
205
+ Compiler.new(cfg).compile
206
+ end
207
+
208
+ # This class is responsible for creating a data flow graph from the given
209
+ # control flow graph.
210
+ class Compiler
211
+ # This is the control flow graph that is being compiled.
212
+ attr_reader :cfg
213
+
214
+ # This data structure will hold the data flow between instructions
215
+ # within individual basic blocks.
216
+ attr_reader :insn_flows
217
+
218
+ # This data structure will hold the data flow between basic blocks.
219
+ attr_reader :block_flows
220
+
221
+ def initialize(cfg)
222
+ @cfg = cfg
223
+ @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] }
224
+ @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] }
225
+ end
226
+
227
+ def compile
228
+ find_internal_flow
229
+ find_external_flow
230
+ DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify)
231
+ end
232
+
233
+ private
234
+
235
+ # Find the data flow within each basic block. Using an abstract stack,
236
+ # connect from consumers of data to the producers of that data.
237
+ def find_internal_flow
238
+ cfg.blocks.each do |block|
239
+ block_flow = block_flows.fetch(block.id)
240
+ stack = []
241
+
242
+ # Go through each instruction in the block.
243
+ block.each_with_length do |insn, length|
244
+ insn_flow = insn_flows[length]
245
+
246
+ # How many values will be missing from the local stack to run this
247
+ # instruction? This will be used to determine if the values that
248
+ # are being used by this instruction are coming from previous
249
+ # instructions or from previous basic blocks.
250
+ missing = insn.pops - stack.size
251
+
252
+ # For every value the instruction pops off the stack.
253
+ insn.pops.times do
254
+ # Was the value it pops off from another basic block?
255
+ if stack.empty?
256
+ # If the stack is empty, then there aren't enough values being
257
+ # pushed from previous instructions to fulfill the needs of
258
+ # this instruction. In that case the values must be coming
259
+ # from previous basic blocks.
260
+ missing -= 1
261
+ argument = BlockArgument.new(:"in_#{missing}")
262
+
263
+ insn_flow.in.unshift(argument)
264
+ block_flow.in.unshift(argument)
265
+ else
266
+ # Since there are values in the stack, we can connect this
267
+ # consumer to the producer of the value.
268
+ insn_flow.in.unshift(stack.pop)
269
+ end
270
+ end
271
+
272
+ # Record on our abstract stack that this instruction pushed
273
+ # this value onto the stack.
274
+ insn.pushes.times { stack << LocalArgument.new(length) }
275
+ end
276
+
277
+ # Values that are left on the stack after going through all
278
+ # instructions are arguments to the basic block that we jump to.
279
+ stack.reverse_each.with_index do |producer, index|
280
+ block_flow.out << producer
281
+
282
+ argument = BlockArgument.new(:"out_#{index}")
283
+ insn_flows[producer.length].out << argument
284
+ end
285
+ end
286
+
287
+ # Go backwards and connect from producers to consumers.
288
+ cfg.insns.each_key do |length|
289
+ # For every instruction that produced a value used in this
290
+ # instruction...
291
+ insn_flows[length].in.each do |producer|
292
+ # If it's actually another instruction and not a basic block
293
+ # argument...
294
+ if producer.is_a?(LocalArgument)
295
+ # Record in the producing instruction that it produces a value
296
+ # used by this instruction.
297
+ insn_flows[producer.length].out << LocalArgument.new(length)
298
+ end
299
+ end
300
+ end
301
+ end
302
+
303
+ # Find the data that flows between basic blocks.
304
+ def find_external_flow
305
+ stack = [*cfg.blocks]
306
+
307
+ until stack.empty?
308
+ block = stack.pop
309
+ block_flow = block_flows.fetch(block.id)
310
+
311
+ block.incoming_blocks.each do |incoming_block|
312
+ incoming_flow = block_flows.fetch(incoming_block.id)
313
+
314
+ # Does a predecessor block have fewer outputs than the successor
315
+ # has inputs?
316
+ if incoming_flow.out.size < block_flow.in.size
317
+ # If so then add arguments to pass data through from the
318
+ # incoming block's incoming blocks.
319
+ (block_flow.in.size - incoming_flow.out.size).times do |index|
320
+ name = BlockArgument.new(:"pass_#{index}")
321
+
322
+ incoming_flow.in.unshift(name)
323
+ incoming_flow.out.unshift(name)
324
+ end
325
+
326
+ # Since we modified the incoming block, add it back to the stack
327
+ # so it'll be considered as an outgoing block again, and
328
+ # propagate the external data flow back up the control flow
329
+ # graph.
330
+ stack << incoming_block
331
+ end
332
+ end
333
+ end
334
+ end
335
+ end
336
+ end
337
+ end
338
+ end
@@ -151,7 +151,7 @@ module SyntaxTree
151
151
  elsif argc == 1 && method.end_with?("=")
152
152
  receiver, argument = clause.pop(2)
153
153
  clause << Assign(
154
- CallNode(receiver, Period("."), Ident(method[0..-2]), nil),
154
+ Field(receiver, Period("."), Ident(method[0..-2])),
155
155
  argument
156
156
  )
157
157
  else