syntax_tree 5.3.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +64 -1
  4. data/Gemfile.lock +2 -2
  5. data/README.md +28 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +56 -54
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +198 -107
  23. data/lib/syntax_tree/parser.rb +322 -118
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3019 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -0,0 +1,257 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ # This class represents a control flow graph of a YARV instruction sequence.
6
+ # It constructs a graph of basic blocks that hold subsets of the list of
7
+ # instructions from the instruction sequence.
8
+ #
9
+ # You can use this class by calling the ::compile method and passing it a
10
+ # YARV instruction sequence. It will return a control flow graph object.
11
+ #
12
+ # iseq = RubyVM::InstructionSequence.compile("1 + 2")
13
+ # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
14
+ # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
15
+ #
16
+ class ControlFlowGraph
17
+ # This class is responsible for creating a control flow graph from the
18
+ # given instruction sequence.
19
+ class Compiler
20
+ # This is the instruction sequence that is being compiled.
21
+ attr_reader :iseq
22
+
23
+ # This is a hash of indices in the YARV instruction sequence that point
24
+ # to their corresponding instruction.
25
+ attr_reader :insns
26
+
27
+ # This is a hash of labels that point to their corresponding index into
28
+ # the YARV instruction sequence. Note that this is not the same as the
29
+ # index into the list of instructions on the instruction sequence
30
+ # object. Instead, this is the index into the C array, so it includes
31
+ # operands.
32
+ attr_reader :labels
33
+
34
+ def initialize(iseq)
35
+ @iseq = iseq
36
+
37
+ @insns = {}
38
+ @labels = {}
39
+
40
+ length = 0
41
+ iseq.insns.each do |insn|
42
+ case insn
43
+ when Instruction
44
+ @insns[length] = insn
45
+ length += insn.length
46
+ when InstructionSequence::Label
47
+ @labels[insn] = length
48
+ end
49
+ end
50
+ end
51
+
52
+ # This method is used to compile the instruction sequence into a control
53
+ # flow graph. It returns an instance of ControlFlowGraph.
54
+ def compile
55
+ blocks = build_basic_blocks
56
+
57
+ connect_basic_blocks(blocks)
58
+ prune_basic_blocks(blocks)
59
+
60
+ ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify)
61
+ end
62
+
63
+ private
64
+
65
+ # Finds the indices of the instructions that start a basic block because
66
+ # they're either:
67
+ #
68
+ # * the start of an instruction sequence
69
+ # * the target of a branch
70
+ # * fallen through to from a branch
71
+ #
72
+ def find_basic_block_starts
73
+ block_starts = Set.new([0])
74
+
75
+ insns.each do |index, insn|
76
+ branch_targets = insn.branch_targets
77
+
78
+ if branch_targets.any?
79
+ branch_targets.each do |branch_target|
80
+ block_starts.add(labels[branch_target])
81
+ end
82
+
83
+ block_starts.add(index + insn.length) if insn.falls_through?
84
+ end
85
+ end
86
+
87
+ block_starts.to_a.sort
88
+ end
89
+
90
+ # Builds up a set of basic blocks by iterating over the starts of each
91
+ # block. They are keyed by the index of their first instruction.
92
+ def build_basic_blocks
93
+ block_starts = find_basic_block_starts
94
+
95
+ length = 0
96
+ blocks =
97
+ iseq
98
+ .insns
99
+ .grep(Instruction)
100
+ .slice_after do |insn|
101
+ length += insn.length
102
+ block_starts.include?(length)
103
+ end
104
+
105
+ block_starts
106
+ .zip(blocks)
107
+ .to_h do |block_start, insns|
108
+ # It's possible that we have not detected a block start but still
109
+ # have branching instructions inside of a basic block. This can
110
+ # happen if you have an unconditional jump which is followed by
111
+ # instructions that are unreachable. As of Ruby 3.2, this is
112
+ # possible with something as simple as "1 => a". In this case we
113
+ # can discard all instructions that follow branching instructions.
114
+ block_insns =
115
+ insns.slice_after { |insn| insn.branch_targets.any? }.first
116
+
117
+ [block_start, BasicBlock.new(block_start, block_insns)]
118
+ end
119
+ end
120
+
121
+ # Connect the blocks by letting them know which blocks are incoming and
122
+ # outgoing from each block.
123
+ def connect_basic_blocks(blocks)
124
+ blocks.each do |block_start, block|
125
+ insn = block.insns.last
126
+
127
+ insn.branch_targets.each do |branch_target|
128
+ block.outgoing_blocks << blocks.fetch(labels[branch_target])
129
+ end
130
+
131
+ if (insn.branch_targets.empty? && !insn.leaves?) ||
132
+ insn.falls_through?
133
+ fall_through_start = block_start + block.insns.sum(&:length)
134
+ block.outgoing_blocks << blocks.fetch(fall_through_start)
135
+ end
136
+
137
+ block.outgoing_blocks.each do |outgoing_block|
138
+ outgoing_block.incoming_blocks << block
139
+ end
140
+ end
141
+ end
142
+
143
+ # If there are blocks that are unreachable, we can remove them from the
144
+ # graph entirely at this point.
145
+ def prune_basic_blocks(blocks)
146
+ visited = Set.new
147
+ queue = [blocks.fetch(0)]
148
+
149
+ until queue.empty?
150
+ current_block = queue.shift
151
+ next if visited.include?(current_block)
152
+
153
+ visited << current_block
154
+ queue.concat(current_block.outgoing_blocks)
155
+ end
156
+
157
+ blocks.select! { |_, block| visited.include?(block) }
158
+ end
159
+ end
160
+
161
+ # This is the instruction sequence that this control flow graph
162
+ # corresponds to.
163
+ attr_reader :iseq
164
+
165
+ # This is the hash of instructions (keyed by index) that this graph contains.
166
+ # It is effectively the same as the list of instructions in the
167
+ # instruction sequence but with line numbers and events filtered out.
168
+ attr_reader :insns
169
+
170
+ # This is the set of basic blocks that this control-flow graph contains.
171
+ attr_reader :blocks
172
+
173
+ def initialize(iseq, insns, blocks)
174
+ @iseq = iseq
175
+ @insns = insns
176
+ @blocks = blocks
177
+ end
178
+
179
+ def disasm
180
+ fmt = Disassembler.new(iseq)
181
+ fmt.puts("== cfg: #{iseq.inspect}")
182
+
183
+ blocks.each do |block|
184
+ fmt.puts(block.id)
185
+ fmt.with_prefix(" ") do |prefix|
186
+ unless block.incoming_blocks.empty?
187
+ from = block.incoming_blocks.map(&:id)
188
+ fmt.puts("#{prefix}== from: #{from.join(", ")}")
189
+ end
190
+
191
+ fmt.format_insns!(block.insns, block.block_start)
192
+
193
+ to = block.outgoing_blocks.map(&:id)
194
+ to << "leaves" if block.insns.last.leaves?
195
+ fmt.puts("#{prefix}== to: #{to.join(", ")}")
196
+ end
197
+ end
198
+
199
+ fmt.string
200
+ end
201
+
202
+ def to_dfg
203
+ DataFlowGraph.compile(self)
204
+ end
205
+
206
+ def to_son
207
+ to_dfg.to_son
208
+ end
209
+
210
+ def to_mermaid
211
+ Mermaid.flowchart do |flowchart|
212
+ disasm = Disassembler::Squished.new
213
+
214
+ blocks.each do |block|
215
+ flowchart.subgraph(block.id) do
216
+ previous = nil
217
+
218
+ block.each_with_length do |insn, length|
219
+ node =
220
+ flowchart.node(
221
+ "node_#{length}",
222
+ "%04d %s" % [length, insn.disasm(disasm)]
223
+ )
224
+
225
+ flowchart.link(previous, node) if previous
226
+ previous = node
227
+ end
228
+ end
229
+ end
230
+
231
+ blocks.each do |block|
232
+ block.outgoing_blocks.each do |outgoing|
233
+ offset =
234
+ block.block_start + block.insns.sum(&:length) -
235
+ block.insns.last.length
236
+
237
+ from = flowchart.fetch("node_#{offset}")
238
+ to = flowchart.fetch("node_#{outgoing.block_start}")
239
+ flowchart.link(from, to)
240
+ end
241
+ end
242
+ end
243
+ end
244
+
245
+ # This method is used to verify that the control flow graph is well
246
+ # formed. It does this by checking that each basic block is itself well
247
+ # formed.
248
+ def verify
249
+ blocks.each(&:verify)
250
+ end
251
+
252
+ def self.compile(iseq)
253
+ Compiler.new(iseq).compile
254
+ end
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,338 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ # Constructs a data-flow-graph of a YARV instruction sequence, via a
6
+ # control-flow-graph. Data flow is discovered locally and then globally. The
7
+ # graph only considers data flow through the stack - local variables and
8
+ # objects are considered fully escaped in this analysis.
9
+ #
10
+ # You can use this class by calling the ::compile method and passing it a
11
+ # control flow graph. It will return a data flow graph object.
12
+ #
13
+ # iseq = RubyVM::InstructionSequence.compile("1 + 2")
14
+ # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
15
+ # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
16
+ # dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg)
17
+ #
18
+ class DataFlowGraph
19
+ # This object represents the flow of data between instructions.
20
+ class DataFlow
21
+ attr_reader :in
22
+ attr_reader :out
23
+
24
+ def initialize
25
+ @in = []
26
+ @out = []
27
+ end
28
+ end
29
+
30
+ # This represents an object that goes on the stack that is passed between
31
+ # basic blocks.
32
+ class BlockArgument
33
+ attr_reader :name
34
+
35
+ def initialize(name)
36
+ @name = name
37
+ end
38
+
39
+ def local?
40
+ false
41
+ end
42
+
43
+ def to_str
44
+ name.to_s
45
+ end
46
+ end
47
+
48
+ # This represents an object that goes on the stack that is passed between
49
+ # instructions within a basic block.
50
+ class LocalArgument
51
+ attr_reader :name, :length
52
+
53
+ def initialize(length)
54
+ @length = length
55
+ end
56
+
57
+ def local?
58
+ true
59
+ end
60
+
61
+ def to_str
62
+ length.to_s
63
+ end
64
+ end
65
+
66
+ attr_reader :cfg, :insn_flows, :block_flows
67
+
68
+ def initialize(cfg, insn_flows, block_flows)
69
+ @cfg = cfg
70
+ @insn_flows = insn_flows
71
+ @block_flows = block_flows
72
+ end
73
+
74
+ def blocks
75
+ cfg.blocks
76
+ end
77
+
78
+ def disasm
79
+ fmt = Disassembler.new(cfg.iseq)
80
+ fmt.puts("== dfg: #{cfg.iseq.inspect}")
81
+
82
+ blocks.each do |block|
83
+ fmt.puts(block.id)
84
+ fmt.with_prefix(" ") do |prefix|
85
+ unless block.incoming_blocks.empty?
86
+ from = block.incoming_blocks.map(&:id)
87
+ fmt.puts("#{prefix}== from: #{from.join(", ")}")
88
+ end
89
+
90
+ block_flow = block_flows.fetch(block.id)
91
+ unless block_flow.in.empty?
92
+ fmt.puts("#{prefix}== in: #{block_flow.in.join(", ")}")
93
+ end
94
+
95
+ fmt.format_insns!(block.insns, block.block_start) do |_, length|
96
+ insn_flow = insn_flows[length]
97
+ next if insn_flow.in.empty? && insn_flow.out.empty?
98
+
99
+ fmt.print(" # ")
100
+ unless insn_flow.in.empty?
101
+ fmt.print("in: #{insn_flow.in.join(", ")}")
102
+ fmt.print("; ") unless insn_flow.out.empty?
103
+ end
104
+
105
+ unless insn_flow.out.empty?
106
+ fmt.print("out: #{insn_flow.out.join(", ")}")
107
+ end
108
+ end
109
+
110
+ to = block.outgoing_blocks.map(&:id)
111
+ to << "leaves" if block.insns.last.leaves?
112
+ fmt.puts("#{prefix}== to: #{to.join(", ")}")
113
+
114
+ unless block_flow.out.empty?
115
+ fmt.puts("#{prefix}== out: #{block_flow.out.join(", ")}")
116
+ end
117
+ end
118
+ end
119
+
120
+ fmt.string
121
+ end
122
+
123
+ def to_son
124
+ SeaOfNodes.compile(self)
125
+ end
126
+
127
+ def to_mermaid
128
+ Mermaid.flowchart do |flowchart|
129
+ disasm = Disassembler::Squished.new
130
+
131
+ blocks.each do |block|
132
+ block_flow = block_flows.fetch(block.id)
133
+ graph_name =
134
+ if block_flow.in.any?
135
+ "#{block.id} #{block_flows[block.id].in.join(", ")}"
136
+ else
137
+ block.id
138
+ end
139
+
140
+ flowchart.subgraph(graph_name) do
141
+ previous = nil
142
+
143
+ block.each_with_length do |insn, length|
144
+ node =
145
+ flowchart.node(
146
+ "node_#{length}",
147
+ "%04d %s" % [length, insn.disasm(disasm)],
148
+ shape: :rounded
149
+ )
150
+
151
+ flowchart.link(previous, node, color: :red) if previous
152
+ insn_flows[length].in.each do |input|
153
+ if input.is_a?(LocalArgument)
154
+ from = flowchart.fetch("node_#{input.length}")
155
+ flowchart.link(from, node, color: :green)
156
+ end
157
+ end
158
+
159
+ previous = node
160
+ end
161
+ end
162
+ end
163
+
164
+ blocks.each do |block|
165
+ block.outgoing_blocks.each do |outgoing|
166
+ offset =
167
+ block.block_start + block.insns.sum(&:length) -
168
+ block.insns.last.length
169
+
170
+ from = flowchart.fetch("node_#{offset}")
171
+ to = flowchart.fetch("node_#{outgoing.block_start}")
172
+ flowchart.link(from, to, color: :red)
173
+ end
174
+ end
175
+ end
176
+ end
177
+
178
+ # Verify that we constructed the data flow graph correctly.
179
+ def verify
180
+ # Check that the first block has no arguments.
181
+ raise unless block_flows.fetch(blocks.first.id).in.empty?
182
+
183
+ # Check all control flow edges between blocks pass the right number of
184
+ # arguments.
185
+ blocks.each do |block|
186
+ block_flow = block_flows.fetch(block.id)
187
+
188
+ if block.outgoing_blocks.empty?
189
+ # With no outgoing blocks, there should be no output arguments.
190
+ raise unless block_flow.out.empty?
191
+ else
192
+ # Check with outgoing blocks...
193
+ block.outgoing_blocks.each do |outgoing_block|
194
+ outgoing_flow = block_flows.fetch(outgoing_block.id)
195
+
196
+ # The block should have as many output arguments as the
197
+ # outgoing block has input arguments.
198
+ raise unless block_flow.out.size == outgoing_flow.in.size
199
+ end
200
+ end
201
+ end
202
+ end
203
+
204
+ def self.compile(cfg)
205
+ Compiler.new(cfg).compile
206
+ end
207
+
208
+ # This class is responsible for creating a data flow graph from the given
209
+ # control flow graph.
210
+ class Compiler
211
+ # This is the control flow graph that is being compiled.
212
+ attr_reader :cfg
213
+
214
+ # This data structure will hold the data flow between instructions
215
+ # within individual basic blocks.
216
+ attr_reader :insn_flows
217
+
218
+ # This data structure will hold the data flow between basic blocks.
219
+ attr_reader :block_flows
220
+
221
+ def initialize(cfg)
222
+ @cfg = cfg
223
+ @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] }
224
+ @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] }
225
+ end
226
+
227
+ def compile
228
+ find_internal_flow
229
+ find_external_flow
230
+ DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify)
231
+ end
232
+
233
+ private
234
+
235
+ # Find the data flow within each basic block. Using an abstract stack,
236
+ # connect from consumers of data to the producers of that data.
237
+ def find_internal_flow
238
+ cfg.blocks.each do |block|
239
+ block_flow = block_flows.fetch(block.id)
240
+ stack = []
241
+
242
+ # Go through each instruction in the block.
243
+ block.each_with_length do |insn, length|
244
+ insn_flow = insn_flows[length]
245
+
246
+ # How many values will be missing from the local stack to run this
247
+ # instruction? This will be used to determine if the values that
248
+ # are being used by this instruction are coming from previous
249
+ # instructions or from previous basic blocks.
250
+ missing = insn.pops - stack.size
251
+
252
+ # For every value the instruction pops off the stack.
253
+ insn.pops.times do
254
+ # Was the value it pops off from another basic block?
255
+ if stack.empty?
256
+ # If the stack is empty, then there aren't enough values being
257
+ # pushed from previous instructions to fulfill the needs of
258
+ # this instruction. In that case the values must be coming
259
+ # from previous basic blocks.
260
+ missing -= 1
261
+ argument = BlockArgument.new(:"in_#{missing}")
262
+
263
+ insn_flow.in.unshift(argument)
264
+ block_flow.in.unshift(argument)
265
+ else
266
+ # Since there are values in the stack, we can connect this
267
+ # consumer to the producer of the value.
268
+ insn_flow.in.unshift(stack.pop)
269
+ end
270
+ end
271
+
272
+ # Record on our abstract stack that this instruction pushed
273
+ # this value onto the stack.
274
+ insn.pushes.times { stack << LocalArgument.new(length) }
275
+ end
276
+
277
+ # Values that are left on the stack after going through all
278
+ # instructions are arguments to the basic block that we jump to.
279
+ stack.reverse_each.with_index do |producer, index|
280
+ block_flow.out << producer
281
+
282
+ argument = BlockArgument.new(:"out_#{index}")
283
+ insn_flows[producer.length].out << argument
284
+ end
285
+ end
286
+
287
+ # Go backwards and connect from producers to consumers.
288
+ cfg.insns.each_key do |length|
289
+ # For every instruction that produced a value used in this
290
+ # instruction...
291
+ insn_flows[length].in.each do |producer|
292
+ # If it's actually another instruction and not a basic block
293
+ # argument...
294
+ if producer.is_a?(LocalArgument)
295
+ # Record in the producing instruction that it produces a value
296
+ # used by this instruction.
297
+ insn_flows[producer.length].out << LocalArgument.new(length)
298
+ end
299
+ end
300
+ end
301
+ end
302
+
303
+ # Find the data that flows between basic blocks.
304
+ def find_external_flow
305
+ stack = [*cfg.blocks]
306
+
307
+ until stack.empty?
308
+ block = stack.pop
309
+ block_flow = block_flows.fetch(block.id)
310
+
311
+ block.incoming_blocks.each do |incoming_block|
312
+ incoming_flow = block_flows.fetch(incoming_block.id)
313
+
314
+ # Does a predecessor block have fewer outputs than the successor
315
+ # has inputs?
316
+ if incoming_flow.out.size < block_flow.in.size
317
+ # If so then add arguments to pass data through from the
318
+ # incoming block's incoming blocks.
319
+ (block_flow.in.size - incoming_flow.out.size).times do |index|
320
+ name = BlockArgument.new(:"pass_#{index}")
321
+
322
+ incoming_flow.in.unshift(name)
323
+ incoming_flow.out.unshift(name)
324
+ end
325
+
326
+ # Since we modified the incoming block, add it back to the stack
327
+ # so it'll be considered as an outgoing block again, and
328
+ # propagate the external data flow back up the control flow
329
+ # graph.
330
+ stack << incoming_block
331
+ end
332
+ end
333
+ end
334
+ end
335
+ end
336
+ end
337
+ end
338
+ end
@@ -151,7 +151,7 @@ module SyntaxTree
151
151
  elsif argc == 1 && method.end_with?("=")
152
152
  receiver, argument = clause.pop(2)
153
153
  clause << Assign(
154
- CallNode(receiver, Period("."), Ident(method[0..-2]), nil),
154
+ Field(receiver, Period("."), Ident(method[0..-2])),
155
155
  argument
156
156
  )
157
157
  else