syntax_tree 5.3.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -1
- data/CHANGELOG.md +64 -1
- data/Gemfile.lock +2 -2
- data/README.md +28 -9
- data/Rakefile +12 -8
- data/bin/console +1 -0
- data/bin/whitequark +79 -0
- data/doc/changing_structure.md +16 -0
- data/lib/syntax_tree/basic_visitor.rb +44 -5
- data/lib/syntax_tree/cli.rb +2 -2
- data/lib/syntax_tree/dsl.rb +23 -11
- data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
- data/lib/syntax_tree/formatter.rb +1 -1
- data/lib/syntax_tree/index.rb +56 -54
- data/lib/syntax_tree/json_visitor.rb +55 -0
- data/lib/syntax_tree/language_server.rb +157 -2
- data/lib/syntax_tree/match_visitor.rb +120 -0
- data/lib/syntax_tree/mermaid.rb +177 -0
- data/lib/syntax_tree/mermaid_visitor.rb +69 -0
- data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
- data/lib/syntax_tree/node.rb +198 -107
- data/lib/syntax_tree/parser.rb +322 -118
- data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
- data/lib/syntax_tree/reflection.rb +241 -0
- data/lib/syntax_tree/translation/parser.rb +3019 -0
- data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
- data/lib/syntax_tree/translation.rb +28 -0
- data/lib/syntax_tree/version.rb +1 -1
- data/lib/syntax_tree/with_scope.rb +244 -0
- data/lib/syntax_tree/yarv/basic_block.rb +53 -0
- data/lib/syntax_tree/yarv/calldata.rb +91 -0
- data/lib/syntax_tree/yarv/compiler.rb +110 -100
- data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
- data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
- data/lib/syntax_tree/yarv/decompiler.rb +1 -1
- data/lib/syntax_tree/yarv/disassembler.rb +104 -80
- data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
- data/lib/syntax_tree/yarv/instructions.rb +203 -649
- data/lib/syntax_tree/yarv/legacy.rb +12 -24
- data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
- data/lib/syntax_tree/yarv.rb +18 -0
- data/lib/syntax_tree.rb +88 -56
- data/tasks/sorbet.rake +277 -0
- data/tasks/whitequark.rake +87 -0
- metadata +23 -11
- data/.gitmodules +0 -9
- data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
- data/lib/syntax_tree/visitor/environment.rb +0 -84
- data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
- data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
- data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
- data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -19,7 +19,7 @@ module SyntaxTree
|
|
19
19
|
# @@class_variable
|
20
20
|
# ~~~
|
21
21
|
#
|
22
|
-
class GetClassVariable
|
22
|
+
class GetClassVariable < Instruction
|
23
23
|
attr_reader :name
|
24
24
|
|
25
25
|
def initialize(name)
|
@@ -46,10 +46,6 @@ module SyntaxTree
|
|
46
46
|
2
|
47
47
|
end
|
48
48
|
|
49
|
-
def pops
|
50
|
-
0
|
51
|
-
end
|
52
|
-
|
53
49
|
def pushes
|
54
50
|
1
|
55
51
|
end
|
@@ -79,7 +75,7 @@ module SyntaxTree
|
|
79
75
|
# Constant
|
80
76
|
# ~~~
|
81
77
|
#
|
82
|
-
class OptGetInlineCache
|
78
|
+
class OptGetInlineCache < Instruction
|
83
79
|
attr_reader :label, :cache
|
84
80
|
|
85
81
|
def initialize(label, cache)
|
@@ -111,21 +107,21 @@ module SyntaxTree
|
|
111
107
|
3
|
112
108
|
end
|
113
109
|
|
114
|
-
def pops
|
115
|
-
0
|
116
|
-
end
|
117
|
-
|
118
110
|
def pushes
|
119
111
|
1
|
120
112
|
end
|
121
113
|
|
122
|
-
def canonical
|
123
|
-
self
|
124
|
-
end
|
125
|
-
|
126
114
|
def call(vm)
|
127
115
|
vm.push(nil)
|
128
116
|
end
|
117
|
+
|
118
|
+
def branch_targets
|
119
|
+
[label]
|
120
|
+
end
|
121
|
+
|
122
|
+
def falls_through?
|
123
|
+
true
|
124
|
+
end
|
129
125
|
end
|
130
126
|
|
131
127
|
# ### Summary
|
@@ -143,7 +139,7 @@ module SyntaxTree
|
|
143
139
|
# Constant
|
144
140
|
# ~~~
|
145
141
|
#
|
146
|
-
class OptSetInlineCache
|
142
|
+
class OptSetInlineCache < Instruction
|
147
143
|
attr_reader :cache
|
148
144
|
|
149
145
|
def initialize(cache)
|
@@ -178,10 +174,6 @@ module SyntaxTree
|
|
178
174
|
1
|
179
175
|
end
|
180
176
|
|
181
|
-
def canonical
|
182
|
-
self
|
183
|
-
end
|
184
|
-
|
185
177
|
def call(vm)
|
186
178
|
end
|
187
179
|
end
|
@@ -200,7 +192,7 @@ module SyntaxTree
|
|
200
192
|
# @@class_variable = 1
|
201
193
|
# ~~~
|
202
194
|
#
|
203
|
-
class SetClassVariable
|
195
|
+
class SetClassVariable < Instruction
|
204
196
|
attr_reader :name
|
205
197
|
|
206
198
|
def initialize(name)
|
@@ -231,10 +223,6 @@ module SyntaxTree
|
|
231
223
|
1
|
232
224
|
end
|
233
225
|
|
234
|
-
def pushes
|
235
|
-
0
|
236
|
-
end
|
237
|
-
|
238
226
|
def canonical
|
239
227
|
YARV::SetClassVariable.new(name, nil)
|
240
228
|
end
|
@@ -0,0 +1,534 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxTree
|
4
|
+
module YARV
|
5
|
+
# A sea of nodes is an intermediate representation used by a compiler to
|
6
|
+
# represent both control and data flow in the same graph. The way we use it
|
7
|
+
# allows us to have the vertices of the graph represent either an
|
8
|
+
# instruction in the instruction sequence or a synthesized node that we add
|
9
|
+
# to the graph. The edges of the graph represent either control flow or data
|
10
|
+
# flow.
|
11
|
+
class SeaOfNodes
|
12
|
+
# This object represents a node in the graph that holds a YARV
|
13
|
+
# instruction.
|
14
|
+
class InsnNode
|
15
|
+
attr_reader :inputs, :outputs, :insn, :offset
|
16
|
+
|
17
|
+
def initialize(insn, offset)
|
18
|
+
@inputs = []
|
19
|
+
@outputs = []
|
20
|
+
|
21
|
+
@insn = insn
|
22
|
+
@offset = offset
|
23
|
+
end
|
24
|
+
|
25
|
+
def id
|
26
|
+
offset
|
27
|
+
end
|
28
|
+
|
29
|
+
def label
|
30
|
+
"%04d %s" % [offset, insn.disasm(Disassembler::Squished.new)]
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# Phi nodes are used to represent the merging of data flow from multiple
|
35
|
+
# incoming blocks.
|
36
|
+
class PhiNode
|
37
|
+
attr_reader :inputs, :outputs, :id
|
38
|
+
|
39
|
+
def initialize(id)
|
40
|
+
@inputs = []
|
41
|
+
@outputs = []
|
42
|
+
@id = id
|
43
|
+
end
|
44
|
+
|
45
|
+
def label
|
46
|
+
"#{id} φ"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# Merge nodes are present in any block that has multiple incoming blocks.
|
51
|
+
# It provides a place for Phi nodes to attach their results.
|
52
|
+
class MergeNode
|
53
|
+
attr_reader :inputs, :outputs, :id
|
54
|
+
|
55
|
+
def initialize(id)
|
56
|
+
@inputs = []
|
57
|
+
@outputs = []
|
58
|
+
@id = id
|
59
|
+
end
|
60
|
+
|
61
|
+
def label
|
62
|
+
"#{id} ψ"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# The edge of a graph represents either control flow or data flow.
|
67
|
+
class Edge
|
68
|
+
TYPES = %i[data control info].freeze
|
69
|
+
|
70
|
+
attr_reader :from
|
71
|
+
attr_reader :to
|
72
|
+
attr_reader :type
|
73
|
+
attr_reader :label
|
74
|
+
|
75
|
+
def initialize(from, to, type, label)
|
76
|
+
raise unless TYPES.include?(type)
|
77
|
+
|
78
|
+
@from = from
|
79
|
+
@to = to
|
80
|
+
@type = type
|
81
|
+
@label = label
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# A subgraph represents the local data and control flow of a single basic
|
86
|
+
# block.
|
87
|
+
class SubGraph
|
88
|
+
attr_reader :first_fixed, :last_fixed, :inputs, :outputs
|
89
|
+
|
90
|
+
def initialize(first_fixed, last_fixed, inputs, outputs)
|
91
|
+
@first_fixed = first_fixed
|
92
|
+
@last_fixed = last_fixed
|
93
|
+
@inputs = inputs
|
94
|
+
@outputs = outputs
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
# The compiler is responsible for taking a data flow graph and turning it
|
99
|
+
# into a sea of nodes.
|
100
|
+
class Compiler
|
101
|
+
attr_reader :dfg, :nodes
|
102
|
+
|
103
|
+
def initialize(dfg)
|
104
|
+
@dfg = dfg
|
105
|
+
@nodes = []
|
106
|
+
|
107
|
+
# We need to put a unique ID on the synthetic nodes in the graph, so
|
108
|
+
# we keep a counter that we increment any time we create a new
|
109
|
+
# synthetic node.
|
110
|
+
@id_counter = 999
|
111
|
+
end
|
112
|
+
|
113
|
+
def compile
|
114
|
+
local_graphs = {}
|
115
|
+
dfg.blocks.each do |block|
|
116
|
+
local_graphs[block.id] = create_local_graph(block)
|
117
|
+
end
|
118
|
+
|
119
|
+
connect_local_graphs_control(local_graphs)
|
120
|
+
connect_local_graphs_data(local_graphs)
|
121
|
+
cleanup_phi_nodes
|
122
|
+
cleanup_insn_nodes
|
123
|
+
|
124
|
+
SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify)
|
125
|
+
end
|
126
|
+
|
127
|
+
private
|
128
|
+
|
129
|
+
# Counter for synthetic nodes.
|
130
|
+
def id_counter
|
131
|
+
@id_counter += 1
|
132
|
+
end
|
133
|
+
|
134
|
+
# Create a sub-graph for a single basic block - block block argument
|
135
|
+
# inputs and outputs will be left dangling, to be connected later.
|
136
|
+
def create_local_graph(block)
|
137
|
+
block_flow = dfg.block_flows.fetch(block.id)
|
138
|
+
|
139
|
+
# A map of instructions to nodes.
|
140
|
+
insn_nodes = {}
|
141
|
+
|
142
|
+
# Create a node for each instruction in the block.
|
143
|
+
block.each_with_length do |insn, offset|
|
144
|
+
node = InsnNode.new(insn, offset)
|
145
|
+
insn_nodes[offset] = node
|
146
|
+
nodes << node
|
147
|
+
end
|
148
|
+
|
149
|
+
# The first and last node in the sub-graph, and the last fixed node.
|
150
|
+
previous_fixed = nil
|
151
|
+
first_fixed = nil
|
152
|
+
last_fixed = nil
|
153
|
+
|
154
|
+
# The merge node for the phi nodes to attach to.
|
155
|
+
merge_node = nil
|
156
|
+
|
157
|
+
# If there is more than one predecessor and we have basic block
|
158
|
+
# arguments coming in, then we need a merge node for the phi nodes to
|
159
|
+
# attach to.
|
160
|
+
if block.incoming_blocks.size > 1 && !block_flow.in.empty?
|
161
|
+
merge_node = MergeNode.new(id_counter)
|
162
|
+
nodes << merge_node
|
163
|
+
|
164
|
+
previous_fixed = merge_node
|
165
|
+
first_fixed = merge_node
|
166
|
+
last_fixed = merge_node
|
167
|
+
end
|
168
|
+
|
169
|
+
# Connect local control flow (only nodes with side effects.)
|
170
|
+
block.each_with_length do |insn, length|
|
171
|
+
if insn.side_effects?
|
172
|
+
insn_node = insn_nodes[length]
|
173
|
+
connect previous_fixed, insn_node, :control if previous_fixed
|
174
|
+
previous_fixed = insn_node
|
175
|
+
first_fixed ||= insn_node
|
176
|
+
last_fixed = insn_node
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
# Connect basic block arguments.
|
181
|
+
inputs = {}
|
182
|
+
outputs = {}
|
183
|
+
block_flow.in.each do |arg|
|
184
|
+
# Each basic block argument gets a phi node. Even if there's only
|
185
|
+
# one predecessor! We'll tidy this up later.
|
186
|
+
phi = PhiNode.new(id_counter)
|
187
|
+
connect(phi, merge_node, :info) if merge_node
|
188
|
+
nodes << phi
|
189
|
+
inputs[arg] = phi
|
190
|
+
|
191
|
+
block.each_with_length do |_, consumer_offset|
|
192
|
+
consumer_flow = dfg.insn_flows[consumer_offset]
|
193
|
+
consumer_flow.in.each_with_index do |producer, input_index|
|
194
|
+
if producer == arg
|
195
|
+
connect(phi, insn_nodes[consumer_offset], :data, input_index)
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
block_flow.out.each { |out| outputs[out] = phi if out == arg }
|
201
|
+
end
|
202
|
+
|
203
|
+
# Connect local dataflow from consumers back to producers.
|
204
|
+
block.each_with_length do |_, consumer_offset|
|
205
|
+
consumer_flow = dfg.insn_flows.fetch(consumer_offset)
|
206
|
+
consumer_flow.in.each_with_index do |producer, input_index|
|
207
|
+
if producer.local?
|
208
|
+
connect(
|
209
|
+
insn_nodes[producer.length],
|
210
|
+
insn_nodes[consumer_offset],
|
211
|
+
:data,
|
212
|
+
input_index
|
213
|
+
)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
# Connect dataflow from producers that leaves the block.
|
219
|
+
block.each_with_length do |_, producer_pc|
|
220
|
+
dfg
|
221
|
+
.insn_flows
|
222
|
+
.fetch(producer_pc)
|
223
|
+
.out
|
224
|
+
.each do |consumer|
|
225
|
+
unless consumer.local?
|
226
|
+
# This is an argument to the successor block - not to an
|
227
|
+
# instruction here.
|
228
|
+
outputs[consumer.name] = insn_nodes[producer_pc]
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
# A graph with only side-effect free instructions will currently have
|
234
|
+
# no fixed nodes! In that case just use the first instruction's node
|
235
|
+
# for both first and last. But it's a bug that it'll appear in the
|
236
|
+
# control flow path!
|
237
|
+
SubGraph.new(
|
238
|
+
first_fixed || insn_nodes[block.block_start],
|
239
|
+
last_fixed || insn_nodes[block.block_start],
|
240
|
+
inputs,
|
241
|
+
outputs
|
242
|
+
)
|
243
|
+
end
|
244
|
+
|
245
|
+
# Connect control flow that flows between basic blocks.
|
246
|
+
def connect_local_graphs_control(local_graphs)
|
247
|
+
dfg.blocks.each do |predecessor|
|
248
|
+
predecessor_last = local_graphs[predecessor.id].last_fixed
|
249
|
+
predecessor.outgoing_blocks.each_with_index do |successor, index|
|
250
|
+
label =
|
251
|
+
if index > 0 &&
|
252
|
+
index == (predecessor.outgoing_blocks.length - 1)
|
253
|
+
# If there are multiple outgoing blocks from this block, then
|
254
|
+
# the last one is a fallthrough. Otherwise it's a branch.
|
255
|
+
:fallthrough
|
256
|
+
else
|
257
|
+
:"branch#{index}"
|
258
|
+
end
|
259
|
+
|
260
|
+
connect(
|
261
|
+
predecessor_last,
|
262
|
+
local_graphs[successor.id].first_fixed,
|
263
|
+
:control,
|
264
|
+
label
|
265
|
+
)
|
266
|
+
end
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
# Connect data flow that flows between basic blocks.
|
271
|
+
def connect_local_graphs_data(local_graphs)
|
272
|
+
dfg.blocks.each do |predecessor|
|
273
|
+
arg_outs = local_graphs[predecessor.id].outputs.values
|
274
|
+
arg_outs.each_with_index do |arg_out, arg_n|
|
275
|
+
predecessor.outgoing_blocks.each do |successor|
|
276
|
+
successor_graph = local_graphs[successor.id]
|
277
|
+
arg_in = successor_graph.inputs.values[arg_n]
|
278
|
+
|
279
|
+
# We're connecting to a phi node, so we may need a special
|
280
|
+
# label.
|
281
|
+
raise unless arg_in.is_a?(PhiNode)
|
282
|
+
|
283
|
+
label =
|
284
|
+
case arg_out
|
285
|
+
when InsnNode
|
286
|
+
# Instructions that go into a phi node are labelled by the
|
287
|
+
# offset of last instruction in the block that executed
|
288
|
+
# them. This way you know which value to use for the phi,
|
289
|
+
# based on the last instruction you executed.
|
290
|
+
dfg.blocks.find do |block|
|
291
|
+
block_start = block.block_start
|
292
|
+
block_end =
|
293
|
+
block_start + block.insns.sum(&:length) -
|
294
|
+
block.insns.last.length
|
295
|
+
|
296
|
+
if (block_start..block_end).cover?(arg_out.offset)
|
297
|
+
break block_end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
when PhiNode
|
301
|
+
# Phi nodes to phi nodes are not labelled.
|
302
|
+
else
|
303
|
+
raise
|
304
|
+
end
|
305
|
+
|
306
|
+
connect(arg_out, arg_in, :data, label)
|
307
|
+
end
|
308
|
+
end
|
309
|
+
end
|
310
|
+
end
|
311
|
+
|
312
|
+
# We don't always build things in an optimal way. Go back and fix up
|
313
|
+
# some mess we left. Ideally we wouldn't create these problems in the
|
314
|
+
# first place.
|
315
|
+
def cleanup_phi_nodes
|
316
|
+
nodes.dup.each do |node| # dup because we're mutating
|
317
|
+
next unless node.is_a?(PhiNode)
|
318
|
+
|
319
|
+
if node.inputs.size == 1
|
320
|
+
# Remove phi nodes with a single input.
|
321
|
+
connect_over(node)
|
322
|
+
remove(node)
|
323
|
+
elsif node.inputs.map(&:from).uniq.size == 1
|
324
|
+
# Remove phi nodes where all inputs are the same.
|
325
|
+
producer_edge = node.inputs.first
|
326
|
+
consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) }
|
327
|
+
connect(
|
328
|
+
producer_edge.from,
|
329
|
+
consumer_edge.to,
|
330
|
+
:data,
|
331
|
+
consumer_edge.label
|
332
|
+
)
|
333
|
+
remove(node)
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|
337
|
+
|
338
|
+
# Eliminate as many unnecessary nodes as we can.
|
339
|
+
def cleanup_insn_nodes
|
340
|
+
nodes.dup.each do |node|
|
341
|
+
next unless node.is_a?(InsnNode)
|
342
|
+
|
343
|
+
case node.insn
|
344
|
+
when AdjustStack
|
345
|
+
# If there are any inputs to the adjust stack that are immediately
|
346
|
+
# discarded, we can remove them from the input list.
|
347
|
+
number = node.insn.number
|
348
|
+
|
349
|
+
node.inputs.dup.each do |input_edge|
|
350
|
+
next if input_edge.type != :data
|
351
|
+
|
352
|
+
from = input_edge.from
|
353
|
+
next unless from.is_a?(InsnNode)
|
354
|
+
|
355
|
+
if from.inputs.empty? && from.outputs.size == 1
|
356
|
+
number -= 1
|
357
|
+
remove(input_edge.from)
|
358
|
+
elsif from.insn.is_a?(Dup)
|
359
|
+
number -= 1
|
360
|
+
connect_over(from)
|
361
|
+
remove(from)
|
362
|
+
|
363
|
+
new_edge = node.inputs.last
|
364
|
+
new_edge.from.outputs.delete(new_edge)
|
365
|
+
node.inputs.delete(new_edge)
|
366
|
+
end
|
367
|
+
end
|
368
|
+
|
369
|
+
if number == 0
|
370
|
+
connect_over(node)
|
371
|
+
remove(node)
|
372
|
+
else
|
373
|
+
next_node =
|
374
|
+
if number == 1
|
375
|
+
InsnNode.new(Pop.new, node.offset)
|
376
|
+
else
|
377
|
+
InsnNode.new(AdjustStack.new(number), node.offset)
|
378
|
+
end
|
379
|
+
|
380
|
+
next_node.inputs.concat(node.inputs)
|
381
|
+
next_node.outputs.concat(node.outputs)
|
382
|
+
|
383
|
+
# Dynamically finding the index of the node in the nodes array
|
384
|
+
# because we're mutating the array as we go.
|
385
|
+
nodes[nodes.index(node)] = next_node
|
386
|
+
end
|
387
|
+
when Jump
|
388
|
+
# When you have a jump instruction that only has one input and one
|
389
|
+
# output, you can just connect over top of it and remove it.
|
390
|
+
if node.inputs.size == 1 && node.outputs.size == 1
|
391
|
+
connect_over(node)
|
392
|
+
remove(node)
|
393
|
+
end
|
394
|
+
when Pop
|
395
|
+
from = node.inputs.find { |edge| edge.type == :data }.from
|
396
|
+
next unless from.is_a?(InsnNode)
|
397
|
+
|
398
|
+
removed =
|
399
|
+
if from.inputs.empty? && from.outputs.size == 1
|
400
|
+
remove(from)
|
401
|
+
true
|
402
|
+
elsif from.insn.is_a?(Dup)
|
403
|
+
connect_over(from)
|
404
|
+
remove(from)
|
405
|
+
|
406
|
+
new_edge = node.inputs.last
|
407
|
+
new_edge.from.outputs.delete(new_edge)
|
408
|
+
node.inputs.delete(new_edge)
|
409
|
+
true
|
410
|
+
else
|
411
|
+
false
|
412
|
+
end
|
413
|
+
|
414
|
+
if removed
|
415
|
+
connect_over(node)
|
416
|
+
remove(node)
|
417
|
+
end
|
418
|
+
end
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
# Connect one node to another.
|
423
|
+
def connect(from, to, type, label = nil)
|
424
|
+
raise if from == to
|
425
|
+
raise if !to.is_a?(PhiNode) && type == :data && label.nil?
|
426
|
+
|
427
|
+
edge = Edge.new(from, to, type, label)
|
428
|
+
from.outputs << edge
|
429
|
+
to.inputs << edge
|
430
|
+
end
|
431
|
+
|
432
|
+
# Connect all of the inputs to all of the outputs of a node.
|
433
|
+
def connect_over(node)
|
434
|
+
node.inputs.each do |producer_edge|
|
435
|
+
node.outputs.each do |consumer_edge|
|
436
|
+
connect(
|
437
|
+
producer_edge.from,
|
438
|
+
consumer_edge.to,
|
439
|
+
producer_edge.type,
|
440
|
+
producer_edge.label
|
441
|
+
)
|
442
|
+
end
|
443
|
+
end
|
444
|
+
end
|
445
|
+
|
446
|
+
# Remove a node from the graph.
|
447
|
+
def remove(node)
|
448
|
+
node.inputs.each do |producer_edge|
|
449
|
+
producer_edge.from.outputs.reject! { |edge| edge.to == node }
|
450
|
+
end
|
451
|
+
|
452
|
+
node.outputs.each do |consumer_edge|
|
453
|
+
consumer_edge.to.inputs.reject! { |edge| edge.from == node }
|
454
|
+
end
|
455
|
+
|
456
|
+
nodes.delete(node)
|
457
|
+
end
|
458
|
+
end
|
459
|
+
|
460
|
+
attr_reader :dfg, :nodes, :local_graphs
|
461
|
+
|
462
|
+
def initialize(dfg, nodes, local_graphs)
|
463
|
+
@dfg = dfg
|
464
|
+
@nodes = nodes
|
465
|
+
@local_graphs = local_graphs
|
466
|
+
end
|
467
|
+
|
468
|
+
def to_mermaid
|
469
|
+
Mermaid.flowchart do |flowchart|
|
470
|
+
nodes.each do |node|
|
471
|
+
flowchart.node("node_#{node.id}", node.label, shape: :rounded)
|
472
|
+
end
|
473
|
+
|
474
|
+
nodes.each do |producer|
|
475
|
+
producer.outputs.each do |consumer_edge|
|
476
|
+
label =
|
477
|
+
if !consumer_edge.label
|
478
|
+
# No label.
|
479
|
+
elsif consumer_edge.to.is_a?(PhiNode)
|
480
|
+
# Edges into phi nodes are labelled by the offset of the
|
481
|
+
# instruction going into the merge.
|
482
|
+
"%04d" % consumer_edge.label
|
483
|
+
else
|
484
|
+
consumer_edge.label.to_s
|
485
|
+
end
|
486
|
+
|
487
|
+
flowchart.link(
|
488
|
+
flowchart.fetch("node_#{producer.id}"),
|
489
|
+
flowchart.fetch("node_#{consumer_edge.to.id}"),
|
490
|
+
label,
|
491
|
+
type: consumer_edge.type == :info ? :dotted : :directed,
|
492
|
+
color: { data: :green, control: :red }[consumer_edge.type]
|
493
|
+
)
|
494
|
+
end
|
495
|
+
end
|
496
|
+
end
|
497
|
+
end
|
498
|
+
|
499
|
+
def verify
|
500
|
+
# Verify edge labels.
|
501
|
+
nodes.each do |node|
|
502
|
+
# Not talking about phi nodes right now.
|
503
|
+
next if node.is_a?(PhiNode)
|
504
|
+
|
505
|
+
if node.is_a?(InsnNode) && node.insn.branch_targets.any? &&
|
506
|
+
!node.insn.is_a?(Leave)
|
507
|
+
# A branching node must have at least one branch edge and
|
508
|
+
# potentially a fallthrough edge coming out.
|
509
|
+
|
510
|
+
labels = node.outputs.map(&:label).sort
|
511
|
+
raise if labels[0] != :branch0
|
512
|
+
raise if labels[1] != :fallthrough && labels.size > 2
|
513
|
+
else
|
514
|
+
labels = node.inputs.filter { |e| e.type == :data }.map(&:label)
|
515
|
+
next if labels.empty?
|
516
|
+
|
517
|
+
# No nil labels
|
518
|
+
raise if labels.any?(&:nil?)
|
519
|
+
|
520
|
+
# Labels should start at zero.
|
521
|
+
raise unless labels.min.zero?
|
522
|
+
|
523
|
+
# Labels should be contiguous.
|
524
|
+
raise unless labels.sort == (labels.min..labels.max).to_a
|
525
|
+
end
|
526
|
+
end
|
527
|
+
end
|
528
|
+
|
529
|
+
def self.compile(dfg)
|
530
|
+
Compiler.new(dfg).compile
|
531
|
+
end
|
532
|
+
end
|
533
|
+
end
|
534
|
+
end
|
data/lib/syntax_tree/yarv.rb
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "stringio"
|
4
|
+
|
5
|
+
require_relative "yarv/basic_block"
|
6
|
+
require_relative "yarv/bf"
|
7
|
+
require_relative "yarv/calldata"
|
8
|
+
require_relative "yarv/compiler"
|
9
|
+
require_relative "yarv/control_flow_graph"
|
10
|
+
require_relative "yarv/data_flow_graph"
|
11
|
+
require_relative "yarv/decompiler"
|
12
|
+
require_relative "yarv/disassembler"
|
13
|
+
require_relative "yarv/instruction_sequence"
|
14
|
+
require_relative "yarv/instructions"
|
15
|
+
require_relative "yarv/legacy"
|
16
|
+
require_relative "yarv/local_table"
|
17
|
+
require_relative "yarv/sea_of_nodes"
|
18
|
+
require_relative "yarv/assembler"
|
19
|
+
require_relative "yarv/vm"
|
20
|
+
|
3
21
|
module SyntaxTree
|
4
22
|
# This module provides an object representation of the YARV bytecode.
|
5
23
|
module YARV
|