syntax_tree 5.0.1 → 5.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +4 -0
- data/.github/workflows/auto-merge.yml +1 -1
- data/.github/workflows/main.yml +5 -2
- data/.gitmodules +6 -0
- data/.rubocop.yml +58 -1
- data/CHANGELOG.md +28 -1
- data/Gemfile.lock +12 -12
- data/README.md +5 -5
- data/Rakefile +7 -0
- data/exe/yarv +63 -0
- data/lib/syntax_tree/dsl.rb +1004 -0
- data/lib/syntax_tree/formatter.rb +2 -2
- data/lib/syntax_tree/language_server.rb +2 -0
- data/lib/syntax_tree/node.rb +26 -17
- data/lib/syntax_tree/parser.rb +21 -22
- data/lib/syntax_tree/version.rb +1 -1
- data/lib/syntax_tree/yarv/assembler.rb +459 -0
- data/lib/syntax_tree/yarv/bf.rb +176 -0
- data/lib/syntax_tree/yarv/compiler.rb +2298 -0
- data/lib/syntax_tree/yarv/decompiler.rb +263 -0
- data/lib/syntax_tree/yarv/disassembler.rb +212 -0
- data/lib/syntax_tree/yarv/instruction_sequence.rb +1275 -0
- data/lib/syntax_tree/yarv/instructions.rb +5372 -0
- data/lib/syntax_tree/yarv/legacy.rb +215 -0
- data/lib/syntax_tree/yarv/local_table.rb +89 -0
- data/lib/syntax_tree/yarv/vm.rb +624 -0
- data/lib/syntax_tree/yarv.rb +18 -0
- data/lib/syntax_tree.rb +20 -1
- data/syntax_tree.gemspec +1 -1
- metadata +20 -5
@@ -0,0 +1,263 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxTree
|
4
|
+
module YARV
|
5
|
+
# This class is responsible for taking a compiled instruction sequence and
# walking through it to generate equivalent Ruby code.
#
# The generated program emulates the virtual machine's jumps with a local
# variable holding the "current label" and a `loop do case ... end end`
# construct that dispatches on it. When the instruction sequence contains no
# labels the statements are emitted directly without the loop.
class Decompiler
  # When we're decompiling, we use a looped case statement to emulate
  # jumping around in the same way the virtual machine would. This class
  # provides convenience methods for generating the AST nodes that have to
  # do with that label.
  class BlockLabel
    include DSL
    attr_reader :name

    # name is the identifier of the local variable that stores the label.
    def initialize(name)
      @name = name
    end

    # The label variable as an assignment target.
    def field
      VarField(Ident(name))
    end

    # The label variable as a value that is being read.
    def ref
      VarRef(Ident(name))
    end
  end

  include DSL
  attr_reader :iseq, :block_label

  # iseq is the instruction sequence that will be decompiled.
  def initialize(iseq)
    @iseq = iseq
    @block_label = BlockLabel.new("__block_label")
  end

  # Returns a Program node wrapping the decompiled statements.
  def to_ruby
    Program(decompile(iseq))
  end

  private

  # Converts a literal Ruby value into the AST node that represents it.
  # Only integers and symbols are supported; anything else raises so that we
  # never silently embed nil into the generated tree.
  def node_for(value)
    case value
    when Integer
      Int(value.to_s)
    when Symbol
      SymbolLiteral(Ident(value.to_s))
    else
      raise "Unknown object type: #{value.class.name}"
    end
  end

  # Builds the pair of statements that emulate a jump: store the destination
  # label in the block label variable and restart the dispatch loop.
  def jump_to(label)
    [Assign(block_label.field, node_for(label.name)), Next(Args([]))]
  end

  # Builds the assignment of value to target. When value is a binary
  # expression whose left side reads the same target (as in `a = a + 1`) the
  # result is collapsed into an operator assignment (`a += 1`).
  def assignment_for(target, value)
    if value.is_a?(Binary) && VarRef(target) === value.left
      OpAssign(VarField(target), Op("#{value.operator}="), value.right)
    else
      Assign(VarField(target), value)
    end
  end

  # Builds the node for a send without a block. The receiver and the
  # arguments are popped off the end of clause. Calls flagged as FCALL are
  # emitted without a receiver, and single-argument methods ending in "="
  # are emitted as assignments.
  def send_node(insn, clause)
    method = insn.calldata.method.to_s
    argc = insn.calldata.argc
    fcall = insn.calldata.flag?(CallData::CALL_FCALL)

    receiver, *arguments = clause.pop(argc + 1)
    receiver = nil if fcall
    operator = fcall ? nil : Period(".")

    if argc == 0
      # Receiver-less calls get an empty argument list; calls with a
      # receiver carry no argument node at all.
      CallNode(receiver, operator, Ident(method), fcall ? Args([]) : nil)
    elsif argc == 1 && method.end_with?("=")
      Assign(
        CallNode(receiver, operator, Ident(method[0..-2]), nil),
        arguments.first
      )
    else
      CallNode(receiver, operator, Ident(method), ArgParen(Args(arguments)))
    end
  end

  # Walks the instructions of iseq and returns a Statements node. The
  # instructions are grouped into clauses keyed by the label that precedes
  # them (the first group is keyed by :label_0). Each clause doubles as a
  # stack of expressions: instructions pop their operands off the end of the
  # clause and push the resulting node back on.
  #
  # Raises if an instruction (or pushed object) is not supported.
  def decompile(iseq)
    label = :label_0
    clauses = {}
    clause = []

    iseq.insns.each do |insn|
      case insn
      when InstructionSequence::Label
        # If the previous clause does not already end in a jump, control
        # falls through into this label, so point the block label at it.
        unless clause.last.is_a?(Next)
          clause << Assign(block_label.field, node_for(insn.name))
        end

        clauses[label] = clause
        clause = []
        label = insn.name
      when BranchIf
        # branchif transfers control when the popped value is truthy, so
        # the jump is guarded by an `if`.
        clause << IfNode(clause.pop, Statements(jump_to(insn.label)), nil)
      when BranchUnless
        # branchunless transfers control when the popped value is falsy, so
        # the jump is guarded by an `unless`.
        clause << UnlessNode(clause.pop, Statements(jump_to(insn.label)), nil)
      when Dup
        clause << clause.last
      when DupHash
        assocs =
          insn.object.map do |key, hash_value|
            Assoc(node_for(key), node_for(hash_value))
          end

        clause << HashLiteral(LBrace("{"), assocs)
      when GetGlobal
        clause << VarRef(GVar(insn.name.to_s))
      when GetLocalWC0
        local = iseq.local_table.locals[insn.index]
        clause << VarRef(Ident(local.name.to_s))
      when Jump
        clause.concat(jump_to(insn.label))
      when Leave
        value = Args([clause.pop])
        clause << (iseq.type == :top ? Break(value) : ReturnNode(value))
      when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT,
           OptMinus, OptMod, OptMult, OptOr, OptPlus
        left, right = clause.pop(2)
        clause << Binary(left, insn.calldata.method, right)
      when OptAref
        collection, arg = clause.pop(2)
        clause << ARef(collection, Args([arg]))
      when OptAset
        collection, arg, value = clause.pop(3)

        # Collapse `a[b] = a[b] <op> c` into `a[b] <op>= c`.
        clause << if value.is_a?(Binary) && value.left.is_a?(ARef) &&
             collection === value.left.collection &&
             arg === value.left.index.parts[0]
          OpAssign(
            ARefField(collection, Args([arg])),
            Op("#{value.operator}="),
            value.right
          )
        else
          Assign(ARefField(collection, Args([arg])), value)
        end
      when OptNEq
        left, right = clause.pop(2)
        clause << Binary(left, :"!=", right)
      when OptSendWithoutBlock
        clause << send_node(insn, clause)
      when Pop
        # skip
      when PutObject
        case insn.object
        when Float
          clause << FloatLiteral(insn.object.inspect)
        when Integer
          clause << Int(insn.object.inspect)
        else
          raise "Unknown object type: #{insn.object.class.name}"
        end
      when PutObjectInt2Fix0
        clause << Int("0")
      when PutObjectInt2Fix1
        clause << Int("1")
      when PutSelf
        clause << VarRef(Kw("self"))
      when SetGlobal
        clause << assignment_for(GVar(insn.name.to_s), clause.pop)
      when SetLocalWC0
        clause << assignment_for(Ident(local_name(insn.index, 0)), clause.pop)
      else
        raise "Unknown instruction #{insn}"
      end
    end

    # If there's only one clause, then we don't need a case statement, and
    # we can just disassemble the first clause.
    clauses[label] = clause
    return Statements(clauses.values.first) if clauses.size == 1

    # Here we're going to build up a big case statement that will handle all
    # of the different labels. The when clauses are chained together, so we
    # build them back to front.
    current = nil
    clauses.reverse_each do |current_label, current_clause|
      current =
        When(
          Args([node_for(current_label)]),
          Statements(current_clause),
          current
        )
    end
    switch = Case(Kw("case"), block_label.ref, current)

    # Here we're going to make sure that any locals that were established in
    # the label_0 block are initialized so that scoping rules work
    # correctly.
    stack = []
    locals = [block_label.name]

    clauses[:label_0].each do |node|
      if node.is_a?(Assign) && node.target.is_a?(VarField) &&
           node.target.value.is_a?(Ident)
        value = node.target.value.value
        next if locals.include?(value)

        stack << Assign(node.target, VarRef(Kw("nil")))
        locals << value
      end
    end

    # Finally, we'll set up the initial label and loop the entire case
    # statement.
    stack << Assign(block_label.field, node_for(:label_0))
    stack << MethodAddBlock(
      CallNode(nil, nil, Ident("loop"), Args([])),
      BlockNode(
        Kw("do"),
        nil,
        BodyStmt(Statements([switch]), nil, nil, nil, nil)
      )
    )
    Statements(stack)
  end

  # Returns the name (as a string) of the local at index in the local table
  # found by following parent_iseq level times from this decompiler's iseq.
  def local_name(index, level)
    current = iseq
    level.times { current = current.parent_iseq }
    current.local_table.locals[index].name.to_s
  end
end
|
262
|
+
end
|
263
|
+
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxTree
|
4
|
+
module YARV
|
5
|
+
# Produces a textual listing of instruction sequences. Instruction sequences
# are queued with #enqueue and rendered by #format!. The public helper
# methods are used to render individual operands; instructions receive this
# object through their disasm method while a sequence is being formatted.
class Disassembler
  # Names of the call data flags, in the order they are displayed. Each one
  # is checked against the CallData constant named CALL_<name>.
  CALLDATA_FLAGS = %i[
    ARGS_SPLAT
    ARGS_BLOCKARG
    FCALL
    VCALL
    ARGS_SIMPLE
    BLOCKISEQ
    KWARG
    KW_SPLAT
    TAILCALL
    SUPER
    ZSUPER
    OPT_SEND
    KW_SPLAT_MUT
  ].freeze

  # Two-letter codes displayed for each known event.
  EVENT_CODES = {
    RUBY_EVENT_B_CALL: "Bc",
    RUBY_EVENT_B_RETURN: "Br",
    RUBY_EVENT_CALL: "Ca",
    RUBY_EVENT_CLASS: "Cl",
    RUBY_EVENT_END: "En",
    RUBY_EVENT_LINE: "Li",
    RUBY_EVENT_RETURN: "Re"
  }.freeze

  attr_reader :output, :queue
  attr_reader :current_prefix
  attr_accessor :current_iseq

  def initialize
    @output = StringIO.new
    @queue = []

    @current_prefix = ""
    @current_iseq = nil
  end

  ########################################################################
  # Helpers for various instructions
  ########################################################################

  # Renders call data as "<calldata!mid:NAME, argc:N, kw:[...], FLAG|FLAG>".
  # The mid, kw and flag parts are omitted when they have nothing to show.
  def calldata(value)
    flag_names =
      CALLDATA_FLAGS.select do |flag|
        value.flag?(CallData.const_get(:"CALL_#{flag}"))
      end

    parts = []
    parts << "mid:#{value.method}" if value.method
    parts << "argc:#{value.argc}"
    parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
    parts << flag_names.join("|") if flag_names.any?

    "<calldata!#{parts.join(", ")}>"
  end

  # Adds an instruction sequence to the list that #format! will render.
  def enqueue(iseq)
    queue << iseq
  end

  # Returns the two-letter code for the event with the given name. Raises
  # for any event that is not in EVENT_CODES.
  def event(name)
    EVENT_CODES.fetch(name) { raise "Unknown event: #{name}" }
  end

  # Renders an inline storage operand.
  def inline_storage(cache)
    "<is:#{cache}>"
  end

  # Renders an instruction name followed by its comma-separated operands.
  # The name is left-justified to 38 columns when operands are present.
  def instruction(name, operands = [])
    return name if operands.empty?

    format("%-38s %s", name, operands.join(", "))
  end

  # Renders a label by dropping the leading "label_" from its name.
  def label(value)
    value.name["label_".length..]
  end

  # Renders the local at index as "NAME@INDEX". The local table is found by
  # following parent_iseq from the current instruction sequence once per
  # level, where the level is explicit, or implicit, or 0 when neither is
  # given. Only an explicit level is appended to the rendered text.
  def local(index, explicit: nil, implicit: nil)
    current = current_iseq
    (explicit || implicit || 0).times { current = current.parent_iseq }

    # Build the result without mutating a literal: interpolated strings are
    # frozen under the frozen_string_literal magic comment before Ruby 3.0.
    value = "#{current.local_table.name_at(index)}@#{index}"
    explicit ? "#{value}, #{explicit}" : value
  end

  # Renders an arbitrary object operand using its inspect output.
  def object(value)
    value.inspect
  end

  ########################################################################
  # Main entrypoint
  ########################################################################

  # Renders every queued instruction sequence, separated by blank lines, and
  # returns the accumulated output as a string. The queue is drained as it
  # is processed, so sequences enqueued during formatting are rendered too.
  def format!
    while (@current_iseq = queue.shift)
      output << "\n" if output.pos > 0
      format_iseq(@current_iseq)
    end

    output.string
  end

  private

  # Renders a single instruction sequence: header, catch table, local table
  # and finally the instructions themselves.
  def format_iseq(iseq)
    format_header(iseq)
    format_catch_table(iseq.catch_table)
    format_local_table(iseq.local_table)
    format_insns(iseq.insns)
  end

  # Writes the "== disasm:" line, including whether a catch table follows.
  def format_header(iseq)
    output << "#{current_prefix}== disasm: "
    output << "#<ISeq:#{iseq.name}@<compiled>:1 "

    location = Location.fixed(line: iseq.line, char: 0, column: 0)
    output << "(#{location.start_line},#{location.start_column})-"
    output << "(#{location.end_line},#{location.end_column})"
    output << "> "
    output << (iseq.catch_table.any? ? "(catch: TRUE)\n" : "(catch: FALSE)\n")
  end

  # Writes the catch table, if there is one. Entries are written with a
  # "| " prefix, and break and rescue entries are followed by the listing of
  # their own instruction sequence.
  def format_catch_table(catch_table)
    return unless catch_table.any?

    output << "#{current_prefix}== catch table\n"

    with_prefix("#{current_prefix}| ") do
      catch_table.each do |entry|
        case entry
        when InstructionSequence::CatchBreak
          output << "#{current_prefix}catch type: break\n"
          format_iseq(entry.iseq)
        when InstructionSequence::CatchNext
          output << "#{current_prefix}catch type: next\n"
        when InstructionSequence::CatchRedo
          output << "#{current_prefix}catch type: redo\n"
        when InstructionSequence::CatchRescue
          output << "#{current_prefix}catch type: rescue\n"
          format_iseq(entry.iseq)
        end
      end
    end

    output << "#{current_prefix}|#{"-" * 72}\n"
  end

  # Writes the local table, if it is present and not empty.
  def format_local_table(local_table)
    return if !local_table || local_table.empty?

    output << "#{current_prefix}local table (size: #{local_table.size})\n"

    locals =
      local_table.locals.each_with_index.map do |local, index|
        format("[%2d] %s@%d", local_table.offset(index), local.name, index)
      end

    output << "#{current_prefix}#{locals.join(" ")}\n"
  end

  # Writes one line per instruction. Integers and symbols in the list are
  # line numbers and event names; they are collected and attached to the
  # next instruction that is written. Labels are skipped.
  def format_insns(insns)
    length = 0
    events = []
    lines = []

    insns.each do |insn|
      case insn
      when Integer
        lines << insn
      when Symbol
        events << event(insn)
      when InstructionSequence::Label
        # skip
      else
        output << current_prefix << format("%04d ", length)

        # Pad the instruction so that the annotations line up: column 65
        # when a line number follows, column 39 when only events follow.
        width =
          if lines.any?
            65
          elsif events.any?
            39
          else
            0
          end
        output << insn.disasm(self).ljust(width)

        if lines.any?
          output << format("(%4d)", lines.last)
          lines.clear
        end

        if events.any?
          output << "[#{events.join}]"
          events.clear
        end

        output << "\n"
        length += insn.length
      end
    end
  end

  # Runs the block with the current prefix set to value, restoring the
  # previous prefix afterwards even if the block raises.
  def with_prefix(value)
    previous = @current_prefix

    begin
      @current_prefix = value
      yield
    ensure
      @current_prefix = previous
    end
  end
end
|
211
|
+
end
|
212
|
+
end
|