syntax_tree 5.0.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +51 -0
- data/CHANGELOG.md +24 -1
- data/Gemfile.lock +9 -9
- data/README.md +5 -5
- data/lib/syntax_tree/cli.rb +8 -6
- data/lib/syntax_tree/dsl.rb +1004 -0
- data/lib/syntax_tree/formatter.rb +2 -2
- data/lib/syntax_tree/language_server.rb +2 -0
- data/lib/syntax_tree/node.rb +7 -7
- data/lib/syntax_tree/parser.rb +20 -21
- data/lib/syntax_tree/version.rb +1 -1
- data/lib/syntax_tree/yarv/assembler.rb +459 -0
- data/lib/syntax_tree/yarv/bf.rb +179 -0
- data/lib/syntax_tree/yarv/compiler.rb +2287 -0
- data/lib/syntax_tree/yarv/decompiler.rb +254 -0
- data/lib/syntax_tree/yarv/disassembler.rb +211 -0
- data/lib/syntax_tree/yarv/instruction_sequence.rb +1171 -0
- data/lib/syntax_tree/yarv/instructions.rb +5203 -0
- data/lib/syntax_tree/yarv/legacy.rb +192 -0
- data/lib/syntax_tree/yarv/local_table.rb +89 -0
- data/lib/syntax_tree/yarv.rb +287 -0
- data/lib/syntax_tree.rb +23 -1
- data/syntax_tree.gemspec +1 -1
- metadata +15 -4
@@ -0,0 +1,254 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxTree
|
4
|
+
module YARV
|
5
|
+
# This class is responsible for taking a compiled instruction sequence and
|
6
|
+
# walking through it to generate equivalent Ruby code.
|
7
|
+
class Decompiler
|
8
|
+
# When we're decompiling, we use a looped case statement to emulate
|
9
|
+
# jumping around in the same way the virtual machine would. This class
|
10
|
+
# provides convenience methods for generating the AST nodes that have to
|
11
|
+
# do with that label.
|
12
|
+
class BlockLabel
  include DSL

  # The identifier text of the label variable in the generated code.
  attr_reader :name

  def initialize(name)
    @name = name
  end

  # Builds the node used where the label variable is assigned to.
  def field
    identifier = Ident(name)
    VarField(identifier)
  end

  # Builds the node used where the label variable is read.
  def ref
    identifier = Ident(name)
    VarRef(identifier)
  end
end
|
28
|
+
|
29
|
+
include DSL
|
30
|
+
attr_reader :iseq, :block_label
|
31
|
+
|
32
|
+
# Remembers the instruction sequence to decompile and creates the label
# variable (named "__block_label") that the generated code dispatches on.
def initialize(iseq)
  @block_label = BlockLabel.new("__block_label")
  @iseq = iseq
end
|
36
|
+
|
37
|
+
# Decompiles the stored instruction sequence and wraps the resulting
# statements in a Program node.
def to_ruby
  statements = decompile(iseq)
  Program(statements)
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
# Converts a plain Ruby value into the literal AST node that represents it.
#
# value - an Integer, Float, or Symbol.
#
# Returns an Int, FloatLiteral, or SymbolLiteral node respectively.
# Raises a RuntimeError for any other kind of value, mirroring how the
# putobject handling reports unsupported objects, so that a nil node is
# never silently placed into the generated tree.
def node_for(value)
  case value
  when Integer
    Int(value.to_s)
  when Float
    FloatLiteral(value.inspect)
  when Symbol
    SymbolLiteral(Ident(value.to_s))
  else
    raise "Unknown object type: #{value.class.name}"
  end
end
|
51
|
+
|
52
|
+
# Walks the instructions of the given instruction sequence and returns a
# Statements node containing equivalent Ruby code.
#
# Instructions are grouped into clauses keyed by the label that precedes
# them (the first clause is keyed :label_0). While a clause is being built
# it doubles as the value stack: instructions pop their operand nodes from
# it and push their result node back onto it.
#
# If only one clause exists its nodes are returned directly. Otherwise the
# result is a `loop do ... end` around a `case` on the block label
# variable, preceded by nil-initializations of the locals assigned in the
# :label_0 clause and the initial assignment of the label variable.
#
# Raises a RuntimeError for any instruction that is not handled here.
#
# NOTE(review): local_name reads the decompiler's own iseq reader rather
# than the `iseq` argument; the two coincide for the call made in to_ruby.
def decompile(iseq)
  label = :label_0
  clauses = {}
  clause = []

  iseq.insns.each do |insn|
    case insn
    when InstructionSequence::Label
      # Unless the clause already ends with a `next`, fall through to the
      # upcoming label by pointing the label variable at it.
      unless clause.last.is_a?(Next)
        clause << Assign(block_label.field, node_for(insn.name))
      end

      # Close the current clause and start collecting for the new label.
      clauses[label] = clause
      clause = []
      label = insn.name
    when BranchUnless
      body = [
        Assign(block_label.field, node_for(insn.label.name)),
        Next(Args([]))
      ]

      # NOTE(review): this emits `if <condition>` around the jump, while
      # YARV's branchunless presumably jumps when the value is falsy;
      # confirm the condition is not inverted.
      clause << IfNode(clause.pop, Statements(body), nil)
    when Dup
      # Push the same node a second time.
      clause << clause.last
    when DupHash
      assocs =
        insn.object.map do |key, value|
          Assoc(node_for(key), node_for(value))
        end

      clause << HashLiteral(LBrace("{"), assocs)
    when GetGlobal
      clause << VarRef(GVar(insn.name.to_s))
    when GetLocalWC0
      local = iseq.local_table.locals[insn.index]
      clause << VarRef(Ident(local.name.to_s))
    when Jump
      # An unconditional jump: set the label variable and restart the loop.
      clause << Assign(block_label.field, node_for(insn.label.name))
      clause << Next(Args([]))
    when Leave
      # A :top sequence leaves the generated loop with `break`; any other
      # sequence type uses `return`.
      value = Args([clause.pop])
      clause << (iseq.type == :top ? Break(value) : ReturnNode(value))
    when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT,
         OptMinus, OptMod, OptMult, OptOr, OptPlus
      left, right = clause.pop(2)
      clause << Binary(left, insn.calldata.method, right)
    when OptAref
      collection, arg = clause.pop(2)
      clause << ARef(collection, Args([arg]))
    when OptAset
      collection, arg, value = clause.pop(3)

      # When the value is a binary operation whose left side reads the
      # same collection and index, emit an operator assignment instead of
      # a plain assignment.
      clause << if value.is_a?(Binary) && value.left.is_a?(ARef) &&
           collection === value.left.collection &&
           arg === value.left.index.parts[0]
        OpAssign(
          ARefField(collection, Args([arg])),
          Op("#{value.operator}="),
          value.right
        )
      else
        Assign(ARefField(collection, Args([arg])), value)
      end
    when OptNEq
      left, right = clause.pop(2)
      clause << Binary(left, :"!=", right)
    when OptSendWithoutBlock
      method = insn.calldata.method.to_s
      argc = insn.calldata.argc

      if insn.calldata.flag?(CallData::CALL_FCALL)
        # FCALL: the receiver node is popped and discarded, and the call
        # is emitted without a receiver.
        if argc == 0
          clause.pop
          clause << CallNode(nil, nil, Ident(method), Args([]))
        elsif argc == 1 && method.end_with?("=")
          # A one-argument method ending in "=" becomes an assignment.
          _receiver, argument = clause.pop(2)
          clause << Assign(
            CallNode(nil, nil, Ident(method[0..-2]), nil),
            argument
          )
        else
          _receiver, *arguments = clause.pop(argc + 1)
          clause << CallNode(
            nil,
            nil,
            Ident(method),
            ArgParen(Args(arguments))
          )
        end
      else
        # Otherwise the call is emitted on the popped receiver.
        if argc == 0
          clause << CallNode(clause.pop, Period("."), Ident(method), nil)
        elsif argc == 1 && method.end_with?("=")
          receiver, argument = clause.pop(2)
          clause << Assign(
            CallNode(receiver, Period("."), Ident(method[0..-2]), nil),
            argument
          )
        else
          receiver, *arguments = clause.pop(argc + 1)
          clause << CallNode(
            receiver,
            Period("."),
            Ident(method),
            ArgParen(Args(arguments))
          )
        end
      end
    when PutObject
      case insn.object
      when Float
        clause << FloatLiteral(insn.object.inspect)
      when Integer
        clause << Int(insn.object.inspect)
      else
        raise "Unknown object type: #{insn.object.class.name}"
      end
    when PutObjectInt2Fix0
      clause << Int("0")
    when PutObjectInt2Fix1
      clause << Int("1")
    when PutSelf
      clause << VarRef(Kw("self"))
    when SetGlobal
      target = GVar(insn.name.to_s)
      value = clause.pop

      # A binary operation whose left side reads the same global becomes
      # an operator assignment.
      clause << if value.is_a?(Binary) && VarRef(target) === value.left
        OpAssign(VarField(target), Op("#{value.operator}="), value.right)
      else
        Assign(VarField(target), value)
      end
    when SetLocalWC0
      target = Ident(local_name(insn.index, 0))
      value = clause.pop

      # A binary operation whose left side reads the same local becomes
      # an operator assignment.
      clause << if value.is_a?(Binary) && VarRef(target) === value.left
        OpAssign(VarField(target), Op("#{value.operator}="), value.right)
      else
        Assign(VarField(target), value)
      end
    else
      raise "Unknown instruction #{insn}"
    end
  end

  # If there's only one clause, then we don't need a case statement, and
  # we can just return the decompiled nodes of that clause.
  clauses[label] = clause
  return Statements(clauses.values.first) if clauses.size == 1

  # Here we're going to build up a big case statement that will handle all
  # of the different labels. The clauses are visited in reverse so that
  # each When node can receive the following one as its consequent.
  current = nil
  clauses.reverse_each do |current_label, current_clause|
    current =
      When(
        Args([node_for(current_label)]),
        Statements(current_clause),
        current
      )
  end
  switch = Case(Kw("case"), block_label.ref, current)

  # Here we're going to make sure that any locals that were established in
  # the label_0 block are initialized so that scoping rules work
  # correctly.
  stack = []
  locals = [block_label.name]

  clauses[:label_0].each do |node|
    if node.is_a?(Assign) && node.target.is_a?(VarField) &&
         node.target.value.is_a?(Ident)
      value = node.target.value.value
      # Each local (and the label variable itself) is initialized once.
      next if locals.include?(value)

      stack << Assign(node.target, VarRef(Kw("nil")))
      locals << value
    end
  end

  # Finally, we'll set up the initial label and loop the entire case
  # statement.
  stack << Assign(block_label.field, node_for(:label_0))
  stack << MethodAddBlock(
    CallNode(nil, nil, Ident("loop"), Args([])),
    BlockNode(
      Kw("do"),
      nil,
      BodyStmt(Statements([switch]), nil, nil, nil, nil)
    )
  )
  Statements(stack)
end
|
246
|
+
|
247
|
+
# Returns, as a String, the name of the local stored at `index` in the
# local table found `level` parents above the decompiler's instruction
# sequence (level 0 is the sequence itself).
def local_name(index, level)
  owner = level.times.inject(iseq) { |scope, _| scope.parent_iseq }
  owner.local_table.locals[index].name.to_s
end
|
252
|
+
end
|
253
|
+
end
|
254
|
+
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxTree
|
4
|
+
module YARV
|
5
|
+
class Disassembler
|
6
|
+
attr_reader :output, :queue
|
7
|
+
attr_reader :current_prefix, :current_iseq
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@output = StringIO.new
|
11
|
+
@queue = []
|
12
|
+
|
13
|
+
@current_prefix = ""
|
14
|
+
@current_iseq = nil
|
15
|
+
end
|
16
|
+
|
17
|
+
########################################################################
|
18
|
+
# Helpers for various instructions
|
19
|
+
########################################################################
|
20
|
+
|
21
|
+
# Renders call data as a "<calldata!...>" operand string. The parts, in
# order, are the method id (when present), the argument count, the keyword
# argument names (when present), and the names of all set flags joined
# with "|" (when any flag is set).
def calldata(value)
  # Flag names paired with their masks, in the order they are printed.
  flag_table = [
    [:ARGS_SPLAT, CallData::CALL_ARGS_SPLAT],
    [:ARGS_BLOCKARG, CallData::CALL_ARGS_BLOCKARG],
    [:FCALL, CallData::CALL_FCALL],
    [:VCALL, CallData::CALL_VCALL],
    [:ARGS_SIMPLE, CallData::CALL_ARGS_SIMPLE],
    [:BLOCKISEQ, CallData::CALL_BLOCKISEQ],
    [:KWARG, CallData::CALL_KWARG],
    [:KW_SPLAT, CallData::CALL_KW_SPLAT],
    [:TAILCALL, CallData::CALL_TAILCALL],
    [:SUPER, CallData::CALL_SUPER],
    [:ZSUPER, CallData::CALL_ZSUPER],
    [:OPT_SEND, CallData::CALL_OPT_SEND],
    [:KW_SPLAT_MUT, CallData::CALL_KW_SPLAT_MUT]
  ]
  set_flags = flag_table.select { |_, mask| value.flag?(mask) }.map(&:first)

  segments = [
    ("mid:#{value.method}" if value.method),
    "argc:#{value.argc}",
    ("kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg),
    (set_flags.join("|") unless set_flags.empty?)
  ].compact

  "<calldata!#{segments.join(", ")}>"
end
|
47
|
+
|
48
|
+
# Appends an instruction sequence to the queue that format! drains.
def enqueue(iseq)
  queue.push(iseq)
end
|
51
|
+
|
52
|
+
# Maps an event name to the two-letter abbreviation printed next to an
# instruction. Raises a RuntimeError for any name that is not listed.
def event(name)
  case name
  when :RUBY_EVENT_B_CALL then "Bc"
  when :RUBY_EVENT_B_RETURN then "Br"
  when :RUBY_EVENT_CALL then "Ca"
  when :RUBY_EVENT_CLASS then "Cl"
  when :RUBY_EVENT_END then "En"
  when :RUBY_EVENT_LINE then "Li"
  when :RUBY_EVENT_RETURN then "Re"
  else raise "Unknown event: #{name}"
  end
end
|
72
|
+
|
73
|
+
# Renders an inline storage operand as "<is:...>" around the given value.
def inline_storage(cache)
  format("<is:%s>", cache)
end
|
76
|
+
|
77
|
+
# Renders one instruction. Without operands the bare name is returned;
# otherwise the name is left-justified to 38 characters and followed by a
# space and the operands joined with ", ".
def instruction(name, operands = [])
  return name if operands.empty?

  format("%-38s %s", name, operands.join(", "))
end
|
80
|
+
|
81
|
+
# Renders a label operand as the part of its name that follows the
# leading "label_"-sized prefix.
def label(value)
  prefix_size = "label_".length
  value.name[prefix_size..]
end
|
84
|
+
|
85
|
+
# Renders a local variable operand as "name@index".
#
# index    - position of the local in the local table.
# explicit - number of parent instruction sequences to walk up; when given
#            it is also appended to the result as ", <explicit>".
# implicit - number of parent instruction sequences to walk up without
#            appending the level to the result.
#
# When neither level is given the current instruction sequence's own local
# table is used (level 0), rather than failing on a nil level.
def local(index, explicit: nil, implicit: nil)
  depth = explicit || implicit || 0
  scope = current_iseq
  depth.times { scope = scope.parent_iseq }

  description = "#{scope.local_table.name_at(index)}@#{index}"
  description << ", #{explicit}" if explicit
  description
end
|
93
|
+
|
94
|
+
# Renders an operand object using its #inspect representation.
def object(value)
  value.inspect
end
|
97
|
+
|
98
|
+
########################################################################
|
99
|
+
# Main entrypoint
|
100
|
+
########################################################################
|
101
|
+
|
102
|
+
# Drains the queue, formatting each instruction sequence in turn (entries
# enqueued while formatting are picked up as well) and separating
# consecutive sequences with a blank line. Returns the accumulated output
# as a String.
def format!
  @current_iseq = queue.shift

  while @current_iseq
    output << "\n" unless output.pos.zero?
    format_iseq(@current_iseq)
    @current_iseq = queue.shift
  end

  output.string
end
|
110
|
+
|
111
|
+
private
|
112
|
+
|
113
|
+
# Writes the disassembly of a single instruction sequence to the output:
# a header line, the catch table (if it has entries), the local table (if
# it is present and non-empty), and then one line per instruction. Every
# line is prefixed with the current prefix.
def format_iseq(iseq)
  # Header: name and source location, followed by the catch marker below.
  output << "#{current_prefix}== disasm: "
  output << "#<ISeq:#{iseq.name}@<compiled>:1 "

  location = iseq.location
  output << "(#{location.start_line},#{location.start_column})-"
  output << "(#{location.end_line},#{location.end_column})"
  output << "> "

  if iseq.catch_table.any?
    output << "(catch: TRUE)\n"
    output << "#{current_prefix}== catch table\n"

    # Catch table entries are printed with the prefix extended by "| ".
    # Break and rescue entries also get their own instruction sequence
    # formatted in place; next and redo entries only print their type.
    with_prefix("#{current_prefix}| ") do
      iseq.catch_table.each do |entry|
        case entry
        when InstructionSequence::CatchBreak
          output << "#{current_prefix}catch type: break\n"
          format_iseq(entry.iseq)
        when InstructionSequence::CatchNext
          output << "#{current_prefix}catch type: next\n"
        when InstructionSequence::CatchRedo
          output << "#{current_prefix}catch type: redo\n"
        when InstructionSequence::CatchRescue
          output << "#{current_prefix}catch type: rescue\n"
          format_iseq(entry.iseq)
        end
      end
    end

    # A 72-dash rule closes the catch table section.
    output << "#{current_prefix}|#{"-" * 72}\n"
  else
    output << "(catch: FALSE)\n"
  end

  if (local_table = iseq.local_table) && !local_table.empty?
    output << "#{current_prefix}local table (size: #{local_table.size})\n"

    locals =
      local_table.locals.each_with_index.map do |local, index|
        "[%2d] %s@%d" % [local_table.offset(index), local.name, index]
      end

    output << "#{current_prefix}#{locals.join("    ")}\n"
  end

  # Running offset printed in front of each instruction, plus the line
  # numbers and events seen since the last printed instruction.
  length = 0
  events = []
  lines = []

  iseq.insns.each do |insn|
    case insn
    when Integer
      # An Integer entry is a line number for the next instruction.
      lines << insn
    when Symbol
      # A Symbol entry is an event for the next instruction.
      events << event(insn)
    when InstructionSequence::Label
      # skip
    else
      output << "#{current_prefix}%04d " % length

      disasm = insn.disasm(self)
      output << disasm

      # Pad the instruction text to column 65 when a line number will be
      # printed, or to column 39 when only events will be printed.
      if lines.any?
        output << " " * (65 - disasm.length) if disasm.length < 65
      elsif events.any?
        output << " " * (39 - disasm.length) if disasm.length < 39
      end

      # Only the most recent pending line number is printed.
      if lines.any?
        output << "(%4d)" % lines.last
        lines.clear
      end

      if events.any?
        output << "[#{events.join}]"
        events.clear
      end

      output << "\n"
      length += insn.length
    end
  end
end
|
198
|
+
|
199
|
+
# Runs the given block with the current prefix set to `value`, restoring
# the previous prefix afterwards even when the block raises. Returns the
# block's result.
def with_prefix(value)
  saved_prefix = @current_prefix
  @current_prefix = value
  yield
ensure
  @current_prefix = saved_prefix
end
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|