syntax_tree 5.0.1 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,254 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ # This class is responsible for taking a compiled instruction sequence and
6
+ # walking through it to generate equivalent Ruby code.
7
+ class Decompiler
8
+ # When we're decompiling, we use a looped case statement to emulate
9
+ # jumping around in the same way the virtual machine would. This class
10
+ # provides convenience methods for generating the AST nodes that have to
11
+ # do with that label.
12
class BlockLabel
  include DSL

  attr_reader :name

  # name is the identifier text used for the label variable.
  def initialize(name)
    @name = name
  end

  # The label wrapped in a VarField node, for use as an assignment target.
  def field
    VarField(identifier)
  end

  # The label wrapped in a VarRef node, for use where it is read.
  def ref
    VarRef(identifier)
  end

  private

  # A fresh Ident node for the label name; built on every call so that
  # field and ref never share a node instance.
  def identifier
    Ident(name)
  end
end
28
+
29
+ include DSL
30
+ attr_reader :iseq, :block_label
31
+
32
# Stores the instruction sequence to decompile and creates the label
# variable ("__block_label") that the generated code assigns to when it
# needs to jump.
def initialize(iseq)
  @block_label = BlockLabel.new("__block_label")
  @iseq = iseq
end
36
+
37
# Decompiles the stored instruction sequence and wraps the resulting
# statements in a Program node.
def to_ruby
  statements = decompile(iseq)
  Program(statements)
end
40
+
41
+ private
42
+
43
# Converts a plain Ruby value into the AST node that represents it as a
# literal. Integers become Int nodes and symbols become SymbolLiteral nodes;
# any other value yields nil.
def node_for(value)
  case value
  when Integer then Int(value.to_s)
  when Symbol then SymbolLiteral(Ident(value.to_s))
  else nil
  end
end
51
+
52
# Rebuilds Ruby AST statements from the instructions of the given iseq.
#
# The instruction list is walked once. While walking, the current clause (an
# array of AST nodes) doubles as the operand stack: instructions pop the
# nodes they consume from its end and push the node they produce. Every
# label instruction closes the current clause and starts a new one.
#
# If no label was seen, the single clause is returned as Statements.
# Otherwise the clauses become the branches of a `case` on the block label
# variable, wrapped in `loop do ... end`; a jump is expressed as an
# assignment to the block label followed by `next`.
#
# Raises a RuntimeError for an instruction that is not handled here, and for
# a PutObject operand that is neither a Float nor an Integer.
def decompile(iseq)
  label = :label_0
  clauses = {}
  clause = []

  iseq.insns.each do |insn|
    case insn
    when InstructionSequence::Label
      # Fall through into the next label unless the clause already ended
      # with an explicit jump.
      unless clause.last.is_a?(Next)
        clause << Assign(block_label.field, node_for(insn.name))
      end

      clauses[label] = clause
      clause = []
      label = insn.name
    when BranchUnless
      # NOTE(review): the emitted `if` performs the jump when the popped
      # condition is truthy, whereas the instruction name suggests the jump
      # should happen when it is falsy. Confirm against the VM semantics.
      body = jump_to(insn.label.name)
      clause << IfNode(clause.pop, Statements(body), nil)
    when Dup
      clause << clause.last
    when DupHash
      assocs =
        insn.object.map do |key, value|
          Assoc(node_for(key), node_for(value))
        end

      clause << HashLiteral(LBrace("{"), assocs)
    when GetGlobal
      clause << VarRef(GVar(insn.name.to_s))
    when GetLocalWC0
      local = iseq.local_table.locals[insn.index]
      clause << VarRef(Ident(local.name.to_s))
    when Jump
      clause.concat(jump_to(insn.label.name))
    when Leave
      # Leaving the top-level sequence has to exit the dispatch loop, so it
      # is emitted as a break; everywhere else it is a return.
      value = Args([clause.pop])
      clause << (iseq.type == :top ? Break(value) : ReturnNode(value))
    when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT,
         OptMinus, OptMod, OptMult, OptOr, OptPlus
      left, right = clause.pop(2)
      clause << Binary(left, insn.calldata.method, right)
    when OptAref
      collection, arg = clause.pop(2)
      clause << ARef(collection, Args([arg]))
    when OptAset
      collection, arg, value = clause.pop(3)

      # `a[i] = a[i] <op> x` is folded into `a[i] <op>= x`.
      clause << if value.is_a?(Binary) && value.left.is_a?(ARef) &&
           collection === value.left.collection &&
           arg === value.left.index.parts[0]
        OpAssign(
          ARefField(collection, Args([arg])),
          Op("#{value.operator}="),
          value.right
        )
      else
        Assign(ARefField(collection, Args([arg])), value)
      end
    when OptNEq
      left, right = clause.pop(2)
      clause << Binary(left, :"!=", right)
    when OptSendWithoutBlock
      clause << send_node_for(insn.calldata, clause)
    when PutObject
      case insn.object
      when Float
        clause << FloatLiteral(insn.object.inspect)
      when Integer
        clause << Int(insn.object.inspect)
      else
        raise "Unknown object type: #{insn.object.class.name}"
      end
    when PutObjectInt2Fix0
      clause << Int("0")
    when PutObjectInt2Fix1
      clause << Int("1")
    when PutSelf
      clause << VarRef(Kw("self"))
    when SetGlobal
      clause << assignment_for(GVar(insn.name.to_s), clause.pop)
    when SetLocalWC0
      clause << assignment_for(Ident(local_name(insn.index, 0)), clause.pop)
    else
      raise "Unknown instruction #{insn}"
    end
  end

  # If there's only one clause, then we don't need a case statement, and
  # we can just disassemble the first clause.
  clauses[label] = clause
  return Statements(clauses.values.first) if clauses.size == 1

  # Locals first assigned in the label_0 block are initialized up front,
  # then the initial label is set and the case statement is looped.
  stack = hoisted_local_assignments(clauses[:label_0])
  stack << Assign(block_label.field, node_for(:label_0))
  stack << dispatch_loop(clauses)
  Statements(stack)
end

# Returns the two nodes that emulate a jump: assign the target label name to
# the block label variable, then `next` to re-enter the dispatch loop.
def jump_to(label_name)
  [Assign(block_label.field, node_for(label_name)), Next(Args([]))]
end

# Builds the assignment of value to the variable named by target. When value
# is a binary expression whose left side is a reference to that same
# variable, `x = x <op> y` is folded into `x <op>= y`.
def assignment_for(target, value)
  if value.is_a?(Binary) && VarRef(target) === value.left
    OpAssign(VarField(target), Op("#{value.operator}="), value.right)
  else
    Assign(VarField(target), value)
  end
end

# Pops the receiver and the arguments of a send off the clause and returns
# the node for the call. Calls flagged FCALL are emitted without a receiver
# (the popped receiver is discarded). A one-argument call to a method whose
# name ends in "=" is emitted as an attribute assignment.
def send_node_for(calldata, clause)
  name = calldata.method.to_s
  argc = calldata.argc
  fcall = calldata.flag?(CallData::CALL_FCALL)

  receiver, *arguments = clause.pop(argc + 1)
  receiver = nil if fcall
  operator = fcall ? nil : Period(".")

  if argc == 0
    # Receiver-less calls carry an empty argument list; calls with a
    # receiver carry none at all.
    CallNode(receiver, operator, Ident(name), fcall ? Args([]) : nil)
  elsif argc == 1 && name.end_with?("=")
    Assign(
      CallNode(receiver, operator, Ident(name[0..-2]), nil),
      arguments.first
    )
  else
    CallNode(receiver, operator, Ident(name), ArgParen(Args(arguments)))
  end
end

# Returns one `name = nil` assignment for every distinct identifier that is
# the target of a plain assignment in the given nodes, skipping the block
# label variable itself. Emitting these before the loop makes the locals
# visible outside of the loop block.
def hoisted_local_assignments(nodes)
  seen = [block_label.name]

  nodes.each_with_object([]) do |node, assignments|
    next unless node.is_a?(Assign) && node.target.is_a?(VarField) &&
                node.target.value.is_a?(Ident)

    name = node.target.value.value
    next if seen.include?(name)

    assignments << Assign(node.target, VarRef(Kw("nil")))
    seen << name
  end
end

# Builds `loop do case <block label> when ... end end` with one `when`
# branch per clause, in the order the clauses were recorded.
def dispatch_loop(clauses)
  # Each When node links to the one that follows it, so the chain is built
  # from the last clause backwards.
  branches = nil
  clauses.reverse_each do |clause_label, clause_nodes|
    branches =
      When(Args([node_for(clause_label)]), Statements(clause_nodes), branches)
  end
  switch = Case(Kw("case"), block_label.ref, branches)

  MethodAddBlock(
    CallNode(nil, nil, Ident("loop"), Args([])),
    BlockNode(
      Kw("do"),
      nil,
      BodyStmt(Statements([switch]), nil, nil, nil, nil)
    )
  )
end
246
+
247
# Returns, as a string, the name of the local at the given index in the
# local table found by following parent_iseq level times from the stored
# instruction sequence.
def local_name(index, level)
  owner = level.times.reduce(iseq) { |scope, _| scope.parent_iseq }
  owner.local_table.locals[index].name.to_s
end
252
+ end
253
+ end
254
+ end
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ class Disassembler
6
+ attr_reader :output, :queue
7
+ attr_reader :current_prefix, :current_iseq
8
+
9
+ def initialize
10
+ @output = StringIO.new
11
+ @queue = []
12
+
13
+ @current_prefix = ""
14
+ @current_iseq = nil
15
+ end
16
+
17
+ ########################################################################
18
+ # Helpers for various instructions
19
+ ########################################################################
20
+
21
# Renders call data as "<calldata!...>". The parts, in order and joined by
# ", ", are: the method name (when present), the argument count, the keyword
# argument names (when present), and the names of the set flags joined by
# "|" (when at least one is set).
def calldata(value)
  # Listed in the order the flag names are printed.
  known_flags = [
    [:ARGS_SPLAT, CallData::CALL_ARGS_SPLAT],
    [:ARGS_BLOCKARG, CallData::CALL_ARGS_BLOCKARG],
    [:FCALL, CallData::CALL_FCALL],
    [:VCALL, CallData::CALL_VCALL],
    [:ARGS_SIMPLE, CallData::CALL_ARGS_SIMPLE],
    [:BLOCKISEQ, CallData::CALL_BLOCKISEQ],
    [:KWARG, CallData::CALL_KWARG],
    [:KW_SPLAT, CallData::CALL_KW_SPLAT],
    [:TAILCALL, CallData::CALL_TAILCALL],
    [:SUPER, CallData::CALL_SUPER],
    [:ZSUPER, CallData::CALL_ZSUPER],
    [:OPT_SEND, CallData::CALL_OPT_SEND],
    [:KW_SPLAT_MUT, CallData::CALL_KW_SPLAT_MUT]
  ]
  flag_names = known_flags.select { |(_, mask)| value.flag?(mask) }.map(&:first)

  parts = [
    ("mid:#{value.method}" if value.method),
    "argc:#{value.argc}",
    ("kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg),
    (flag_names.join("|") unless flag_names.empty?)
  ].compact

  "<calldata!#{parts.join(", ")}>"
end
47
+
48
# Appends an instruction sequence to the end of the queue of sequences that
# are waiting to be formatted.
def enqueue(iseq)
  queue.push(iseq)
end
51
+
52
# Maps an event name to its two-letter abbreviation. Raises a RuntimeError
# for any name that is not in the table.
def event(name)
  abbreviations = {
    RUBY_EVENT_B_CALL: "Bc",
    RUBY_EVENT_B_RETURN: "Br",
    RUBY_EVENT_CALL: "Ca",
    RUBY_EVENT_CLASS: "Cl",
    RUBY_EVENT_END: "En",
    RUBY_EVENT_LINE: "Li",
    RUBY_EVENT_RETURN: "Re"
  }

  abbreviations.fetch(name) { raise "Unknown event: #{name}" }
end
72
+
73
# Renders an inline storage operand as "<is:...>" around the given value.
def inline_storage(cache)
  format("<is:%s>", cache)
end
76
+
77
# Renders an instruction. Without operands the name is returned as is;
# otherwise the name is left-justified to 38 columns and followed by a space
# and the operands joined with ", ".
def instruction(name, operands = [])
  return name if operands.empty?

  format("%-38s %s", name, operands.join(", "))
end
80
+
81
# Returns the label's name with its first "label_".length characters
# dropped, leaving whatever follows that prefix.
def label(value)
  prefix_length = "label_".length
  value.name[prefix_length..-1]
end
84
+
85
# Renders a local variable operand as "name@index".
#
# The name is looked up in the local table found by following parent_iseq
# from the current instruction sequence, explicit or implicit times. When
# the level is given through explicit it is also appended to the output
# (", <level>"); a level given through implicit is not printed. When neither
# is given the current instruction sequence is used.
def local(index, explicit: nil, implicit: nil)
  # Fall back to 0 so that omitting both keywords means "this scope" rather
  # than calling #times on nil.
  depth = explicit || implicit || 0

  scope = current_iseq
  depth.times { scope = scope.parent_iseq }

  # The result is built without appending to an interpolated literal, since
  # such literals are frozen under `frozen_string_literal: true` before
  # Ruby 3.0.
  name = "#{scope.local_table.name_at(index)}@#{index}"
  explicit ? "#{name}, #{explicit}" : name
end
93
+
94
# Renders an object operand using the value's own #inspect output.
def object(value)
  value.inspect
end
97
+
98
+ ########################################################################
99
+ # Main entrypoint
100
+ ########################################################################
101
+
102
# Formats every queued instruction sequence, in queue order, and returns the
# accumulated output as a string. Sequences are separated by a newline. The
# queue is re-read after each sequence, so anything enqueued while
# formatting is picked up too.
def format!
  @current_iseq = queue.shift

  while @current_iseq
    # Only separate from earlier output; nothing is written before the
    # first sequence.
    output << "\n" unless output.pos.zero?
    format_iseq(@current_iseq)

    @current_iseq = queue.shift
  end

  output.string
end
110
+
111
+ private
112
+
113
# Writes the disassembly of one instruction sequence to the output: a header
# line with its name and source range, the catch table (if it has entries),
# the local table (if it has entries), and then one line per instruction.
def format_iseq(iseq)
  output << "#{current_prefix}== disasm: "
  output << "#<ISeq:#{iseq.name}@<compiled>:1 "

  span = iseq.location
  output << "(#{span.start_line},#{span.start_column})-"
  output << "(#{span.end_line},#{span.end_column})"
  output << "> "

  format_catch_table(iseq.catch_table)
  format_local_table(iseq.local_table)
  format_insns(iseq.insns)
end

# Finishes the header line with the catch marker and, when there are catch
# entries, writes them under a "| "-extended prefix. Break and rescue
# entries are followed by the disassembly of their own instruction sequence.
def format_catch_table(catch_table)
  unless catch_table.any?
    output << "(catch: FALSE)\n"
    return
  end

  output << "(catch: TRUE)\n"
  output << "#{current_prefix}== catch table\n"

  with_prefix("#{current_prefix}| ") do
    catch_table.each do |entry|
      kind, nested =
        case entry
        when InstructionSequence::CatchBreak then ["break", true]
        when InstructionSequence::CatchNext then ["next", false]
        when InstructionSequence::CatchRedo then ["redo", false]
        when InstructionSequence::CatchRescue then ["rescue", true]
        end

      # Entries of any other class produce no output.
      next unless kind

      output << "#{current_prefix}catch type: #{kind}\n"
      format_iseq(entry.iseq) if nested
    end
  end

  output << "#{current_prefix}|#{"-" * 72}\n"
end

# Writes the local table summary: its size, then one "[offset] name@index"
# item per local. Nothing is written when the table is missing or empty.
def format_local_table(local_table)
  return if !local_table || local_table.empty?

  output << "#{current_prefix}local table (size: #{local_table.size})\n"

  entries =
    local_table.locals.map.with_index do |entry, position|
      format("[%2d] %s@%d", local_table.offset(position), entry.name, position)
    end

  output << "#{current_prefix}#{entries.join(" ")}\n"
end

# Writes one line per instruction, prefixed with a running offset that is
# advanced by each instruction's length. Integers (line numbers) and symbols
# (events) in the list are collected and attached to the next instruction
# line; labels are skipped.
def format_insns(insns)
  offset = 0
  pending_events = []
  pending_lines = []

  insns.each do |insn|
    case insn
    when Integer
      pending_lines << insn
    when Symbol
      pending_events << event(insn)
    when InstructionSequence::Label
      # Labels produce no output and do not advance the offset.
    else
      output << format("#{current_prefix}%04d ", offset)

      text = insn.disasm(self)
      output << text

      # A line number is aligned at column 65; events on their own are
      # aligned at column 39.
      column =
        if !pending_lines.empty?
          65
        elsif !pending_events.empty?
          39
        end

      if column
        padding = column - text.length
        output << " " * padding if padding > 0
      end

      unless pending_lines.empty?
        output << format("(%4d)", pending_lines.last)
        pending_lines.clear
      end

      unless pending_events.empty?
        output << "[#{pending_events.join}]"
        pending_events.clear
      end

      output << "\n"
      offset += insn.length
    end
  end
end
198
+
199
# Runs the given block with the current prefix temporarily replaced by
# value. The previous prefix is restored afterwards, even if the block
# raises. Returns the block's result.
def with_prefix(value)
  saved_prefix = @current_prefix
  @current_prefix = value
  yield
ensure
  @current_prefix = saved_prefix
end
209
+ end
210
+ end
211
+ end