syntax_tree 5.0.1 → 5.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,263 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
+ # This class is responsible for taking a compiled instruction sequence and
6
+ # walking through it to generate equivalent Ruby code.
7
class Decompiler
  # When we're decompiling, we use a looped case statement to emulate
  # jumping around in the same way the virtual machine would. This class
  # provides convenience methods for generating the AST nodes that have to
  # do with that label.
  class BlockLabel
    include DSL
    attr_reader :name

    def initialize(name)
      @name = name
    end

    # The label variable wrapped as an assignment target.
    def field
      VarField(Ident(name))
    end

    # The label variable wrapped as a value that is being read.
    def ref
      VarRef(Ident(name))
    end
  end

  # Method names that end in "=" but are operators rather than attribute
  # writers. A call to one of these must never be decompiled into an
  # assignment (e.g. `a === b` is not `a.== = b`).
  OPERATOR_METHODS = %w[== === != <= >= []=].freeze

  include DSL
  attr_reader :iseq, :block_label

  def initialize(iseq)
    @iseq = iseq
    @block_label = BlockLabel.new("__block_label")
  end

  # Decompiles the instruction sequence given to the constructor and wraps
  # the resulting statements in a Program node.
  def to_ruby
    Program(decompile(iseq))
  end

  private

  # Converts a plain Ruby value into the AST node of its literal. Only
  # integers and symbols are handled; any other value yields nil.
  def node_for(value)
    case value
    when Integer
      Int(value.to_s)
    when Symbol
      SymbolLiteral(Ident(value.to_s))
    end
  end

  # Walks the instructions of the given instruction sequence, treating the
  # clause currently being built as the VM stack: instructions pop their
  # operands off the end of it and push the node they produce. Every label
  # starts a new clause. Returns a Statements node.
  def decompile(iseq)
    label = :label_0
    clauses = {}
    clause = []

    iseq.insns.each do |insn|
      case insn
      when InstructionSequence::Label
        # Unless the clause already ends in an explicit jump, fall through
        # into the label that is about to start.
        unless clause.last.is_a?(Next)
          clause << Assign(block_label.field, node_for(insn.name))
        end

        clauses[label] = clause
        clause = []
        label = insn.name
      when BranchIf
        # NOTE(review): this wraps the jump in `unless <condition>` for
        # BranchIf (and in `if <condition>` for BranchUnless below). Confirm
        # that this polarity matches the VM's branch semantics.
        clause << UnlessNode(clause.pop, Statements(jump_to(insn.label)), nil)
      when BranchUnless
        clause << IfNode(clause.pop, Statements(jump_to(insn.label)), nil)
      when Dup
        clause << clause.last
      when DupHash
        assocs =
          insn.object.map do |key, value|
            Assoc(node_for(key), node_for(value))
          end

        clause << HashLiteral(LBrace("{"), assocs)
      when GetGlobal
        clause << VarRef(GVar(insn.name.to_s))
      when GetLocalWC0
        clause << VarRef(Ident(local_name(insn.index, 0)))
      when Jump
        clause.concat(jump_to(insn.label))
      when Leave
        value = Args([clause.pop])
        clause << (iseq.type == :top ? Break(value) : ReturnNode(value))
      when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT,
           OptMinus, OptMod, OptMult, OptOr, OptPlus
        left, right = clause.pop(2)
        clause << Binary(left, insn.calldata.method, right)
      when OptAref
        collection, arg = clause.pop(2)
        clause << ARef(collection, Args([arg]))
      when OptAset
        collection, arg, value = clause.pop(3)

        # `a[b] = a[b] <op> c` is folded back into `a[b] <op>= c`.
        clause << if value.is_a?(Binary) && value.left.is_a?(ARef) &&
             collection === value.left.collection &&
             arg === value.left.index.parts[0]
          OpAssign(
            ARefField(collection, Args([arg])),
            Op("#{value.operator}="),
            value.right
          )
        else
          Assign(ARefField(collection, Args([arg])), value)
        end
      when OptNEq
        left, right = clause.pop(2)
        clause << Binary(left, :"!=", right)
      when OptSendWithoutBlock
        node = call_node_for(insn, clause)
        clause << node
      when Pop
        # skip
      when PutObject
        case insn.object
        when Float
          clause << FloatLiteral(insn.object.inspect)
        when Integer
          clause << Int(insn.object.inspect)
        else
          raise "Unknown object type: #{insn.object.class.name}"
        end
      when PutObjectInt2Fix0
        clause << Int("0")
      when PutObjectInt2Fix1
        clause << Int("1")
      when PutSelf
        clause << VarRef(Kw("self"))
      when SetGlobal
        target = GVar(insn.name.to_s)
        clause << assignment_for(target, clause.pop)
      when SetLocalWC0
        target = Ident(local_name(insn.index, 0))
        clause << assignment_for(target, clause.pop)
      else
        raise "Unknown instruction #{insn}"
      end
    end

    # If there's only one clause, then we don't need a case statement, and
    # we can just disassemble the first clause.
    clauses[label] = clause
    return Statements(clauses.values.first) if clauses.size == 1

    # Here we're going to build up a big case statement that will handle all
    # of the different labels. The clauses are walked in reverse so that each
    # `when` can point at the one that follows it.
    current = nil
    clauses.reverse_each do |current_label, current_clause|
      current =
        When(
          Args([node_for(current_label)]),
          Statements(current_clause),
          current
        )
    end
    switch = Case(Kw("case"), block_label.ref, current)

    # Here we're going to make sure that any locals that were established in
    # the label_0 block are initialized so that scoping rules work
    # correctly.
    stack = []
    locals = [block_label.name]

    clauses[:label_0].each do |node|
      if node.is_a?(Assign) && node.target.is_a?(VarField) &&
           node.target.value.is_a?(Ident)
        value = node.target.value.value
        next if locals.include?(value)

        stack << Assign(node.target, VarRef(Kw("nil")))
        locals << value
      end
    end

    # Finally, we'll set up the initial label and loop the entire case
    # statement.
    stack << Assign(block_label.field, node_for(:label_0))
    stack << MethodAddBlock(
      CallNode(nil, nil, Ident("loop"), Args([])),
      BlockNode(
        Kw("do"),
        nil,
        BodyStmt(Statements([switch]), nil, nil, nil, nil)
      )
    )
    Statements(stack)
  end

  # Builds the two statements that emulate a jump: assign the target label
  # to the block label variable, then `next` to restart the loop.
  def jump_to(target)
    [Assign(block_label.field, node_for(target.name)), Next(Args([]))]
  end

  # Builds the node for assigning value to the variable named by target,
  # folding `x = x <op> y` back into `x <op>= y`.
  def assignment_for(target, value)
    if value.is_a?(Binary) && VarRef(target) === value.left
      OpAssign(VarField(target), Op("#{value.operator}="), value.right)
    else
      Assign(VarField(target), value)
    end
  end

  # Pops the receiver and arguments of a method call off the clause and
  # returns the node for the call. When the call data carries the FCALL flag
  # the popped receiver is discarded and the call is rendered without one.
  def call_node_for(insn, clause)
    calldata = insn.calldata
    method = calldata.method.to_s
    argc = calldata.argc
    fcall = calldata.flag?(CallData::CALL_FCALL)

    receiver, *arguments = clause.pop(argc + 1)
    receiver = nil if fcall
    operator = fcall ? nil : Period(".")

    if argc == 0
      CallNode(receiver, operator, Ident(method), fcall ? Args([]) : nil)
    elsif attribute_writer?(method, argc)
      Assign(
        CallNode(receiver, operator, Ident(method[0..-2]), nil),
        arguments.first
      )
    else
      CallNode(receiver, operator, Ident(method), ArgParen(Args(arguments)))
    end
  end

  # Whether a call to the named method with argc arguments should be
  # rendered as an assignment: one argument and a name ending in "=" that is
  # not one of the operator methods.
  def attribute_writer?(method, argc)
    argc == 1 && method.end_with?("=") && !OPERATOR_METHODS.include?(method)
  end

  # Returns the name of the local at the given index, looked up in the
  # instruction sequence that is level parents above the one being
  # decompiled.
  def local_name(index, level)
    current = iseq
    level.times { current = current.parent_iseq }
    current.local_table.locals[index].name.to_s
  end
end
262
+ end
263
+ end
@@ -0,0 +1,212 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
# Renders instruction sequences as text. Instruction sequences are queued
# with #enqueue and written out by #format!; the remaining public methods
# are formatting helpers that instructions call from their own disasm
# methods.
class Disassembler
  # Names of the call data flags in the order in which they are printed.
  # Each one is looked up as the CallData::CALL_<name> constant.
  CALL_FLAG_NAMES = %i[
    ARGS_SPLAT
    ARGS_BLOCKARG
    FCALL
    VCALL
    ARGS_SIMPLE
    BLOCKISEQ
    KWARG
    KW_SPLAT
    TAILCALL
    SUPER
    ZSUPER
    OPT_SEND
    KW_SPLAT_MUT
  ].freeze

  # The two-letter abbreviation printed for each known event.
  EVENT_ABBREVIATIONS = {
    RUBY_EVENT_B_CALL: "Bc",
    RUBY_EVENT_B_RETURN: "Br",
    RUBY_EVENT_CALL: "Ca",
    RUBY_EVENT_CLASS: "Cl",
    RUBY_EVENT_END: "En",
    RUBY_EVENT_LINE: "Li",
    RUBY_EVENT_RETURN: "Re"
  }.freeze

  attr_reader :output, :queue
  attr_reader :current_prefix
  attr_accessor :current_iseq

  def initialize
    @output = StringIO.new
    @queue = []

    @current_prefix = ""
    @current_iseq = nil
  end

  ########################################################################
  # Helpers for various instructions
  ########################################################################

  # Formats a call data object as "<calldata!...>", listing the method name
  # (when present), the argument count, the keyword arguments (when
  # present) and the names of the flags that are set joined by "|".
  def calldata(value)
    flag_names =
      CALL_FLAG_NAMES.select do |flag_name|
        value.flag?(CallData.const_get(:"CALL_#{flag_name}"))
      end

    parts = []
    parts << "mid:#{value.method}" if value.method
    parts << "argc:#{value.argc}"
    parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
    parts << flag_names.join("|") if flag_names.any?

    "<calldata!#{parts.join(", ")}>"
  end

  # Adds an instruction sequence to the list that #format! will print.
  def enqueue(iseq)
    queue << iseq
  end

  # Returns the abbreviation for the named event. Raises a RuntimeError if
  # the event is not a known one.
  def event(name)
    EVENT_ABBREVIATIONS.fetch(name) { raise "Unknown event: #{name}" }
  end

  def inline_storage(cache)
    "<is:#{cache}>"
  end

  # Formats an instruction name followed by its operands. When there are
  # operands the name is left-justified to 38 characters so that they line
  # up in a column.
  def instruction(name, operands = [])
    return name if operands.empty?

    format("%-38s %s", name, operands.join(", "))
  end

  # Returns the name of the given label without its "label_" prefix.
  def label(value)
    value.name["label_".length..]
  end

  # Formats a reference to a local variable as "name@index". The name is
  # looked up in the local table found by walking up `explicit` (or
  # `implicit`) parents from the current instruction sequence; when neither
  # is given the current instruction sequence itself is used. An explicit
  # level is appended to the result, an implicit one is not.
  def local(index, explicit: nil, implicit: nil)
    level = explicit || implicit || 0

    target = current_iseq
    level.times { target = target.parent_iseq }

    # Build a fresh string rather than appending with <<, since an
    # interpolated string may be frozen under the frozen_string_literal
    # magic comment.
    name = "#{target.local_table.name_at(index)}@#{index}"
    explicit ? "#{name}, #{explicit}" : name
  end

  def object(value)
    value.inspect
  end

  ########################################################################
  # Main entrypoint
  ########################################################################

  # Formats every queued instruction sequence, separating them with a blank
  # line, and returns everything written to the output so far. Instruction
  # sequences enqueued while formatting are picked up as well.
  def format!
    while (@current_iseq = queue.shift)
      output << "\n" if output.pos > 0
      format_iseq(@current_iseq)
    end

    output.string
  end

  private

  # Writes the header, catch table, local table and instructions of a
  # single instruction sequence. While it runs, current_iseq is set to the
  # given instruction sequence so that #local resolves names against the
  # right local table when a catch table entry is formatted recursively.
  def format_iseq(iseq)
    enclosing_iseq = @current_iseq
    @current_iseq = iseq

    output << "#{current_prefix}== disasm: "
    output << "#<ISeq:#{iseq.name}@<compiled>:1 "

    location = Location.fixed(line: iseq.line, char: 0, column: 0)
    output << "(#{location.start_line},#{location.start_column})-"
    output << "(#{location.end_line},#{location.end_column})"
    output << "> "

    format_catch_table(iseq)
    format_local_table(iseq)
    format_insns(iseq)
  ensure
    @current_iseq = enclosing_iseq
  end

  # Writes whether the instruction sequence has a catch table and, if so,
  # each entry of it. The instruction sequences attached to break and
  # rescue entries are formatted inline behind a "| " prefix.
  def format_catch_table(iseq)
    unless iseq.catch_table.any?
      output << "(catch: FALSE)\n"
      return
    end

    output << "(catch: TRUE)\n"
    output << "#{current_prefix}== catch table\n"

    with_prefix("#{current_prefix}| ") do
      iseq.catch_table.each do |entry|
        case entry
        when InstructionSequence::CatchBreak
          output << "#{current_prefix}catch type: break\n"
          format_iseq(entry.iseq)
        when InstructionSequence::CatchNext
          output << "#{current_prefix}catch type: next\n"
        when InstructionSequence::CatchRedo
          output << "#{current_prefix}catch type: redo\n"
        when InstructionSequence::CatchRescue
          output << "#{current_prefix}catch type: rescue\n"
          format_iseq(entry.iseq)
        end
      end
    end

    output << "#{current_prefix}|#{"-" * 72}\n"
  end

  # Writes the size of the local table and one "[offset] name@index" entry
  # per local. Nothing is written when the table is missing or empty.
  def format_local_table(iseq)
    local_table = iseq.local_table
    return if !local_table || local_table.empty?

    output << "#{current_prefix}local table (size: #{local_table.size})\n"

    locals =
      local_table.locals.each_with_index.map do |local, index|
        format("[%2d] %s@%d", local_table.offset(index), local.name, index)
      end

    output << "#{current_prefix}#{locals.join(" ")}\n"
  end

  # Writes one line per instruction, prefixed with the running total of the
  # lengths of the instructions before it. Integers (line numbers) and
  # symbols (events) in the instruction list are held back and attached to
  # the end of the next instruction's line; labels are not printed.
  def format_insns(iseq)
    offset = 0
    pending_events = []
    pending_lines = []

    iseq.insns.each do |insn|
      case insn
      when Integer
        pending_lines << insn
      when Symbol
        pending_events << event(insn)
      when InstructionSequence::Label
        # skip
      else
        output << "#{current_prefix}%04d " % offset

        disasm = insn.disasm(self)
        output << disasm

        # Pad the instruction so that the annotation starts in a fixed
        # column: 65 when there is a line number, otherwise 39 when there
        # are only events.
        width =
          if pending_lines.any?
            65
          elsif pending_events.any?
            39
          else
            0
          end
        output << " " * (width - disasm.length) if disasm.length < width

        if pending_lines.any?
          output << "(%4d)" % pending_lines.last
          pending_lines.clear
        end

        if pending_events.any?
          output << "[#{pending_events.join}]"
          pending_events.clear
        end

        output << "\n"
        offset += insn.length
      end
    end
  end

  # Runs the block with current_prefix set to the given value, restoring
  # the previous prefix afterwards even if the block raises.
  def with_prefix(value)
    previous = @current_prefix

    begin
      @current_prefix = value
      yield
    ensure
      @current_prefix = previous
    end
  end
end
211
+ end
212
+ end