syntax_tree 5.0.1 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,263 @@
# frozen_string_literal: true

module SyntaxTree
  module YARV
    # This class is responsible for taking a compiled instruction sequence and
    # walking through it to generate equivalent Ruby code.
    #
    # Control flow is emulated rather than reconstructed: the instructions are
    # split into clauses at every label, and when there is more than one clause
    # the output is a `loop do ... end` wrapping a `case` on a label variable,
    # where each jump is an assignment to that variable followed by `next`.
    class Decompiler
      # When we're decompiling, we use a looped case statement to emulate
      # jumping around in the same way the virtual machine would. This class
      # provides convenience methods for generating the AST nodes that have to
      # do with that label.
      class BlockLabel
        include DSL
        attr_reader :name

        # name is the identifier of the local variable that holds the label of
        # the clause to run next.
        def initialize(name)
          @name = name
        end

        # The node to use when the label variable is the target of an
        # assignment.
        def field
          VarField(Ident(name))
        end

        # The node to use when the label variable is being read.
        def ref
          VarRef(Ident(name))
        end
      end

      include DSL
      attr_reader :iseq, :block_label

      # iseq is the instruction sequence that will be decompiled.
      def initialize(iseq)
        @iseq = iseq
        @block_label = BlockLabel.new("__block_label")
      end

      # Returns a Program node wrapping the decompiled statements.
      def to_ruby
        Program(decompile(iseq))
      end

      private

      # Builds the literal node for a plain Ruby value. Only integers and
      # symbols are supported; anything else raises instead of returning nil,
      # since a nil here would silently end up inside the generated tree (for
      # example as the value of an Assoc).
      def node_for(value)
        case value
        when Integer
          Int(value.to_s)
        when Symbol
          SymbolLiteral(Ident(value.to_s))
        else
          raise "Unknown object type: #{value.class.name}"
        end
      end

      # Walks the instructions of the given instruction sequence and returns a
      # Statements node. Raises a RuntimeError when it encounters an
      # instruction or object that it does not know how to decompile.
      #
      # While walking, `clause` doubles as the operand stack: instructions pop
      # their operands off the end of it and push the node they produce.
      def decompile(iseq)
        label = :label_0
        clauses = {}
        clause = []

        iseq.insns.each do |insn|
          case insn
          when Integer, Symbol
            # The disassembler treats these entries as line numbers and event
            # markers. They produce no code, so there is nothing to emit.
          when InstructionSequence::Label
            # Falling through into the next label has to be made explicit,
            # unless the clause already ended by jumping somewhere else.
            unless clause.last.is_a?(Next)
              clause << Assign(block_label.field, node_for(insn.name))
            end

            clauses[label] = clause
            clause = []
            label = insn.name
          when BranchIf
            # branchif jumps when the value is truthy.
            jump = Statements(jump_to(insn.label))
            clause << IfNode(clause.pop, jump, nil)
          when BranchUnless
            # branchunless jumps when the value is falsy.
            jump = Statements(jump_to(insn.label))
            clause << UnlessNode(clause.pop, jump, nil)
          when Dup
            clause << clause.last
          when DupHash
            assocs =
              insn.object.map do |key, value|
                Assoc(node_for(key), node_for(value))
              end

            clause << HashLiteral(LBrace("{"), assocs)
          when GetGlobal
            clause << VarRef(GVar(insn.name.to_s))
          when GetLocalWC0
            clause << VarRef(Ident(local_name(insn.index, 0)))
          when Jump
            clause.concat(jump_to(insn.label))
          when Leave
            value = Args([clause.pop])
            clause << (iseq.type == :top ? Break(value) : ReturnNode(value))
          when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT,
               OptMinus, OptMod, OptMult, OptOr, OptPlus
            left, right = clause.pop(2)
            clause << Binary(left, insn.calldata.method, right)
          when OptAref
            collection, arg = clause.pop(2)
            clause << ARef(collection, Args([arg]))
          when OptAset
            collection, arg, value = clause.pop(3)
            clause << index_assignment(collection, arg, value)
          when OptNEq
            left, right = clause.pop(2)
            clause << Binary(left, :"!=", right)
          when OptSendWithoutBlock
            clause << send_node(clause, insn.calldata)
          when Pop
            # skip
          when PutObject
            case insn.object
            when Float
              clause << FloatLiteral(insn.object.inspect)
            when Integer
              clause << Int(insn.object.inspect)
            else
              raise "Unknown object type: #{insn.object.class.name}"
            end
          when PutObjectInt2Fix0
            clause << Int("0")
          when PutObjectInt2Fix1
            clause << Int("1")
          when PutSelf
            clause << VarRef(Kw("self"))
          when SetGlobal
            clause << variable_assignment(GVar(insn.name.to_s), clause.pop)
          when SetLocalWC0
            target = Ident(local_name(insn.index, 0))
            clause << variable_assignment(target, clause.pop)
          else
            raise "Unknown instruction #{insn}"
          end
        end

        # If there's only one clause, then we don't need a case statement, and
        # we can just disassemble the first clause.
        clauses[label] = clause
        return Statements(clauses.values.first) if clauses.size == 1

        # Here we're going to build up a big case statement that will handle all
        # of the different labels. The clauses are walked in reverse because
        # each When node holds a reference to the one that follows it.
        current = nil
        clauses.reverse_each do |current_label, current_clause|
          current =
            When(
              Args([node_for(current_label)]),
              Statements(current_clause),
              current
            )
        end
        switch = Case(Kw("case"), block_label.ref, current)

        # Here we're going to make sure that any locals that were established in
        # the label_0 block are initialized so that scoping rules work
        # correctly.
        stack = []
        locals = [block_label.name]

        clauses[:label_0].each do |node|
          if node.is_a?(Assign) && node.target.is_a?(VarField) &&
               node.target.value.is_a?(Ident)
            value = node.target.value.value
            next if locals.include?(value)

            stack << Assign(node.target, VarRef(Kw("nil")))
            locals << value
          end
        end

        # Finally, we'll set up the initial label and loop the entire case
        # statement.
        stack << Assign(block_label.field, node_for(:label_0))
        stack << MethodAddBlock(
          CallNode(nil, nil, Ident("loop"), Args([])),
          BlockNode(
            Kw("do"),
            nil,
            BodyStmt(Statements([switch]), nil, nil, nil, nil)
          )
        )
        Statements(stack)
      end

      # Returns the pair of nodes that emulates a jump to the given label:
      # store the label's name in the label variable, then restart the loop.
      def jump_to(label)
        [Assign(block_label.field, node_for(label.name)), Next(Args([]))]
      end

      # Builds the assignment of value to the given variable target. When the
      # value is a binary expression whose left side reads that same variable,
      # the assignment is folded into an operator assignment (`a += 1`).
      def variable_assignment(target, value)
        if value.is_a?(Binary) && VarRef(target) === value.left
          OpAssign(VarField(target), Op("#{value.operator}="), value.right)
        else
          Assign(VarField(target), value)
        end
      end

      # Builds the assignment `collection[arg] = value`. When the value is a
      # binary expression whose left side reads that same index, the
      # assignment is folded into an operator assignment (`a[b] += 1`).
      def index_assignment(collection, arg, value)
        if value.is_a?(Binary) && value.left.is_a?(ARef) &&
             collection === value.left.collection &&
             arg === value.left.index.parts[0]
          OpAssign(
            ARefField(collection, Args([arg])),
            Op("#{value.operator}="),
            value.right
          )
        else
          Assign(ARefField(collection, Args([arg])), value)
        end
      end

      # Pops the receiver and arguments for a method call off of the clause
      # and returns the node for that call. When the call data carries the
      # FCALL flag the popped receiver is dropped and the call is written
      # without one. A single-argument call to a method ending in "=" is
      # written as an assignment.
      def send_node(clause, calldata)
        method = calldata.method.to_s
        argc = calldata.argc
        fcall = calldata.flag?(CallData::CALL_FCALL)

        receiver, *arguments = clause.pop(argc + 1)
        receiver = nil if fcall
        operator = fcall ? nil : Period(".")

        if argc == 0
          CallNode(receiver, operator, Ident(method), fcall ? Args([]) : nil)
        elsif argc == 1 && method.end_with?("=")
          Assign(
            CallNode(receiver, operator, Ident(method[0..-2]), nil),
            arguments.first
          )
        else
          CallNode(receiver, operator, Ident(method), ArgParen(Args(arguments)))
        end
      end

      # Returns the name of the local at the given index in the local table
      # that is `level` parent instruction sequences above the one being
      # decompiled.
      def local_name(index, level)
        current = iseq
        level.times { current = current.parent_iseq }
        current.local_table.locals[index].name.to_s
      end
    end
  end
end
@@ -0,0 +1,212 @@
# frozen_string_literal: true

module SyntaxTree
  module YARV
    # Renders instruction sequences as text. Instruction sequences are added
    # with #enqueue and written out by #format!. The helper methods in the
    # first section are public because each instruction's #disasm receives the
    # disassembler and uses them to render its own operands.
    class Disassembler
      # The call data flags in the order they are printed. Each one maps to
      # the CallData constant of the same name prefixed with CALL_.
      CALLDATA_FLAG_NAMES = %i[
        ARGS_SPLAT
        ARGS_BLOCKARG
        FCALL
        VCALL
        ARGS_SIMPLE
        BLOCKISEQ
        KWARG
        KW_SPLAT
        TAILCALL
        SUPER
        ZSUPER
        OPT_SEND
        KW_SPLAT_MUT
      ].freeze

      # The two-letter abbreviation that is printed for each event.
      EVENT_ABBREVIATIONS = {
        RUBY_EVENT_B_CALL: "Bc",
        RUBY_EVENT_B_RETURN: "Br",
        RUBY_EVENT_CALL: "Ca",
        RUBY_EVENT_CLASS: "Cl",
        RUBY_EVENT_END: "En",
        RUBY_EVENT_LINE: "Li",
        RUBY_EVENT_RETURN: "Re"
      }.freeze

      attr_reader :output, :queue
      attr_reader :current_prefix
      attr_accessor :current_iseq

      def initialize
        @output = StringIO.new
        @queue = []

        @current_prefix = ""
        @current_iseq = nil
      end

      ########################################################################
      # Helpers for various instructions
      ########################################################################

      # Formats call data as "<calldata!mid:NAME, argc:N, kw:[...], FLAGS>",
      # leaving out the method name, keywords, and flags when they are absent.
      def calldata(value)
        flag_names =
          CALLDATA_FLAG_NAMES.select do |name|
            value.flag?(CallData.const_get(:"CALL_#{name}"))
          end

        parts = []
        parts << "mid:#{value.method}" if value.method
        parts << "argc:#{value.argc}"
        parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
        parts << flag_names.join("|") if flag_names.any?

        "<calldata!#{parts.join(", ")}>"
      end

      # Adds an instruction sequence to the list of sequences that #format!
      # will write out.
      def enqueue(iseq)
        queue << iseq
      end

      # Returns the abbreviation for the given event name. Raises a
      # RuntimeError if the event is not known.
      def event(name)
        EVENT_ABBREVIATIONS.fetch(name) { raise "Unknown event: #{name}" }
      end

      # Formats an inline storage operand.
      def inline_storage(cache)
        "<is:#{cache}>"
      end

      # Formats an instruction name together with its operands. When there are
      # operands the name is padded out to 38 characters so that operands line
      # up in a column.
      def instruction(name, operands = [])
        operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")]
      end

      # Returns the name of the given label without its "label_" prefix.
      def label(value)
        value.name["label_".length..]
      end

      # Formats a local variable operand as "name@index". The variable is
      # looked up `explicit` (or, failing that, `implicit`) parent instruction
      # sequences above the current one, defaulting to the current one when
      # neither is given. An explicit level is appended to the result.
      def local(index, explicit: nil, implicit: nil)
        current = current_iseq
        (explicit || implicit || 0).times { current = current.parent_iseq }

        # Built without mutation because on Ruby 2.7 interpolated string
        # literals are frozen under the frozen_string_literal magic comment.
        value = "#{current.local_table.name_at(index)}@#{index}"
        explicit ? "#{value}, #{explicit}" : value
      end

      # Formats an object operand.
      def object(value)
        value.inspect
      end

      ########################################################################
      # Main entrypoint
      ########################################################################

      # Writes out every queued instruction sequence, separated by blank
      # lines, and returns everything that has been written so far. Sequences
      # may be enqueued while this is running; they are picked up as well.
      def format!
        while (@current_iseq = queue.shift)
          output << "\n" if output.pos > 0
          format_iseq(@current_iseq)
        end

        output.string
      end

      private

      # Writes a single instruction sequence: the header, the catch table (if
      # there are any entries), the local table (if it is not empty), and then
      # one line per instruction.
      def format_iseq(iseq)
        output << "#{current_prefix}== disasm: "
        output << "#<ISeq:#{iseq.name}@<compiled>:1 "

        location = Location.fixed(line: iseq.line, char: 0, column: 0)
        output << "(#{location.start_line},#{location.start_column})-"
        output << "(#{location.end_line},#{location.end_column})"
        output << "> "

        if iseq.catch_table.any?
          output << "(catch: TRUE)\n"
          output << "#{current_prefix}== catch table\n"

          # Break and rescue entries have their own instruction sequence,
          # which is written inline underneath the entry with a deeper prefix.
          with_prefix("#{current_prefix}| ") do
            iseq.catch_table.each do |entry|
              case entry
              when InstructionSequence::CatchBreak
                output << "#{current_prefix}catch type: break\n"
                format_iseq(entry.iseq)
              when InstructionSequence::CatchNext
                output << "#{current_prefix}catch type: next\n"
              when InstructionSequence::CatchRedo
                output << "#{current_prefix}catch type: redo\n"
              when InstructionSequence::CatchRescue
                output << "#{current_prefix}catch type: rescue\n"
                format_iseq(entry.iseq)
              end
            end
          end

          output << "#{current_prefix}|#{"-" * 72}\n"
        else
          output << "(catch: FALSE)\n"
        end

        if (local_table = iseq.local_table) && !local_table.empty?
          output << "#{current_prefix}local table (size: #{local_table.size})\n"

          locals =
            local_table.locals.each_with_index.map do |local, index|
              "[%2d] %s@%d" % [local_table.offset(index), local.name, index]
            end

          output << "#{current_prefix}#{locals.join("    ")}\n"
        end

        length = 0
        events = []
        lines = []

        iseq.insns.each do |insn|
          case insn
          when Integer
            # A line number, held until the next instruction is written.
            lines << insn
          when Symbol
            # An event, held until the next instruction is written.
            events << event(insn)
          when InstructionSequence::Label
            # skip
          else
            output << "#{current_prefix}%04d " % length

            # The line number column starts at 65 characters; if there are
            # only events to print then they start at 39 characters.
            disasm = insn.disasm(self)
            width =
              if lines.any?
                65
              elsif events.any?
                39
              else
                0
              end
            output << disasm.ljust(width)

            if lines.any?
              output << "(%4d)" % lines.last
              lines.clear
            end

            if events.any?
              output << "[#{events.join}]"
              events.clear
            end

            output << "\n"
            length += insn.length
          end
        end
      end

      # Runs the given block with the prefix that starts every output line
      # replaced by value, restoring the previous prefix afterward even if the
      # block raises.
      def with_prefix(value)
        previous = @current_prefix

        begin
          @current_prefix = value
          yield
        ensure
          @current_prefix = previous
        end
      end
    end
  end
end