syntax_tree 5.0.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,254 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
# This class is responsible for taking a compiled instruction sequence and
# walking through it to generate equivalent Ruby code.
class Decompiler
  # When we're decompiling, we use a looped case statement to emulate
  # jumping around in the same way the virtual machine would. This class
  # provides convenience methods for generating the AST nodes that have to
  # do with that label.
  class BlockLabel
    include DSL
    attr_reader :name

    def initialize(name)
      @name = name
    end

    # The label variable wrapped as a VarField, which is the form used as
    # the target of the assignments generated by the decompiler.
    def field
      VarField(Ident(name))
    end

    # The label variable wrapped as a VarRef, which is the form used as the
    # subject of the generated case statement.
    def ref
      VarRef(Ident(name))
    end
  end

  include DSL
  attr_reader :iseq, :block_label

  # Stores the instruction sequence to decompile and sets up the variable
  # ("__block_label") that the generated code uses to track the current
  # label.
  def initialize(iseq)
    @iseq = iseq
    @block_label = BlockLabel.new("__block_label")
  end

  # Returns a Program node wrapping the decompiled statements.
  def to_ruby
    Program(decompile(iseq))
  end

  private

  # Builds the literal node for an Integer or Symbol. Anything else is
  # rejected explicitly, in the same way that unsupported instructions and
  # putobject operands are, instead of silently producing a nil node.
  def node_for(value)
    case value
    when Integer
      Int(value.to_s)
    when Symbol
      SymbolLiteral(Ident(value.to_s))
    else
      raise "Unknown object type: #{value.class.name}"
    end
  end

  # Walks the instructions, using an array (clause) as a stand-in for the
  # VM stack: instructions pop the nodes for their operands and push the
  # node for their result. Every label starts a new clause. A single clause
  # is returned as plain statements; multiple clauses are wrapped in a
  # looped case statement.
  def decompile(iseq)
    label = :label_0
    clauses = {}
    clause = []

    iseq.insns.each do |insn|
      case insn
      when InstructionSequence::Label
        # If the clause does not already end in a jump, control falls
        # through into the clause for this label.
        unless clause.last.is_a?(Next)
          clause << Assign(block_label.field, node_for(insn.name))
        end

        clauses[label] = clause
        clause = []
        label = insn.name
      when BranchUnless
        # branchunless jumps when the popped value is falsy, so the jump
        # has to be guarded by an unless and not by an if.
        clause << UnlessNode(clause.pop, Statements(jump_to(insn.label)), nil)
      when Dup
        clause << clause.last
      when DupHash
        assocs =
          insn.object.map do |key, value|
            Assoc(node_for(key), node_for(value))
          end

        clause << HashLiteral(LBrace("{"), assocs)
      when GetGlobal
        clause << VarRef(GVar(insn.name.to_s))
      when GetLocalWC0
        clause << VarRef(Ident(local_name(insn.index, 0)))
      when Jump
        clause.concat(jump_to(insn.label))
      when Leave
        value = Args([clause.pop])
        clause << (iseq.type == :top ? Break(value) : ReturnNode(value))
      when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT,
           OptMinus, OptMod, OptMult, OptOr, OptPlus
        left, right = clause.pop(2)
        clause << Binary(left, insn.calldata.method, right)
      when OptAref
        collection, arg = clause.pop(2)
        clause << ARef(collection, Args([arg]))
      when OptAset
        collection, arg, value = clause.pop(3)

        # collection[arg] = collection[arg] <op> right collapses into
        # collection[arg] <op>= right.
        clause << if value.is_a?(Binary) && value.left.is_a?(ARef) &&
             collection === value.left.collection &&
             arg === value.left.index.parts[0]
          OpAssign(
            ARefField(collection, Args([arg])),
            Op("#{value.operator}="),
            value.right
          )
        else
          Assign(ARefField(collection, Args([arg])), value)
        end
      when OptNEq
        left, right = clause.pop(2)
        clause << Binary(left, :"!=", right)
      when OptSendWithoutBlock
        clause << send_without_block(clause, insn.calldata)
      when PutObject
        case insn.object
        when Float
          clause << FloatLiteral(insn.object.inspect)
        when Integer
          clause << Int(insn.object.inspect)
        else
          raise "Unknown object type: #{insn.object.class.name}"
        end
      when PutObjectInt2Fix0
        clause << Int("0")
      when PutObjectInt2Fix1
        clause << Int("1")
      when PutSelf
        clause << VarRef(Kw("self"))
      when SetGlobal
        clause << assignment(GVar(insn.name.to_s), clause.pop)
      when SetLocalWC0
        clause << assignment(Ident(local_name(insn.index, 0)), clause.pop)
      else
        raise "Unknown instruction #{insn}"
      end
    end

    # If there's only one clause, then we don't need a case statement, and
    # we can just disassemble the first clause.
    clauses[label] = clause
    return Statements(clauses.values.first) if clauses.size == 1

    looped_case(clauses)
  end

  # The two statements that emulate a jump: point the block label at the
  # target and restart the loop.
  def jump_to(target)
    [Assign(block_label.field, node_for(target.name)), Next(Args([]))]
  end

  # Assigns value to the variable named by target. When value is a binary
  # expression whose left side reads that same variable, the assignment
  # collapses into an operator assignment (target <op>= right).
  def assignment(target, value)
    if value.is_a?(Binary) && VarRef(target) === value.left
      OpAssign(VarField(target), Op("#{value.operator}="), value.right)
    else
      Assign(VarField(target), value)
    end
  end

  # Pops the receiver and arguments described by calldata off of clause and
  # returns the node for the call. Calls flagged as FCALL are written
  # without a receiver. A one-argument call to a method whose name ends in
  # "=" is written as an attribute assignment.
  def send_without_block(clause, calldata)
    method = calldata.method.to_s
    argc = calldata.argc
    fcall = calldata.flag?(CallData::CALL_FCALL)

    receiver, *arguments = clause.pop(argc + 1)
    if fcall
      receiver = nil
      operator = nil
    else
      operator = Period(".")
    end

    if argc == 0
      CallNode(receiver, operator, Ident(method), fcall ? Args([]) : nil)
    elsif argc == 1 && method.end_with?("=")
      Assign(
        CallNode(receiver, operator, Ident(method[0..-2]), nil),
        arguments.first
      )
    else
      CallNode(receiver, operator, Ident(method), ArgParen(Args(arguments)))
    end
  end

  # Wraps the clauses in a case statement on the block label, inside of a
  # call to loop, preceded by the variable initializations it needs.
  def looped_case(clauses)
    # Here we're going to build up a big case statement that will handle all
    # of the different labels. The when clauses are chained from the last
    # one backward since each one holds a reference to the one after it.
    current = nil
    clauses.reverse_each do |current_label, current_clause|
      current =
        When(
          Args([node_for(current_label)]),
          Statements(current_clause),
          current
        )
    end
    switch = Case(Kw("case"), block_label.ref, current)

    # Here we're going to make sure that any locals that were established in
    # the label_0 block are initialized so that scoping rules work
    # correctly.
    stack = []
    locals = [block_label.name]

    clauses[:label_0].each do |node|
      if node.is_a?(Assign) && node.target.is_a?(VarField) &&
           node.target.value.is_a?(Ident)
        value = node.target.value.value
        next if locals.include?(value)

        stack << Assign(node.target, VarRef(Kw("nil")))
        locals << value
      end
    end

    # Finally, we'll set up the initial label and loop the entire case
    # statement.
    stack << Assign(block_label.field, node_for(:label_0))
    stack << MethodAddBlock(
      CallNode(nil, nil, Ident("loop"), Args([])),
      BlockNode(
        Kw("do"),
        nil,
        BodyStmt(Statements([switch]), nil, nil, nil, nil)
      )
    )
    Statements(stack)
  end

  # Returns the name of the local at index in the local table found level
  # parents above the instruction sequence being decompiled.
  def local_name(index, level)
    current = iseq
    level.times { current = current.parent_iseq }
    current.local_table.locals[index].name.to_s
  end
end
253
+ end
254
+ end
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module YARV
5
# Renders queued instruction sequences as text. Instruction sequences are
# added with #enqueue and written out by #format!, which returns everything
# written so far as a string.
class Disassembler
  attr_reader :output, :queue
  attr_reader :current_prefix, :current_iseq

  def initialize
    @output = StringIO.new
    @queue = []

    @current_prefix = ""
    @current_iseq = nil
  end

  ########################################################################
  # Helpers for various instructions
  ########################################################################

  # Renders call data as "<calldata!mid:NAME, argc:N, kw:[...], FLAGS>".
  # The mid, kw, and flags parts are left out when they are not present,
  # and the names of the flags that are set are joined with "|".
  def calldata(value)
    flags = [
      [:ARGS_SPLAT, CallData::CALL_ARGS_SPLAT],
      [:ARGS_BLOCKARG, CallData::CALL_ARGS_BLOCKARG],
      [:FCALL, CallData::CALL_FCALL],
      [:VCALL, CallData::CALL_VCALL],
      [:ARGS_SIMPLE, CallData::CALL_ARGS_SIMPLE],
      [:BLOCKISEQ, CallData::CALL_BLOCKISEQ],
      [:KWARG, CallData::CALL_KWARG],
      [:KW_SPLAT, CallData::CALL_KW_SPLAT],
      [:TAILCALL, CallData::CALL_TAILCALL],
      [:SUPER, CallData::CALL_SUPER],
      [:ZSUPER, CallData::CALL_ZSUPER],
      [:OPT_SEND, CallData::CALL_OPT_SEND],
      [:KW_SPLAT_MUT, CallData::CALL_KW_SPLAT_MUT]
    ]
    flag_names = flags.select { |_name, mask| value.flag?(mask) }.map(&:first)

    parts = []
    parts << "mid:#{value.method}" if value.method
    parts << "argc:#{value.argc}"
    parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
    parts << flag_names.join("|") if flag_names.any?

    "<calldata!#{parts.join(", ")}>"
  end

  # Adds an instruction sequence to the list that #format! will write out.
  def enqueue(iseq)
    queue << iseq
  end

  # Returns the two-letter code for a trace event name. Raises for event
  # names that are not listed here.
  def event(name)
    case name
    when :RUBY_EVENT_B_CALL
      "Bc"
    when :RUBY_EVENT_B_RETURN
      "Br"
    when :RUBY_EVENT_CALL
      "Ca"
    when :RUBY_EVENT_CLASS
      "Cl"
    when :RUBY_EVENT_END
      "En"
    when :RUBY_EVENT_LINE
      "Li"
    when :RUBY_EVENT_RETURN
      "Re"
    else
      raise "Unknown event: #{name}"
    end
  end

  # Renders an inline storage operand.
  def inline_storage(cache)
    "<is:#{cache}>"
  end

  # Renders an instruction name followed by its operands. The name is
  # padded to 38 columns only when there are operands to line up after it.
  def instruction(name, operands = [])
    operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")]
  end

  # Renders a label by dropping the first "label_".length characters of its
  # name.
  def label(value)
    value.name["label_".length..]
  end

  # Renders a local as "name@index". The local is looked up in the local
  # table of the instruction sequence that is `explicit` (or else
  # `implicit`) parents above the current one, or in the current one when
  # neither is given. An explicit level is also appended to the result.
  def local(index, explicit: nil, implicit: nil)
    current = current_iseq
    (explicit || implicit || 0).times { current = current.parent_iseq }

    # This is built without mutating a string literal so that it also works
    # where interpolated literals are frozen.
    value = "#{current.local_table.name_at(index)}@#{index}"
    explicit ? "#{value}, #{explicit}" : value
  end

  # Renders an object operand using its inspect output.
  def object(value)
    value.inspect
  end

  ########################################################################
  # Main entrypoint
  ########################################################################

  # Writes out every queued instruction sequence, separated by blank lines,
  # and returns the accumulated output. The queue is read one entry at a
  # time, so sequences enqueued while formatting are also written out.
  def format!
    while (@current_iseq = queue.shift)
      output << "\n" if output.pos > 0
      format_iseq(@current_iseq)
    end

    output.string
  end

  private

  # Writes the header, catch table, local table, and instructions of a
  # single instruction sequence.
  def format_iseq(iseq)
    format_header(iseq)
    format_catch_table(iseq)
    format_local_table(iseq.local_table)
    format_insns(iseq.insns)
  end

  # Writes the "== disasm:" line up to (but not including) the catch flag.
  def format_header(iseq)
    output << "#{current_prefix}== disasm: "
    output << "#<ISeq:#{iseq.name}@<compiled>:1 "

    location = iseq.location
    output << "(#{location.start_line},#{location.start_column})-"
    output << "(#{location.end_line},#{location.end_column})"
    output << "> "
  end

  # Finishes the header line with the catch flag and, when there are catch
  # table entries, writes each of them using a "| " prefix. The instruction
  # sequences of break and rescue entries are written inline.
  def format_catch_table(iseq)
    unless iseq.catch_table.any?
      output << "(catch: FALSE)\n"
      return
    end

    output << "(catch: TRUE)\n"
    output << "#{current_prefix}== catch table\n"

    with_prefix("#{current_prefix}| ") do
      iseq.catch_table.each do |entry|
        case entry
        when InstructionSequence::CatchBreak
          output << "#{current_prefix}catch type: break\n"
          format_iseq(entry.iseq)
        when InstructionSequence::CatchNext
          output << "#{current_prefix}catch type: next\n"
        when InstructionSequence::CatchRedo
          output << "#{current_prefix}catch type: redo\n"
        when InstructionSequence::CatchRescue
          output << "#{current_prefix}catch type: rescue\n"
          format_iseq(entry.iseq)
        end
      end
    end

    output << "#{current_prefix}|#{"-" * 72}\n"
  end

  # Writes the local table summary. Nothing is written when the table is
  # missing or empty.
  def format_local_table(local_table)
    return if !local_table || local_table.empty?

    output << "#{current_prefix}local table (size: #{local_table.size})\n"

    locals =
      local_table.locals.each_with_index.map do |entry, index|
        "[%2d] %s@%d" % [local_table.offset(index), entry.name, index]
      end

    output << "#{current_prefix}#{locals.join(" ")}\n"
  end

  # Writes one line per instruction, prefixed with its offset. Integers and
  # symbols in the list are line numbers and event names; they are held
  # until the next instruction and printed after it. Labels are skipped.
  def format_insns(insns)
    length = 0
    events = []
    lines = []

    insns.each do |insn|
      case insn
      when Integer
        lines << insn
      when Symbol
        events << event(insn)
      when InstructionSequence::Label
        # skip
      else
        # The offset is formatted on its own so that the prefix is never
        # interpreted as part of the format string.
        output << current_prefix << format("%04d ", length)

        disasm = insn.disasm(self)
        output << disasm

        # Line numbers start at column 65 and events at column 39.
        column =
          if lines.any?
            65
          elsif events.any?
            39
          else
            0
          end
        output << " " * (column - disasm.length) if disasm.length < column

        if lines.any?
          output << "(%4d)" % lines.last
          lines.clear
        end

        if events.any?
          output << "[#{events.join}]"
          events.clear
        end

        output << "\n"
        length += insn.length
      end
    end
  end

  # Runs the block with the given line prefix and then restores the
  # previous prefix, even if the block raises.
  def with_prefix(value)
    previous = @current_prefix

    begin
      @current_prefix = value
      yield
    ensure
      @current_prefix = previous
    end
  end
end
210
+ end
211
+ end