syntax_tree 5.3.0 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -1
- data/CHANGELOG.md +64 -1
- data/Gemfile.lock +2 -2
- data/README.md +28 -9
- data/Rakefile +12 -8
- data/bin/console +1 -0
- data/bin/whitequark +79 -0
- data/doc/changing_structure.md +16 -0
- data/lib/syntax_tree/basic_visitor.rb +44 -5
- data/lib/syntax_tree/cli.rb +2 -2
- data/lib/syntax_tree/dsl.rb +23 -11
- data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
- data/lib/syntax_tree/formatter.rb +1 -1
- data/lib/syntax_tree/index.rb +56 -54
- data/lib/syntax_tree/json_visitor.rb +55 -0
- data/lib/syntax_tree/language_server.rb +157 -2
- data/lib/syntax_tree/match_visitor.rb +120 -0
- data/lib/syntax_tree/mermaid.rb +177 -0
- data/lib/syntax_tree/mermaid_visitor.rb +69 -0
- data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
- data/lib/syntax_tree/node.rb +198 -107
- data/lib/syntax_tree/parser.rb +322 -118
- data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
- data/lib/syntax_tree/reflection.rb +241 -0
- data/lib/syntax_tree/translation/parser.rb +3019 -0
- data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
- data/lib/syntax_tree/translation.rb +28 -0
- data/lib/syntax_tree/version.rb +1 -1
- data/lib/syntax_tree/with_scope.rb +244 -0
- data/lib/syntax_tree/yarv/basic_block.rb +53 -0
- data/lib/syntax_tree/yarv/calldata.rb +91 -0
- data/lib/syntax_tree/yarv/compiler.rb +110 -100
- data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
- data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
- data/lib/syntax_tree/yarv/decompiler.rb +1 -1
- data/lib/syntax_tree/yarv/disassembler.rb +104 -80
- data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
- data/lib/syntax_tree/yarv/instructions.rb +203 -649
- data/lib/syntax_tree/yarv/legacy.rb +12 -24
- data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
- data/lib/syntax_tree/yarv.rb +18 -0
- data/lib/syntax_tree.rb +88 -56
- data/tasks/sorbet.rake +277 -0
- data/tasks/whitequark.rake +87 -0
- metadata +23 -11
- data/.gitmodules +0 -9
- data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
- data/lib/syntax_tree/visitor/environment.rb +0 -84
- data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
- data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
- data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
- data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -3,16 +3,52 @@
|
|
3
3
|
module SyntaxTree
|
4
4
|
module YARV
|
5
5
|
class Disassembler
|
6
|
+
# This class is another object that handles disassembling a YARV
|
7
|
+
# instruction sequence but it renders it without any of the extra spacing
|
8
|
+
# or alignment.
|
9
|
+
class Squished
|
10
|
+
def calldata(value)
|
11
|
+
value.inspect
|
12
|
+
end
|
13
|
+
|
14
|
+
def enqueue(iseq)
|
15
|
+
end
|
16
|
+
|
17
|
+
def event(name)
|
18
|
+
end
|
19
|
+
|
20
|
+
def inline_storage(cache)
|
21
|
+
"<is:#{cache}>"
|
22
|
+
end
|
23
|
+
|
24
|
+
def instruction(name, operands = [])
|
25
|
+
operands.empty? ? name : "#{name} #{operands.join(", ")}"
|
26
|
+
end
|
27
|
+
|
28
|
+
def label(value)
|
29
|
+
"%04d" % value.name["label_".length..]
|
30
|
+
end
|
31
|
+
|
32
|
+
def local(index, **)
|
33
|
+
index.inspect
|
34
|
+
end
|
35
|
+
|
36
|
+
def object(value)
|
37
|
+
value.inspect
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
6
41
|
attr_reader :output, :queue
|
42
|
+
|
7
43
|
attr_reader :current_prefix
|
8
44
|
attr_accessor :current_iseq
|
9
45
|
|
10
|
-
def initialize
|
46
|
+
def initialize(current_iseq = nil)
|
11
47
|
@output = StringIO.new
|
12
48
|
@queue = []
|
13
49
|
|
14
50
|
@current_prefix = ""
|
15
|
-
@current_iseq =
|
51
|
+
@current_iseq = current_iseq
|
16
52
|
end
|
17
53
|
|
18
54
|
########################################################################
|
@@ -20,30 +56,7 @@ module SyntaxTree
|
|
20
56
|
########################################################################
|
21
57
|
|
22
58
|
def calldata(value)
|
23
|
-
|
24
|
-
flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT)
|
25
|
-
if value.flag?(CallData::CALL_ARGS_BLOCKARG)
|
26
|
-
flag_names << :ARGS_BLOCKARG
|
27
|
-
end
|
28
|
-
flag_names << :FCALL if value.flag?(CallData::CALL_FCALL)
|
29
|
-
flag_names << :VCALL if value.flag?(CallData::CALL_VCALL)
|
30
|
-
flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE)
|
31
|
-
flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ)
|
32
|
-
flag_names << :KWARG if value.flag?(CallData::CALL_KWARG)
|
33
|
-
flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT)
|
34
|
-
flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL)
|
35
|
-
flag_names << :SUPER if value.flag?(CallData::CALL_SUPER)
|
36
|
-
flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER)
|
37
|
-
flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND)
|
38
|
-
flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT)
|
39
|
-
|
40
|
-
parts = []
|
41
|
-
parts << "mid:#{value.method}" if value.method
|
42
|
-
parts << "argc:#{value.argc}"
|
43
|
-
parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
|
44
|
-
parts << flag_names.join("|") if flag_names.any?
|
45
|
-
|
46
|
-
"<calldata!#{parts.join(", ")}>"
|
59
|
+
value.inspect
|
47
60
|
end
|
48
61
|
|
49
62
|
def enqueue(iseq)
|
@@ -97,7 +110,7 @@ module SyntaxTree
|
|
97
110
|
end
|
98
111
|
|
99
112
|
########################################################################
|
100
|
-
#
|
113
|
+
# Entrypoints
|
101
114
|
########################################################################
|
102
115
|
|
103
116
|
def format!
|
@@ -105,63 +118,13 @@ module SyntaxTree
|
|
105
118
|
output << "\n" if output.pos > 0
|
106
119
|
format_iseq(@current_iseq)
|
107
120
|
end
|
108
|
-
|
109
|
-
output.string
|
110
121
|
end
|
111
122
|
|
112
|
-
|
113
|
-
|
114
|
-
def format_iseq(iseq)
|
115
|
-
output << "#{current_prefix}== disasm: "
|
116
|
-
output << "#<ISeq:#{iseq.name}@<compiled>:1 "
|
117
|
-
|
118
|
-
location = Location.fixed(line: iseq.line, char: 0, column: 0)
|
119
|
-
output << "(#{location.start_line},#{location.start_column})-"
|
120
|
-
output << "(#{location.end_line},#{location.end_column})"
|
121
|
-
output << "> "
|
122
|
-
|
123
|
-
if iseq.catch_table.any?
|
124
|
-
output << "(catch: TRUE)\n"
|
125
|
-
output << "#{current_prefix}== catch table\n"
|
126
|
-
|
127
|
-
with_prefix("#{current_prefix}| ") do
|
128
|
-
iseq.catch_table.each do |entry|
|
129
|
-
case entry
|
130
|
-
when InstructionSequence::CatchBreak
|
131
|
-
output << "#{current_prefix}catch type: break\n"
|
132
|
-
format_iseq(entry.iseq)
|
133
|
-
when InstructionSequence::CatchNext
|
134
|
-
output << "#{current_prefix}catch type: next\n"
|
135
|
-
when InstructionSequence::CatchRedo
|
136
|
-
output << "#{current_prefix}catch type: redo\n"
|
137
|
-
when InstructionSequence::CatchRescue
|
138
|
-
output << "#{current_prefix}catch type: rescue\n"
|
139
|
-
format_iseq(entry.iseq)
|
140
|
-
end
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
output << "#{current_prefix}|#{"-" * 72}\n"
|
145
|
-
else
|
146
|
-
output << "(catch: FALSE)\n"
|
147
|
-
end
|
148
|
-
|
149
|
-
if (local_table = iseq.local_table) && !local_table.empty?
|
150
|
-
output << "#{current_prefix}local table (size: #{local_table.size})\n"
|
151
|
-
|
152
|
-
locals =
|
153
|
-
local_table.locals.each_with_index.map do |local, index|
|
154
|
-
"[%2d] %s@%d" % [local_table.offset(index), local.name, index]
|
155
|
-
end
|
156
|
-
|
157
|
-
output << "#{current_prefix}#{locals.join(" ")}\n"
|
158
|
-
end
|
159
|
-
|
160
|
-
length = 0
|
123
|
+
def format_insns!(insns, length = 0)
|
161
124
|
events = []
|
162
125
|
lines = []
|
163
126
|
|
164
|
-
|
127
|
+
insns.each do |insn|
|
165
128
|
case insn
|
166
129
|
when Integer
|
167
130
|
lines << insn
|
@@ -191,22 +154,83 @@ module SyntaxTree
|
|
191
154
|
events.clear
|
192
155
|
end
|
193
156
|
|
157
|
+
# A hook here to allow for custom formatting of instructions after
|
158
|
+
# the main body has been processed.
|
159
|
+
yield insn, length if block_given?
|
160
|
+
|
194
161
|
output << "\n"
|
195
162
|
length += insn.length
|
196
163
|
end
|
197
164
|
end
|
198
165
|
end
|
199
166
|
|
167
|
+
def print(string)
|
168
|
+
output.print(string)
|
169
|
+
end
|
170
|
+
|
171
|
+
def puts(string)
|
172
|
+
output.puts(string)
|
173
|
+
end
|
174
|
+
|
175
|
+
def string
|
176
|
+
output.string
|
177
|
+
end
|
178
|
+
|
200
179
|
def with_prefix(value)
|
201
180
|
previous = @current_prefix
|
202
181
|
|
203
182
|
begin
|
204
183
|
@current_prefix = value
|
205
|
-
yield
|
184
|
+
yield value
|
206
185
|
ensure
|
207
186
|
@current_prefix = previous
|
208
187
|
end
|
209
188
|
end
|
189
|
+
|
190
|
+
private
|
191
|
+
|
192
|
+
def format_iseq(iseq)
|
193
|
+
output << "#{current_prefix}== disasm: #{iseq.inspect} "
|
194
|
+
|
195
|
+
if iseq.catch_table.any?
|
196
|
+
output << "(catch: TRUE)\n"
|
197
|
+
output << "#{current_prefix}== catch table\n"
|
198
|
+
|
199
|
+
with_prefix("#{current_prefix}| ") do
|
200
|
+
iseq.catch_table.each do |entry|
|
201
|
+
case entry
|
202
|
+
when InstructionSequence::CatchBreak
|
203
|
+
output << "#{current_prefix}catch type: break\n"
|
204
|
+
format_iseq(entry.iseq)
|
205
|
+
when InstructionSequence::CatchNext
|
206
|
+
output << "#{current_prefix}catch type: next\n"
|
207
|
+
when InstructionSequence::CatchRedo
|
208
|
+
output << "#{current_prefix}catch type: redo\n"
|
209
|
+
when InstructionSequence::CatchRescue
|
210
|
+
output << "#{current_prefix}catch type: rescue\n"
|
211
|
+
format_iseq(entry.iseq)
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
output << "#{current_prefix}|#{"-" * 72}\n"
|
217
|
+
else
|
218
|
+
output << "(catch: FALSE)\n"
|
219
|
+
end
|
220
|
+
|
221
|
+
if (local_table = iseq.local_table) && !local_table.empty?
|
222
|
+
output << "#{current_prefix}local table (size: #{local_table.size})\n"
|
223
|
+
|
224
|
+
locals =
|
225
|
+
local_table.locals.each_with_index.map do |local, index|
|
226
|
+
"[%2d] %s@%d" % [local_table.offset(index), local.name, index]
|
227
|
+
end
|
228
|
+
|
229
|
+
output << "#{current_prefix}#{locals.join(" ")}\n"
|
230
|
+
end
|
231
|
+
|
232
|
+
format_insns!(iseq.insns)
|
233
|
+
end
|
210
234
|
end
|
211
235
|
end
|
212
236
|
end
|
@@ -7,6 +7,28 @@ module SyntaxTree
|
|
7
7
|
# list of instructions along with the metadata pertaining to them. It also
|
8
8
|
# functions as a builder for the instruction sequence.
|
9
9
|
class InstructionSequence
|
10
|
+
# This provides a handle to the rb_iseq_load function, which allows you
|
11
|
+
# to pass a serialized iseq to Ruby and have it return a
|
12
|
+
# RubyVM::InstructionSequence object.
|
13
|
+
def self.iseq_load(iseq)
|
14
|
+
require "fiddle"
|
15
|
+
|
16
|
+
@iseq_load_function ||=
|
17
|
+
Fiddle::Function.new(
|
18
|
+
Fiddle::Handle::DEFAULT["rb_iseq_load"],
|
19
|
+
[Fiddle::TYPE_VOIDP] * 3,
|
20
|
+
Fiddle::TYPE_VOIDP
|
21
|
+
)
|
22
|
+
|
23
|
+
Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil))
|
24
|
+
rescue LoadError
|
25
|
+
raise "Could not load the Fiddle library"
|
26
|
+
rescue NameError
|
27
|
+
raise "Unable to find rb_iseq_load"
|
28
|
+
rescue Fiddle::DLError
|
29
|
+
raise "Unable to perform a dynamic load"
|
30
|
+
end
|
31
|
+
|
10
32
|
# When the list of instructions is first being created, it's stored as a
|
11
33
|
# linked list. This is to make it easier to perform peephole optimizations
|
12
34
|
# and other transformations like instruction specialization.
|
@@ -60,19 +82,6 @@ module SyntaxTree
|
|
60
82
|
|
61
83
|
MAGIC = "YARVInstructionSequence/SimpleDataFormat"
|
62
84
|
|
63
|
-
# This provides a handle to the rb_iseq_load function, which allows you to
|
64
|
-
# pass a serialized iseq to Ruby and have it return a
|
65
|
-
# RubyVM::InstructionSequence object.
|
66
|
-
ISEQ_LOAD =
|
67
|
-
begin
|
68
|
-
Fiddle::Function.new(
|
69
|
-
Fiddle::Handle::DEFAULT["rb_iseq_load"],
|
70
|
-
[Fiddle::TYPE_VOIDP] * 3,
|
71
|
-
Fiddle::TYPE_VOIDP
|
72
|
-
)
|
73
|
-
rescue NameError, Fiddle::DLError
|
74
|
-
end
|
75
|
-
|
76
85
|
# This object is used to track the size of the stack at any given time. It
|
77
86
|
# is effectively a mini symbolic interpreter. It's necessary because when
|
78
87
|
# instruction sequences get serialized they include a :stack_max field on
|
@@ -221,8 +230,7 @@ module SyntaxTree
|
|
221
230
|
end
|
222
231
|
|
223
232
|
def eval
|
224
|
-
|
225
|
-
Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval
|
233
|
+
InstructionSequence.iseq_load(to_a).eval
|
226
234
|
end
|
227
235
|
|
228
236
|
def to_a
|
@@ -269,10 +277,27 @@ module SyntaxTree
|
|
269
277
|
]
|
270
278
|
end
|
271
279
|
|
280
|
+
def to_cfg
|
281
|
+
ControlFlowGraph.compile(self)
|
282
|
+
end
|
283
|
+
|
284
|
+
def to_dfg
|
285
|
+
to_cfg.to_dfg
|
286
|
+
end
|
287
|
+
|
288
|
+
def to_son
|
289
|
+
to_dfg.to_son
|
290
|
+
end
|
291
|
+
|
272
292
|
def disasm
|
273
|
-
|
274
|
-
|
275
|
-
|
293
|
+
fmt = Disassembler.new
|
294
|
+
fmt.enqueue(self)
|
295
|
+
fmt.format!
|
296
|
+
fmt.string
|
297
|
+
end
|
298
|
+
|
299
|
+
def inspect
|
300
|
+
"#<ISeq:#{name}@<compiled>:1 (#{line},0)-(#{line},0)>"
|
276
301
|
end
|
277
302
|
|
278
303
|
# This method converts our linked list of instructions into a final array
|