syntax_tree 5.3.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -1
- data/CHANGELOG.md +64 -1
- data/Gemfile.lock +2 -2
- data/README.md +28 -9
- data/Rakefile +12 -8
- data/bin/console +1 -0
- data/bin/whitequark +79 -0
- data/doc/changing_structure.md +16 -0
- data/lib/syntax_tree/basic_visitor.rb +44 -5
- data/lib/syntax_tree/cli.rb +2 -2
- data/lib/syntax_tree/dsl.rb +23 -11
- data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
- data/lib/syntax_tree/formatter.rb +1 -1
- data/lib/syntax_tree/index.rb +56 -54
- data/lib/syntax_tree/json_visitor.rb +55 -0
- data/lib/syntax_tree/language_server.rb +157 -2
- data/lib/syntax_tree/match_visitor.rb +120 -0
- data/lib/syntax_tree/mermaid.rb +177 -0
- data/lib/syntax_tree/mermaid_visitor.rb +69 -0
- data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
- data/lib/syntax_tree/node.rb +198 -107
- data/lib/syntax_tree/parser.rb +322 -118
- data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
- data/lib/syntax_tree/reflection.rb +241 -0
- data/lib/syntax_tree/translation/parser.rb +3019 -0
- data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
- data/lib/syntax_tree/translation.rb +28 -0
- data/lib/syntax_tree/version.rb +1 -1
- data/lib/syntax_tree/with_scope.rb +244 -0
- data/lib/syntax_tree/yarv/basic_block.rb +53 -0
- data/lib/syntax_tree/yarv/calldata.rb +91 -0
- data/lib/syntax_tree/yarv/compiler.rb +110 -100
- data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
- data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
- data/lib/syntax_tree/yarv/decompiler.rb +1 -1
- data/lib/syntax_tree/yarv/disassembler.rb +104 -80
- data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
- data/lib/syntax_tree/yarv/instructions.rb +203 -649
- data/lib/syntax_tree/yarv/legacy.rb +12 -24
- data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
- data/lib/syntax_tree/yarv.rb +18 -0
- data/lib/syntax_tree.rb +88 -56
- data/tasks/sorbet.rake +277 -0
- data/tasks/whitequark.rake +87 -0
- metadata +23 -11
- data/.gitmodules +0 -9
- data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
- data/lib/syntax_tree/visitor/environment.rb +0 -84
- data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
- data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
- data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
- data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -3,16 +3,52 @@
|
|
3
3
|
module SyntaxTree
|
4
4
|
module YARV
|
5
5
|
class Disassembler
|
6
|
+
# This class is another object that handles disassembling a YARV
|
7
|
+
# instruction sequence but it renders it without any of the extra spacing
|
8
|
+
# or alignment.
|
9
|
+
class Squished
|
10
|
+
def calldata(value)
|
11
|
+
value.inspect
|
12
|
+
end
|
13
|
+
|
14
|
+
def enqueue(iseq)
|
15
|
+
end
|
16
|
+
|
17
|
+
def event(name)
|
18
|
+
end
|
19
|
+
|
20
|
+
def inline_storage(cache)
|
21
|
+
"<is:#{cache}>"
|
22
|
+
end
|
23
|
+
|
24
|
+
def instruction(name, operands = [])
|
25
|
+
operands.empty? ? name : "#{name} #{operands.join(", ")}"
|
26
|
+
end
|
27
|
+
|
28
|
+
def label(value)
|
29
|
+
"%04d" % value.name["label_".length..]
|
30
|
+
end
|
31
|
+
|
32
|
+
def local(index, **)
|
33
|
+
index.inspect
|
34
|
+
end
|
35
|
+
|
36
|
+
def object(value)
|
37
|
+
value.inspect
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
6
41
|
attr_reader :output, :queue
|
42
|
+
|
7
43
|
attr_reader :current_prefix
|
8
44
|
attr_accessor :current_iseq
|
9
45
|
|
10
|
-
def initialize
|
46
|
+
def initialize(current_iseq = nil)
|
11
47
|
@output = StringIO.new
|
12
48
|
@queue = []
|
13
49
|
|
14
50
|
@current_prefix = ""
|
15
|
-
@current_iseq =
|
51
|
+
@current_iseq = current_iseq
|
16
52
|
end
|
17
53
|
|
18
54
|
########################################################################
|
@@ -20,30 +56,7 @@ module SyntaxTree
|
|
20
56
|
########################################################################
|
21
57
|
|
22
58
|
def calldata(value)
|
23
|
-
|
24
|
-
flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT)
|
25
|
-
if value.flag?(CallData::CALL_ARGS_BLOCKARG)
|
26
|
-
flag_names << :ARGS_BLOCKARG
|
27
|
-
end
|
28
|
-
flag_names << :FCALL if value.flag?(CallData::CALL_FCALL)
|
29
|
-
flag_names << :VCALL if value.flag?(CallData::CALL_VCALL)
|
30
|
-
flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE)
|
31
|
-
flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ)
|
32
|
-
flag_names << :KWARG if value.flag?(CallData::CALL_KWARG)
|
33
|
-
flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT)
|
34
|
-
flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL)
|
35
|
-
flag_names << :SUPER if value.flag?(CallData::CALL_SUPER)
|
36
|
-
flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER)
|
37
|
-
flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND)
|
38
|
-
flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT)
|
39
|
-
|
40
|
-
parts = []
|
41
|
-
parts << "mid:#{value.method}" if value.method
|
42
|
-
parts << "argc:#{value.argc}"
|
43
|
-
parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
|
44
|
-
parts << flag_names.join("|") if flag_names.any?
|
45
|
-
|
46
|
-
"<calldata!#{parts.join(", ")}>"
|
59
|
+
value.inspect
|
47
60
|
end
|
48
61
|
|
49
62
|
def enqueue(iseq)
|
@@ -97,7 +110,7 @@ module SyntaxTree
|
|
97
110
|
end
|
98
111
|
|
99
112
|
########################################################################
|
100
|
-
#
|
113
|
+
# Entrypoints
|
101
114
|
########################################################################
|
102
115
|
|
103
116
|
def format!
|
@@ -105,63 +118,13 @@ module SyntaxTree
|
|
105
118
|
output << "\n" if output.pos > 0
|
106
119
|
format_iseq(@current_iseq)
|
107
120
|
end
|
108
|
-
|
109
|
-
output.string
|
110
121
|
end
|
111
122
|
|
112
|
-
|
113
|
-
|
114
|
-
def format_iseq(iseq)
|
115
|
-
output << "#{current_prefix}== disasm: "
|
116
|
-
output << "#<ISeq:#{iseq.name}@<compiled>:1 "
|
117
|
-
|
118
|
-
location = Location.fixed(line: iseq.line, char: 0, column: 0)
|
119
|
-
output << "(#{location.start_line},#{location.start_column})-"
|
120
|
-
output << "(#{location.end_line},#{location.end_column})"
|
121
|
-
output << "> "
|
122
|
-
|
123
|
-
if iseq.catch_table.any?
|
124
|
-
output << "(catch: TRUE)\n"
|
125
|
-
output << "#{current_prefix}== catch table\n"
|
126
|
-
|
127
|
-
with_prefix("#{current_prefix}| ") do
|
128
|
-
iseq.catch_table.each do |entry|
|
129
|
-
case entry
|
130
|
-
when InstructionSequence::CatchBreak
|
131
|
-
output << "#{current_prefix}catch type: break\n"
|
132
|
-
format_iseq(entry.iseq)
|
133
|
-
when InstructionSequence::CatchNext
|
134
|
-
output << "#{current_prefix}catch type: next\n"
|
135
|
-
when InstructionSequence::CatchRedo
|
136
|
-
output << "#{current_prefix}catch type: redo\n"
|
137
|
-
when InstructionSequence::CatchRescue
|
138
|
-
output << "#{current_prefix}catch type: rescue\n"
|
139
|
-
format_iseq(entry.iseq)
|
140
|
-
end
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
output << "#{current_prefix}|#{"-" * 72}\n"
|
145
|
-
else
|
146
|
-
output << "(catch: FALSE)\n"
|
147
|
-
end
|
148
|
-
|
149
|
-
if (local_table = iseq.local_table) && !local_table.empty?
|
150
|
-
output << "#{current_prefix}local table (size: #{local_table.size})\n"
|
151
|
-
|
152
|
-
locals =
|
153
|
-
local_table.locals.each_with_index.map do |local, index|
|
154
|
-
"[%2d] %s@%d" % [local_table.offset(index), local.name, index]
|
155
|
-
end
|
156
|
-
|
157
|
-
output << "#{current_prefix}#{locals.join(" ")}\n"
|
158
|
-
end
|
159
|
-
|
160
|
-
length = 0
|
123
|
+
def format_insns!(insns, length = 0)
|
161
124
|
events = []
|
162
125
|
lines = []
|
163
126
|
|
164
|
-
|
127
|
+
insns.each do |insn|
|
165
128
|
case insn
|
166
129
|
when Integer
|
167
130
|
lines << insn
|
@@ -191,22 +154,83 @@ module SyntaxTree
|
|
191
154
|
events.clear
|
192
155
|
end
|
193
156
|
|
157
|
+
# A hook here to allow for custom formatting of instructions after
|
158
|
+
# the main body has been processed.
|
159
|
+
yield insn, length if block_given?
|
160
|
+
|
194
161
|
output << "\n"
|
195
162
|
length += insn.length
|
196
163
|
end
|
197
164
|
end
|
198
165
|
end
|
199
166
|
|
167
|
+
def print(string)
|
168
|
+
output.print(string)
|
169
|
+
end
|
170
|
+
|
171
|
+
def puts(string)
|
172
|
+
output.puts(string)
|
173
|
+
end
|
174
|
+
|
175
|
+
def string
|
176
|
+
output.string
|
177
|
+
end
|
178
|
+
|
200
179
|
def with_prefix(value)
|
201
180
|
previous = @current_prefix
|
202
181
|
|
203
182
|
begin
|
204
183
|
@current_prefix = value
|
205
|
-
yield
|
184
|
+
yield value
|
206
185
|
ensure
|
207
186
|
@current_prefix = previous
|
208
187
|
end
|
209
188
|
end
|
189
|
+
|
190
|
+
private
|
191
|
+
|
192
|
+
def format_iseq(iseq)
|
193
|
+
output << "#{current_prefix}== disasm: #{iseq.inspect} "
|
194
|
+
|
195
|
+
if iseq.catch_table.any?
|
196
|
+
output << "(catch: TRUE)\n"
|
197
|
+
output << "#{current_prefix}== catch table\n"
|
198
|
+
|
199
|
+
with_prefix("#{current_prefix}| ") do
|
200
|
+
iseq.catch_table.each do |entry|
|
201
|
+
case entry
|
202
|
+
when InstructionSequence::CatchBreak
|
203
|
+
output << "#{current_prefix}catch type: break\n"
|
204
|
+
format_iseq(entry.iseq)
|
205
|
+
when InstructionSequence::CatchNext
|
206
|
+
output << "#{current_prefix}catch type: next\n"
|
207
|
+
when InstructionSequence::CatchRedo
|
208
|
+
output << "#{current_prefix}catch type: redo\n"
|
209
|
+
when InstructionSequence::CatchRescue
|
210
|
+
output << "#{current_prefix}catch type: rescue\n"
|
211
|
+
format_iseq(entry.iseq)
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
output << "#{current_prefix}|#{"-" * 72}\n"
|
217
|
+
else
|
218
|
+
output << "(catch: FALSE)\n"
|
219
|
+
end
|
220
|
+
|
221
|
+
if (local_table = iseq.local_table) && !local_table.empty?
|
222
|
+
output << "#{current_prefix}local table (size: #{local_table.size})\n"
|
223
|
+
|
224
|
+
locals =
|
225
|
+
local_table.locals.each_with_index.map do |local, index|
|
226
|
+
"[%2d] %s@%d" % [local_table.offset(index), local.name, index]
|
227
|
+
end
|
228
|
+
|
229
|
+
output << "#{current_prefix}#{locals.join(" ")}\n"
|
230
|
+
end
|
231
|
+
|
232
|
+
format_insns!(iseq.insns)
|
233
|
+
end
|
210
234
|
end
|
211
235
|
end
|
212
236
|
end
|
@@ -7,6 +7,28 @@ module SyntaxTree
|
|
7
7
|
# list of instructions along with the metadata pertaining to them. It also
|
8
8
|
# functions as a builder for the instruction sequence.
|
9
9
|
class InstructionSequence
|
10
|
+
# This provides a handle to the rb_iseq_load function, which allows you
|
11
|
+
# to pass a serialized iseq to Ruby and have it return a
|
12
|
+
# RubyVM::InstructionSequence object.
|
13
|
+
def self.iseq_load(iseq)
|
14
|
+
require "fiddle"
|
15
|
+
|
16
|
+
@iseq_load_function ||=
|
17
|
+
Fiddle::Function.new(
|
18
|
+
Fiddle::Handle::DEFAULT["rb_iseq_load"],
|
19
|
+
[Fiddle::TYPE_VOIDP] * 3,
|
20
|
+
Fiddle::TYPE_VOIDP
|
21
|
+
)
|
22
|
+
|
23
|
+
Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil))
|
24
|
+
rescue LoadError
|
25
|
+
raise "Could not load the Fiddle library"
|
26
|
+
rescue NameError
|
27
|
+
raise "Unable to find rb_iseq_load"
|
28
|
+
rescue Fiddle::DLError
|
29
|
+
raise "Unable to perform a dynamic load"
|
30
|
+
end
|
31
|
+
|
10
32
|
# When the list of instructions is first being created, it's stored as a
|
11
33
|
# linked list. This is to make it easier to perform peephole optimizations
|
12
34
|
# and other transformations like instruction specialization.
|
@@ -60,19 +82,6 @@ module SyntaxTree
|
|
60
82
|
|
61
83
|
MAGIC = "YARVInstructionSequence/SimpleDataFormat"
|
62
84
|
|
63
|
-
# This provides a handle to the rb_iseq_load function, which allows you to
|
64
|
-
# pass a serialized iseq to Ruby and have it return a
|
65
|
-
# RubyVM::InstructionSequence object.
|
66
|
-
ISEQ_LOAD =
|
67
|
-
begin
|
68
|
-
Fiddle::Function.new(
|
69
|
-
Fiddle::Handle::DEFAULT["rb_iseq_load"],
|
70
|
-
[Fiddle::TYPE_VOIDP] * 3,
|
71
|
-
Fiddle::TYPE_VOIDP
|
72
|
-
)
|
73
|
-
rescue NameError, Fiddle::DLError
|
74
|
-
end
|
75
|
-
|
76
85
|
# This object is used to track the size of the stack at any given time. It
|
77
86
|
# is effectively a mini symbolic interpreter. It's necessary because when
|
78
87
|
# instruction sequences get serialized they include a :stack_max field on
|
@@ -221,8 +230,7 @@ module SyntaxTree
|
|
221
230
|
end
|
222
231
|
|
223
232
|
def eval
|
224
|
-
|
225
|
-
Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval
|
233
|
+
InstructionSequence.iseq_load(to_a).eval
|
226
234
|
end
|
227
235
|
|
228
236
|
def to_a
|
@@ -269,10 +277,27 @@ module SyntaxTree
|
|
269
277
|
]
|
270
278
|
end
|
271
279
|
|
280
|
+
def to_cfg
|
281
|
+
ControlFlowGraph.compile(self)
|
282
|
+
end
|
283
|
+
|
284
|
+
def to_dfg
|
285
|
+
to_cfg.to_dfg
|
286
|
+
end
|
287
|
+
|
288
|
+
def to_son
|
289
|
+
to_dfg.to_son
|
290
|
+
end
|
291
|
+
|
272
292
|
def disasm
|
273
|
-
|
274
|
-
|
275
|
-
|
293
|
+
fmt = Disassembler.new
|
294
|
+
fmt.enqueue(self)
|
295
|
+
fmt.format!
|
296
|
+
fmt.string
|
297
|
+
end
|
298
|
+
|
299
|
+
def inspect
|
300
|
+
"#<ISeq:#{name}@<compiled>:1 (#{line},0)-(#{line},0)>"
|
276
301
|
end
|
277
302
|
|
278
303
|
# This method converts our linked list of instructions into a final array
|