syntax_tree 5.3.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +64 -1
  4. data/Gemfile.lock +2 -2
  5. data/README.md +28 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +56 -54
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +198 -107
  23. data/lib/syntax_tree/parser.rb +322 -118
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3019 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -3,16 +3,52 @@
3
3
  module SyntaxTree
4
4
  module YARV
5
5
  class Disassembler
6
+ # This class is another object that handles disassembling a YARV
7
+ # instruction sequence but it renders it without any of the extra spacing
8
+ # or alignment.
9
+ class Squished
10
+ def calldata(value)
11
+ value.inspect
12
+ end
13
+
14
+ def enqueue(iseq)
15
+ end
16
+
17
+ def event(name)
18
+ end
19
+
20
+ def inline_storage(cache)
21
+ "<is:#{cache}>"
22
+ end
23
+
24
+ def instruction(name, operands = [])
25
+ operands.empty? ? name : "#{name} #{operands.join(", ")}"
26
+ end
27
+
28
+ def label(value)
29
+ "%04d" % value.name["label_".length..]
30
+ end
31
+
32
+ def local(index, **)
33
+ index.inspect
34
+ end
35
+
36
+ def object(value)
37
+ value.inspect
38
+ end
39
+ end
40
+
6
41
  attr_reader :output, :queue
42
+
7
43
  attr_reader :current_prefix
8
44
  attr_accessor :current_iseq
9
45
 
10
- def initialize
46
+ def initialize(current_iseq = nil)
11
47
  @output = StringIO.new
12
48
  @queue = []
13
49
 
14
50
  @current_prefix = ""
15
- @current_iseq = nil
51
+ @current_iseq = current_iseq
16
52
  end
17
53
 
18
54
  ########################################################################
@@ -20,30 +56,7 @@ module SyntaxTree
20
56
  ########################################################################
21
57
 
22
58
  def calldata(value)
23
- flag_names = []
24
- flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT)
25
- if value.flag?(CallData::CALL_ARGS_BLOCKARG)
26
- flag_names << :ARGS_BLOCKARG
27
- end
28
- flag_names << :FCALL if value.flag?(CallData::CALL_FCALL)
29
- flag_names << :VCALL if value.flag?(CallData::CALL_VCALL)
30
- flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE)
31
- flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ)
32
- flag_names << :KWARG if value.flag?(CallData::CALL_KWARG)
33
- flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT)
34
- flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL)
35
- flag_names << :SUPER if value.flag?(CallData::CALL_SUPER)
36
- flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER)
37
- flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND)
38
- flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT)
39
-
40
- parts = []
41
- parts << "mid:#{value.method}" if value.method
42
- parts << "argc:#{value.argc}"
43
- parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
44
- parts << flag_names.join("|") if flag_names.any?
45
-
46
- "<calldata!#{parts.join(", ")}>"
59
+ value.inspect
47
60
  end
48
61
 
49
62
  def enqueue(iseq)
@@ -97,7 +110,7 @@ module SyntaxTree
97
110
  end
98
111
 
99
112
  ########################################################################
100
- # Main entrypoint
113
+ # Entrypoints
101
114
  ########################################################################
102
115
 
103
116
  def format!
@@ -105,63 +118,13 @@ module SyntaxTree
105
118
  output << "\n" if output.pos > 0
106
119
  format_iseq(@current_iseq)
107
120
  end
108
-
109
- output.string
110
121
  end
111
122
 
112
- private
113
-
114
- def format_iseq(iseq)
115
- output << "#{current_prefix}== disasm: "
116
- output << "#<ISeq:#{iseq.name}@<compiled>:1 "
117
-
118
- location = Location.fixed(line: iseq.line, char: 0, column: 0)
119
- output << "(#{location.start_line},#{location.start_column})-"
120
- output << "(#{location.end_line},#{location.end_column})"
121
- output << "> "
122
-
123
- if iseq.catch_table.any?
124
- output << "(catch: TRUE)\n"
125
- output << "#{current_prefix}== catch table\n"
126
-
127
- with_prefix("#{current_prefix}| ") do
128
- iseq.catch_table.each do |entry|
129
- case entry
130
- when InstructionSequence::CatchBreak
131
- output << "#{current_prefix}catch type: break\n"
132
- format_iseq(entry.iseq)
133
- when InstructionSequence::CatchNext
134
- output << "#{current_prefix}catch type: next\n"
135
- when InstructionSequence::CatchRedo
136
- output << "#{current_prefix}catch type: redo\n"
137
- when InstructionSequence::CatchRescue
138
- output << "#{current_prefix}catch type: rescue\n"
139
- format_iseq(entry.iseq)
140
- end
141
- end
142
- end
143
-
144
- output << "#{current_prefix}|#{"-" * 72}\n"
145
- else
146
- output << "(catch: FALSE)\n"
147
- end
148
-
149
- if (local_table = iseq.local_table) && !local_table.empty?
150
- output << "#{current_prefix}local table (size: #{local_table.size})\n"
151
-
152
- locals =
153
- local_table.locals.each_with_index.map do |local, index|
154
- "[%2d] %s@%d" % [local_table.offset(index), local.name, index]
155
- end
156
-
157
- output << "#{current_prefix}#{locals.join(" ")}\n"
158
- end
159
-
160
- length = 0
123
+ def format_insns!(insns, length = 0)
161
124
  events = []
162
125
  lines = []
163
126
 
164
- iseq.insns.each do |insn|
127
+ insns.each do |insn|
165
128
  case insn
166
129
  when Integer
167
130
  lines << insn
@@ -191,22 +154,83 @@ module SyntaxTree
191
154
  events.clear
192
155
  end
193
156
 
157
+ # A hook here to allow for custom formatting of instructions after
158
+ # the main body has been processed.
159
+ yield insn, length if block_given?
160
+
194
161
  output << "\n"
195
162
  length += insn.length
196
163
  end
197
164
  end
198
165
  end
199
166
 
167
+ def print(string)
168
+ output.print(string)
169
+ end
170
+
171
+ def puts(string)
172
+ output.puts(string)
173
+ end
174
+
175
+ def string
176
+ output.string
177
+ end
178
+
200
179
  def with_prefix(value)
201
180
  previous = @current_prefix
202
181
 
203
182
  begin
204
183
  @current_prefix = value
205
- yield
184
+ yield value
206
185
  ensure
207
186
  @current_prefix = previous
208
187
  end
209
188
  end
189
+
190
+ private
191
+
192
+ def format_iseq(iseq)
193
+ output << "#{current_prefix}== disasm: #{iseq.inspect} "
194
+
195
+ if iseq.catch_table.any?
196
+ output << "(catch: TRUE)\n"
197
+ output << "#{current_prefix}== catch table\n"
198
+
199
+ with_prefix("#{current_prefix}| ") do
200
+ iseq.catch_table.each do |entry|
201
+ case entry
202
+ when InstructionSequence::CatchBreak
203
+ output << "#{current_prefix}catch type: break\n"
204
+ format_iseq(entry.iseq)
205
+ when InstructionSequence::CatchNext
206
+ output << "#{current_prefix}catch type: next\n"
207
+ when InstructionSequence::CatchRedo
208
+ output << "#{current_prefix}catch type: redo\n"
209
+ when InstructionSequence::CatchRescue
210
+ output << "#{current_prefix}catch type: rescue\n"
211
+ format_iseq(entry.iseq)
212
+ end
213
+ end
214
+ end
215
+
216
+ output << "#{current_prefix}|#{"-" * 72}\n"
217
+ else
218
+ output << "(catch: FALSE)\n"
219
+ end
220
+
221
+ if (local_table = iseq.local_table) && !local_table.empty?
222
+ output << "#{current_prefix}local table (size: #{local_table.size})\n"
223
+
224
+ locals =
225
+ local_table.locals.each_with_index.map do |local, index|
226
+ "[%2d] %s@%d" % [local_table.offset(index), local.name, index]
227
+ end
228
+
229
+ output << "#{current_prefix}#{locals.join(" ")}\n"
230
+ end
231
+
232
+ format_insns!(iseq.insns)
233
+ end
210
234
  end
211
235
  end
212
236
  end
@@ -7,6 +7,28 @@ module SyntaxTree
7
7
  # list of instructions along with the metadata pertaining to them. It also
8
8
  # functions as a builder for the instruction sequence.
9
9
  class InstructionSequence
10
+ # This provides a handle to the rb_iseq_load function, which allows you
11
+ # to pass a serialized iseq to Ruby and have it return a
12
+ # RubyVM::InstructionSequence object.
13
+ def self.iseq_load(iseq)
14
+ require "fiddle"
15
+
16
+ @iseq_load_function ||=
17
+ Fiddle::Function.new(
18
+ Fiddle::Handle::DEFAULT["rb_iseq_load"],
19
+ [Fiddle::TYPE_VOIDP] * 3,
20
+ Fiddle::TYPE_VOIDP
21
+ )
22
+
23
+ Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil))
24
+ rescue LoadError
25
+ raise "Could not load the Fiddle library"
26
+ rescue NameError
27
+ raise "Unable to find rb_iseq_load"
28
+ rescue Fiddle::DLError
29
+ raise "Unable to perform a dynamic load"
30
+ end
31
+
10
32
  # When the list of instructions is first being created, it's stored as a
11
33
  # linked list. This is to make it easier to perform peephole optimizations
12
34
  # and other transformations like instruction specialization.
@@ -60,19 +82,6 @@ module SyntaxTree
60
82
 
61
83
  MAGIC = "YARVInstructionSequence/SimpleDataFormat"
62
84
 
63
- # This provides a handle to the rb_iseq_load function, which allows you to
64
- # pass a serialized iseq to Ruby and have it return a
65
- # RubyVM::InstructionSequence object.
66
- ISEQ_LOAD =
67
- begin
68
- Fiddle::Function.new(
69
- Fiddle::Handle::DEFAULT["rb_iseq_load"],
70
- [Fiddle::TYPE_VOIDP] * 3,
71
- Fiddle::TYPE_VOIDP
72
- )
73
- rescue NameError, Fiddle::DLError
74
- end
75
-
76
85
  # This object is used to track the size of the stack at any given time. It
77
86
  # is effectively a mini symbolic interpreter. It's necessary because when
78
87
  # instruction sequences get serialized they include a :stack_max field on
@@ -221,8 +230,7 @@ module SyntaxTree
221
230
  end
222
231
 
223
232
  def eval
224
- raise "Unsupported platform" if ISEQ_LOAD.nil?
225
- Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval
233
+ InstructionSequence.iseq_load(to_a).eval
226
234
  end
227
235
 
228
236
  def to_a
@@ -269,10 +277,27 @@ module SyntaxTree
269
277
  ]
270
278
  end
271
279
 
280
+ def to_cfg
281
+ ControlFlowGraph.compile(self)
282
+ end
283
+
284
+ def to_dfg
285
+ to_cfg.to_dfg
286
+ end
287
+
288
+ def to_son
289
+ to_dfg.to_son
290
+ end
291
+
272
292
  def disasm
273
- disassembler = Disassembler.new
274
- disassembler.enqueue(self)
275
- disassembler.format!
293
+ fmt = Disassembler.new
294
+ fmt.enqueue(self)
295
+ fmt.format!
296
+ fmt.string
297
+ end
298
+
299
+ def inspect
300
+ "#<ISeq:#{name}@<compiled>:1 (#{line},0)-(#{line},0)>"
276
301
  end
277
302
 
278
303
  # This method converts our linked list of instructions into a final array