syntax_tree 5.3.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -1
  3. data/CHANGELOG.md +64 -1
  4. data/Gemfile.lock +2 -2
  5. data/README.md +28 -9
  6. data/Rakefile +12 -8
  7. data/bin/console +1 -0
  8. data/bin/whitequark +79 -0
  9. data/doc/changing_structure.md +16 -0
  10. data/lib/syntax_tree/basic_visitor.rb +44 -5
  11. data/lib/syntax_tree/cli.rb +2 -2
  12. data/lib/syntax_tree/dsl.rb +23 -11
  13. data/lib/syntax_tree/{visitor/field_visitor.rb → field_visitor.rb} +54 -55
  14. data/lib/syntax_tree/formatter.rb +1 -1
  15. data/lib/syntax_tree/index.rb +56 -54
  16. data/lib/syntax_tree/json_visitor.rb +55 -0
  17. data/lib/syntax_tree/language_server.rb +157 -2
  18. data/lib/syntax_tree/match_visitor.rb +120 -0
  19. data/lib/syntax_tree/mermaid.rb +177 -0
  20. data/lib/syntax_tree/mermaid_visitor.rb +69 -0
  21. data/lib/syntax_tree/{visitor/mutation_visitor.rb → mutation_visitor.rb} +27 -27
  22. data/lib/syntax_tree/node.rb +198 -107
  23. data/lib/syntax_tree/parser.rb +322 -118
  24. data/lib/syntax_tree/pretty_print_visitor.rb +83 -0
  25. data/lib/syntax_tree/reflection.rb +241 -0
  26. data/lib/syntax_tree/translation/parser.rb +3019 -0
  27. data/lib/syntax_tree/translation/rubocop_ast.rb +21 -0
  28. data/lib/syntax_tree/translation.rb +28 -0
  29. data/lib/syntax_tree/version.rb +1 -1
  30. data/lib/syntax_tree/with_scope.rb +244 -0
  31. data/lib/syntax_tree/yarv/basic_block.rb +53 -0
  32. data/lib/syntax_tree/yarv/calldata.rb +91 -0
  33. data/lib/syntax_tree/yarv/compiler.rb +110 -100
  34. data/lib/syntax_tree/yarv/control_flow_graph.rb +257 -0
  35. data/lib/syntax_tree/yarv/data_flow_graph.rb +338 -0
  36. data/lib/syntax_tree/yarv/decompiler.rb +1 -1
  37. data/lib/syntax_tree/yarv/disassembler.rb +104 -80
  38. data/lib/syntax_tree/yarv/instruction_sequence.rb +43 -18
  39. data/lib/syntax_tree/yarv/instructions.rb +203 -649
  40. data/lib/syntax_tree/yarv/legacy.rb +12 -24
  41. data/lib/syntax_tree/yarv/sea_of_nodes.rb +534 -0
  42. data/lib/syntax_tree/yarv.rb +18 -0
  43. data/lib/syntax_tree.rb +88 -56
  44. data/tasks/sorbet.rake +277 -0
  45. data/tasks/whitequark.rake +87 -0
  46. metadata +23 -11
  47. data/.gitmodules +0 -9
  48. data/lib/syntax_tree/language_server/inlay_hints.rb +0 -159
  49. data/lib/syntax_tree/visitor/environment.rb +0 -84
  50. data/lib/syntax_tree/visitor/json_visitor.rb +0 -55
  51. data/lib/syntax_tree/visitor/match_visitor.rb +0 -122
  52. data/lib/syntax_tree/visitor/pretty_print_visitor.rb +0 -85
  53. data/lib/syntax_tree/visitor/with_environment.rb +0 -140
@@ -3,16 +3,52 @@
3
3
  module SyntaxTree
4
4
  module YARV
5
5
  class Disassembler
6
+ # This class is another object that handles disassembling a YARV
7
+ # instruction sequence but it renders it without any of the extra spacing
8
+ # or alignment.
9
+ class Squished
10
+ def calldata(value)
11
+ value.inspect
12
+ end
13
+
14
+ def enqueue(iseq)
15
+ end
16
+
17
+ def event(name)
18
+ end
19
+
20
+ def inline_storage(cache)
21
+ "<is:#{cache}>"
22
+ end
23
+
24
+ def instruction(name, operands = [])
25
+ operands.empty? ? name : "#{name} #{operands.join(", ")}"
26
+ end
27
+
28
+ def label(value)
29
+ "%04d" % value.name["label_".length..]
30
+ end
31
+
32
+ def local(index, **)
33
+ index.inspect
34
+ end
35
+
36
+ def object(value)
37
+ value.inspect
38
+ end
39
+ end
40
+
6
41
  attr_reader :output, :queue
42
+
7
43
  attr_reader :current_prefix
8
44
  attr_accessor :current_iseq
9
45
 
10
- def initialize
46
+ def initialize(current_iseq = nil)
11
47
  @output = StringIO.new
12
48
  @queue = []
13
49
 
14
50
  @current_prefix = ""
15
- @current_iseq = nil
51
+ @current_iseq = current_iseq
16
52
  end
17
53
 
18
54
  ########################################################################
@@ -20,30 +56,7 @@ module SyntaxTree
20
56
  ########################################################################
21
57
 
22
58
  def calldata(value)
23
- flag_names = []
24
- flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT)
25
- if value.flag?(CallData::CALL_ARGS_BLOCKARG)
26
- flag_names << :ARGS_BLOCKARG
27
- end
28
- flag_names << :FCALL if value.flag?(CallData::CALL_FCALL)
29
- flag_names << :VCALL if value.flag?(CallData::CALL_VCALL)
30
- flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE)
31
- flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ)
32
- flag_names << :KWARG if value.flag?(CallData::CALL_KWARG)
33
- flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT)
34
- flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL)
35
- flag_names << :SUPER if value.flag?(CallData::CALL_SUPER)
36
- flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER)
37
- flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND)
38
- flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT)
39
-
40
- parts = []
41
- parts << "mid:#{value.method}" if value.method
42
- parts << "argc:#{value.argc}"
43
- parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg
44
- parts << flag_names.join("|") if flag_names.any?
45
-
46
- "<calldata!#{parts.join(", ")}>"
59
+ value.inspect
47
60
  end
48
61
 
49
62
  def enqueue(iseq)
@@ -97,7 +110,7 @@ module SyntaxTree
97
110
  end
98
111
 
99
112
  ########################################################################
100
- # Main entrypoint
113
+ # Entrypoints
101
114
  ########################################################################
102
115
 
103
116
  def format!
@@ -105,63 +118,13 @@ module SyntaxTree
105
118
  output << "\n" if output.pos > 0
106
119
  format_iseq(@current_iseq)
107
120
  end
108
-
109
- output.string
110
121
  end
111
122
 
112
- private
113
-
114
- def format_iseq(iseq)
115
- output << "#{current_prefix}== disasm: "
116
- output << "#<ISeq:#{iseq.name}@<compiled>:1 "
117
-
118
- location = Location.fixed(line: iseq.line, char: 0, column: 0)
119
- output << "(#{location.start_line},#{location.start_column})-"
120
- output << "(#{location.end_line},#{location.end_column})"
121
- output << "> "
122
-
123
- if iseq.catch_table.any?
124
- output << "(catch: TRUE)\n"
125
- output << "#{current_prefix}== catch table\n"
126
-
127
- with_prefix("#{current_prefix}| ") do
128
- iseq.catch_table.each do |entry|
129
- case entry
130
- when InstructionSequence::CatchBreak
131
- output << "#{current_prefix}catch type: break\n"
132
- format_iseq(entry.iseq)
133
- when InstructionSequence::CatchNext
134
- output << "#{current_prefix}catch type: next\n"
135
- when InstructionSequence::CatchRedo
136
- output << "#{current_prefix}catch type: redo\n"
137
- when InstructionSequence::CatchRescue
138
- output << "#{current_prefix}catch type: rescue\n"
139
- format_iseq(entry.iseq)
140
- end
141
- end
142
- end
143
-
144
- output << "#{current_prefix}|#{"-" * 72}\n"
145
- else
146
- output << "(catch: FALSE)\n"
147
- end
148
-
149
- if (local_table = iseq.local_table) && !local_table.empty?
150
- output << "#{current_prefix}local table (size: #{local_table.size})\n"
151
-
152
- locals =
153
- local_table.locals.each_with_index.map do |local, index|
154
- "[%2d] %s@%d" % [local_table.offset(index), local.name, index]
155
- end
156
-
157
- output << "#{current_prefix}#{locals.join(" ")}\n"
158
- end
159
-
160
- length = 0
123
+ def format_insns!(insns, length = 0)
161
124
  events = []
162
125
  lines = []
163
126
 
164
- iseq.insns.each do |insn|
127
+ insns.each do |insn|
165
128
  case insn
166
129
  when Integer
167
130
  lines << insn
@@ -191,22 +154,83 @@ module SyntaxTree
191
154
  events.clear
192
155
  end
193
156
 
157
+ # A hook here to allow for custom formatting of instructions after
158
+ # the main body has been processed.
159
+ yield insn, length if block_given?
160
+
194
161
  output << "\n"
195
162
  length += insn.length
196
163
  end
197
164
  end
198
165
  end
199
166
 
167
+ def print(string)
168
+ output.print(string)
169
+ end
170
+
171
+ def puts(string)
172
+ output.puts(string)
173
+ end
174
+
175
+ def string
176
+ output.string
177
+ end
178
+
200
179
  def with_prefix(value)
201
180
  previous = @current_prefix
202
181
 
203
182
  begin
204
183
  @current_prefix = value
205
- yield
184
+ yield value
206
185
  ensure
207
186
  @current_prefix = previous
208
187
  end
209
188
  end
189
+
190
+ private
191
+
192
+ def format_iseq(iseq)
193
+ output << "#{current_prefix}== disasm: #{iseq.inspect} "
194
+
195
+ if iseq.catch_table.any?
196
+ output << "(catch: TRUE)\n"
197
+ output << "#{current_prefix}== catch table\n"
198
+
199
+ with_prefix("#{current_prefix}| ") do
200
+ iseq.catch_table.each do |entry|
201
+ case entry
202
+ when InstructionSequence::CatchBreak
203
+ output << "#{current_prefix}catch type: break\n"
204
+ format_iseq(entry.iseq)
205
+ when InstructionSequence::CatchNext
206
+ output << "#{current_prefix}catch type: next\n"
207
+ when InstructionSequence::CatchRedo
208
+ output << "#{current_prefix}catch type: redo\n"
209
+ when InstructionSequence::CatchRescue
210
+ output << "#{current_prefix}catch type: rescue\n"
211
+ format_iseq(entry.iseq)
212
+ end
213
+ end
214
+ end
215
+
216
+ output << "#{current_prefix}|#{"-" * 72}\n"
217
+ else
218
+ output << "(catch: FALSE)\n"
219
+ end
220
+
221
+ if (local_table = iseq.local_table) && !local_table.empty?
222
+ output << "#{current_prefix}local table (size: #{local_table.size})\n"
223
+
224
+ locals =
225
+ local_table.locals.each_with_index.map do |local, index|
226
+ "[%2d] %s@%d" % [local_table.offset(index), local.name, index]
227
+ end
228
+
229
+ output << "#{current_prefix}#{locals.join(" ")}\n"
230
+ end
231
+
232
+ format_insns!(iseq.insns)
233
+ end
210
234
  end
211
235
  end
212
236
  end
@@ -7,6 +7,28 @@ module SyntaxTree
7
7
  # list of instructions along with the metadata pertaining to them. It also
8
8
  # functions as a builder for the instruction sequence.
9
9
  class InstructionSequence
10
+ # This provides a handle to the rb_iseq_load function, which allows you
11
+ # to pass a serialized iseq to Ruby and have it return a
12
+ # RubyVM::InstructionSequence object.
13
+ def self.iseq_load(iseq)
14
+ require "fiddle"
15
+
16
+ @iseq_load_function ||=
17
+ Fiddle::Function.new(
18
+ Fiddle::Handle::DEFAULT["rb_iseq_load"],
19
+ [Fiddle::TYPE_VOIDP] * 3,
20
+ Fiddle::TYPE_VOIDP
21
+ )
22
+
23
+ Fiddle.dlunwrap(@iseq_load_function.call(Fiddle.dlwrap(iseq), 0, nil))
24
+ rescue LoadError
25
+ raise "Could not load the Fiddle library"
26
+ rescue NameError
27
+ raise "Unable to find rb_iseq_load"
28
+ rescue Fiddle::DLError
29
+ raise "Unable to perform a dynamic load"
30
+ end
31
+
10
32
  # When the list of instructions is first being created, it's stored as a
11
33
  # linked list. This is to make it easier to perform peephole optimizations
12
34
  # and other transformations like instruction specialization.
@@ -60,19 +82,6 @@ module SyntaxTree
60
82
 
61
83
  MAGIC = "YARVInstructionSequence/SimpleDataFormat"
62
84
 
63
- # This provides a handle to the rb_iseq_load function, which allows you to
64
- # pass a serialized iseq to Ruby and have it return a
65
- # RubyVM::InstructionSequence object.
66
- ISEQ_LOAD =
67
- begin
68
- Fiddle::Function.new(
69
- Fiddle::Handle::DEFAULT["rb_iseq_load"],
70
- [Fiddle::TYPE_VOIDP] * 3,
71
- Fiddle::TYPE_VOIDP
72
- )
73
- rescue NameError, Fiddle::DLError
74
- end
75
-
76
85
  # This object is used to track the size of the stack at any given time. It
77
86
  # is effectively a mini symbolic interpreter. It's necessary because when
78
87
  # instruction sequences get serialized they include a :stack_max field on
@@ -221,8 +230,7 @@ module SyntaxTree
221
230
  end
222
231
 
223
232
  def eval
224
- raise "Unsupported platform" if ISEQ_LOAD.nil?
225
- Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval
233
+ InstructionSequence.iseq_load(to_a).eval
226
234
  end
227
235
 
228
236
  def to_a
@@ -269,10 +277,27 @@ module SyntaxTree
269
277
  ]
270
278
  end
271
279
 
280
+ def to_cfg
281
+ ControlFlowGraph.compile(self)
282
+ end
283
+
284
+ def to_dfg
285
+ to_cfg.to_dfg
286
+ end
287
+
288
+ def to_son
289
+ to_dfg.to_son
290
+ end
291
+
272
292
  def disasm
273
- disassembler = Disassembler.new
274
- disassembler.enqueue(self)
275
- disassembler.format!
293
+ fmt = Disassembler.new
294
+ fmt.enqueue(self)
295
+ fmt.format!
296
+ fmt.string
297
+ end
298
+
299
+ def inspect
300
+ "#<ISeq:#{name}@<compiled>:1 (#{line},0)-(#{line},0)>"
276
301
  end
277
302
 
278
303
  # This method converts our linked list of instructions into a final array