prettier 0.21.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +112 -7
  3. data/CONTRIBUTING.md +4 -4
  4. data/README.md +18 -14
  5. data/package.json +9 -6
  6. data/src/embed.js +27 -8
  7. data/src/nodes.js +5 -2
  8. data/src/nodes/alias.js +29 -31
  9. data/src/nodes/aref.js +26 -26
  10. data/src/nodes/args.js +55 -47
  11. data/src/nodes/arrays.js +132 -106
  12. data/src/nodes/assign.js +32 -32
  13. data/src/nodes/blocks.js +8 -3
  14. data/src/nodes/calls.js +163 -60
  15. data/src/nodes/case.js +11 -7
  16. data/src/nodes/class.js +74 -0
  17. data/src/nodes/commands.js +36 -31
  18. data/src/nodes/conditionals.js +44 -30
  19. data/src/nodes/constants.js +39 -21
  20. data/src/nodes/flow.js +11 -1
  21. data/src/nodes/hashes.js +90 -109
  22. data/src/nodes/heredocs.js +34 -0
  23. data/src/nodes/hooks.js +21 -22
  24. data/src/nodes/ints.js +27 -20
  25. data/src/nodes/lambdas.js +14 -27
  26. data/src/nodes/loops.js +10 -5
  27. data/src/nodes/massign.js +87 -65
  28. data/src/nodes/methods.js +48 -73
  29. data/src/nodes/operators.js +70 -39
  30. data/src/nodes/params.js +26 -16
  31. data/src/nodes/patterns.js +108 -33
  32. data/src/nodes/regexp.js +45 -14
  33. data/src/nodes/rescue.js +72 -59
  34. data/src/nodes/statements.js +86 -44
  35. data/src/nodes/strings.js +95 -85
  36. data/src/nodes/super.js +35 -0
  37. data/src/nodes/undef.js +42 -0
  38. data/src/parser.js +86 -0
  39. data/src/parser.rb +2400 -621
  40. data/src/printer.js +90 -0
  41. data/src/ruby.js +19 -41
  42. data/src/toProc.js +4 -4
  43. data/src/utils.js +24 -88
  44. data/src/utils/literalLineNoBreak.js +7 -0
  45. data/src/utils/printEmptyCollection.js +42 -0
  46. metadata +12 -49
  47. data/src/nodes/scopes.js +0 -61
  48. data/src/parse.js +0 -37
  49. data/src/print.js +0 -23
@@ -0,0 +1,86 @@
1
+ const { spawnSync } = require("child_process");
2
+ const path = require("path");
3
+
4
+ // In order to properly parse ruby code, we need to tell the ruby process to
5
+ // parse using UTF-8. Unfortunately, the way that you accomplish this looks
6
+ // different depending on your platform.
7
+ const LANG = (() => {
8
+ const { env, platform } = process;
9
+ const envValue = env.LC_ALL || env.LC_CTYPE || env.LANG;
10
+
11
+ // If an env var is set for the locale that already includes UTF-8 in the
12
+ // name, then assume we can go with that.
13
+ if (envValue && envValue.includes("UTF-8")) {
14
+ return envValue;
15
+ }
16
+
17
+ // Otherwise, we're going to guess which encoding to use based on the system.
18
+ // This is probably not the best approach in the world, as you could be on
19
+ // linux and not have C.UTF-8, but in that case you're probably passing an env
20
+ // var for it. This object below represents all of the possible values of
21
+ // process.platform per:
22
+ // https://nodejs.org/api/process.html#process_process_platform
23
+ return {
24
+ aix: "C.UTF-8",
25
+ darwin: "en_US.UTF-8",
26
+ freebsd: "C.UTF-8",
27
+ linux: "C.UTF-8",
28
+ openbsd: "C.UTF-8",
29
+ sunos: "C.UTF-8",
30
+ win32: ".UTF-8"
31
+ }[platform];
32
+ })();
33
+
34
+ // This function is responsible for taking an input string of text and returning
35
+ // to prettier a JavaScript object that is the equivalent AST that represents
36
+ // the code stored in that string. We accomplish this by spawning a new Ruby
37
+ // process of parser.rb and reading JSON off STDOUT.
38
+ function parse(text, _parsers, _opts) {
39
+ const child = spawnSync(
40
+ "ruby",
41
+ ["--disable-gems", path.join(__dirname, "./parser.rb")],
42
+ {
43
+ env: Object.assign({}, process.env, { LANG }),
44
+ input: text,
45
+ maxBuffer: 10 * 1024 * 1024 // 10MB
46
+ }
47
+ );
48
+
49
+ const error = child.stderr.toString();
50
+ if (error) {
51
+ throw new Error(error);
52
+ }
53
+
54
+ const response = child.stdout.toString();
55
+ return JSON.parse(response);
56
+ }
57
+
58
+ const pragmaPattern = /#\s*@(prettier|format)/;
59
+
60
+ // This function handles checking whether or not the source string has the
61
+ // pragma for prettier. This is an optional workflow for incremental adoption.
62
+ function hasPragma(text) {
63
+ return pragmaPattern.test(text);
64
+ }
65
+
66
+ // This function is critical for comments and cursor support, and is responsible
67
+ // for returning the index of the character within the source string that is the
68
+ // beginning of the given node.
69
+ function locStart(node) {
70
+ return node.char_start;
71
+ }
72
+
73
+ // This function is critical for comments and cursor support, and is responsible
74
+ // for returning the index of the character within the source string that is the
75
+ // ending of the given node.
76
+ function locEnd(node) {
77
+ return node.char_end;
78
+ }
79
+
80
+ module.exports = {
81
+ parse,
82
+ astFormat: "ruby",
83
+ hasPragma,
84
+ locStart,
85
+ locEnd
86
+ };
@@ -2,9 +2,9 @@
2
2
 
3
3
  # We implement our own version checking here instead of using Gem::Version so
4
4
  # that we can use the --disable-gems flag.
5
- major, minor, * = RUBY_VERSION.split('.').map(&:to_i)
5
+ RUBY_MAJOR, RUBY_MINOR, RUBY_PATCH, * = RUBY_VERSION.split('.').map(&:to_i)
6
6
 
7
- if (major < 2) || ((major == 2) && (minor < 5))
7
+ if (RUBY_MAJOR < 2) || ((RUBY_MAJOR == 2) && (RUBY_MINOR < 5))
8
8
  warn(
9
9
  "Ruby version #{RUBY_VERSION} not supported. " \
10
10
  'Please upgrade to 2.5.0 or above.'
@@ -13,761 +13,2540 @@ if (major < 2) || ((major == 2) && (minor < 5))
13
13
  exit 1
14
14
  end
15
15
 
16
+ require 'delegate'
16
17
  require 'json' unless defined?(JSON)
17
18
  require 'ripper'
18
19
 
19
20
  module Prettier; end
20
21
 
21
22
  class Prettier::Parser < Ripper
22
- attr_reader :source, :lines, :__end__
23
+ attr_reader :source, :lines, :scanner_events, :line_counts
23
24
 
24
25
  def initialize(source, *args)
25
26
  super(source, *args)
26
27
 
27
28
  @source = source
28
29
  @lines = source.split("\n")
30
+
31
+ @comments = []
32
+ @embdoc = nil
29
33
  @__end__ = nil
34
+
35
+ @heredocs = []
36
+
37
+ @scanner_events = []
38
+ @line_counts = [0]
39
+
40
+ @source.lines.each { |line| @line_counts << @line_counts.last + line.size }
30
41
  end
31
42
 
32
43
  private
33
44
 
45
+ # This represents the current place in the source string that we've gotten to
46
+ # so far. We have a memoized line_counts object that we can use to get the
47
+ # number of characters that we've had to go through to get to the beginning of
48
+ # this line, then we add the number of columns into this line that we've gone
49
+ # through.
50
+ def char_pos
51
+ line_counts[lineno - 1] + column
52
+ end
53
+
54
+ # As we build up a list of scanner events, we'll periodically need to go
55
+ # backwards and find the ones that we've already hit in order to determine the
56
+ # location information for nodes that use them. For example, if you have a
57
+ # module node then you'll look backward for a @module scanner event to
58
+ # determine your start location.
59
+ #
60
+ # This works with nesting since we're deleting scanner events from the list
61
+ # once they've been used up. For example if you had nested module declarations
62
+ # then the innermost declaration would grab the last @module event (which
63
+ # would happen to be the innermost keyword). Then the outer one would only be
64
+ # able to grab the first one. In this way all of the scanner events act as
65
+ # their own stack.
66
+ def find_scanner_event(type, body = :any)
67
+ index =
68
+ scanner_events.rindex do |scanner_event|
69
+ scanner_event[:type] == type &&
70
+ (body == :any || (scanner_event[:body] == body))
71
+ end
72
+
73
+ scanner_events.delete_at(index)
74
+ end
75
+
34
76
  # Scanner events occur when the lexer hits a new token, like a keyword or an
35
77
  # end. These nodes always contain just one argument which is a string
36
78
  # representing the content. For the most part these can just be printed
37
79
  # directly, which very few exceptions.
38
- SCANNER_EVENTS.each do |event|
39
- define_method(:"on_#{event}") do |body|
40
- { type: :"@#{event}", body: body, start: lineno, end: lineno }
80
+ defined = %i[
81
+ comment
82
+ embdoc
83
+ embdoc_beg
84
+ embdoc_end
85
+ heredoc_beg
86
+ heredoc_end
87
+ ignored_nl
88
+ ]
89
+
90
+ (SCANNER_EVENTS - defined).each do |event|
91
+ define_method(:"on_#{event}") do |value|
92
+ char_end = char_pos + value.size
93
+ node = {
94
+ type: :"@#{event}",
95
+ body: value,
96
+ start: lineno,
97
+ end: lineno,
98
+ char_start: char_pos,
99
+ char_end: char_end
100
+ }
101
+
102
+ scanner_events << node
103
+ node
41
104
  end
42
105
  end
43
106
 
44
- # Parser events represent nodes in the ripper abstract syntax tree. The event
45
- # is reported after the children of the node have already been built.
46
- PARSER_EVENTS.each do |event|
47
- define_method(:"on_#{event}") do |*body|
48
- min = body.map { |part| part.is_a?(Hash) ? part[:start] : lineno }.min
49
- { type: event, body: body, start: min || lineno, end: lineno }
50
- end
107
+ # We keep track of each comment as it comes in and then eventually add
108
+ # them to the top of the generated AST so that prettier can start adding
109
+ # them back into the final representation. Comments come in including
110
+ # their starting pound sign and the newline at the end, so we also chop
111
+ # those off.
112
+ #
113
+ # If there is an encoding magic comment at the top of the file, ripper
114
+ # will actually change into that encoding for the storage of the string.
115
+ # This will break everything, so we need to force the encoding back into
116
+ # UTF-8 so that the JSON library won't break.
117
+ def on_comment(value)
118
+ @comments << {
119
+ type: :@comment,
120
+ value: value[1..-1].chomp.force_encoding('UTF-8'),
121
+ start: lineno,
122
+ end: lineno,
123
+ char_start: char_pos,
124
+ char_end: char_pos + value.length - 1
125
+ }
126
+ end
127
+
128
+ # ignored_nl is a special kind of scanner event that passes nil as the value,
129
+ # so we can't do our normal tracking of value.size. Instead of adding a
130
+ # condition to the main SCANNER_EVENTS loop above, we'll just explicitly
131
+ # define the method here. You can trigger the ignored_nl event with the
132
+ # following snippet:
133
+ #
134
+ # foo.bar
135
+ # .baz
136
+ #
137
+ def on_ignored_nl(value)
138
+ {
139
+ type: :ignored_nl,
140
+ body: nil,
141
+ start: lineno,
142
+ end: lineno,
143
+ char_start: char_pos,
144
+ char_end: char_pos
145
+ }
51
146
  end
52
147
 
53
- # Some nodes are lists that come back from the parser. They always start with
54
- # a `*_new` node (or in the case of string, `*_content`) and each additional
55
- # node in the list is a `*_add` node. This module takes those nodes and turns
56
- # them into one node with an array body.
57
- #
58
- # For example, the statement `[a, b, c]` would be parsed as:
59
- #
60
- # [:args_add,
61
- # [:args_add,
62
- # [:args_add,
63
- # [:args_new],
64
- # [:vcall, [:@ident, "a", [1, 1]]]
65
- # ],
66
- # [:vcall, [:@ident, "b", [1, 4]]]
67
- # ],
68
- # [:vcall, [:@ident, "c", [1, 7]]]
69
- # ]
70
- #
71
- # But after this module is applied that is instead parsed as:
72
- #
73
- # [:args,
74
- # [
75
- # [:vcall, [:@ident, "a", [1, 1]]],
76
- # [:vcall, [:@ident, "b", [1, 4]]],
77
- # [:vcall, [:@ident, "c", [1, 7]]]
78
- # ]
79
- # ]
80
- #
81
- # This makes it a lot easier to join things with commas, and ends up resulting
82
- # in a much flatter `prettier` tree once it has been converted. Note that
83
- # because of this module some extra node types are added (the aggregate of
84
- # the previous `*_add` nodes) and some nodes now have arrays in places where
85
- # they previously had single nodes.
86
148
  prepend(
87
149
  Module.new do
88
- events = %i[
89
- args
90
- mlhs
91
- mrhs
92
- qsymbols
93
- qwords
94
- regexp
95
- stmts
96
- string
97
- symbols
98
- words
99
- xstring
100
- ]
101
-
102
150
  private
103
151
 
104
- events.each do |event|
105
- suffix = event == :string ? 'content' : 'new'
152
+ # Handles __END__ syntax, which allows individual scripts to keep content
153
+ # after the main ruby code that can be read through DATA. It looks like:
154
+ #
155
+ # foo.bar
156
+ #
157
+ # __END__
158
+ # some other content that isn't normally read by ripper
159
+ def on___end__(*)
160
+ @__end__ = super(lines[lineno..-1].join("\n"))
161
+ end
106
162
 
107
- define_method(:"on_#{event}_#{suffix}") do
108
- { type: event, body: [], start: lineno, end: lineno }
109
- end
163
+ # Like comments, we need to force the encoding here so JSON doesn't break.
164
+ def on_ident(value)
165
+ super(value.force_encoding('UTF-8'))
166
+ end
110
167
 
111
- define_method(:"on_#{event}_add") do |parts, part|
112
- parts.tap do |node|
113
- node[:body] << part
114
- node[:end] = lineno
115
- end
116
- end
168
+ # Like comments, we need to force the encoding here so JSON doesn't break.
169
+ def on_tstring_content(value)
170
+ super(value.force_encoding('UTF-8'))
117
171
  end
118
172
  end
119
173
  )
120
174
 
121
- # For each node, we need to attach where it came from in order to be able to
122
- # support placing the cursor correctly before and after formatting.
175
+ # A BEGIN node is a parser event that represents the use of the BEGIN
176
+ # keyword, which hooks into the lifecycle of the interpreter. It's a bit
177
+ # of a legacy from the stream operating days, and gets its inspiration
178
+ # from tools like awk. Whatever is inside the "block" will get executed
179
+ # when the program starts. The syntax looks like the following:
123
180
  #
124
- # For most nodes, it's enough to look at the child nodes to determine the
125
- # start of the parent node. However, for some nodes it's necessary to keep
126
- # track of the keywords as they come in from the lexer and to modify the start
127
- # node once we have it.
128
- prepend(
129
- Module.new do
130
- def initialize(source, *args)
131
- super(source, *args)
181
+ # BEGIN {
182
+ # # execute stuff here
183
+ # }
184
+ #
185
+ def on_BEGIN(stmts)
186
+ beging = find_scanner_event(:@lbrace)
187
+ ending = find_scanner_event(:@rbrace)
132
188
 
133
- @scanner_events = []
134
- @line_counts = [0]
189
+ stmts.bind(
190
+ find_next_statement_start(beging[:char_end]),
191
+ ending[:char_start]
192
+ )
135
193
 
136
- source.lines.each { |line| line_counts << line_counts.last + line.size }
137
- end
194
+ find_scanner_event(:@kw, 'BEGIN').merge!(
195
+ type: :BEGIN,
196
+ body: [beging, stmts],
197
+ end: ending[:end],
198
+ char_end: ending[:char_end]
199
+ )
200
+ end
138
201
 
139
- def self.prepended(base)
140
- base.attr_reader :scanner_events, :line_counts
141
- end
202
+ # An END node is a parser event that represents the use of the END keyword,
203
+ # which hooks into the lifecycle of the interpreter. It's a bit of a
204
+ # legacy from the stream operating days, and gets its inspiration from
205
+ # tools like awk. Whatever is inside the "block" will get executed when
206
+ # the program ends. The syntax looks like the following:
207
+ #
208
+ # END {
209
+ # # execute stuff here
210
+ # }
211
+ #
212
+ def on_END(stmts)
213
+ beging = find_scanner_event(:@lbrace)
214
+ ending = find_scanner_event(:@rbrace)
142
215
 
143
- private
216
+ stmts.bind(
217
+ find_next_statement_start(beging[:char_end]),
218
+ ending[:char_start]
219
+ )
144
220
 
145
- def char_pos
146
- line_counts[lineno - 1] + column
147
- end
221
+ find_scanner_event(:@kw, 'END').merge!(
222
+ type: :END,
223
+ body: [beging, stmts],
224
+ end: ending[:end],
225
+ char_end: ending[:char_end]
226
+ )
227
+ end
148
228
 
149
- def char_start_for(body)
150
- children = body.length == 1 && body[0].is_a?(Array) ? body[0] : body
151
- char_starts =
152
- children.map { |part| part[:char_start] if part.is_a?(Hash) }.compact
229
+ # alias is a parser event that represents when you're using the alias
230
+ # keyword with regular arguments. This can be either symbol literals or
231
+ # bare words. You can optionally use parentheses with this keyword, so we
232
+ # either track the location information based on those or the final
233
+ # argument to the alias method.
234
+ def on_alias(left, right)
235
+ beging = find_scanner_event(:@kw, 'alias')
236
+
237
+ paren = source[beging[:char_end]...left[:char_start]].include?('(')
238
+ ending = paren ? find_scanner_event(:@rparen) : right
239
+
240
+ {
241
+ type: :alias,
242
+ body: [left, right],
243
+ start: beging[:start],
244
+ char_start: beging[:char_start],
245
+ end: ending[:end],
246
+ char_end: ending[:char_end]
247
+ }
248
+ end
153
249
 
154
- char_starts.min || char_pos
155
- end
250
+ # aref nodes are when you're pulling a value out of a collection at a
251
+ # specific index. Put another way, it's any time you're calling the method
252
+ # #[]. As an example:
253
+ #
254
+ # foo[index]
255
+ #
256
+ # The node usually contains two children, the collection and the index.
257
+ # In some cases, you don't necessarily have the second child node, because
258
+ # you can call procs with a pretty esoteric syntax. In the following
259
+ # example, you wouldn't have a second child, and "foo" would be the first
260
+ # child:
261
+ #
262
+ # foo[]
263
+ #
264
+ def on_aref(collection, index)
265
+ find_scanner_event(:@lbracket)
266
+ ending = find_scanner_event(:@rbracket)
267
+
268
+ {
269
+ type: :aref,
270
+ body: [collection, index],
271
+ start: collection[:start],
272
+ char_start: collection[:char_start],
273
+ end: ending[:end],
274
+ char_end: ending[:char_end]
275
+ }
276
+ end
156
277
 
157
- def find_scanner_event(type, body = :any)
158
- index =
159
- scanner_events.rindex do |scanner_event|
160
- scanner_event[:type] == type &&
161
- (body == :any || (scanner_event[:body] == body))
162
- end
278
+ # aref_field is a parser event that is very similar to aref except that it
279
+ # is being used inside of an assignment.
280
+ def on_aref_field(collection, index)
281
+ find_scanner_event(:@lbracket)
282
+ ending = find_scanner_event(:@rbracket)
283
+
284
+ {
285
+ type: :aref_field,
286
+ body: [collection, index],
287
+ start: collection[:start],
288
+ char_start: collection[:char_start],
289
+ end: ending[:end],
290
+ char_end: ending[:char_end]
291
+ }
292
+ end
163
293
 
164
- scanner_events.delete_at(index)
165
- end
294
+ # args_new is a parser event that represents the beginning of a list of
295
+ # arguments to any method call or an array. It can be followed by any
296
+ # number of args_add events, which we'll append onto an array body.
297
+ def on_args_new
298
+ {
299
+ type: :args,
300
+ body: [],
301
+ start: lineno,
302
+ char_start: char_pos,
303
+ end: lineno,
304
+ char_end: char_pos
305
+ }
306
+ end
166
307
 
167
- events = {
168
- BEGIN: [:@kw, 'BEGIN'],
169
- END: [:@kw, 'END'],
170
- alias: [:@kw, 'alias'],
171
- assoc_splat: [:@op, '**'],
172
- arg_paren: :@lparen,
173
- args_add_star: [:@op, '*'],
174
- args_forward: [:@op, '...'],
175
- begin: [:@kw, 'begin'],
176
- blockarg: [:@op, '&'],
177
- brace_block: :@lbrace,
178
- break: [:@kw, 'break'],
179
- case: [:@kw, 'case'],
180
- class: [:@kw, 'class'],
181
- def: [:@kw, 'def'],
182
- defined: [:@kw, 'defined?'],
183
- defs: [:@kw, 'def'],
184
- do_block: [:@kw, 'do'],
185
- else: [:@kw, 'else'],
186
- elsif: [:@kw, 'elsif'],
187
- ensure: [:@kw, 'ensure'],
188
- excessed_comma: :@comma,
189
- for: [:@kw, 'for'],
190
- hash: :@lbrace,
191
- if: [:@kw, 'if'],
192
- in: [:@kw, 'in'],
193
- kwrest_param: [:@op, '**'],
194
- lambda: :@tlambda,
195
- mlhs_paren: :@lparen,
196
- mrhs_add_star: [:@op, '*'],
197
- module: [:@kw, 'module'],
198
- next: [:@kw, 'next'],
199
- paren: :@lparen,
200
- qsymbols_new: :@qsymbols_beg,
201
- qwords_new: :@qwords_beg,
202
- redo: [:@kw, 'redo'],
203
- regexp_literal: :@regexp_beg,
204
- rescue: [:@kw, 'rescue'],
205
- rest_param: [:@op, '*'],
206
- retry: [:@kw, 'retry'],
207
- return0: [:@kw, 'return'],
208
- return: [:@kw, 'return'],
209
- sclass: [:@kw, 'class'],
210
- string_dvar: :@embvar,
211
- string_embexpr: :@embexpr_beg,
212
- super: [:@kw, 'super'],
213
- symbols_new: :@symbols_beg,
214
- top_const_field: [:@op, '::'],
215
- top_const_ref: [:@op, '::'],
216
- undef: [:@kw, 'undef'],
217
- unless: [:@kw, 'unless'],
218
- until: [:@kw, 'until'],
219
- var_alias: [:@kw, 'alias'],
220
- when: [:@kw, 'when'],
221
- while: [:@kw, 'while'],
222
- words_new: :@words_beg,
223
- xstring_literal: :@backtick,
224
- yield0: [:@kw, 'yield'],
225
- yield: [:@kw, 'yield'],
226
- zsuper: [:@kw, 'super']
308
+ # args_add is a parser event that represents a single argument inside a
309
+ # list of arguments to any method call or an array. It accepts as
310
+ # arguments the parent args node as well as an arg which can be anything
311
+ # that could be passed as an argument.
312
+ def on_args_add(args, arg)
313
+ if args[:body].empty?
314
+ arg.merge(type: :args, body: [arg])
315
+ else
316
+ args.merge!(
317
+ body: args[:body] << arg,
318
+ end: arg[:end],
319
+ char_end: arg[:char_end]
320
+ )
321
+ end
322
+ end
323
+
324
+ # args_add_block is a parser event that represents a list of arguments and
325
+ # potentially a block argument. If no block is passed, then the second
326
+ # argument will be false.
327
+ def on_args_add_block(args, block)
328
+ ending = block || args
329
+
330
+ args.merge(
331
+ type: :args_add_block,
332
+ body: [args, block],
333
+ end: ending[:end],
334
+ char_end: ending[:char_end]
335
+ )
336
+ end
337
+
338
+ # args_add_star is a parser event that represents adding a splat of values
339
+ # to a list of arguments. It accepts as arguments the parent args node as
340
+ # well as the part that is being splatted.
341
+ def on_args_add_star(args, part)
342
+ beging = find_scanner_event(:@op, '*')
343
+ ending = part || beging
344
+
345
+ {
346
+ type: :args_add_star,
347
+ body: [args, part],
348
+ start: beging[:start],
349
+ char_start: beging[:char_start],
350
+ end: ending[:end],
351
+ char_end: ending[:char_end]
352
+ }
353
+ end
354
+
355
+ # args_forward is a parser event that represents forwarding all kinds of
356
+ # arguments onto another method call.
357
+ def on_args_forward
358
+ find_scanner_event(:@op, '...').merge!(type: :args_forward)
359
+ end
360
+
361
+ # arg_paren is a parser event that represents wrapping arguments to a
362
+ # method inside a set of parentheses.
363
+ def on_arg_paren(args)
364
+ beging = find_scanner_event(:@lparen)
365
+ rparen = find_scanner_event(:@rparen)
366
+
367
+ # If the arguments exceed the ending of the parentheses, then we know we
368
+ # have a heredoc in the arguments, and we need to use the bounds of the
369
+ # arguments to determine how large the arg_paren is.
370
+ ending = (args && args[:end] > rparen[:end]) ? args : rparen
371
+
372
+ {
373
+ type: :arg_paren,
374
+ body: [args],
375
+ start: beging[:start],
376
+ char_start: beging[:char_start],
377
+ end: ending[:end],
378
+ char_end: ending[:char_end]
379
+ }
380
+ end
381
+
382
+ # Array nodes can contain a myriad of subnodes because of the special
383
+ # array literal syntax like %w and %i. As a result, we may be looking for
384
+ # a left bracket, or we may be just looking at the children to get the
385
+ # bounds.
386
+ def on_array(contents)
387
+ if !contents || %i[args args_add_star].include?(contents[:type])
388
+ beging = find_scanner_event(:@lbracket)
389
+ ending = find_scanner_event(:@rbracket)
390
+
391
+ {
392
+ type: :array,
393
+ body: [contents],
394
+ start: beging[:start],
395
+ char_start: beging[:char_start],
396
+ end: ending[:end],
397
+ char_end: ending[:char_end]
227
398
  }
399
+ else
400
+ ending = find_scanner_event(:@tstring_end)
401
+ contents[:char_end] = ending[:char_end]
402
+
403
+ ending.merge!(
404
+ type: :array,
405
+ body: [contents],
406
+ start: contents[:start],
407
+ char_start: contents[:char_start]
408
+ )
409
+ end
410
+ end
228
411
 
229
- events.each do |event, (type, scanned)|
230
- define_method(:"on_#{event}") do |*body|
231
- node = find_scanner_event(type, scanned || :any)
412
+ # aryptn is a parser event that represents matching against an array pattern
413
+ # using the Ruby 2.7+ pattern matching syntax.
414
+ def on_aryptn(const, preargs, splatarg, postargs)
415
+ pieces = [const, *preargs, splatarg, *postargs].compact
416
+
417
+ {
418
+ type: :aryptn,
419
+ body: [const, preargs, splatarg, postargs],
420
+ start: pieces[0][:start],
421
+ char_start: pieces[0][:char_start],
422
+ end: pieces[-1][:end],
423
+ char_end: pieces[-1][:char_end]
424
+ }
425
+ end
232
426
 
233
- super(*body).merge!(
234
- start: node[:start],
235
- char_start: node[:char_start],
236
- char_end: char_pos
237
- )
238
- end
239
- end
427
+ # assign is a parser event that represents assigning something to a
428
+ # variable or constant. It accepts as arguments the left side of the
429
+ # expression before the equals sign and the right side of the expression.
430
+ def on_assign(left, right)
431
+ left.merge(
432
+ type: :assign,
433
+ body: [left, right],
434
+ end: right[:end],
435
+ char_end: right[:char_end]
436
+ )
437
+ end
240
438
 
241
- # Array nodes can contain a myriad of subnodes because of the special
242
- # array literal syntax like %w and %i. As a result, we may be looking for
243
- # an left bracket, or we may be just looking at the children.
244
- def on_array(*body)
245
- if body[0] && %i[args args_add_star].include?(body[0][:type])
246
- node = find_scanner_event(:@lbracket)
247
-
248
- super(*body).merge!(
249
- start: node[:start],
250
- char_start: node[:char_start],
251
- char_end: char_pos
252
- )
253
- else
254
- super(*body).merge!(
255
- char_start: char_start_for(body), char_end: char_pos
256
- )
257
- end
258
- end
439
+ # assoc_new is a parser event that contains a key-value pair within a
440
+ # hash. It is a child event of either an assoclist_from_args or a
441
+ # bare_assoc_hash.
442
+ def on_assoc_new(key, value)
443
+ {
444
+ type: :assoc_new,
445
+ body: [key, value],
446
+ start: key[:start],
447
+ char_start: key[:char_start],
448
+ end: value[:end],
449
+ char_end: value[:char_end]
450
+ }
451
+ end
259
452
 
260
- # Array pattern nodes contain an odd mix of potential child nodes based on
261
- # which kind of pattern is being used.
262
- def on_aryptn(*body)
263
- char_start, char_end = char_pos, char_pos
453
+ # assoc_splat is a parser event that represents splatting a value into a
454
+ # hash (either a hash literal or a bare hash in a method call).
455
+ def on_assoc_splat(contents)
456
+ find_scanner_event(:@op, '**').merge!(
457
+ type: :assoc_splat,
458
+ body: [contents],
459
+ end: contents[:end],
460
+ char_end: contents[:char_end]
461
+ )
462
+ end
264
463
 
265
- body.flatten(1).each do |part|
266
- next unless part
464
+ # assoclist_from_args is a parser event that contains a list of all of the
465
+ # associations inside of a hash literal. Its parent node is always a hash.
466
+ # It accepts as an argument an array of assoc events (either assoc_new or
467
+ # assoc_splat).
468
+ def on_assoclist_from_args(assocs)
469
+ {
470
+ type: :assoclist_from_args,
471
+ body: assocs,
472
+ start: assocs[0][:start],
473
+ char_start: assocs[0][:char_start],
474
+ end: assocs[-1][:end],
475
+ char_end: assocs[-1][:char_end]
476
+ }
477
+ end
267
478
 
268
- char_start = [char_start, part[:char_start]].min
269
- char_end = [char_end, part[:char_end]].max
270
- end
479
+ # bare_assoc_hash is a parser event that represents a hash of contents
480
+ # being passed as a method argument (and therefore has omitted braces). It
481
+ # accepts as an argument an array of assoc events (either assoc_new or
482
+ # assoc_splat).
483
+ def on_bare_assoc_hash(assoc_news)
484
+ {
485
+ type: :bare_assoc_hash,
486
+ body: assoc_news,
487
+ start: assoc_news[0][:start],
488
+ char_start: assoc_news[0][:char_start],
489
+ end: assoc_news[-1][:end],
490
+ char_end: assoc_news[-1][:char_end]
491
+ }
492
+ end
271
493
 
272
- super(*body).merge!(char_start: char_start, char_end: char_end)
494
+ # begin is a parser event that represents the beginning of a begin..end chain.
495
+ # It includes a bodystmt event that has all of the consequent clauses.
496
+ def on_begin(bodystmt)
497
+ beging = find_scanner_event(:@kw, 'begin')
498
+ char_end =
499
+ if bodystmt[:body][1..-1].any?
500
+ bodystmt[:char_end]
501
+ else
502
+ find_scanner_event(:@kw, 'end')[:char_end]
273
503
  end
274
504
 
275
- # Params have a somewhat interesting structure in that they are an array
276
- # of arrays where the position in the top-level array indicates the type
277
- # of param and the subarray is the list of parameters of that type. We
278
- # therefore have to flatten them down to get to the location.
279
- def on_params(*body)
280
- super(*body).merge!(
281
- char_start: char_start_for(body.flatten(1)), char_end: char_pos
282
- )
283
- end
505
+ bodystmt.bind(beging[:char_end], char_end)
284
506
 
285
- # String literals and either contain string parts or a heredoc. If it
286
- # contains a heredoc we can just go directly to the child nodes, otherwise
287
- # we need to look for a `tstring_beg`.
288
- def on_string_literal(*body)
289
- if body[0][:type] == :heredoc
290
- super(*body).merge!(
291
- char_start: char_start_for(body), char_end: char_pos
292
- )
293
- else
294
- node = find_scanner_event(:@tstring_beg)
295
-
296
- super(*body).merge!(
297
- start: node[:start],
298
- char_start: node[:char_start],
299
- char_end: char_pos,
300
- quote: node[:body]
301
- )
302
- end
303
- end
507
+ beging.merge!(
508
+ type: :begin,
509
+ body: [bodystmt],
510
+ end: bodystmt[:end],
511
+ char_end: bodystmt[:char_end]
512
+ )
513
+ end
304
514
 
305
- # Technically, the `not` operator is a unary operator but is reported as
306
- # a keyword and not an operator. Because of the inconsistency, we have to
307
- # manually look for the correct scanner event here.
308
- def on_unary(*body)
309
- node =
310
- if body[0] == :not
311
- find_scanner_event(:@kw, 'not')
312
- else
313
- find_scanner_event(:@op)
314
- end
315
-
316
- super(*body).merge!(
317
- start: node[:start], char_start: node[:char_start], char_end: char_pos
318
- )
319
- end
515
+ # binary is a parser event that represents a binary operation between two
516
+ # values.
517
+ def on_binary(left, oper, right)
518
+ {
519
+ type: :binary,
520
+ body: [left, oper, right],
521
+ start: left[:start],
522
+ char_start: left[:char_start],
523
+ end: right[:end],
524
+ char_end: right[:char_end]
525
+ }
526
+ end
320
527
 
321
- # Symbols don't necessarily have to have a @symbeg event fired before they
322
- # start. For example, you can have symbol literals inside an `alias` node
323
- # if you're just using bare words, as in: `alias foo bar`. So this is a
324
- # special case in which if there is a `:@symbeg` event we can hook on to
325
- # then we use it, otherwise we just look at the beginning of the first
326
- # child node.
327
- %i[dyna_symbol symbol_literal].each do |event|
328
- define_method(:"on_#{event}") do |*body|
329
- options =
330
- if scanner_events.any? { |sevent| sevent[:type] == :@symbeg }
331
- symbeg = find_scanner_event(:@symbeg)
332
-
333
- {
334
- char_start: symbeg[:char_start],
335
- char_end: char_pos,
336
- quote: symbeg[:body][1]
337
- }
338
- elsif scanner_events.any? { |sevent| sevent[:type] == :@label_end }
339
- label_end = find_scanner_event(:@label_end)
340
-
341
- {
342
- char_start: char_start_for(body),
343
- char_end: char_pos,
344
- quote: label_end[:body][0]
345
- }
346
- else
347
- { char_start: char_start_for(body), char_end: char_pos }
348
- end
349
-
350
- super(*body).merge!(options)
351
- end
528
+ # block_var is a parser event that represents the parameters being passed to
529
+ # a block. Effectively they're everything contained within the pipes.
530
+ def on_block_var(params, locals)
531
+ index =
532
+ scanner_events.rindex do |event|
533
+ event[:type] == :@op && %w[| ||].include?(event[:body]) &&
534
+ event[:char_start] < params[:char_start]
352
535
  end
353
536
 
354
- def on_program(*body)
355
- super(*body).merge!(start: 1, char_start: 0, char_end: char_pos)
537
+ beging = scanner_events[index]
538
+ ending = scanner_events[-1]
539
+
540
+ {
541
+ type: :block_var,
542
+ body: [params, locals],
543
+ start: beging[:start],
544
+ char_start: beging[:char_start],
545
+ end: ending[:end],
546
+ char_end: ending[:char_end]
547
+ }
548
+ end
549
+
550
+ # blockarg is a parser event that represents defining a block variable on
551
+ # a method definition.
552
+ def on_blockarg(ident)
553
+ find_scanner_event(:@op, '&').merge!(
554
+ type: :blockarg,
555
+ body: [ident],
556
+ end: ident[:end],
557
+ char_end: ident[:char_end]
558
+ )
559
+ end
560
+
561
+ # bodystmt can't actually determine its bounds appropriately because it
562
+ # doesn't necessarily know where it started. So the parent node needs to
563
+ # report back down into this one where it goes.
564
+ class BodyStmt < SimpleDelegator
565
+ def bind(char_start, char_end)
566
+ merge!(char_start: char_start, char_end: char_end)
567
+ parts = self[:body]
568
+
569
+ # Here we're going to determine the bounds for the stmts
570
+ consequent = parts[1..-1].compact.first
571
+ self[:body][0].bind(
572
+ char_start,
573
+ consequent ? consequent[:char_start] : char_end
574
+ )
575
+
576
+ # Next we're going to determine the rescue clause if there is one
577
+ if parts[1]
578
+ consequent = parts[2..-1].compact.first
579
+ self[:body][1].bind_end(consequent ? consequent[:char_start] : char_end)
356
580
  end
581
+ end
582
+ end
357
583
 
358
- defined =
359
- private_instance_methods(false).grep(/\Aon_/) { $'.to_sym } +
360
- %i[embdoc embdoc_beg embdoc_end heredoc_beg heredoc_end]
584
+ # bodystmt is a parser event that represents all of the possible combinations
585
+ # of clauses within the body of a method or block.
586
+ def on_bodystmt(stmts, rescued, ensured, elsed)
587
+ BodyStmt.new(
588
+ type: :bodystmt,
589
+ body: [stmts, rescued, ensured, elsed],
590
+ start: lineno,
591
+ char_start: char_pos,
592
+ end: lineno,
593
+ char_end: char_pos
594
+ )
595
+ end
361
596
 
362
- (SCANNER_EVENTS - defined).each do |event|
363
- define_method(:"on_#{event}") do |body|
364
- super(body).tap do |node|
365
- char_end = char_pos + (body ? body.size : 0)
366
- node.merge!(char_start: char_pos, char_end: char_end)
597
+ # brace_block is a parser event that represents passing a block to a
598
+ # method call using the {..} operators. It accepts as arguments an
599
+ # optional block_var event that represents any parameters to the block as
600
+ # well as a stmts event that represents the statements inside the block.
601
+ def on_brace_block(block_var, stmts)
602
+ beging = find_scanner_event(:@lbrace)
603
+ ending = find_scanner_event(:@rbrace)
604
+
605
+ stmts.bind((block_var || beging)[:char_end], ending[:char_start])
606
+
607
+ {
608
+ type: :brace_block,
609
+ body: [block_var, stmts],
610
+ start: beging[:start],
611
+ char_start: beging[:char_start],
612
+ end: ending[:end],
613
+ char_end: ending[:char_end]
614
+ }
615
+ end
367
616
 
368
- scanner_events << node
369
- end
370
- end
371
- end
617
+ # break is a parser event that represents using the break keyword. It
618
+ # accepts as an argument an args or args_add_block event that contains all
619
+ # of the arguments being passed to the break.
620
+ def on_break(args_add_block)
621
+ beging = find_scanner_event(:@kw, 'break')
622
+
623
+ # You can hit this if you are passing no arguments to break but it has a
624
+ # comment right after it. In that case we can just use the location
625
+ # information straight from the keyword.
626
+ if args_add_block[:type] == :args
627
+ return beging.merge!(type: :break, body: [args_add_block])
628
+ end
372
629
 
373
- (PARSER_EVENTS - defined).each do |event|
374
- define_method(:"on_#{event}") do |*body|
375
- super(*body).merge!(
376
- char_start: char_start_for(body), char_end: char_pos
377
- )
378
- end
379
- end
630
+ beging.merge!(
631
+ type: :break,
632
+ body: [args_add_block],
633
+ end: args_add_block[:end],
634
+ char_end: args_add_block[:char_end]
635
+ )
636
+ end
637
+
638
+ # call is a parser event representing a method call with no arguments. It
639
+ # accepts as arguments the receiver of the method, the operator being used
640
+ # to send the method (., ::, or &.), and the value that is being sent to
641
+ # the receiver (which can be another nested call as well).
642
+ #
643
+ # There is one esoteric syntax that comes into play here as well. If the
644
+ # sending argument to this method is the symbol :call, then it represents
645
+ # calling a lambda in a very odd looking way, as in:
646
+ #
647
+ # foo.(1, 2, 3)
648
+ #
649
+ def on_call(receiver, oper, sending)
650
+ # Make sure we take the operator out of the scanner events so that it
651
+ # doesn't get confused for a unary operator later.
652
+ scanner_events.delete(oper)
653
+
654
+ ending = sending
655
+
656
+ if sending == :call
657
+ ending = oper
658
+
659
+ # Special handling here for Ruby <= 2.5 because the oper argument to this
660
+ # method wasn't a parser event here; it was just a plain symbol.
661
+ ending = receiver if RUBY_MAJOR <= 2 && RUBY_MINOR <= 5
380
662
  end
381
- )
382
663
 
383
- # This layer keeps track of inline comments as they come in. Ripper itself
384
- # doesn't attach comments to the AST, so we need to do it manually. In this
385
- # case, inline comments are defined as any comments wherein the lexer state is
386
- # not equal to EXPR_BEG (tracked in the BlockComments layer).
387
- prepend(
388
- Module.new do
389
- # Certain events needs to steal the comments from their children in order
390
- # for them to display properly.
391
- events = {
392
- aref: [:body, 1],
393
- args_add_block: [:body, 0],
394
- break: [:body, 0],
395
- call: [:body, 0],
396
- command: [:body, 1],
397
- command_call: [:body, 3],
398
- regexp_literal: [:body, 0],
399
- string_literal: [:body, 0],
400
- symbol_literal: [:body, 0]
401
- }
664
+ {
665
+ type: :call,
666
+ body: [receiver, oper, sending],
667
+ start: receiver[:start],
668
+ char_start: receiver[:char_start],
669
+ end: ending[:end],
670
+ char_end: ending[:char_end]
671
+ }
672
+ end
402
673
 
403
- def initialize(*args)
404
- super(*args)
405
- @inline_comments = []
406
- @last_sexp = nil
407
- end
674
+ # case is a parser event that represents the beginning of a case chain.
675
+ # It accepts as arguments the switch of the case and the consequent
676
+ # clause.
677
+ def on_case(switch, consequent)
678
+ find_scanner_event(:@kw, 'case').merge!(
679
+ type: :case,
680
+ body: [switch, consequent],
681
+ end: consequent[:end],
682
+ char_end: consequent[:char_end]
683
+ )
684
+ end
408
685
 
409
- def self.prepended(base)
410
- base.attr_reader :inline_comments, :last_sexp
411
- end
686
+ # Finds the next position in the source string that begins a statement. This
687
+ # is used to bind statements lists and make sure they don't include a
688
+ # preceding comment. For example, we want the following comment to be attached
689
+ # to the class node and not the statement node:
690
+ #
691
+ # class Foo # :nodoc:
692
+ # ...
693
+ # end
694
+ #
695
+ # By finding the next non-space character, we can make sure that the bounds of
696
+ # the statement list are correct.
697
+ def find_next_statement_start(position)
698
+ remaining = source[position..-1]
412
699
 
413
- private
700
+ if remaining.sub(/\A +/, '')[0] == '#'
701
+ return position + remaining.index("\n")
702
+ end
414
703
 
415
- events.each do |event, path|
416
- define_method(:"on_#{event}") do |*body|
417
- @last_sexp =
418
- super(*body).tap do |sexp|
419
- comments = (sexp.dig(*path) || {}).delete(:comments)
420
- sexp.merge!(comments: comments) if comments
421
- end
422
- end
423
- end
704
+ position
705
+ end
424
706
 
425
- SPECIAL_LITERALS = %i[qsymbols qwords symbols words].freeze
707
+ # class is a parser event that represents defining a class. It accepts as
708
+ # arguments the name of the class, the optional name of the superclass,
709
+ # and the bodystmt event that represents the statements evaluated within
710
+ # the context of the class.
711
+ def on_class(const, superclass, bodystmt)
712
+ beging = find_scanner_event(:@kw, 'class')
713
+ ending = find_scanner_event(:@kw, 'end')
714
+
715
+ bodystmt.bind(
716
+ find_next_statement_start((superclass || const)[:char_end]),
717
+ ending[:char_start]
718
+ )
426
719
 
427
- # Special array literals are handled in different ways and so their
428
- # comments need to be passed up to their parent array node.
429
- def on_array(*body)
430
- @last_sexp =
431
- super(*body).tap do |sexp|
432
- next unless SPECIAL_LITERALS.include?(body.dig(0, :type))
720
+ {
721
+ type: :class,
722
+ body: [const, superclass, bodystmt],
723
+ start: beging[:start],
724
+ char_start: beging[:char_start],
725
+ end: ending[:end],
726
+ char_end: ending[:char_end]
727
+ }
728
+ end
433
729
 
434
- comments = sexp.dig(:body, 0).delete(:comments)
435
- sexp.merge!(comments: comments) if comments
436
- end
437
- end
730
+ # command is a parser event representing a method call with arguments and
731
+ # no parentheses. It accepts as arguments the name of the method and the
732
+ # arguments being passed to the method.
733
+ def on_command(ident, args)
734
+ {
735
+ type: :command,
736
+ body: [ident, args],
737
+ start: ident[:start],
738
+ char_start: ident[:char_start],
739
+ end: args[:end],
740
+ char_end: args[:char_end]
741
+ }
742
+ end
438
743
 
439
- # Handling this specially because we want to pull the comments out of both
440
- # child nodes.
441
- def on_assoc_new(*body)
442
- @last_sexp =
443
- super(*body).tap do |sexp|
444
- comments =
445
- (sexp.dig(:body, 0).delete(:comments) || []) +
446
- (sexp.dig(:body, 1).delete(:comments) || [])
447
-
448
- sexp.merge!(comments: comments) if comments.any?
449
- end
450
- end
744
+ # command_call is a parser event representing a method call on an object
745
+ # with arguments and no parentheses. It accepts as arguments the receiver
746
+ # of the method, the operator being used to send the method, the name of
747
+ # the method, and the arguments being passed to the method.
748
+ def on_command_call(receiver, oper, ident, args)
749
+ ending = args || ident
750
+
751
+ {
752
+ type: :command_call,
753
+ body: [receiver, oper, ident, args],
754
+ start: receiver[:start],
755
+ char_start: receiver[:char_start],
756
+ end: ending[:end],
757
+ char_end: ending[:char_end]
758
+ }
759
+ end
451
760
 
452
- # Most scanner events don't stand on their own as s-expressions, but the
453
- # CHAR scanner event is effectively just a string, so we need to track it
454
- # as a s-expression.
455
- def on_CHAR(body)
456
- @last_sexp = super(body)
457
- end
761
+ # A const_path_field is a parser event that is always the child of some
762
+ # kind of assignment. It represents when you're assigning to a constant
763
+ # that is being referenced as a child of another variable. For example:
764
+ #
765
+ # foo::X = 1
766
+ #
767
+ def on_const_path_field(left, const)
768
+ {
769
+ type: :const_path_field,
770
+ body: [left, const],
771
+ start: left[:start],
772
+ char_start: left[:char_start],
773
+ end: const[:end],
774
+ char_end: const[:char_end]
775
+ }
776
+ end
458
777
 
459
- # We need to know exactly where the comment is, switching off the current
460
- # lexer state. In Ruby 2.7.0-dev, that's defined as:
461
- #
462
- # enum lex_state_bits {
463
- # EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
464
- # EXPR_END_bit, /* newline significant, +/- is an operator. */
465
- # EXPR_ENDARG_bit, /* ditto, and unbound braces. */
466
- # EXPR_ENDFN_bit, /* ditto, and unbound braces. */
467
- # EXPR_ARG_bit, /* newline significant, +/- is an operator. */
468
- # EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
469
- # EXPR_MID_bit, /* newline significant, +/- is an operator. */
470
- # EXPR_FNAME_bit, /* ignore newline, no reserved words. */
471
- # EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
472
- # EXPR_CLASS_bit, /* immediate after `class', no here document. */
473
- # EXPR_LABEL_bit, /* flag bit, label is allowed. */
474
- # EXPR_LABELED_bit, /* flag bit, just after a label. */
475
- # EXPR_FITEM_bit, /* symbol literal as FNAME. */
476
- # EXPR_MAX_STATE
477
- # };
478
- def on_comment(body)
479
- sexp = { type: :@comment, body: body.chomp, start: lineno, end: lineno }
480
-
481
- case Prettier::Parser.lex_state_name(state).gsub('EXPR_', '')
482
- when 'END', 'ARG|LABELED', 'ENDFN'
483
- last_sexp.merge!(comments: [sexp])
484
- when 'CMDARG', 'END|ENDARG', 'ENDARG', 'ARG', 'FNAME|FITEM', 'CLASS',
485
- 'END|LABEL'
486
- inline_comments << sexp
487
- when 'BEG|LABEL', 'MID'
488
- inline_comments << sexp.merge!(break: true)
489
- when 'DOT'
490
- last_sexp.merge!(comments: [sexp.merge!(break: true)])
491
- end
778
+ # A const_path_ref is a parser event that is very similar to
779
+ # const_path_field except that it is not involved in an assignment. It
780
+ # looks like the following example:
781
+ #
782
+ # foo::X
783
+ #
784
+ def on_const_path_ref(left, const)
785
+ {
786
+ type: :const_path_ref,
787
+ body: [left, const],
788
+ start: left[:start],
789
+ char_start: left[:char_start],
790
+ end: const[:end],
791
+ char_end: const[:char_end]
792
+ }
793
+ end
492
794
 
493
- sexp
494
- end
795
+ # A const_ref is a parser event that represents the name of the constant
796
+ # being used in a class or module declaration. In the following example it
797
+ # is the @const scanner event that has the contents of Foo.
798
+ #
799
+ # class Foo; end
800
+ #
801
+ def on_const_ref(const)
802
+ const.merge(type: :const_ref, body: [const])
803
+ end
804
+
805
+ # A def is a parser event that represents defining a regular method on the
806
+ # current self object. It accepts as arguments the ident (the name of the
807
+ # method being defined), the params (the parameter declaration for the
808
+ # method), and a bodystmt node which represents the statements inside the
809
+ # method. As an example, here are the parts that go into this:
810
+ #
811
+ # def foo(bar) do baz end
812
+ # │ │ │
813
+ # │ │ └> bodystmt
814
+ # │ └> params
815
+ # └> ident
816
+ #
817
+ def on_def(ident, params, bodystmt)
818
+ # Make sure to delete this scanner event in case you're defining something
819
+ # like def class which would lead to this being a kw and causing all kinds
820
+ # of trouble
821
+ scanner_events.delete(ident)
822
+
823
+ if params[:type] == :params && !params[:body].any?
824
+ location = ident[:char_end]
825
+ params.merge!(char_start: location, char_end: location)
826
+ end
495
827
 
496
- defined = private_instance_methods(false).grep(/\Aon_/) { $'.to_sym }
828
+ beging = find_scanner_event(:@kw, 'def')
829
+ ending = find_scanner_event(:@kw, 'end')
497
830
 
498
- (PARSER_EVENTS - defined).each do |event|
499
- define_method(:"on_#{event}") do |*body|
500
- super(*body).tap do |sexp|
501
- @last_sexp = sexp
502
- next if inline_comments.empty?
831
+ bodystmt.bind(
832
+ find_next_statement_start(params[:char_end]),
833
+ ending[:char_start]
834
+ )
503
835
 
504
- sexp[:comments] = inline_comments.reverse
505
- @inline_comments = []
506
- end
507
- end
508
- end
836
+ {
837
+ type: :def,
838
+ body: [ident, params, bodystmt],
839
+ start: beging[:start],
840
+ char_start: beging[:char_start],
841
+ end: ending[:end],
842
+ char_end: ending[:char_end]
843
+ }
844
+ end
845
+
846
+ # A defs is a parser event that represents defining a singleton method on
847
+ # an object. It accepts the same arguments as the def event, as well as
848
+ # the target and operator on which this method is being defined. As
849
+ # an example, here are the parts that go into this:
850
+ #
851
+ # def foo.bar(baz) do baz end
852
+ # │ │ │ │ │
853
+ # │ │ │ │ │
854
+ # │ │ │ │ └> bodystmt
855
+ # │ │ │ └> params
856
+ # │ │ └> ident
857
+ # │ └> oper
858
+ # └> target
859
+ #
860
+ def on_defs(target, oper, ident, params, bodystmt)
861
+ # Make sure to delete this scanner event in case you're defining something
862
+ # like def class which would lead to this being a kw and causing all kinds
863
+ # of trouble
864
+ scanner_events.delete(ident)
865
+
866
+ if params[:type] == :params && !params[:body].any?
867
+ location = ident[:char_end]
868
+ params.merge!(char_start: location, char_end: location)
509
869
  end
510
- )
511
870
 
512
- # Nodes that are always on their own line occur when the lexer is in the
513
- # EXPR_BEG state. Those comments are tracked within the @block_comments
514
- # instance variable. Then for each node that could contain them, we attach
515
- # them after the node has been built.
516
- prepend(
517
- Module.new do
518
- events = {
519
- begin: [0, :body, 0],
520
- bodystmt: [0],
521
- class: [2, :body, 0],
522
- def: [2, :body, 0],
523
- defs: [4, :body, 0],
524
- else: [0],
525
- elsif: [1],
526
- ensure: [0],
527
- if: [1],
528
- program: [0],
529
- rescue: [2],
530
- sclass: [1, :body, 0],
531
- unless: [1],
532
- until: [1],
533
- when: [1],
534
- while: [1]
535
- }
871
+ beging = find_scanner_event(:@kw, 'def')
872
+ ending = find_scanner_event(:@kw, 'end')
536
873
 
537
- def initialize(*args)
538
- super(*args)
539
- @block_comments = []
540
- @current_embdoc = nil
541
- end
874
+ bodystmt.bind(
875
+ find_next_statement_start(params[:char_end]),
876
+ ending[:char_start]
877
+ )
542
878
 
543
- def self.prepended(base)
544
- base.attr_reader :block_comments, :current_embdoc
545
- end
879
+ {
880
+ type: :defs,
881
+ body: [target, oper, ident, params, bodystmt],
882
+ start: beging[:start],
883
+ char_start: beging[:char_start],
884
+ end: ending[:end],
885
+ char_end: ending[:char_end]
886
+ }
887
+ end
546
888
 
547
- private
889
+ # A defined node represents the rather unique defined? operator. It can be
890
+ # used with and without parentheses. If they're present, we use them to
891
+ # determine our bounds, otherwise we use the value that's being passed to
892
+ # the operator.
893
+ def on_defined(value)
894
+ beging = find_scanner_event(:@kw, 'defined?')
895
+
896
+ paren = source[beging[:char_end]...value[:char_start]].include?('(')
897
+ ending = paren ? find_scanner_event(:@rparen) : value
898
+
899
+ beging.merge!(
900
+ type: :defined,
901
+ body: [value],
902
+ end: ending[:end],
903
+ char_end: ending[:char_end]
904
+ )
905
+ end
548
906
 
549
- def attach_comments(sexp, stmts)
550
- range = sexp[:start]..sexp[:end]
551
- comments =
552
- block_comments.group_by { |comment| range.include?(comment[:start]) }
907
+ # do_block is a parser event that represents passing a block to a method
908
+ # call using the do..end keywords. It accepts as arguments an optional
909
+ # block_var event that represents any parameters to the block as well as
910
+ # a bodystmt event that represents the statements inside the block.
911
+ def on_do_block(block_var, bodystmt)
912
+ beging = find_scanner_event(:@kw, 'do')
913
+ ending = find_scanner_event(:@kw, 'end')
914
+
915
+ bodystmt.bind((block_var || beging)[:char_end], ending[:char_start])
916
+
917
+ {
918
+ type: :do_block,
919
+ body: [block_var, bodystmt],
920
+ start: beging[:start],
921
+ char_start: beging[:char_start],
922
+ end: ending[:end],
923
+ char_end: ending[:char_end]
924
+ }
925
+ end
553
926
 
554
- if comments[true]
555
- stmts[:body] =
556
- (stmts[:body] + comments[true]).sort_by { |node| node[:start] }
927
+ # dot2 is a parser event that represents using the .. operator between two
928
+ # expressions. Usually this is to create a range object but sometimes it's to
929
+ # use the flip-flop operator.
930
+ def on_dot2(left, right)
931
+ operator = find_scanner_event(:@op, '..')
932
+
933
+ beging = left || operator
934
+ ending = right || operator
935
+
936
+ {
937
+ type: :dot2,
938
+ body: [left, right],
939
+ start: beging[:start],
940
+ char_start: beging[:char_start],
941
+ end: ending[:end],
942
+ char_end: ending[:char_end]
943
+ }
944
+ end
557
945
 
558
- @block_comments = comments.fetch(false) { [] }
559
- end
560
- end
946
+ # dot3 is a parser event that represents using the ... operator between two
947
+ # expressions. Usually this is to create a range object but sometimes it's to
948
+ # use the flip-flop operator.
949
+ def on_dot3(left, right)
950
+ operator = find_scanner_event(:@op, '...')
951
+
952
+ beging = left || operator
953
+ ending = right || operator
954
+
955
+ {
956
+ type: :dot3,
957
+ body: [left, right],
958
+ start: beging[:start],
959
+ char_start: beging[:char_start],
960
+ end: ending[:end],
961
+ char_end: ending[:char_end]
962
+ }
963
+ end
561
964
 
562
- events.each do |event, path|
563
- define_method(:"on_#{event}") do |*body|
564
- super(*body).tap { |sexp| attach_comments(sexp, body.dig(*path)) }
565
- end
566
- end
965
+ # A dyna_symbol is a parser event that represents a symbol literal that
966
+ # uses quotes to interpolate its value. For example, if you had a variable
967
+ # foo and you wanted a symbol that contained its value, you would write:
968
+ #
969
+ # :"#{foo}"
970
+ #
971
+ # As such, they accept as one argument a string node, which is the same
972
+ # node that gets accepted into a string_literal (since we're basically
973
+ # talking about a string literal with a : character at the beginning).
974
+ #
975
+ # They can also come in another flavor which is a dynamic symbol as a hash
976
+ # key. This is kind of an interesting syntax which results in us having to
977
+ # look for a @label_end scanner event instead to get our bearings. That
978
+ # kind of code would look like:
979
+ #
980
+ # { "#{foo}": bar }
981
+ #
982
+ # which would be the same symbol as above.
983
+ def on_dyna_symbol(string)
984
+ if scanner_events.any? { |event| event[:type] == :@symbeg }
985
+ # A normal dynamic symbol
986
+ beging = find_scanner_event(:@symbeg)
987
+ ending = find_scanner_event(:@tstring_end)
988
+
989
+ beging.merge(
990
+ type: :dyna_symbol,
991
+ quote: beging[:body][1],
992
+ body: string[:body],
993
+ end: ending[:end],
994
+ char_end: ending[:char_end]
995
+ )
996
+ else
997
+ # A dynamic symbol as a hash key
998
+ beging = find_scanner_event(:@tstring_beg)
999
+ ending = find_scanner_event(:@label_end)
1000
+
1001
+ string.merge!(
1002
+ type: :dyna_symbol,
1003
+ quote: ending[:body][0],
1004
+ start: beging[:start],
1005
+ char_start: beging[:char_start],
1006
+ end: ending[:end],
1007
+ char_end: ending[:char_end]
1008
+ )
1009
+ end
1010
+ end
567
1011
 
568
- def on_comment(body)
569
- super(body).tap do |sexp|
570
- lex_state = Prettier::Parser.lex_state_name(state).gsub('EXPR_', '')
571
- block_comments << sexp if lex_state == 'BEG'
572
- end
1012
+ # else can either end with an end keyword (in which case we'll want to
1013
+ # consume that event) or it can end with an ensure keyword (in which case
1014
+ # we'll leave that to the ensure to handle).
1015
+ def find_else_ending
1016
+ index =
1017
+ scanner_events.rindex do |event|
1018
+ event[:type] == :@kw && %w[end ensure].include?(event[:body])
573
1019
  end
574
1020
 
575
- def on_embdoc_beg(comment)
576
- @current_embdoc = {
577
- type: :embdoc, body: comment, start: lineno, end: lineno
578
- }
579
- end
1021
+ event = scanner_events[index]
1022
+ event[:body] == 'end' ? scanner_events.delete_at(index) : event
1023
+ end
580
1024
 
581
- def on_embdoc(comment)
582
- @current_embdoc[:body] << comment
583
- end
1025
+ # else is a parser event that represents the end of an if, unless, or begin
1026
+ # chain. It accepts as an argument the statements that are contained
1027
+ # within the else clause.
1028
+ def on_else(stmts)
1029
+ beging = find_scanner_event(:@kw, 'else')
1030
+ ending = find_else_ending
1031
+
1032
+ stmts.bind(beging[:char_end], ending[:char_start])
1033
+
1034
+ {
1035
+ type: :else,
1036
+ body: [stmts],
1037
+ start: beging[:start],
1038
+ char_start: beging[:char_start],
1039
+ end: ending[:end],
1040
+ char_end: ending[:char_end]
1041
+ }
1042
+ end
584
1043
 
585
- def on_embdoc_end(comment)
586
- @current_embdoc[:body] << comment.chomp
587
- @block_comments << @current_embdoc
588
- @current_embdoc = nil
589
- end
1044
+ # elsif is a parser event that represents another clause in an if chain.
1045
+ # It accepts as arguments the predicate of the else if, the statements
1046
+ # that are contained within the else if clause, and the optional
1047
+ # consequent clause.
1048
+ def on_elsif(predicate, stmts, consequent)
1049
+ beging = find_scanner_event(:@kw, 'elsif')
1050
+ ending = consequent || find_scanner_event(:@kw, 'end')
1051
+
1052
+ stmts.bind(predicate[:char_end], ending[:char_start])
1053
+
1054
+ {
1055
+ type: :elsif,
1056
+ body: [predicate, stmts, consequent],
1057
+ start: beging[:start],
1058
+ char_start: beging[:char_start],
1059
+ end: ending[:end],
1060
+ char_end: ending[:char_end]
1061
+ }
1062
+ end
590
1063
 
591
- def on_method_add_block(*body)
592
- super(*body).tap do |sexp|
593
- stmts = body[1][:body][1]
594
- stmts = stmts[:type] == :stmts ? stmts : body[1][:body][1][:body][0]
1064
+ # embdocs are long comments that are surrounded by =begin..=end. They
1065
+ # cannot be nested, so we don't need to worry about keeping a stack around
1066
+ # like we do with heredocs. Instead we can just track the current embdoc
1067
+ # and add to it as we get content. It always starts with this scanner
1068
+ # event, so here we'll initialize the current embdoc.
1069
+ def on_embdoc_beg(value)
1070
+ @embdoc = {
1071
+ type: :@embdoc,
1072
+ value: value,
1073
+ start: lineno,
1074
+ char_start: char_pos
1075
+ }
1076
+ end
595
1077
 
596
- attach_comments(sexp, stmts)
597
- end
1078
+ # This is a scanner event that gets hit when we're inside an embdoc and
1079
+ # receive a new line of content. Here we are guaranteed to already have
1080
+ # initialized the @embdoc variable so we can just append the new line onto
1081
+ # the existing content.
1082
+ def on_embdoc(value)
1083
+ @embdoc[:value] << value
1084
+ end
1085
+
1086
+ # This is the final scanner event for embdocs. It receives the =end. Here
1087
+ # we can finalize the embdoc with its location information and the final
1088
+ # piece of the string. We then add it to the list of comments so that
1089
+ # prettier can place it into the final source string.
1090
+ def on_embdoc_end(value)
1091
+ @comments <<
1092
+ @embdoc.merge!(
1093
+ value: @embdoc[:value] << value.chomp,
1094
+ end: lineno,
1095
+ char_end: char_pos + value.length - 1
1096
+ )
1097
+
1098
+ @embdoc = nil
1099
+ end
1100
+
1101
+ # ensure is a parser event that represents the use of the ensure keyword
1102
+ # and its subsequent statements.
1103
+ def on_ensure(stmts)
1104
+ beging = find_scanner_event(:@kw, 'ensure')
1105
+
1106
+ # Specifically not using find_scanner_event here because we don't want to
1107
+ # consume the `end` keyword event, because that would break def..ensure..end chains.
1108
+ index =
1109
+ scanner_events.rindex do |scanner_event|
1110
+ scanner_event[:type] == :@kw && scanner_event[:body] == 'end'
598
1111
  end
1112
+
1113
+ ending = scanner_events[index]
1114
+ stmts.bind(
1115
+ find_next_statement_start(beging[:char_end]),
1116
+ ending[:char_start]
1117
+ )
1118
+
1119
+ {
1120
+ type: :ensure,
1121
+ body: [beging, stmts],
1122
+ start: beging[:start],
1123
+ char_start: beging[:char_start],
1124
+ end: ending[:end],
1125
+ char_end: ending[:char_end]
1126
+ }
1127
+ end
1128
+
1129
+ # An excessed_comma is a special kind of parser event that represents a comma
1130
+ # at the end of a list of parameters. It's a very strange node. It accepts a
1131
+ # different number of arguments depending on Ruby version, which is why we
1132
+ # have the anonymous splat there.
1133
+ def on_excessed_comma(*)
1134
+ find_scanner_event(:@comma).merge!(type: :excessed_comma)
1135
+ end
1136
+
1137
+ # An fcall is a parser event that represents the piece of a method call
1138
+ # that comes before any arguments (i.e., just the name of the method).
1139
+ def on_fcall(ident)
1140
+ ident.merge(type: :fcall, body: [ident])
1141
+ end
1142
+
1143
+ # A field is a parser event that is always the child of an assignment. It
1144
+ # accepts as arguments the left side of operation, the operator (. or ::),
1145
+ # and the right side of the operation. For example:
1146
+ #
1147
+ # foo.x = 1
1148
+ #
1149
+ def on_field(left, oper, right)
1150
+ {
1151
+ type: :field,
1152
+ body: [left, oper, right],
1153
+ start: left[:start],
1154
+ char_start: left[:char_start],
1155
+ end: right[:end],
1156
+ char_end: right[:char_end]
1157
+ }
1158
+ end
1159
+
1160
+ # for is a parser event that represents using the somewhat esoteric for
1161
+ # loop. It accepts as arguments an ident which is the iterating variable,
1162
+ # an enumerable for that which is being enumerated, and a stmts event that
1163
+ # represents the statements inside the for loop.
1164
+ def on_for(ident, enumerable, stmts)
1165
+ beging = find_scanner_event(:@kw, 'for')
1166
+ ending = find_scanner_event(:@kw, 'end')
1167
+
1168
+ stmts.bind(enumerable[:char_end], ending[:char_start])
1169
+
1170
+ {
1171
+ type: :for,
1172
+ body: [ident, enumerable, stmts],
1173
+ start: beging[:start],
1174
+ char_start: beging[:char_start],
1175
+ end: ending[:end],
1176
+ char_end: ending[:char_end]
1177
+ }
1178
+ end
1179
+
1180
+ # hash is a parser event that represents a hash literal. It accepts as an
1181
+ # argument an optional assoclist_from_args event which contains the
1182
+ # contents of the hash.
1183
+ def on_hash(assoclist_from_args)
1184
+ beging = find_scanner_event(:@lbrace)
1185
+ ending = find_scanner_event(:@rbrace)
1186
+
1187
+ if assoclist_from_args
1188
+ # Here we're going to expand out the location information for the assocs
1189
+ # node so that it can grab up any remaining comments inside the hash.
1190
+ assoclist_from_args.merge!(
1191
+ char_start: beging[:char_end],
1192
+ char_end: ending[:char_start]
1193
+ )
599
1194
  end
600
- )
601
1195
 
602
- # Tracking heredocs in somewhat interesting. Straight-line heredocs are
603
- # reported as strings, whereas squiggly-line heredocs are reported as
604
- # heredocs. We track the start and matching end of the heredoc as "beging" and
605
- # "ending" respectively.
606
- prepend(
607
- Module.new do
608
- def initialize(*args)
609
- super(*args)
610
- @heredoc_stack = []
611
- end
1196
+ {
1197
+ type: :hash,
1198
+ body: [assoclist_from_args],
1199
+ start: beging[:start],
1200
+ char_start: beging[:char_start],
1201
+ end: ending[:end],
1202
+ char_end: ending[:char_end]
1203
+ }
1204
+ end
612
1205
 
613
- def self.prepended(base)
614
- base.attr_reader :heredoc_stack
615
- end
1206
+ # This is a scanner event that represents the beginning of the heredoc. It
1207
+ # includes the declaration (which we call beging here, which is just short
1208
+ # for beginning). The declaration looks something like <<-HERE or <<~HERE.
1209
+ # If the downcased version of the declaration actually matches an existing
1210
+ # prettier parser, we'll later attempt to print it using that parser and
1211
+ # printer through our embed function.
1212
+ def on_heredoc_beg(beging)
1213
+ location = {
1214
+ start: lineno,
1215
+ end: lineno,
1216
+ char_start: char_pos,
1217
+ char_end: char_pos + beging.length + 1
1218
+ }
1219
+
1220
+ # Here we're going to artificially create an extra node type so that if
1221
+ # there are comments after the declaration of a heredoc, they get printed.
1222
+ location
1223
+ .merge(
1224
+ type: :heredoc,
1225
+ beging: location.merge(type: :@heredoc_beg, body: beging)
1226
+ )
1227
+ .tap { |node| @heredocs << node }
1228
+ end
616
1229
 
617
- private
1230
+ # This is a parser event that occurs when you're using a heredoc with a
1231
+ # tilde. These are considered `heredoc_dedent` nodes, whereas the hyphen
1232
+ # heredocs show up as string literals.
1233
+ def on_heredoc_dedent(string, _width)
1234
+ @heredocs[-1].merge!(body: string[:body])
1235
+ end
1236
+
1237
+ # This is a scanner event that represents the end of the heredoc.
1238
+ def on_heredoc_end(ending)
1239
+ @heredocs[-1].merge!(ending: ending.chomp, end: lineno, char_end: char_pos)
1240
+ end
1241
+
1242
+ # hshptn is a parser event that represents matching against a hash pattern
1243
+ # using the Ruby 2.7+ pattern matching syntax.
1244
+ def on_hshptn(const, kw, kwrest)
1245
+ pieces = [const, kw, kwrest].flatten(2).compact
1246
+
1247
+ {
1248
+ type: :hshptn,
1249
+ body: [const, kw, kwrest],
1250
+ start: pieces[0][:start],
1251
+ char_start: pieces[0][:char_start],
1252
+ end: pieces[-1][:end],
1253
+ char_end: pieces[-1][:char_end]
1254
+ }
1255
+ end
1256
+
1257
+ # if is a parser event that represents the first clause in an if chain.
1258
+ # It accepts as arguments the predicate of the if, the statements that are
1259
+ # contained within the if clause, and the optional consequent clause.
1260
+ def on_if(predicate, stmts, consequent)
1261
+ beging = find_scanner_event(:@kw, 'if')
1262
+ ending = consequent || find_scanner_event(:@kw, 'end')
1263
+
1264
+ stmts.bind(predicate[:char_end], ending[:char_start])
1265
+
1266
+ {
1267
+ type: :if,
1268
+ body: [predicate, stmts, consequent],
1269
+ start: beging[:start],
1270
+ char_start: beging[:char_start],
1271
+ end: ending[:end],
1272
+ char_end: ending[:char_end]
1273
+ }
1274
+ end
1275
+
1276
+ # ifop is a parser event that represents a ternary operator. It accepts as
1277
+ # arguments the predicate to the ternary, the truthy clause, and the falsy
1278
+ # clause.
1279
+ def on_ifop(predicate, truthy, falsy)
1280
+ predicate.merge(
1281
+ type: :ifop,
1282
+ body: [predicate, truthy, falsy],
1283
+ end: falsy[:end],
1284
+ char_end: falsy[:char_end]
1285
+ )
1286
+ end
1287
+
1288
+ # if_mod is a parser event that represents the modifier form of an if
1289
+ # statement. It accepts as arguments the predicate of the if and the
1290
+ # statement that is contained within the if clause.
1291
+ def on_if_mod(predicate, statement)
1292
+ find_scanner_event(:@kw, 'if')
1293
+
1294
+ {
1295
+ type: :if_mod,
1296
+ body: [predicate, statement],
1297
+ start: statement[:start],
1298
+ char_start: statement[:char_start],
1299
+ end: predicate[:end],
1300
+ char_end: predicate[:char_end]
1301
+ }
1302
+ end
1303
+
1304
+ # in is a parser event that represents using the in keyword within the
1305
+ # Ruby 2.7+ pattern matching syntax.
1306
+ def on_in(pattern, stmts, consequent)
1307
+ beging = find_scanner_event(:@kw, 'in')
1308
+ ending = consequent || find_scanner_event(:@kw, 'end')
1309
+
1310
+ stmts.bind(beging[:char_end], ending[:char_start])
1311
+
1312
+ beging.merge!(
1313
+ type: :in,
1314
+ body: [pattern, stmts, consequent],
1315
+ end: ending[:end],
1316
+ char_end: ending[:char_end]
1317
+ )
1318
+ end
1319
+
1320
+ # kwrest_param is a parser event that represents defining a parameter in a
1321
+ # method definition that accepts all remaining keyword parameters.
1322
+ def on_kwrest_param(ident)
1323
+ oper = find_scanner_event(:@op, '**')
1324
+ return oper.merge!(type: :kwrest_param, body: [nil]) unless ident
1325
+
1326
+ oper.merge!(
1327
+ type: :kwrest_param,
1328
+ body: [ident],
1329
+ end: ident[:end],
1330
+ char_end: ident[:char_end]
1331
+ )
1332
+ end
1333
+
1334
+ # lambda is a parser event that represents using a "stabby" lambda
1335
+ # literal. It accepts as arguments a params event that represents any
1336
+ # parameters to the lambda and a stmts event that represents the
1337
+ # statements inside the lambda.
1338
+ #
1339
+ # It can be wrapped in either {..} or do..end so we look for either of
1340
+ # those combinations to get our bounds.
1341
+ def on_lambda(params, stmts)
1342
+ beging = find_scanner_event(:@tlambda)
1343
+
1344
+ if scanner_events.any? { |event| event[:type] == :@tlambeg }
1345
+ opening = find_scanner_event(:@tlambeg)
1346
+ closing = find_scanner_event(:@rbrace)
1347
+ else
1348
+ opening = find_scanner_event(:@kw, 'do')
1349
+ closing = find_scanner_event(:@kw, 'end')
1350
+ end
1351
+
1352
+ stmts.bind(opening[:char_end], closing[:char_start])
1353
+
1354
+ {
1355
+ type: :lambda,
1356
+ body: [params, stmts],
1357
+ start: beging[:start],
1358
+ char_start: beging[:char_start],
1359
+ end: closing[:end],
1360
+ char_end: closing[:char_end]
1361
+ }
1362
+ end
1363
+
1364
+ # massign is a parser event that is a parent node of any kind of multiple
1365
+ # assignment. This includes splitting out variables on the left like:
1366
+ #
1367
+ # a, b, c = foo
1368
+ #
1369
+ # as well as splitting out variables on the right, as in:
1370
+ #
1371
+ # foo = a, b, c
1372
+ #
1373
+ # Both sides support splats, as well as variables following them. There's
1374
+ # also slightly odd behavior that you can achieve with the following:
1375
+ #
1376
+ # a, = foo
1377
+ #
1378
+ # In this case a would receive only the first value of the foo enumerable,
1379
+ # in which case we need to explicitly track the comma and add it onto the
1380
+ # child node.
1381
+ def on_massign(left, right)
1382
+ if source[left[:char_end]...right[:char_start]].strip.start_with?(',')
1383
+ left[:comma] = true
1384
+ end
1385
+
1386
+ {
1387
+ type: :massign,
1388
+ body: [left, right],
1389
+ start: left[:start],
1390
+ char_start: left[:char_start],
1391
+ end: right[:end],
1392
+ char_end: right[:char_end]
1393
+ }
1394
+ end
1395
+
1396
+ # method_add_arg is a parser event that represents a method call with
1397
+ # arguments and parentheses. It accepts as arguments the method being called
1398
+ # and the arg_paren event that contains the arguments to the method.
1399
+ def on_method_add_arg(fcall, arg_paren)
1400
+ # You can hit this if you are passing no arguments to a method that ends in
1401
+ # a question mark. Because it knows it has to be a method and not a local
1402
+ # variable. In that case we can just use the location information straight
1403
+ # from the fcall.
1404
+ if arg_paren[:type] == :args
1405
+ return fcall.merge(type: :method_add_arg, body: [fcall, arg_paren])
1406
+ end
1407
+
1408
+ {
1409
+ type: :method_add_arg,
1410
+ body: [fcall, arg_paren],
1411
+ start: fcall[:start],
1412
+ char_start: fcall[:char_start],
1413
+ end: arg_paren[:end],
1414
+ char_end: arg_paren[:char_end]
1415
+ }
1416
+ end
1417
+
1418
+ # method_add_block is a parser event that represents a method call with a
1419
+ # block argument. It accepts as arguments the method being called and the
1420
+ # block event.
1421
+ def on_method_add_block(method_add_arg, block)
1422
+ {
1423
+ type: :method_add_block,
1424
+ body: [method_add_arg, block],
1425
+ start: method_add_arg[:start],
1426
+ char_start: method_add_arg[:char_start],
1427
+ end: block[:end],
1428
+ char_end: block[:char_end]
1429
+ }
1430
+ end
1431
+
1432
+ # An mlhs_new is a parser event that represents the beginning of the left
1433
+ # side of a multiple assignment. It is followed by any number of mlhs_add
1434
+ # nodes that each represent another variable being assigned.
1435
+ def on_mlhs_new
1436
+ {
1437
+ type: :mlhs,
1438
+ body: [],
1439
+ start: lineno,
1440
+ char_start: char_pos,
1441
+ end: lineno,
1442
+ char_end: char_pos
1443
+ }
1444
+ end
1445
+
1446
+ # An mlhs_add is a parser event that represents adding another variable
1447
+ # onto a list of assignments. It accepts as arguments the parent mlhs node
1448
+ # as well as the part that is being added to the list.
1449
+ def on_mlhs_add(mlhs, part)
1450
+ if mlhs[:body].empty?
1451
+ part.merge(type: :mlhs, body: [part])
1452
+ else
1453
+ mlhs.merge!(
1454
+ body: mlhs[:body] << part,
1455
+ end: part[:end],
1456
+ char_end: part[:char_end]
1457
+ )
1458
+ end
1459
+ end
1460
+
1461
+ # An mlhs_add_post is a parser event that represents adding another set of
1462
+ # variables onto a list of assignments after a splat variable. It accepts
1463
+ # as arguments the previous mlhs_add_star node that represented the splat
1464
+ # as well another mlhs node that represents all of the variables after the
1465
+ # splat.
1466
+ def on_mlhs_add_post(mlhs_add_star, mlhs)
1467
+ mlhs_add_star.merge(
1468
+ type: :mlhs_add_post,
1469
+ body: [mlhs_add_star, mlhs],
1470
+ end: mlhs[:end],
1471
+ char_end: mlhs[:char_end]
1472
+ )
1473
+ end
1474
+
1475
+ # An mlhs_add_star is a parser event that represents a splatted variable
1476
+ # inside of a multiple assignment on the left hand side. It accepts as
1477
+ # arguments the parent mlhs node as well as the part that represents the
1478
+ # splatted variable.
1479
+ def on_mlhs_add_star(mlhs, part)
1480
+ beging = find_scanner_event(:@op, '*')
1481
+ ending = part || beging
1482
+
1483
+ {
1484
+ type: :mlhs_add_star,
1485
+ body: [mlhs, part],
1486
+ start: beging[:start],
1487
+ char_start: beging[:char_start],
1488
+ end: ending[:end],
1489
+ char_end: ending[:char_end]
1490
+ }
1491
+ end
618
1492
 
619
- # This is a scanner event that represents the beginning of the heredoc.
620
- def on_heredoc_beg(beging)
1493
+ # An mlhs_paren is a parser event that represents parentheses being used
1494
+ # to deconstruct values in a multiple assignment on the left hand side. It
1495
+ # accepts as arguments the contents of the inside of the parentheses,
1496
+ # which is another mlhs node.
1497
+ def on_mlhs_paren(contents)
1498
+ beging = find_scanner_event(:@lparen)
1499
+ ending = find_scanner_event(:@rparen)
1500
+
1501
+ if source[beging[:char_end]...ending[:char_start]].strip.end_with?(',')
1502
+ contents[:comma] = true
1503
+ end
1504
+
1505
+ {
1506
+ type: :mlhs_paren,
1507
+ body: [contents],
1508
+ start: beging[:start],
1509
+ char_start: beging[:char_start],
1510
+ end: ending[:end],
1511
+ char_end: ending[:char_end]
1512
+ }
1513
+ end
1514
+
1515
+ # module is a parser event that represents defining a module. It accepts
1516
+ # as arguments the name of the module and the bodystmt event that
1517
+ # represents the statements evaluated within the context of the module.
1518
+ def on_module(const, bodystmt)
1519
+ beging = find_scanner_event(:@kw, 'module')
1520
+ ending = find_scanner_event(:@kw, 'end')
1521
+
1522
+ bodystmt.bind(
1523
+ find_next_statement_start(const[:char_end]),
1524
+ ending[:char_start]
1525
+ )
1526
+
1527
+ {
1528
+ type: :module,
1529
+ body: [const, bodystmt],
1530
+ start: beging[:start],
1531
+ char_start: beging[:char_start],
1532
+ end: ending[:end],
1533
+ char_end: ending[:char_end]
1534
+ }
1535
+ end
1536
+
1537
+ # An mrhs_new is a parser event that represents the beginning of a list of
1538
+ # values that are being assigned within a multiple assignment node. It can
1539
+ # be followed by any number of mrhs_add nodes that we'll build up into an
1540
+ # array body.
1541
+ def on_mrhs_new
1542
+ {
1543
+ type: :mrhs,
1544
+ body: [],
1545
+ start: lineno,
1546
+ char_start: char_pos,
1547
+ end: lineno,
1548
+ char_end: char_pos
1549
+ }
1550
+ end
1551
+
1552
+ # An mrhs_add is a parser event that represents adding another value onto
1553
+ # a list on the right hand side of a multiple assignment.
1554
+ def on_mrhs_add(mrhs, part)
1555
+ if mrhs[:body].empty?
1556
+ part.merge(type: :mrhs, body: [part])
1557
+ else
1558
+ mrhs.merge!(
1559
+ body: mrhs[:body] << part,
1560
+ end: part[:end],
1561
+ char_end: part[:char_end]
1562
+ )
1563
+ end
1564
+ end
1565
+
1566
+ # An mrhs_add_star is a parser event that represents using the splat
1567
+ # operator to expand out a value on the right hand side of a multiple
1568
+ # assignment.
1569
+ def on_mrhs_add_star(mrhs, part)
1570
+ beging = find_scanner_event(:@op, '*')
1571
+ ending = part || beging
1572
+
1573
+ {
1574
+ type: :mrhs_add_star,
1575
+ body: [mrhs, part],
1576
+ start: beging[:start],
1577
+ char_start: beging[:char_start],
1578
+ end: ending[:end],
1579
+ char_end: ending[:char_end]
1580
+ }
1581
+ end
1582
+
1583
+ # An mrhs_new_from_args is a parser event that represents the shorthand
1584
+ # of a multiple assignment that allows you to assign values using just
1585
+ # commas as opposed to assigning from an array. For example, in the
1586
+ # following segment the right hand side of the assignment would trigger
1587
+ # this event:
1588
+ #
1589
+ # foo = 1, 2, 3
1590
+ #
1591
+ def on_mrhs_new_from_args(args)
1592
+ args.merge(type: :mrhs_new_from_args, body: [args])
1593
+ end
1594
+
1595
+ # next is a parser event that represents using the next keyword. It
1596
+ # accepts as an argument an args or args_add_block event that contains all
1597
+ # of the arguments being passed to the next.
1598
+ def on_next(args_add_block)
1599
+ find_scanner_event(:@kw, 'next').merge!(
1600
+ type: :next,
1601
+ body: [args_add_block],
1602
+ end: args_add_block[:end],
1603
+ char_end: args_add_block[:char_end]
1604
+ )
1605
+ end
1606
+
1607
+ # opassign is a parser event that represents assigning something to a
1608
+ # variable or constant using an operator like += or ||=. It accepts as
1609
+ # arguments the left side of the expression before the operator, the
1610
+ # operator itself, and the right side of the expression.
1611
+ def on_opassign(left, oper, right)
1612
+ left.merge(
1613
+ type: :opassign,
1614
+ body: [left, oper, right],
1615
+ end: right[:end],
1616
+ char_end: right[:char_end]
1617
+ )
1618
+ end
1619
+
1620
+ # params is a parser event that represents defining parameters on a
1621
+ # method. They have a somewhat interesting structure in that they are an
1622
+ # array of arrays where the position in the top-level array indicates the
1623
+ # type of param and the subarray is the list of parameters of that type.
1624
+ # We therefore have to flatten them down to get to the location.
1625
+ def on_params(*types)
1626
+ flattened = types.flatten(2).select { |type| type.is_a?(Hash) }
1627
+ location =
1628
+ if flattened.any?
621
1629
  {
622
- type: :heredoc,
623
- beging: beging,
624
- start: lineno,
625
- end: lineno,
626
- char_start: char_pos - beging.length + 1,
627
- char_end: char_pos
628
- }.tap { |node| heredoc_stack << node }
1630
+ start: flattened[0][:start],
1631
+ char_start: flattened[0][:char_start],
1632
+ end: flattened[-1][:end],
1633
+ char_end: flattened[-1][:char_end]
1634
+ }
1635
+ else
1636
+ { start: lineno, char_start: char_pos, end: lineno, char_end: char_pos }
629
1637
  end
630
1638
 
631
- # This is a scanner event that represents the end of the heredoc.
632
- def on_heredoc_end(ending)
633
- heredoc_stack[-1].merge!(
634
- ending: ending.chomp, end: lineno, char_end: char_pos
635
- )
636
- end
1639
+ location.merge!(type: :params, body: types)
1640
+ end
637
1641
 
638
- # This is a parser event that occurs when you're using a heredoc with a
639
- # tilde. These are considered `heredoc_dedent` nodes, whereas the hyphen
640
- # heredocs show up as string literals.
641
- def on_heredoc_dedent(string, _width)
642
- heredoc_stack[-1].merge!(string.slice(:body))
643
- end
1642
+ # A paren is a parser event that represents using parentheses pretty much
1643
+ # anywhere in a Ruby program. It accepts as arguments the contents, which
1644
+ # can be either params or statements.
1645
+ def on_paren(contents)
1646
+ ending = find_scanner_event(:@rparen)
1647
+
1648
+ find_scanner_event(:@lparen).merge!(
1649
+ type: :paren,
1650
+ body: [contents],
1651
+ end: ending[:end],
1652
+ char_end: ending[:char_end]
1653
+ )
1654
+ end
644
1655
 
645
- # String literals are either going to be a normal string or they're going
646
- # to be a heredoc with a hyphen.
647
- def on_string_literal(string)
648
- heredoc = heredoc_stack[-1]
1656
+ # The program node is the very top of the AST. Here we'll attach all of
1657
+ # the comments that we've gathered up over the course of parsing the
1658
+ # source string. We'll also attach on the __END__ content if there was
1659
+ # some found at the end of the source string.
1660
+ def on_program(stmts)
1661
+ range = {
1662
+ start: 1,
1663
+ end: lines.length,
1664
+ char_start: 0,
1665
+ char_end: source.length
1666
+ }
1667
+
1668
+ stmts[:body] << @__end__ if @__end__
1669
+ stmts.bind(0, source.length)
1670
+
1671
+ range.merge(type: :program, body: [stmts], comments: @comments)
1672
+ end
649
1673
 
650
- if heredoc && heredoc[:ending]
651
- heredoc_stack.pop.merge!(string.slice(:body))
652
- else
653
- super
654
- end
1674
+ # qsymbols_new is a parser event that represents the beginning of a symbol
1675
+ # literal array, like %i[one two three]. It can be followed by any number
1676
+ # of qsymbols_add events, which we'll append onto an array body.
1677
+ def on_qsymbols_new
1678
+ find_scanner_event(:@qsymbols_beg).merge!(type: :qsymbols, body: [])
1679
+ end
1680
+
1681
+ # qsymbols_add is a parser event that represents an element inside of a
1682
+ # symbol literal array like %i[one two three]. It accepts as arguments the
1683
+ # parent qsymbols node as well as a tstring_content scanner event
1684
+ # representing the bare words.
1685
+ def on_qsymbols_add(qsymbols, tstring_content)
1686
+ qsymbols.merge!(
1687
+ body: qsymbols[:body] << tstring_content,
1688
+ end: tstring_content[:end],
1689
+ char_end: tstring_content[:char_end]
1690
+ )
1691
+ end
1692
+
1693
+ # qwords_new is a parser event that represents the beginning of a string
1694
+ # literal array, like %w[one two three]. It can be followed by any number
1695
+ # of qwords_add events, which we'll append onto an array body.
1696
+ def on_qwords_new
1697
+ find_scanner_event(:@qwords_beg).merge!(type: :qwords, body: [])
1698
+ end
1699
+
1700
+ # qwords_add is a parser event that represents an element inside of a
1701
+ # string literal array like %w[one two three]. It accepts as arguments the
1702
+ # parent qwords node as well as a tstring_content scanner event
1703
+ # representing the bare words.
1704
+ def on_qwords_add(qwords, tstring_content)
1705
+ qwords.merge!(
1706
+ body: qwords[:body] << tstring_content,
1707
+ end: tstring_content[:end],
1708
+ char_end: tstring_content[:char_end]
1709
+ )
1710
+ end
1711
+
1712
+ # redo is a parser event that represents the bare redo keyword. It has no
1713
+ # body as it accepts no arguments.
1714
+ def on_redo
1715
+ find_scanner_event(:@kw, 'redo').merge!(type: :redo)
1716
+ end
1717
+
1718
+ # regexp_new is a parser event that represents the beginning of a regular
1719
+ # expression literal, like /foo/. It can be followed by any number of
1720
+ # regexp_add events, which we'll append onto an array body.
1721
+ def on_regexp_new
1722
+ find_scanner_event(:@regexp_beg).merge!(type: :regexp, body: [])
1723
+ end
1724
+
1725
+ # regexp_add is a parser event that represents a piece of a regular
1726
+ # expression body. It accepts as arguments the parent regexp node as well as a
1727
+ # tstring_content scanner event representing string content or a
1728
+ # string_embexpr parser event representing interpolated content.
1729
+ def on_regexp_add(regexp, piece)
1730
+ regexp.merge!(
1731
+ body: regexp[:body] << piece,
1732
+ end: regexp[:end],
1733
+ char_end: regexp[:char_end]
1734
+ )
1735
+ end
1736
+
1737
+ # regexp_literal is a parser event that represents a regular expression.
1738
+ # It accepts as arguments a regexp node which is a built-up array of
1739
+ # pieces that go into the regexp content, as well as the ending used to
1740
+ # close out the regexp which includes any modifiers.
1741
+ def on_regexp_literal(regexp, ending)
1742
+ regexp.merge!(
1743
+ type: :regexp_literal,
1744
+ ending: ending[:body],
1745
+ end: ending[:end],
1746
+ char_end: ending[:char_end]
1747
+ )
1748
+ end
1749
+
1750
+ # rescue is a special kind of node where you have a rescue chain but it
1751
+ # doesn't really have all of the information that it needs in order to
1752
+ # determine its ending. Therefore it relies on its parent bodystmt node to
1753
+ # report its ending to it.
1754
+ class Rescue < SimpleDelegator
1755
+ def bind_end(char_end)
1756
+ merge!(char_end: char_end)
1757
+
1758
+ stmts = self[:body][2]
1759
+ consequent = self[:body][3]
1760
+
1761
+ if consequent
1762
+ consequent.bind_end(char_end)
1763
+ stmts.bind_end(consequent[:char_start])
1764
+ else
1765
+ stmts.bind_end(char_end)
655
1766
  end
656
1767
  end
657
- )
1768
+ end
658
1769
 
659
- # This module contains miscellaneous fixes required to get the right
660
- # structure.
661
- prepend(
662
- Module.new do
663
- private
1770
+ # rescue is a parser event that represents the use of the rescue keyword
1771
+ # inside of a bodystmt.
1772
+ def on_rescue(exceptions, variable, stmts, consequent)
1773
+ beging = find_scanner_event(:@kw, 'rescue')
664
1774
 
665
- # These are the event types that contain _actual_ string content. If
666
- # there is an encoding magic comment at the top of the file, ripper will
667
- # actually change into that encoding for the storage of the string. This
668
- # will break everything, so we need to force the encoding back into UTF-8
669
- # so that the JSON library won't break.
670
- %w[comment ident tstring_content].each do |event|
671
- define_method(:"on_#{event}") do |body|
672
- super(body.force_encoding('UTF-8'))
673
- end
674
- end
1775
+ last_exception = exceptions.is_a?(Array) ? exceptions[-1] : exceptions
1776
+ last_node = variable || last_exception || beging
675
1777
 
676
- # Handles __END__ syntax, which allows individual scripts to keep content
677
- # after the main ruby code that can be read through DATA. It looks like:
678
- #
679
- # foo.bar
680
- #
681
- # __END__
682
- # some other content that isn't normally read by ripper
683
- def on___end__(body)
684
- @__end__ = super(lines[lineno..-1].join("\n"))
685
- end
1778
+ stmts.bind(find_next_statement_start(last_node[:char_end]), char_pos)
686
1779
 
687
- def on_program(*body)
688
- super(*body).tap { |node| node[:body][0][:body] << __end__ if __end__ }
689
- end
1780
+ Rescue.new(
1781
+ beging.merge!(
1782
+ type: :rescue,
1783
+ body: [exceptions, variable, stmts, consequent],
1784
+ end: lineno,
1785
+ char_end: char_pos
1786
+ )
1787
+ )
1788
+ end
1789
+
1790
+ # rescue_mod represents the modifier form of a rescue clause. It accepts as
1791
+ # arguments the statement that may raise an error and the value that should
1792
+ # be used if it does.
1793
+ def on_rescue_mod(statement, rescued)
1794
+ find_scanner_event(:@kw, 'rescue')
1795
+
1796
+ {
1797
+ type: :rescue_mod,
1798
+ body: [statement, rescued],
1799
+ start: statement[:start],
1800
+ char_start: statement[:char_start],
1801
+ end: rescued[:end],
1802
+ char_end: rescued[:char_end]
1803
+ }
1804
+ end
1805
+
1806
+ # rest_param is a parser event that represents defining a parameter in a
1807
+ # method definition that accepts all remaining positional parameters. It
1808
+ # accepts as an argument an optional identifier for the parameter. If it
1809
+ # is omitted, then we're just using the plain operator.
1810
+ def on_rest_param(ident)
1811
+ oper = find_scanner_event(:@op, '*')
1812
+ return oper.merge!(type: :rest_param, body: [nil]) unless ident
1813
+
1814
+ oper.merge!(
1815
+ type: :rest_param,
1816
+ body: [ident],
1817
+ end: ident[:end],
1818
+ char_end: ident[:char_end]
1819
+ )
1820
+ end
1821
+
1822
+ # retry is a parser event that represents the bare retry keyword. It has
1823
+ # no body as it accepts no arguments.
1824
+ def on_retry
1825
+ find_scanner_event(:@kw, 'retry').merge!(type: :retry)
1826
+ end
690
1827
 
691
- # Normally access controls are reported as vcall nodes. This creates a
692
- # new node type to explicitly track those nodes instead, so that the
693
- # printer can add new lines as necessary.
694
- def on_vcall(ident)
695
- @access_controls ||= %w[private protected public].freeze
1828
+ # return is a parser event that represents using the return keyword with
1829
+ # arguments. It accepts as an argument an args_add_block event that
1830
+ # contains all of the arguments being passed.
1831
+ def on_return(args_add_block)
1832
+ find_scanner_event(:@kw, 'return').merge!(
1833
+ type: :return,
1834
+ body: [args_add_block],
1835
+ end: args_add_block[:end],
1836
+ char_end: args_add_block[:char_end]
1837
+ )
1838
+ end
696
1839
 
697
- super(ident).tap do |node|
698
- if !@access_controls.include?(ident[:body]) ||
699
- ident[:body] != lines[lineno - 1].strip
700
- next
701
- end
1840
+ # return0 is a parser event that represents the bare return keyword. It
1841
+ # has no body as it accepts no arguments. This is as opposed to the return
1842
+ # parser event, which is the version where you're returning one or more
1843
+ # values.
1844
+ def on_return0
1845
+ find_scanner_event(:@kw, 'return').merge!(type: :return0)
1846
+ end
702
1847
 
703
- node.merge!(type: :access_ctrl)
704
- end
1848
+ # sclass is a parser event that represents a block of statements that
1849
+ # should be evaluated within the context of the singleton class of an
1850
+ # object. It's frequently used to define singleton methods. It looks like
1851
+ # the following example:
1852
+ #
1853
+ # class << self do foo end
1854
+ # │ │
1855
+ # │ └> bodystmt
1856
+ # └> target
1857
+ #
1858
+ def on_sclass(target, bodystmt)
1859
+ beging = find_scanner_event(:@kw, 'class')
1860
+ ending = find_scanner_event(:@kw, 'end')
1861
+
1862
+ bodystmt.bind(
1863
+ find_next_statement_start(target[:char_end]),
1864
+ ending[:char_start]
1865
+ )
1866
+
1867
+ {
1868
+ type: :sclass,
1869
+ body: [target, bodystmt],
1870
+ start: beging[:start],
1871
+ char_start: beging[:char_start],
1872
+ end: ending[:end],
1873
+ char_end: ending[:char_end]
1874
+ }
1875
+ end
1876
+
1877
+ # Everything that has a block of code inside of it has a list of statements.
1878
+ # Normally we would just track those as a node that has an array body, but we
1879
+ # have some special handling in order to handle empty statement lists. They
1880
+ # need to have the right location information, so all of the parent node of
1881
+ # stmts nodes will report back down the location information. We then
1882
+ # propagate that onto void_stmt nodes inside the stmts in order to make sure
1883
+ # all comments get printed appropriately.
1884
+ class Stmts < SimpleDelegator
1885
+ def bind(char_start, char_end)
1886
+ merge!(char_start: char_start, char_end: char_end)
1887
+
1888
+ if self[:body][0][:type] == :void_stmt
1889
+ self[:body][0].merge!(char_start: char_start, char_end: char_start)
705
1890
  end
1891
+ end
706
1892
 
707
- # When the only statement inside of a `def` node is a `begin` node, then
708
- # you can safely replace the body of the `def` with the body of the
709
- # `begin`. For example:
710
- #
711
- # def foo
712
- # begin
713
- # try_something
714
- # rescue SomeError => error
715
- # handle_error(error)
716
- # end
717
- # end
718
- #
719
- # can get transformed into:
720
- #
721
- # def foo
722
- # try_something
723
- # rescue SomeError => error
724
- # handle_error(error)
725
- # end
726
- #
727
- # This module handles this by hoisting up the `bodystmt` node from the
728
- # inner `begin` up to the `def`.
729
- def on_def(ident, params, bodystmt)
730
- def_bodystmt = bodystmt
731
- stmts, *other_parts = bodystmt[:body]
732
-
733
- if !other_parts.any? && stmts[:body].length == 1 &&
734
- stmts.dig(:body, 0, :type) == :begin
735
- def_bodystmt = stmts.dig(:body, 0, :body, 0)
736
- end
1893
+ def bind_end(char_end)
1894
+ merge!(char_end: char_end)
1895
+ end
737
1896
 
738
- super(ident, params, def_bodystmt)
1897
+ def <<(statement)
1898
+ if self[:body].any?
1899
+ merge!(statement.slice(:end, :char_end))
1900
+ else
1901
+ merge!(statement.slice(:start, :end, :char_start, :char_end))
739
1902
  end
740
1903
 
741
- # We need to track for `mlhs_paren` and `massign` nodes whether or not
742
- # there was an extra comma at the end of the expression. For some reason
743
- # it's not showing up in the AST in an obvious way. In this case we're
744
- # just simplifying everything by adding an additional field to `mlhs`
745
- # nodes called `comma` that indicates whether or not there was an extra.
746
- def on_mlhs_paren(body)
747
- super.tap do |node|
748
- next unless body[:type] == :mlhs
1904
+ self[:body] << statement
1905
+ self
1906
+ end
1907
+ end
1908
+
1909
+ # stmts_new is a parser event that represents the beginning of a list of
1910
+ # statements within any lexical block. It can be followed by any number of
1911
+ # stmts_add events, which we'll append onto an array body.
1912
+ def on_stmts_new
1913
+ Stmts.new(
1914
+ type: :stmts,
1915
+ body: [],
1916
+ start: lineno,
1917
+ end: lineno,
1918
+ char_start: char_pos,
1919
+ char_end: char_pos
1920
+ )
1921
+ end
749
1922
 
750
- ending = source.rindex(')', char_pos)
751
- buffer = source[(node[:char_start] + 1)...ending]
1923
+ # stmts_add is a parser event that represents a single statement inside a
1924
+ # list of statements within any lexical block. It accepts as arguments the
1925
+ # parent stmts node as well as an stmt which can be any expression in
1926
+ # Ruby.
1927
+ def on_stmts_add(stmts, stmt)
1928
+ stmts << stmt
1929
+ end
752
1930
 
753
- body[:comma] = buffer.strip.end_with?(',')
754
- end
1931
+ # string_concat is a parser event that represents concatenating two
1932
+ # strings together using a backward slash, as in the following example:
1933
+ #
1934
+ # 'foo' \
1935
+ # 'bar'
1936
+ #
1937
+ def on_string_concat(left, right)
1938
+ {
1939
+ type: :string_concat,
1940
+ body: [left, right],
1941
+ start: left[:start],
1942
+ char_start: left[:char_start],
1943
+ end: right[:end],
1944
+ char_end: right[:char_end]
1945
+ }
1946
+ end
1947
+
1948
+ # string_content is a parser event that represents the beginning of the
1949
+ # contents of a string, which will either be embedded inside of a
1950
+ # string_literal or a dyna_symbol node. It will have an array body so that
1951
+ # we can build up a list of @tstring_content, string_embexpr, and
1952
+ # string_dvar nodes.
1953
+ def on_string_content
1954
+ {
1955
+ type: :string,
1956
+ body: [],
1957
+ start: lineno,
1958
+ end: lineno,
1959
+ char_start: char_pos,
1960
+ char_end: char_pos
1961
+ }
1962
+ end
1963
+
1964
+ # string_add is a parser event that represents a piece of a string. It
1965
+ # could be plain @tstring_content, string_embexpr, or string_dvar nodes.
1966
+ # It accepts as arguments the parent string node as well as the additional
1967
+ # piece of the string.
1968
+ def on_string_add(string, piece)
1969
+ string.merge!(
1970
+ body: string[:body] << piece,
1971
+ end: piece[:end],
1972
+ char_end: piece[:char_end]
1973
+ )
1974
+ end
1975
+
1976
+ # string_dvar is a parser event that represents a very special kind of
1977
+ # interpolation into string. It allows you to take an instance variable,
1978
+ # class variable, or global variable and omit the braces when
1979
+ # interpolating. For example, if you wanted to interpolate the instance
1980
+ # variable @foo into a string, you could do "#@foo".
1981
+ def on_string_dvar(var_ref)
1982
+ find_scanner_event(:@embvar).merge!(
1983
+ type: :string_dvar,
1984
+ body: [var_ref],
1985
+ end: var_ref[:end],
1986
+ char_end: var_ref[:char_end]
1987
+ )
1988
+ end
1989
+
1990
+ # string_embexpr is a parser event that represents interpolated content.
1991
+ # It can go in a bunch of different parent nodes, including regexp, strings,
1992
+ # xstrings, heredocs, dyna_symbols, etc. Basically it's anywhere you see
1993
+ # the #{} construct.
1994
+ def on_string_embexpr(stmts)
1995
+ beging = find_scanner_event(:@embexpr_beg)
1996
+ ending = find_scanner_event(:@embexpr_end)
1997
+
1998
+ stmts.bind(beging[:char_end], ending[:char_start])
1999
+
2000
+ {
2001
+ type: :string_embexpr,
2002
+ body: [stmts],
2003
+ start: beging[:start],
2004
+ char_start: beging[:char_start],
2005
+ end: ending[:end],
2006
+ char_end: ending[:char_end]
2007
+ }
2008
+ end
2009
+
2010
+ # String literals are either going to be a normal string or they're going
2011
+ # to be a heredoc if we've just closed a heredoc.
2012
+ def on_string_literal(string)
2013
+ heredoc = @heredocs[-1]
2014
+
2015
+ if heredoc && heredoc[:ending]
2016
+ @heredocs.pop.merge!(body: string[:body])
2017
+ else
2018
+ beging = find_scanner_event(:@tstring_beg)
2019
+ ending = find_scanner_event(:@tstring_end)
2020
+
2021
+ {
2022
+ type: :string_literal,
2023
+ body: string[:body],
2024
+ quote: beging[:body],
2025
+ start: beging[:start],
2026
+ char_start: beging[:char_start],
2027
+ end: ending[:end],
2028
+ char_end: ending[:char_end]
2029
+ }
2030
+ end
2031
+ end
2032
+
2033
+ # A super is a parser event that represents using the super keyword with
2034
+ # any number of arguments. It can optionally use parentheses (represented
2035
+ # by an arg_paren node) or just skip straight to the arguments (with an
2036
+ # args_add_block node).
2037
+ def on_super(contents)
2038
+ find_scanner_event(:@kw, 'super').merge!(
2039
+ type: :super,
2040
+ body: [contents],
2041
+ end: contents[:end],
2042
+ char_end: contents[:char_end]
2043
+ )
2044
+ end
2045
+
2046
+ # A symbol is a parser event that immediately descends from a symbol
2047
+ # literal and contains an ident representing the contents of the symbol.
2048
+ def on_symbol(ident)
2049
+ # What the heck is this here for you ask!? Turns out when Ripper is lexing
2050
+ # source text, it turns symbols into keywords if their contents match, which
2051
+ # will mess up the location information of all of our other nodes.
2052
+ #
2053
+ # So for example instead of { type: :@ident, body: "class" } you would
2054
+ # instead get { type: :@kw, body: "class" } which is all kinds of
2055
+ # problematic.
2056
+ #
2057
+ # In order to take care of this, we explicitly delete this scanner event
2058
+ # from the stack to make sure it doesn't screw things up.
2059
+ scanner_events.pop
2060
+
2061
+ ident.merge(type: :symbol, body: [ident])
2062
+ end
2063
+
2064
+ # A symbol_literal represents a symbol in the system with no interpolation
2065
+ # (as opposed to a dyna_symbol). As its only argument it accepts either a
2066
+ # symbol node (for most cases) or an ident node (in the case that we're
2067
+ # using bare words, as in an alias node like alias foo bar).
2068
+ def on_symbol_literal(contents)
2069
+ if scanner_events[-1] == contents
2070
+ contents.merge(type: :symbol_literal, body: [contents])
2071
+ else
2072
+ beging = find_scanner_event(:@symbeg)
2073
+ contents.merge!(type: :symbol_literal, char_start: beging[:char_start])
2074
+ end
2075
+ end
2076
+
2077
+ # symbols_new is a parser event that represents the beginning of a symbol
2078
+ # literal array that accepts interpolation, like %I[one #{two} three]. It
2079
+ # can be followed by any number of symbols_add events, which we'll append
2080
+ # onto an array body.
2081
+ def on_symbols_new
2082
+ find_scanner_event(:@symbols_beg).merge!(type: :symbols, body: [])
2083
+ end
2084
+
2085
+ # symbols_add is a parser event that represents an element inside of a
2086
+ # symbol literal array that accepts interpolation, like
2087
+ # %I[one #{two} three]. It accepts as arguments the parent symbols node as
2088
+ # well as a word_add parser event.
2089
+ def on_symbols_add(symbols, word_add)
2090
+ symbols.merge!(
2091
+ body: symbols[:body] << word_add,
2092
+ end: word_add[:end],
2093
+ char_end: word_add[:char_end]
2094
+ )
2095
+ end
2096
+
2097
+ # A helper function to find a :: operator for the next two nodes. We do
2098
+ # special handling instead of using find_scanner_event here because we
2099
+ # don't pop off all of the :: operators so you could end up getting the
2100
+ # wrong information if you have for instance ::X::Y::Z.
2101
+ def find_colon2_before(const)
2102
+ index =
2103
+ scanner_events.rindex do |event|
2104
+ event[:type] == :@op && event[:body] == '::' &&
2105
+ event[:char_start] < const[:char_start]
755
2106
  end
756
2107
 
757
- def on_massign(left, right)
758
- super.tap do
759
- next unless left[:type] == :mlhs
2108
+ scanner_events[index]
2109
+ end
760
2110
 
761
- range = left[:char_start]..left[:char_end]
762
- left[:comma] = source[range].strip.end_with?(',')
2111
+ # A top_const_field is a parser event that is always the child of some
2112
+ # kind of assignment. It represents when you're assigning to a constant
2113
+ # that is being referenced at the top level. For example:
2114
+ #
2115
+ # ::X = 1
2116
+ #
2117
+ def on_top_const_field(const)
2118
+ beging = find_colon2_before(const)
2119
+ const.merge(
2120
+ type: :top_const_field,
2121
+ body: [const],
2122
+ start: beging[:start],
2123
+ char_start: beging[:char_start]
2124
+ )
2125
+ end
2126
+
2127
+ # A top_const_ref is a parser event that is very similar to
2128
+ # top_const_field except that it is not involved in an assignment. It
2129
+ # looks like the following example:
2130
+ #
2131
+ # ::X
2132
+ #
2133
+ def on_top_const_ref(const)
2134
+ beging = find_colon2_before(const)
2135
+ const.merge(
2136
+ type: :top_const_ref,
2137
+ body: [const],
2138
+ start: beging[:start],
2139
+ char_start: beging[:char_start]
2140
+ )
2141
+ end
2142
+
2143
+ # A unary node represents a unary method being called on an expression, as
2144
+ # in !, ~, or not. We have somewhat special handling of the not operator
2145
+ # since if it has parentheses they don't get reported as a paren node for
2146
+ # some reason.
2147
+ def on_unary(oper, value)
2148
+ if oper == :not
2149
+ node = find_scanner_event(:@kw, 'not')
2150
+
2151
+ paren = source[node[:char_end]...value[:char_start]].include?('(')
2152
+ ending = paren ? find_scanner_event(:@rparen) : value
2153
+
2154
+ node.merge!(
2155
+ type: :unary,
2156
+ oper: oper,
2157
+ body: [value],
2158
+ end: ending[:end],
2159
+ char_end: ending[:char_end],
2160
+ paren: paren
2161
+ )
2162
+ else
2163
+ # Special case instead of using find_scanner_event here. It turns out that
2164
+ # if you have a range that goes from a negative number to a negative
2165
+ # number then you can end up with a .. or a ... that's higher in the
2166
+ # stack. So we need to explicitly disallow those operators.
2167
+ index =
2168
+ scanner_events.rindex do |scanner_event|
2169
+ scanner_event[:type] == :@op &&
2170
+ !%w[.. ...].include?(scanner_event[:body])
763
2171
  end
2172
+
2173
+ beging = scanner_events.delete_at(index)
2174
+ beging.merge!(
2175
+ type: :unary,
2176
+ oper: oper[0],
2177
+ body: [value],
2178
+ end: value[:end],
2179
+ char_end: value[:char_end]
2180
+ )
2181
+ end
2182
+ end
2183
+
2184
+ # undef nodes represent using the keyword undef. It accepts as an argument
2185
+ # an array of symbol_literal nodes that represent each message that the
2186
+ # user is attempting to undefine. We use the keyword to get the beginning
2187
+ # location and the last symbol to get the ending.
2188
+ def on_undef(symbol_literals)
2189
+ last = symbol_literals.last
2190
+
2191
+ find_scanner_event(:@kw, 'undef').merge!(
2192
+ type: :undef,
2193
+ body: symbol_literals,
2194
+ end: last[:end],
2195
+ char_end: last[:char_end]
2196
+ )
2197
+ end
2198
+
2199
+ # unless is a parser event that represents the first clause in an unless
2200
+ # chain. It accepts as arguments the predicate of the unless, the
2201
+ # statements that are contained within the unless clause, and the optional
2202
+ # consequent clause.
2203
+ def on_unless(predicate, stmts, consequent)
2204
+ beging = find_scanner_event(:@kw, 'unless')
2205
+ ending = consequent || find_scanner_event(:@kw, 'end')
2206
+
2207
+ stmts.bind(predicate[:char_end], ending[:char_start])
2208
+
2209
+ {
2210
+ type: :unless,
2211
+ body: [predicate, stmts, consequent],
2212
+ start: beging[:start],
2213
+ char_start: beging[:char_start],
2214
+ end: ending[:end],
2215
+ char_end: ending[:char_end]
2216
+ }
2217
+ end
2218
+
2219
+ # unless_mod is a parser event that represents the modifier form of an
2220
+ # unless statement. It accepts as arguments the predicate of the unless
2221
+ # and the statement that is contained within the unless clause.
2222
+ def on_unless_mod(predicate, statement)
2223
+ find_scanner_event(:@kw, 'unless')
2224
+
2225
+ {
2226
+ type: :unless_mod,
2227
+ body: [predicate, statement],
2228
+ start: statement[:start],
2229
+ char_start: statement[:char_start],
2230
+ end: predicate[:end],
2231
+ char_end: predicate[:char_end]
2232
+ }
2233
+ end
2234
+
2235
+ # until is a parser event that represents an until loop. It accepts as
2236
+ # arguments the predicate to the until and the statements that are
2237
+ # contained within the until clause.
2238
+ def on_until(predicate, stmts)
2239
+ beging = find_scanner_event(:@kw, 'until')
2240
+ ending = find_scanner_event(:@kw, 'end')
2241
+
2242
+ stmts.bind(predicate[:char_end], ending[:char_start])
2243
+
2244
+ {
2245
+ type: :until,
2246
+ body: [predicate, stmts],
2247
+ start: beging[:start],
2248
+ char_start: beging[:char_start],
2249
+ end: ending[:end],
2250
+ char_end: ending[:char_end]
2251
+ }
2252
+ end
2253
+
2254
+ # until_mod is a parser event that represents the modifier form of an
2255
+ # until loop. It accepts as arguments the predicate to the until and the
2256
+ # statement that is contained within the until loop.
2257
+ def on_until_mod(predicate, statement)
2258
+ find_scanner_event(:@kw, 'until')
2259
+
2260
+ {
2261
+ type: :until_mod,
2262
+ body: [predicate, statement],
2263
+ start: statement[:start],
2264
+ char_start: statement[:char_start],
2265
+ end: predicate[:end],
2266
+ char_end: predicate[:char_end]
2267
+ }
2268
+ end
2269
+
2270
+ # var_alias is a parser event that represents when you're using the alias
2271
+ # keyword with global variable arguments. You can optionally use
2272
+ # parentheses with this keyword, so we either track the location
2273
+ # information based on those or the final argument to the alias method.
2274
+ def on_var_alias(left, right)
2275
+ beging = find_scanner_event(:@kw, 'alias')
2276
+
2277
+ paren = source[beging[:char_end]...left[:char_start]].include?('(')
2278
+ ending = paren ? find_scanner_event(:@rparen) : right
2279
+
2280
+ {
2281
+ type: :var_alias,
2282
+ body: [left, right],
2283
+ start: beging[:start],
2284
+ char_start: beging[:char_start],
2285
+ end: ending[:end],
2286
+ char_end: ending[:char_end]
2287
+ }
2288
+ end
2289
+
2290
+ # var_ref is a parser event that represents using either a local variable,
2291
+ # a nil literal, a true or false literal, or a numbered block variable.
2292
+ def on_var_ref(contents)
2293
+ contents.merge(type: :var_ref, body: [contents])
2294
+ end
2295
+
2296
+ # var_field is a parser event that represents a variable that is being
2297
+ # assigned a value. As such, it is always a child of an assignment type
2298
+ # node. For example, in the following example foo is a var_field:
2299
+ #
2300
+ # foo = 1
2301
+ #
2302
+ def on_var_field(ident)
2303
+ if ident
2304
+ ident.merge(type: :var_field, body: [ident])
2305
+ else
2306
+ # You can hit this pattern if you're assigning to a splat using pattern
2307
+ # matching syntax in Ruby 2.7+
2308
+ { type: :var_field, body: [] }
2309
+ end
2310
+ end
2311
+
2312
+ # vcall nodes are any plain named thing with Ruby that could be either a
2313
+ # local variable or a method call. They accept as an argument the ident
2314
+ # scanner event that contains their content.
2315
+ #
2316
+ # Access controls like private, protected, and public are reported as
2317
+ # vcall nodes since they're technically method calls. We want to be able
2318
+ # to add new lines around them as necessary, so here we're going to
2319
+ # explicitly track those as a different node type.
2320
+ def on_vcall(ident)
2321
+ @controls ||= %w[private protected public].freeze
2322
+
2323
+ body = ident[:body]
2324
+ type =
2325
+ if @controls.include?(body) && body == lines[lineno - 1].strip
2326
+ :access_ctrl
2327
+ else
2328
+ :vcall
764
2329
  end
2330
+
2331
+ ident.merge(type: type, body: [ident])
2332
+ end
2333
+
2334
+ # void_stmt is a special kind of parser event that represents an empty lexical
2335
+ # block of code. It often will have comments attached to it, so it requires
2336
+ # some special handling.
2337
+ def on_void_stmt
2338
+ {
2339
+ type: :void_stmt,
2340
+ start: lineno,
2341
+ end: lineno,
2342
+ char_start: char_pos,
2343
+ char_end: char_pos
2344
+ }
2345
+ end
2346
+
2347
+ # when is a parser event that represents another clause in a case chain.
2348
+ # It accepts as arguments the predicate of the when, the statements that
2349
+ # are contained within the when clause, and the optional consequent
2350
+ # clause.
2351
+ def on_when(predicate, stmts, consequent)
2352
+ beging = find_scanner_event(:@kw, 'when')
2353
+ ending = consequent || find_scanner_event(:@kw, 'end')
2354
+
2355
+ stmts.bind(predicate[:char_end], ending[:char_start])
2356
+
2357
+ {
2358
+ type: :when,
2359
+ body: [predicate, stmts, consequent],
2360
+ start: beging[:start],
2361
+ char_start: beging[:char_start],
2362
+ end: ending[:end],
2363
+ char_end: ending[:char_end]
2364
+ }
2365
+ end
2366
+
2367
+ # while is a parser event that represents a while loop. It accepts as
2368
+ # arguments the predicate to the while and the statements that are
2369
+ # contained within the while clause.
2370
+ def on_while(predicate, stmts)
2371
+ beging = find_scanner_event(:@kw, 'while')
2372
+ ending = find_scanner_event(:@kw, 'end')
2373
+
2374
+ stmts.bind(predicate[:char_end], ending[:char_start])
2375
+
2376
+ {
2377
+ type: :while,
2378
+ body: [predicate, stmts],
2379
+ start: beging[:start],
2380
+ char_start: beging[:char_start],
2381
+ end: ending[:end],
2382
+ char_end: ending[:char_end]
2383
+ }
2384
+ end
2385
+
2386
+ # while_mod is a parser event that represents the modifier form of a
2387
+ # while loop. It accepts as arguments the predicate to the while and the
2388
+ # statement that is contained within the while loop.
2389
+ def on_while_mod(predicate, statement)
2390
+ find_scanner_event(:@kw, 'while')
2391
+
2392
+ {
2393
+ type: :while_mod,
2394
+ body: [predicate, statement],
2395
+ start: statement[:start],
2396
+ char_start: statement[:char_start],
2397
+ end: predicate[:end],
2398
+ char_end: predicate[:char_end]
2399
+ }
2400
+ end
2401
+
2402
+ # word_new is a parser event that represents the beginning of a word
2403
+ # within a special array literal (either strings or symbols) that accepts
2404
+ # interpolation. For example, in the following array, there are three
2405
+ # word nodes:
2406
+ #
2407
+ # %W[one a#{two}a three]
2408
+ #
2409
+ # Each word inside that array is represented as its own node, which is in
2410
+ # terms of the parser a tree of word_new and word_add nodes. For our
2411
+ # purposes, we're going to report this as a word node and build up an
2412
+ # array body of our parts.
2413
+ def on_word_new
2414
+ { type: :word, body: [] }
2415
+ end
2416
+
2417
+ # word_add is a parser event that represents a piece of a word within a
2418
+ # special array literal that accepts interpolation. It accepts as
2419
+ # arguments the parent word node as well as the additional piece of the
2420
+ # word, which can be either a @tstring_content node for a plain string
2421
+ # piece or a string_embexpr for an interpolated piece.
2422
+ def on_word_add(word, piece)
2423
+ if word[:body].empty?
2424
+ # Here we're making sure we get the correct bounds by using the
2425
+ # location information from the first piece.
2426
+ piece.merge(type: :word, body: [piece])
2427
+ else
2428
+ word.merge!(
2429
+ body: word[:body] << piece,
2430
+ end: piece[:end],
2431
+ char_end: piece[:char_end]
2432
+ )
765
2433
  end
766
- )
2434
+ end
2435
+
2436
+ # words_new is a parser event that represents the beginning of a string
2437
+ # literal array that accepts interpolation, like %W[one #{two} three]. It
2438
+ # can be followed by any number of words_add events, which we'll append
2439
+ # onto an array body.
2440
+ def on_words_new
2441
+ find_scanner_event(:@words_beg).merge!(type: :words, body: [])
2442
+ end
2443
+
2444
+ # words_add is a parser event that represents an element inside of a
2445
+ # string literal array that accepts interpolation, like
2446
+ # %W[one #{two} three]. It accepts as arguments the parent words node as
2447
+ # well as a word_add parser event.
2448
+ def on_words_add(words, word_add)
2449
+ words.merge!(
2450
+ body: words[:body] << word_add,
2451
+ end: word_add[:end],
2452
+ char_end: word_add[:char_end]
2453
+ )
2454
+ end
2455
+
2456
+ # xstring_new is a parser event that represents the beginning of a string
2457
+ # of commands that gets sent out to the terminal, like `ls`. It can
2458
+ # optionally include interpolation much like a regular string, so we're
2459
+ # going to build up an array body.
2460
+ #
2461
+ # If the xstring actually starts with a heredoc declaration, then we're
2462
+ # going to let heredocs continue to do their thing and instead just use
2463
+ # its location information.
2464
def on_xstring_new
  # The most recently opened heredoc, if any.
  heredoc = @heredocs[-1]

  # Test (rather than assign) whether that heredoc was declared with
  # backticks, as in <<-`SHELL` or <<`SHELL`. The previous condition used
  # `[3] = '`'`, which is an assignment: it was always truthy and it
  # overwrote a character of the stored declaration, so every open heredoc
  # was treated as a command heredoc.
  if heredoc && heredoc[:beging].include?('`')
    # Non-mutating merge: the heredoc node itself is left untouched and a
    # copy tagged as an xstring with an empty body is returned.
    heredoc.merge(type: :xstring, body: [])
  elsif ([RUBY_MAJOR, RUBY_MINOR, RUBY_PATCH] <=> [2, 5, 7]) < 0
    # Rubies older than 2.5.7 get a bare node without location information
    # instead of looking up a @backtick scanner event. The version is
    # compared lexicographically so that, for example, 2.4.10 is correctly
    # seen as older than 2.5.7 (comparing each component on its own is not).
    { type: :xstring, body: [] }
  else
    find_scanner_event(:@backtick).merge!(type: :xstring, body: [])
  end
end
2475
+
2476
+ # xstring_add is a parser event that represents a piece of a string of
2477
+ # commands that gets sent out to the terminal, like `ls`. It accepts two
2478
+ # arguments, the parent xstring node as well as the piece that is being
2479
+ # added to the string. Because it supports interpolation this is either a
2480
+ # tstring_content scanner event representing bare string content or a
2481
+ # string_embexpr representing interpolated content.
2482
+ def on_xstring_add(xstring, piece)
2483
+ xstring.merge!(
2484
+ body: xstring[:body] << piece,
2485
+ end: piece[:end],
2486
+ char_end: piece[:char_end]
2487
+ )
2488
+ end
2489
+
2490
+ # xstring_literal is a parser event that represents a string of commands
2491
+ # that gets sent to the terminal, like `ls`. It accepts as its only
2492
+ # argument an xstring node that is a built up array representation of all
2493
+ # of the parts of the string (including the plain string content and the
2494
+ # interpolated content).
2495
+ #
2496
+ # They can also use heredocs to present themselves, as in the example:
2497
+ #
2498
+ # <<-`SHELL`
2499
+ # ls
2500
+ # SHELL
2501
+ #
2502
+ # In this case we need to change the node type to be a heredoc instead of
2503
+ # an xstring_literal in order to get the right formatting.
2504
def on_xstring_literal(xstring)
  # The most recently opened heredoc, if any.
  heredoc = @heredocs[-1]

  # Test (rather than assign) whether that heredoc was declared with
  # backticks, as in <<-`SHELL` or <<`SHELL`. The previous condition used
  # `[3] = '`'`, which is an assignment: it was always truthy and it
  # overwrote a character of the stored declaration, so every xstring that
  # appeared while any heredoc was on the stack was folded into that heredoc.
  if heredoc && heredoc[:beging].include?('`')
    # Command heredoc: keep the heredoc node and hand it the collected parts.
    heredoc.merge!(body: xstring[:body])
  else
    # Plain `...` literal: close the node at the terminating scanner event.
    ending = find_scanner_event(:@tstring_end)
    xstring.merge!(
      type: :xstring_literal,
      end: ending[:end],
      char_end: ending[:char_end]
    )
  end
end
2518
+
2519
+ # yield is a parser event that represents using the yield keyword with
2520
+ # arguments. It accepts as an argument an args_add_block event that
2521
+ # contains all of the arguments being passed.
2522
+ def on_yield(args_add_block)
2523
+ find_scanner_event(:@kw, 'yield').merge!(
2524
+ type: :yield,
2525
+ body: [args_add_block],
2526
+ end: args_add_block[:end],
2527
+ char_end: args_add_block[:char_end]
2528
+ )
2529
+ end
2530
+
2531
+ # yield0 is a parser event that represents the bare yield keyword. It has
2532
+ # no body as it accepts no arguments. This is as opposed to the yield
2533
+ # parser event, which is the version where you're yielding one or more
2534
+ # values.
2535
+ def on_yield0
2536
+ find_scanner_event(:@kw, 'yield').merge!(type: :yield0)
2537
+ end
2538
+
2539
+ # zsuper is a parser event that represents the bare super keyword. It has
2540
+ # no body as it accepts no arguments. This is as opposed to the super
2541
+ # parser event, which is the version where you're calling super with one
2542
+ # or more values.
2543
+ def on_zsuper
2544
+ find_scanner_event(:@kw, 'super').merge!(type: :zsuper)
2545
+ end
767
2546
  end
768
2547
 
769
2548
  # If this is the main file we're executing, then most likely this is being
770
- # executed from the parse.js spawn. In that case, read the ruby source from
2549
+ # executed from the parser.js spawn. In that case, read the ruby source from
771
2550
  # stdin and report back the AST over stdout.
772
2551
 
773
2552
  if $0 == __FILE__