prettier 0.21.0 → 0.22.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,5 @@
1
1
  const {
2
+ breakParent,
2
3
  concat,
3
4
  dedent,
4
5
  group,
@@ -40,7 +41,6 @@ module.exports = {
40
41
 
41
42
  return group(concat(parts));
42
43
  },
43
- embdoc: (path, _opts, _print) => concat([trim, path.getValue().body]),
44
44
  paren: (path, opts, print) => {
45
45
  if (!path.getValue().body[0]) {
46
46
  return "()";
@@ -68,6 +68,28 @@ module.exports = {
68
68
  concat([join(hardline, path.map(print, "body")), hardline]),
69
69
  stmts: (path, opts, print) => {
70
70
  const stmts = path.getValue().body;
71
+
72
+ // This is a special case where we have only comments inside a statement
73
+ // list. In this case we want to avoid doing any kind of line number
74
+ // tracking and just print out the comments.
75
+ if (
76
+ stmts.length === 1 &&
77
+ stmts[0].type === "void_stmt" &&
78
+ stmts[0].comments
79
+ ) {
80
+ const comments = path.map(
81
+ (commentPath, index) => {
82
+ stmts[0].comments[index].printed = true;
83
+ return opts.printer.printComment(commentPath);
84
+ },
85
+ "body",
86
+ 0,
87
+ "comments"
88
+ );
89
+
90
+ return concat([breakParent, join(hardline, comments)]);
91
+ }
92
+
71
93
  const parts = [];
72
94
  let lineNo = null;
73
95
 
@@ -8,30 +8,30 @@ const {
8
8
  join
9
9
  } = require("../prettier");
10
10
 
11
- const { concatBody, empty, makeList, prefix, surround } = require("../utils");
12
-
13
11
  // If there is some part of this string that matches an escape sequence or that
14
12
  // contains the interpolation pattern ("#{"), then we are locked into whichever
15
13
  // quote the user chose. (If they chose single quotes, then double quoting
16
14
  // would activate the escape sequence, and if they chose double quotes, then
17
15
  // single quotes would deactivate it.)
18
- const isQuoteLocked = (string) =>
19
- string.body.some(
16
+ function isQuoteLocked(node) {
17
+ return node.body.some(
20
18
  (part) =>
21
19
  part.type === "@tstring_content" &&
22
20
  (part.body.includes("#{") || part.body.includes("\\"))
23
21
  );
22
+ }
24
23
 
25
24
  // A string is considered to be able to use single quotes if it contains only
26
25
  // plain string content and that content does not contain a single quote.
27
- const isSingleQuotable = (string) =>
28
- string.body.every(
26
+ function isSingleQuotable(node) {
27
+ return node.body.every(
29
28
  (part) => part.type === "@tstring_content" && !part.body.includes("'")
30
29
  );
30
+ }
31
31
 
32
32
  const quotePattern = new RegExp("\\\\([\\s\\S])|(['\"])", "g");
33
33
 
34
- const normalizeQuotes = (content, enclosingQuote, originalQuote) => {
34
+ function normalizeQuotes(content, enclosingQuote, originalQuote) {
35
35
  const replaceOther = ["'", '"'].includes(originalQuote);
36
36
  const otherQuote = enclosingQuote === '"' ? "'" : '"';
37
37
 
@@ -52,7 +52,7 @@ const normalizeQuotes = (content, enclosingQuote, originalQuote) => {
52
52
 
53
53
  return `\\${escaped}`;
54
54
  });
55
- };
55
+ }
56
56
 
57
57
  const quotePairs = {
58
58
  "(": ")",
@@ -61,7 +61,7 @@ const quotePairs = {
61
61
  "<": ">"
62
62
  };
63
63
 
64
- const getClosingQuote = (quote) => {
64
+ function getClosingQuote(quote) {
65
65
  if (!quote.startsWith("%")) {
66
66
  return quote;
67
67
  }
@@ -72,40 +72,89 @@ const getClosingQuote = (quote) => {
72
72
  }
73
73
 
74
74
  return boundary;
75
- };
75
+ }
76
76
 
77
- module.exports = {
78
- "@CHAR": (path, { preferSingleQuotes }, _print) => {
79
- const { body } = path.getValue();
77
+ // Prints a @CHAR node. @CHAR nodes are special character strings that usually
78
+ // are strings of length 1. If they're any longer then we'll try to apply the
79
+ // correct quotes.
80
+ function printChar(path, { preferSingleQuotes }, _print) {
81
+ const { body } = path.getValue();
80
82
 
81
- if (body.length !== 2) {
82
- return body;
83
+ if (body.length !== 2) {
84
+ return body;
85
+ }
86
+
87
+ const quote = preferSingleQuotes ? "'" : '"';
88
+ return concat([quote, body.slice(1), quote]);
89
+ }
90
+
91
+ // Prints a dynamic symbol. Assumes there's a quote property attached to the
92
+ // node that will tell us which quote to use when printing. We're just going to
93
+ // use whatever quote was provided.
94
+ function printDynaSymbol(path, opts, print) {
95
+ const { quote } = path.getValue();
96
+
97
+ return concat([":", quote].concat(path.map(print, "body")).concat(quote));
98
+ }
99
+
100
+ // Prints out an interpolated variable in the string by converting it into an
101
+ // embedded expression.
102
+ function printStringDVar(path, opts, print) {
103
+ return concat(["#{", path.call(print, "body", 0), "}"]);
104
+ }
105
+
106
+ // Prints out a literal string. This function does its best to respect the
107
+ // wishes of the user with regards to single versus double quotes, but if the
108
+ // string contains any escape expressions then it will just keep the original
109
+ // quotes.
110
+ function printStringLiteral(path, { preferSingleQuotes }, print) {
111
+ const node = path.getValue();
112
+
113
+ // If the string is empty, it will not have any parts, so just print out the
114
+ // quotes corresponding to the config
115
+ if (node.body.length === 0) {
116
+ return preferSingleQuotes ? "''" : '""';
117
+ }
118
+
119
+ // Determine the quote that should enclose the new string
120
+ let quote;
121
+ if (isQuoteLocked(node)) {
122
+ quote = node.quote;
123
+ } else {
124
+ quote = preferSingleQuotes && isSingleQuotable(node) ? "'" : '"';
125
+ }
126
+
127
+ const parts = node.body.map((part, index) => {
128
+ if (part.type !== "@tstring_content") {
129
+ // In this case, the part of the string is an embedded expression
130
+ return path.call(print, "body", index);
83
131
  }
84
132
 
85
- const quote = preferSingleQuotes ? "'" : '"';
86
- return body.length === 2 ? concat([quote, body.slice(1), quote]) : body;
87
- },
88
- dyna_symbol: (path, opts, print) => {
89
- const { quote } = path.getValue();
133
+ // In this case, the part of the string is just regular string content
134
+ return join(
135
+ literalline,
136
+ normalizeQuotes(part.body, quote, node.quote).split("\n")
137
+ );
138
+ });
90
139
 
91
- return concat([":", quote, concat(path.call(print, "body", 0)), quote]);
92
- },
93
- heredoc: (path, opts, print) => {
94
- const { beging, body, ending } = path.getValue();
140
+ return concat([quote].concat(parts).concat(getClosingQuote(quote)));
141
+ }
95
142
 
96
- const parts = body.map((part, index) => {
97
- if (part.type !== "@tstring_content") {
98
- // In this case, the part of the string is an embedded expression
99
- return path.call(print, "body", index);
100
- }
143
+ // Prints out a symbol literal. Its child will always be the ident that
144
+ // represents the string content of the symbol.
145
+ function printSymbolLiteral(path, opts, print) {
146
+ return concat([":", path.call(print, "body", 0)]);
147
+ }
101
148
 
102
- // In this case, the part of the string is just regular string content
103
- return join(literalline, part.body.split("\n"));
104
- });
149
+ // Prints out an xstring literal. Its child is an array of string parts,
150
+ // including plain string content and interpolated content.
151
+ function printXStringLiteral(path, opts, print) {
152
+ return concat(["`"].concat(path.map(print, "body")).concat("`"));
153
+ }
105
154
 
106
- return concat([beging, literalline, concat(parts), ending]);
107
- },
108
- string: makeList,
155
+ module.exports = {
156
+ "@CHAR": printChar,
157
+ dyna_symbol: printDynaSymbol,
109
158
  string_concat: (path, opts, print) =>
110
159
  group(
111
160
  concat([
@@ -114,14 +163,14 @@ module.exports = {
114
163
  indent(concat([hardline, path.call(print, "body", 1)]))
115
164
  ])
116
165
  ),
117
- string_dvar: surround("#{", "}"),
166
+ string_dvar: printStringDVar,
118
167
  string_embexpr: (path, opts, print) => {
119
168
  const parts = path.call(print, "body", 0);
120
169
 
121
170
  // If the interpolated expression is inside of an xstring literal (a string
122
171
  // that gets sent to the command line) then we don't want to automatically
123
172
  // indent, as this can lead to some very odd looking expressions
124
- if (path.getParentNode().type === "xstring") {
173
+ if (path.getParentNode().type === "xstring_literal") {
125
174
  return concat(["#{", parts, "}"]);
126
175
  }
127
176
 
@@ -129,47 +178,7 @@ module.exports = {
129
178
  concat(["#{", indent(concat([softline, parts])), concat([softline, "}"])])
130
179
  );
131
180
  },
132
- string_literal: (path, { preferSingleQuotes }, print) => {
133
- const stringLiteral = path.getValue();
134
- const string = stringLiteral.body[0];
135
-
136
- // If the string is empty, it will not have any parts, so just print out the
137
- // quotes corresponding to the config
138
- if (string.body.length === 0) {
139
- return preferSingleQuotes ? "''" : '""';
140
- }
141
-
142
- // Determine the quote that should enclose the new string
143
- let quote;
144
- if (isQuoteLocked(string)) {
145
- ({ quote } = stringLiteral);
146
- } else {
147
- quote = preferSingleQuotes && isSingleQuotable(string) ? "'" : '"';
148
- }
149
-
150
- const parts = string.body.map((part, index) => {
151
- if (part.type !== "@tstring_content") {
152
- // In this case, the part of the string is an embedded expression
153
- return path.call(print, "body", 0, "body", index);
154
- }
155
-
156
- // In this case, the part of the string is just regular string content
157
- return join(
158
- literalline,
159
- normalizeQuotes(part.body, quote, stringLiteral.quote).split("\n")
160
- );
161
- });
162
-
163
- return concat([quote].concat(parts).concat(getClosingQuote(quote)));
164
- },
165
- symbol: prefix(":"),
166
- symbol_literal: concatBody,
167
- word_add: concatBody,
168
- word_new: empty,
169
- xstring: makeList,
170
- xstring_literal: (path, opts, print) => {
171
- const parts = path.call(print, "body", 0);
172
-
173
- return concat(["`"].concat(parts).concat("`"));
174
- }
181
+ string_literal: printStringLiteral,
182
+ symbol_literal: printSymbolLiteral,
183
+ xstring_literal: printXStringLiteral
175
184
  };
@@ -0,0 +1,35 @@
1
+ const { align, concat, group, join, line } = require("../prettier");
2
+ const { literal } = require("../utils");
3
+
4
+ function printSuper(path, opts, print) {
5
+ const args = path.getValue().body[0];
6
+
7
+ if (args.type === "arg_paren") {
8
+ // In case there are explicitly no arguments but they are using parens,
9
+ // we assume they are attempting to override the initializer and pass no
10
+ // arguments up.
11
+ if (args.body[0] === null) {
12
+ return "super()";
13
+ }
14
+
15
+ return concat(["super", path.call(print, "body", 0)]);
16
+ }
17
+
18
+ const keyword = "super ";
19
+ const argsDocs = path.call(print, "body", 0);
20
+
21
+ return group(
22
+ concat([
23
+ keyword,
24
+ align(keyword.length, group(join(concat([",", line]), argsDocs)))
25
+ ])
26
+ );
27
+ }
28
+
29
+ // Version of super without any parens or args.
30
+ const printZSuper = literal("super");
31
+
32
+ module.exports = {
33
+ super: printSuper,
34
+ zsuper: printZSuper
35
+ };
@@ -0,0 +1,42 @@
1
+ const {
2
+ addTrailingComment,
3
+ align,
4
+ concat,
5
+ group,
6
+ join,
7
+ line
8
+ } = require("../prettier");
9
+
10
+ function printUndefSymbol(path, opts, print) {
11
+ const node = path.getValue();
12
+
13
+ // Since we're going to descend into the symbol literal to grab out the ident
14
+ // node, then we need to make sure we copy over any comments as well,
15
+ // otherwise we could accidentally skip printing them.
16
+ if (node.comments) {
17
+ node.comments.forEach((comment) => {
18
+ addTrailingComment(node.body[0], comment);
19
+ });
20
+ }
21
+
22
+ return path.call(print, "body", 0);
23
+ }
24
+
25
+ function printUndef(path, opts, print) {
26
+ const keyword = "undef ";
27
+ const argNodes = path.map(
28
+ (symbolPath) => printUndefSymbol(symbolPath, opts, print),
29
+ "body"
30
+ );
31
+
32
+ return group(
33
+ concat([
34
+ keyword,
35
+ align(keyword.length, join(concat([",", line]), argNodes))
36
+ ])
37
+ );
38
+ }
39
+
40
+ module.exports = {
41
+ undef: printUndef
42
+ };
@@ -0,0 +1,71 @@
1
+ const { spawnSync } = require("child_process");
2
+ const path = require("path");
3
+
4
+ // In order to properly parse ruby code, we need to tell the ruby process to
5
+ // parse using UTF-8. Unfortunately, the way that you accomplish this looks
6
+ // different depending on your platform. This object below represents all of
7
+ // the possible values of process.platform per:
8
+ // https://nodejs.org/api/process.html#process_process_platform
9
+ const LANG = {
10
+ aix: "C.UTF-8",
11
+ darwin: "en_US.UTF-8",
12
+ freebsd: "C.UTF-8",
13
+ linux: "C.UTF-8",
14
+ openbsd: "C.UTF-8",
15
+ sunos: "C.UTF-8",
16
+ win32: ".UTF-8"
17
+ }[process.platform];
18
+
19
+ // This function is responsible for taking an input string of text and returning
20
+ // to prettier a JavaScript object that is the equivalent AST that represents
21
+ // the code stored in that string. We accomplish this by spawning a new Ruby
22
+ // process of parser.rb and reading JSON off STDOUT.
23
+ function parse(text, _parsers, _opts) {
24
+ const child = spawnSync(
25
+ "ruby",
26
+ ["--disable-gems", path.join(__dirname, "./parser.rb")],
27
+ {
28
+ env: Object.assign({}, process.env, { LANG }),
29
+ input: text,
30
+ maxBuffer: 10 * 1024 * 1024 // 10MB
31
+ }
32
+ );
33
+
34
+ const error = child.stderr.toString();
35
+ if (error) {
36
+ throw new Error(error);
37
+ }
38
+
39
+ const response = child.stdout.toString();
40
+ return JSON.parse(response);
41
+ }
42
+
43
+ const pragmaPattern = /#\s*@(prettier|format)/;
44
+
45
+ // This function handles checking whether or not the source string has the
46
+ // pragma for prettier. This is an optional workflow for incremental adoption.
47
+ function hasPragma(text) {
48
+ return pragmaPattern.test(text);
49
+ }
50
+
51
+ // This function is critical for comments and cursor support, and is responsible
52
+ // for returning the index of the character within the source string that is the
53
+ // beginning of the given node.
54
+ function locStart(node) {
55
+ return node.char_start;
56
+ }
57
+
58
+ // This function is critical for comments and cursor support, and is responsible
59
+ // for returning the index of the character within the source string that is the
60
+ // ending of the given node.
61
+ function locEnd(node) {
62
+ return node.char_end;
63
+ }
64
+
65
+ module.exports = {
66
+ parse,
67
+ astFormat: "ruby",
68
+ hasPragma,
69
+ locStart,
70
+ locEnd
71
+ };
@@ -2,9 +2,9 @@
2
2
 
3
3
  # We implement our own version checking here instead of using Gem::Version so
4
4
  # that we can use the --disable-gems flag.
5
- major, minor, * = RUBY_VERSION.split('.').map(&:to_i)
5
+ RUBY_MAJOR, RUBY_MINOR, * = RUBY_VERSION.split('.').map(&:to_i)
6
6
 
7
- if (major < 2) || ((major == 2) && (minor < 5))
7
+ if (RUBY_MAJOR < 2) || ((RUBY_MAJOR == 2) && (RUBY_MINOR < 5))
8
8
  warn(
9
9
  "Ruby version #{RUBY_VERSION} not supported. " \
10
10
  'Please upgrade to 2.5.0 or above.'
@@ -13,761 +13,2405 @@ if (major < 2) || ((major == 2) && (minor < 5))
13
13
  exit 1
14
14
  end
15
15
 
16
+ require 'delegate'
16
17
  require 'json' unless defined?(JSON)
17
18
  require 'ripper'
18
19
 
19
20
  module Prettier; end
20
21
 
21
22
  class Prettier::Parser < Ripper
22
- attr_reader :source, :lines, :__end__
23
+ attr_reader :source, :lines, :scanner_events, :line_counts
23
24
 
24
25
  def initialize(source, *args)
25
26
  super(source, *args)
26
27
 
27
28
  @source = source
28
29
  @lines = source.split("\n")
30
+
31
+ @comments = []
32
+ @embdoc = nil
29
33
  @__end__ = nil
34
+
35
+ @heredocs = []
36
+
37
+ @scanner_events = []
38
+ @line_counts = [0]
39
+
40
+ @source.lines.each { |line| @line_counts << @line_counts.last + line.size }
30
41
  end
31
42
 
32
43
  private
33
44
 
45
+ # This represents the current place in the source string that we've gotten to
46
+ # so far. We have a memoized line_counts object that we can use to get the
47
+ # number of characters that we've had to go through to get to the beginning of
48
+ # this line, then we add the number of columns into this line that we've gone
49
+ # through.
50
+ def char_pos
51
+ line_counts[lineno - 1] + column
52
+ end
53
+
54
+ # As we build up a list of scanner events, we'll periodically need to go
55
+ # backwards and find the ones that we've already hit in order to determine the
56
+ # location information for nodes that use them. For example, if you have a
57
+ # module node then you'll look backward for a @module scanner event to
58
+ # determine your start location.
59
+ #
60
+ # This works with nesting since we're deleting scanner events from the list
61
+ # once they've been used up. For example if you had nested module declarations
62
+ # then the innermost declaration would grab the last @module event (which
63
+ # would happen to be the innermost keyword). Then the outer one would only be
64
+ # able to grab the first one. In this way all of the scanner events act as
65
+ # their own stack.
66
+ def find_scanner_event(type, body = :any)
67
+ index =
68
+ scanner_events.rindex do |scanner_event|
69
+ scanner_event[:type] == type &&
70
+ (body == :any || (scanner_event[:body] == body))
71
+ end
72
+
73
+ scanner_events.delete_at(index)
74
+ end
75
+
34
76
  # Scanner events occur when the lexer hits a new token, like a keyword or an
35
77
  # end. These nodes always contain just one argument which is a string
36
78
  # representing the content. For the most part these can just be printed
37
79
  # directly, with very few exceptions.
38
- SCANNER_EVENTS.each do |event|
39
- define_method(:"on_#{event}") do |body|
40
- { type: :"@#{event}", body: body, start: lineno, end: lineno }
80
+ defined = %i[
81
+ comment
82
+ embdoc
83
+ embdoc_beg
84
+ embdoc_end
85
+ heredoc_beg
86
+ heredoc_end
87
+ ignored_nl
88
+ ]
89
+
90
+ (SCANNER_EVENTS - defined).each do |event|
91
+ define_method(:"on_#{event}") do |value|
92
+ char_end = char_pos + value.size
93
+ node = {
94
+ type: :"@#{event}",
95
+ body: value,
96
+ start: lineno,
97
+ end: lineno,
98
+ char_start: char_pos,
99
+ char_end: char_end
100
+ }
101
+
102
+ scanner_events << node
103
+ node
41
104
  end
42
105
  end
43
106
 
44
- # Parser events represent nodes in the ripper abstract syntax tree. The event
45
- # is reported after the children of the node have already been built.
46
- PARSER_EVENTS.each do |event|
47
- define_method(:"on_#{event}") do |*body|
48
- min = body.map { |part| part.is_a?(Hash) ? part[:start] : lineno }.min
49
- { type: event, body: body, start: min || lineno, end: lineno }
50
- end
107
+ # We keep track of each comment as it comes in and then eventually add
108
+ # them to the top of the generated AST so that prettier can start adding
109
+ # them back into the final representation. Comments come in including
110
+ # their starting pound sign and the newline at the end, so we also chop
111
+ # those off.
112
+ #
113
+ # If there is an encoding magic comment at the top of the file, ripper
114
+ # will actually change into that encoding for the storage of the string.
115
+ # This will break everything, so we need to force the encoding back into
116
+ # UTF-8 so that the JSON library won't break.
117
+ def on_comment(value)
118
+ @comments <<
119
+ {
120
+ type: :@comment,
121
+ value: value[1..-1].chomp.force_encoding('UTF-8'),
122
+ start: lineno,
123
+ end: lineno,
124
+ char_start: char_pos,
125
+ char_end: char_pos + value.length - 1
126
+ }
127
+ end
128
+
129
+ # ignored_nl is a special kind of scanner event that passes nil as the value,
130
+ # so we can't do our normal tracking of value.size. Instead of adding a
131
+ # condition to the main SCANNER_EVENTS loop above, we'll just explicitly
132
+ # define the method here. You can trigger the ignored_nl event with the
133
+ # following snippet:
134
+ #
135
+ # foo.bar
136
+ # .baz
137
+ #
138
+ def on_ignored_nl(value)
139
+ {
140
+ type: :ignored_nl,
141
+ body: nil,
142
+ start: lineno,
143
+ end: lineno,
144
+ char_start: char_pos,
145
+ char_end: char_pos
146
+ }
51
147
  end
52
148
 
53
- # Some nodes are lists that come back from the parser. They always start with
54
- # a `*_new` node (or in the case of string, `*_content`) and each additional
55
- # node in the list is a `*_add` node. This module takes those nodes and turns
56
- # them into one node with an array body.
57
- #
58
- # For example, the statement `[a, b, c]` would be parsed as:
59
- #
60
- # [:args_add,
61
- # [:args_add,
62
- # [:args_add,
63
- # [:args_new],
64
- # [:vcall, [:@ident, "a", [1, 1]]]
65
- # ],
66
- # [:vcall, [:@ident, "b", [1, 4]]]
67
- # ],
68
- # [:vcall, [:@ident, "c", [1, 7]]]
69
- # ]
70
- #
71
- # But after this module is applied that is instead parsed as:
72
- #
73
- # [:args,
74
- # [
75
- # [:vcall, [:@ident, "a", [1, 1]]],
76
- # [:vcall, [:@ident, "b", [1, 4]]],
77
- # [:vcall, [:@ident, "c", [1, 7]]]
78
- # ]
79
- # ]
80
- #
81
- # This makes it a lot easier to join things with commas, and ends up resulting
82
- # in a much flatter `prettier` tree once it has been converted. Note that
83
- # because of this module some extra node types are added (the aggregate of
84
- # the previous `*_add` nodes) and some nodes now have arrays in places where
85
- # they previously had single nodes.
86
149
  prepend(
87
150
  Module.new do
88
- events = %i[
89
- args
90
- mlhs
91
- mrhs
92
- qsymbols
93
- qwords
94
- regexp
95
- stmts
96
- string
97
- symbols
98
- words
99
- xstring
100
- ]
101
-
102
151
  private
103
152
 
104
- events.each do |event|
105
- suffix = event == :string ? 'content' : 'new'
153
+ # Handles __END__ syntax, which allows individual scripts to keep content
154
+ # after the main ruby code that can be read through DATA. It looks like:
155
+ #
156
+ # foo.bar
157
+ #
158
+ # __END__
159
+ # some other content that isn't normally read by ripper
160
+ def on___end__(*)
161
+ @__end__ = super(lines[lineno..-1].join("\n"))
162
+ end
106
163
 
107
- define_method(:"on_#{event}_#{suffix}") do
108
- { type: event, body: [], start: lineno, end: lineno }
109
- end
164
+ # Like comments, we need to force the encoding here so JSON doesn't break.
165
+ def on_ident(value)
166
+ super(value.force_encoding('UTF-8'))
167
+ end
110
168
 
111
- define_method(:"on_#{event}_add") do |parts, part|
112
- parts.tap do |node|
113
- node[:body] << part
114
- node[:end] = lineno
115
- end
116
- end
169
+ # Like comments, we need to force the encoding here so JSON doesn't break.
170
+ def on_tstring_content(value)
171
+ super(value.force_encoding('UTF-8'))
117
172
  end
118
173
  end
119
174
  )
120
175
 
121
- # For each node, we need to attach where it came from in order to be able to
122
- # support placing the cursor correctly before and after formatting.
176
+ # A BEGIN node is a parser event that represents the use of the BEGIN
177
+ # keyword, which hooks into the lifecycle of the interpreter. It's a bit
178
+ # of a legacy from the stream operating days, and gets its inspiration
179
+ # from tools like awk. Whatever is inside the "block" will get executed
180
+ # when the program starts. The syntax looks like the following:
123
181
  #
124
- # For most nodes, it's enough to look at the child nodes to determine the
125
- # start of the parent node. However, for some nodes it's necessary to keep
126
- # track of the keywords as they come in from the lexer and to modify the start
127
- # node once we have it.
128
- prepend(
129
- Module.new do
130
- def initialize(source, *args)
131
- super(source, *args)
182
+ # BEGIN {
183
+ # # execute stuff here
184
+ # }
185
+ #
186
+ def on_BEGIN(stmts)
187
+ beging = find_scanner_event(:@lbrace)
188
+ ending = find_scanner_event(:@rbrace)
132
189
 
133
- @scanner_events = []
134
- @line_counts = [0]
190
+ stmts.bind(beging[:char_end], ending[:char_start])
135
191
 
136
- source.lines.each { |line| line_counts << line_counts.last + line.size }
137
- end
192
+ find_scanner_event(:@kw, 'BEGIN').merge!(
193
+ type: :BEGIN,
194
+ body: [stmts],
195
+ end: ending[:end],
196
+ char_end: ending[:char_end]
197
+ )
198
+ end
138
199
 
139
- def self.prepended(base)
140
- base.attr_reader :scanner_events, :line_counts
141
- end
200
+ # An END node is a parser event that represents the use of the END keyword,
201
+ # which hooks into the lifecycle of the interpreter. It's a bit of a
202
+ # legacy from the stream operating days, and gets its inspiration from
203
+ # tools like awk. Whatever is inside the "block" will get executed when
204
+ # the program ends. The syntax looks like the following:
205
+ #
206
+ # END {
207
+ # # execute stuff here
208
+ # }
209
+ #
210
+ def on_END(stmts)
211
+ beging = find_scanner_event(:@lbrace)
212
+ ending = find_scanner_event(:@rbrace)
142
213
 
143
- private
214
+ stmts.bind(beging[:char_end], ending[:char_start])
144
215
 
145
- def char_pos
146
- line_counts[lineno - 1] + column
147
- end
216
+ find_scanner_event(:@kw, 'END').merge!(
217
+ type: :END, body: [stmts], end: ending[:end], char_end: ending[:char_end]
218
+ )
219
+ end
148
220
 
149
- def char_start_for(body)
150
- children = body.length == 1 && body[0].is_a?(Array) ? body[0] : body
151
- char_starts =
152
- children.map { |part| part[:char_start] if part.is_a?(Hash) }.compact
221
+ # alias is a parser event that represents when you're using the alias
222
+ # keyword with regular arguments. This can be either symbol literals or
223
+ # bare words. You can optionally use parentheses with this keyword, so we
224
+ # either track the location information based on those or the final
225
+ # argument to the alias method.
226
+ def on_alias(left, right)
227
+ beging = find_scanner_event(:@kw, 'alias')
228
+
229
+ paren = source[beging[:char_end]...left[:char_start]].include?('(')
230
+ ending = paren ? find_scanner_event(:@rparen) : right
231
+
232
+ {
233
+ type: :alias,
234
+ body: [left, right],
235
+ start: beging[:start],
236
+ char_start: beging[:char_start],
237
+ end: ending[:end],
238
+ char_end: ending[:char_end]
239
+ }
240
+ end
153
241
 
154
- char_starts.min || char_pos
155
- end
242
+ # aref nodes are when you're pulling a value out of a collection at a
243
+ # specific index. Put another way, it's any time you're calling the method
244
+ # #[]. As an example:
245
+ #
246
+ # foo[index]
247
+ #
248
+ # The node usually contains two children, the collection and the index.
249
+ # In some cases, you don't necessarily have the second child node, because
250
+ # you can call procs with a pretty esoteric syntax. In the following
251
+ # example, you wouldn't have a second child, and "foo" would be the first
252
+ # child:
253
+ #
254
+ # foo[]
255
+ #
256
+ def on_aref(collection, index)
257
+ find_scanner_event(:@lbracket)
258
+ ending = find_scanner_event(:@rbracket)
259
+
260
+ {
261
+ type: :aref,
262
+ body: [collection, index],
263
+ start: collection[:start],
264
+ char_start: collection[:char_start],
265
+ end: ending[:end],
266
+ char_end: ending[:char_end]
267
+ }
268
+ end
156
269
 
157
- def find_scanner_event(type, body = :any)
158
- index =
159
- scanner_events.rindex do |scanner_event|
160
- scanner_event[:type] == type &&
161
- (body == :any || (scanner_event[:body] == body))
162
- end
270
+ # aref_field is a parser event that is very similar to aref except that it
271
+ # is being used inside of an assignment.
272
+ def on_aref_field(collection, index)
273
+ find_scanner_event(:@lbracket)
274
+ ending = find_scanner_event(:@rbracket)
275
+
276
+ {
277
+ type: :aref_field,
278
+ body: [collection, index],
279
+ start: collection[:start],
280
+ char_start: collection[:char_start],
281
+ end: ending[:end],
282
+ char_end: ending[:char_end]
283
+ }
284
+ end
163
285
 
164
- scanner_events.delete_at(index)
165
- end
286
+ # args_new is a parser event that represents the beginning of a list of
287
+ # arguments to any method call or an array. It can be followed by any
288
+ # number of args_add events, which we'll append onto an array body.
289
+ def on_args_new
290
+ {
291
+ type: :args,
292
+ body: [],
293
+ start: lineno,
294
+ char_start: char_pos,
295
+ end: lineno,
296
+ char_end: char_pos
297
+ }
298
+ end
299
+
300
+ # args_add is a parser event that represents a single argument inside a
301
+ # list of arguments to any method call or an array. It accepts as
302
+ # arguments the parent args node as well as an arg which can be anything
303
+ # that could be passed as an argument.
304
+ def on_args_add(args, arg)
305
+ if args[:body].empty?
306
+ arg.merge(type: :args, body: [arg])
307
+ else
308
+ args.merge!(
309
+ body: args[:body] << arg, end: arg[:end], char_end: arg[:char_end]
310
+ )
311
+ end
312
+ end
313
+
314
+ # args_add_block is a parser event that represents a list of arguments and
315
+ # potentially a block argument. If no block is passed, then the second
316
+ # argument will be false.
317
+ def on_args_add_block(args, block)
318
+ ending = block || args
319
+
320
+ args.merge(
321
+ type: :args_add_block,
322
+ body: [args, block],
323
+ end: ending[:end],
324
+ char_end: ending[:char_end]
325
+ )
326
+ end
327
+
328
+ # args_add_star is a parser event that represents adding a splat of values
329
+ # to a list of arguments. It accepts as arguments the parent args node as
330
+ # well as the part that is being splatted.
331
+ def on_args_add_star(args, part)
332
+ beging = find_scanner_event(:@op, '*')
333
+ ending = part || beging
334
+
335
+ {
336
+ type: :args_add_star,
337
+ body: [args, part],
338
+ start: beging[:start],
339
+ char_start: beging[:char_start],
340
+ end: ending[:end],
341
+ char_end: ending[:char_end]
342
+ }
343
+ end
344
+
345
+ # args_forward is a parser event that represents forwarding all kinds of
346
+ # arguments onto another method call.
347
+ def on_args_forward
348
+ find_scanner_event(:@op, '...').merge!(type: :args_forward)
349
+ end
350
+
351
+ # arg_paren is a parser event that represents wrapping arguments to a
352
+ # method inside a set of parentheses.
353
+ def on_arg_paren(args)
354
+ beging = find_scanner_event(:@lparen)
355
+ ending = find_scanner_event(:@rparen)
356
+
357
+ {
358
+ type: :arg_paren,
359
+ body: [args],
360
+ start: beging[:start],
361
+ char_start: beging[:char_start],
362
+ end: ending[:end],
363
+ char_end: ending[:char_end]
364
+ }
365
+ end
166
366
 
167
- events = {
168
- BEGIN: [:@kw, 'BEGIN'],
169
- END: [:@kw, 'END'],
170
- alias: [:@kw, 'alias'],
171
- assoc_splat: [:@op, '**'],
172
- arg_paren: :@lparen,
173
- args_add_star: [:@op, '*'],
174
- args_forward: [:@op, '...'],
175
- begin: [:@kw, 'begin'],
176
- blockarg: [:@op, '&'],
177
- brace_block: :@lbrace,
178
- break: [:@kw, 'break'],
179
- case: [:@kw, 'case'],
180
- class: [:@kw, 'class'],
181
- def: [:@kw, 'def'],
182
- defined: [:@kw, 'defined?'],
183
- defs: [:@kw, 'def'],
184
- do_block: [:@kw, 'do'],
185
- else: [:@kw, 'else'],
186
- elsif: [:@kw, 'elsif'],
187
- ensure: [:@kw, 'ensure'],
188
- excessed_comma: :@comma,
189
- for: [:@kw, 'for'],
190
- hash: :@lbrace,
191
- if: [:@kw, 'if'],
192
- in: [:@kw, 'in'],
193
- kwrest_param: [:@op, '**'],
194
- lambda: :@tlambda,
195
- mlhs_paren: :@lparen,
196
- mrhs_add_star: [:@op, '*'],
197
- module: [:@kw, 'module'],
198
- next: [:@kw, 'next'],
199
- paren: :@lparen,
200
- qsymbols_new: :@qsymbols_beg,
201
- qwords_new: :@qwords_beg,
202
- redo: [:@kw, 'redo'],
203
- regexp_literal: :@regexp_beg,
204
- rescue: [:@kw, 'rescue'],
205
- rest_param: [:@op, '*'],
206
- retry: [:@kw, 'retry'],
207
- return0: [:@kw, 'return'],
208
- return: [:@kw, 'return'],
209
- sclass: [:@kw, 'class'],
210
- string_dvar: :@embvar,
211
- string_embexpr: :@embexpr_beg,
212
- super: [:@kw, 'super'],
213
- symbols_new: :@symbols_beg,
214
- top_const_field: [:@op, '::'],
215
- top_const_ref: [:@op, '::'],
216
- undef: [:@kw, 'undef'],
217
- unless: [:@kw, 'unless'],
218
- until: [:@kw, 'until'],
219
- var_alias: [:@kw, 'alias'],
220
- when: [:@kw, 'when'],
221
- while: [:@kw, 'while'],
222
- words_new: :@words_beg,
223
- xstring_literal: :@backtick,
224
- yield0: [:@kw, 'yield'],
225
- yield: [:@kw, 'yield'],
226
- zsuper: [:@kw, 'super']
367
+ # Array nodes can contain a myriad of subnodes because of the special
368
+ # array literal syntax like %w and %i. As a result, we may be looking for
369
+ # a left bracket, or we may be just looking at the children to get the
370
+ # bounds.
371
+ def on_array(contents)
372
+ if !contents || %i[args args_add_star].include?(contents[:type])
373
+ beging = find_scanner_event(:@lbracket)
374
+ ending = find_scanner_event(:@rbracket)
375
+
376
+ {
377
+ type: :array,
378
+ body: [contents],
379
+ start: beging[:start],
380
+ char_start: beging[:char_start],
381
+ end: ending[:end],
382
+ char_end: ending[:char_end]
227
383
  }
384
+ else
385
+ ending = find_scanner_event(:@tstring_end)
386
+ contents[:char_end] = ending[:char_end]
387
+
388
+ ending.merge!(
389
+ type: :array,
390
+ body: [contents],
391
+ start: contents[:start],
392
+ char_start: contents[:char_start]
393
+ )
394
+ end
395
+ end
228
396
 
229
- events.each do |event, (type, scanned)|
230
- define_method(:"on_#{event}") do |*body|
231
- node = find_scanner_event(type, scanned || :any)
397
+ # aryptn is a parser event that represents matching against an array pattern
398
+ # using the Ruby 2.7+ pattern matching syntax.
399
+ def on_aryptn(const, preargs, splatarg, postargs)
400
+ pieces = [const, *preargs, splatarg, *postargs].compact
401
+
402
+ {
403
+ type: :aryptn,
404
+ body: [const, preargs, splatarg, postargs],
405
+ start: pieces[0][:start],
406
+ char_start: pieces[0][:char_start],
407
+ end: pieces[-1][:end],
408
+ char_end: pieces[-1][:char_end]
409
+ }
410
+ end
232
411
 
233
- super(*body).merge!(
234
- start: node[:start],
235
- char_start: node[:char_start],
236
- char_end: char_pos
237
- )
238
- end
239
- end
412
+ # assign is a parser event that represents assigning something to a
413
+ # variable or constant. It accepts as arguments the left side of the
414
+ # expression before the equals sign and the right side of the expression.
415
+ def on_assign(left, right)
416
+ left.merge(
417
+ type: :assign,
418
+ body: [left, right],
419
+ end: right[:end],
420
+ char_end: right[:char_end]
421
+ )
422
+ end
240
423
 
241
- # Array nodes can contain a myriad of subnodes because of the special
242
- # array literal syntax like %w and %i. As a result, we may be looking for
243
- # an left bracket, or we may be just looking at the children.
244
- def on_array(*body)
245
- if body[0] && %i[args args_add_star].include?(body[0][:type])
246
- node = find_scanner_event(:@lbracket)
247
-
248
- super(*body).merge!(
249
- start: node[:start],
250
- char_start: node[:char_start],
251
- char_end: char_pos
252
- )
253
- else
254
- super(*body).merge!(
255
- char_start: char_start_for(body), char_end: char_pos
256
- )
257
- end
258
- end
424
+ # assoc_new is a parser event that contains a key-value pair within a
425
+ # hash. It is a child event of either an assoclist_from_args or a
426
+ # bare_assoc_hash.
427
+ def on_assoc_new(key, value)
428
+ {
429
+ type: :assoc_new,
430
+ body: [key, value],
431
+ start: key[:start],
432
+ char_start: key[:char_start],
433
+ end: value[:end],
434
+ char_end: value[:char_end]
435
+ }
436
+ end
259
437
 
260
- # Array pattern nodes contain an odd mix of potential child nodes based on
261
- # which kind of pattern is being used.
262
- def on_aryptn(*body)
263
- char_start, char_end = char_pos, char_pos
438
+ # assoc_splat is a parser event that represents splatting a value into a
439
+ # hash (either a hash literal or a bare hash in a method call).
440
+ def on_assoc_splat(contents)
441
+ find_scanner_event(:@op, '**').merge!(
442
+ type: :assoc_splat,
443
+ body: [contents],
444
+ end: contents[:end],
445
+ char_end: contents[:char_end]
446
+ )
447
+ end
264
448
 
265
- body.flatten(1).each do |part|
266
- next unless part
449
+ # assoclist_from_args is a parser event that contains a list of all of the
450
+ # associations inside of a hash literal. Its parent node is always a hash.
451
+ # It accepts as an argument an array of assoc events (either assoc_new or
452
+ # assoc_splat).
453
+ def on_assoclist_from_args(assocs)
454
+ {
455
+ type: :assoclist_from_args,
456
+ body: assocs,
457
+ start: assocs[0][:start],
458
+ char_start: assocs[0][:char_start],
459
+ end: assocs[-1][:end],
460
+ char_end: assocs[-1][:char_end]
461
+ }
462
+ end
267
463
 
268
- char_start = [char_start, part[:char_start]].min
269
- char_end = [char_end, part[:char_end]].max
270
- end
464
+ # bare_assoc_hash is a parser event that represents a hash of contents
465
+ # being passed as a method argument (and therefore has omitted braces). It
466
+ # accepts as an argument an array of assoc events (either assoc_new or
467
+ # assoc_splat).
468
+ def on_bare_assoc_hash(assoc_news)
469
+ {
470
+ type: :bare_assoc_hash,
471
+ body: assoc_news,
472
+ start: assoc_news[0][:start],
473
+ char_start: assoc_news[0][:char_start],
474
+ end: assoc_news[-1][:end],
475
+ char_end: assoc_news[-1][:char_end]
476
+ }
477
+ end
271
478
 
272
- super(*body).merge!(char_start: char_start, char_end: char_end)
479
+ # begin is a parser event that represents the beginning of a begin..end chain.
480
+ # It includes a bodystmt event that has all of the consequent clauses.
481
+ def on_begin(bodystmt)
482
+ beging = find_scanner_event(:@kw, 'begin')
483
+ char_end =
484
+ if bodystmt[:body][1..-1].any?
485
+ bodystmt[:char_end]
486
+ else
487
+ find_scanner_event(:@kw, 'end')[:char_end]
273
488
  end
274
489
 
275
- # Params have a somewhat interesting structure in that they are an array
276
- # of arrays where the position in the top-level array indicates the type
277
- # of param and the subarray is the list of parameters of that type. We
278
- # therefore have to flatten them down to get to the location.
279
- def on_params(*body)
280
- super(*body).merge!(
281
- char_start: char_start_for(body.flatten(1)), char_end: char_pos
282
- )
283
- end
490
+ bodystmt.bind(beging[:char_end], char_end)
284
491
 
285
- # String literals and either contain string parts or a heredoc. If it
286
- # contains a heredoc we can just go directly to the child nodes, otherwise
287
- # we need to look for a `tstring_beg`.
288
- def on_string_literal(*body)
289
- if body[0][:type] == :heredoc
290
- super(*body).merge!(
291
- char_start: char_start_for(body), char_end: char_pos
292
- )
293
- else
294
- node = find_scanner_event(:@tstring_beg)
295
-
296
- super(*body).merge!(
297
- start: node[:start],
298
- char_start: node[:char_start],
299
- char_end: char_pos,
300
- quote: node[:body]
301
- )
302
- end
303
- end
492
+ beging.merge!(
493
+ type: :begin,
494
+ body: [bodystmt],
495
+ end: bodystmt[:end],
496
+ char_end: bodystmt[:char_end]
497
+ )
498
+ end
304
499
 
305
- # Technically, the `not` operator is a unary operator but is reported as
306
- # a keyword and not an operator. Because of the inconsistency, we have to
307
- # manually look for the correct scanner event here.
308
- def on_unary(*body)
309
- node =
310
- if body[0] == :not
311
- find_scanner_event(:@kw, 'not')
312
- else
313
- find_scanner_event(:@op)
314
- end
315
-
316
- super(*body).merge!(
317
- start: node[:start], char_start: node[:char_start], char_end: char_pos
318
- )
319
- end
500
+ # binary is a parser event that represents a binary operation between two
501
+ # values.
502
+ def on_binary(left, oper, right)
503
+ {
504
+ type: :binary,
505
+ body: [left, oper, right],
506
+ start: left[:start],
507
+ char_start: left[:char_start],
508
+ end: right[:end],
509
+ char_end: right[:char_end]
510
+ }
511
+ end
320
512
 
321
- # Symbols don't necessarily have to have a @symbeg event fired before they
322
- # start. For example, you can have symbol literals inside an `alias` node
323
- # if you're just using bare words, as in: `alias foo bar`. So this is a
324
- # special case in which if there is a `:@symbeg` event we can hook on to
325
- # then we use it, otherwise we just look at the beginning of the first
326
- # child node.
327
- %i[dyna_symbol symbol_literal].each do |event|
328
- define_method(:"on_#{event}") do |*body|
329
- options =
330
- if scanner_events.any? { |sevent| sevent[:type] == :@symbeg }
331
- symbeg = find_scanner_event(:@symbeg)
332
-
333
- {
334
- char_start: symbeg[:char_start],
335
- char_end: char_pos,
336
- quote: symbeg[:body][1]
337
- }
338
- elsif scanner_events.any? { |sevent| sevent[:type] == :@label_end }
339
- label_end = find_scanner_event(:@label_end)
340
-
341
- {
342
- char_start: char_start_for(body),
343
- char_end: char_pos,
344
- quote: label_end[:body][0]
345
- }
346
- else
347
- { char_start: char_start_for(body), char_end: char_pos }
348
- end
349
-
350
- super(*body).merge!(options)
351
- end
513
+ # block_var is a parser event that represents the parameters being passed to
514
+ # a block. Effectively they're everything contained within the pipes.
515
+ def on_block_var(params, locals)
516
+ index =
517
+ scanner_events.rindex do |event|
518
+ event[:type] == :@op && %w[| ||].include?(event[:body]) &&
519
+ event[:char_start] < params[:char_start]
352
520
  end
353
521
 
354
- def on_program(*body)
355
- super(*body).merge!(start: 1, char_start: 0, char_end: char_pos)
522
+ beging = scanner_events[index]
523
+ ending = scanner_events[-1]
524
+
525
+ {
526
+ type: :block_var,
527
+ body: [params, locals],
528
+ start: beging[:start],
529
+ char_start: beging[:char_start],
530
+ end: ending[:end],
531
+ char_end: ending[:char_end]
532
+ }
533
+ end
534
+
535
+ # blockarg is a parser event that represents defining a block variable on
536
+ # a method definition.
537
+ def on_blockarg(ident)
538
+ find_scanner_event(:@op, '&').merge!(
539
+ type: :blockarg,
540
+ body: [ident],
541
+ end: ident[:end],
542
+ char_end: ident[:char_end]
543
+ )
544
+ end
545
+
546
+ # bodystmt can't actually determine its bounds appropriately because it
547
+ # doesn't necessarily know where it started. So the parent node needs to
548
+ # report back down into this one where it goes.
549
+ class BodyStmt < SimpleDelegator
550
+ def bind(char_start, char_end)
551
+ merge!(char_start: char_start, char_end: char_end)
552
+ parts = self[:body]
553
+
554
+ # Here we're going to determine the bounds for the stmts
555
+ consequent = parts[1..-1].compact.first
556
+ self[:body][0].bind(char_start,
557
+ consequent ? consequent[:char_start] : char_end)
558
+
559
+ # Next we're going to determine the rescue clause if there is one
560
+ if parts[1]
561
+ consequent = parts[2..-1].compact.first
562
+ self[:body][1].bind(consequent ? consequent[:char_start] : char_end)
356
563
  end
564
+ end
565
+ end
357
566
 
358
- defined =
359
- private_instance_methods(false).grep(/\Aon_/) { $'.to_sym } +
360
- %i[embdoc embdoc_beg embdoc_end heredoc_beg heredoc_end]
567
+ # bodystmt is a parser event that represents all of the possible combinations
568
+ # of clauses within the body of a method or block.
569
+ def on_bodystmt(stmts, rescued, ensured, elsed)
570
+ BodyStmt.new(
571
+ type: :bodystmt,
572
+ body: [stmts, rescued, ensured, elsed],
573
+ start: lineno,
574
+ char_start: char_pos,
575
+ end: lineno,
576
+ char_end: char_pos
577
+ )
578
+ end
361
579
 
362
- (SCANNER_EVENTS - defined).each do |event|
363
- define_method(:"on_#{event}") do |body|
364
- super(body).tap do |node|
365
- char_end = char_pos + (body ? body.size : 0)
366
- node.merge!(char_start: char_pos, char_end: char_end)
580
+ # brace_block is a parser event that represents passing a block to a
581
+ # method call using the {..} operators. It accepts as arguments an
582
+ # optional block_var event that represents any parameters to the block as
583
+ # well as a stmts event that represents the statements inside the block.
584
+ def on_brace_block(block_var, stmts)
585
+ beging = find_scanner_event(:@lbrace)
586
+ ending = find_scanner_event(:@rbrace)
587
+
588
+ stmts.bind((block_var || beging)[:char_end], ending[:char_start])
589
+
590
+ {
591
+ type: :brace_block,
592
+ body: [block_var, stmts],
593
+ start: beging[:start],
594
+ char_start: beging[:char_start],
595
+ end: ending[:end],
596
+ char_end: ending[:char_end]
597
+ }
598
+ end
367
599
 
368
- scanner_events << node
369
- end
370
- end
371
- end
600
+ # break is a parser event that represents using the break keyword. It
601
+ # accepts as an argument an args or args_add_block event that contains all
602
+ # of the arguments being passed to the break.
603
+ def on_break(args_add_block)
604
+ find_scanner_event(:@kw, 'break').merge!(
605
+ type: :break,
606
+ body: [args_add_block],
607
+ end: args_add_block[:end],
608
+ char_end: args_add_block[:char_end]
609
+ )
610
+ end
372
611
 
373
- (PARSER_EVENTS - defined).each do |event|
374
- define_method(:"on_#{event}") do |*body|
375
- super(*body).merge!(
376
- char_start: char_start_for(body), char_end: char_pos
377
- )
378
- end
379
- end
612
+ # call is a parser event representing a method call with no arguments. It
613
+ # accepts as arguments the receiver of the method, the operator being used
614
+ # to send the method (., ::, or &.), and the value that is being sent to
615
+ # the receiver (which can be another nested call as well).
616
+ #
617
+ # There is one esoteric syntax that comes into play here as well. If the
618
+ # sending argument to this method is the symbol :call, then it represents
619
+ # calling a lambda in a very odd looking way, as in:
620
+ #
621
+ # foo.(1, 2, 3)
622
+ #
623
+ def on_call(receiver, oper, sending)
624
+ ending = sending
625
+
626
+ if sending == :call
627
+ ending = oper
628
+
629
+ # Special handling here for Ruby <= 2.5 because the oper argument to this
630
+ # method wasn't a parser event here; it was just a plain symbol.
631
+ ending = receiver if RUBY_MAJOR <= 2 && RUBY_MINOR <= 5
380
632
  end
381
- )
382
633
 
383
- # This layer keeps track of inline comments as they come in. Ripper itself
384
- # doesn't attach comments to the AST, so we need to do it manually. In this
385
- # case, inline comments are defined as any comments wherein the lexer state is
386
- # not equal to EXPR_BEG (tracked in the BlockComments layer).
387
- prepend(
388
- Module.new do
389
- # Certain events needs to steal the comments from their children in order
390
- # for them to display properly.
391
- events = {
392
- aref: [:body, 1],
393
- args_add_block: [:body, 0],
394
- break: [:body, 0],
395
- call: [:body, 0],
396
- command: [:body, 1],
397
- command_call: [:body, 3],
398
- regexp_literal: [:body, 0],
399
- string_literal: [:body, 0],
400
- symbol_literal: [:body, 0]
401
- }
634
+ {
635
+ type: :call,
636
+ body: [receiver, oper, sending],
637
+ start: receiver[:start],
638
+ char_start: receiver[:char_start],
639
+ end: ending[:end],
640
+ char_end: ending[:char_end]
641
+ }
642
+ end
402
643
 
403
- def initialize(*args)
404
- super(*args)
405
- @inline_comments = []
406
- @last_sexp = nil
407
- end
644
+ # case is a parser event that represents the beginning of a case chain.
645
+ # It accepts as arguments the switch of the case and the consequent
646
+ # clause.
647
+ def on_case(switch, consequent)
648
+ find_scanner_event(:@kw, 'case').merge!(
649
+ type: :case,
650
+ body: [switch, consequent],
651
+ end: consequent[:end],
652
+ char_end: consequent[:char_end]
653
+ )
654
+ end
408
655
 
409
- def self.prepended(base)
410
- base.attr_reader :inline_comments, :last_sexp
411
- end
656
+ # class is a parser event that represents defining a class. It accepts as
657
+ # arguments the name of the class, the optional name of the superclass,
658
+ # and the bodystmt event that represents the statements evaluated within
659
+ # the context of the class.
660
+ def on_class(const, superclass, bodystmt)
661
+ beging = find_scanner_event(:@kw, 'class')
662
+ ending = find_scanner_event(:@kw, 'end')
663
+
664
+ bodystmt.bind((superclass || const)[:char_end], ending[:char_start])
665
+
666
+ {
667
+ type: :class,
668
+ body: [const, superclass, bodystmt],
669
+ start: beging[:start],
670
+ char_start: beging[:char_start],
671
+ end: ending[:end],
672
+ char_end: ending[:char_end]
673
+ }
674
+ end
412
675
 
413
- private
676
+ # command is a parser event representing a method call with arguments and
677
+ # no parentheses. It accepts as arguments the name of the method and the
678
+ # arguments being passed to the method.
679
+ def on_command(ident, args)
680
+ {
681
+ type: :command,
682
+ body: [ident, args],
683
+ start: ident[:start],
684
+ char_start: ident[:char_start],
685
+ end: args[:end],
686
+ char_end: args[:char_end]
687
+ }
688
+ end
414
689
 
415
- events.each do |event, path|
416
- define_method(:"on_#{event}") do |*body|
417
- @last_sexp =
418
- super(*body).tap do |sexp|
419
- comments = (sexp.dig(*path) || {}).delete(:comments)
420
- sexp.merge!(comments: comments) if comments
421
- end
422
- end
423
- end
690
+ # command_call is a parser event representing a method call on an object
691
+ # with arguments and no parentheses. It accepts as arguments the receiver
692
+ # of the method, the operator being used to send the method, the name of
693
+ # the method, and the arguments being passed to the method.
694
+ def on_command_call(receiver, oper, ident, args)
695
+ ending = args || ident
696
+
697
+ {
698
+ type: :command_call,
699
+ body: [receiver, oper, ident, args],
700
+ start: receiver[:start],
701
+ char_start: receiver[:char_start],
702
+ end: ending[:end],
703
+ char_end: ending[:char_end]
704
+ }
705
+ end
424
706
 
425
- SPECIAL_LITERALS = %i[qsymbols qwords symbols words].freeze
707
+ # A const_path_field is a parser event that is always the child of some
708
+ # kind of assignment. It represents when you're assigning to a constant
709
+ # that is being referenced as a child of another variable. For example:
710
+ #
711
+ # foo::X = 1
712
+ #
713
+ def on_const_path_field(left, const)
714
+ {
715
+ type: :const_path_field,
716
+ body: [left, const],
717
+ start: left[:start],
718
+ char_start: left[:char_start],
719
+ end: const[:end],
720
+ char_end: const[:char_end]
721
+ }
722
+ end
426
723
 
427
- # Special array literals are handled in different ways and so their
428
- # comments need to be passed up to their parent array node.
429
- def on_array(*body)
430
- @last_sexp =
431
- super(*body).tap do |sexp|
432
- next unless SPECIAL_LITERALS.include?(body.dig(0, :type))
724
+ # A const_path_ref is a parser event that is very similar to
725
+ # const_path_field except that it is not involved in an assignment. It
726
+ # looks like the following example:
727
+ #
728
+ # foo::X
729
+ #
730
+ def on_const_path_ref(left, const)
731
+ {
732
+ type: :const_path_ref,
733
+ body: [left, const],
734
+ start: left[:start],
735
+ char_start: left[:char_start],
736
+ end: const[:end],
737
+ char_end: const[:char_end]
738
+ }
739
+ end
433
740
 
434
- comments = sexp.dig(:body, 0).delete(:comments)
435
- sexp.merge!(comments: comments) if comments
436
- end
437
- end
741
+ # A const_ref is a parser event that represents the name of the constant
742
+ # being used in a class or module declaration. In the following example it
743
+ # is the @const scanner event that has the contents of Foo.
744
+ #
745
+ # class Foo; end
746
+ #
747
+ def on_const_ref(const)
748
+ const.merge(type: :const_ref, body: [const])
749
+ end
438
750
 
439
- # Handling this specially because we want to pull the comments out of both
440
- # child nodes.
441
- def on_assoc_new(*body)
442
- @last_sexp =
443
- super(*body).tap do |sexp|
444
- comments =
445
- (sexp.dig(:body, 0).delete(:comments) || []) +
446
- (sexp.dig(:body, 1).delete(:comments) || [])
447
-
448
- sexp.merge!(comments: comments) if comments.any?
449
- end
450
- end
751
+ # A def is a parser event that represents defining a regular method on the
752
+ # current self object. It accepts as arguments the ident (the name of the
753
+ # method being defined), the params (the parameter declaration for the
754
+ # method), and a bodystmt node which represents the statements inside the
755
+ # method. As an example, here are the parts that go into this:
756
+ #
757
+ # def foo(bar) do baz end
758
+ # │ │ │
759
+ # │ │ └> bodystmt
760
+ # │ └> params
761
+ # └> ident
762
+ #
763
+ def on_def(ident, params, bodystmt)
764
+ if params[:type] == :params && !params[:body].any?
765
+ location = ident[:char_end]
766
+ params.merge!(char_start: location, char_end: location)
767
+ end
451
768
 
452
- # Most scanner events don't stand on their own as s-expressions, but the
453
- # CHAR scanner event is effectively just a string, so we need to track it
454
- # as a s-expression.
455
- def on_CHAR(body)
456
- @last_sexp = super(body)
457
- end
769
+ beging = find_scanner_event(:@kw, 'def')
770
+ ending = find_scanner_event(:@kw, 'end')
458
771
 
459
- # We need to know exactly where the comment is, switching off the current
460
- # lexer state. In Ruby 2.7.0-dev, that's defined as:
461
- #
462
- # enum lex_state_bits {
463
- # EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
464
- # EXPR_END_bit, /* newline significant, +/- is an operator. */
465
- # EXPR_ENDARG_bit, /* ditto, and unbound braces. */
466
- # EXPR_ENDFN_bit, /* ditto, and unbound braces. */
467
- # EXPR_ARG_bit, /* newline significant, +/- is an operator. */
468
- # EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
469
- # EXPR_MID_bit, /* newline significant, +/- is an operator. */
470
- # EXPR_FNAME_bit, /* ignore newline, no reserved words. */
471
- # EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
472
- # EXPR_CLASS_bit, /* immediate after `class', no here document. */
473
- # EXPR_LABEL_bit, /* flag bit, label is allowed. */
474
- # EXPR_LABELED_bit, /* flag bit, just after a label. */
475
- # EXPR_FITEM_bit, /* symbol literal as FNAME. */
476
- # EXPR_MAX_STATE
477
- # };
478
- def on_comment(body)
479
- sexp = { type: :@comment, body: body.chomp, start: lineno, end: lineno }
480
-
481
- case Prettier::Parser.lex_state_name(state).gsub('EXPR_', '')
482
- when 'END', 'ARG|LABELED', 'ENDFN'
483
- last_sexp.merge!(comments: [sexp])
484
- when 'CMDARG', 'END|ENDARG', 'ENDARG', 'ARG', 'FNAME|FITEM', 'CLASS',
485
- 'END|LABEL'
486
- inline_comments << sexp
487
- when 'BEG|LABEL', 'MID'
488
- inline_comments << sexp.merge!(break: true)
489
- when 'DOT'
490
- last_sexp.merge!(comments: [sexp.merge!(break: true)])
491
- end
492
-
493
- sexp
494
- end
772
+ bodystmt.bind(params[:char_end], ending[:char_start])
773
+
774
+ {
775
+ type: :def,
776
+ body: [ident, params, bodystmt],
777
+ start: beging[:start],
778
+ char_start: beging[:char_start],
779
+ end: ending[:end],
780
+ char_end: ending[:char_end]
781
+ }
782
+ end
783
+
784
+ # A defs is a parser event that represents defining a singleton method on
785
+ # an object. It accepts the same arguments as the def event, as well as
786
+ # the target and operator on which this method is being defined. As
787
+ # an example, here are the parts that go into this:
788
+ #
789
+ # def foo.bar(baz) do baz end
790
+ # │ │ │ │
791
+ # │ │ │ │ │
792
+ # │ │ │ └> bodystmt
793
+ # │ │ │ └> params
794
+ # │ └> ident
795
+ # │ └> oper
796
+ # └> target
797
+ #
798
+ def on_defs(target, oper, ident, params, bodystmt)
799
+ if params[:type] == :params && !params[:body].any?
800
+ location = ident[:char_end]
801
+ params.merge!(char_start: location, char_end: location)
802
+ end
495
803
 
496
- defined = private_instance_methods(false).grep(/\Aon_/) { $'.to_sym }
804
+ beging = find_scanner_event(:@kw, 'def')
805
+ ending = find_scanner_event(:@kw, 'end')
497
806
 
498
- (PARSER_EVENTS - defined).each do |event|
499
- define_method(:"on_#{event}") do |*body|
500
- super(*body).tap do |sexp|
501
- @last_sexp = sexp
502
- next if inline_comments.empty?
807
+ bodystmt.bind(params[:char_end], ending[:char_start])
503
808
 
504
- sexp[:comments] = inline_comments.reverse
505
- @inline_comments = []
506
- end
507
- end
809
+ {
810
+ type: :defs,
811
+ body: [target, oper, ident, params, bodystmt],
812
+ start: beging[:start],
813
+ char_start: beging[:char_start],
814
+ end: ending[:end],
815
+ char_end: ending[:char_end]
816
+ }
817
+ end
818
+
819
+ # A defined node represents the rather unique defined? operator. It can be
820
+ # used with and without parentheses. If they're present, we use them to
821
+ # determine our bounds, otherwise we use the value that's being passed to
822
+ # the operator.
823
+ def on_defined(value)
824
+ beging = find_scanner_event(:@kw, 'defined?')
825
+
826
+ paren = source[beging[:char_end]...value[:char_start]].include?('(')
827
+ ending = paren ? find_scanner_event(:@rparen) : value
828
+
829
+ beging.merge!(
830
+ type: :defined,
831
+ body: [value],
832
+ end: ending[:end],
833
+ char_end: ending[:char_end]
834
+ )
835
+ end
836
+
837
+ # do_block is a parser event that represents passing a block to a method
838
+ # call using the do..end keywords. It accepts as arguments an optional
839
+ # block_var event that represents any parameters to the block as well as
840
+ # a bodystmt event that represents the statements inside the block.
841
+ def on_do_block(block_var, bodystmt)
842
+ beging = find_scanner_event(:@kw, 'do')
843
+ ending = find_scanner_event(:@kw, 'end')
844
+
845
+ bodystmt.bind((block_var || beging)[:char_end], ending[:char_start])
846
+
847
+ {
848
+ type: :do_block,
849
+ body: [block_var, bodystmt],
850
+ start: beging[:start],
851
+ char_start: beging[:char_start],
852
+ end: ending[:end],
853
+ char_end: ending[:char_end]
854
+ }
855
+ end
856
+
857
+ # dot2 is a parser event that represents using the .. operator between two
858
+ # expressions. Usually this is to create a range object but sometimes it's to
859
+ # use the flip-flop operator.
860
+ def on_dot2(left, right)
861
+ operator = find_scanner_event(:@op, '..')
862
+
863
+ beging = left || operator
864
+ ending = right || operator
865
+
866
+ {
867
+ type: :dot2,
868
+ body: [left, right],
869
+ start: beging[:start],
870
+ char_start: beging[:char_start],
871
+ end: ending[:end],
872
+ char_end: ending[:char_end]
873
+ }
874
+ end
875
+
876
+ # dot3 is a parser event that represents using the ... operator between two
877
+ # expressions. Usually this is to create a range object but sometimes it's to
878
+ # use the flip-flop operator.
879
+ def on_dot3(left, right)
880
+ operator = find_scanner_event(:@op, '...')
881
+
882
+ beging = left || operator
883
+ ending = right || operator
884
+
885
+ {
886
+ type: :dot3,
887
+ body: [left, right],
888
+ start: beging[:start],
889
+ char_start: beging[:char_start],
890
+ end: ending[:end],
891
+ char_end: ending[:char_end]
892
+ }
893
+ end
894
+
895
+ # A dyna_symbol is a parser event that represents a symbol literal that
896
+ # uses quotes to interpolate its value. For example, if you had a variable
897
+ # foo and you wanted a symbol that contained its value, you would write:
898
+ #
899
+ # :"#{foo}"
900
+ #
901
+ # As such, they accept as one argument a string node, which is the same
902
+ # node that gets accepted into a string_literal (since we're basically
903
+ # talking about a string literal with a : character at the beginning).
904
+ #
905
+ # They can also come in another flavor which is a dynamic symbol as a hash
906
+ # key. This is kind of an interesting syntax which results in us having to
907
+ # look for a @label_end scanner event instead to get our bearings. That
908
+ # kind of code would look like:
909
+ #
910
+ # { "#{foo}": bar }
911
+ #
912
+ # which would be the same symbol as above.
913
+ def on_dyna_symbol(string)
914
+ if scanner_events.any? { |event| event[:type] == :@symbeg }
915
+ # A normal dynamic symbol
916
+ beging = find_scanner_event(:@symbeg)
917
+ ending = find_scanner_event(:@tstring_end)
918
+
919
+ beging.merge(
920
+ type: :dyna_symbol,
921
+ quote: beging[:body][1],
922
+ body: string[:body],
923
+ end: ending[:end],
924
+ char_end: ending[:char_end]
925
+ )
926
+ else
927
+ # A dynamic symbol as a hash key
928
+ beging = find_scanner_event(:@tstring_beg)
929
+ ending = find_scanner_event(:@label_end)
930
+
931
+ string.merge!(
932
+ type: :dyna_symbol,
933
+ quote: ending[:body][0],
934
+ start: beging[:start],
935
+ char_start: beging[:char_start],
936
+ end: ending[:end],
937
+ char_end: ending[:char_end]
938
+ )
939
+ end
940
+ end
941
+
942
+ # else can either end with an end keyword (in which case we'll want to
943
+ # consume that event) or it can end with an ensure keyword (in which case
944
+ # we'll leave that to the ensure to handle).
945
+ def find_else_ending
946
+ index =
947
+ scanner_events.rindex do |event|
948
+ event[:type] == :@kw && %w[end ensure].include?(event[:body])
508
949
  end
950
+
951
+ event = scanner_events[index]
952
+ event[:body] == 'end' ? scanner_events.delete_at(index) : event
953
+ end
954
+
955
+ # else is a parser event that represents the end of an if, unless, or begin
956
+ # chain. It accepts as an argument the statements that are contained
957
+ # within the else clause.
958
+ def on_else(stmts)
959
+ beging = find_scanner_event(:@kw, 'else')
960
+ ending = find_else_ending
961
+
962
+ stmts.bind(beging[:char_end], ending[:char_start])
963
+
964
+ {
965
+ type: :else,
966
+ body: [stmts],
967
+ start: beging[:start],
968
+ char_start: beging[:char_start],
969
+ end: ending[:end],
970
+ char_end: ending[:char_end]
971
+ }
972
+ end
973
+
974
+ # elsif is a parser event that represents another clause in an if chain.
975
+ # It accepts as arguments the predicate of the else if, the statements
976
+ # that are contained within the else if clause, and the optional
977
+ # consequent clause.
978
+ def on_elsif(predicate, stmts, consequent)
979
+ beging = find_scanner_event(:@kw, 'elsif')
980
+ ending = consequent || find_scanner_event(:@kw, 'end')
981
+
982
+ stmts.bind(predicate[:char_end], ending[:char_start])
983
+
984
+ {
985
+ type: :elsif,
986
+ body: [predicate, stmts, consequent],
987
+ start: beging[:start],
988
+ char_start: beging[:char_start],
989
+ end: ending[:end],
990
+ char_end: ending[:char_end]
991
+ }
992
+ end
993
+
994
+ # embdocs are long comments that are surrounded by =begin..=end. They
995
+ # cannot be nested, so we don't need to worry about keeping a stack around
996
+ # like we do with heredocs. Instead we can just track the current embdoc
997
+ # and add to it as we get content. It always starts with this scanner
998
+ # event, so here we'll initialize the current embdoc.
999
+ def on_embdoc_beg(value)
1000
+ @embdoc = {
1001
+ type: :@embdoc, value: value, start: lineno, char_start: char_pos
1002
+ }
1003
+ end
1004
+
1005
+ # This is a scanner event that gets hit when we're inside an embdoc and
1006
+ # receive a new line of content. Here we are guaranteed to already have
1007
+ # initialized the @embdoc variable so we can just append the new line onto
1008
+ # the existing content.
1009
+ def on_embdoc(value)
1010
+ @embdoc[:value] << value
1011
+ end
1012
+
1013
+ # This is the final scanner event for embdocs. It receives the =end. Here
1014
+ # we can finalize the embdoc with its location information and the final
1015
+ # piece of the string. We then add it to the list of comments so that
1016
+ # prettier can place it into the final source string.
1017
+ def on_embdoc_end(value)
1018
+ @comments <<
1019
+ @embdoc.merge!(
1020
+ value: @embdoc[:value] << value.chomp,
1021
+ end: lineno,
1022
+ char_end: char_pos + value.length - 1
1023
+ )
1024
+
1025
+ @embdoc = nil
1026
+ end
1027
+
1028
+ # ensure is a parser event that represents the use of the ensure keyword
1029
+ # and its subsequent statements.
1030
+ def on_ensure(stmts)
1031
+ beging = find_scanner_event(:@kw, 'ensure')
1032
+ ending = find_scanner_event(:@kw, 'end')
1033
+
1034
+ stmts.bind(beging[:char_end], ending[:char_start])
1035
+
1036
+ {
1037
+ type: :ensure,
1038
+ body: [stmts],
1039
+ start: beging[:start],
1040
+ char_start: beging[:char_start],
1041
+ end: ending[:end],
1042
+ char_end: ending[:char_end]
1043
+ }
1044
+ end
1045
+
1046
+ # An excessed_comma is a special kind of parser event that represents a comma
1047
+ # at the end of a list of parameters. It's a very strange node. It accepts a
1048
+ # different number of arguments depending on Ruby version, which is why we
1049
+ # have the anonymous splat there.
1050
+ def on_excessed_comma(*)
1051
+ find_scanner_event(:@comma).merge!(type: :excessed_comma)
1052
+ end
1053
+
1054
+ # An fcall is a parser event that represents the piece of a method call
1055
+ # that comes before any arguments (i.e., just the name of the method).
1056
+ def on_fcall(ident)
1057
+ ident.merge(type: :fcall, body: [ident])
1058
+ end
1059
+
1060
+ # A field is a parser event that is always the child of an assignment. It
1061
+ # accepts as arguments the left side of the operation, the operator (. or ::),
1062
+ # and the right side of the operation. For example:
1063
+ #
1064
+ # foo.x = 1
1065
+ #
1066
+ def on_field(left, oper, right)
1067
+ {
1068
+ type: :field,
1069
+ body: [left, oper, right],
1070
+ start: left[:start],
1071
+ char_start: left[:char_start],
1072
+ end: right[:end],
1073
+ char_end: right[:char_end]
1074
+ }
1075
+ end
1076
+
1077
+ # for is a parser event that represents using the somewhat esoteric for
1078
+ # loop. It accepts as arguments an ident which is the iterating variable,
1079
+ # an enumerable for that which is being enumerated, and a stmts event that
1080
+ # represents the statements inside the for loop.
1081
+ def on_for(ident, enumerable, stmts)
1082
+ beging = find_scanner_event(:@kw, 'for')
1083
+ ending = find_scanner_event(:@kw, 'end')
1084
+
1085
+ stmts.bind(enumerable[:char_end], ending[:char_start])
1086
+
1087
+ {
1088
+ type: :for,
1089
+ body: [ident, enumerable, stmts],
1090
+ start: beging[:start],
1091
+ char_start: beging[:char_start],
1092
+ end: ending[:end],
1093
+ char_end: ending[:char_end]
1094
+ }
1095
+ end
1096
+
1097
+ # hash is a parser event that represents a hash literal. It accepts as an
1098
+ # argument an optional assoclist_from_args event which contains the
1099
+ # contents of the hash.
1100
+ def on_hash(assoclist_from_args)
1101
+ beging = find_scanner_event(:@lbrace)
1102
+ ending = find_scanner_event(:@rbrace)
1103
+
1104
+ if assoclist_from_args
1105
+ # Here we're going to expand out the location information for the assocs
1106
+ # node so that it can grab up any remaining comments inside the hash.
1107
+ assoclist_from_args.merge!(
1108
+ char_start: beging[:char_end], char_end: ending[:char_start]
1109
+ )
509
1110
  end
510
- )
511
1111
 
512
- # Nodes that are always on their own line occur when the lexer is in the
513
- # EXPR_BEG state. Those comments are tracked within the @block_comments
514
- # instance variable. Then for each node that could contain them, we attach
515
- # them after the node has been built.
516
- prepend(
517
- Module.new do
518
- events = {
519
- begin: [0, :body, 0],
520
- bodystmt: [0],
521
- class: [2, :body, 0],
522
- def: [2, :body, 0],
523
- defs: [4, :body, 0],
524
- else: [0],
525
- elsif: [1],
526
- ensure: [0],
527
- if: [1],
528
- program: [0],
529
- rescue: [2],
530
- sclass: [1, :body, 0],
531
- unless: [1],
532
- until: [1],
533
- when: [1],
534
- while: [1]
535
- }
1112
+ {
1113
+ type: :hash,
1114
+ body: [assoclist_from_args],
1115
+ start: beging[:start],
1116
+ char_start: beging[:char_start],
1117
+ end: ending[:end],
1118
+ char_end: ending[:char_end]
1119
+ }
1120
+ end
536
1121
 
537
- def initialize(*args)
538
- super(*args)
539
- @block_comments = []
540
- @current_embdoc = nil
541
- end
1122
+ # This is a scanner event that represents the beginning of the heredoc. It
1123
+ # includes the declaration (which we call beging here, which is just short
1124
+ # for beginning). The declaration looks something like <<-HERE or <<~HERE.
1125
+ # If the downcased version of the declaration actually matches an existing
1126
+ # prettier parser, we'll later attempt to print it using that parser and
1127
+ # printer through our embed function.
1128
+ def on_heredoc_beg(beging)
1129
+ {
1130
+ type: :heredoc,
1131
+ beging: beging,
1132
+ start: lineno,
1133
+ end: lineno,
1134
+ char_start: char_pos - beging.length + 1,
1135
+ char_end: char_pos
1136
+ }.tap { |node| @heredocs << node }
1137
+ end
542
1138
 
543
- def self.prepended(base)
544
- base.attr_reader :block_comments, :current_embdoc
545
- end
1139
+ # This is a parser event that occurs when you're using a heredoc with a
1140
+ # tilde. These are considered `heredoc_dedent` nodes, whereas the hyphen
1141
+ # heredocs show up as string literals.
1142
+ def on_heredoc_dedent(string, _width)
1143
+ @heredocs[-1].merge!(string.slice(:body))
1144
+ end
546
1145
 
547
- private
1146
+ # This is a scanner event that represents the end of the heredoc.
1147
+ def on_heredoc_end(ending)
1148
+ @heredocs[-1].merge!(ending: ending.chomp, end: lineno, char_end: char_pos)
1149
+ end
548
1150
 
549
- def attach_comments(sexp, stmts)
550
- range = sexp[:start]..sexp[:end]
551
- comments =
552
- block_comments.group_by { |comment| range.include?(comment[:start]) }
1151
+ # hshptn is a parser event that represents matching against a hash pattern
1152
+ # using the Ruby 2.7+ pattern matching syntax.
1153
+ def on_hshptn(const, kw, kwrest)
1154
+ pieces = [const, kw, kwrest].flatten(2).compact
1155
+
1156
+ {
1157
+ type: :hshptn,
1158
+ body: [const, kw, kwrest],
1159
+ start: pieces[0][:start],
1160
+ char_start: pieces[0][:char_start],
1161
+ end: pieces[-1][:end],
1162
+ char_end: pieces[-1][:char_end]
1163
+ }
1164
+ end
553
1165
 
554
- if comments[true]
555
- stmts[:body] =
556
- (stmts[:body] + comments[true]).sort_by { |node| node[:start] }
1166
+ # if is a parser event that represents the first clause in an if chain.
1167
+ # It accepts as arguments the predicate of the if, the statements that are
1168
+ # contained within the if clause, and the optional consequent clause.
1169
+ def on_if(predicate, stmts, consequent)
1170
+ beging = find_scanner_event(:@kw, 'if')
1171
+ ending = consequent || find_scanner_event(:@kw, 'end')
1172
+
1173
+ stmts.bind(predicate[:char_end], ending[:char_start])
1174
+
1175
+ {
1176
+ type: :if,
1177
+ body: [predicate, stmts, consequent],
1178
+ start: beging[:start],
1179
+ char_start: beging[:char_start],
1180
+ end: ending[:end],
1181
+ char_end: ending[:char_end]
1182
+ }
1183
+ end
557
1184
 
558
- @block_comments = comments.fetch(false) { [] }
559
- end
560
- end
1185
+ # ifop is a parser event that represents a ternary operator. It accepts as
1186
+ # arguments the predicate to the ternary, the truthy clause, and the falsy
1187
+ # clause.
1188
+ def on_ifop(predicate, truthy, falsy)
1189
+ predicate.merge(
1190
+ type: :ifop,
1191
+ body: [predicate, truthy, falsy],
1192
+ end: falsy[:end],
1193
+ char_end: falsy[:char_end]
1194
+ )
1195
+ end
561
1196
 
562
- events.each do |event, path|
563
- define_method(:"on_#{event}") do |*body|
564
- super(*body).tap { |sexp| attach_comments(sexp, body.dig(*path)) }
565
- end
566
- end
1197
+ # if_mod is a parser event that represents the modifier form of an if
1198
+ # statement. It accepts as arguments the predicate of the if and the
1199
+ # statement that are contained within the if clause.
1200
+ def on_if_mod(predicate, statement)
1201
+ find_scanner_event(:@kw, 'if')
1202
+
1203
+ {
1204
+ type: :if_mod,
1205
+ body: [predicate, statement],
1206
+ start: statement[:start],
1207
+ char_start: statement[:char_start],
1208
+ end: predicate[:end],
1209
+ char_end: predicate[:char_end]
1210
+ }
1211
+ end
567
1212
 
568
- def on_comment(body)
569
- super(body).tap do |sexp|
570
- lex_state = Prettier::Parser.lex_state_name(state).gsub('EXPR_', '')
571
- block_comments << sexp if lex_state == 'BEG'
572
- end
573
- end
1213
+ # in is a parser event that represents using the in keyword within the
1214
+ # Ruby 2.7+ pattern matching syntax.
1215
+ def on_in(pattern, stmts, consequent)
1216
+ beging = find_scanner_event(:@kw, 'in')
1217
+ ending = consequent || find_scanner_event(:@kw, 'end')
574
1218
 
575
- def on_embdoc_beg(comment)
576
- @current_embdoc = {
577
- type: :embdoc, body: comment, start: lineno, end: lineno
578
- }
579
- end
1219
+ stmts.bind(beging[:char_end], ending[:char_start])
580
1220
 
581
- def on_embdoc(comment)
582
- @current_embdoc[:body] << comment
583
- end
1221
+ beging.merge!(
1222
+ type: :in,
1223
+ body: [pattern, stmts, consequent],
1224
+ end: ending[:end],
1225
+ char_end: ending[:char_end]
1226
+ )
1227
+ end
584
1228
 
585
- def on_embdoc_end(comment)
586
- @current_embdoc[:body] << comment.chomp
587
- @block_comments << @current_embdoc
588
- @current_embdoc = nil
589
- end
1229
+ # kwrest_param is a parser event that represents defining a parameter in a
1230
+ # method definition that accepts all remaining keyword parameters.
1231
+ def on_kwrest_param(ident)
1232
+ oper = find_scanner_event(:@op, '**')
1233
+ return oper.merge!(type: :kwrest_param, body: [nil]) unless ident
1234
+
1235
+ oper.merge!(
1236
+ type: :kwrest_param,
1237
+ body: [ident],
1238
+ end: ident[:end],
1239
+ char_end: ident[:char_end]
1240
+ )
1241
+ end
590
1242
 
591
- def on_method_add_block(*body)
592
- super(*body).tap do |sexp|
593
- stmts = body[1][:body][1]
594
- stmts = stmts[:type] == :stmts ? stmts : body[1][:body][1][:body][0]
1243
+ # lambda is a parser event that represents using a "stabby" lambda
1244
+ # literal. It accepts as arguments a params event that represents any
1245
+ # parameters to the lambda and a stmts event that represents the
1246
+ # statements inside the lambda.
1247
+ #
1248
+ # It can be wrapped in either {..} or do..end so we look for either of
1249
+ # those combinations to get our bounds.
1250
+ def on_lambda(params, stmts)
1251
+ beging = find_scanner_event(:@tlambda)
1252
+
1253
+ if scanner_events.any? { |event| event[:type] == :@tlambeg }
1254
+ opening = find_scanner_event(:@tlambeg)
1255
+ closing = find_scanner_event(:@rbrace)
1256
+ else
1257
+ opening = find_scanner_event(:@kw, 'do')
1258
+ closing = find_scanner_event(:@kw, 'end')
1259
+ end
595
1260
 
596
- attach_comments(sexp, stmts)
597
- end
598
- end
1261
+ stmts.bind(opening[:char_end], closing[:char_start])
1262
+
1263
+ {
1264
+ type: :lambda,
1265
+ body: [params, stmts],
1266
+ start: beging[:start],
1267
+ char_start: beging[:char_start],
1268
+ end: closing[:end],
1269
+ char_end: closing[:char_end]
1270
+ }
1271
+ end
1272
+
1273
+ # massign is a parser event that is a parent node of any kind of multiple
1274
+ # assignment. This includes splitting out variables on the left like:
1275
+ #
1276
+ # a, b, c = foo
1277
+ #
1278
+ # as well as splitting out variables on the right, as in:
1279
+ #
1280
+ # foo = a, b, c
1281
+ #
1282
+ # Both sides support splats, as well as variables following them. There's
1283
+ # also slightly odd behavior that you can achieve with the following:
1284
+ #
1285
+ # a, = foo
1286
+ #
1287
+ # In this case a would receive only the first value of the foo enumerable,
1288
+ # in which case we need to explicitly track the comma and add it onto the
1289
+ # child node.
1290
+ def on_massign(left, right)
1291
+ if source[left[:char_end]...right[:char_start]].strip.start_with?(',')
1292
+ left[:comma] = true
599
1293
  end
600
- )
601
1294
 
602
- # Tracking heredocs in somewhat interesting. Straight-line heredocs are
603
- # reported as strings, whereas squiggly-line heredocs are reported as
604
- # heredocs. We track the start and matching end of the heredoc as "beging" and
605
- # "ending" respectively.
606
- prepend(
607
- Module.new do
608
- def initialize(*args)
609
- super(*args)
610
- @heredoc_stack = []
611
- end
1295
+ {
1296
+ type: :massign,
1297
+ body: [left, right],
1298
+ start: left[:start],
1299
+ char_start: left[:char_start],
1300
+ end: right[:end],
1301
+ char_end: right[:char_end]
1302
+ }
1303
+ end
612
1304
 
613
- def self.prepended(base)
614
- base.attr_reader :heredoc_stack
615
- end
1305
+ # method_add_arg is a parser event that represents a method call with
1306
+ # arguments and parentheses. It accepts as arguments the method being called
1307
+ # and the arg_paren event that contains the arguments to the method.
1308
+ def on_method_add_arg(fcall, arg_paren)
1309
+ {
1310
+ type: :method_add_arg,
1311
+ body: [fcall, arg_paren],
1312
+ start: fcall[:start],
1313
+ char_start: fcall[:char_start],
1314
+ end: arg_paren[:end],
1315
+ char_end: arg_paren[:char_end]
1316
+ }
1317
+ end
616
1318
 
617
- private
1319
+ # method_add_block is a parser event that represents a method call with a
1320
+ # block argument. It accepts as arguments the method being called and the
1321
+ # block event.
1322
+ def on_method_add_block(method_add_arg, block)
1323
+ {
1324
+ type: :method_add_block,
1325
+ body: [method_add_arg, block],
1326
+ start: method_add_arg[:start],
1327
+ char_start: method_add_arg[:char_start],
1328
+ end: block[:end],
1329
+ char_end: block[:char_end]
1330
+ }
1331
+ end
1332
+
1333
+ # An mlhs_new is a parser event that represents the beginning of the left
1334
+ # side of a multiple assignment. It is followed by any number of mlhs_add
1335
+ # nodes that each represent another variable being assigned.
1336
+ def on_mlhs_new
1337
+ {
1338
+ type: :mlhs,
1339
+ body: [],
1340
+ start: lineno,
1341
+ char_start: char_pos,
1342
+ end: lineno,
1343
+ char_end: char_pos
1344
+ }
1345
+ end
1346
+
1347
+ # An mlhs_add is a parser event that represents adding another variable
1348
+ # onto a list of assignments. It accepts as arguments the parent mlhs node
1349
+ # as well as the part that is being added to the list.
1350
+ def on_mlhs_add(mlhs, part)
1351
+ if mlhs[:body].empty?
1352
+ part.merge(type: :mlhs, body: [part])
1353
+ else
1354
+ mlhs.merge!(
1355
+ body: mlhs[:body] << part, end: part[:end], char_end: part[:char_end]
1356
+ )
1357
+ end
1358
+ end
1359
+
1360
+ # An mlhs_add_post is a parser event that represents adding another set of
1361
+ # variables onto a list of assignments after a splat variable. It accepts
1362
+ # as arguments the previous mlhs_add_star node that represented the splat
1363
+ # as well another mlhs node that represents all of the variables after the
1364
+ # splat.
1365
+ def on_mlhs_add_post(mlhs_add_star, mlhs)
1366
+ mlhs_add_star.merge(
1367
+ type: :mlhs_add_post,
1368
+ body: [mlhs_add_star, mlhs],
1369
+ end: mlhs[:end],
1370
+ char_end: mlhs[:char_end]
1371
+ )
1372
+ end
1373
+
1374
+ # An mlhs_add_star is a parser event that represents a splatted variable
1375
+ # inside of a multiple assignment on the left hand side. It accepts as
1376
+ # arguments the parent mlhs node as well as the part that represents the
1377
+ # splatted variable.
1378
+ def on_mlhs_add_star(mlhs, part)
1379
+ beging = find_scanner_event(:@op, '*')
1380
+ ending = part || beging
1381
+
1382
+ {
1383
+ type: :mlhs_add_star,
1384
+ body: [mlhs, part],
1385
+ start: beging[:start],
1386
+ char_start: beging[:char_start],
1387
+ end: ending[:end],
1388
+ char_end: ending[:char_end]
1389
+ }
1390
+ end
1391
+
1392
+ # An mlhs_paren is a parser event that represents parentheses being used
1393
+ # to deconstruct values in a multiple assignment on the left hand side. It
1394
+ # accepts as arguments the contents of the inside of the parentheses,
1395
+ # which is another mlhs node.
1396
+ def on_mlhs_paren(contents)
1397
+ beging = find_scanner_event(:@lparen)
1398
+ ending = find_scanner_event(:@rparen)
1399
+
1400
+ if source[beging[:char_end]...ending[:char_start]].strip.end_with?(',')
1401
+ contents[:comma] = true
1402
+ end
1403
+
1404
+ {
1405
+ type: :mlhs_paren,
1406
+ body: [contents],
1407
+ start: beging[:start],
1408
+ char_start: beging[:char_start],
1409
+ end: ending[:end],
1410
+ char_end: ending[:char_end]
1411
+ }
1412
+ end
1413
+
1414
+ # module is a parser event that represents defining a module. It accepts
1415
+ # as arguments the name of the module and the bodystmt event that
1416
+ # represents the statements evaluated within the context of the module.
1417
+ def on_module(const, bodystmt)
1418
+ beging = find_scanner_event(:@kw, 'module')
1419
+ ending = find_scanner_event(:@kw, 'end')
1420
+
1421
+ bodystmt.bind(const[:char_end], ending[:char_start])
1422
+
1423
+ {
1424
+ type: :module,
1425
+ body: [const, bodystmt],
1426
+ start: beging[:start],
1427
+ char_start: beging[:char_start],
1428
+ end: ending[:end],
1429
+ char_end: ending[:char_end]
1430
+ }
1431
+ end
1432
+
1433
+ # An mrhs_new is a parser event that represents the beginning of a list of
1434
+ # values that are being assigned within a multiple assignment node. It can
1435
+ # be followed by any number of mrhs_add nodes that we'll build up into an
1436
+ # array body.
1437
+ def on_mrhs_new
1438
+ {
1439
+ type: :mrhs,
1440
+ body: [],
1441
+ start: lineno,
1442
+ char_start: char_pos,
1443
+ end: lineno,
1444
+ char_end: char_pos
1445
+ }
1446
+ end
618
1447
 
619
- # This is a scanner event that represents the beginning of the heredoc.
620
- def on_heredoc_beg(beging)
1448
+ # An mrhs_add is a parser event that represents adding another value onto
1449
+ # a list on the right hand side of a multiple assignment.
1450
+ def on_mrhs_add(mrhs, part)
1451
+ if mrhs[:body].empty?
1452
+ part.merge(type: :mrhs, body: [part])
1453
+ else
1454
+ mrhs.merge!(
1455
+ body: mrhs[:body] << part, end: part[:end], char_end: part[:char_end]
1456
+ )
1457
+ end
1458
+ end
1459
+
1460
+ # An mrhs_add_star is a parser event that represents using the splat
1461
+ # operator to expand out a value on the right hand side of a multiple
1462
+ # assignment.
1463
+ def on_mrhs_add_star(mrhs, part)
1464
+ beging = find_scanner_event(:@op, '*')
1465
+ ending = part || beging
1466
+
1467
+ {
1468
+ type: :mrhs_add_star,
1469
+ body: [mrhs, part],
1470
+ start: beging[:start],
1471
+ char_start: beging[:char_start],
1472
+ end: ending[:end],
1473
+ char_end: ending[:char_end]
1474
+ }
1475
+ end
1476
+
1477
+ # An mrhs_new_from_args is a parser event that represents the shorthand
1478
+ # of a multiple assignment that allows you to assign values using just
1479
+ # commas as opposed to assigning from an array. For example, in the
1480
+ # following segment the right hand side of the assignment would trigger
1481
+ # this event:
1482
+ #
1483
+ # foo = 1, 2, 3
1484
+ #
1485
+ def on_mrhs_new_from_args(args)
1486
+ args.merge(type: :mrhs_new_from_args, body: [args])
1487
+ end
1488
+
1489
+ # next is a parser event that represents using the next keyword. It
1490
+ # accepts as an argument an args or args_add_block event that contains all
1491
+ # of the arguments being passed to the next.
1492
+ def on_next(args_add_block)
1493
+ find_scanner_event(:@kw, 'next').merge!(
1494
+ type: :next,
1495
+ body: [args_add_block],
1496
+ end: args_add_block[:end],
1497
+ char_end: args_add_block[:char_end]
1498
+ )
1499
+ end
1500
+
1501
+ # opassign is a parser event that represents assigning something to a
1502
+ # variable or constant using an operator like += or ||=. It accepts as
1503
+ # arguments the left side of the expression before the operator, the
1504
+ # operator itself, and the right side of the expression.
1505
+ def on_opassign(left, oper, right)
1506
+ left.merge(
1507
+ type: :opassign,
1508
+ body: [left, oper, right],
1509
+ end: right[:end],
1510
+ char_end: right[:char_end]
1511
+ )
1512
+ end
1513
+
1514
+ # params is a parser event that represents defining parameters on a
1515
+ # method. They have a somewhat interesting structure in that they are an
1516
+ # array of arrays where the position in the top-level array indicates the
1517
+ # type of param and the subarray is the list of parameters of that type.
1518
+ # We therefore have to flatten them down to get to the location.
1519
+ def on_params(*types)
1520
+ flattened = types.flatten(2).select { |type| type.is_a?(Hash) }
1521
+ location =
1522
+ if flattened.any?
621
1523
  {
622
- type: :heredoc,
623
- beging: beging,
624
- start: lineno,
625
- end: lineno,
626
- char_start: char_pos - beging.length + 1,
627
- char_end: char_pos
628
- }.tap { |node| heredoc_stack << node }
1524
+ start: flattened[0][:start],
1525
+ char_start: flattened[0][:char_start],
1526
+ end: flattened[-1][:end],
1527
+ char_end: flattened[-1][:char_end]
1528
+ }
1529
+ else
1530
+ { start: lineno, char_start: char_pos, end: lineno, char_end: char_pos }
629
1531
  end
630
1532
 
631
- # This is a scanner event that represents the end of the heredoc.
632
- def on_heredoc_end(ending)
633
- heredoc_stack[-1].merge!(
634
- ending: ending.chomp, end: lineno, char_end: char_pos
635
- )
636
- end
1533
+ location.merge!(type: :params, body: types)
1534
+ end
637
1535
 
638
- # This is a parser event that occurs when you're using a heredoc with a
639
- # tilde. These are considered `heredoc_dedent` nodes, whereas the hyphen
640
- # heredocs show up as string literals.
641
- def on_heredoc_dedent(string, _width)
642
- heredoc_stack[-1].merge!(string.slice(:body))
643
- end
1536
+ # A paren is a parser event that represents using parentheses pretty much
1537
+ # anywhere in a Ruby program. It accepts as arguments the contents, which
1538
+ # can be either params or statements.
1539
+ def on_paren(contents)
1540
+ ending = find_scanner_event(:@rparen)
1541
+
1542
+ find_scanner_event(:@lparen).merge!(
1543
+ type: :paren,
1544
+ body: [contents],
1545
+ end: ending[:end],
1546
+ char_end: ending[:char_end]
1547
+ )
1548
+ end
1549
+
1550
+ # The program node is the very top of the AST. Here we'll attach all of
1551
+ # the comments that we've gathered up over the course of parsing the
1552
+ # source string. We'll also attach on the __END__ content if there was
1553
+ # some found at the end of the source string.
1554
+ def on_program(stmts)
1555
+ range = {
1556
+ start: 1, end: lines.length, char_start: 0, char_end: source.length
1557
+ }
1558
+
1559
+ stmts[:body] << @__end__ if @__end__
1560
+ stmts.bind(0, source.length)
1561
+
1562
+ range.merge(type: :program, body: [stmts], comments: @comments)
1563
+ end
1564
+
1565
+ # qsymbols_new is a parser event that represents the beginning of a symbol
1566
+ # literal array, like %i[one two three]. It can be followed by any number
1567
+ # of qsymbols_add events, which we'll append onto an array body.
1568
+ def on_qsymbols_new
1569
+ find_scanner_event(:@qsymbols_beg).merge!(type: :qsymbols, body: [])
1570
+ end
1571
+
1572
+ # qsymbols_add is a parser event that represents an element inside of a
1573
+ # symbol literal array like %i[one two three]. It accepts as arguments the
1574
+ # parent qsymbols node as well as a tstring_content scanner event
1575
+ # representing the bare words.
1576
+ def on_qsymbols_add(qsymbols, tstring_content)
1577
+ qsymbols.merge!(
1578
+ body: qsymbols[:body] << tstring_content,
1579
+ end: tstring_content[:end],
1580
+ char_end: tstring_content[:char_end]
1581
+ )
1582
+ end
1583
+
1584
+ # qwords_new is a parser event that represents the beginning of a string
1585
+ # literal array, like %w[one two three]. It can be followed by any number
1586
+ # of qwords_add events, which we'll append onto an array body.
1587
+ def on_qwords_new
1588
+ find_scanner_event(:@qwords_beg).merge!(type: :qwords, body: [])
1589
+ end
1590
+
1591
+ # qwords_add is a parser event that represents an element inside of a
1592
+ # string literal array like %w[one two three]. It accepts as arguments the
1593
+ # parent qwords node as well as a tstring_content scanner event
1594
+ # representing the bare words.
1595
+ def on_qwords_add(qwords, tstring_content)
1596
+ qwords.merge!(
1597
+ body: qwords[:body] << tstring_content,
1598
+ end: tstring_content[:end],
1599
+ char_end: tstring_content[:char_end]
1600
+ )
1601
+ end
1602
+
1603
+ # redo is a parser event that represents the bare redo keyword. It has no
1604
+ # body as it accepts no arguments.
1605
+ def on_redo
1606
+ find_scanner_event(:@kw, 'redo').merge!(type: :redo)
1607
+ end
1608
+
1609
+ # regexp_new is a parser event that represents the beginning of a regular
1610
+ # expression literal, like /foo/. It can be followed by any number of
1611
+ # regexp_add events, which we'll append onto an array body.
1612
+ def on_regexp_new
1613
+ find_scanner_event(:@regexp_beg).merge!(type: :regexp, body: [])
1614
+ end
1615
+
1616
+ # regexp_add is a parser event that represents a piece of a regular
1617
+ # expression body. It accepts as arguments the parent regexp node as well as a
1618
+ # tstring_content scanner event representing string content or a
1619
+ # string_embexpr parser event representing interpolated content.
1620
+ def on_regexp_add(regexp, piece)
1621
+ regexp.merge!(
1622
+ body: regexp[:body] << piece,
1623
+ end: regexp[:end],
1624
+ char_end: regexp[:char_end]
1625
+ )
1626
+ end
644
1627
 
645
- # String literals are either going to be a normal string or they're going
646
- # to be a heredoc with a hyphen.
647
- def on_string_literal(string)
648
- heredoc = heredoc_stack[-1]
1628
+ # regexp_literal is a parser event that represents a regular expression.
1629
+ # It accepts as arguments a regexp node which is a built-up array of
1630
+ # pieces that go into the regexp content, as well as the ending used to
1631
+ # close out the regexp which includes any modifiers.
1632
+ def on_regexp_literal(regexp, ending)
1633
+ regexp.merge!(
1634
+ type: :regexp_literal,
1635
+ ending: ending[:body],
1636
+ end: ending[:end],
1637
+ char_end: ending[:char_end]
1638
+ )
1639
+ end
649
1640
 
650
- if heredoc && heredoc[:ending]
651
- heredoc_stack.pop.merge!(string.slice(:body))
652
- else
653
- super
654
- end
1641
+ # rescue is a special kind of node where you have a rescue chain but it
1642
+ # doesn't really have all of the information that it needs in order to
1643
+ # determine its ending. Therefore it relies on its parent bodystmt node to
1644
+ # report its ending to it.
1645
+ class Rescue < SimpleDelegator
1646
+ def bind(char_end)
1647
+ merge!(char_end: char_end)
1648
+
1649
+ stmts = self[:body][2]
1650
+ consequent = self[:body][3]
1651
+
1652
+ if consequent
1653
+ consequent.bind(char_end)
1654
+ stmts.bind(stmts[:char_start], consequent[:char_start])
1655
+ else
1656
+ stmts.bind(stmts[:char_start], char_end)
655
1657
  end
656
1658
  end
657
- )
1659
+ end
658
1660
 
659
- # This module contains miscellaneous fixes required to get the right
660
- # structure.
661
- prepend(
662
- Module.new do
663
- private
1661
+ # rescue is a parser event that represents the use of the rescue keyword
1662
+ # inside of a bodystmt.
1663
+ def on_rescue(exceptions, variable, stmts, consequent)
1664
+ beging = find_scanner_event(:@kw, 'rescue')
664
1665
 
665
- # These are the event types that contain _actual_ string content. If
666
- # there is an encoding magic comment at the top of the file, ripper will
667
- # actually change into that encoding for the storage of the string. This
668
- # will break everything, so we need to force the encoding back into UTF-8
669
- # so that the JSON library won't break.
670
- %w[comment ident tstring_content].each do |event|
671
- define_method(:"on_#{event}") do |body|
672
- super(body.force_encoding('UTF-8'))
673
- end
674
- end
1666
+ stmts.bind(
1667
+ ((exceptions || [])[-1] || variable || beging)[:char_end],
1668
+ char_pos
1669
+ )
675
1670
 
676
- # Handles __END__ syntax, which allows individual scripts to keep content
677
- # after the main ruby code that can be read through DATA. It looks like:
678
- #
679
- # foo.bar
680
- #
681
- # __END__
682
- # some other content that isn't normally read by ripper
683
- def on___end__(body)
684
- @__end__ = super(lines[lineno..-1].join("\n"))
685
- end
1671
+ Rescue.new(
1672
+ beging.merge!(
1673
+ type: :rescue,
1674
+ body: [exceptions, variable, stmts, consequent],
1675
+ end: lineno,
1676
+ char_end: char_pos
1677
+ )
1678
+ )
1679
+ end
686
1680
 
687
- def on_program(*body)
688
- super(*body).tap { |node| node[:body][0][:body] << __end__ if __end__ }
689
- end
1681
+ # rescue_mod represents the modifier form of a rescue clause. It accepts as
1682
+ # arguments the statement that may raise an error and the value that should
1683
+ # be used if it does.
1684
+ def on_rescue_mod(statement, rescued)
1685
+ find_scanner_event(:@kw, 'rescue')
1686
+
1687
+ {
1688
+ type: :rescue_mod,
1689
+ body: [statement, rescued],
1690
+ start: statement[:start],
1691
+ char_start: statement[:char_start],
1692
+ end: rescued[:end],
1693
+ char_end: rescued[:char_end]
1694
+ }
1695
+ end
1696
+
1697
+ # rest_param is a parser event that represents defining a parameter in a
1698
+ # method definition that accepts all remaining positional parameters. It
1699
+ # accepts as an argument an optional identifier for the parameter. If it
1700
+ # is omitted, then we're just using the plain operator.
1701
+ def on_rest_param(ident)
1702
+ oper = find_scanner_event(:@op, '*')
1703
+ return oper.merge!(type: :rest_param, body: [nil]) unless ident
1704
+
1705
+ oper.merge!(
1706
+ type: :rest_param,
1707
+ body: [ident],
1708
+ end: ident[:end],
1709
+ char_end: ident[:char_end]
1710
+ )
1711
+ end
1712
+
1713
+ # retry is a parser event that represents the bare retry keyword. It has
1714
+ # no body as it accepts no arguments.
1715
+ def on_retry
1716
+ find_scanner_event(:@kw, 'retry').merge!(type: :retry)
1717
+ end
1718
+
1719
+ # return is a parser event that represents using the return keyword with
1720
+ # arguments. It accepts as an argument an args_add_block event that
1721
+ # contains all of the arguments being passed.
1722
+ def on_return(args_add_block)
1723
+ find_scanner_event(:@kw, 'return').merge!(
1724
+ type: :return,
1725
+ body: [args_add_block],
1726
+ end: args_add_block[:end],
1727
+ char_end: args_add_block[:char_end]
1728
+ )
1729
+ end
690
1730
 
691
- # Normally access controls are reported as vcall nodes. This creates a
692
- # new node type to explicitly track those nodes instead, so that the
693
- # printer can add new lines as necessary.
694
- def on_vcall(ident)
695
- @access_controls ||= %w[private protected public].freeze
1731
+ # return0 is a parser event that represents the bare return keyword. It
1732
+ # has no body as it accepts no arguments. This is as opposed to the return
1733
+ # parser event, which is the version where you're returning one or more
1734
+ # values.
1735
+ def on_return0
1736
+ find_scanner_event(:@kw, 'return').merge!(type: :return0)
1737
+ end
696
1738
 
697
- super(ident).tap do |node|
698
- if !@access_controls.include?(ident[:body]) ||
699
- ident[:body] != lines[lineno - 1].strip
700
- next
701
- end
1739
+ # sclass is a parser event that represents a block of statements that
1740
+ # should be evaluated within the context of the singleton class of an
1741
+ # object. It's frequently used to define singleton methods. It looks like
1742
+ # the following example:
1743
+ #
1744
+ # class << self do foo end
1745
+ # │ │
1746
+ # │ └> bodystmt
1747
+ # └> target
1748
+ #
1749
# Builds the :sclass node. The `class` keyword event supplies the starting
# location and the `end` keyword event supplies the ending location. The body
# statements are bound to the range between the end of the target and the
# start of the closing keyword.
def on_sclass(target, bodystmt)
  opening = find_scanner_event(:@kw, 'class')
  closing = find_scanner_event(:@kw, 'end')

  bodystmt.bind(target[:char_end], closing[:char_start])

  { type: :sclass, body: [target, bodystmt] }.merge(
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
702
1764
 
703
- node.merge!(type: :access_ctrl)
704
- end
1765
+ # Everything that has a block of code inside of it has a list of statements.
1766
+ # Normally we would just track those as a node that has an array body, but we
1767
+ # have some special handling in order to handle empty statement lists. They
1768
+ # need to have the right location information, so all of the parent nodes of
1769
+ # stmts nodes will report back down the location information. We then
1770
+ # propagate that onto void_stmt nodes inside the stmts in order to make sure
1771
+ # all comments get printed appropriately.
1772
+ class Stmts < SimpleDelegator
1773
# Records the character range that the parent node assigns to this statement
# list. When the first statement is a void_stmt, it is collapsed onto the
# start of that range (both of its offsets become char_start).
#
# Returns the updated void_stmt hash, or nil when the first statement is not
# a void_stmt.
def bind(char_start, char_end)
  merge!(char_start: char_start, char_end: char_end)

  first = self[:body][0]
  return unless first[:type] == :void_stmt

  first.merge!(char_start: char_start, char_end: char_start)
end
706
1780
 
707
- # When the only statement inside of a `def` node is a `begin` node, then
708
- # you can safely replace the body of the `def` with the body of the
709
- # `begin`. For example:
710
- #
711
- # def foo
712
- # begin
713
- # try_something
714
- # rescue SomeError => error
715
- # handle_error(error)
716
- # end
717
- # end
718
- #
719
- # can get transformed into:
720
- #
721
- # def foo
722
- # try_something
723
- # rescue SomeError => error
724
- # handle_error(error)
725
- # end
726
- #
727
- # This module handles this by hoisting up the `bodystmt` node from the
728
- # inner `begin` up to the `def`.
729
- def on_def(ident, params, bodystmt)
730
- def_bodystmt = bodystmt
731
- stmts, *other_parts = bodystmt[:body]
732
-
733
- if !other_parts.any? && stmts[:body].length == 1 &&
734
- stmts.dig(:body, 0, :type) == :begin
735
- def_bodystmt = stmts.dig(:body, 0, :body, 0)
736
- end
737
-
738
- super(ident, params, def_bodystmt)
1781
# Appends a statement to the list and keeps the list's own location in sync.
# The first statement appended supplies both the starting and ending
# location; every later statement only extends the ending location.
#
# Returns self so that calls can be chained.
def <<(statement)
  keys =
    if self[:body].any?
      %i[end char_end]
    else
      %i[start end char_start char_end]
    end

  merge!(statement.slice(*keys))

  self[:body] << statement
  self
end
1791
+ end
749
1792
 
750
- ending = source.rindex(')', char_pos)
751
- buffer = source[(node[:char_start] + 1)...ending]
1793
+ # stmts_new is a parser event that represents the beginning of a list of
1794
+ # statements within any lexical block. It can be followed by any number of
1795
+ # stmts_add events, which we'll append onto an array body.
1796
# Creates an empty Stmts node whose starting and ending location are both the
# current line number and character position.
def on_stmts_new
  line = lineno
  offset = char_pos

  Stmts.new(
    type: :stmts,
    body: [],
    start: line,
    end: line,
    char_start: offset,
    char_end: offset
  )
end
1806
+
1807
+ # stmts_add is a parser event that represents a single statement inside a
1808
+ # list of statements within any lexical block. It accepts as arguments the
1809
+ # parent stmts node as well as an stmt which can be any expression in
1810
+ # Ruby.
1811
# Appends stmt to the stmts node through its << operator and returns whatever
# that operator returns.
def on_stmts_add(stmts, stmt)
  stmts.<<(stmt)
end
1814
+
1815
+ # string_concat is a parser event that represents concatenating two
1816
+ # strings together using a backslash, as in the following example:
1817
+ #
1818
+ # 'foo' \
1819
+ # 'bar'
1820
+ #
1821
# Builds a :string_concat node that starts where the left string starts and
# ends where the right string ends.
def on_string_concat(left, right)
  node = { type: :string_concat, body: [left, right] }

  node[:start] = left[:start]
  node[:char_start] = left[:char_start]
  node[:end] = right[:end]
  node[:char_end] = right[:char_end]

  node
end
1831
+
1832
+ # string_content is a parser event that represents the beginning of the
1833
+ # contents of a string, which will either be embedded inside of a
1834
+ # string_literal or a dyna_symbol node. It will have an array body so that
1835
+ # we can build up a list of @tstring_content, string_embexpr, and
1836
+ # string_dvar nodes.
1837
# Creates an empty :string node whose starting and ending location are both
# the current line number and character position.
def on_string_content
  line = lineno
  offset = char_pos

  {
    type: :string,
    body: [],
    start: line,
    end: line,
    char_start: offset,
    char_end: offset
  }
end
1847
+
1848
+ # string_add is a parser event that represents a piece of a string. It
1849
+ # could be plain @tstring_content, string_embexpr, or string_dvar nodes.
1850
+ # It accepts as arguments the parent string node as well as the additional
1851
+ # piece of the string.
1852
# Appends piece to the string node's body and moves the node's ending
# location to the end of that piece. The string hash is mutated in place and
# returned.
def on_string_add(string, piece)
  string[:body] << piece
  string[:end] = piece[:end]
  string[:char_end] = piece[:char_end]
  string
end
1857
+
1858
+ # string_dvar is a parser event that represents a very special kind of
1859
+ # interpolation into string. It allows you to take an instance variable,
1860
+ # class variable, or global variable and omit the braces when
1861
+ # interpolating. For example, if you wanted to interpolate the instance
1862
+ # variable @foo into a string, you could do "#@foo".
1863
# Builds the :string_dvar node on top of the @embvar scanner event: the event
# hash is updated in place to wrap var_ref and to end where var_ref ends.
def on_string_dvar(var_ref)
  embvar = find_scanner_event(:@embvar)

  embvar.merge!(
    type: :string_dvar,
    body: [var_ref],
    end: var_ref[:end],
    char_end: var_ref[:char_end]
  )
end
1871
+
1872
+ # string_embexpr is a parser event that represents interpolated content.
1873
+ # It can go in a bunch of different parent nodes, including regexp, strings,
1874
+ # xstrings, heredocs, dyna_symbols, etc. Basically it's anywhere you see
1875
+ # the #{} construct.
1876
# Builds a :string_embexpr node spanning from the @embexpr_beg event to the
# @embexpr_end event. The contained statements are bound to the range between
# the end of the opening event and the start of the closing event.
def on_string_embexpr(stmts)
  opening = find_scanner_event(:@embexpr_beg)
  closing = find_scanner_event(:@embexpr_end)

  stmts.bind(opening[:char_end], closing[:char_start])

  { type: :string_embexpr, body: [stmts] }.merge(
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
1891
+
1892
+ # String literals are either going to be a normal string or they're going
1893
+ # to be a heredoc if we've just closed a heredoc.
1894
# Finishes a string. If the most recent entry in @heredocs has its :ending
# set, that entry is popped and given the string's body. Otherwise a
# :string_literal node is built from the @tstring_beg and @tstring_end
# events, recording the opening event's body as the quote.
def on_string_literal(string)
  pending = @heredocs[-1]
  return @heredocs.pop.merge!(body: string[:body]) if pending && pending[:ending]

  opening = find_scanner_event(:@tstring_beg)
  closing = find_scanner_event(:@tstring_end)

  { type: :string_literal, body: string[:body], quote: opening[:body] }.merge(
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
1914
+
1915
+ # A super is a parser event that represents using the super keyword with
1916
+ # any number of arguments. It can optionally use parentheses (represented
1917
+ # by an arg_paren node) or just skip straight to the arguments (with an
1918
+ # args_add_block node).
1919
# Builds the :super node on top of the `super` keyword scanner event: the
# event hash is updated in place to wrap contents and to end where contents
# ends.
def on_super(contents)
  keyword = find_scanner_event(:@kw, 'super')

  keyword.merge!(
    type: :super,
    body: [contents],
    end: contents[:end],
    char_end: contents[:char_end]
  )
end
1927
+
1928
+ # A symbol is a parser event that immediately descends from a symbol
1929
+ # literal and contains an ident representing the contents of the symbol.
1930
# Builds a :symbol node as a copy of ident that wraps the original ident as
# its body.
def on_symbol(ident)
  # Ripper lexes the contents of a symbol as a keyword when they match one
  # (e.g. :class yields a @kw event rather than an @ident event). Leaving that
  # event on the stack would corrupt the location lookups of other nodes, so
  # the most recent scanner event is explicitly discarded here.
  scanner_events.pop

  overrides = { type: :symbol, body: [ident] }
  ident.merge(overrides)
end
1945
+
1946
+ # A symbol_literal represents a symbol in the system with no interpolation
1947
+ # (as opposed to a dyna_symbol). As its only argument it accepts either a
1948
+ # symbol node (for most cases) or an ident node (in the case that we're
1949
+ # using bare words, as in an alias node like alias foo bar).
1950
# Builds a :symbol_literal node. A bare @ident is copied and wrapped as the
# body of the new node. Anything else is retagged in place, and its starting
# offset is moved back to the start of the @symbeg scanner event.
def on_symbol_literal(contents)
  if contents[:type] == :@ident
    return contents.merge(type: :symbol_literal, body: [contents])
  end

  symbeg = find_scanner_event(:@symbeg)
  contents.merge!(type: :symbol_literal, char_start: symbeg[:char_start])
end
1958
+
1959
+ # symbols_new is a parser event that represents the beginning of a symbol
1960
+ # literal array that accepts interpolation, like %I[one #{two} three]. It
1961
+ # can be followed by any number of symbols_add events, which we'll append
1962
+ # onto an array body.
1963
# Turns the @symbols_beg scanner event, in place, into an empty :symbols node.
def on_symbols_new
  opening = find_scanner_event(:@symbols_beg)
  opening.merge!(type: :symbols, body: [])
end
752
1966
 
753
- body[:comma] = buffer.strip.end_with?(',')
754
- end
1967
+ # symbols_add is a parser event that represents an element inside of a
1968
+ # symbol literal array that accepts interpolation, like
1969
+ # %I[one #{two} three]. It accepts as arguments the parent symbols node as
1970
+ # well as a word_add parser event.
1971
# Appends word_add to the symbols node's body and moves the node's ending
# location to the end of that element. The symbols hash is mutated in place
# and returned.
def on_symbols_add(symbols, word_add)
  symbols[:body] << word_add
  symbols[:end] = word_add[:end]
  symbols[:char_end] = word_add[:char_end]
  symbols
end
1978
+
1979
+ # A helper function to find a :: operator for the next two nodes. We do
1980
+ # special handling instead of using find_scanner_event here because we
1981
+ # don't pop off all of the :: operators so you could end up getting the
1982
+ # wrong information if you have for instance ::X::Y::Z.
1983
# Returns the last scanner event that is a `::` operator starting before the
# given const. The event is looked up by index and left on the stack.
def find_colon2_before(const)
  limit = const[:char_start]

  index =
    scanner_events.rindex do |event|
      event[:type] == :@op && event[:body] == '::' && event[:char_start] < limit
    end

  scanner_events[index]
end
1992
+
1993
+ # A top_const_field is a parser event that is always the child of some
1994
+ # kind of assignment. It represents when you're assigning to a constant
1995
+ # that is being referenced at the top level. For example:
1996
+ #
1997
+ # ::X = 1
1998
+ #
1999
# Builds a :top_const_field node as a copy of const that wraps the original
# const and starts at the `::` operator found before it.
def on_top_const_field(const)
  colon2 = find_colon2_before(const)

  overrides = {
    type: :top_const_field,
    body: [const],
    start: colon2[:start],
    char_start: colon2[:char_start]
  }

  const.merge(overrides)
end
2008
+
2009
+ # A top_const_ref is a parser event that is very similar to
2010
+ # top_const_field except that it is not involved in an assignment. It
2011
+ # looks like the following example:
2012
+ #
2013
+ # ::X
2014
+ #
2015
# Builds a :top_const_ref node as a copy of const that wraps the original
# const and starts at the `::` operator found before it.
def on_top_const_ref(const)
  colon2 = find_colon2_before(const)

  overrides = {
    type: :top_const_ref,
    body: [const],
    start: colon2[:start],
    char_start: colon2[:char_start]
  }

  const.merge(overrides)
end
2024
+
2025
+ # A unary node represents a unary method being called on an expression, as
2026
+ # in !, ~, or not. We have somewhat special handling of the not operator
2027
+ # since if it has parentheses they don't get reported as a paren node for
2028
+ # some reason.
2029
# Builds a :unary node on top of the operator's scanner event, which is
# updated in place.
#
# For every operator other than `not`, the @op event is used and the node
# ends where the value ends. For `not`, the `not` keyword event is used; if
# an opening parenthesis sits between the keyword and the value, the node
# ends at the @rparen event instead and the node is flagged with paren: true.
def on_unary(oper, value)
  unless oper == :not
    return(
      find_scanner_event(:@op).merge!(
        type: :unary,
        oper: oper[0],
        body: [value],
        end: value[:end],
        char_end: value[:char_end]
      )
    )
  end

  keyword = find_scanner_event(:@kw, 'not')

  # Only the source text between the keyword and the operand is inspected.
  paren = source[keyword[:char_end]...value[:char_start]].include?('(')
  closing = paren ? find_scanner_event(:@rparen) : value

  keyword.merge!(
    type: :unary,
    oper: oper,
    body: [value],
    end: closing[:end],
    char_end: closing[:char_end],
    paren: paren
  )
end
2054
+
2055
+ # undef nodes represent using the keyword undef. It accepts as an argument
2056
+ # an array of symbol_literal nodes that represent each message that the
2057
+ # user is attempting to undefine. We use the keyword to get the beginning
2058
+ # location and the last symbol to get the ending.
2059
# Builds the :undef node on top of the `undef` keyword scanner event: the
# event hash is updated in place to hold every symbol literal and to end
# where the last of them ends.
def on_undef(symbol_literals)
  keyword = find_scanner_event(:@kw, 'undef')
  final = symbol_literals.last

  keyword.merge!(
    type: :undef,
    body: symbol_literals,
    end: final[:end],
    char_end: final[:char_end]
  )
end
2069
+
2070
+ # unless is a parser event that represents the first clause in an unless
2071
+ # chain. It accepts as arguments the predicate of the unless, the
2072
+ # statements that are contained within the unless clause, and the optional
2073
+ # consequent clause.
2074
# Builds an :unless node that starts at the `unless` keyword event. It ends
# at the consequent clause when one is given, and otherwise at the `end`
# keyword event. The statements are bound to the range between the end of the
# predicate and the start of whichever node closes the clause.
def on_unless(predicate, stmts, consequent)
  opening = find_scanner_event(:@kw, 'unless')
  closing = consequent || find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  { type: :unless, body: [predicate, stmts, consequent] }.merge(
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
760
2089
 
761
- range = left[:char_start]..left[:char_end]
762
- left[:comma] = source[range].strip.end_with?(',')
763
- end
2090
+ # unless_mod is a parser event that represents the modifier form of an
2091
+ # unless statement. It accepts as arguments the predicate of the unless
2092
+ # and the statement that is contained within the unless clause.
2093
# Builds an :unless_mod node that starts where the statement starts and ends
# where the predicate ends.
def on_unless_mod(predicate, statement)
  # The return value is unused; the lookup is kept for its effect on the
  # scanner event stack (presumably it consumes the keyword - confirm against
  # find_scanner_event).
  find_scanner_event(:@kw, 'unless')

  { type: :unless_mod, body: [predicate, statement] }.merge(
    start: statement[:start],
    char_start: statement[:char_start],
    end: predicate[:end],
    char_end: predicate[:char_end]
  )
end
2105
+
2106
+ # until is a parser event that represents an until loop. It accepts as
2107
+ # arguments the predicate to the until and the statements that are
2108
+ # contained within the until clause.
2109
# Builds an :until node spanning from the `until` keyword event to the `end`
# keyword event. The statements are bound to the range between the end of the
# predicate and the start of the closing keyword.
def on_until(predicate, stmts)
  opening = find_scanner_event(:@kw, 'until')
  closing = find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  { type: :until, body: [predicate, stmts] }.merge(
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
2124
+
2125
+ # until_mod is a parser event that represents the modifier form of an
2126
+ # until loop. It accepts as arguments the predicate to the until and the
2127
+ # statement that is contained within the until loop.
2128
# Builds an :until_mod node that starts where the statement starts and ends
# where the predicate ends.
def on_until_mod(predicate, statement)
  # The return value is unused; the lookup is kept for its effect on the
  # scanner event stack (presumably it consumes the keyword - confirm against
  # find_scanner_event).
  find_scanner_event(:@kw, 'until')

  { type: :until_mod, body: [predicate, statement] }.merge(
    start: statement[:start],
    char_start: statement[:char_start],
    end: predicate[:end],
    char_end: predicate[:char_end]
  )
end
2140
+
2141
+ # var_alias is a parser event that represents when you're using the alias
2142
+ # keyword with global variable arguments. You can optionally use
2143
+ # parentheses with this keyword, so we either track the location
2144
+ # information based on those or the final argument to the alias method.
2145
# Builds a :var_alias node starting at the `alias` keyword event. If an
# opening parenthesis sits between the keyword and the left argument, the
# node ends at the @rparen event; otherwise it ends where the right argument
# ends.
def on_var_alias(left, right)
  keyword = find_scanner_event(:@kw, 'alias')

  gap = source[keyword[:char_end]...left[:char_start]]
  closing = gap.include?('(') ? find_scanner_event(:@rparen) : right

  { type: :var_alias, body: [left, right] }.merge(
    start: keyword[:start],
    char_start: keyword[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
2160
+
2161
+ # var_ref is a parser event that represents using either a local variable,
2162
+ # a nil literal, a true or false literal, or a numbered block variable.
2163
# Builds a :var_ref node as a copy of contents that wraps the original
# contents as its body. The argument itself is not modified.
def on_var_ref(contents)
  overrides = { type: :var_ref, body: [contents] }
  contents.merge(overrides)
end
2166
+
2167
+ # var_field is a parser event that represents a variable that is being
2168
+ # assigned a value. As such, it is always a child of an assignment type
2169
+ # node. For example, in the following example foo is a var_field:
2170
+ #
2171
+ # foo = 1
2172
+ #
2173
# Builds a :var_field node. With an ident, the node is a copy of the ident
# that wraps the original as its body. Without one, the node has an empty
# body and carries no location information.
def on_var_field(ident)
  # You can hit the nil case if you're assigning to a splat using pattern
  # matching syntax in Ruby 2.7+
  return { type: :var_field, body: [] } unless ident

  ident.merge(type: :var_field, body: [ident])
end
2182
+
2183
+ # vcall nodes are any plain named thing with Ruby that could be either a
2184
+ # local variable or a method call. They accept as an argument the ident
2185
+ # scanner event that contains their content.
2186
+ #
2187
+ # Access controls like private, protected, and public are reported as
2188
+ # vcall nodes since they're technically method calls. We want to be able
2189
+ # add new lines around them as necessary, so here we're going to
2190
+ # explicitly track those as a different node type.
2191
# Builds either an :access_ctrl or a :vcall node as a copy of ident that
# wraps the original ident. The node is an :access_ctrl only when the name is
# private, protected, or public AND the stripped text of the current source
# line is exactly that name.
def on_vcall(ident)
  @controls ||= %w[private protected public].freeze

  name = ident[:body]
  standalone = @controls.include?(name) && name == lines[lineno - 1].strip

  ident.merge(type: standalone ? :access_ctrl : :vcall, body: [ident])
end
2204
+
2205
+ # void_stmt is a special kind of parser event that represents an empty lexical
2206
+ # block of code. It often will have comments attached to it, so it requires
2207
+ # some special handling.
2208
# Creates a :void_stmt node whose starting and ending location are both the
# current line number and character position.
def on_void_stmt
  line = lineno
  offset = char_pos

  {
    type: :void_stmt,
    start: line,
    end: line,
    char_start: offset,
    char_end: offset
  }
end
2217
+
2218
+ # when is a parser event that represents another clause in a case chain.
2219
+ # It accepts as arguments the predicate of the when, the statements that
2220
+ # are contained within the when clause, and the optional consequent
2221
+ # clause.
2222
# Builds a :when node that starts at the `when` keyword event. It ends at the
# consequent clause when one is given, and otherwise at the `end` keyword
# event. The statements are bound to the range between the end of the
# predicate and the start of whichever node closes the clause.
def on_when(predicate, stmts, consequent)
  opening = find_scanner_event(:@kw, 'when')
  closing = consequent || find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  { type: :when, body: [predicate, stmts, consequent] }.merge(
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
2237
+
2238
+ # while is a parser event that represents a while loop. It accepts as
2239
+ # arguments the predicate to the while and the statements that are
2240
+ # contained within the while clause.
2241
# Builds a :while node spanning from the `while` keyword event to the `end`
# keyword event. The statements are bound to the range between the end of the
# predicate and the start of the closing keyword.
def on_while(predicate, stmts)
  opening = find_scanner_event(:@kw, 'while')
  closing = find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  { type: :while, body: [predicate, stmts] }.merge(
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  )
end
2256
+
2257
+ # while_mod is a parser event that represents the modifier form of a
2258
+ # while loop. It accepts as arguments the predicate to the while and the
2259
+ # statement that is contained within the while loop.
2260
# Builds a :while_mod node that starts where the statement starts and ends
# where the predicate ends.
def on_while_mod(predicate, statement)
  # The return value is unused; the lookup is kept for its effect on the
  # scanner event stack (presumably it consumes the keyword - confirm against
  # find_scanner_event).
  find_scanner_event(:@kw, 'while')

  { type: :while_mod, body: [predicate, statement] }.merge(
    start: statement[:start],
    char_start: statement[:char_start],
    end: predicate[:end],
    char_end: predicate[:char_end]
  )
end
2272
+
2273
+ # word_new is a parser event that represents the beginning of a word
2274
+ # within a special array literal (either strings or symbols) that accepts
2275
+ # interpolation. For example, in the following array, there are three
2276
+ # word nodes:
2277
+ #
2278
+ # %W[one a#{two}a three]
2279
+ #
2280
+ # Each word inside that array is represented as its own node, which is in
2281
+ # terms of the parser a tree of word_new and word_add nodes. For our
2282
+ # purposes, we're going to report this as a word node and build up an
2283
+ # array body of our parts.
2284
# Creates a fresh, empty :word node. It carries no location information.
def on_word_new
  word = { type: :word }
  word[:body] = []
  word
end
2287
+
2288
+ # word_add is a parser event that represents a piece of a word within a
2289
+ # special array literal that accepts interpolation. It accepts as
2290
+ # arguments the parent word node as well as the additional piece of the
2291
+ # word, which can be either a @tstring_content node for a plain string
2292
+ # piece or a string_embexpr for an interpolated piece.
2293
# Adds a piece to a word node.
#
# For the first piece, the empty word is discarded and a new :word node is
# returned as a copy of the piece, so that the word takes its bounds from the
# piece. For later pieces, the word is mutated in place: the piece is
# appended and the word's ending location moves to the end of that piece.
def on_word_add(word, piece)
  return piece.merge(type: :word, body: [piece]) if word[:body].empty?

  word[:body] << piece
  word[:end] = piece[:end]
  word[:char_end] = piece[:char_end]
  word
end
2304
+
2305
+ # words_new is a parser event that represents the beginning of a string
2306
+ # literal array that accepts interpolation, like %W[one #{two} three]. It
2307
+ # can be followed by any number of words_add events, which we'll append
2308
+ # onto an array body.
2309
# Turns the @words_beg scanner event, in place, into an empty :words node.
def on_words_new
  opening = find_scanner_event(:@words_beg)
  opening.merge!(type: :words, body: [])
end
2312
+
2313
+ # words_add is a parser event that represents an element inside of a
2314
+ # string literal array that accepts interpolation, like
2315
+ # %W[one #{two} three]. It accepts as arguments the parent words node as
2316
+ # well as a word_add parser event.
2317
# Appends word_add to the words node's body and moves the node's ending
# location to the end of that element. The words hash is mutated in place and
# returned.
def on_words_add(words, word_add)
  words[:body] << word_add
  words[:end] = word_add[:end]
  words[:char_end] = word_add[:char_end]
  words
end
2324
+
2325
+ # xstring_new is a parser event that represents the beginning of a string
2326
+ # of commands that gets sent out to the terminal, like `ls`. It can
2327
+ # optionally include interpolation much like a regular string, so we're
2328
+ # going to build up an array body.
2329
+ #
2330
+ # If the xstring actually starts with a heredoc declaration, then we're
2331
+ # going to let heredocs continue to do their thing and instead just use
2332
+ # its location information.
2333
# Starts a new :xstring node with an empty body.
#
# If the most recent entry in @heredocs was opened with a backtick (as in
# <<-`SHELL`), the node is a copy of that heredoc entry so that it carries
# the heredoc's location information. Otherwise the node is built, in place,
# on top of the @backtick scanner event.
def on_xstring_new
  heredoc = @heredocs[-1]

  # The fourth character of the opener is compared (==), never assigned.
  # Using = here would overwrite that character of the heredoc opener and make
  # the condition truthy for every pending heredoc.
  if heredoc && heredoc[:beging][3] == '`'
    heredoc.merge(type: :xstring, body: [])
  else
    find_scanner_event(:@backtick).merge!(type: :xstring, body: [])
  end
end
2342
+
2343
+ # xstring_add is a parser event that represents a piece of a string of
2344
+ # commands that gets sent out to the terminal, like `ls`. It accepts two
2345
+ # arguments, the parent xstring node as well as the piece that is being
2346
+ # added to the string. Because it supports interpolation this is either a
2347
+ # tstring_content scanner event representing bare string content or a
2348
+ # string_embexpr representing interpolated content.
2349
# Appends piece to the xstring node's body and moves the node's ending
# location to the end of that piece. The xstring hash is mutated in place and
# returned.
def on_xstring_add(xstring, piece)
  xstring[:body] << piece
  xstring[:end] = piece[:end]
  xstring[:char_end] = piece[:char_end]
  xstring
end
2356
+
2357
+ # xstring_literal is a parser event that represents a string of commands
2358
+ # that gets sent to the terminal, like `ls`. It accepts as its only
2359
+ # argument an xstring node that is a built up array representation of all
2360
+ # of the parts of the string (including the plain string content and the
2361
+ # interpolated content).
2362
+ #
2363
+ # They can also use heredocs to present themselves, as in the example:
2364
+ #
2365
+ # <<-`SHELL`
2366
+ # ls
2367
+ # SHELL
2368
+ #
2369
+ # In this case we need to change the node type to be a heredoc instead of
2370
+ # an xstring_literal in order to get the right formatting.
2371
# Finishes a string of shell commands.
#
# If the most recent entry in @heredocs was opened with a backtick (as in
# <<-`SHELL`), that heredoc entry receives the xstring's body and is
# returned. Otherwise the xstring is retagged in place as an
# :xstring_literal that ends at the @tstring_end scanner event.
def on_xstring_literal(xstring)
  heredoc = @heredocs[-1]

  # The fourth character of the opener is compared (==), never assigned.
  # Using = here would overwrite that character of the heredoc opener and
  # replace the body of whatever heredoc happens to be pending.
  if heredoc && heredoc[:beging][3] == '`'
    heredoc.merge!(body: xstring[:body])
  else
    ending = find_scanner_event(:@tstring_end)
    xstring.merge!(
      type: :xstring_literal, end: ending[:end], char_end: ending[:char_end]
    )
  end
end
2383
+
2384
+ # yield is a parser event that represents using the yield keyword with
2385
+ # arguments. It accepts as an argument an args_add_block event that
2386
+ # contains all of the arguments being passed.
2387
# Builds the :yield node on top of the `yield` keyword scanner event: the
# event hash is updated in place to wrap the arguments and to end where the
# arguments end.
def on_yield(args_add_block)
  keyword = find_scanner_event(:@kw, 'yield')

  keyword.merge!(
    type: :yield,
    body: [args_add_block],
    end: args_add_block[:end],
    char_end: args_add_block[:char_end]
  )
end
2395
+
2396
+ # yield0 is a parser event that represents the bare yield keyword. It has
2397
+ # no body as it accepts no arguments. This is as opposed to the yield
2398
+ # parser event, which is the version where you're yielding one or more
2399
+ # values.
2400
# Retags, in place, the scanner event found for the `yield` keyword as a
# :yield0 node and returns that same hash.
def on_yield0
  keyword = find_scanner_event(:@kw, 'yield')
  keyword.merge!(type: :yield0)
end
2403
+
2404
+ # zsuper is a parser event that represents the bare super keyword. It has
2405
+ # no body as it accepts no arguments. This is as opposed to the super
2406
+ # parser event, which is the version where you're calling super with one
2407
+ # or more values.
2408
# Retags, in place, the scanner event found for the `super` keyword as a
# :zsuper node and returns that same hash.
def on_zsuper
  keyword = find_scanner_event(:@kw, 'super')
  keyword.merge!(type: :zsuper)
end
767
2411
  end
768
2412
 
769
2413
  # If this is the main file we're executing, then most likely this is being
770
- # executed from the parse.js spawn. In that case, read the ruby source from
2414
+ # executed from the parser.js spawn. In that case, read the ruby source from
771
2415
  # stdin and report back the AST over stdout.
772
2416
 
773
2417
  if $0 == __FILE__