prettier 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  const {
2
+ breakParent,
2
3
  concat,
3
4
  dedent,
4
5
  group,
@@ -40,7 +41,6 @@ module.exports = {
40
41
 
41
42
  return group(concat(parts));
42
43
  },
43
- embdoc: (path, _opts, _print) => concat([trim, path.getValue().body]),
44
44
  paren: (path, opts, print) => {
45
45
  if (!path.getValue().body[0]) {
46
46
  return "()";
@@ -68,6 +68,28 @@ module.exports = {
68
68
  concat([join(hardline, path.map(print, "body")), hardline]),
69
69
  stmts: (path, opts, print) => {
70
70
  const stmts = path.getValue().body;
71
+
72
+ // This is a special case where we have only comments inside a statement
73
+ // list. In this case we want to avoid doing any kind of line number
74
+ // tracking and just print out the comments.
75
+ if (
76
+ stmts.length === 1 &&
77
+ stmts[0].type === "void_stmt" &&
78
+ stmts[0].comments
79
+ ) {
80
+ const comments = path.map(
81
+ (commentPath, index) => {
82
+ stmts[0].comments[index].printed = true;
83
+ return opts.printer.printComment(commentPath);
84
+ },
85
+ "body",
86
+ 0,
87
+ "comments"
88
+ );
89
+
90
+ return concat([breakParent, join(hardline, comments)]);
91
+ }
92
+
71
93
  const parts = [];
72
94
  let lineNo = null;
73
95
 
@@ -8,30 +8,30 @@ const {
8
8
  join
9
9
  } = require("../prettier");
10
10
 
11
- const { concatBody, empty, makeList, prefix, surround } = require("../utils");
12
-
13
11
  // If there is some part of this string that matches an escape sequence or that
14
12
  // contains the interpolation pattern ("#{"), then we are locked into whichever
15
13
  // quote the user chose. (If they chose single quotes, then double quoting
16
14
  // would activate the escape sequence, and if they chose double quotes, then
17
15
  // single quotes would deactivate it.)
18
- const isQuoteLocked = (string) =>
19
- string.body.some(
16
+ function isQuoteLocked(node) {
17
+ return node.body.some(
20
18
  (part) =>
21
19
  part.type === "@tstring_content" &&
22
20
  (part.body.includes("#{") || part.body.includes("\\"))
23
21
  );
22
+ }
24
23
 
25
24
  // A string is considered to be able to use single quotes if it contains only
26
25
  // plain string content and that content does not contain a single quote.
27
- const isSingleQuotable = (string) =>
28
- string.body.every(
26
+ function isSingleQuotable(node) {
27
+ return node.body.every(
29
28
  (part) => part.type === "@tstring_content" && !part.body.includes("'")
30
29
  );
30
+ }
31
31
 
32
32
  const quotePattern = new RegExp("\\\\([\\s\\S])|(['\"])", "g");
33
33
 
34
- const normalizeQuotes = (content, enclosingQuote, originalQuote) => {
34
+ function normalizeQuotes(content, enclosingQuote, originalQuote) {
35
35
  const replaceOther = ["'", '"'].includes(originalQuote);
36
36
  const otherQuote = enclosingQuote === '"' ? "'" : '"';
37
37
 
@@ -52,7 +52,7 @@ const normalizeQuotes = (content, enclosingQuote, originalQuote) => {
52
52
 
53
53
  return `\\${escaped}`;
54
54
  });
55
- };
55
+ }
56
56
 
57
57
  const quotePairs = {
58
58
  "(": ")",
@@ -61,7 +61,7 @@ const quotePairs = {
61
61
  "<": ">"
62
62
  };
63
63
 
64
- const getClosingQuote = (quote) => {
64
+ function getClosingQuote(quote) {
65
65
  if (!quote.startsWith("%")) {
66
66
  return quote;
67
67
  }
@@ -72,40 +72,89 @@ const getClosingQuote = (quote) => {
72
72
  }
73
73
 
74
74
  return boundary;
75
- };
75
+ }
76
76
 
77
- module.exports = {
78
- "@CHAR": (path, { preferSingleQuotes }, _print) => {
79
- const { body } = path.getValue();
77
+ // Prints a @CHAR node. @CHAR nodes are special character strings that usually
78
+ // are strings of length 1. If they're any longer then we'll try to apply the
79
+ // correct quotes.
80
+ function printChar(path, { preferSingleQuotes }, _print) {
81
+ const { body } = path.getValue();
80
82
 
81
- if (body.length !== 2) {
82
- return body;
83
+ if (body.length !== 2) {
84
+ return body;
85
+ }
86
+
87
+ const quote = preferSingleQuotes ? "'" : '"';
88
+ return concat([quote, body.slice(1), quote]);
89
+ }
90
+
91
+ // Prints a dynamic symbol. Assumes there's a quote property attached to the
92
+ // node that will tell us which quote to use when printing. We're just going to
93
+ // use whatever quote was provided.
94
+ function printDynaSymbol(path, opts, print) {
95
+ const { quote } = path.getValue();
96
+
97
+ return concat([":", quote].concat(path.map(print, "body")).concat(quote));
98
+ }
99
+
100
+ // Prints out an interpolated variable in the string by converting it into an
101
+ // embedded expression.
102
+ function printStringDVar(path, opts, print) {
103
+ return concat(["#{", path.call(print, "body", 0), "}"]);
104
+ }
105
+
106
+ // Prints out a literal string. This function does its best to respect the
107
+ // wishes of the user with regards to single versus double quotes, but if the
108
+ // string contains any escape expressions then it will just keep the original
109
+ // quotes.
110
+ function printStringLiteral(path, { preferSingleQuotes }, print) {
111
+ const node = path.getValue();
112
+
113
+ // If the string is empty, it will not have any parts, so just print out the
114
+ // quotes corresponding to the config
115
+ if (node.body.length === 0) {
116
+ return preferSingleQuotes ? "''" : '""';
117
+ }
118
+
119
+ // Determine the quote that should enclose the new string
120
+ let quote;
121
+ if (isQuoteLocked(node)) {
122
+ quote = node.quote;
123
+ } else {
124
+ quote = preferSingleQuotes && isSingleQuotable(node) ? "'" : '"';
125
+ }
126
+
127
+ const parts = node.body.map((part, index) => {
128
+ if (part.type !== "@tstring_content") {
129
+ // In this case, the part of the string is an embedded expression
130
+ return path.call(print, "body", index);
83
131
  }
84
132
 
85
- const quote = preferSingleQuotes ? "'" : '"';
86
- return body.length === 2 ? concat([quote, body.slice(1), quote]) : body;
87
- },
88
- dyna_symbol: (path, opts, print) => {
89
- const { quote } = path.getValue();
133
+ // In this case, the part of the string is just regular string content
134
+ return join(
135
+ literalline,
136
+ normalizeQuotes(part.body, quote, node.quote).split("\n")
137
+ );
138
+ });
90
139
 
91
- return concat([":", quote, concat(path.call(print, "body", 0)), quote]);
92
- },
93
- heredoc: (path, opts, print) => {
94
- const { beging, body, ending } = path.getValue();
140
+ return concat([quote].concat(parts).concat(getClosingQuote(quote)));
141
+ }
95
142
 
96
- const parts = body.map((part, index) => {
97
- if (part.type !== "@tstring_content") {
98
- // In this case, the part of the string is an embedded expression
99
- return path.call(print, "body", index);
100
- }
143
+ // Prints out a symbol literal. Its child will always be the ident that
144
+ // represents the string content of the symbol.
145
+ function printSymbolLiteral(path, opts, print) {
146
+ return concat([":", path.call(print, "body", 0)]);
147
+ }
101
148
 
102
- // In this case, the part of the string is just regular string content
103
- return join(literalline, part.body.split("\n"));
104
- });
149
+ // Prints out an xstring literal. Its child is an array of string parts,
150
+ // including plain string content and interpolated content.
151
+ function printXStringLiteral(path, opts, print) {
152
+ return concat(["`"].concat(path.map(print, "body")).concat("`"));
153
+ }
105
154
 
106
- return concat([beging, literalline, concat(parts), ending]);
107
- },
108
- string: makeList,
155
+ module.exports = {
156
+ "@CHAR": printChar,
157
+ dyna_symbol: printDynaSymbol,
109
158
  string_concat: (path, opts, print) =>
110
159
  group(
111
160
  concat([
@@ -114,14 +163,14 @@ module.exports = {
114
163
  indent(concat([hardline, path.call(print, "body", 1)]))
115
164
  ])
116
165
  ),
117
- string_dvar: surround("#{", "}"),
166
+ string_dvar: printStringDVar,
118
167
  string_embexpr: (path, opts, print) => {
119
168
  const parts = path.call(print, "body", 0);
120
169
 
121
170
  // If the interpolated expression is inside of an xstring literal (a string
122
171
  // that gets sent to the command line) then we don't want to automatically
123
172
  // indent, as this can lead to some very odd looking expressions
124
- if (path.getParentNode().type === "xstring") {
173
+ if (path.getParentNode().type === "xstring_literal") {
125
174
  return concat(["#{", parts, "}"]);
126
175
  }
127
176
 
@@ -129,47 +178,7 @@ module.exports = {
129
178
  concat(["#{", indent(concat([softline, parts])), concat([softline, "}"])])
130
179
  );
131
180
  },
132
- string_literal: (path, { preferSingleQuotes }, print) => {
133
- const stringLiteral = path.getValue();
134
- const string = stringLiteral.body[0];
135
-
136
- // If the string is empty, it will not have any parts, so just print out the
137
- // quotes corresponding to the config
138
- if (string.body.length === 0) {
139
- return preferSingleQuotes ? "''" : '""';
140
- }
141
-
142
- // Determine the quote that should enclose the new string
143
- let quote;
144
- if (isQuoteLocked(string)) {
145
- ({ quote } = stringLiteral);
146
- } else {
147
- quote = preferSingleQuotes && isSingleQuotable(string) ? "'" : '"';
148
- }
149
-
150
- const parts = string.body.map((part, index) => {
151
- if (part.type !== "@tstring_content") {
152
- // In this case, the part of the string is an embedded expression
153
- return path.call(print, "body", 0, "body", index);
154
- }
155
-
156
- // In this case, the part of the string is just regular string content
157
- return join(
158
- literalline,
159
- normalizeQuotes(part.body, quote, stringLiteral.quote).split("\n")
160
- );
161
- });
162
-
163
- return concat([quote].concat(parts).concat(getClosingQuote(quote)));
164
- },
165
- symbol: prefix(":"),
166
- symbol_literal: concatBody,
167
- word_add: concatBody,
168
- word_new: empty,
169
- xstring: makeList,
170
- xstring_literal: (path, opts, print) => {
171
- const parts = path.call(print, "body", 0);
172
-
173
- return concat(["`"].concat(parts).concat("`"));
174
- }
181
+ string_literal: printStringLiteral,
182
+ symbol_literal: printSymbolLiteral,
183
+ xstring_literal: printXStringLiteral
175
184
  };
@@ -0,0 +1,35 @@
1
+ const { align, concat, group, join, line } = require("../prettier");
2
+ const { literal } = require("../utils");
3
+
4
+ function printSuper(path, opts, print) {
5
+ const args = path.getValue().body[0];
6
+
7
+ if (args.type === "arg_paren") {
8
+ // In case there are explicitly no arguments but they are using parens,
9
+ // we assume they are attempting to override the initializer and pass no
10
+ // arguments up.
11
+ if (args.body[0] === null) {
12
+ return "super()";
13
+ }
14
+
15
+ return concat(["super", path.call(print, "body", 0)]);
16
+ }
17
+
18
+ const keyword = "super ";
19
+ const argsDocs = path.call(print, "body", 0);
20
+
21
+ return group(
22
+ concat([
23
+ keyword,
24
+ align(keyword.length, group(join(concat([",", line]), argsDocs)))
25
+ ])
26
+ );
27
+ }
28
+
29
+ // Version of super without any parens or args.
30
+ const printZSuper = literal("super");
31
+
32
+ module.exports = {
33
+ super: printSuper,
34
+ zsuper: printZSuper
35
+ };
@@ -0,0 +1,42 @@
1
+ const {
2
+ addTrailingComment,
3
+ align,
4
+ concat,
5
+ group,
6
+ join,
7
+ line
8
+ } = require("../prettier");
9
+
10
+ function printUndefSymbol(path, opts, print) {
11
+ const node = path.getValue();
12
+
13
+ // Since we're going to descend into the symbol literal to grab out the ident
14
+ // node, then we need to make sure we copy over any comments as well,
15
+ // otherwise we could accidentally skip printing them.
16
+ if (node.comments) {
17
+ node.comments.forEach((comment) => {
18
+ addTrailingComment(node.body[0], comment);
19
+ });
20
+ }
21
+
22
+ return path.call(print, "body", 0);
23
+ }
24
+
25
+ function printUndef(path, opts, print) {
26
+ const keyword = "undef ";
27
+ const argNodes = path.map(
28
+ (symbolPath) => printUndefSymbol(symbolPath, opts, print),
29
+ "body"
30
+ );
31
+
32
+ return group(
33
+ concat([
34
+ keyword,
35
+ align(keyword.length, join(concat([",", line]), argNodes))
36
+ ])
37
+ );
38
+ }
39
+
40
+ module.exports = {
41
+ undef: printUndef
42
+ };
@@ -0,0 +1,71 @@
1
+ const { spawnSync } = require("child_process");
2
+ const path = require("path");
3
+
4
+ // In order to properly parse ruby code, we need to tell the ruby process to
5
+ // parse using UTF-8. Unfortunately, the way that you accomplish this looks
6
+ // different depending on your platform. This object below represents all of
7
+ // the possible values of process.platform per:
8
+ // https://nodejs.org/api/process.html#process_process_platform
9
+ const LANG = {
10
+ aix: "C.UTF-8",
11
+ darwin: "en_US.UTF-8",
12
+ freebsd: "C.UTF-8",
13
+ linux: "C.UTF-8",
14
+ openbsd: "C.UTF-8",
15
+ sunos: "C.UTF-8",
16
+ win32: ".UTF-8"
17
+ }[process.platform];
18
+
19
+ // This function is responsible for taking an input string of text and returning
20
+ // to prettier a JavaScript object that is the equivalent AST that represents
21
+ // the code stored in that string. We accomplish this by spawning a new Ruby
22
+ // process of parser.rb and reading JSON off STDOUT.
23
+ function parse(text, _parsers, _opts) {
24
+ const child = spawnSync(
25
+ "ruby",
26
+ ["--disable-gems", path.join(__dirname, "./parser.rb")],
27
+ {
28
+ env: Object.assign({}, process.env, { LANG }),
29
+ input: text,
30
+ maxBuffer: 10 * 1024 * 1024 // 10MB
31
+ }
32
+ );
33
+
34
+ const error = child.stderr.toString();
35
+ if (error) {
36
+ throw new Error(error);
37
+ }
38
+
39
+ const response = child.stdout.toString();
40
+ return JSON.parse(response);
41
+ }
42
+
43
+ const pragmaPattern = /#\s*@(prettier|format)/;
44
+
45
+ // This function handles checking whether or not the source string has the
46
+ // pragma for prettier. This is an optional workflow for incremental adoption.
47
+ function hasPragma(text) {
48
+ return pragmaPattern.test(text);
49
+ }
50
+
51
+ // This function is critical for comments and cursor support, and is responsible
52
+ // for returning the index of the character within the source string that is the
53
+ // beginning of the given node.
54
+ function locStart(node) {
55
+ return node.char_start;
56
+ }
57
+
58
+ // This function is critical for comments and cursor support, and is responsible
59
+ // for returning the index of the character within the source string that is the
60
+ // ending of the given node.
61
+ function locEnd(node) {
62
+ return node.char_end;
63
+ }
64
+
65
+ module.exports = {
66
+ parse,
67
+ astFormat: "ruby",
68
+ hasPragma,
69
+ locStart,
70
+ locEnd
71
+ };
@@ -2,9 +2,9 @@
2
2
 
3
3
  # We implement our own version checking here instead of using Gem::Version so
4
4
  # that we can use the --disable-gems flag.
5
- major, minor, * = RUBY_VERSION.split('.').map(&:to_i)
5
+ RUBY_MAJOR, RUBY_MINOR, * = RUBY_VERSION.split('.').map(&:to_i)
6
6
 
7
- if (major < 2) || ((major == 2) && (minor < 5))
7
+ if (RUBY_MAJOR < 2) || ((RUBY_MAJOR == 2) && (RUBY_MINOR < 5))
8
8
  warn(
9
9
  "Ruby version #{RUBY_VERSION} not supported. " \
10
10
  'Please upgrade to 2.5.0 or above.'
@@ -13,761 +13,2405 @@ if (major < 2) || ((major == 2) && (minor < 5))
13
13
  exit 1
14
14
  end
15
15
 
16
+ require 'delegate'
16
17
  require 'json' unless defined?(JSON)
17
18
  require 'ripper'
18
19
 
19
20
  module Prettier; end
20
21
 
21
22
  class Prettier::Parser < Ripper
22
- attr_reader :source, :lines, :__end__
23
+ attr_reader :source, :lines, :scanner_events, :line_counts
23
24
 
24
25
  def initialize(source, *args)
25
26
  super(source, *args)
26
27
 
27
28
  @source = source
28
29
  @lines = source.split("\n")
30
+
31
+ @comments = []
32
+ @embdoc = nil
29
33
  @__end__ = nil
34
+
35
+ @heredocs = []
36
+
37
+ @scanner_events = []
38
+ @line_counts = [0]
39
+
40
+ @source.lines.each { |line| @line_counts << @line_counts.last + line.size }
30
41
  end
31
42
 
32
43
  private
33
44
 
45
+ # This represents the current place in the source string that we've gotten to
46
+ # so far. We have a memoized line_counts object that we can use to get the
47
+ # number of characters that we've had to go through to get to the beginning of
48
+ # this line, then we add the number of columns into this line that we've gone
49
+ # through.
50
+ def char_pos
51
+ line_counts[lineno - 1] + column
52
+ end
53
+
54
+ # As we build up a list of scanner events, we'll periodically need to go
55
+ # backwards and find the ones that we've already hit in order to determine the
56
+ # location information for nodes that use them. For example, if you have a
57
+ # module node then you'll look backward for a @module scanner event to
58
+ # determine your start location.
59
+ #
60
+ # This works with nesting since we're deleting scanner events from the list
61
+ # once they've been used up. For example if you had nested module declarations
62
+ # then the innermost declaration would grab the last @module event (which
63
+ # would happen to be the innermost keyword). Then the outer one would only be
64
+ # able to grab the first one. In this way all of the scanner events act as
65
+ # their own stack.
66
+ def find_scanner_event(type, body = :any)
67
+ index =
68
+ scanner_events.rindex do |scanner_event|
69
+ scanner_event[:type] == type &&
70
+ (body == :any || (scanner_event[:body] == body))
71
+ end
72
+
73
+ scanner_events.delete_at(index)
74
+ end
75
+
34
76
  # Scanner events occur when the lexer hits a new token, like a keyword or an
35
77
  # end. These nodes always contain just one argument which is a string
36
78
  # representing the content. For the most part these can just be printed
37
79
  # directly, with very few exceptions.
38
- SCANNER_EVENTS.each do |event|
39
- define_method(:"on_#{event}") do |body|
40
- { type: :"@#{event}", body: body, start: lineno, end: lineno }
80
+ defined = %i[
81
+ comment
82
+ embdoc
83
+ embdoc_beg
84
+ embdoc_end
85
+ heredoc_beg
86
+ heredoc_end
87
+ ignored_nl
88
+ ]
89
+
90
+ (SCANNER_EVENTS - defined).each do |event|
91
+ define_method(:"on_#{event}") do |value|
92
+ char_end = char_pos + value.size
93
+ node = {
94
+ type: :"@#{event}",
95
+ body: value,
96
+ start: lineno,
97
+ end: lineno,
98
+ char_start: char_pos,
99
+ char_end: char_end
100
+ }
101
+
102
+ scanner_events << node
103
+ node
41
104
  end
42
105
  end
43
106
 
44
- # Parser events represent nodes in the ripper abstract syntax tree. The event
45
- # is reported after the children of the node have already been built.
46
- PARSER_EVENTS.each do |event|
47
- define_method(:"on_#{event}") do |*body|
48
- min = body.map { |part| part.is_a?(Hash) ? part[:start] : lineno }.min
49
- { type: event, body: body, start: min || lineno, end: lineno }
50
- end
107
+ # We keep track of each comment as it comes in and then eventually add
108
+ # them to the top of the generated AST so that prettier can start adding
109
+ # them back into the final representation. Comments come in including
110
+ # their starting pound sign and the newline at the end, so we also chop
111
+ # those off.
112
+ #
113
+ # If there is an encoding magic comment at the top of the file, ripper
114
+ # will actually change into that encoding for the storage of the string.
115
+ # This will break everything, so we need to force the encoding back into
116
+ # UTF-8 so that the JSON library won't break.
117
+ def on_comment(value)
118
+ @comments <<
119
+ {
120
+ type: :@comment,
121
+ value: value[1..-1].chomp.force_encoding('UTF-8'),
122
+ start: lineno,
123
+ end: lineno,
124
+ char_start: char_pos,
125
+ char_end: char_pos + value.length - 1
126
+ }
127
+ end
128
+
129
+ # ignored_nl is a special kind of scanner event that passes nil as the value,
130
+ # so we can't do our normal tracking of value.size. Instead of adding a
131
+ # condition to the main SCANNER_EVENTS loop above, we'll just explicitly
132
+ # define the method here. You can trigger the ignored_nl event with the
133
+ # following snippet:
134
+ #
135
+ # foo.bar
136
+ # .baz
137
+ #
138
+ def on_ignored_nl(value)
139
+ {
140
+ type: :ignored_nl,
141
+ body: nil,
142
+ start: lineno,
143
+ end: lineno,
144
+ char_start: char_pos,
145
+ char_end: char_pos
146
+ }
51
147
  end
52
148
 
53
- # Some nodes are lists that come back from the parser. They always start with
54
- # a `*_new` node (or in the case of string, `*_content`) and each additional
55
- # node in the list is a `*_add` node. This module takes those nodes and turns
56
- # them into one node with an array body.
57
- #
58
- # For example, the statement `[a, b, c]` would be parsed as:
59
- #
60
- # [:args_add,
61
- # [:args_add,
62
- # [:args_add,
63
- # [:args_new],
64
- # [:vcall, [:@ident, "a", [1, 1]]]
65
- # ],
66
- # [:vcall, [:@ident, "b", [1, 4]]]
67
- # ],
68
- # [:vcall, [:@ident, "c", [1, 7]]]
69
- # ]
70
- #
71
- # But after this module is applied that is instead parsed as:
72
- #
73
- # [:args,
74
- # [
75
- # [:vcall, [:@ident, "a", [1, 1]]],
76
- # [:vcall, [:@ident, "b", [1, 4]]],
77
- # [:vcall, [:@ident, "c", [1, 7]]]
78
- # ]
79
- # ]
80
- #
81
- # This makes it a lot easier to join things with commas, and ends up resulting
82
- # in a much flatter `prettier` tree once it has been converted. Note that
83
- # because of this module some extra node types are added (the aggregate of
84
- # the previous `*_add` nodes) and some nodes now have arrays in places where
85
- # they previously had single nodes.
86
149
  prepend(
87
150
  Module.new do
88
- events = %i[
89
- args
90
- mlhs
91
- mrhs
92
- qsymbols
93
- qwords
94
- regexp
95
- stmts
96
- string
97
- symbols
98
- words
99
- xstring
100
- ]
101
-
102
151
  private
103
152
 
104
- events.each do |event|
105
- suffix = event == :string ? 'content' : 'new'
153
+ # Handles __END__ syntax, which allows individual scripts to keep content
154
+ # after the main ruby code that can be read through DATA. It looks like:
155
+ #
156
+ # foo.bar
157
+ #
158
+ # __END__
159
+ # some other content that isn't normally read by ripper
160
+ def on___end__(*)
161
+ @__end__ = super(lines[lineno..-1].join("\n"))
162
+ end
106
163
 
107
- define_method(:"on_#{event}_#{suffix}") do
108
- { type: event, body: [], start: lineno, end: lineno }
109
- end
164
+ # Like comments, we need to force the encoding here so JSON doesn't break.
165
+ def on_ident(value)
166
+ super(value.force_encoding('UTF-8'))
167
+ end
110
168
 
111
- define_method(:"on_#{event}_add") do |parts, part|
112
- parts.tap do |node|
113
- node[:body] << part
114
- node[:end] = lineno
115
- end
116
- end
169
+ # Like comments, we need to force the encoding here so JSON doesn't break.
170
+ def on_tstring_content(value)
171
+ super(value.force_encoding('UTF-8'))
117
172
  end
118
173
  end
119
174
  )
120
175
 
121
- # For each node, we need to attach where it came from in order to be able to
122
- # support placing the cursor correctly before and after formatting.
176
+ # A BEGIN node is a parser event that represents the use of the BEGIN
177
+ # keyword, which hooks into the lifecycle of the interpreter. It's a bit
178
+ # of a legacy from the stream operating days, and gets its inspiration
179
+ # from tools like awk. Whatever is inside the "block" will get executed
180
+ # when the program starts. The syntax looks like the following:
123
181
  #
124
- # For most nodes, it's enough to look at the child nodes to determine the
125
- # start of the parent node. However, for some nodes it's necessary to keep
126
- # track of the keywords as they come in from the lexer and to modify the start
127
- # node once we have it.
128
- prepend(
129
- Module.new do
130
- def initialize(source, *args)
131
- super(source, *args)
182
+ # BEGIN {
183
+ # # execute stuff here
184
+ # }
185
+ #
186
+ def on_BEGIN(stmts)
187
+ beging = find_scanner_event(:@lbrace)
188
+ ending = find_scanner_event(:@rbrace)
132
189
 
133
- @scanner_events = []
134
- @line_counts = [0]
190
+ stmts.bind(beging[:char_end], ending[:char_start])
135
191
 
136
- source.lines.each { |line| line_counts << line_counts.last + line.size }
137
- end
192
+ find_scanner_event(:@kw, 'BEGIN').merge!(
193
+ type: :BEGIN,
194
+ body: [stmts],
195
+ end: ending[:end],
196
+ char_end: ending[:char_end]
197
+ )
198
+ end
138
199
 
139
- def self.prepended(base)
140
- base.attr_reader :scanner_events, :line_counts
141
- end
200
+ # An END node is a parser event that represents the use of the END keyword,
201
+ # which hooks into the lifecycle of the interpreter. It's a bit of a
202
+ # legacy from the stream operating days, and gets its inspiration from
203
+ # tools like awk. Whatever is inside the "block" will get executed when
204
+ # the program ends. The syntax looks like the following:
205
+ #
206
+ # END {
207
+ # # execute stuff here
208
+ # }
209
+ #
210
+ def on_END(stmts)
211
+ beging = find_scanner_event(:@lbrace)
212
+ ending = find_scanner_event(:@rbrace)
142
213
 
143
- private
214
+ stmts.bind(beging[:char_end], ending[:char_start])
144
215
 
145
- def char_pos
146
- line_counts[lineno - 1] + column
147
- end
216
+ find_scanner_event(:@kw, 'END').merge!(
217
+ type: :END, body: [stmts], end: ending[:end], char_end: ending[:char_end]
218
+ )
219
+ end
148
220
 
149
- def char_start_for(body)
150
- children = body.length == 1 && body[0].is_a?(Array) ? body[0] : body
151
- char_starts =
152
- children.map { |part| part[:char_start] if part.is_a?(Hash) }.compact
221
+ # alias is a parser event that represents when you're using the alias
222
+ # keyword with regular arguments. This can be either symbol literals or
223
+ # bare words. You can optionally use parentheses with this keyword, so we
224
+ # either track the location information based on those or the final
225
+ # argument to the alias method.
226
+ def on_alias(left, right)
227
+ beging = find_scanner_event(:@kw, 'alias')
228
+
229
+ paren = source[beging[:char_end]...left[:char_start]].include?('(')
230
+ ending = paren ? find_scanner_event(:@rparen) : right
231
+
232
+ {
233
+ type: :alias,
234
+ body: [left, right],
235
+ start: beging[:start],
236
+ char_start: beging[:char_start],
237
+ end: ending[:end],
238
+ char_end: ending[:char_end]
239
+ }
240
+ end
153
241
 
154
- char_starts.min || char_pos
155
- end
242
+ # aref nodes are when you're pulling a value out of a collection at a
243
+ # specific index. Put another way, it's any time you're calling the method
244
+ # #[]. As an example:
245
+ #
246
+ # foo[index]
247
+ #
248
+ # The node usually contains two children, the collection and the index.
249
+ # In some cases, you don't necessarily have the second child node, because
250
+ # you can call procs with a pretty esoteric syntax. In the following
251
+ # example, you wouldn't have a second child, and "foo" would be the first
252
+ # child:
253
+ #
254
+ # foo[]
255
+ #
256
+ def on_aref(collection, index)
257
+ find_scanner_event(:@lbracket)
258
+ ending = find_scanner_event(:@rbracket)
259
+
260
+ {
261
+ type: :aref,
262
+ body: [collection, index],
263
+ start: collection[:start],
264
+ char_start: collection[:char_start],
265
+ end: ending[:end],
266
+ char_end: ending[:char_end]
267
+ }
268
+ end
156
269
 
157
- def find_scanner_event(type, body = :any)
158
- index =
159
- scanner_events.rindex do |scanner_event|
160
- scanner_event[:type] == type &&
161
- (body == :any || (scanner_event[:body] == body))
162
- end
270
+ # aref_field is a parser event that is very similar to aref except that it
271
+ # is being used inside of an assignment.
272
+ def on_aref_field(collection, index)
273
+ find_scanner_event(:@lbracket)
274
+ ending = find_scanner_event(:@rbracket)
275
+
276
+ {
277
+ type: :aref_field,
278
+ body: [collection, index],
279
+ start: collection[:start],
280
+ char_start: collection[:char_start],
281
+ end: ending[:end],
282
+ char_end: ending[:char_end]
283
+ }
284
+ end
163
285
 
164
- scanner_events.delete_at(index)
165
- end
286
+ # args_new is a parser event that represents the beginning of a list of
287
+ # arguments to any method call or an array. It can be followed by any
288
+ # number of args_add events, which we'll append onto an array body.
289
+ def on_args_new
290
+ {
291
+ type: :args,
292
+ body: [],
293
+ start: lineno,
294
+ char_start: char_pos,
295
+ end: lineno,
296
+ char_end: char_pos
297
+ }
298
+ end
299
+
300
+ # args_add is a parser event that represents a single argument inside a
301
+ # list of arguments to any method call or an array. It accepts as
302
+ # arguments the parent args node as well as an arg which can be anything
303
+ # that could be passed as an argument.
304
+ def on_args_add(args, arg)
305
+ if args[:body].empty?
306
+ arg.merge(type: :args, body: [arg])
307
+ else
308
+ args.merge!(
309
+ body: args[:body] << arg, end: arg[:end], char_end: arg[:char_end]
310
+ )
311
+ end
312
+ end
313
+
314
+ # args_add_block is a parser event that represents a list of arguments and
315
+ # potentially a block argument. If no block is passed, then the second
316
+ # argument will be false.
317
+ def on_args_add_block(args, block)
318
+ ending = block || args
319
+
320
+ args.merge(
321
+ type: :args_add_block,
322
+ body: [args, block],
323
+ end: ending[:end],
324
+ char_end: ending[:char_end]
325
+ )
326
+ end
327
+
328
+ # args_add_star is a parser event that represents adding a splat of values
329
+ # to a list of arguments. It accepts as arguments the parent args node as
330
+ # well as the part that is being splatted.
331
+ def on_args_add_star(args, part)
332
+ beging = find_scanner_event(:@op, '*')
333
+ ending = part || beging
334
+
335
+ {
336
+ type: :args_add_star,
337
+ body: [args, part],
338
+ start: beging[:start],
339
+ char_start: beging[:char_start],
340
+ end: ending[:end],
341
+ char_end: ending[:char_end]
342
+ }
343
+ end
344
+
345
+ # args_forward is a parser event that represents forwarding all kinds of
346
+ # arguments onto another method call.
347
+ def on_args_forward
348
+ find_scanner_event(:@op, '...').merge!(type: :args_forward)
349
+ end
350
+
351
+ # arg_paren is a parser event that represents wrapping arguments to a
352
+ # method inside a set of parentheses.
353
+ def on_arg_paren(args)
354
+ beging = find_scanner_event(:@lparen)
355
+ ending = find_scanner_event(:@rparen)
356
+
357
+ {
358
+ type: :arg_paren,
359
+ body: [args],
360
+ start: beging[:start],
361
+ char_start: beging[:char_start],
362
+ end: ending[:end],
363
+ char_end: ending[:char_end]
364
+ }
365
+ end
166
366
 
167
- events = {
168
- BEGIN: [:@kw, 'BEGIN'],
169
- END: [:@kw, 'END'],
170
- alias: [:@kw, 'alias'],
171
- assoc_splat: [:@op, '**'],
172
- arg_paren: :@lparen,
173
- args_add_star: [:@op, '*'],
174
- args_forward: [:@op, '...'],
175
- begin: [:@kw, 'begin'],
176
- blockarg: [:@op, '&'],
177
- brace_block: :@lbrace,
178
- break: [:@kw, 'break'],
179
- case: [:@kw, 'case'],
180
- class: [:@kw, 'class'],
181
- def: [:@kw, 'def'],
182
- defined: [:@kw, 'defined?'],
183
- defs: [:@kw, 'def'],
184
- do_block: [:@kw, 'do'],
185
- else: [:@kw, 'else'],
186
- elsif: [:@kw, 'elsif'],
187
- ensure: [:@kw, 'ensure'],
188
- excessed_comma: :@comma,
189
- for: [:@kw, 'for'],
190
- hash: :@lbrace,
191
- if: [:@kw, 'if'],
192
- in: [:@kw, 'in'],
193
- kwrest_param: [:@op, '**'],
194
- lambda: :@tlambda,
195
- mlhs_paren: :@lparen,
196
- mrhs_add_star: [:@op, '*'],
197
- module: [:@kw, 'module'],
198
- next: [:@kw, 'next'],
199
- paren: :@lparen,
200
- qsymbols_new: :@qsymbols_beg,
201
- qwords_new: :@qwords_beg,
202
- redo: [:@kw, 'redo'],
203
- regexp_literal: :@regexp_beg,
204
- rescue: [:@kw, 'rescue'],
205
- rest_param: [:@op, '*'],
206
- retry: [:@kw, 'retry'],
207
- return0: [:@kw, 'return'],
208
- return: [:@kw, 'return'],
209
- sclass: [:@kw, 'class'],
210
- string_dvar: :@embvar,
211
- string_embexpr: :@embexpr_beg,
212
- super: [:@kw, 'super'],
213
- symbols_new: :@symbols_beg,
214
- top_const_field: [:@op, '::'],
215
- top_const_ref: [:@op, '::'],
216
- undef: [:@kw, 'undef'],
217
- unless: [:@kw, 'unless'],
218
- until: [:@kw, 'until'],
219
- var_alias: [:@kw, 'alias'],
220
- when: [:@kw, 'when'],
221
- while: [:@kw, 'while'],
222
- words_new: :@words_beg,
223
- xstring_literal: :@backtick,
224
- yield0: [:@kw, 'yield'],
225
- yield: [:@kw, 'yield'],
226
- zsuper: [:@kw, 'super']
367
+ # Array nodes can contain a myriad of subnodes because of the special
368
+ # array literal syntax like %w and %i. As a result, we may be looking for
369
+ # a left bracket, or we may be just looking at the children to get the
370
+ # bounds.
371
+ def on_array(contents)
372
+ if !contents || %i[args args_add_star].include?(contents[:type])
373
+ beging = find_scanner_event(:@lbracket)
374
+ ending = find_scanner_event(:@rbracket)
375
+
376
+ {
377
+ type: :array,
378
+ body: [contents],
379
+ start: beging[:start],
380
+ char_start: beging[:char_start],
381
+ end: ending[:end],
382
+ char_end: ending[:char_end]
227
383
  }
384
+ else
385
+ ending = find_scanner_event(:@tstring_end)
386
+ contents[:char_end] = ending[:char_end]
387
+
388
+ ending.merge!(
389
+ type: :array,
390
+ body: [contents],
391
+ start: contents[:start],
392
+ char_start: contents[:char_start]
393
+ )
394
+ end
395
+ end
228
396
 
229
- events.each do |event, (type, scanned)|
230
- define_method(:"on_#{event}") do |*body|
231
- node = find_scanner_event(type, scanned || :any)
397
+ # aryptn is a parser event that represents matching against an array pattern
398
+ # using the Ruby 2.7+ pattern matching syntax.
399
+ def on_aryptn(const, preargs, splatarg, postargs)
400
+ pieces = [const, *preargs, splatarg, *postargs].compact
401
+
402
+ {
403
+ type: :aryptn,
404
+ body: [const, preargs, splatarg, postargs],
405
+ start: pieces[0][:start],
406
+ char_start: pieces[0][:char_start],
407
+ end: pieces[-1][:end],
408
+ char_end: pieces[-1][:char_end]
409
+ }
410
+ end
232
411
 
233
- super(*body).merge!(
234
- start: node[:start],
235
- char_start: node[:char_start],
236
- char_end: char_pos
237
- )
238
- end
239
- end
412
+ # assign is a parser event that represents assigning something to a
413
+ # variable or constant. It accepts as arguments the left side of the
414
+ # expression before the equals sign and the right side of the expression.
415
+ def on_assign(left, right)
416
+ left.merge(
417
+ type: :assign,
418
+ body: [left, right],
419
+ end: right[:end],
420
+ char_end: right[:char_end]
421
+ )
422
+ end
240
423
 
241
- # Array nodes can contain a myriad of subnodes because of the special
242
- # array literal syntax like %w and %i. As a result, we may be looking for
243
- # an left bracket, or we may be just looking at the children.
244
- def on_array(*body)
245
- if body[0] && %i[args args_add_star].include?(body[0][:type])
246
- node = find_scanner_event(:@lbracket)
247
-
248
- super(*body).merge!(
249
- start: node[:start],
250
- char_start: node[:char_start],
251
- char_end: char_pos
252
- )
253
- else
254
- super(*body).merge!(
255
- char_start: char_start_for(body), char_end: char_pos
256
- )
257
- end
258
- end
424
+ # assoc_new is a parser event that contains a key-value pair within a
425
+ # hash. It is a child event of either an assoclist_from_args or a
426
+ # bare_assoc_hash.
427
+ def on_assoc_new(key, value)
428
+ {
429
+ type: :assoc_new,
430
+ body: [key, value],
431
+ start: key[:start],
432
+ char_start: key[:char_start],
433
+ end: value[:end],
434
+ char_end: value[:char_end]
435
+ }
436
+ end
259
437
 
260
- # Array pattern nodes contain an odd mix of potential child nodes based on
261
- # which kind of pattern is being used.
262
- def on_aryptn(*body)
263
- char_start, char_end = char_pos, char_pos
438
+ # assoc_splat is a parser event that represents splatting a value into a
439
+ # hash (either a hash literal or a bare hash in a method call).
440
+ def on_assoc_splat(contents)
441
+ find_scanner_event(:@op, '**').merge!(
442
+ type: :assoc_splat,
443
+ body: [contents],
444
+ end: contents[:end],
445
+ char_end: contents[:char_end]
446
+ )
447
+ end
264
448
 
265
- body.flatten(1).each do |part|
266
- next unless part
449
+ # assoclist_from_args is a parser event that contains a list of all of the
450
+ # associations inside of a hash literal. Its parent node is always a hash.
451
+ # It accepts as an argument an array of assoc events (either assoc_new or
452
+ # assoc_splat).
453
+ def on_assoclist_from_args(assocs)
454
+ {
455
+ type: :assoclist_from_args,
456
+ body: assocs,
457
+ start: assocs[0][:start],
458
+ char_start: assocs[0][:char_start],
459
+ end: assocs[-1][:end],
460
+ char_end: assocs[-1][:char_end]
461
+ }
462
+ end
267
463
 
268
- char_start = [char_start, part[:char_start]].min
269
- char_end = [char_end, part[:char_end]].max
270
- end
464
+ # bare_assoc_hash is a parser event that represents a hash of contents
465
+ # being passed as a method argument (and therefore has omitted braces). It
466
+ # accepts as an argument an array of assoc events (either assoc_new or
467
+ # assoc_splat).
468
+ def on_bare_assoc_hash(assoc_news)
469
+ {
470
+ type: :bare_assoc_hash,
471
+ body: assoc_news,
472
+ start: assoc_news[0][:start],
473
+ char_start: assoc_news[0][:char_start],
474
+ end: assoc_news[-1][:end],
475
+ char_end: assoc_news[-1][:char_end]
476
+ }
477
+ end
271
478
 
272
- super(*body).merge!(char_start: char_start, char_end: char_end)
479
+ # begin is a parser event that represents the beginning of a begin..end chain.
480
+ # It includes a bodystmt event that has all of the consequent clauses.
481
+ def on_begin(bodystmt)
482
+ beging = find_scanner_event(:@kw, 'begin')
483
+ char_end =
484
+ if bodystmt[:body][1..-1].any?
485
+ bodystmt[:char_end]
486
+ else
487
+ find_scanner_event(:@kw, 'end')[:char_end]
273
488
  end
274
489
 
275
- # Params have a somewhat interesting structure in that they are an array
276
- # of arrays where the position in the top-level array indicates the type
277
- # of param and the subarray is the list of parameters of that type. We
278
- # therefore have to flatten them down to get to the location.
279
- def on_params(*body)
280
- super(*body).merge!(
281
- char_start: char_start_for(body.flatten(1)), char_end: char_pos
282
- )
283
- end
490
+ bodystmt.bind(beging[:char_end], char_end)
284
491
 
285
- # String literals and either contain string parts or a heredoc. If it
286
- # contains a heredoc we can just go directly to the child nodes, otherwise
287
- # we need to look for a `tstring_beg`.
288
- def on_string_literal(*body)
289
- if body[0][:type] == :heredoc
290
- super(*body).merge!(
291
- char_start: char_start_for(body), char_end: char_pos
292
- )
293
- else
294
- node = find_scanner_event(:@tstring_beg)
295
-
296
- super(*body).merge!(
297
- start: node[:start],
298
- char_start: node[:char_start],
299
- char_end: char_pos,
300
- quote: node[:body]
301
- )
302
- end
303
- end
492
+ beging.merge!(
493
+ type: :begin,
494
+ body: [bodystmt],
495
+ end: bodystmt[:end],
496
+ char_end: bodystmt[:char_end]
497
+ )
498
+ end
304
499
 
305
- # Technically, the `not` operator is a unary operator but is reported as
306
- # a keyword and not an operator. Because of the inconsistency, we have to
307
- # manually look for the correct scanner event here.
308
- def on_unary(*body)
309
- node =
310
- if body[0] == :not
311
- find_scanner_event(:@kw, 'not')
312
- else
313
- find_scanner_event(:@op)
314
- end
315
-
316
- super(*body).merge!(
317
- start: node[:start], char_start: node[:char_start], char_end: char_pos
318
- )
319
- end
500
+ # binary is a parser event that represents a binary operation between two
501
+ # values.
502
+ def on_binary(left, oper, right)
503
+ {
504
+ type: :binary,
505
+ body: [left, oper, right],
506
+ start: left[:start],
507
+ char_start: left[:char_start],
508
+ end: right[:end],
509
+ char_end: right[:char_end]
510
+ }
511
+ end
320
512
 
321
- # Symbols don't necessarily have to have a @symbeg event fired before they
322
- # start. For example, you can have symbol literals inside an `alias` node
323
- # if you're just using bare words, as in: `alias foo bar`. So this is a
324
- # special case in which if there is a `:@symbeg` event we can hook on to
325
- # then we use it, otherwise we just look at the beginning of the first
326
- # child node.
327
- %i[dyna_symbol symbol_literal].each do |event|
328
- define_method(:"on_#{event}") do |*body|
329
- options =
330
- if scanner_events.any? { |sevent| sevent[:type] == :@symbeg }
331
- symbeg = find_scanner_event(:@symbeg)
332
-
333
- {
334
- char_start: symbeg[:char_start],
335
- char_end: char_pos,
336
- quote: symbeg[:body][1]
337
- }
338
- elsif scanner_events.any? { |sevent| sevent[:type] == :@label_end }
339
- label_end = find_scanner_event(:@label_end)
340
-
341
- {
342
- char_start: char_start_for(body),
343
- char_end: char_pos,
344
- quote: label_end[:body][0]
345
- }
346
- else
347
- { char_start: char_start_for(body), char_end: char_pos }
348
- end
349
-
350
- super(*body).merge!(options)
351
- end
513
+ # block_var is a parser event that represents the parameters being passed to
514
+ # a block. Effectively they're everything contained within the pipes.
515
+ def on_block_var(params, locals)
516
+ index =
517
+ scanner_events.rindex do |event|
518
+ event[:type] == :@op && %w[| ||].include?(event[:body]) &&
519
+ event[:char_start] < params[:char_start]
352
520
  end
353
521
 
354
- def on_program(*body)
355
- super(*body).merge!(start: 1, char_start: 0, char_end: char_pos)
522
+ beging = scanner_events[index]
523
+ ending = scanner_events[-1]
524
+
525
+ {
526
+ type: :block_var,
527
+ body: [params, locals],
528
+ start: beging[:start],
529
+ char_start: beging[:char_start],
530
+ end: ending[:end],
531
+ char_end: ending[:char_end]
532
+ }
533
+ end
534
+
535
+ # blockarg is a parser event that represents defining a block variable on
536
+ # a method definition.
537
+ def on_blockarg(ident)
538
+ find_scanner_event(:@op, '&').merge!(
539
+ type: :blockarg,
540
+ body: [ident],
541
+ end: ident[:end],
542
+ char_end: ident[:char_end]
543
+ )
544
+ end
545
+
546
+ # bodystmt can't actually determine its bounds appropriately because it
547
+ # doesn't necessarily know where it started. So the parent node needs to
548
+ # report back down into this one where it goes.
549
+ class BodyStmt < SimpleDelegator
550
+ def bind(char_start, char_end)
551
+ merge!(char_start: char_start, char_end: char_end)
552
+ parts = self[:body]
553
+
554
+ # Here we're going to determine the bounds for the stmts
555
+ consequent = parts[1..-1].compact.first
556
+ self[:body][0].bind(char_start,
557
+ consequent ? consequent[:char_start] : char_end)
558
+
559
+ # Next we're going to determine the rescue clause if there is one
560
+ if parts[1]
561
+ consequent = parts[2..-1].compact.first
562
+ self[:body][1].bind(consequent ? consequent[:char_start] : char_end)
356
563
  end
564
+ end
565
+ end
357
566
 
358
- defined =
359
- private_instance_methods(false).grep(/\Aon_/) { $'.to_sym } +
360
- %i[embdoc embdoc_beg embdoc_end heredoc_beg heredoc_end]
567
+ # bodystmt is a parser event that represents all of the possible combinations
568
+ # of clauses within the body of a method or block.
569
+ def on_bodystmt(stmts, rescued, ensured, elsed)
570
+ BodyStmt.new(
571
+ type: :bodystmt,
572
+ body: [stmts, rescued, ensured, elsed],
573
+ start: lineno,
574
+ char_start: char_pos,
575
+ end: lineno,
576
+ char_end: char_pos
577
+ )
578
+ end
361
579
 
362
- (SCANNER_EVENTS - defined).each do |event|
363
- define_method(:"on_#{event}") do |body|
364
- super(body).tap do |node|
365
- char_end = char_pos + (body ? body.size : 0)
366
- node.merge!(char_start: char_pos, char_end: char_end)
580
+ # brace_block is a parser event that represents passing a block to a
581
+ # method call using the {..} operators. It accepts as arguments an
582
+ # optional block_var event that represents any parameters to the block as
583
+ # well as a stmts event that represents the statements inside the block.
584
+ def on_brace_block(block_var, stmts)
585
+ beging = find_scanner_event(:@lbrace)
586
+ ending = find_scanner_event(:@rbrace)
587
+
588
+ stmts.bind((block_var || beging)[:char_end], ending[:char_start])
589
+
590
+ {
591
+ type: :brace_block,
592
+ body: [block_var, stmts],
593
+ start: beging[:start],
594
+ char_start: beging[:char_start],
595
+ end: ending[:end],
596
+ char_end: ending[:char_end]
597
+ }
598
+ end
367
599
 
368
- scanner_events << node
369
- end
370
- end
371
- end
600
+ # break is a parser event that represents using the break keyword. It
601
+ # accepts as an argument an args or args_add_block event that contains all
602
+ # of the arguments being passed to the break.
603
+ def on_break(args_add_block)
604
+ find_scanner_event(:@kw, 'break').merge!(
605
+ type: :break,
606
+ body: [args_add_block],
607
+ end: args_add_block[:end],
608
+ char_end: args_add_block[:char_end]
609
+ )
610
+ end
372
611
 
373
- (PARSER_EVENTS - defined).each do |event|
374
- define_method(:"on_#{event}") do |*body|
375
- super(*body).merge!(
376
- char_start: char_start_for(body), char_end: char_pos
377
- )
378
- end
379
- end
612
+ # call is a parser event representing a method call with no arguments. It
613
+ # accepts as arguments the receiver of the method, the operator being used
614
+ # to send the method (., ::, or &.), and the value that is being sent to
615
+ # the receiver (which can be another nested call as well).
616
+ #
617
+ # There is one esoteric syntax that comes into play here as well. If the
618
+ # sending argument to this method is the symbol :call, then it represents
619
+ # calling a lambda in a very odd looking way, as in:
620
+ #
621
+ # foo.(1, 2, 3)
622
+ #
623
+ def on_call(receiver, oper, sending)
624
+ ending = sending
625
+
626
+ if sending == :call
627
+ ending = oper
628
+
629
+ # Special handling here for Ruby <= 2.5 because the oper argument to this
630
+ # method wasn't a parser event here; it was just a plain symbol.
631
+ ending = receiver if RUBY_MAJOR <= 2 && RUBY_MINOR <= 5
380
632
  end
381
- )
382
633
 
383
- # This layer keeps track of inline comments as they come in. Ripper itself
384
- # doesn't attach comments to the AST, so we need to do it manually. In this
385
- # case, inline comments are defined as any comments wherein the lexer state is
386
- # not equal to EXPR_BEG (tracked in the BlockComments layer).
387
- prepend(
388
- Module.new do
389
- # Certain events needs to steal the comments from their children in order
390
- # for them to display properly.
391
- events = {
392
- aref: [:body, 1],
393
- args_add_block: [:body, 0],
394
- break: [:body, 0],
395
- call: [:body, 0],
396
- command: [:body, 1],
397
- command_call: [:body, 3],
398
- regexp_literal: [:body, 0],
399
- string_literal: [:body, 0],
400
- symbol_literal: [:body, 0]
401
- }
634
+ {
635
+ type: :call,
636
+ body: [receiver, oper, sending],
637
+ start: receiver[:start],
638
+ char_start: receiver[:char_start],
639
+ end: ending[:end],
640
+ char_end: ending[:char_end]
641
+ }
642
+ end
402
643
 
403
- def initialize(*args)
404
- super(*args)
405
- @inline_comments = []
406
- @last_sexp = nil
407
- end
644
+ # case is a parser event that represents the beginning of a case chain.
645
+ # It accepts as arguments the switch of the case and the consequent
646
+ # clause.
647
+ def on_case(switch, consequent)
648
+ find_scanner_event(:@kw, 'case').merge!(
649
+ type: :case,
650
+ body: [switch, consequent],
651
+ end: consequent[:end],
652
+ char_end: consequent[:char_end]
653
+ )
654
+ end
408
655
 
409
- def self.prepended(base)
410
- base.attr_reader :inline_comments, :last_sexp
411
- end
656
+ # class is a parser event that represents defining a class. It accepts as
657
+ # arguments the name of the class, the optional name of the superclass,
658
+ # and the bodystmt event that represents the statements evaluated within
659
+ # the context of the class.
660
+ def on_class(const, superclass, bodystmt)
661
+ beging = find_scanner_event(:@kw, 'class')
662
+ ending = find_scanner_event(:@kw, 'end')
663
+
664
+ bodystmt.bind((superclass || const)[:char_end], ending[:char_start])
665
+
666
+ {
667
+ type: :class,
668
+ body: [const, superclass, bodystmt],
669
+ start: beging[:start],
670
+ char_start: beging[:char_start],
671
+ end: ending[:end],
672
+ char_end: ending[:char_end]
673
+ }
674
+ end
412
675
 
413
- private
676
+ # command is a parser event representing a method call with arguments and
677
+ # no parentheses. It accepts as arguments the name of the method and the
678
+ # arguments being passed to the method.
679
+ def on_command(ident, args)
680
+ {
681
+ type: :command,
682
+ body: [ident, args],
683
+ start: ident[:start],
684
+ char_start: ident[:char_start],
685
+ end: args[:end],
686
+ char_end: args[:char_end]
687
+ }
688
+ end
414
689
 
415
- events.each do |event, path|
416
- define_method(:"on_#{event}") do |*body|
417
- @last_sexp =
418
- super(*body).tap do |sexp|
419
- comments = (sexp.dig(*path) || {}).delete(:comments)
420
- sexp.merge!(comments: comments) if comments
421
- end
422
- end
423
- end
690
+ # command_call is a parser event representing a method call on an object
691
+ # with arguments and no parentheses. It accepts as arguments the receiver
692
+ # of the method, the operator being used to send the method, the name of
693
+ # the method, and the arguments being passed to the method.
694
+ def on_command_call(receiver, oper, ident, args)
695
+ ending = args || ident
696
+
697
+ {
698
+ type: :command_call,
699
+ body: [receiver, oper, ident, args],
700
+ start: receiver[:start],
701
+ char_start: receiver[:char_start],
702
+ end: ending[:end],
703
+ char_end: ending[:char_end]
704
+ }
705
+ end
424
706
 
425
- SPECIAL_LITERALS = %i[qsymbols qwords symbols words].freeze
707
+ # A const_path_field is a parser event that is always the child of some
708
+ # kind of assignment. It represents when you're assigning to a constant
709
+ # that is being referenced as a child of another variable. For example:
710
+ #
711
+ # foo::X = 1
712
+ #
713
+ def on_const_path_field(left, const)
714
+ {
715
+ type: :const_path_field,
716
+ body: [left, const],
717
+ start: left[:start],
718
+ char_start: left[:char_start],
719
+ end: const[:end],
720
+ char_end: const[:char_end]
721
+ }
722
+ end
426
723
 
427
- # Special array literals are handled in different ways and so their
428
- # comments need to be passed up to their parent array node.
429
- def on_array(*body)
430
- @last_sexp =
431
- super(*body).tap do |sexp|
432
- next unless SPECIAL_LITERALS.include?(body.dig(0, :type))
724
+ # A const_path_ref is a parser event that is very similar to
725
+ # const_path_field except that it is not involved in an assignment. It
726
+ # looks like the following example:
727
+ #
728
+ # foo::X
729
+ #
730
+ def on_const_path_ref(left, const)
731
+ {
732
+ type: :const_path_ref,
733
+ body: [left, const],
734
+ start: left[:start],
735
+ char_start: left[:char_start],
736
+ end: const[:end],
737
+ char_end: const[:char_end]
738
+ }
739
+ end
433
740
 
434
- comments = sexp.dig(:body, 0).delete(:comments)
435
- sexp.merge!(comments: comments) if comments
436
- end
437
- end
741
+ # A const_ref is a parser event that represents the name of the constant
742
+ # being used in a class or module declaration. In the following example it
743
+ # is the @const scanner event that has the contents of Foo.
744
+ #
745
+ # class Foo; end
746
+ #
747
+ def on_const_ref(const)
748
+ const.merge(type: :const_ref, body: [const])
749
+ end
438
750
 
439
- # Handling this specially because we want to pull the comments out of both
440
- # child nodes.
441
- def on_assoc_new(*body)
442
- @last_sexp =
443
- super(*body).tap do |sexp|
444
- comments =
445
- (sexp.dig(:body, 0).delete(:comments) || []) +
446
- (sexp.dig(:body, 1).delete(:comments) || [])
447
-
448
- sexp.merge!(comments: comments) if comments.any?
449
- end
450
- end
751
+ # A def is a parser event that represents defining a regular method on the
752
+ # current self object. It accepts as arguments the ident (the name of the
753
+ # method being defined), the params (the parameter declaration for the
754
+ # method), and a bodystmt node which represents the statements inside the
755
+ # method. As an example, here are the parts that go into this:
756
+ #
757
+ # def foo(bar) do baz end
758
+ # │ │ │
759
+ # │ │ └> bodystmt
760
+ # │ └> params
761
+ # └> ident
762
+ #
763
+ def on_def(ident, params, bodystmt)
764
+ if params[:type] == :params && !params[:body].any?
765
+ location = ident[:char_end]
766
+ params.merge!(char_start: location, char_end: location)
767
+ end
451
768
 
452
- # Most scanner events don't stand on their own as s-expressions, but the
453
- # CHAR scanner event is effectively just a string, so we need to track it
454
- # as a s-expression.
455
- def on_CHAR(body)
456
- @last_sexp = super(body)
457
- end
769
+ beging = find_scanner_event(:@kw, 'def')
770
+ ending = find_scanner_event(:@kw, 'end')
458
771
 
459
- # We need to know exactly where the comment is, switching off the current
460
- # lexer state. In Ruby 2.7.0-dev, that's defined as:
461
- #
462
- # enum lex_state_bits {
463
- # EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
464
- # EXPR_END_bit, /* newline significant, +/- is an operator. */
465
- # EXPR_ENDARG_bit, /* ditto, and unbound braces. */
466
- # EXPR_ENDFN_bit, /* ditto, and unbound braces. */
467
- # EXPR_ARG_bit, /* newline significant, +/- is an operator. */
468
- # EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
469
- # EXPR_MID_bit, /* newline significant, +/- is an operator. */
470
- # EXPR_FNAME_bit, /* ignore newline, no reserved words. */
471
- # EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
472
- # EXPR_CLASS_bit, /* immediate after `class', no here document. */
473
- # EXPR_LABEL_bit, /* flag bit, label is allowed. */
474
- # EXPR_LABELED_bit, /* flag bit, just after a label. */
475
- # EXPR_FITEM_bit, /* symbol literal as FNAME. */
476
- # EXPR_MAX_STATE
477
- # };
478
- def on_comment(body)
479
- sexp = { type: :@comment, body: body.chomp, start: lineno, end: lineno }
480
-
481
- case Prettier::Parser.lex_state_name(state).gsub('EXPR_', '')
482
- when 'END', 'ARG|LABELED', 'ENDFN'
483
- last_sexp.merge!(comments: [sexp])
484
- when 'CMDARG', 'END|ENDARG', 'ENDARG', 'ARG', 'FNAME|FITEM', 'CLASS',
485
- 'END|LABEL'
486
- inline_comments << sexp
487
- when 'BEG|LABEL', 'MID'
488
- inline_comments << sexp.merge!(break: true)
489
- when 'DOT'
490
- last_sexp.merge!(comments: [sexp.merge!(break: true)])
491
- end
492
-
493
- sexp
494
- end
772
+ bodystmt.bind(params[:char_end], ending[:char_start])
773
+
774
+ {
775
+ type: :def,
776
+ body: [ident, params, bodystmt],
777
+ start: beging[:start],
778
+ char_start: beging[:char_start],
779
+ end: ending[:end],
780
+ char_end: ending[:char_end]
781
+ }
782
+ end
783
+
784
+ # A defs is a parser event that represents defining a singleton method on
785
+ # an object. It accepts the same arguments as the def event, as well as
786
+ # the target and operator on which this method is being defined. As
787
+ # an example, here are the parts that go into this:
788
+ #
789
+ # def foo.bar(baz) do baz end
790
+ # │ │ │ │
791
+ # │ │ │ │ │
792
+ # │ │ │ └> bodystmt
793
+ # │ │ │ └> params
794
+ # │ └> ident
795
+ # │ └> oper
796
+ # └> target
797
+ #
798
+ def on_defs(target, oper, ident, params, bodystmt)
799
+ if params[:type] == :params && !params[:body].any?
800
+ location = ident[:char_end]
801
+ params.merge!(char_start: location, char_end: location)
802
+ end
495
803
 
496
- defined = private_instance_methods(false).grep(/\Aon_/) { $'.to_sym }
804
+ beging = find_scanner_event(:@kw, 'def')
805
+ ending = find_scanner_event(:@kw, 'end')
497
806
 
498
- (PARSER_EVENTS - defined).each do |event|
499
- define_method(:"on_#{event}") do |*body|
500
- super(*body).tap do |sexp|
501
- @last_sexp = sexp
502
- next if inline_comments.empty?
807
+ bodystmt.bind(params[:char_end], ending[:char_start])
503
808
 
504
- sexp[:comments] = inline_comments.reverse
505
- @inline_comments = []
506
- end
507
- end
809
+ {
810
+ type: :defs,
811
+ body: [target, oper, ident, params, bodystmt],
812
+ start: beging[:start],
813
+ char_start: beging[:char_start],
814
+ end: ending[:end],
815
+ char_end: ending[:char_end]
816
+ }
817
+ end
818
+
819
+ # A defined node represents the rather unique defined? operator. It can be
820
+ # used with and without parentheses. If they're present, we use them to
821
+ # determine our bounds, otherwise we use the value that's being passed to
822
+ # the operator.
823
+ def on_defined(value)
824
+ beging = find_scanner_event(:@kw, 'defined?')
825
+
826
+ paren = source[beging[:char_end]...value[:char_start]].include?('(')
827
+ ending = paren ? find_scanner_event(:@rparen) : value
828
+
829
+ beging.merge!(
830
+ type: :defined,
831
+ body: [value],
832
+ end: ending[:end],
833
+ char_end: ending[:char_end]
834
+ )
835
+ end
836
+
837
+ # do_block is a parser event that represents passing a block to a method
838
+ # call using the do..end keywords. It accepts as arguments an optional
839
+ # block_var event that represents any parameters to the block as well as
840
+ # a bodystmt event that represents the statements inside the block.
841
+ def on_do_block(block_var, bodystmt)
842
+ beging = find_scanner_event(:@kw, 'do')
843
+ ending = find_scanner_event(:@kw, 'end')
844
+
845
+ bodystmt.bind((block_var || beging)[:char_end], ending[:char_start])
846
+
847
+ {
848
+ type: :do_block,
849
+ body: [block_var, bodystmt],
850
+ start: beging[:start],
851
+ char_start: beging[:char_start],
852
+ end: ending[:end],
853
+ char_end: ending[:char_end]
854
+ }
855
+ end
856
+
857
+ # dot2 is a parser event that represents using the .. operator between two
858
+ # expressions. Usually this is to create a range object but sometimes it's to
859
+ # use the flip-flop operator.
860
+ def on_dot2(left, right)
861
+ operator = find_scanner_event(:@op, '..')
862
+
863
+ beging = left || operator
864
+ ending = right || operator
865
+
866
+ {
867
+ type: :dot2,
868
+ body: [left, right],
869
+ start: beging[:start],
870
+ char_start: beging[:char_start],
871
+ end: ending[:end],
872
+ char_end: ending[:char_end]
873
+ }
874
+ end
875
+
876
+ # dot3 is a parser event that represents using the ... operator between two
877
+ # expressions. Usually this is to create a range object but sometimes it's to
878
+ # use the flip-flop operator.
879
+ def on_dot3(left, right)
880
+ operator = find_scanner_event(:@op, '...')
881
+
882
+ beging = left || operator
883
+ ending = right || operator
884
+
885
+ {
886
+ type: :dot3,
887
+ body: [left, right],
888
+ start: beging[:start],
889
+ char_start: beging[:char_start],
890
+ end: ending[:end],
891
+ char_end: ending[:char_end]
892
+ }
893
+ end
894
+
895
+ # A dyna_symbol is a parser event that represents a symbol literal that
896
+ # uses quotes to interpolate its value. For example, if you had a variable
897
+ # foo and you wanted a symbol that contained its value, you would write:
898
+ #
899
+ # :"#{foo}"
900
+ #
901
+ # As such, they accept as one argument a string node, which is the same
902
+ # node that gets accepted into a string_literal (since we're basically
903
+ # talking about a string literal with a : character at the beginning).
904
+ #
905
+ # They can also come in another flavor which is a dynamic symbol as a hash
906
+ # key. This is kind of an interesting syntax which results in us having to
907
+ # look for a @label_end scanner event instead to get our bearings. That
908
+ # kind of code would look like:
909
+ #
910
+ # { "#{foo}": bar }
911
+ #
912
+ # which would be the same symbol as above.
913
+ def on_dyna_symbol(string)
914
+ if scanner_events.any? { |event| event[:type] == :@symbeg }
915
+ # A normal dynamic symbol
916
+ beging = find_scanner_event(:@symbeg)
917
+ ending = find_scanner_event(:@tstring_end)
918
+
919
+ beging.merge(
920
+ type: :dyna_symbol,
921
+ quote: beging[:body][1],
922
+ body: string[:body],
923
+ end: ending[:end],
924
+ char_end: ending[:char_end]
925
+ )
926
+ else
927
+ # A dynamic symbol as a hash key
928
+ beging = find_scanner_event(:@tstring_beg)
929
+ ending = find_scanner_event(:@label_end)
930
+
931
+ string.merge!(
932
+ type: :dyna_symbol,
933
+ quote: ending[:body][0],
934
+ start: beging[:start],
935
+ char_start: beging[:char_start],
936
+ end: ending[:end],
937
+ char_end: ending[:char_end]
938
+ )
939
+ end
940
+ end
941
+
942
+ # else can either end with an end keyword (in which case we'll want to
943
+ # consume that event) or it can end with an ensure keyword (in which case
944
+ # we'll leave that to the ensure to handle).
945
+ def find_else_ending
946
+ index =
947
+ scanner_events.rindex do |event|
948
+ event[:type] == :@kw && %w[end ensure].include?(event[:body])
508
949
  end
950
+
951
+ event = scanner_events[index]
952
+ event[:body] == 'end' ? scanner_events.delete_at(index) : event
953
+ end
954
+
955
+ # else is a parser event that represents the end of an if, unless, or begin
956
+ # chain. It accepts as an argument the statements that are contained
957
+ # within the else clause.
958
+ def on_else(stmts)
959
+ beging = find_scanner_event(:@kw, 'else')
960
+ ending = find_else_ending
961
+
962
+ stmts.bind(beging[:char_end], ending[:char_start])
963
+
964
+ {
965
+ type: :else,
966
+ body: [stmts],
967
+ start: beging[:start],
968
+ char_start: beging[:char_start],
969
+ end: ending[:end],
970
+ char_end: ending[:char_end]
971
+ }
972
+ end
973
+
974
+ # elsif is a parser event that represents another clause in an if chain.
975
+ # It accepts as arguments the predicate of the else if, the statements
976
+ # that are contained within the else if clause, and the optional
977
+ # consequent clause.
978
+ def on_elsif(predicate, stmts, consequent)
979
+ beging = find_scanner_event(:@kw, 'elsif')
980
+ ending = consequent || find_scanner_event(:@kw, 'end')
981
+
982
+ stmts.bind(predicate[:char_end], ending[:char_start])
983
+
984
+ {
985
+ type: :elsif,
986
+ body: [predicate, stmts, consequent],
987
+ start: beging[:start],
988
+ char_start: beging[:char_start],
989
+ end: ending[:end],
990
+ char_end: ending[:char_end]
991
+ }
992
+ end
993
+
994
+ # embdocs are long comments that are surrounded by =begin..=end. They
995
+ # cannot be nested, so we don't need to worry about keeping a stack around
996
+ # like we do with heredocs. Instead we can just track the current embdoc
997
+ # and add to it as we get content. It always starts with this scanner
998
+ # event, so here we'll initialize the current embdoc.
999
+ def on_embdoc_beg(value)
1000
+ @embdoc = {
1001
+ type: :@embdoc, value: value, start: lineno, char_start: char_pos
1002
+ }
1003
+ end
1004
+
1005
+ # This is a scanner event that gets hit when we're inside an embdoc and
1006
+ # receive a new line of content. Here we are guaranteed to already have
1007
+ # initialized the @embdoc variable so we can just append the new line onto
1008
+ # the existing content.
1009
+ def on_embdoc(value)
1010
+ @embdoc[:value] << value
1011
+ end
1012
+
1013
+ # This is the final scanner event for embdocs. It receives the =end. Here
1014
+ # we can finalize the embdoc with its location information and the final
1015
+ # piece of the string. We then add it to the list of comments so that
1016
+ # prettier can place it into the final source string.
1017
+ def on_embdoc_end(value)
1018
+ @comments <<
1019
+ @embdoc.merge!(
1020
+ value: @embdoc[:value] << value.chomp,
1021
+ end: lineno,
1022
+ char_end: char_pos + value.length - 1
1023
+ )
1024
+
1025
+ @embdoc = nil
1026
+ end
1027
+
1028
+ # ensure is a parser event that represents the use of the ensure keyword
1029
+ # and its subsequent statements.
1030
+ def on_ensure(stmts)
1031
+ beging = find_scanner_event(:@kw, 'ensure')
1032
+ ending = find_scanner_event(:@kw, 'end')
1033
+
1034
+ stmts.bind(beging[:char_end], ending[:char_start])
1035
+
1036
+ {
1037
+ type: :ensure,
1038
+ body: [stmts],
1039
+ start: beging[:start],
1040
+ char_start: beging[:char_start],
1041
+ end: ending[:end],
1042
+ char_end: ending[:char_end]
1043
+ }
1044
+ end
1045
+
1046
+ # An excessed_comma is a special kind of parser event that represents a comma
1047
+ # at the end of a list of parameters. It's a very strange node. It accepts a
1048
+ # different number of arguments depending on Ruby version, which is why we
1049
+ # have the anonymous splat there.
1050
+ def on_excessed_comma(*)
1051
+ find_scanner_event(:@comma).merge!(type: :excessed_comma)
1052
+ end
1053
+
1054
+ # An fcall is a parser event that represents the piece of a method call
1055
+ # that comes before any arguments (i.e., just the name of the method).
1056
+ def on_fcall(ident)
1057
+ ident.merge(type: :fcall, body: [ident])
1058
+ end
1059
+
1060
+ # A field is a parser event that is always the child of an assignment. It
1061
+ # accepts as arguments the left side of operation, the operator (. or ::),
1062
+ # and the right side of the operation. For example:
1063
+ #
1064
+ # foo.x = 1
1065
+ #
1066
+ def on_field(left, oper, right)
1067
+ {
1068
+ type: :field,
1069
+ body: [left, oper, right],
1070
+ start: left[:start],
1071
+ char_start: left[:char_start],
1072
+ end: right[:end],
1073
+ char_end: right[:char_end]
1074
+ }
1075
+ end
1076
+
1077
+ # for is a parser event that represents using the somewhat esoteric for
1078
+ # loop. It accepts as arguments an ident which is the iterating variable,
1079
+ # an enumerable for that which is being enumerated, and a stmts event that
1080
+ # represents the statements inside the for loop.
1081
+ def on_for(ident, enumerable, stmts)
1082
+ beging = find_scanner_event(:@kw, 'for')
1083
+ ending = find_scanner_event(:@kw, 'end')
1084
+
1085
+ stmts.bind(enumerable[:char_end], ending[:char_start])
1086
+
1087
+ {
1088
+ type: :for,
1089
+ body: [ident, enumerable, stmts],
1090
+ start: beging[:start],
1091
+ char_start: beging[:char_start],
1092
+ end: ending[:end],
1093
+ char_end: ending[:char_end]
1094
+ }
1095
+ end
1096
+
1097
+ # hash is a parser event that represents a hash literal. It accepts as an
1098
+ # argument an optional assoclist_from_args event which contains the
1099
+ # contents of the hash.
1100
+ def on_hash(assoclist_from_args)
1101
+ beging = find_scanner_event(:@lbrace)
1102
+ ending = find_scanner_event(:@rbrace)
1103
+
1104
+ if assoclist_from_args
1105
+ # Here we're going to expand out the location information for the assocs
1106
+ # node so that it can grab up any remaining comments inside the hash.
1107
+ assoclist_from_args.merge!(
1108
+ char_start: beging[:char_end], char_end: ending[:char_start]
1109
+ )
509
1110
  end
510
- )
511
1111
 
512
- # Nodes that are always on their own line occur when the lexer is in the
513
- # EXPR_BEG state. Those comments are tracked within the @block_comments
514
- # instance variable. Then for each node that could contain them, we attach
515
- # them after the node has been built.
516
- prepend(
517
- Module.new do
518
- events = {
519
- begin: [0, :body, 0],
520
- bodystmt: [0],
521
- class: [2, :body, 0],
522
- def: [2, :body, 0],
523
- defs: [4, :body, 0],
524
- else: [0],
525
- elsif: [1],
526
- ensure: [0],
527
- if: [1],
528
- program: [0],
529
- rescue: [2],
530
- sclass: [1, :body, 0],
531
- unless: [1],
532
- until: [1],
533
- when: [1],
534
- while: [1]
535
- }
1112
+ {
1113
+ type: :hash,
1114
+ body: [assoclist_from_args],
1115
+ start: beging[:start],
1116
+ char_start: beging[:char_start],
1117
+ end: ending[:end],
1118
+ char_end: ending[:char_end]
1119
+ }
1120
+ end
536
1121
 
537
- def initialize(*args)
538
- super(*args)
539
- @block_comments = []
540
- @current_embdoc = nil
541
- end
1122
+ # This is a scanner event that represents the beginning of the heredoc. It
1123
+ # includes the declaration (which we call beging here, which is just short
1124
+ # for beginning). The declaration looks something like <<-HERE or <<~HERE.
1125
+ # If the downcased version of the declaration actually matches an existing
1126
+ # prettier parser, we'll later attempt to print it using that parser and
1127
+ # printer through our embed function.
1128
+ def on_heredoc_beg(beging)
1129
+ {
1130
+ type: :heredoc,
1131
+ beging: beging,
1132
+ start: lineno,
1133
+ end: lineno,
1134
+ char_start: char_pos - beging.length + 1,
1135
+ char_end: char_pos
1136
+ }.tap { |node| @heredocs << node }
1137
+ end
542
1138
 
543
- def self.prepended(base)
544
- base.attr_reader :block_comments, :current_embdoc
545
- end
1139
+ # This is a parser event that occurs when you're using a heredoc with a
1140
+ # tilde. These are considered `heredoc_dedent` nodes, whereas the hyphen
1141
+ # heredocs show up as string literals.
1142
+ def on_heredoc_dedent(string, _width)
1143
+ @heredocs[-1].merge!(string.slice(:body))
1144
+ end
546
1145
 
547
- private
1146
+ # This is a scanner event that represents the end of the heredoc.
1147
+ def on_heredoc_end(ending)
1148
+ @heredocs[-1].merge!(ending: ending.chomp, end: lineno, char_end: char_pos)
1149
+ end
548
1150
 
549
- def attach_comments(sexp, stmts)
550
- range = sexp[:start]..sexp[:end]
551
- comments =
552
- block_comments.group_by { |comment| range.include?(comment[:start]) }
1151
+ # hshptn is a parser event that represents matching against a hash pattern
1152
+ # using the Ruby 2.7+ pattern matching syntax.
1153
+ def on_hshptn(const, kw, kwrest)
1154
+ pieces = [const, kw, kwrest].flatten(2).compact
1155
+
1156
+ {
1157
+ type: :hshptn,
1158
+ body: [const, kw, kwrest],
1159
+ start: pieces[0][:start],
1160
+ char_start: pieces[0][:char_start],
1161
+ end: pieces[-1][:end],
1162
+ char_end: pieces[-1][:char_end]
1163
+ }
1164
+ end
553
1165
 
554
- if comments[true]
555
- stmts[:body] =
556
- (stmts[:body] + comments[true]).sort_by { |node| node[:start] }
1166
+ # if is a parser event that represents the first clause in an if chain.
1167
+ # It accepts as arguments the predicate of the if, the statements that are
1168
+ # contained within the if clause, and the optional consequent clause.
1169
+ def on_if(predicate, stmts, consequent)
1170
+ beging = find_scanner_event(:@kw, 'if')
1171
+ ending = consequent || find_scanner_event(:@kw, 'end')
1172
+
1173
+ stmts.bind(predicate[:char_end], ending[:char_start])
1174
+
1175
+ {
1176
+ type: :if,
1177
+ body: [predicate, stmts, consequent],
1178
+ start: beging[:start],
1179
+ char_start: beging[:char_start],
1180
+ end: ending[:end],
1181
+ char_end: ending[:char_end]
1182
+ }
1183
+ end
557
1184
 
558
- @block_comments = comments.fetch(false) { [] }
559
- end
560
- end
1185
+ # ifop is a parser event that represents a ternary operator. It accepts as
1186
+ # arguments the predicate to the ternary, the truthy clause, and the falsy
1187
+ # clause.
1188
+ def on_ifop(predicate, truthy, falsy)
1189
+ predicate.merge(
1190
+ type: :ifop,
1191
+ body: [predicate, truthy, falsy],
1192
+ end: falsy[:end],
1193
+ char_end: falsy[:char_end]
1194
+ )
1195
+ end
561
1196
 
562
- events.each do |event, path|
563
- define_method(:"on_#{event}") do |*body|
564
- super(*body).tap { |sexp| attach_comments(sexp, body.dig(*path)) }
565
- end
566
- end
1197
+ # if_mod is a parser event that represents the modifier form of an if
1198
+ # statement. It accepts as arguments the predicate of the if and the
1199
+ # statement that are contained within the if clause.
1200
+ def on_if_mod(predicate, statement)
1201
+ find_scanner_event(:@kw, 'if')
1202
+
1203
+ {
1204
+ type: :if_mod,
1205
+ body: [predicate, statement],
1206
+ start: statement[:start],
1207
+ char_start: statement[:char_start],
1208
+ end: predicate[:end],
1209
+ char_end: predicate[:char_end]
1210
+ }
1211
+ end
567
1212
 
568
- def on_comment(body)
569
- super(body).tap do |sexp|
570
- lex_state = Prettier::Parser.lex_state_name(state).gsub('EXPR_', '')
571
- block_comments << sexp if lex_state == 'BEG'
572
- end
573
- end
1213
+ # in is a parser event that represents using the in keyword within the
1214
+ # Ruby 2.7+ pattern matching syntax.
1215
+ def on_in(pattern, stmts, consequent)
1216
+ beging = find_scanner_event(:@kw, 'in')
1217
+ ending = consequent || find_scanner_event(:@kw, 'end')
574
1218
 
575
- def on_embdoc_beg(comment)
576
- @current_embdoc = {
577
- type: :embdoc, body: comment, start: lineno, end: lineno
578
- }
579
- end
1219
+ stmts.bind(beging[:char_end], ending[:char_start])
580
1220
 
581
- def on_embdoc(comment)
582
- @current_embdoc[:body] << comment
583
- end
1221
+ beging.merge!(
1222
+ type: :in,
1223
+ body: [pattern, stmts, consequent],
1224
+ end: ending[:end],
1225
+ char_end: ending[:char_end]
1226
+ )
1227
+ end
584
1228
 
585
- def on_embdoc_end(comment)
586
- @current_embdoc[:body] << comment.chomp
587
- @block_comments << @current_embdoc
588
- @current_embdoc = nil
589
- end
1229
+ # kwrest_param is a parser event that represents defining a parameter in a
1230
+ # method definition that accepts all remaining keyword parameters.
1231
+ def on_kwrest_param(ident)
1232
+ oper = find_scanner_event(:@op, '**')
1233
+ return oper.merge!(type: :kwrest_param, body: [nil]) unless ident
1234
+
1235
+ oper.merge!(
1236
+ type: :kwrest_param,
1237
+ body: [ident],
1238
+ end: ident[:end],
1239
+ char_end: ident[:char_end]
1240
+ )
1241
+ end
590
1242
 
591
- def on_method_add_block(*body)
592
- super(*body).tap do |sexp|
593
- stmts = body[1][:body][1]
594
- stmts = stmts[:type] == :stmts ? stmts : body[1][:body][1][:body][0]
1243
+ # lambda is a parser event that represents using a "stabby" lambda
1244
+ # literal. It accepts as arguments a params event that represents any
1245
+ # parameters to the lambda and a stmts event that represents the
1246
+ # statements inside the lambda.
1247
+ #
1248
+ # It can be wrapped in either {..} or do..end so we look for either of
1249
+ # those combinations to get our bounds.
1250
+ def on_lambda(params, stmts)
1251
+ beging = find_scanner_event(:@tlambda)
1252
+
1253
+ if scanner_events.any? { |event| event[:type] == :@tlambeg }
1254
+ opening = find_scanner_event(:@tlambeg)
1255
+ closing = find_scanner_event(:@rbrace)
1256
+ else
1257
+ opening = find_scanner_event(:@kw, 'do')
1258
+ closing = find_scanner_event(:@kw, 'end')
1259
+ end
595
1260
 
596
- attach_comments(sexp, stmts)
597
- end
598
- end
1261
+ stmts.bind(opening[:char_end], closing[:char_start])
1262
+
1263
+ {
1264
+ type: :lambda,
1265
+ body: [params, stmts],
1266
+ start: beging[:start],
1267
+ char_start: beging[:char_start],
1268
+ end: closing[:end],
1269
+ char_end: closing[:char_end]
1270
+ }
1271
+ end
1272
+
1273
+ # massign is a parser event that is a parent node of any kind of multiple
1274
+ # assignment. This includes splitting out variables on the left like:
1275
+ #
1276
+ # a, b, c = foo
1277
+ #
1278
+ # as well as splitting out variables on the right, as in:
1279
+ #
1280
+ # foo = a, b, c
1281
+ #
1282
+ # Both sides support splats, as well as variables following them. There's
1283
+ # also slightly odd behavior that you can achieve with the following:
1284
+ #
1285
+ # a, = foo
1286
+ #
1287
+ # In this case a would receive only the first value of the foo enumerable,
1288
+ # in which case we need to explicitly track the comma and add it onto the
1289
+ # child node.
1290
+ def on_massign(left, right)
1291
+ if source[left[:char_end]...right[:char_start]].strip.start_with?(',')
1292
+ left[:comma] = true
599
1293
  end
600
- )
601
1294
 
602
- # Tracking heredocs in somewhat interesting. Straight-line heredocs are
603
- # reported as strings, whereas squiggly-line heredocs are reported as
604
- # heredocs. We track the start and matching end of the heredoc as "beging" and
605
- # "ending" respectively.
606
- prepend(
607
- Module.new do
608
- def initialize(*args)
609
- super(*args)
610
- @heredoc_stack = []
611
- end
1295
+ {
1296
+ type: :massign,
1297
+ body: [left, right],
1298
+ start: left[:start],
1299
+ char_start: left[:char_start],
1300
+ end: right[:end],
1301
+ char_end: right[:char_end]
1302
+ }
1303
+ end
612
1304
 
613
- def self.prepended(base)
614
- base.attr_reader :heredoc_stack
615
- end
1305
+ # method_add_arg is a parser event that represents a method call with
1306
+ # arguments and parentheses. It accepts as arguments the method being called
1307
+ # and the arg_paren event that contains the arguments to the method.
1308
+ def on_method_add_arg(fcall, arg_paren)
1309
+ {
1310
+ type: :method_add_arg,
1311
+ body: [fcall, arg_paren],
1312
+ start: fcall[:start],
1313
+ char_start: fcall[:char_start],
1314
+ end: arg_paren[:end],
1315
+ char_end: arg_paren[:char_end]
1316
+ }
1317
+ end
616
1318
 
617
- private
1319
+ # method_add_block is a parser event that represents a method call with a
1320
+ # block argument. It accepts as arguments the method being called and the
1321
+ # block event.
1322
+ def on_method_add_block(method_add_arg, block)
1323
+ {
1324
+ type: :method_add_block,
1325
+ body: [method_add_arg, block],
1326
+ start: method_add_arg[:start],
1327
+ char_start: method_add_arg[:char_start],
1328
+ end: block[:end],
1329
+ char_end: block[:char_end]
1330
+ }
1331
+ end
1332
+
1333
+ # An mlhs_new is a parser event that represents the beginning of the left
1334
+ # side of a multiple assignment. It is followed by any number of mlhs_add
1335
+ # nodes that each represent another variable being assigned.
1336
+ def on_mlhs_new
1337
+ {
1338
+ type: :mlhs,
1339
+ body: [],
1340
+ start: lineno,
1341
+ char_start: char_pos,
1342
+ end: lineno,
1343
+ char_end: char_pos
1344
+ }
1345
+ end
1346
+
1347
+ # An mlhs_add is a parser event that represents adding another variable
1348
+ # onto a list of assignments. It accepts as arguments the parent mlhs node
1349
+ # as well as the part that is being added to the list.
1350
+ def on_mlhs_add(mlhs, part)
1351
+ if mlhs[:body].empty?
1352
+ part.merge(type: :mlhs, body: [part])
1353
+ else
1354
+ mlhs.merge!(
1355
+ body: mlhs[:body] << part, end: part[:end], char_end: part[:char_end]
1356
+ )
1357
+ end
1358
+ end
1359
+
1360
+ # An mlhs_add_post is a parser event that represents adding another set of
1361
+ # variables onto a list of assignments after a splat variable. It accepts
1362
+ # as arguments the previous mlhs_add_star node that represented the splat
1363
+ # as well another mlhs node that represents all of the variables after the
1364
+ # splat.
1365
+ def on_mlhs_add_post(mlhs_add_star, mlhs)
1366
+ mlhs_add_star.merge(
1367
+ type: :mlhs_add_post,
1368
+ body: [mlhs_add_star, mlhs],
1369
+ end: mlhs[:end],
1370
+ char_end: mlhs[:char_end]
1371
+ )
1372
+ end
1373
+
1374
+ # An mlhs_add_star is a parser event that represents a splatted variable
1375
+ # inside of a multiple assignment on the left hand side. It accepts as
1376
+ # arguments the parent mlhs node as well as the part that represents the
1377
+ # splatted variable.
1378
+ def on_mlhs_add_star(mlhs, part)
1379
+ beging = find_scanner_event(:@op, '*')
1380
+ ending = part || beging
1381
+
1382
+ {
1383
+ type: :mlhs_add_star,
1384
+ body: [mlhs, part],
1385
+ start: beging[:start],
1386
+ char_start: beging[:char_start],
1387
+ end: ending[:end],
1388
+ char_end: ending[:char_end]
1389
+ }
1390
+ end
1391
+
1392
+ # An mlhs_paren is a parser event that represents parentheses being used
1393
+ # to deconstruct values in a multiple assignment on the left hand side. It
1394
+ # accepts as arguments the contents of the inside of the parentheses,
1395
+ # which is another mlhs node.
1396
+ def on_mlhs_paren(contents)
1397
+ beging = find_scanner_event(:@lparen)
1398
+ ending = find_scanner_event(:@rparen)
1399
+
1400
+ if source[beging[:char_end]...ending[:char_start]].strip.end_with?(',')
1401
+ contents[:comma] = true
1402
+ end
1403
+
1404
+ {
1405
+ type: :mlhs_paren,
1406
+ body: [contents],
1407
+ start: beging[:start],
1408
+ char_start: beging[:char_start],
1409
+ end: ending[:end],
1410
+ char_end: ending[:char_end]
1411
+ }
1412
+ end
1413
+
1414
+ # module is a parser event that represents defining a module. It accepts
1415
+ # as arguments the name of the module and the bodystmt event that
1416
+ # represents the statements evaluated within the context of the module.
1417
+ def on_module(const, bodystmt)
1418
+ beging = find_scanner_event(:@kw, 'module')
1419
+ ending = find_scanner_event(:@kw, 'end')
1420
+
1421
+ bodystmt.bind(const[:char_end], ending[:char_start])
1422
+
1423
+ {
1424
+ type: :module,
1425
+ body: [const, bodystmt],
1426
+ start: beging[:start],
1427
+ char_start: beging[:char_start],
1428
+ end: ending[:end],
1429
+ char_end: ending[:char_end]
1430
+ }
1431
+ end
1432
+
1433
+ # An mrhs_new is a parser event that represents the beginning of a list of
1434
+ # values that are being assigned within a multiple assignment node. It can
1435
+ # be followed by any number of mrhs_add nodes that we'll build up into an
1436
+ # array body.
1437
+ def on_mrhs_new
1438
+ {
1439
+ type: :mrhs,
1440
+ body: [],
1441
+ start: lineno,
1442
+ char_start: char_pos,
1443
+ end: lineno,
1444
+ char_end: char_pos
1445
+ }
1446
+ end
618
1447
 
619
- # This is a scanner event that represents the beginning of the heredoc.
620
- def on_heredoc_beg(beging)
1448
+ # An mrhs_add is a parser event that represents adding another value onto
1449
+ # a list on the right hand side of a multiple assignment.
1450
+ def on_mrhs_add(mrhs, part)
1451
+ if mrhs[:body].empty?
1452
+ part.merge(type: :mrhs, body: [part])
1453
+ else
1454
+ mrhs.merge!(
1455
+ body: mrhs[:body] << part, end: part[:end], char_end: part[:char_end]
1456
+ )
1457
+ end
1458
+ end
1459
+
1460
+ # An mrhs_add_star is a parser event that represents using the splat
1461
+ # operator to expand out a value on the right hand side of a multiple
1462
+ # assignment.
1463
+ def on_mrhs_add_star(mrhs, part)
1464
+ beging = find_scanner_event(:@op, '*')
1465
+ ending = part || beging
1466
+
1467
+ {
1468
+ type: :mrhs_add_star,
1469
+ body: [mrhs, part],
1470
+ start: beging[:start],
1471
+ char_start: beging[:char_start],
1472
+ end: ending[:end],
1473
+ char_end: ending[:char_end]
1474
+ }
1475
+ end
1476
+
1477
+ # An mrhs_new_from_args is a parser event that represents the shorthand
1478
+ # of a multiple assignment that allows you to assign values using just
1479
+ # commas as opposed to assigning from an array. For example, in the
1480
+ # following segment the right hand side of the assignment would trigger
1481
+ # this event:
1482
+ #
1483
+ # foo = 1, 2, 3
1484
+ #
1485
+ def on_mrhs_new_from_args(args)
1486
+ args.merge(type: :mrhs_new_from_args, body: [args])
1487
+ end
1488
+
1489
+ # next is a parser event that represents using the next keyword. It
1490
+ # accepts as an argument an args or args_add_block event that contains all
1491
+ # of the arguments being passed to the next.
1492
+ def on_next(args_add_block)
1493
+ find_scanner_event(:@kw, 'next').merge!(
1494
+ type: :next,
1495
+ body: [args_add_block],
1496
+ end: args_add_block[:end],
1497
+ char_end: args_add_block[:char_end]
1498
+ )
1499
+ end
1500
+
1501
+ # opassign is a parser event that represents assigning something to a
1502
+ # variable or constant using an operator like += or ||=. It accepts as
1503
+ # arguments the left side of the expression before the operator, the
1504
+ # operator itself, and the right side of the expression.
1505
+ def on_opassign(left, oper, right)
1506
+ left.merge(
1507
+ type: :opassign,
1508
+ body: [left, oper, right],
1509
+ end: right[:end],
1510
+ char_end: right[:char_end]
1511
+ )
1512
+ end
1513
+
1514
+ # params is a parser event that represents defining parameters on a
1515
+ # method. They have a somewhat interesting structure in that they are an
1516
+ # array of arrays where the position in the top-level array indicates the
1517
+ # type of param and the subarray is the list of parameters of that type.
1518
+ # We therefore have to flatten them down to get to the location.
1519
+ def on_params(*types)
1520
+ flattened = types.flatten(2).select { |type| type.is_a?(Hash) }
1521
+ location =
1522
+ if flattened.any?
621
1523
  {
622
- type: :heredoc,
623
- beging: beging,
624
- start: lineno,
625
- end: lineno,
626
- char_start: char_pos - beging.length + 1,
627
- char_end: char_pos
628
- }.tap { |node| heredoc_stack << node }
1524
+ start: flattened[0][:start],
1525
+ char_start: flattened[0][:char_start],
1526
+ end: flattened[-1][:end],
1527
+ char_end: flattened[-1][:char_end]
1528
+ }
1529
+ else
1530
+ { start: lineno, char_start: char_pos, end: lineno, char_end: char_pos }
629
1531
  end
630
1532
 
631
- # This is a scanner event that represents the end of the heredoc.
632
- def on_heredoc_end(ending)
633
- heredoc_stack[-1].merge!(
634
- ending: ending.chomp, end: lineno, char_end: char_pos
635
- )
636
- end
1533
+ location.merge!(type: :params, body: types)
1534
+ end
637
1535
 
638
- # This is a parser event that occurs when you're using a heredoc with a
639
- # tilde. These are considered `heredoc_dedent` nodes, whereas the hyphen
640
- # heredocs show up as string literals.
641
- def on_heredoc_dedent(string, _width)
642
- heredoc_stack[-1].merge!(string.slice(:body))
643
- end
1536
+ # A paren is a parser event that represents using parentheses pretty much
1537
+ # anywhere in a Ruby program. It accepts as arguments the contents, which
1538
+ # can be either params or statements.
1539
+ def on_paren(contents)
1540
+ ending = find_scanner_event(:@rparen)
1541
+
1542
+ find_scanner_event(:@lparen).merge!(
1543
+ type: :paren,
1544
+ body: [contents],
1545
+ end: ending[:end],
1546
+ char_end: ending[:char_end]
1547
+ )
1548
+ end
1549
+
1550
+ # The program node is the very top of the AST. Here we'll attach all of
1551
+ # the comments that we've gathered up over the course of parsing the
1552
+ # source string. We'll also attach on the __END__ content if there was
1553
+ # some found at the end of the source string.
1554
+ def on_program(stmts)
1555
+ range = {
1556
+ start: 1, end: lines.length, char_start: 0, char_end: source.length
1557
+ }
1558
+
1559
+ stmts[:body] << @__end__ if @__end__
1560
+ stmts.bind(0, source.length)
1561
+
1562
+ range.merge(type: :program, body: [stmts], comments: @comments)
1563
+ end
1564
+
1565
+ # qsymbols_new is a parser event that represents the beginning of a symbol
1566
+ # literal array, like %i[one two three]. It can be followed by any number
1567
+ # of qsymbols_add events, which we'll append onto an array body.
1568
+ def on_qsymbols_new
1569
+ find_scanner_event(:@qsymbols_beg).merge!(type: :qsymbols, body: [])
1570
+ end
1571
+
1572
+ # qsymbols_add is a parser event that represents an element inside of a
1573
+ # symbol literal array like %i[one two three]. It accepts as arguments the
1574
+ # parent qsymbols node as well as a tstring_content scanner event
1575
+ # representing the bare words.
1576
+ def on_qsymbols_add(qsymbols, tstring_content)
1577
+ qsymbols.merge!(
1578
+ body: qsymbols[:body] << tstring_content,
1579
+ end: tstring_content[:end],
1580
+ char_end: tstring_content[:char_end]
1581
+ )
1582
+ end
1583
+
1584
+ # qwords_new is a parser event that represents the beginning of a string
1585
+ # literal array, like %w[one two three]. It can be followed by any number
1586
+ # of qwords_add events, which we'll append onto an array body.
1587
+ def on_qwords_new
1588
+ find_scanner_event(:@qwords_beg).merge!(type: :qwords, body: [])
1589
+ end
1590
+
1591
+ # qwords_add is a parser event that represents an element inside of a
1592
+ # string literal array like %w[one two three]. It accepts as arguments the
1593
+ # parent qwords node as well as a tstring_content scanner event
1594
+ # representing the bare words.
1595
+ def on_qwords_add(qwords, tstring_content)
1596
+ qwords.merge!(
1597
+ body: qwords[:body] << tstring_content,
1598
+ end: tstring_content[:end],
1599
+ char_end: tstring_content[:char_end]
1600
+ )
1601
+ end
1602
+
1603
+ # redo is a parser event that represents the bare redo keyword. It has no
1604
+ # body as it accepts no arguments.
1605
+ def on_redo
1606
+ find_scanner_event(:@kw, 'redo').merge!(type: :redo)
1607
+ end
1608
+
1609
+ # regexp_new is a parser event that represents the beginning of a regular
1610
+ # expression literal, like /foo/. It can be followed by any number of
1611
+ # regexp_add events, which we'll append onto an array body.
1612
+ def on_regexp_new
1613
+ find_scanner_event(:@regexp_beg).merge!(type: :regexp, body: [])
1614
+ end
1615
+
1616
+ # regexp_add is a parser event that represents a piece of a regular
1617
+ # expression body. It accepts as arguments the parent regexp node as well as a
1618
+ # tstring_content scanner event representing string content or a
1619
+ # string_embexpr parser event representing interpolated content.
1620
+ def on_regexp_add(regexp, piece)
1621
+ regexp.merge!(
1622
+ body: regexp[:body] << piece,
1623
+ end: regexp[:end],
1624
+ char_end: regexp[:char_end]
1625
+ )
1626
+ end
644
1627
 
645
- # String literals are either going to be a normal string or they're going
646
- # to be a heredoc with a hyphen.
647
- def on_string_literal(string)
648
- heredoc = heredoc_stack[-1]
1628
+ # regexp_literal is a parser event that represents a regular expression.
1629
+ # It accepts as arguments a regexp node which is a built-up array of
1630
+ # pieces that go into the regexp content, as well as the ending used to
1631
+ # close out the regexp which includes any modifiers.
1632
+ def on_regexp_literal(regexp, ending)
1633
+ regexp.merge!(
1634
+ type: :regexp_literal,
1635
+ ending: ending[:body],
1636
+ end: ending[:end],
1637
+ char_end: ending[:char_end]
1638
+ )
1639
+ end
649
1640
 
650
- if heredoc && heredoc[:ending]
651
- heredoc_stack.pop.merge!(string.slice(:body))
652
- else
653
- super
654
- end
1641
+ # rescue is a special kind of node where you have a rescue chain but it
1642
+ # doesn't really have all of the information that it needs in order to
1643
+ # determine its ending. Therefore it relies on its parent bodystmt node to
1644
+ # report its ending to it.
1645
+ class Rescue < SimpleDelegator
1646
+ def bind(char_end)
1647
+ merge!(char_end: char_end)
1648
+
1649
+ stmts = self[:body][2]
1650
+ consequent = self[:body][3]
1651
+
1652
+ if consequent
1653
+ consequent.bind(char_end)
1654
+ stmts.bind(stmts[:char_start], consequent[:char_start])
1655
+ else
1656
+ stmts.bind(stmts[:char_start], char_end)
655
1657
  end
656
1658
  end
657
- )
1659
+ end
658
1660
 
659
- # This module contains miscellaneous fixes required to get the right
660
- # structure.
661
- prepend(
662
- Module.new do
663
- private
1661
+ # rescue is a parser event that represents the use of the rescue keyword
1662
+ # inside of a bodystmt.
1663
+ def on_rescue(exceptions, variable, stmts, consequent)
1664
+ beging = find_scanner_event(:@kw, 'rescue')
664
1665
 
665
- # These are the event types that contain _actual_ string content. If
666
- # there is an encoding magic comment at the top of the file, ripper will
667
- # actually change into that encoding for the storage of the string. This
668
- # will break everything, so we need to force the encoding back into UTF-8
669
- # so that the JSON library won't break.
670
- %w[comment ident tstring_content].each do |event|
671
- define_method(:"on_#{event}") do |body|
672
- super(body.force_encoding('UTF-8'))
673
- end
674
- end
1666
+ stmts.bind(
1667
+ ((exceptions || [])[-1] || variable || beging)[:char_end],
1668
+ char_pos
1669
+ )
675
1670
 
676
- # Handles __END__ syntax, which allows individual scripts to keep content
677
- # after the main ruby code that can be read through DATA. It looks like:
678
- #
679
- # foo.bar
680
- #
681
- # __END__
682
- # some other content that isn't normally read by ripper
683
- def on___end__(body)
684
- @__end__ = super(lines[lineno..-1].join("\n"))
685
- end
1671
+ Rescue.new(
1672
+ beging.merge!(
1673
+ type: :rescue,
1674
+ body: [exceptions, variable, stmts, consequent],
1675
+ end: lineno,
1676
+ char_end: char_pos
1677
+ )
1678
+ )
1679
+ end
686
1680
 
687
- def on_program(*body)
688
- super(*body).tap { |node| node[:body][0][:body] << __end__ if __end__ }
689
- end
1681
+ # rescue_mod represents the modifier form of a rescue clause. It accepts as
1682
+ # arguments the statement that may raise an error and the value that should
1683
+ # be used if it does.
1684
+ def on_rescue_mod(statement, rescued)
1685
+ find_scanner_event(:@kw, 'rescue')
1686
+
1687
+ {
1688
+ type: :rescue_mod,
1689
+ body: [statement, rescued],
1690
+ start: statement[:start],
1691
+ char_start: statement[:char_start],
1692
+ end: rescued[:end],
1693
+ char_end: rescued[:char_end]
1694
+ }
1695
+ end
1696
+
1697
+ # rest_param is a parser event that represents defining a parameter in a
1698
+ # method definition that accepts all remaining positional parameters. It
1699
+ # accepts as an argument an optional identifier for the parameter. If it
1700
+ # is omitted, then we're just using the plain operator.
1701
+ def on_rest_param(ident)
1702
+ oper = find_scanner_event(:@op, '*')
1703
+ return oper.merge!(type: :rest_param, body: [nil]) unless ident
1704
+
1705
+ oper.merge!(
1706
+ type: :rest_param,
1707
+ body: [ident],
1708
+ end: ident[:end],
1709
+ char_end: ident[:char_end]
1710
+ )
1711
+ end
1712
+
1713
+ # retry is a parser event that represents the bare retry keyword. It has
1714
+ # no body as it accepts no arguments.
1715
+ def on_retry
1716
+ find_scanner_event(:@kw, 'retry').merge!(type: :retry)
1717
+ end
1718
+
1719
+ # return is a parser event that represents using the return keyword with
1720
+ # arguments. It accepts as an argument an args_add_block event that
1721
+ # contains all of the arguments being passed.
1722
+ def on_return(args_add_block)
1723
+ find_scanner_event(:@kw, 'return').merge!(
1724
+ type: :return,
1725
+ body: [args_add_block],
1726
+ end: args_add_block[:end],
1727
+ char_end: args_add_block[:char_end]
1728
+ )
1729
+ end
690
1730
 
691
- # Normally access controls are reported as vcall nodes. This creates a
692
- # new node type to explicitly track those nodes instead, so that the
693
- # printer can add new lines as necessary.
694
- def on_vcall(ident)
695
- @access_controls ||= %w[private protected public].freeze
1731
+ # return0 is a parser event that represents the bare return keyword. It
1732
+ # has no body as it accepts no arguments. This is as opposed to the return
1733
+ # parser event, which is the version where you're returning one or more
1734
+ # values.
1735
+ def on_return0
1736
+ find_scanner_event(:@kw, 'return').merge!(type: :return0)
1737
+ end
696
1738
 
697
- super(ident).tap do |node|
698
- if !@access_controls.include?(ident[:body]) ||
699
- ident[:body] != lines[lineno - 1].strip
700
- next
701
- end
1739
# Handles the sclass parser event: a block of statements evaluated in the
# context of an object's singleton class, commonly used to define singleton
# methods. For example:
#
#     class << self; foo; end
#              │     │
#              │     └> bodystmt
#              └> target
#
# The body statements are bound to the span between the end of the target and
# the start of the closing end keyword.
def on_sclass(target, bodystmt)
  class_kw = find_scanner_event(:@kw, 'class')
  end_kw = find_scanner_event(:@kw, 'end')

  bodystmt.bind(target[:char_end], end_kw[:char_start])

  {
    type: :sclass,
    body: [target, bodystmt],
    start: class_kw[:start],
    char_start: class_kw[:char_start],
    end: end_kw[:end],
    char_end: end_kw[:char_end]
  }
end
702
1764
 
703
- node.merge!(type: :access_ctrl)
704
- end
1765
# Every construct that contains a block of code owns a list of statements.
# Rather than tracking that list as a plain node with an array body, this
# wrapper adds handling for empty statement lists: the parent node reports its
# own bounds down through #bind, and those bounds are copied onto a leading
# void_stmt node so that comments attached to it can be located and printed.
#
# The wrapped object is the stmts node hash itself, so all hash methods
# (#[], #merge!, ...) are delegated to it.
class Stmts < SimpleDelegator
  # Records the character span that the parent node assigns to this list.
  #
  # char_start - offset at which the statement list begins
  # char_end   - offset at which the statement list ends
  #
  # If the first statement is a void_stmt it is collapsed to a zero-width span
  # at char_start. A list with no statements at all is left untouched instead
  # of raising on the missing first element.
  def bind(char_start, char_end)
    merge!(char_start: char_start, char_end: char_end)

    first = self[:body][0]

    if first && first[:type] == :void_stmt
      first.merge!(char_start: char_start, char_end: char_start)
    end
  end

  # Appends a statement and widens this node's location to include it.
  #
  # The first statement added supplies both the start and the end location;
  # later statements only move the end. Returns self so that the parser keeps
  # passing this wrapper (not the bare body array) to later events.
  def <<(statement)
    if self[:body].any?
      merge!(statement.slice(:end, :char_end))
    else
      merge!(statement.slice(:start, :end, :char_start, :char_end))
    end

    self[:body] << statement
    self
  end
end
749
1792
 
750
- ending = source.rindex(')', char_pos)
751
- buffer = source[(node[:char_start] + 1)...ending]
1793
# Handles the stmts_new parser event, which opens a list of statements inside
# any lexical block. Subsequent stmts_add events append to the body. The new
# list starts out empty and zero-width at the parser's current position.
def on_stmts_new
  line = lineno
  offset = char_pos

  Stmts.new(
    type: :stmts,
    body: [],
    start: line,
    end: line,
    char_start: offset,
    char_end: offset
  )
end
1806
+
1807
# Handles the stmts_add parser event: one more statement inside a statement
# list. Receives the parent stmts node and the statement (any Ruby expression)
# and returns whatever the parent's << operator returns.
def on_stmts_add(stmts, stmt)
  stmts << stmt
end
1814
+
1815
# Handles the string_concat parser event: two string literals joined with a
# trailing backslash, as in:
#
#     'foo' \
#       'bar'
#
# The node spans from the start of the left string to the end of the right.
def on_string_concat(left, right)
  first_line, first_char = left.values_at(:start, :char_start)
  last_line, last_char = right.values_at(:end, :char_end)

  {
    type: :string_concat,
    body: [left, right],
    start: first_line,
    char_start: first_char,
    end: last_line,
    char_end: last_char
  }
end
1831
+
1832
# Handles the string_content parser event, which opens the contents of a
# string that will end up inside a string_literal or dyna_symbol node. The
# body starts empty and is filled with @tstring_content, string_embexpr and
# string_dvar nodes; the location is zero-width at the current position.
def on_string_content
  line = lineno
  offset = char_pos

  {
    type: :string,
    body: [],
    start: line,
    end: line,
    char_start: offset,
    char_end: offset
  }
end
1847
+
1848
# Handles the string_add parser event: one piece of a string, which may be
# plain @tstring_content, a string_embexpr or a string_dvar node. The piece is
# appended to the parent string's body and the parent's end location is moved
# to the end of that piece. Returns the (mutated) parent node.
def on_string_add(string, piece)
  parts = string[:body] << piece
  string.merge!(body: parts, end: piece[:end], char_end: piece[:char_end])
end
1857
+
1858
# Handles the string_dvar parser event: the brace-less interpolation form
# available for instance, class and global variables, e.g. "#@foo". The node
# begins at the @embvar scanner event and ends at the referenced variable.
def on_string_dvar(var_ref)
  embvar = find_scanner_event(:@embvar)
  last_line, last_char = var_ref.values_at(:end, :char_end)

  embvar.merge!(
    type: :string_dvar,
    body: [var_ref],
    end: last_line,
    char_end: last_char
  )
end
1871
+
1872
# Handles the string_embexpr parser event: interpolated content, i.e. any
# place the #{} construct appears (regexps, strings, xstrings, heredocs,
# dyna_symbols and so on). The inner statements are bound to the span between
# the opening and closing interpolation markers.
def on_string_embexpr(stmts)
  opening = find_scanner_event(:@embexpr_beg)
  closing = find_scanner_event(:@embexpr_end)

  stmts.bind(opening[:char_end], closing[:char_start])

  {
    type: :string_embexpr,
    body: [stmts],
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  }
end
1891
+
1892
# Handles the string_literal parser event. The result is either a regular
# string literal or, when the most recent heredoc has already seen its ending,
# that heredoc (popped off the pending list) with the string's body attached.
def on_string_literal(string)
  pending = @heredocs[-1]
  return @heredocs.pop.merge!(body: string[:body]) if pending && pending[:ending]

  opening = find_scanner_event(:@tstring_beg)
  closing = find_scanner_event(:@tstring_end)

  {
    type: :string_literal,
    body: string[:body],
    quote: opening[:body],
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  }
end
1914
+
1915
# Handles the super parser event: the super keyword called with arguments.
# The contents are either an arg_paren node (parentheses used) or an
# args_add_block node (parentheses omitted). The node starts at the keyword
# and ends where the contents end.
def on_super(contents)
  keyword = find_scanner_event(:@kw, 'super')
  last_line, last_char = contents.values_at(:end, :char_end)

  keyword.merge!(
    type: :super,
    body: [contents],
    end: last_line,
    char_end: last_char
  )
end
1927
+
1928
# Handles the symbol parser event, which sits directly beneath a symbol
# literal and wraps the ident holding the symbol's contents.
def on_symbol(ident)
  # While lexing, Ripper reports a symbol whose text matches a keyword as a
  # keyword token: { type: :@kw, body: "class" } rather than
  # { type: :@ident, body: "class" }. Left on the stack, that stray scanner
  # event would throw off the location lookups of other nodes, so the most
  # recent scanner event is discarded here.
  scanner_events.pop

  ident.merge(type: :symbol, body: [ident])
end
1945
+
1946
# Handles the symbol_literal parser event: a symbol without interpolation (as
# opposed to a dyna_symbol). The argument is normally a symbol node; it is a
# bare ident node when bare words are used, as in `alias foo bar`.
#
# A bare ident is wrapped in a new node. A symbol node is re-tagged in place
# and its start is moved back to the symbol's opening scanner event.
def on_symbol_literal(contents)
  return contents.merge(type: :symbol_literal, body: [contents]) if contents[:type] == :@ident

  symbeg = find_scanner_event(:@symbeg)
  contents.merge!(type: :symbol_literal, char_start: symbeg[:char_start])
end
1958
+
1959
# Handles the symbols_new parser event, which opens a symbol array literal
# that allows interpolation, such as %I[one #{two} three]. The opening scanner
# event becomes the node; symbols_add events then fill its body.
def on_symbols_new
  opening = find_scanner_event(:@symbols_beg)
  opening.merge!(type: :symbols, body: [])
end
752
1966
 
753
- body[:comma] = buffer.strip.end_with?(',')
754
- end
1967
# Handles the symbols_add parser event: one element of an interpolating symbol
# array literal such as %I[one #{two} three]. Appends the word_add node to the
# parent symbols node and moves the parent's end to the end of that element.
def on_symbols_add(symbols, word_add)
  elements = symbols[:body] << word_add

  symbols.merge!(
    body: elements,
    end: word_add[:end],
    char_end: word_add[:char_end]
  )
end
1978
+
1979
# Looks up the :: operator that precedes the given constant. This does not go
# through find_scanner_event because :: operators are not all popped off the
# stack, so for something like ::X::Y::Z the wrong one could be returned. The
# match is the last :: operator event that starts before the constant does.
def find_colon2_before(const)
  position =
    scanner_events.rindex do |candidate|
      candidate[:type] == :@op && candidate[:body] == '::' &&
        candidate[:char_start] < const[:char_start]
    end

  scanner_events[position]
end
1992
+
1993
# Handles the top_const_field parser event: assignment to a constant that is
# referenced from the top level, always as the child of some assignment:
#
#     ::X = 1
#
# The node copies the constant's location but starts at the leading ::.
def on_top_const_field(const)
  colon2 = find_colon2_before(const)
  first_line, first_char = colon2.values_at(:start, :char_start)

  const.merge(
    type: :top_const_field,
    body: [const],
    start: first_line,
    char_start: first_char
  )
end
2008
+
2009
# Handles the top_const_ref parser event, which is very similar to
# top_const_field except that no assignment is involved:
#
#     ::X
#
# The node copies the constant's location but starts at the leading ::.
def on_top_const_ref(const)
  colon2 = find_colon2_before(const)
  first_line, first_char = colon2.values_at(:start, :char_start)

  const.merge(
    type: :top_const_ref,
    body: [const],
    start: first_line,
    char_start: first_char
  )
end
2024
+
2025
# Handles the unary parser event: a unary method applied to an expression,
# such as !, ~ or not. The not keyword needs its own handling because any
# parentheses around its operand are not reported as a paren node.
def on_unary(oper, value)
  unless oper == :not
    return(
      find_scanner_event(:@op).merge!(
        type: :unary,
        oper: oper[0],
        body: [value],
        end: value[:end],
        char_end: value[:char_end]
      )
    )
  end

  keyword = find_scanner_event(:@kw, 'not')

  # Parentheses are detected by scanning the source text between the keyword
  # and the operand; when present the node ends at the closing parenthesis.
  paren = source[keyword[:char_end]...value[:char_start]].include?('(')
  closing = paren ? find_scanner_event(:@rparen) : value

  keyword.merge!(
    type: :unary,
    oper: oper,
    body: [value],
    end: closing[:end],
    char_end: closing[:char_end],
    paren: paren
  )
end
2054
+
2055
# Handles the undef parser event. Receives the array of symbol_literal nodes
# naming each message being undefined. The node starts at the undef keyword
# and ends at the last symbol in the list.
def on_undef(symbol_literals)
  final = symbol_literals.last
  keyword = find_scanner_event(:@kw, 'undef')

  keyword.merge!(
    type: :undef,
    body: symbol_literals,
    end: final[:end],
    char_end: final[:char_end]
  )
end
2069
+
2070
# Handles the unless parser event: the first clause of an unless chain.
# Receives the predicate, the statements inside the clause and an optional
# consequent clause. The node ends at the consequent when there is one and at
# the closing end keyword otherwise.
def on_unless(predicate, stmts, consequent)
  opening = find_scanner_event(:@kw, 'unless')
  closing = consequent || find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  {
    type: :unless,
    body: [predicate, stmts, consequent],
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  }
end
760
2089
 
761
- range = left[:char_start]..left[:char_end]
762
- left[:comma] = source[range].strip.end_with?(',')
763
- end
2090
# Handles the unless_mod parser event: the modifier form of unless. Receives
# the predicate and the statement it guards. Because the statement is written
# first, the node runs from the start of the statement to the end of the
# predicate.
def on_unless_mod(predicate, statement)
  # The result is unused; the lookup is kept as in the original
  # (presumably it consumes the keyword's scanner event — confirm).
  find_scanner_event(:@kw, 'unless')

  first_line, first_char = statement.values_at(:start, :char_start)
  last_line, last_char = predicate.values_at(:end, :char_end)

  {
    type: :unless_mod,
    body: [predicate, statement],
    start: first_line,
    char_start: first_char,
    end: last_line,
    char_end: last_char
  }
end
2105
+
2106
# Handles the until parser event: an until loop. Receives the loop predicate
# and the statements inside the loop, which are bound to the span between the
# end of the predicate and the start of the closing end keyword.
def on_until(predicate, stmts)
  opening = find_scanner_event(:@kw, 'until')
  closing = find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  {
    type: :until,
    body: [predicate, stmts],
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  }
end
2124
+
2125
# Handles the until_mod parser event: the modifier form of an until loop.
# Receives the predicate and the statement being looped. The node runs from
# the start of the statement to the end of the predicate.
def on_until_mod(predicate, statement)
  # The result is unused; the lookup is kept as in the original
  # (presumably it consumes the keyword's scanner event — confirm).
  find_scanner_event(:@kw, 'until')

  first_line, first_char = statement.values_at(:start, :char_start)
  last_line, last_char = predicate.values_at(:end, :char_end)

  {
    type: :until_mod,
    body: [predicate, statement],
    start: first_line,
    char_start: first_char,
    end: last_line,
    char_end: last_char
  }
end
2140
+
2141
# Handles the var_alias parser event: the alias keyword applied to global
# variables. Parentheses are optional with this keyword, so the node ends at
# the closing parenthesis when one is used and at the final argument when not.
def on_var_alias(left, right)
  keyword = find_scanner_event(:@kw, 'alias')

  # Parentheses are detected from the source text between the keyword and
  # the first argument.
  paren = source[keyword[:char_end]...left[:char_start]].include?('(')
  closing = paren ? find_scanner_event(:@rparen) : right

  {
    type: :var_alias,
    body: [left, right],
    start: keyword[:start],
    char_start: keyword[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  }
end
2160
+
2161
# Handles the var_ref parser event: use of a local variable, a nil literal, a
# true or false literal, or a numbered block variable. Returns a new node that
# copies the contents' location and wraps the contents as its only child.
def on_var_ref(contents)
  wrapper = { type: :var_ref, body: [contents] }
  contents.merge(wrapper)
end
2166
+
2167
# Handles the var_field parser event: a variable that is being assigned a
# value, which therefore always sits under an assignment-type node. In
#
#     foo = 1
#
# foo is the var_field.
def on_var_field(ident)
  # A missing ident occurs when assigning to a splat with the pattern
  # matching syntax of Ruby 2.7+; the node then has no body or location.
  return { type: :var_field, body: [] } unless ident

  ident.merge(type: :var_field, body: [ident])
end
2182
+
2183
# Handles the vcall parser event: any bare name that could be either a local
# variable or a method call. Receives the ident scanner event holding the
# name.
#
# Access controls (private, protected, public) arrive as vcall nodes because
# they are method calls. So that new lines can be added around them, they are
# reported as :access_ctrl instead, but only when the keyword is the sole
# content of the current line.
def on_vcall(ident)
  @controls ||= %w[private protected public].freeze

  name = ident[:body]
  access_ctrl = @controls.include?(name) && name == lines[lineno - 1].strip

  ident.merge(type: access_ctrl ? :access_ctrl : :vcall, body: [ident])
end
2204
+
2205
# Handles the void_stmt parser event: an empty lexical block of code. Such
# nodes frequently end up with comments attached, which is why they carry
# location information. The node is zero-width at the current position.
def on_void_stmt
  line = lineno
  offset = char_pos

  {
    type: :void_stmt,
    start: line,
    end: line,
    char_start: offset,
    char_end: offset
  }
end
2217
+
2218
# Handles the when parser event: one clause of a case chain. Receives the
# predicate, the statements inside the when clause and an optional consequent
# clause. The node ends at the consequent when there is one and at the closing
# end keyword otherwise.
def on_when(predicate, stmts, consequent)
  opening = find_scanner_event(:@kw, 'when')
  closing = consequent || find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  {
    type: :when,
    body: [predicate, stmts, consequent],
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  }
end
2237
+
2238
# Handles the while parser event: a while loop. Receives the loop predicate
# and the statements inside the loop, which are bound to the span between the
# end of the predicate and the start of the closing end keyword.
def on_while(predicate, stmts)
  opening = find_scanner_event(:@kw, 'while')
  closing = find_scanner_event(:@kw, 'end')

  stmts.bind(predicate[:char_end], closing[:char_start])

  {
    type: :while,
    body: [predicate, stmts],
    start: opening[:start],
    char_start: opening[:char_start],
    end: closing[:end],
    char_end: closing[:char_end]
  }
end
2256
+
2257
# Handles the while_mod parser event: the modifier form of a while loop.
# Receives the predicate and the statement being looped. The node runs from
# the start of the statement to the end of the predicate.
def on_while_mod(predicate, statement)
  # The result is unused; the lookup is kept as in the original
  # (presumably it consumes the keyword's scanner event — confirm).
  find_scanner_event(:@kw, 'while')

  first_line, first_char = statement.values_at(:start, :char_start)
  last_line, last_char = predicate.values_at(:end, :char_end)

  {
    type: :while_mod,
    body: [predicate, statement],
    start: first_line,
    char_start: first_char,
    end: last_line,
    char_end: last_char
  }
end
2272
+
2273
# Handles the word_new parser event, which opens a single word inside a
# special array literal (strings or symbols) that allows interpolation. The
# following array contains three words:
#
#     %W[one a#{two}a three]
#
# The parser reports each word as a tree of word_new and word_add events;
# here it is flattened into one word node whose body collects the parts.
def on_word_new
  parts = []
  { type: :word, body: parts }
end
2287
+
2288
# Handles the word_add parser event: one piece of a word inside a special
# array literal that allows interpolation. Receives the parent word node and
# the piece, which is a @tstring_content node for plain text or a
# string_embexpr node for interpolated content.
def on_word_add(word, piece)
  # The first piece supplies the word's bounds, so a new node is built from
  # the piece's own location information.
  return piece.merge(type: :word, body: [piece]) if word[:body].empty?

  parts = word[:body] << piece
  word.merge!(body: parts, end: piece[:end], char_end: piece[:char_end])
end
2304
+
2305
# Handles the words_new parser event, which opens a string array literal that
# allows interpolation, such as %W[one #{two} three]. The opening scanner
# event becomes the node; words_add events then fill its body.
def on_words_new
  opening = find_scanner_event(:@words_beg)
  opening.merge!(type: :words, body: [])
end
2312
+
2313
# Handles the words_add parser event: one element of an interpolating string
# array literal such as %W[one #{two} three]. Appends the word_add node to the
# parent words node and moves the parent's end to the end of that element.
def on_words_add(words, word_add)
  elements = words[:body] << word_add

  words.merge!(
    body: elements,
    end: word_add[:end],
    char_end: word_add[:char_end]
  )
end
2324
+
2325
# Handles the xstring_new parser event, which opens a string of commands sent
# to the terminal, like `ls`. Such strings may contain interpolation, so the
# node gets an array body that xstring_add events fill in.
#
# When the most recent pending heredoc was declared with a backtick (its
# opener has a backtick at index 3, as in <<-`SHELL`), a copy of that heredoc
# is used as the node so that it supplies the location information. Otherwise
# the opening @backtick scanner event becomes the node.
def on_xstring_new
  heredoc = @heredocs[-1]

  # This must be a comparison. An assignment here is always truthy and would
  # also overwrite the fourth character of the heredoc's opener.
  if heredoc && heredoc[:beging][3] == '`'
    heredoc.merge(type: :xstring, body: [])
  else
    find_scanner_event(:@backtick).merge!(type: :xstring, body: [])
  end
end
2342
+
2343
# Handles the xstring_add parser event: one piece of a string of commands
# sent to the terminal, like `ls`. Receives the parent xstring node and the
# piece, which is a tstring_content scanner event for bare content or a
# string_embexpr node for interpolated content. The piece is appended and the
# parent's end is moved to the end of the piece.
def on_xstring_add(xstring, piece)
  parts = xstring[:body] << piece

  xstring.merge!(
    body: parts,
    end: piece[:end],
    char_end: piece[:char_end]
  )
end
2356
+
2357
# Handles the xstring_literal parser event: a complete string of commands
# sent to the terminal, like `ls`. Receives the xstring node whose array body
# holds all of the parts (plain content and interpolated content).
#
# Command strings can also be written as heredocs:
#
#     <<-`SHELL`
#       ls
#     SHELL
#
# In that case the most recent pending heredoc node is returned with the
# xstring's body attached, so the result keeps the heredoc's type rather than
# becoming an xstring_literal. Otherwise the xstring is re-tagged and ends at
# the closing @tstring_end scanner event.
def on_xstring_literal(xstring)
  heredoc = @heredocs[-1]

  # This must be a comparison. An assignment here is always truthy and would
  # also overwrite the fourth character of the heredoc's opener.
  if heredoc && heredoc[:beging][3] == '`'
    heredoc.merge!(body: xstring[:body])
  else
    ending = find_scanner_event(:@tstring_end)
    xstring.merge!(
      type: :xstring_literal, end: ending[:end], char_end: ending[:char_end]
    )
  end
end
2383
+
2384
# Handles the yield parser event: the yield keyword used with arguments. The
# single argument is the args_add_block node holding everything being
# yielded. The node starts at the keyword and ends where the arguments end.
def on_yield(args_add_block)
  keyword = find_scanner_event(:@kw, 'yield')
  last_line, last_char = args_add_block.values_at(:end, :char_end)

  keyword.merge!(
    type: :yield,
    body: [args_add_block],
    end: last_line,
    char_end: last_char
  )
end
2395
+
2396
# Handles the yield0 parser event: the yield keyword on its own, with no
# values. (The yield event covers the form that yields one or more values.)
# The node is the keyword scanner event re-tagged as :yield0.
def on_yield0
  keyword = find_scanner_event(:@kw, 'yield')
  keyword.merge!(type: :yield0)
end
2403
+
2404
# Handles the zsuper parser event: the super keyword on its own, with no
# arguments. (The super event covers the form called with one or more
# values.) The node is the keyword scanner event re-tagged as :zsuper.
def on_zsuper
  keyword = find_scanner_event(:@kw, 'super')
  keyword.merge!(type: :zsuper)
end
767
2411
  end
768
2412
 
769
2413
  # If this is the main file we're executing, then most likely this is being
770
- # executed from the parse.js spawn. In that case, read the ruby source from
2414
+ # executed from the parser.js spawn. In that case, read the ruby source from
771
2415
  # stdin and report back the AST over stdout.
772
2416
 
773
2417
  if $0 == __FILE__