prettier 0.12.2 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ const {
2
+ concat,
3
+ dedent,
4
+ group,
5
+ hardline,
6
+ indent,
7
+ join,
8
+ line,
9
+ literalline,
10
+ markAsRoot,
11
+ softline,
12
+ trim
13
+ } = require("../prettier");
14
+
15
+ module.exports = {
16
+ "@__end__": (path, _opts, _print) => {
17
+ const { body } = path.getValue();
18
+ return concat([trim, "__END__", literalline, body]);
19
+ },
20
+ bodystmt: (path, opts, print) => {
21
+ const [_statements, rescue, elseClause, ensure] = path.getValue().body;
22
+ const parts = [path.call(print, "body", 0)];
23
+
24
+ if (rescue) {
25
+ parts.push(dedent(concat([hardline, path.call(print, "body", 1)])));
26
+ }
27
+
28
+ if (elseClause) {
29
+ // Before Ruby 2.6, this piece of bodystmt was an explicit "else" node
30
+ const stmts =
31
+ elseClause.type === "else"
32
+ ? path.call(print, "body", 2, "body", 0)
33
+ : path.call(print, "body", 2);
34
+
35
+ parts.push(concat([dedent(concat([hardline, "else"])), hardline, stmts]));
36
+ }
37
+
38
+ if (ensure) {
39
+ parts.push(dedent(concat([hardline, path.call(print, "body", 3)])));
40
+ }
41
+
42
+ return group(concat(parts));
43
+ },
44
+ embdoc: (path, _opts, _print) => concat([trim, path.getValue().body]),
45
+ paren: (path, opts, print) => {
46
+ if (!path.getValue().body[0]) {
47
+ return "()";
48
+ }
49
+
50
+ let content = path.call(print, "body", 0);
51
+
52
+ if (
53
+ ["args", "args_add_star", "args_add_block"].includes(
54
+ path.getValue().body[0].type
55
+ )
56
+ ) {
57
+ content = join(concat([",", line]), content);
58
+ }
59
+
60
+ return group(
61
+ concat([
62
+ "(",
63
+ indent(concat([softline, content])),
64
+ concat([softline, ")"])
65
+ ])
66
+ );
67
+ },
68
+ program: (path, opts, print) =>
69
+ markAsRoot(
70
+ concat([join(literalline, path.map(print, "body")), literalline])
71
+ ),
72
+ stmts: (path, opts, print) => {
73
+ const stmts = path.getValue().body;
74
+ const parts = [];
75
+ let lineNo = null;
76
+
77
+ stmts.forEach((stmt, index) => {
78
+ if (stmt.type === "void_stmt") {
79
+ return;
80
+ }
81
+
82
+ const printed = path.call(print, "body", index);
83
+
84
+ if (lineNo === null) {
85
+ parts.push(printed);
86
+ } else if (
87
+ stmt.start - lineNo > 1 ||
88
+ [stmt.type, stmts[index - 1].type].includes("access_ctrl")
89
+ ) {
90
+ parts.push(hardline, hardline, printed);
91
+ } else if (
92
+ stmt.start !== lineNo ||
93
+ path.getParentNode().type !== "string_embexpr"
94
+ ) {
95
+ parts.push(hardline, printed);
96
+ } else {
97
+ parts.push("; ", printed);
98
+ }
99
+
100
+ lineNo = stmt.end;
101
+ });
102
+
103
+ return concat(parts);
104
+ }
105
+ };
data/src/nodes/strings.js CHANGED
@@ -6,8 +6,8 @@ const {
6
6
  join,
7
7
  literalline,
8
8
  softline
9
- } = require("../builders");
10
- const { concatBody, empty, makeList, surround } = require("../utils");
9
+ } = require("../prettier");
10
+ const { concatBody, empty, makeList, prefix, surround } = require("../utils");
11
11
  const escapePattern = require("../escapePattern");
12
12
 
13
13
  // If there is some part of this string that matches an escape sequence or that
@@ -72,6 +72,11 @@ module.exports = {
72
72
  const quote = preferSingleQuotes ? "'" : '"';
73
73
  return body.length === 2 ? concat([quote, body.slice(1), quote]) : body;
74
74
  },
75
+ dyna_symbol: (path, opts, print) => {
76
+ const { quote } = path.getValue().body[0];
77
+
78
+ return concat([":", quote, concat(path.call(print, "body", 0)), quote]);
79
+ },
75
80
  heredoc: (path, opts, print) => {
76
81
  const { beging, ending } = path.getValue();
77
82
 
@@ -129,6 +134,8 @@ module.exports = {
129
134
 
130
135
  return concat([quote].concat(parts).concat([quote]));
131
136
  },
137
+ symbol: prefix(":"),
138
+ symbol_literal: concatBody,
132
139
  word_add: concatBody,
133
140
  word_new: empty,
134
141
  xstring: makeList,
@@ -4,6 +4,13 @@
4
4
  const source = process.env.RBPRETTIER ? "../node_modules/prettier" : "prettier";
5
5
 
6
6
  // eslint-disable-next-line import/no-dynamic-require
7
- const { builders, utils } = require(source).doc;
7
+ const prettier = require(source);
8
8
 
9
- module.exports = Object.assign({}, builders, utils);
9
+ // Just combine all the things into one big object so that we can import
10
+ // whatever we need from prettier without having to dive too deeply.
11
+ module.exports = Object.assign(
12
+ {},
13
+ prettier.doc.builders,
14
+ prettier.doc.utils,
15
+ prettier.util
16
+ );
data/src/ripper.rb CHANGED
@@ -9,448 +9,503 @@ end
9
9
  require 'json' unless defined?(JSON)
10
10
  require 'ripper'
11
11
 
12
- module Layer
12
+ class RipperJS < Ripper
13
+ private
14
+
15
+ # Scanner events occur when the lexer hits a new token, like a keyword or an
16
+ # end. These nodes always contain just one argument which is a string
17
+ # representing the content. For the most part these can just be printed
18
+ # directly, with very few exceptions.
19
+ SCANNER_EVENTS.each do |event|
20
+ define_method(:"on_#{event}") do |body|
21
+ { type: :"@#{event}", body: body, start: lineno, end: lineno }
22
+ end
23
+ end
24
+
25
+ # Parser events represent nodes in the ripper abstract syntax tree. The event
26
+ # is reported after the children of the node have already been built.
27
+ PARSER_EVENTS.each do |event|
28
+ define_method(:"on_#{event}") do |*body|
29
+ min = body.map { |part| part.is_a?(Hash) ? part[:start] : lineno }.min
30
+ { type: event, body: body, start: min || lineno, end: lineno }
31
+ end
32
+ end
33
+
13
34
  # Some nodes are lists that come back from the parser. They always start with
14
- # a *_new node (or in the case of string, *_content) and each additional node
15
- # in the list is a *_add node. This layer takes those nodes and turns them
16
- # into one node with an array body.
17
- module Lists
18
- events = %i[
19
- args
20
- mlhs
21
- mrhs
22
- qsymbols
23
- qwords
24
- regexp
25
- stmts
26
- string
27
- symbols
28
- words
29
- xstring
30
- ]
31
-
32
- private
33
-
34
- events.each do |event|
35
- suffix = event == :string ? 'content' : 'new'
36
-
37
- define_method(:"on_#{event}_#{suffix}") do
38
- { type: event, body: [], start: lineno, end: lineno }
39
- end
40
-
41
- define_method(:"on_#{event}_add") do |parts, part|
42
- parts.tap do |node|
43
- node[:body] << part
44
- node[:end] = lineno
35
+ # a `*_new` node (or in the case of string, `*_content`) and each additional
36
+ # node in the list is a `*_add` node. This module takes those nodes and turns
37
+ # them into one node with an array body.
38
+ #
39
+ # For example, the statement `[a, b, c]` would be parsed as:
40
+ #
41
+ # [:args_add,
42
+ # [:args_add,
43
+ # [:args_add,
44
+ # [:args_new],
45
+ # [:vcall, [:@ident, "a", [1, 1]]]
46
+ # ],
47
+ # [:vcall, [:@ident, "b", [1, 4]]]
48
+ # ],
49
+ # [:vcall, [:@ident, "c", [1, 7]]]
50
+ # ]
51
+ #
52
+ # But after this module is applied that is instead parsed as:
53
+ #
54
+ # [:args,
55
+ # [
56
+ # [:vcall, [:@ident, "a", [1, 1]]],
57
+ # [:vcall, [:@ident, "b", [1, 4]]],
58
+ # [:vcall, [:@ident, "c", [1, 7]]]
59
+ # ]
60
+ # ]
61
+ #
62
+ # This makes it a lot easier to join things with commas, and ends up resulting
63
+ # in a much flatter `prettier` tree once it has been converted. Note that
64
+ # because of this module some extra node types are added (the aggregate of
65
+ # the previous `*_add` nodes) and some nodes now have arrays in places where
66
+ # they previously had single nodes.
67
+ prepend(
68
+ Module.new do
69
+ events = %i[
70
+ args
71
+ mlhs
72
+ mrhs
73
+ qsymbols
74
+ qwords
75
+ regexp
76
+ stmts
77
+ string
78
+ symbols
79
+ words
80
+ xstring
81
+ ]
82
+
83
+ private
84
+
85
+ events.each do |event|
86
+ suffix = event == :string ? 'content' : 'new'
87
+
88
+ define_method(:"on_#{event}_#{suffix}") do
89
+ { type: event, body: [], start: lineno, end: lineno }
90
+ end
91
+
92
+ define_method(:"on_#{event}_add") do |parts, part|
93
+ parts.tap do |node|
94
+ node[:body] << part
95
+ node[:end] = lineno
96
+ end
45
97
  end
46
98
  end
47
99
  end
48
- end
100
+ )
49
101
 
50
102
  # For most nodes, it's enough to look at the child nodes to determine the
51
103
  # start of the parent node. However, for some nodes it's necessary to keep
52
104
  # track of the keywords as they come in from the lexer and to modify the start
53
- # node once we have it.
54
- module StartLine
55
- events = %i[begin else elsif ensure rescue until while]
105
+ # node once we have it. We need accurate start and end lines so that we can
106
+ # embed block comments into the right kind of node.
107
+ prepend(
108
+ Module.new do
109
+ events = %i[begin else elsif ensure if rescue until while]
56
110
 
57
- def initialize(*args)
58
- super(*args)
59
- @keywords = []
60
- end
111
+ def initialize(*args)
112
+ super(*args)
113
+ @keywords = []
114
+ end
61
115
 
62
- def self.prepended(base)
63
- base.attr_reader :keywords
64
- end
116
+ def self.prepended(base)
117
+ base.attr_reader :keywords
118
+ end
65
119
 
66
- private
120
+ private
67
121
 
68
- def find_start(body)
69
- keywords[keywords.rindex { |keyword| keyword[:body] == body }][:start]
70
- end
122
+ def find_start(body)
123
+ keywords[keywords.rindex { |keyword| keyword[:body] == body }][:start]
124
+ end
71
125
 
72
- events.each do |event|
73
- keyword = event.to_s
126
+ events.each do |event|
127
+ keyword = event.to_s
74
128
 
75
- define_method(:"on_#{event}") do |*body|
76
- super(*body).tap { |sexp| sexp.merge!(start: find_start(keyword)) }
129
+ define_method(:"on_#{event}") do |*body|
130
+ super(*body).tap { |sexp| sexp.merge!(start: find_start(keyword)) }
131
+ end
77
132
  end
78
- end
79
133
 
80
- def on_kw(body)
81
- super(body).tap { |sexp| keywords << sexp }
82
- end
134
+ def on_kw(body)
135
+ super(body).tap { |sexp| keywords << sexp }
136
+ end
83
137
 
84
- def on_program(*body)
85
- super(*body).tap { |sexp| sexp.merge!(start: 1) }
138
+ def on_program(*body)
139
+ super(*body).tap { |sexp| sexp.merge!(start: 1) }
140
+ end
86
141
  end
87
- end
142
+ )
88
143
 
89
- # Nodes that are always on their own line occur when the lexer is in the
90
- # EXPR_BEG node. Those comments are tracked within the @block_comments
91
- # instance variable. Then for each node that could contain them, we attach
92
- # them after the node has been built.
93
- module BlockComments
94
- events = {
95
- begin: [0, :body, 0],
96
- bodystmt: [0],
97
- class: [2, :body, 0],
98
- def: [2, :body, 0],
99
- defs: [4, :body, 0],
100
- else: [0],
101
- elsif: [1],
102
- ensure: [0],
103
- if: [1],
104
- program: [0],
105
- rescue: [2],
106
- sclass: [1, :body, 0],
107
- unless: [1],
108
- until: [1],
109
- when: [1],
110
- while: [1]
111
- }
112
-
113
- def initialize(*args)
114
- super(*args)
115
- @block_comments = []
116
- @current_embdoc = nil
117
- end
144
+ # This layer keeps track of inline comments as they come in. Ripper itself
145
+ # doesn't attach comments to the AST, so we need to do it manually. In this
146
+ # case, inline comments are defined as any comments wherein the lexer state is
147
+ # not equal to EXPR_BEG (tracked in the BlockComments layer).
148
+ prepend(
149
+ Module.new do
150
+ # Certain events need to steal the comments from their children in order
151
+ # for them to display properly.
152
+ events = {
153
+ aref: [:body, 1],
154
+ args_add_block: [:body, 0],
155
+ break: [:body, 0],
156
+ command: [:body, 1],
157
+ command_call: [:body, 3],
158
+ regexp_literal: [:body, 0],
159
+ string_literal: [:body, 0],
160
+ symbol_literal: [:body, 0]
161
+ }
118
162
 
119
- def self.prepended(base)
120
- base.attr_reader :block_comments, :current_embdoc
121
- end
163
+ def initialize(*args)
164
+ super(*args)
165
+ @inline_comments = []
166
+ @last_sexp = nil
167
+ end
168
+
169
+ def self.prepended(base)
170
+ base.attr_reader :inline_comments, :last_sexp
171
+ end
172
+
173
+ private
122
174
 
123
- private
175
+ events.each do |event, path|
176
+ define_method(:"on_#{event}") do |*body|
177
+ @last_sexp =
178
+ super(*body).tap do |sexp|
179
+ comments = (sexp.dig(*path) || {}).delete(:comments)
180
+ sexp.merge!(comments: comments) if comments
181
+ end
182
+ end
183
+ end
124
184
 
125
- def attach_comments(sexp, stmts)
126
- range = sexp[:start]..sexp[:end]
127
- comments =
128
- block_comments.group_by { |comment| range.include?(comment[:start]) }
185
+ SPECIAL_LITERALS = %i[qsymbols qwords symbols words].freeze
129
186
 
130
- if comments[true]
131
- stmts[:body] =
132
- (stmts[:body] + comments[true]).sort_by { |node| node[:start] }
187
+ # Special array literals are handled in different ways and so their
188
+ # comments need to be passed up to their parent array node.
189
+ def on_array(*body)
190
+ @last_sexp =
191
+ super(*body).tap do |sexp|
192
+ next unless SPECIAL_LITERALS.include?(body.dig(0, :type))
133
193
 
134
- @block_comments = comments.fetch(false) { [] }
194
+ comments = sexp.dig(:body, 0).delete(:comments)
195
+ sexp.merge!(comments: comments) if comments
196
+ end
135
197
  end
136
- end
137
198
 
138
- events.each do |event, path|
139
- define_method(:"on_#{event}") do |*body|
140
- super(*body).tap { |sexp| attach_comments(sexp, body.dig(*path)) }
199
+ # Handling this specially because we want to pull the comments out of both
200
+ # child nodes.
201
+ def on_assoc_new(*body)
202
+ @last_sexp =
203
+ super(*body).tap do |sexp|
204
+ comments =
205
+ (sexp.dig(:body, 0).delete(:comments) || []) +
206
+ (sexp.dig(:body, 1).delete(:comments) || [])
207
+
208
+ sexp.merge!(comments: comments) if comments.any?
209
+ end
141
210
  end
142
- end
143
211
 
144
- def on_comment(body)
145
- super(body).tap do |sexp|
146
- block_comments << sexp if RipperJS.lex_state_name(state) == 'EXPR_BEG'
212
+ # Most scanner events don't stand on their own as s-expressions, but the
213
+ # CHAR scanner event is effectively just a string, so we need to track it
214
+ # as an s-expression.
215
+ def on_CHAR(body)
216
+ @last_sexp = super(body)
147
217
  end
148
- end
149
218
 
150
- def on_embdoc_beg(comment)
151
- @current_embdoc = {
152
- type: :embdoc, body: comment, start: lineno, end: lineno
153
- }
154
- end
219
+ # We need to know exactly where the comment is, switching off the current
220
+ # lexer state. In Ruby 2.7.0-dev, that's defined as:
221
+ #
222
+ # enum lex_state_bits {
223
+ # EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
224
+ # EXPR_END_bit, /* newline significant, +/- is an operator. */
225
+ # EXPR_ENDARG_bit, /* ditto, and unbound braces. */
226
+ # EXPR_ENDFN_bit, /* ditto, and unbound braces. */
227
+ # EXPR_ARG_bit, /* newline significant, +/- is an operator. */
228
+ # EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
229
+ # EXPR_MID_bit, /* newline significant, +/- is an operator. */
230
+ # EXPR_FNAME_bit, /* ignore newline, no reserved words. */
231
+ # EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
232
+ # EXPR_CLASS_bit, /* immediate after `class', no here document. */
233
+ # EXPR_LABEL_bit, /* flag bit, label is allowed. */
234
+ # EXPR_LABELED_bit, /* flag bit, just after a label. */
235
+ # EXPR_FITEM_bit, /* symbol literal as FNAME. */
236
+ # EXPR_MAX_STATE
237
+ # };
238
+ def on_comment(body)
239
+ sexp = { type: :@comment, body: body.chomp, start: lineno, end: lineno }
240
+
241
+ case RipperJS.lex_state_name(state)
242
+ when 'EXPR_END', 'EXPR_ARG|EXPR_LABELED', 'EXPR_ENDFN'
243
+ last_sexp.merge!(comments: [sexp])
244
+ when 'EXPR_CMDARG', 'EXPR_END|EXPR_ENDARG', 'EXPR_ENDARG', 'EXPR_ARG',
245
+ 'EXPR_FNAME|EXPR_FITEM', 'EXPR_CLASS', 'EXPR_END|EXPR_LABEL'
246
+ inline_comments << sexp
247
+ when 'EXPR_BEG|EXPR_LABEL', 'EXPR_MID'
248
+ inline_comments << sexp.merge!(break: true)
249
+ when 'EXPR_DOT'
250
+ last_sexp.merge!(comments: [sexp.merge!(break: true)])
251
+ end
155
252
 
156
- def on_embdoc(comment)
157
- @current_embdoc[:body] << comment
158
- end
253
+ sexp
254
+ end
159
255
 
160
- def on_embdoc_end(comment)
161
- @current_embdoc[:body] << comment.chomp
162
- @block_comments << @current_embdoc
163
- @current_embdoc = nil
164
- end
256
+ defined = private_instance_methods(false).grep(/\Aon_/) { $'.to_sym }
165
257
 
166
- def on_method_add_block(*body)
167
- super(*body).tap do |sexp|
168
- stmts = body[1][:body][1]
169
- stmts = stmts[:type] == :stmts ? stmts : body[1][:body][1][:body][0]
258
+ (Ripper::PARSER_EVENTS - defined).each do |event|
259
+ define_method(:"on_#{event}") do |*body|
260
+ super(*body).tap do |sexp|
261
+ @last_sexp = sexp
262
+ next if inline_comments.empty?
170
263
 
171
- attach_comments(sexp, stmts)
264
+ sexp[:comments] = inline_comments.reverse
265
+ @inline_comments = []
266
+ end
267
+ end
172
268
  end
173
269
  end
174
- end
270
+ )
175
271
 
176
- # Tracking heredocs in somewhat interesting. Straight-line heredocs are
177
- # reported as strings, whereas squiggly-line heredocs are reported as
178
- # heredocs.
179
- module Heredocs
180
- def initialize(*args)
181
- super(*args)
182
- @heredoc_stack = []
183
- end
272
+ # Nodes that are always on their own line occur when the lexer is in the
273
+ # EXPR_BEG state. Those comments are tracked within the @block_comments
274
+ # instance variable. Then for each node that could contain them, we attach
275
+ # them after the node has been built.
276
+ prepend(
277
+ Module.new do
278
+ events = {
279
+ begin: [0, :body, 0],
280
+ bodystmt: [0],
281
+ class: [2, :body, 0],
282
+ def: [2, :body, 0],
283
+ defs: [4, :body, 0],
284
+ else: [0],
285
+ elsif: [1],
286
+ ensure: [0],
287
+ if: [1],
288
+ program: [0],
289
+ rescue: [2],
290
+ sclass: [1, :body, 0],
291
+ unless: [1],
292
+ until: [1],
293
+ when: [1],
294
+ while: [1]
295
+ }
184
296
 
185
- def self.prepended(base)
186
- base.attr_reader :heredoc_stack
187
- end
297
+ def initialize(*args)
298
+ super(*args)
299
+ @block_comments = []
300
+ @current_embdoc = nil
301
+ end
188
302
 
189
- private
303
+ def self.prepended(base)
304
+ base.attr_reader :block_comments, :current_embdoc
305
+ end
190
306
 
191
- def on_embexpr_beg(body)
192
- super(body).tap { |sexp| heredoc_stack << sexp }
193
- end
307
+ private
194
308
 
195
- def on_embexpr_end(body)
196
- super(body).tap { heredoc_stack.pop }
197
- end
309
+ def attach_comments(sexp, stmts)
310
+ range = sexp[:start]..sexp[:end]
311
+ comments =
312
+ block_comments.group_by { |comment| range.include?(comment[:start]) }
198
313
 
199
- def on_heredoc_beg(beging)
200
- heredoc = { type: :heredoc, beging: beging, start: lineno, end: lineno }
201
- heredoc_stack << heredoc
202
- end
314
+ if comments[true]
315
+ stmts[:body] =
316
+ (stmts[:body] + comments[true]).sort_by { |node| node[:start] }
203
317
 
204
- def on_heredoc_end(ending)
205
- heredoc_stack[-1].merge!(ending: ending.chomp, end: lineno)
206
- end
318
+ @block_comments = comments.fetch(false) { [] }
319
+ end
320
+ end
207
321
 
208
- def on_heredoc_dedent(string, _width)
209
- heredoc = heredoc_stack.pop
210
- string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
211
- end
322
+ events.each do |event, path|
323
+ define_method(:"on_#{event}") do |*body|
324
+ super(*body).tap { |sexp| attach_comments(sexp, body.dig(*path)) }
325
+ end
326
+ end
212
327
 
213
- def on_string_literal(string)
214
- heredoc = heredoc_stack[-1]
328
+ def on_comment(body)
329
+ super(body).tap do |sexp|
330
+ block_comments << sexp if RipperJS.lex_state_name(state) == 'EXPR_BEG'
331
+ end
332
+ end
215
333
 
216
- if heredoc && string[:type] != :heredoc && heredoc[:type] == :heredoc
217
- heredoc_stack.pop
218
- string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
219
- else
220
- super
334
+ def on_embdoc_beg(comment)
335
+ @current_embdoc = {
336
+ type: :embdoc, body: comment, start: lineno, end: lineno
337
+ }
221
338
  end
222
- end
223
- end
224
339
 
225
- # These are the event types that contain _actual_ string content. If there is
226
- # an encoding magic comment at the top of the file, ripper will actually
227
- # change into that encoding for the storage of the string. This will break
228
- # everything, so we need to force the encoding back into UTF-8 so that
229
- # the JSON library won't break.
230
- module Encoding
231
- events = %w[comment ident tstring_content]
340
+ def on_embdoc(comment)
341
+ @current_embdoc[:body] << comment
342
+ end
232
343
 
233
- events.each do |event|
234
- define_method(:"on_#{event}") do |body|
235
- super(body.force_encoding('UTF-8'))
344
+ def on_embdoc_end(comment)
345
+ @current_embdoc[:body] << comment.chomp
346
+ @block_comments << @current_embdoc
347
+ @current_embdoc = nil
236
348
  end
237
- end
238
- end
239
349
 
240
- # This layer keeps track of inline comments as they come in. Ripper itself
241
- # doesn't attach comments to the AST, so we need to do it manually. In this
242
- # case, inline comments are defined as any comments wherein the lexer state is
243
- # not equal to EXPR_BEG (tracked in the BlockComments layer).
244
- module InlineComments
245
- # Certain events needs to steal the comments from their children in order
246
- # for them to display properly.
247
- events = {
248
- args_add_block: [:body, 0],
249
- break: [:body, 0],
250
- command: [:body, 1],
251
- command_call: [:body, 3],
252
- regexp_literal: [:body, 0],
253
- string_literal: [:body, 0],
254
- symbol_literal: [:body, 0]
255
- }
256
-
257
- def initialize(*args)
258
- super(*args)
259
- @inline_comments = []
260
- @last_sexp = nil
261
- end
350
+ def on_method_add_block(*body)
351
+ super(*body).tap do |sexp|
352
+ stmts = body[1][:body][1]
353
+ stmts = stmts[:type] == :stmts ? stmts : body[1][:body][1][:body][0]
262
354
 
263
- def self.prepended(base)
264
- base.attr_reader :inline_comments, :last_sexp
355
+ attach_comments(sexp, stmts)
356
+ end
357
+ end
265
358
  end
359
+ )
266
360
 
267
- private
361
+ # Tracking heredocs is somewhat interesting. Straight-line heredocs are
362
+ # reported as strings, whereas squiggly-line heredocs are reported as
363
+ # heredocs. We track the start and matching end of the heredoc as "beging" and
364
+ # "ending" respectively.
365
+ prepend(
366
+ Module.new do
367
+ def initialize(*args)
368
+ super(*args)
369
+ @heredoc_stack = []
370
+ end
268
371
 
269
- events.each do |event, path|
270
- define_method(:"on_#{event}") do |*body|
271
- @last_sexp =
272
- super(*body).tap do |sexp|
273
- comments = (sexp.dig(*path) || {}).delete(:comments)
274
- sexp.merge!(comments: comments) if comments
275
- end
372
+ def self.prepended(base)
373
+ base.attr_reader :heredoc_stack
276
374
  end
277
- end
278
375
 
279
- SPECIAL_LITERALS = %i[qsymbols qwords symbols words].freeze
376
+ private
280
377
 
281
- # Special array literals are handled in different ways and so their comments
282
- # need to be passed up to their parent array node.
283
- def on_array(*body)
284
- @last_sexp =
285
- super(*body).tap do |sexp|
286
- next unless SPECIAL_LITERALS.include?(body.dig(0, :type))
378
+ def on_embexpr_beg(body)
379
+ super(body).tap { |sexp| heredoc_stack << sexp }
380
+ end
287
381
 
288
- comments = sexp.dig(:body, 0).delete(:comments)
289
- sexp.merge!(comments: comments) if comments
290
- end
291
- end
382
+ def on_embexpr_end(body)
383
+ super(body).tap { heredoc_stack.pop }
384
+ end
292
385
 
293
- # Handling this specially because we want to pull the comments out of both
294
- # child nodes.
295
- def on_assoc_new(*body)
296
- @last_sexp =
297
- super(*body).tap do |sexp|
298
- comments =
299
- (sexp.dig(:body, 0).delete(:comments) || []) +
300
- (sexp.dig(:body, 1).delete(:comments) || [])
386
+ def on_heredoc_beg(beging)
387
+ heredoc = { type: :heredoc, beging: beging, start: lineno, end: lineno }
388
+ heredoc_stack << heredoc
389
+ end
301
390
 
302
- sexp.merge!(comments: comments) if comments.any?
303
- end
304
- end
391
+ def on_heredoc_end(ending)
392
+ heredoc_stack[-1].merge!(ending: ending.chomp, end: lineno)
393
+ end
305
394
 
306
- # Most scanner events don't stand on their own a s-expressions, but the CHAR
307
- # scanner event is effectively just a string, so we need to track it as a
308
- # s-expression.
309
- def on_CHAR(body)
310
- @last_sexp = super(body)
311
- end
395
+ def on_heredoc_dedent(string, _width)
396
+ heredoc = heredoc_stack.pop
397
+ string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
398
+ end
312
399
 
313
- # We need to know exactly where the comment is, switching off the current
314
- # lexer state. In Ruby 2.7.0-dev, that's defined as:
315
- #
316
- # enum lex_state_bits {
317
- # EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
318
- # EXPR_END_bit, /* newline significant, +/- is an operator. */
319
- # EXPR_ENDARG_bit, /* ditto, and unbound braces. */
320
- # EXPR_ENDFN_bit, /* ditto, and unbound braces. */
321
- # EXPR_ARG_bit, /* newline significant, +/- is an operator. */
322
- # EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
323
- # EXPR_MID_bit, /* newline significant, +/- is an operator. */
324
- # EXPR_FNAME_bit, /* ignore newline, no reserved words. */
325
- # EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
326
- # EXPR_CLASS_bit, /* immediate after `class', no here document. */
327
- # EXPR_LABEL_bit, /* flag bit, label is allowed. */
328
- # EXPR_LABELED_bit, /* flag bit, just after a label. */
329
- # EXPR_FITEM_bit, /* symbol literal as FNAME. */
330
- # EXPR_MAX_STATE
331
- # };
332
- def on_comment(body)
333
- sexp = { type: :@comment, body: body.chomp, start: lineno, end: lineno }
334
-
335
- case RipperJS.lex_state_name(state)
336
- when 'EXPR_END', 'EXPR_ARG|EXPR_LABELED', 'EXPR_ENDFN'
337
- last_sexp.merge!(comments: [sexp])
338
- when 'EXPR_CMDARG', 'EXPR_END|EXPR_ENDARG', 'EXPR_ENDARG', 'EXPR_ARG',
339
- 'EXPR_FNAME|EXPR_FITEM', 'EXPR_CLASS', 'EXPR_END|EXPR_LABEL'
340
- inline_comments << sexp
341
- when 'EXPR_BEG|EXPR_LABEL', 'EXPR_MID'
342
- inline_comments << sexp.merge!(break: true)
343
- when 'EXPR_DOT'
344
- last_sexp.merge!(comments: [sexp.merge!(break: true)])
345
- end
346
-
347
- sexp
348
- end
400
+ def on_string_literal(string)
401
+ heredoc = heredoc_stack[-1]
349
402
 
350
- defined_events = private_instance_methods(false).grep(/\Aon_/) { $'.to_sym }
403
+ if heredoc && string[:type] != :heredoc && heredoc[:type] == :heredoc
404
+ heredoc_stack.pop
405
+ string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
406
+ else
407
+ super
408
+ end
409
+ end
410
+ end
411
+ )
351
412
 
352
- (Ripper::PARSER_EVENTS - defined_events).each do |event|
353
- define_method(:"on_#{event}") do |*body|
354
- super(*body).tap do |sexp|
355
- @last_sexp = sexp
356
- next if inline_comments.empty?
413
+ # These are the event types that contain _actual_ string content. If there is
414
+ # an encoding magic comment at the top of the file, ripper will actually
415
+ # change into that encoding for the storage of the string. This will break
416
+ # everything, so we need to force the encoding back into UTF-8 so that
417
+ # the JSON library won't break.
418
+ prepend(
419
+ Module.new do
420
+ private
357
421
 
358
- sexp[:comments] = inline_comments.reverse
359
- @inline_comments = []
422
+ %w[comment ident tstring_content].each do |event|
423
+ define_method(:"on_#{event}") do |body|
424
+ super(body.force_encoding('UTF-8'))
360
425
  end
361
426
  end
362
427
  end
363
- end
428
+ )
364
429
 
365
430
  # Handles __END__ syntax, which allows individual scripts to keep content
366
- # after the main ruby code that can be read through DATA.
367
- module Ending
368
- def initialize(source, *args)
369
- super(source, *args)
370
- @source = source
371
- @ending = nil
372
- end
431
+ # after the main ruby code that can be read through DATA. Which looks like:
432
+ #
433
+ # foo.bar
434
+ #
435
+ # __END__
436
+ # some other content that isn't read by ripper normally
437
+ prepend(
438
+ Module.new do
439
+ def initialize(source, *args)
440
+ super(source, *args)
441
+ @source = source
442
+ @ending = nil
443
+ end
373
444
 
374
- def self.prepended(base)
375
- base.attr_reader :source, :ending
376
- end
445
+ def self.prepended(base)
446
+ base.attr_reader :source, :ending
447
+ end
377
448
 
378
- private
449
+ private
379
450
 
380
- def on___end__(body)
381
- @ending = super(source.split("\n")[lineno..-1].join("\n"))
382
- end
451
+ def on___end__(body)
452
+ @ending = super(source.split("\n")[lineno..-1].join("\n"))
453
+ end
383
454
 
384
- def on_program(*body)
385
- super(*body).tap { |sexp| sexp[:body][0][:body] << ending if ending }
455
+ def on_program(*body)
456
+ super(*body).tap { |sexp| sexp[:body][0][:body] << ending if ending }
457
+ end
386
458
  end
387
- end
459
+ )
388
460
 
389
- # Adds the used quote type onto string nodes.
390
- module Strings
391
- private
461
+ # Adds the used quote type onto string nodes. This is necessary because we're
462
+ # going to have to stick to whatever quote the user chose if there are escape
463
+ # sequences within the string. For example, if you have '\n' we can't switch
464
+ # to double quotes without changing what it means.
465
+ prepend(
466
+ Module.new do
467
+ private
392
468
 
393
- def on_tstring_end(quote)
394
- last_sexp.merge!(quote: quote)
395
- end
469
+ def on_tstring_end(quote)
470
+ last_sexp.merge!(quote: quote)
471
+ end
396
472
 
397
- def on_label_end(quote)
398
- last_sexp.merge!(quote: quote[0]) # quote is ": or ':
473
+ def on_label_end(quote)
474
+ last_sexp.merge!(quote: quote[0]) # quote is ": or ':
475
+ end
399
476
  end
400
- end
477
+ )
401
478
 
402
479
  # Normally access controls are reported as vcall nodes. This module creates a
403
- # new node type to explicitly track those nodes instead.
404
- module AccessControls
405
- def initialize(source, *args)
406
- super(source, *args)
407
- @lines = source.split("\n")
408
- end
409
-
410
- def self.prepended(base)
411
- base.attr_reader :lines
412
- end
413
-
414
- private
480
+ # new node type to explicitly track those nodes instead, so that the printer
481
+ # can add new lines as necessary.
482
+ prepend(
483
+ Module.new do
484
+ KEYWORDS = %w[private protected public].freeze
415
485
 
416
- def on_vcall(ident)
417
- super(ident).tap do |sexp|
418
- if !%w[private protected public].include?(ident[:body]) ||
419
- ident[:body] != lines[lineno - 1].strip
420
- next
421
- end
486
+ def initialize(source, *args)
487
+ super(source, *args)
488
+ @lines = source.split("\n")
489
+ end
422
490
 
423
- sexp.merge!(type: :access_ctrl)
491
+ def self.prepended(base)
492
+ base.attr_reader :lines
424
493
  end
425
- end
426
- end
427
- end
428
494
 
429
- class RipperJS < Ripper
430
- private
495
+ private
431
496
 
432
- SCANNER_EVENTS.each do |event|
433
- define_method(:"on_#{event}") do |body|
434
- { type: :"@#{event}", body: body, start: lineno, end: lineno }
435
- end
436
- end
497
+ def on_vcall(ident)
498
+ super(ident).tap do |sexp|
499
+ if !KEYWORDS.include?(ident[:body]) ||
500
+ ident[:body] != lines[lineno - 1].strip
501
+ next
502
+ end
437
503
 
438
- PARSER_EVENTS.each do |event|
439
- define_method(:"on_#{event}") do |*body|
440
- min = body.map { |part| part.is_a?(Hash) ? part[:start] : lineno }.min
441
- { type: event, body: body, start: min || lineno, end: lineno }
504
+ sexp.merge!(type: :access_ctrl)
505
+ end
506
+ end
442
507
  end
443
- end
444
-
445
- prepend Layer::Lists
446
- prepend Layer::StartLine
447
- prepend Layer::InlineComments
448
- prepend Layer::BlockComments
449
- prepend Layer::Heredocs
450
- prepend Layer::Encoding
451
- prepend Layer::Ending
452
- prepend Layer::Strings
453
- prepend Layer::AccessControls
508
+ )
454
509
 
455
510
  # When the only statement inside of a `def` node is a `begin` node, then you
456
511
  # can safely replace the body of the `def` with the body of the `begin`. For
@@ -529,6 +584,10 @@ class RipperJS < Ripper
529
584
  )
530
585
  end
531
586
 
587
+ # If this is the main file we're executing, then most likely this is being
588
+ # executed from the parse.js spawn. In that case, read the ruby source from
589
+ # stdin and report back the AST over stdout.
590
+
532
591
  if $0 == __FILE__
533
592
  builder = RipperJS.new($stdin.read)
534
593
  response = builder.parse