prettier 0.12.2 → 0.12.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,105 @@
1
+ const {
2
+ concat,
3
+ dedent,
4
+ group,
5
+ hardline,
6
+ indent,
7
+ join,
8
+ line,
9
+ literalline,
10
+ markAsRoot,
11
+ softline,
12
+ trim
13
+ } = require("../prettier");
14
+
15
+ module.exports = {
16
+ "@__end__": (path, _opts, _print) => {
17
+ const { body } = path.getValue();
18
+ return concat([trim, "__END__", literalline, body]);
19
+ },
20
+ bodystmt: (path, opts, print) => {
21
+ const [_statements, rescue, elseClause, ensure] = path.getValue().body;
22
+ const parts = [path.call(print, "body", 0)];
23
+
24
+ if (rescue) {
25
+ parts.push(dedent(concat([hardline, path.call(print, "body", 1)])));
26
+ }
27
+
28
+ if (elseClause) {
29
+ // Before Ruby 2.6, this piece of bodystmt was an explicit "else" node
30
+ const stmts =
31
+ elseClause.type === "else"
32
+ ? path.call(print, "body", 2, "body", 0)
33
+ : path.call(print, "body", 2);
34
+
35
+ parts.push(concat([dedent(concat([hardline, "else"])), hardline, stmts]));
36
+ }
37
+
38
+ if (ensure) {
39
+ parts.push(dedent(concat([hardline, path.call(print, "body", 3)])));
40
+ }
41
+
42
+ return group(concat(parts));
43
+ },
44
+ embdoc: (path, _opts, _print) => concat([trim, path.getValue().body]),
45
+ paren: (path, opts, print) => {
46
+ if (!path.getValue().body[0]) {
47
+ return "()";
48
+ }
49
+
50
+ let content = path.call(print, "body", 0);
51
+
52
+ if (
53
+ ["args", "args_add_star", "args_add_block"].includes(
54
+ path.getValue().body[0].type
55
+ )
56
+ ) {
57
+ content = join(concat([",", line]), content);
58
+ }
59
+
60
+ return group(
61
+ concat([
62
+ "(",
63
+ indent(concat([softline, content])),
64
+ concat([softline, ")"])
65
+ ])
66
+ );
67
+ },
68
+ program: (path, opts, print) =>
69
+ markAsRoot(
70
+ concat([join(literalline, path.map(print, "body")), literalline])
71
+ ),
72
+ stmts: (path, opts, print) => {
73
+ const stmts = path.getValue().body;
74
+ const parts = [];
75
+ let lineNo = null;
76
+
77
+ stmts.forEach((stmt, index) => {
78
+ if (stmt.type === "void_stmt") {
79
+ return;
80
+ }
81
+
82
+ const printed = path.call(print, "body", index);
83
+
84
+ if (lineNo === null) {
85
+ parts.push(printed);
86
+ } else if (
87
+ stmt.start - lineNo > 1 ||
88
+ [stmt.type, stmts[index - 1].type].includes("access_ctrl")
89
+ ) {
90
+ parts.push(hardline, hardline, printed);
91
+ } else if (
92
+ stmt.start !== lineNo ||
93
+ path.getParentNode().type !== "string_embexpr"
94
+ ) {
95
+ parts.push(hardline, printed);
96
+ } else {
97
+ parts.push("; ", printed);
98
+ }
99
+
100
+ lineNo = stmt.end;
101
+ });
102
+
103
+ return concat(parts);
104
+ }
105
+ };
data/src/nodes/strings.js CHANGED
@@ -6,8 +6,8 @@ const {
6
6
  join,
7
7
  literalline,
8
8
  softline
9
- } = require("../builders");
10
- const { concatBody, empty, makeList, surround } = require("../utils");
9
+ } = require("../prettier");
10
+ const { concatBody, empty, makeList, prefix, surround } = require("../utils");
11
11
  const escapePattern = require("../escapePattern");
12
12
 
13
13
  // If there is some part of this string that matches an escape sequence or that
@@ -72,6 +72,11 @@ module.exports = {
72
72
  const quote = preferSingleQuotes ? "'" : '"';
73
73
  return body.length === 2 ? concat([quote, body.slice(1), quote]) : body;
74
74
  },
75
+ dyna_symbol: (path, opts, print) => {
76
+ const { quote } = path.getValue().body[0];
77
+
78
+ return concat([":", quote, concat(path.call(print, "body", 0)), quote]);
79
+ },
75
80
  heredoc: (path, opts, print) => {
76
81
  const { beging, ending } = path.getValue();
77
82
 
@@ -129,6 +134,8 @@ module.exports = {
129
134
 
130
135
  return concat([quote].concat(parts).concat([quote]));
131
136
  },
137
+ symbol: prefix(":"),
138
+ symbol_literal: concatBody,
132
139
  word_add: concatBody,
133
140
  word_new: empty,
134
141
  xstring: makeList,
@@ -4,6 +4,13 @@
4
4
  const source = process.env.RBPRETTIER ? "../node_modules/prettier" : "prettier";
5
5
 
6
6
  // eslint-disable-next-line import/no-dynamic-require
7
- const { builders, utils } = require(source).doc;
7
+ const prettier = require(source);
8
8
 
9
- module.exports = Object.assign({}, builders, utils);
9
+ // Just combine all the things into one big object so that we can import
10
+ // whatever we need from prettier without having to dive too deeply.
11
+ module.exports = Object.assign(
12
+ {},
13
+ prettier.doc.builders,
14
+ prettier.doc.utils,
15
+ prettier.util
16
+ );
data/src/ripper.rb CHANGED
@@ -9,448 +9,503 @@ end
9
9
  require 'json' unless defined?(JSON)
10
10
  require 'ripper'
11
11
 
12
- module Layer
12
+ class RipperJS < Ripper
13
+ private
14
+
15
+ # Scanner events occur when the lexer hits a new token, like a keyword or an
16
+ # end. These nodes always contain just one argument which is a string
17
+ # representing the content. For the most part these can just be printed
18
+ # directly, with very few exceptions.
19
+ SCANNER_EVENTS.each do |event|
20
+ define_method(:"on_#{event}") do |body|
21
+ { type: :"@#{event}", body: body, start: lineno, end: lineno }
22
+ end
23
+ end
24
+
25
+ # Parser events represent nodes in the ripper abstract syntax tree. The event
26
+ # is reported after the children of the node have already been built.
27
+ PARSER_EVENTS.each do |event|
28
+ define_method(:"on_#{event}") do |*body|
29
+ min = body.map { |part| part.is_a?(Hash) ? part[:start] : lineno }.min
30
+ { type: event, body: body, start: min || lineno, end: lineno }
31
+ end
32
+ end
33
+
13
34
  # Some nodes are lists that come back from the parser. They always start with
14
- # a *_new node (or in the case of string, *_content) and each additional node
15
- # in the list is a *_add node. This layer takes those nodes and turns them
16
- # into one node with an array body.
17
- module Lists
18
- events = %i[
19
- args
20
- mlhs
21
- mrhs
22
- qsymbols
23
- qwords
24
- regexp
25
- stmts
26
- string
27
- symbols
28
- words
29
- xstring
30
- ]
31
-
32
- private
33
-
34
- events.each do |event|
35
- suffix = event == :string ? 'content' : 'new'
36
-
37
- define_method(:"on_#{event}_#{suffix}") do
38
- { type: event, body: [], start: lineno, end: lineno }
39
- end
40
-
41
- define_method(:"on_#{event}_add") do |parts, part|
42
- parts.tap do |node|
43
- node[:body] << part
44
- node[:end] = lineno
35
+ # a `*_new` node (or in the case of string, `*_content`) and each additional
36
+ # node in the list is a `*_add` node. This module takes those nodes and turns
37
+ # them into one node with an array body.
38
+ #
39
+ # For example, the statement `[a, b, c]` would be parsed as:
40
+ #
41
+ # [:args_add,
42
+ # [:args_add,
43
+ # [:args_add,
44
+ # [:args_new],
45
+ # [:vcall, [:@ident, "a", [1, 1]]]
46
+ # ],
47
+ # [:vcall, [:@ident, "b", [1, 4]]]
48
+ # ],
49
+ # [:vcall, [:@ident, "c", [1, 7]]]
50
+ # ]
51
+ #
52
+ # But after this module is applied that is instead parsed as:
53
+ #
54
+ # [:args,
55
+ # [
56
+ # [:vcall, [:@ident, "a", [1, 1]]],
57
+ # [:vcall, [:@ident, "b", [1, 4]]],
58
+ # [:vcall, [:@ident, "c", [1, 7]]]
59
+ # ]
60
+ # ]
61
+ #
62
+ # This makes it a lot easier to join things with commas, and ends up resulting
63
+ # in a much flatter `prettier` tree once it has been converted. Note that
64
+ # because of this module some extra node types are added (the aggregate of
65
+ # the previous `*_add` nodes) and some nodes now have arrays in places where
66
+ # they previously had single nodes.
67
+ prepend(
68
+ Module.new do
69
+ events = %i[
70
+ args
71
+ mlhs
72
+ mrhs
73
+ qsymbols
74
+ qwords
75
+ regexp
76
+ stmts
77
+ string
78
+ symbols
79
+ words
80
+ xstring
81
+ ]
82
+
83
+ private
84
+
85
+ events.each do |event|
86
+ suffix = event == :string ? 'content' : 'new'
87
+
88
+ define_method(:"on_#{event}_#{suffix}") do
89
+ { type: event, body: [], start: lineno, end: lineno }
90
+ end
91
+
92
+ define_method(:"on_#{event}_add") do |parts, part|
93
+ parts.tap do |node|
94
+ node[:body] << part
95
+ node[:end] = lineno
96
+ end
45
97
  end
46
98
  end
47
99
  end
48
- end
100
+ )
49
101
 
50
102
  # For most nodes, it's enough to look at the child nodes to determine the
51
103
  # start of the parent node. However, for some nodes it's necessary to keep
52
104
  # track of the keywords as they come in from the lexer and to modify the start
53
- # node once we have it.
54
- module StartLine
55
- events = %i[begin else elsif ensure rescue until while]
105
+ # node once we have it. We need accurate start and end lines so that we can
106
+ # embed block comments into the right kind of node.
107
+ prepend(
108
+ Module.new do
109
+ events = %i[begin else elsif ensure if rescue until while]
56
110
 
57
- def initialize(*args)
58
- super(*args)
59
- @keywords = []
60
- end
111
+ def initialize(*args)
112
+ super(*args)
113
+ @keywords = []
114
+ end
61
115
 
62
- def self.prepended(base)
63
- base.attr_reader :keywords
64
- end
116
+ def self.prepended(base)
117
+ base.attr_reader :keywords
118
+ end
65
119
 
66
- private
120
+ private
67
121
 
68
- def find_start(body)
69
- keywords[keywords.rindex { |keyword| keyword[:body] == body }][:start]
70
- end
122
+ def find_start(body)
123
+ keywords[keywords.rindex { |keyword| keyword[:body] == body }][:start]
124
+ end
71
125
 
72
- events.each do |event|
73
- keyword = event.to_s
126
+ events.each do |event|
127
+ keyword = event.to_s
74
128
 
75
- define_method(:"on_#{event}") do |*body|
76
- super(*body).tap { |sexp| sexp.merge!(start: find_start(keyword)) }
129
+ define_method(:"on_#{event}") do |*body|
130
+ super(*body).tap { |sexp| sexp.merge!(start: find_start(keyword)) }
131
+ end
77
132
  end
78
- end
79
133
 
80
- def on_kw(body)
81
- super(body).tap { |sexp| keywords << sexp }
82
- end
134
+ def on_kw(body)
135
+ super(body).tap { |sexp| keywords << sexp }
136
+ end
83
137
 
84
- def on_program(*body)
85
- super(*body).tap { |sexp| sexp.merge!(start: 1) }
138
+ def on_program(*body)
139
+ super(*body).tap { |sexp| sexp.merge!(start: 1) }
140
+ end
86
141
  end
87
- end
142
+ )
88
143
 
89
- # Nodes that are always on their own line occur when the lexer is in the
90
- # EXPR_BEG node. Those comments are tracked within the @block_comments
91
- # instance variable. Then for each node that could contain them, we attach
92
- # them after the node has been built.
93
- module BlockComments
94
- events = {
95
- begin: [0, :body, 0],
96
- bodystmt: [0],
97
- class: [2, :body, 0],
98
- def: [2, :body, 0],
99
- defs: [4, :body, 0],
100
- else: [0],
101
- elsif: [1],
102
- ensure: [0],
103
- if: [1],
104
- program: [0],
105
- rescue: [2],
106
- sclass: [1, :body, 0],
107
- unless: [1],
108
- until: [1],
109
- when: [1],
110
- while: [1]
111
- }
112
-
113
- def initialize(*args)
114
- super(*args)
115
- @block_comments = []
116
- @current_embdoc = nil
117
- end
144
+ # This layer keeps track of inline comments as they come in. Ripper itself
145
+ # doesn't attach comments to the AST, so we need to do it manually. In this
146
+ # case, inline comments are defined as any comments wherein the lexer state is
147
+ # not equal to EXPR_BEG (those are tracked by the block comments module).
148
+ prepend(
149
+ Module.new do
150
+ # Certain events needs to steal the comments from their children in order
151
+ # for them to display properly.
152
+ events = {
153
+ aref: [:body, 1],
154
+ args_add_block: [:body, 0],
155
+ break: [:body, 0],
156
+ command: [:body, 1],
157
+ command_call: [:body, 3],
158
+ regexp_literal: [:body, 0],
159
+ string_literal: [:body, 0],
160
+ symbol_literal: [:body, 0]
161
+ }
118
162
 
119
- def self.prepended(base)
120
- base.attr_reader :block_comments, :current_embdoc
121
- end
163
+ def initialize(*args)
164
+ super(*args)
165
+ @inline_comments = []
166
+ @last_sexp = nil
167
+ end
168
+
169
+ def self.prepended(base)
170
+ base.attr_reader :inline_comments, :last_sexp
171
+ end
172
+
173
+ private
122
174
 
123
- private
175
+ events.each do |event, path|
176
+ define_method(:"on_#{event}") do |*body|
177
+ @last_sexp =
178
+ super(*body).tap do |sexp|
179
+ comments = (sexp.dig(*path) || {}).delete(:comments)
180
+ sexp.merge!(comments: comments) if comments
181
+ end
182
+ end
183
+ end
124
184
 
125
- def attach_comments(sexp, stmts)
126
- range = sexp[:start]..sexp[:end]
127
- comments =
128
- block_comments.group_by { |comment| range.include?(comment[:start]) }
185
+ SPECIAL_LITERALS = %i[qsymbols qwords symbols words].freeze
129
186
 
130
- if comments[true]
131
- stmts[:body] =
132
- (stmts[:body] + comments[true]).sort_by { |node| node[:start] }
187
+ # Special array literals are handled in different ways and so their
188
+ # comments need to be passed up to their parent array node.
189
+ def on_array(*body)
190
+ @last_sexp =
191
+ super(*body).tap do |sexp|
192
+ next unless SPECIAL_LITERALS.include?(body.dig(0, :type))
133
193
 
134
- @block_comments = comments.fetch(false) { [] }
194
+ comments = sexp.dig(:body, 0).delete(:comments)
195
+ sexp.merge!(comments: comments) if comments
196
+ end
135
197
  end
136
- end
137
198
 
138
- events.each do |event, path|
139
- define_method(:"on_#{event}") do |*body|
140
- super(*body).tap { |sexp| attach_comments(sexp, body.dig(*path)) }
199
+ # Handling this specially because we want to pull the comments out of both
200
+ # child nodes.
201
+ def on_assoc_new(*body)
202
+ @last_sexp =
203
+ super(*body).tap do |sexp|
204
+ comments =
205
+ (sexp.dig(:body, 0).delete(:comments) || []) +
206
+ (sexp.dig(:body, 1).delete(:comments) || [])
207
+
208
+ sexp.merge!(comments: comments) if comments.any?
209
+ end
141
210
  end
142
- end
143
211
 
144
- def on_comment(body)
145
- super(body).tap do |sexp|
146
- block_comments << sexp if RipperJS.lex_state_name(state) == 'EXPR_BEG'
212
+ # Most scanner events don't stand on their own as s-expressions, but the
213
+ # CHAR scanner event is effectively just a string, so we need to track it
214
+ # as an s-expression.
215
+ def on_CHAR(body)
216
+ @last_sexp = super(body)
147
217
  end
148
- end
149
218
 
150
- def on_embdoc_beg(comment)
151
- @current_embdoc = {
152
- type: :embdoc, body: comment, start: lineno, end: lineno
153
- }
154
- end
219
+ # We need to know exactly where the comment is, switching off the current
220
+ # lexer state. In Ruby 2.7.0-dev, that's defined as:
221
+ #
222
+ # enum lex_state_bits {
223
+ # EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
224
+ # EXPR_END_bit, /* newline significant, +/- is an operator. */
225
+ # EXPR_ENDARG_bit, /* ditto, and unbound braces. */
226
+ # EXPR_ENDFN_bit, /* ditto, and unbound braces. */
227
+ # EXPR_ARG_bit, /* newline significant, +/- is an operator. */
228
+ # EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
229
+ # EXPR_MID_bit, /* newline significant, +/- is an operator. */
230
+ # EXPR_FNAME_bit, /* ignore newline, no reserved words. */
231
+ # EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
232
+ # EXPR_CLASS_bit, /* immediate after `class', no here document. */
233
+ # EXPR_LABEL_bit, /* flag bit, label is allowed. */
234
+ # EXPR_LABELED_bit, /* flag bit, just after a label. */
235
+ # EXPR_FITEM_bit, /* symbol literal as FNAME. */
236
+ # EXPR_MAX_STATE
237
+ # };
238
+ def on_comment(body)
239
+ sexp = { type: :@comment, body: body.chomp, start: lineno, end: lineno }
240
+
241
+ case RipperJS.lex_state_name(state)
242
+ when 'EXPR_END', 'EXPR_ARG|EXPR_LABELED', 'EXPR_ENDFN'
243
+ last_sexp.merge!(comments: [sexp])
244
+ when 'EXPR_CMDARG', 'EXPR_END|EXPR_ENDARG', 'EXPR_ENDARG', 'EXPR_ARG',
245
+ 'EXPR_FNAME|EXPR_FITEM', 'EXPR_CLASS', 'EXPR_END|EXPR_LABEL'
246
+ inline_comments << sexp
247
+ when 'EXPR_BEG|EXPR_LABEL', 'EXPR_MID'
248
+ inline_comments << sexp.merge!(break: true)
249
+ when 'EXPR_DOT'
250
+ last_sexp.merge!(comments: [sexp.merge!(break: true)])
251
+ end
155
252
 
156
- def on_embdoc(comment)
157
- @current_embdoc[:body] << comment
158
- end
253
+ sexp
254
+ end
159
255
 
160
- def on_embdoc_end(comment)
161
- @current_embdoc[:body] << comment.chomp
162
- @block_comments << @current_embdoc
163
- @current_embdoc = nil
164
- end
256
+ defined = private_instance_methods(false).grep(/\Aon_/) { $'.to_sym }
165
257
 
166
- def on_method_add_block(*body)
167
- super(*body).tap do |sexp|
168
- stmts = body[1][:body][1]
169
- stmts = stmts[:type] == :stmts ? stmts : body[1][:body][1][:body][0]
258
+ (Ripper::PARSER_EVENTS - defined).each do |event|
259
+ define_method(:"on_#{event}") do |*body|
260
+ super(*body).tap do |sexp|
261
+ @last_sexp = sexp
262
+ next if inline_comments.empty?
170
263
 
171
- attach_comments(sexp, stmts)
264
+ sexp[:comments] = inline_comments.reverse
265
+ @inline_comments = []
266
+ end
267
+ end
172
268
  end
173
269
  end
174
- end
270
+ )
175
271
 
176
- # Tracking heredocs in somewhat interesting. Straight-line heredocs are
177
- # reported as strings, whereas squiggly-line heredocs are reported as
178
- # heredocs.
179
- module Heredocs
180
- def initialize(*args)
181
- super(*args)
182
- @heredoc_stack = []
183
- end
272
+ # Nodes that are always on their own line occur when the lexer is in the
273
+ # EXPR_BEG state. Those comments are tracked within the @block_comments
274
+ # instance variable. Then for each node that could contain them, we attach
275
+ # them after the node has been built.
276
+ prepend(
277
+ Module.new do
278
+ events = {
279
+ begin: [0, :body, 0],
280
+ bodystmt: [0],
281
+ class: [2, :body, 0],
282
+ def: [2, :body, 0],
283
+ defs: [4, :body, 0],
284
+ else: [0],
285
+ elsif: [1],
286
+ ensure: [0],
287
+ if: [1],
288
+ program: [0],
289
+ rescue: [2],
290
+ sclass: [1, :body, 0],
291
+ unless: [1],
292
+ until: [1],
293
+ when: [1],
294
+ while: [1]
295
+ }
184
296
 
185
- def self.prepended(base)
186
- base.attr_reader :heredoc_stack
187
- end
297
+ def initialize(*args)
298
+ super(*args)
299
+ @block_comments = []
300
+ @current_embdoc = nil
301
+ end
188
302
 
189
- private
303
+ def self.prepended(base)
304
+ base.attr_reader :block_comments, :current_embdoc
305
+ end
190
306
 
191
- def on_embexpr_beg(body)
192
- super(body).tap { |sexp| heredoc_stack << sexp }
193
- end
307
+ private
194
308
 
195
- def on_embexpr_end(body)
196
- super(body).tap { heredoc_stack.pop }
197
- end
309
+ def attach_comments(sexp, stmts)
310
+ range = sexp[:start]..sexp[:end]
311
+ comments =
312
+ block_comments.group_by { |comment| range.include?(comment[:start]) }
198
313
 
199
- def on_heredoc_beg(beging)
200
- heredoc = { type: :heredoc, beging: beging, start: lineno, end: lineno }
201
- heredoc_stack << heredoc
202
- end
314
+ if comments[true]
315
+ stmts[:body] =
316
+ (stmts[:body] + comments[true]).sort_by { |node| node[:start] }
203
317
 
204
- def on_heredoc_end(ending)
205
- heredoc_stack[-1].merge!(ending: ending.chomp, end: lineno)
206
- end
318
+ @block_comments = comments.fetch(false) { [] }
319
+ end
320
+ end
207
321
 
208
- def on_heredoc_dedent(string, _width)
209
- heredoc = heredoc_stack.pop
210
- string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
211
- end
322
+ events.each do |event, path|
323
+ define_method(:"on_#{event}") do |*body|
324
+ super(*body).tap { |sexp| attach_comments(sexp, body.dig(*path)) }
325
+ end
326
+ end
212
327
 
213
- def on_string_literal(string)
214
- heredoc = heredoc_stack[-1]
328
+ def on_comment(body)
329
+ super(body).tap do |sexp|
330
+ block_comments << sexp if RipperJS.lex_state_name(state) == 'EXPR_BEG'
331
+ end
332
+ end
215
333
 
216
- if heredoc && string[:type] != :heredoc && heredoc[:type] == :heredoc
217
- heredoc_stack.pop
218
- string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
219
- else
220
- super
334
+ def on_embdoc_beg(comment)
335
+ @current_embdoc = {
336
+ type: :embdoc, body: comment, start: lineno, end: lineno
337
+ }
221
338
  end
222
- end
223
- end
224
339
 
225
- # These are the event types that contain _actual_ string content. If there is
226
- # an encoding magic comment at the top of the file, ripper will actually
227
- # change into that encoding for the storage of the string. This will break
228
- # everything, so we need to force the encoding back into UTF-8 so that
229
- # the JSON library won't break.
230
- module Encoding
231
- events = %w[comment ident tstring_content]
340
+ def on_embdoc(comment)
341
+ @current_embdoc[:body] << comment
342
+ end
232
343
 
233
- events.each do |event|
234
- define_method(:"on_#{event}") do |body|
235
- super(body.force_encoding('UTF-8'))
344
+ def on_embdoc_end(comment)
345
+ @current_embdoc[:body] << comment.chomp
346
+ @block_comments << @current_embdoc
347
+ @current_embdoc = nil
236
348
  end
237
- end
238
- end
239
349
 
240
- # This layer keeps track of inline comments as they come in. Ripper itself
241
- # doesn't attach comments to the AST, so we need to do it manually. In this
242
- # case, inline comments are defined as any comments wherein the lexer state is
243
- # not equal to EXPR_BEG (tracked in the BlockComments layer).
244
- module InlineComments
245
- # Certain events needs to steal the comments from their children in order
246
- # for them to display properly.
247
- events = {
248
- args_add_block: [:body, 0],
249
- break: [:body, 0],
250
- command: [:body, 1],
251
- command_call: [:body, 3],
252
- regexp_literal: [:body, 0],
253
- string_literal: [:body, 0],
254
- symbol_literal: [:body, 0]
255
- }
256
-
257
- def initialize(*args)
258
- super(*args)
259
- @inline_comments = []
260
- @last_sexp = nil
261
- end
350
+ def on_method_add_block(*body)
351
+ super(*body).tap do |sexp|
352
+ stmts = body[1][:body][1]
353
+ stmts = stmts[:type] == :stmts ? stmts : body[1][:body][1][:body][0]
262
354
 
263
- def self.prepended(base)
264
- base.attr_reader :inline_comments, :last_sexp
355
+ attach_comments(sexp, stmts)
356
+ end
357
+ end
265
358
  end
359
+ )
266
360
 
267
- private
361
+ # Tracking heredocs is somewhat interesting. Straight-line heredocs are
362
+ # reported as strings, whereas squiggly-line heredocs are reported as
363
+ # heredocs. We track the start and matching end of the heredoc as "beging" and
364
+ # "ending" respectively.
365
+ prepend(
366
+ Module.new do
367
+ def initialize(*args)
368
+ super(*args)
369
+ @heredoc_stack = []
370
+ end
268
371
 
269
- events.each do |event, path|
270
- define_method(:"on_#{event}") do |*body|
271
- @last_sexp =
272
- super(*body).tap do |sexp|
273
- comments = (sexp.dig(*path) || {}).delete(:comments)
274
- sexp.merge!(comments: comments) if comments
275
- end
372
+ def self.prepended(base)
373
+ base.attr_reader :heredoc_stack
276
374
  end
277
- end
278
375
 
279
- SPECIAL_LITERALS = %i[qsymbols qwords symbols words].freeze
376
+ private
280
377
 
281
- # Special array literals are handled in different ways and so their comments
282
- # need to be passed up to their parent array node.
283
- def on_array(*body)
284
- @last_sexp =
285
- super(*body).tap do |sexp|
286
- next unless SPECIAL_LITERALS.include?(body.dig(0, :type))
378
+ def on_embexpr_beg(body)
379
+ super(body).tap { |sexp| heredoc_stack << sexp }
380
+ end
287
381
 
288
- comments = sexp.dig(:body, 0).delete(:comments)
289
- sexp.merge!(comments: comments) if comments
290
- end
291
- end
382
+ def on_embexpr_end(body)
383
+ super(body).tap { heredoc_stack.pop }
384
+ end
292
385
 
293
- # Handling this specially because we want to pull the comments out of both
294
- # child nodes.
295
- def on_assoc_new(*body)
296
- @last_sexp =
297
- super(*body).tap do |sexp|
298
- comments =
299
- (sexp.dig(:body, 0).delete(:comments) || []) +
300
- (sexp.dig(:body, 1).delete(:comments) || [])
386
+ def on_heredoc_beg(beging)
387
+ heredoc = { type: :heredoc, beging: beging, start: lineno, end: lineno }
388
+ heredoc_stack << heredoc
389
+ end
301
390
 
302
- sexp.merge!(comments: comments) if comments.any?
303
- end
304
- end
391
+ def on_heredoc_end(ending)
392
+ heredoc_stack[-1].merge!(ending: ending.chomp, end: lineno)
393
+ end
305
394
 
306
- # Most scanner events don't stand on their own a s-expressions, but the CHAR
307
- # scanner event is effectively just a string, so we need to track it as a
308
- # s-expression.
309
- def on_CHAR(body)
310
- @last_sexp = super(body)
311
- end
395
+ def on_heredoc_dedent(string, _width)
396
+ heredoc = heredoc_stack.pop
397
+ string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
398
+ end
312
399
 
313
- # We need to know exactly where the comment is, switching off the current
314
- # lexer state. In Ruby 2.7.0-dev, that's defined as:
315
- #
316
- # enum lex_state_bits {
317
- # EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
318
- # EXPR_END_bit, /* newline significant, +/- is an operator. */
319
- # EXPR_ENDARG_bit, /* ditto, and unbound braces. */
320
- # EXPR_ENDFN_bit, /* ditto, and unbound braces. */
321
- # EXPR_ARG_bit, /* newline significant, +/- is an operator. */
322
- # EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
323
- # EXPR_MID_bit, /* newline significant, +/- is an operator. */
324
- # EXPR_FNAME_bit, /* ignore newline, no reserved words. */
325
- # EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
326
- # EXPR_CLASS_bit, /* immediate after `class', no here document. */
327
- # EXPR_LABEL_bit, /* flag bit, label is allowed. */
328
- # EXPR_LABELED_bit, /* flag bit, just after a label. */
329
- # EXPR_FITEM_bit, /* symbol literal as FNAME. */
330
- # EXPR_MAX_STATE
331
- # };
332
- def on_comment(body)
333
- sexp = { type: :@comment, body: body.chomp, start: lineno, end: lineno }
334
-
335
- case RipperJS.lex_state_name(state)
336
- when 'EXPR_END', 'EXPR_ARG|EXPR_LABELED', 'EXPR_ENDFN'
337
- last_sexp.merge!(comments: [sexp])
338
- when 'EXPR_CMDARG', 'EXPR_END|EXPR_ENDARG', 'EXPR_ENDARG', 'EXPR_ARG',
339
- 'EXPR_FNAME|EXPR_FITEM', 'EXPR_CLASS', 'EXPR_END|EXPR_LABEL'
340
- inline_comments << sexp
341
- when 'EXPR_BEG|EXPR_LABEL', 'EXPR_MID'
342
- inline_comments << sexp.merge!(break: true)
343
- when 'EXPR_DOT'
344
- last_sexp.merge!(comments: [sexp.merge!(break: true)])
345
- end
346
-
347
- sexp
348
- end
400
+ def on_string_literal(string)
401
+ heredoc = heredoc_stack[-1]
349
402
 
350
- defined_events = private_instance_methods(false).grep(/\Aon_/) { $'.to_sym }
403
+ if heredoc && string[:type] != :heredoc && heredoc[:type] == :heredoc
404
+ heredoc_stack.pop
405
+ string.merge!(heredoc.slice(:type, :beging, :ending, :start, :end))
406
+ else
407
+ super
408
+ end
409
+ end
410
+ end
411
+ )
351
412
 
352
- (Ripper::PARSER_EVENTS - defined_events).each do |event|
353
- define_method(:"on_#{event}") do |*body|
354
- super(*body).tap do |sexp|
355
- @last_sexp = sexp
356
- next if inline_comments.empty?
413
+ # These are the event types that contain _actual_ string content. If there is
414
+ # an encoding magic comment at the top of the file, ripper will actually
415
+ # change into that encoding for the storage of the string. This will break
416
+ # everything, so we need to force the encoding back into UTF-8 so that
417
+ # the JSON library won't break.
418
+ prepend(
419
+ Module.new do
420
+ private
357
421
 
358
- sexp[:comments] = inline_comments.reverse
359
- @inline_comments = []
422
+ %w[comment ident tstring_content].each do |event|
423
+ define_method(:"on_#{event}") do |body|
424
+ super(body.force_encoding('UTF-8'))
360
425
  end
361
426
  end
362
427
  end
363
- end
428
+ )
364
429
 
365
430
  # Handles __END__ syntax, which allows individual scripts to keep content
366
- # after the main ruby code that can be read through DATA.
367
- module Ending
368
- def initialize(source, *args)
369
- super(source, *args)
370
- @source = source
371
- @ending = nil
372
- end
431
+ # after the main ruby code that can be read through DATA. Which looks like:
432
+ #
433
+ # foo.bar
434
+ #
435
+ # __END__
436
+ # some other content that isn't read by ripper normally
437
+ prepend(
438
+ Module.new do
439
+ def initialize(source, *args)
440
+ super(source, *args)
441
+ @source = source
442
+ @ending = nil
443
+ end
373
444
 
374
- def self.prepended(base)
375
- base.attr_reader :source, :ending
376
- end
445
+ def self.prepended(base)
446
+ base.attr_reader :source, :ending
447
+ end
377
448
 
378
- private
449
+ private
379
450
 
380
- def on___end__(body)
381
- @ending = super(source.split("\n")[lineno..-1].join("\n"))
382
- end
451
+ def on___end__(body)
452
+ @ending = super(source.split("\n")[lineno..-1].join("\n"))
453
+ end
383
454
 
384
- def on_program(*body)
385
- super(*body).tap { |sexp| sexp[:body][0][:body] << ending if ending }
455
+ def on_program(*body)
456
+ super(*body).tap { |sexp| sexp[:body][0][:body] << ending if ending }
457
+ end
386
458
  end
387
- end
459
+ )
388
460
 
389
- # Adds the used quote type onto string nodes.
390
- module Strings
391
- private
461
+ # Adds the used quote type onto string nodes. This is necessary because we're
462
+ # going to have to stick to whatever quote the user chose if there are escape
463
+ # sequences within the string. For example, if you have '\n' we can't switch
464
+ # to double quotes without changing what it means.
465
+ prepend(
466
+ Module.new do
467
+ private
392
468
 
393
- def on_tstring_end(quote)
394
- last_sexp.merge!(quote: quote)
395
- end
469
+ def on_tstring_end(quote)
470
+ last_sexp.merge!(quote: quote)
471
+ end
396
472
 
397
- def on_label_end(quote)
398
- last_sexp.merge!(quote: quote[0]) # quote is ": or ':
473
+ def on_label_end(quote)
474
+ last_sexp.merge!(quote: quote[0]) # quote is ": or ':
475
+ end
399
476
  end
400
- end
477
+ )
401
478
 
402
479
  # Normally access controls are reported as vcall nodes. This module creates a
403
- # new node type to explicitly track those nodes instead.
404
- module AccessControls
405
- def initialize(source, *args)
406
- super(source, *args)
407
- @lines = source.split("\n")
408
- end
409
-
410
- def self.prepended(base)
411
- base.attr_reader :lines
412
- end
413
-
414
- private
480
+ # new node type to explicitly track those nodes instead, so that the printer
481
+ # can add new lines as necessary.
482
+ prepend(
483
+ Module.new do
484
+ KEYWORDS = %w[private protected public].freeze
415
485
 
416
- def on_vcall(ident)
417
- super(ident).tap do |sexp|
418
- if !%w[private protected public].include?(ident[:body]) ||
419
- ident[:body] != lines[lineno - 1].strip
420
- next
421
- end
486
+ def initialize(source, *args)
487
+ super(source, *args)
488
+ @lines = source.split("\n")
489
+ end
422
490
 
423
- sexp.merge!(type: :access_ctrl)
491
+ def self.prepended(base)
492
+ base.attr_reader :lines
424
493
  end
425
- end
426
- end
427
- end
428
494
 
429
- class RipperJS < Ripper
430
- private
495
+ private
431
496
 
432
- SCANNER_EVENTS.each do |event|
433
- define_method(:"on_#{event}") do |body|
434
- { type: :"@#{event}", body: body, start: lineno, end: lineno }
435
- end
436
- end
497
+ def on_vcall(ident)
498
+ super(ident).tap do |sexp|
499
+ if !KEYWORDS.include?(ident[:body]) ||
500
+ ident[:body] != lines[lineno - 1].strip
501
+ next
502
+ end
437
503
 
438
- PARSER_EVENTS.each do |event|
439
- define_method(:"on_#{event}") do |*body|
440
- min = body.map { |part| part.is_a?(Hash) ? part[:start] : lineno }.min
441
- { type: event, body: body, start: min || lineno, end: lineno }
504
+ sexp.merge!(type: :access_ctrl)
505
+ end
506
+ end
442
507
  end
443
- end
444
-
445
- prepend Layer::Lists
446
- prepend Layer::StartLine
447
- prepend Layer::InlineComments
448
- prepend Layer::BlockComments
449
- prepend Layer::Heredocs
450
- prepend Layer::Encoding
451
- prepend Layer::Ending
452
- prepend Layer::Strings
453
- prepend Layer::AccessControls
508
+ )
454
509
 
455
510
  # When the only statement inside of a `def` node is a `begin` node, then you
456
511
  # can safely replace the body of the `def` with the body of the `begin`. For
@@ -529,6 +584,10 @@ class RipperJS < Ripper
529
584
  )
530
585
  end
531
586
 
587
+ # If this is the main file we're executing, then most likely this is being
588
+ # executed from the parse.js spawn. In that case, read the ruby source from
589
+ # stdin and report back the AST over stdout.
590
+
532
591
  if $0 == __FILE__
533
592
  builder = RipperJS.new($stdin.read)
534
593
  response = builder.parse