irb 1.6.4 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/irb/ruby-lex.rb CHANGED
@@ -6,842 +6,496 @@
6
6
 
7
7
  require "ripper"
8
8
  require "jruby" if RUBY_ENGINE == "jruby"
9
-
10
- # :stopdoc:
11
- class RubyLex
12
-
13
- class TerminateLineInput < StandardError
14
- def initialize
15
- super("Terminate Line Input")
9
+ require_relative "nesting_parser"
10
+
11
+ module IRB
12
+ # :stopdoc:
13
+ class RubyLex
14
+ ASSIGNMENT_NODE_TYPES = [
15
+ # Local, instance, global, class, constant, instance, and index assignment:
16
+ # "foo = bar",
17
+ # "@foo = bar",
18
+ # "$foo = bar",
19
+ # "@@foo = bar",
20
+ # "::Foo = bar",
21
+ # "a::Foo = bar",
22
+ # "Foo = bar"
23
+ # "foo.bar = 1"
24
+ # "foo[1] = bar"
25
+ :assign,
26
+
27
+ # Operation assignment:
28
+ # "foo += bar"
29
+ # "foo -= bar"
30
+ # "foo ||= bar"
31
+ # "foo &&= bar"
32
+ :opassign,
33
+
34
+ # Multiple assignment:
35
+ # "foo, bar = 1, 2
36
+ :massign,
37
+ ]
38
+
39
+ class TerminateLineInput < StandardError
40
+ def initialize
41
+ super("Terminate Line Input")
42
+ end
16
43
  end
17
- end
18
44
 
19
- def initialize(context)
20
- @context = context
21
- @exp_line_no = @line_no = 1
22
- @indent = 0
23
- @continue = false
24
- @line = ""
25
- @prompt = nil
26
- end
45
+ attr_reader :line_no
27
46
 
28
- def self.compile_with_errors_suppressed(code, line_no: 1)
29
- begin
30
- result = yield code, line_no
31
- rescue ArgumentError
32
- # Ruby can issue an error for the code if there is an
33
- # incomplete magic comment for encoding in it. Force an
34
- # expression with a new line before the code in this
35
- # case to prevent magic comment handling. To make sure
36
- # line numbers in the lexed code remain the same,
37
- # decrease the line number by one.
38
- code = ";\n#{code}"
39
- line_no -= 1
40
- result = yield code, line_no
47
+ def initialize(context)
48
+ @context = context
49
+ @line_no = 1
50
+ @prompt = nil
41
51
  end
42
- result
43
- end
44
-
45
- # io functions
46
- def set_input(io, &block)
47
- @io = io
48
- if @io.respond_to?(:check_termination)
49
- @io.check_termination do |code|
50
- if Reline::IOGate.in_pasting?
51
- lex = RubyLex.new(@context)
52
- rest = lex.check_termination_in_prev_line(code)
53
- if rest
54
- Reline.delete_text
55
- rest.bytes.reverse_each do |c|
56
- Reline.ungetc(c)
57
- end
58
- true
59
- else
60
- false
61
- end
62
- else
63
- # Accept any single-line input for symbol aliases or commands that transform args
64
- command = code.split(/\s/, 2).first
65
- if @context.symbol_alias?(command) || @context.transform_args?(command)
66
- next true
67
- end
68
52
 
69
- code.gsub!(/\s*\z/, '').concat("\n")
70
- tokens = self.class.ripper_lex_without_warning(code, context: @context)
71
- ltype, indent, continue, code_block_open = check_state(code, tokens)
72
- if ltype or indent > 0 or continue or code_block_open
73
- false
74
- else
75
- true
76
- end
77
- end
53
+ def self.compile_with_errors_suppressed(code, line_no: 1)
54
+ begin
55
+ result = yield code, line_no
56
+ rescue ArgumentError
57
+ # Ruby can issue an error for the code if there is an
58
+ # incomplete magic comment for encoding in it. Force an
59
+ # expression with a new line before the code in this
60
+ # case to prevent magic comment handling. To make sure
61
+ # line numbers in the lexed code remain the same,
62
+ # decrease the line number by one.
63
+ code = ";\n#{code}"
64
+ line_no -= 1
65
+ result = yield code, line_no
78
66
  end
67
+ result
79
68
  end
80
- if @io.respond_to?(:dynamic_prompt)
81
- @io.dynamic_prompt do |lines|
82
- lines << '' if lines.empty?
83
- result = []
84
- tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: @context)
85
- code = String.new
86
- partial_tokens = []
87
- unprocessed_tokens = []
88
- line_num_offset = 0
89
- tokens.each do |t|
90
- partial_tokens << t
91
- unprocessed_tokens << t
92
- if t.tok.include?("\n")
93
- t_str = t.tok
94
- t_str.each_line("\n") do |s|
95
- code << s
96
- next unless s.include?("\n")
97
- ltype, indent, continue, code_block_open = check_state(code, partial_tokens)
98
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
99
- line_num_offset += 1
100
- end
101
- unprocessed_tokens = []
102
- else
103
- code << t.tok
104
- end
105
- end
106
69
 
107
- unless unprocessed_tokens.empty?
108
- ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens)
109
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
110
- end
111
- result
112
- end
70
+ def set_prompt(&block)
71
+ @prompt = block
113
72
  end
114
73
 
115
- if block_given?
116
- @input = block
117
- else
118
- @input = Proc.new{@io.gets}
74
+ ERROR_TOKENS = [
75
+ :on_parse_error,
76
+ :compile_error,
77
+ :on_assign_error,
78
+ :on_alias_error,
79
+ :on_class_name_error,
80
+ :on_param_error
81
+ ]
82
+
83
+ def self.generate_local_variables_assign_code(local_variables)
84
+ "#{local_variables.join('=')}=nil;" unless local_variables.empty?
119
85
  end
120
- end
121
86
 
122
- def set_prompt(&block)
123
- @prompt = block
124
- end
125
-
126
- ERROR_TOKENS = [
127
- :on_parse_error,
128
- :compile_error,
129
- :on_assign_error,
130
- :on_alias_error,
131
- :on_class_name_error,
132
- :on_param_error
133
- ]
134
-
135
- def self.generate_local_variables_assign_code(local_variables)
136
- "#{local_variables.join('=')}=nil;" unless local_variables.empty?
137
- end
138
-
139
- def self.ripper_lex_without_warning(code, context: nil)
140
- verbose, $VERBOSE = $VERBOSE, nil
141
- lvars_code = generate_local_variables_assign_code(context&.local_variables || [])
142
- if lvars_code
143
- code = "#{lvars_code}\n#{code}"
144
- line_no = 0
145
- else
146
- line_no = 1
147
- end
148
-
149
- compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
150
- lexer = Ripper::Lexer.new(inner_code, '-', line_no)
151
- lexer.scan.each_with_object([]) do |t, tokens|
152
- next if t.pos.first == 0
153
- prev_tk = tokens.last
154
- position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
155
- if position_overlapped
156
- tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event)
157
- else
158
- tokens << t
87
+ # Some part of the code is not included in Ripper's token.
88
+ # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr.
89
+ # With interpolated tokens, tokens.map(&:tok).join will be equal to code.
90
+ def self.interpolate_ripper_ignored_tokens(code, tokens)
91
+ line_positions = [0]
92
+ code.lines.each do |line|
93
+ line_positions << line_positions.last + line.bytesize
94
+ end
95
+ prev_byte_pos = 0
96
+ interpolated = []
97
+ prev_line = 1
98
+ tokens.each do |t|
99
+ line, col = t.pos
100
+ byte_pos = line_positions[line - 1] + col
101
+ if prev_byte_pos < byte_pos
102
+ tok = code.byteslice(prev_byte_pos...byte_pos)
103
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
104
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
105
+ prev_line += tok.count("\n")
159
106
  end
107
+ interpolated << t
108
+ prev_byte_pos = byte_pos + t.tok.bytesize
109
+ prev_line += t.tok.count("\n")
160
110
  end
111
+ if prev_byte_pos < code.bytesize
112
+ tok = code.byteslice(prev_byte_pos..)
113
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
114
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
115
+ end
116
+ interpolated
161
117
  end
162
- ensure
163
- $VERBOSE = verbose
164
- end
165
118
 
166
- def find_prev_spaces(line_index)
167
- return 0 if @tokens.size == 0
168
- md = @tokens[0].tok.match(/(\A +)/)
169
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
170
- line_count = 0
171
- @tokens.each_with_index do |t, i|
172
- if t.tok.include?("\n")
173
- line_count += t.tok.count("\n")
174
- if line_count >= line_index
175
- return prev_spaces
176
- end
177
- next if t.event == :on_tstring_content || t.event == :on_words_sep
178
- if (@tokens.size - 1) > i
179
- md = @tokens[i + 1].tok.match(/(\A +)/)
180
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
181
- end
119
+ def self.ripper_lex_without_warning(code, context: nil)
120
+ verbose, $VERBOSE = $VERBOSE, nil
121
+ lvars_code = generate_local_variables_assign_code(context&.local_variables || [])
122
+ original_code = code
123
+ if lvars_code
124
+ code = "#{lvars_code}\n#{code}"
125
+ line_no = 0
126
+ else
127
+ line_no = 1
182
128
  end
183
- end
184
- prev_spaces
185
- end
186
129
 
187
- def set_auto_indent
188
- if @io.respond_to?(:auto_indent) and @context.auto_indent_mode
189
- @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
190
- if is_newline
191
- @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context)
192
- prev_spaces = find_prev_spaces(line_index)
193
- depth_difference = check_newline_depth_difference
194
- depth_difference = 0 if depth_difference < 0
195
- prev_spaces + depth_difference * 2
196
- else
197
- code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
198
- last_line = lines[line_index]&.byteslice(0, byte_pointer)
199
- code += last_line if last_line
200
- @tokens = self.class.ripper_lex_without_warning(code, context: @context)
201
- check_corresponding_token_depth(lines, line_index)
130
+ compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
131
+ lexer = Ripper::Lexer.new(inner_code, '-', line_no)
132
+ tokens = []
133
+ lexer.scan.each do |t|
134
+ next if t.pos.first == 0
135
+ prev_tk = tokens.last
136
+ position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
137
+ if position_overlapped
138
+ tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event)
139
+ else
140
+ tokens << t
141
+ end
202
142
  end
143
+ interpolate_ripper_ignored_tokens(original_code, tokens)
203
144
  end
145
+ ensure
146
+ $VERBOSE = verbose
204
147
  end
205
- end
206
-
207
- def check_state(code, tokens)
208
- ltype = process_literal_type(tokens)
209
- indent = process_nesting_level(tokens)
210
- continue = process_continue(tokens)
211
- lvars_code = self.class.generate_local_variables_assign_code(@context.local_variables)
212
- code = "#{lvars_code}\n#{code}" if lvars_code
213
- code_block_open = check_code_block(code, tokens)
214
- [ltype, indent, continue, code_block_open]
215
- end
216
148
 
217
- def prompt
218
- if @prompt
219
- @prompt.call(@ltype, @indent, @continue, @line_no)
149
+ def prompt(opens, continue, line_num_offset)
150
+ ltype = ltype_from_open_tokens(opens)
151
+ indent_level = calc_indent_level(opens)
152
+ @prompt&.call(ltype, indent_level, opens.any? || continue, @line_no + line_num_offset)
220
153
  end
221
- end
222
154
 
223
- def initialize_input
224
- @ltype = nil
225
- @indent = 0
226
- @continue = false
227
- @line = ""
228
- @exp_line_no = @line_no
229
- @code_block_open = false
230
- end
155
+ def check_code_state(code)
156
+ tokens = self.class.ripper_lex_without_warning(code, context: @context)
157
+ opens = NestingParser.open_tokens(tokens)
158
+ [tokens, opens, code_terminated?(code, tokens, opens)]
159
+ end
231
160
 
232
- def each_top_level_statement
233
- initialize_input
234
- catch(:TERM_INPUT) do
235
- loop do
236
- begin
237
- prompt
238
- unless l = lex
239
- throw :TERM_INPUT if @line == ''
240
- else
241
- @line_no += l.count("\n")
242
- if l == "\n"
243
- @exp_line_no += 1
244
- next
245
- end
246
- @line.concat l
247
- if @code_block_open or @ltype or @continue or @indent > 0
248
- next
249
- end
250
- end
251
- if @line != "\n"
252
- @line.force_encoding(@io.encoding)
253
- yield @line, @exp_line_no
254
- end
255
- raise TerminateLineInput if @io.eof?
256
- @line = ''
257
- @exp_line_no = @line_no
258
-
259
- @indent = 0
260
- rescue TerminateLineInput
261
- initialize_input
262
- prompt
263
- end
161
+ def code_terminated?(code, tokens, opens)
162
+ case check_code_syntax(code)
163
+ when :unrecoverable_error
164
+ true
165
+ when :recoverable_error
166
+ false
167
+ when :other_error
168
+ opens.empty? && !should_continue?(tokens)
169
+ when :valid
170
+ !should_continue?(tokens)
264
171
  end
265
172
  end
266
- end
267
173
 
268
- def lex
269
- line = @input.call
270
- if @io.respond_to?(:check_termination)
271
- return line # multiline
174
+ def save_prompt_to_context_io(opens, continue, line_num_offset)
175
+ # Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`.
176
+ prompt(opens, continue, line_num_offset)
272
177
  end
273
- code = @line + (line.nil? ? '' : line)
274
- code.gsub!(/\s*\z/, '').concat("\n")
275
- @tokens = self.class.ripper_lex_without_warning(code, context: @context)
276
- @ltype, @indent, @continue, @code_block_open = check_state(code, @tokens)
277
- line
278
- end
279
178
 
280
- def process_continue(tokens)
281
- # last token is always newline
282
- if tokens.size >= 2 and tokens[-2].event == :on_regexp_end
283
- # end of regexp literal
284
- return false
285
- elsif tokens.size >= 2 and tokens[-2].event == :on_semicolon
286
- return false
287
- elsif tokens.size >= 2 and tokens[-2].event == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2].tok)
288
- return false
289
- elsif !tokens.empty? and tokens.last.tok == "\\\n"
290
- return true
291
- elsif tokens.size >= 1 and tokens[-1].event == :on_heredoc_end # "EOH\n"
292
- return false
293
- elsif tokens.size >= 2 and tokens[-2].state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2].tok !~ /\A\.\.\.?\z/
294
- # end of literal except for regexp
295
- # endless range at end of line is not a continue
296
- return true
179
+ def increase_line_no(addition)
180
+ @line_no += addition
297
181
  end
298
- false
299
- end
300
182
 
301
- def check_code_block(code, tokens)
302
- return true if tokens.empty?
303
- if tokens.last.event == :on_heredoc_beg
304
- return true
305
- end
183
+ def assignment_expression?(code)
184
+ # Try to parse the code and check if the last of possibly multiple
185
+ # expressions is an assignment type.
306
186
 
307
- begin # check if parser error are available
187
+ # If the expression is invalid, Ripper.sexp should return nil which will
188
+ # result in false being returned. Any valid expression should return an
189
+ # s-expression where the second element of the top level array is an
190
+ # array of parsed expressions. The first element of each expression is the
191
+ # expression's type.
308
192
  verbose, $VERBOSE = $VERBOSE, nil
309
- case RUBY_ENGINE
310
- when 'ruby'
311
- self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
312
- RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
313
- end
314
- when 'jruby'
315
- JRuby.compile_ir(code)
316
- else
317
- catch(:valid) do
318
- eval("BEGIN { throw :valid, true }\n#{code}")
319
- false
320
- end
321
- end
322
- rescue EncodingError
323
- # This is for a hash with invalid encoding symbol, {"\xAE": 1}
324
- rescue SyntaxError => e
325
- case e.message
326
- when /unterminated (?:string|regexp) meets end of file/
327
- # "unterminated regexp meets end of file"
328
- #
329
- # example:
330
- # /
331
- #
332
- # "unterminated string meets end of file"
333
- #
334
- # example:
335
- # '
336
- return true
337
- when /syntax error, unexpected end-of-input/
338
- # "syntax error, unexpected end-of-input, expecting keyword_end"
339
- #
340
- # example:
341
- # if true
342
- # hoge
343
- # if false
344
- # fuga
345
- # end
346
- return true
347
- when /syntax error, unexpected keyword_end/
348
- # "syntax error, unexpected keyword_end"
349
- #
350
- # example:
351
- # if (
352
- # end
353
- #
354
- # example:
355
- # end
356
- return false
357
- when /syntax error, unexpected '\.'/
358
- # "syntax error, unexpected '.'"
359
- #
360
- # example:
361
- # .
362
- return false
363
- when /unexpected tREGEXP_BEG/
364
- # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
365
- #
366
- # example:
367
- # method / f /
368
- return false
369
- end
193
+ code = "#{RubyLex.generate_local_variables_assign_code(@context.local_variables) || 'nil;'}\n#{code}"
194
+ # Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part.
195
+ node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0)
196
+ ASSIGNMENT_NODE_TYPES.include?(node_type)
370
197
  ensure
371
198
  $VERBOSE = verbose
372
199
  end
373
200
 
374
- last_lex_state = tokens.last.state
375
-
376
- if last_lex_state.allbits?(Ripper::EXPR_BEG)
377
- return false
378
- elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
379
- return true
380
- elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
381
- return true
382
- elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
383
- return true
384
- elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
385
- return true
386
- elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
387
- return false
201
+ def should_continue?(tokens)
202
+ # Look at the last token and check if IRB need to continue reading next line.
203
+ # Example code that should continue: `a\` `a +` `a.`
204
+ # Trailing spaces, newline, comments are skipped
205
+ return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n"
206
+
207
+ tokens.reverse_each do |token|
208
+ case token.event
209
+ when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end
210
+ # Skip
211
+ when :on_regexp_end, :on_heredoc_end, :on_semicolon
212
+ # State is EXPR_BEG but should not continue
213
+ return false
214
+ else
215
+ # Endless range should not continue
216
+ return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/)
217
+
218
+ # EXPR_DOT and most of the EXPR_BEG should continue
219
+ return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
220
+ end
221
+ end
222
+ false
388
223
  end
389
224
 
390
- false
391
- end
225
+ def check_code_syntax(code)
226
+ lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables)
227
+ code = "#{lvars_code}\n#{code}"
392
228
 
393
- def process_nesting_level(tokens)
394
- indent = 0
395
- in_oneliner_def = nil
396
- tokens.each_with_index { |t, index|
397
- # detecting one-liner method definition
398
- if in_oneliner_def.nil?
399
- if t.state.allbits?(Ripper::EXPR_ENDFN)
400
- in_oneliner_def = :ENDFN
401
- end
402
- else
403
- if t.state.allbits?(Ripper::EXPR_ENDFN)
404
- # continuing
405
- elsif t.state.allbits?(Ripper::EXPR_BEG)
406
- if t.tok == '='
407
- in_oneliner_def = :BODY
229
+ begin # check if parser error are available
230
+ verbose, $VERBOSE = $VERBOSE, nil
231
+ case RUBY_ENGINE
232
+ when 'ruby'
233
+ self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
234
+ RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
408
235
  end
236
+ when 'jruby'
237
+ JRuby.compile_ir(code)
409
238
  else
410
- if in_oneliner_def == :BODY
411
- # one-liner method definition
412
- indent -= 1
239
+ catch(:valid) do
240
+ eval("BEGIN { throw :valid, true }\n#{code}")
241
+ false
413
242
  end
414
- in_oneliner_def = nil
415
243
  end
416
- end
417
-
418
- case t.event
419
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
420
- indent += 1
421
- when :on_rbracket, :on_rbrace, :on_rparen
422
- indent -= 1
423
- when :on_kw
424
- next if index > 0 and tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
425
- case t.tok
426
- when 'do'
427
- syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index)
428
- indent += 1 if syntax_of_do == :method_calling
429
- when 'def', 'case', 'for', 'begin', 'class', 'module'
430
- indent += 1
431
- when 'if', 'unless', 'while', 'until'
432
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
433
- indent += 1 unless t.state.allbits?(Ripper::EXPR_LABEL)
434
- when 'end'
435
- indent -= 1
244
+ rescue EncodingError
245
+ # This is for a hash with invalid encoding symbol, {"\xAE": 1}
246
+ :unrecoverable_error
247
+ rescue SyntaxError => e
248
+ case e.message
249
+ when /unterminated (?:string|regexp) meets end of file/
250
+ # "unterminated regexp meets end of file"
251
+ #
252
+ # example:
253
+ # /
254
+ #
255
+ # "unterminated string meets end of file"
256
+ #
257
+ # example:
258
+ # '
259
+ return :recoverable_error
260
+ when /syntax error, unexpected end-of-input/
261
+ # "syntax error, unexpected end-of-input, expecting keyword_end"
262
+ #
263
+ # example:
264
+ # if true
265
+ # hoge
266
+ # if false
267
+ # fuga
268
+ # end
269
+ return :recoverable_error
270
+ when /syntax error, unexpected keyword_end/
271
+ # "syntax error, unexpected keyword_end"
272
+ #
273
+ # example:
274
+ # if (
275
+ # end
276
+ #
277
+ # example:
278
+ # end
279
+ return :unrecoverable_error
280
+ when /syntax error, unexpected '\.'/
281
+ # "syntax error, unexpected '.'"
282
+ #
283
+ # example:
284
+ # .
285
+ return :unrecoverable_error
286
+ when /unexpected tREGEXP_BEG/
287
+ # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
288
+ #
289
+ # example:
290
+ # method / f /
291
+ return :unrecoverable_error
292
+ else
293
+ return :other_error
436
294
  end
295
+ ensure
296
+ $VERBOSE = verbose
437
297
  end
438
- # percent literals are not indented
439
- }
440
- indent
441
- end
298
+ :valid
299
+ end
442
300
 
443
- def is_method_calling?(tokens, index)
444
- tk = tokens[index]
445
- if tk.state.anybits?(Ripper::EXPR_CMDARG) and tk.event == :on_ident
446
- # The target method call to pass the block with "do".
447
- return true
448
- elsif tk.state.anybits?(Ripper::EXPR_ARG) and tk.event == :on_ident
449
- non_sp_index = tokens[0..(index - 1)].rindex{ |t| t.event != :on_sp }
450
- if non_sp_index
451
- prev_tk = tokens[non_sp_index]
452
- if prev_tk.state.anybits?(Ripper::EXPR_DOT) and prev_tk.event == :on_period
453
- # The target method call with receiver to pass the block with "do".
454
- return true
301
+ def calc_indent_level(opens)
302
+ indent_level = 0
303
+ opens.each_with_index do |t, index|
304
+ case t.event
305
+ when :on_heredoc_beg
306
+ if opens[index + 1]&.event != :on_heredoc_beg
307
+ if t.tok.match?(/^<<[~-]/)
308
+ indent_level += 1
309
+ else
310
+ indent_level = 0
311
+ end
312
+ end
313
+ when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
314
+ # No indent: "", //, :"", ``
315
+ # Indent: %(), %r(), %i(), %x()
316
+ indent_level += 1 if t.tok.start_with? '%'
317
+ when :on_embdoc_beg
318
+ indent_level = 0
319
+ else
320
+ indent_level += 1
455
321
  end
456
322
  end
323
+ indent_level
457
324
  end
458
- false
459
- end
460
325
 
461
- def take_corresponding_syntax_to_kw_do(tokens, index)
462
- syntax_of_do = nil
463
- # Finding a syntax corresponding to "do".
464
- index.downto(0) do |i|
465
- tk = tokens[i]
466
- # In "continue", the token isn't the corresponding syntax to "do".
467
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
468
- first_in_fomula = false
469
- if non_sp_index.nil?
470
- first_in_fomula = true
471
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
472
- first_in_fomula = true
473
- end
474
- if is_method_calling?(tokens, i)
475
- syntax_of_do = :method_calling
476
- break if first_in_fomula
477
- elsif tk.event == :on_kw && %w{while until for}.include?(tk.tok)
478
- # A loop syntax in front of "do" found.
479
- #
480
- # while cond do # also "until" or "for"
481
- # end
482
- #
483
- # This "do" doesn't increment indent because the loop syntax already
484
- # incremented.
485
- syntax_of_do = :loop_syntax
486
- break if first_in_fomula
487
- end
326
+ FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg]
327
+
328
+ def free_indent_token?(token)
329
+ FREE_INDENT_TOKENS.include?(token&.event)
488
330
  end
489
- syntax_of_do
490
- end
491
331
 
492
- def is_the_in_correspond_to_a_for(tokens, index)
493
- syntax_of_in = nil
494
- # Finding a syntax corresponding to "do".
495
- index.downto(0) do |i|
496
- tk = tokens[i]
497
- # In "continue", the token isn't the corresponding syntax to "do".
498
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
499
- first_in_fomula = false
500
- if non_sp_index.nil?
501
- first_in_fomula = true
502
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
503
- first_in_fomula = true
504
- end
505
- if tk.event == :on_kw && tk.tok == 'for'
506
- # A loop syntax in front of "do" found.
507
- #
508
- # while cond do # also "until" or "for"
509
- # end
510
- #
511
- # This "do" doesn't increment indent because the loop syntax already
512
- # incremented.
513
- syntax_of_in = :for
332
+ # Calculates the difference of pasted code's indent and indent calculated from tokens
333
+ def indent_difference(lines, line_results, line_index)
334
+ loop do
335
+ _tokens, prev_opens, _next_opens, min_depth = line_results[line_index]
336
+ open_token = prev_opens.last
337
+ if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token))
338
+ # If the leading whitespace is an indent, return the difference
339
+ indent_level = calc_indent_level(prev_opens.take(min_depth))
340
+ calculated_indent = 2 * indent_level
341
+ actual_indent = lines[line_index][/^ */].size
342
+ return actual_indent - calculated_indent
343
+ elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/)
344
+ return 0
345
+ end
346
+ # If the leading whitespace is not an indent but part of a multiline token
347
+ # Calculate base_indent of the multiline token's beginning line
348
+ line_index = open_token.pos[0] - 1
514
349
  end
515
- break if first_in_fomula
516
350
  end
517
- syntax_of_in
518
- end
519
351
 
520
- def check_newline_depth_difference
521
- depth_difference = 0
522
- open_brace_on_line = 0
523
- in_oneliner_def = nil
524
- @tokens.each_with_index do |t, index|
525
- # detecting one-liner method definition
526
- if in_oneliner_def.nil?
527
- if t.state.allbits?(Ripper::EXPR_ENDFN)
528
- in_oneliner_def = :ENDFN
529
- end
352
+ def process_indent_level(tokens, lines, line_index, is_newline)
353
+ line_results = NestingParser.parse_by_line(tokens)
354
+ result = line_results[line_index]
355
+ if result
356
+ _tokens, prev_opens, next_opens, min_depth = result
530
357
  else
531
- if t.state.allbits?(Ripper::EXPR_ENDFN)
532
- # continuing
533
- elsif t.state.allbits?(Ripper::EXPR_BEG)
534
- if t.tok == '='
535
- in_oneliner_def = :BODY
536
- end
537
- else
538
- if in_oneliner_def == :BODY
539
- # one-liner method definition
540
- depth_difference -= 1
541
- end
542
- in_oneliner_def = nil
543
- end
358
+ # When last line is empty
359
+ prev_opens = next_opens = line_results.last[2]
360
+ min_depth = next_opens.size
544
361
  end
545
362
 
546
- case t.event
547
- when :on_ignored_nl, :on_nl, :on_comment
548
- if index != (@tokens.size - 1) and in_oneliner_def != :BODY
549
- depth_difference = 0
550
- open_brace_on_line = 0
551
- end
552
- next
553
- when :on_sp
554
- next
555
- end
363
+ # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
364
+ # Shortest open tokens can be calculated by `opens.take(min_depth)`
365
+ indent = 2 * calc_indent_level(prev_opens.take(min_depth))
556
366
 
557
- case t.event
558
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
559
- depth_difference += 1
560
- open_brace_on_line += 1
561
- when :on_rbracket, :on_rbrace, :on_rparen
562
- depth_difference -= 1 if open_brace_on_line > 0
563
- when :on_kw
564
- next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
565
- case t.tok
566
- when 'do'
567
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
568
- depth_difference += 1 if syntax_of_do == :method_calling
569
- when 'def', 'case', 'for', 'begin', 'class', 'module'
570
- depth_difference += 1
571
- when 'if', 'unless', 'while', 'until', 'rescue'
572
- # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
573
- unless t.state.allbits?(Ripper::EXPR_LABEL)
574
- depth_difference += 1
575
- end
576
- when 'else', 'elsif', 'ensure', 'when'
577
- depth_difference += 1
578
- when 'in'
579
- unless is_the_in_correspond_to_a_for(@tokens, index)
580
- depth_difference += 1
581
- end
582
- when 'end'
583
- depth_difference -= 1
584
- end
585
- end
586
- end
587
- depth_difference
588
- end
367
+ preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size
589
368
 
590
- def check_corresponding_token_depth(lines, line_index)
591
- corresponding_token_depth = nil
592
- is_first_spaces_of_line = true
593
- is_first_printable_of_line = true
594
- spaces_of_nest = []
595
- spaces_at_line_head = 0
596
- open_brace_on_line = 0
597
- in_oneliner_def = nil
598
-
599
- if heredoc_scope?
600
- return lines[line_index][/^ */].length
601
- end
369
+ prev_open_token = prev_opens.last
370
+ next_open_token = next_opens.last
602
371
 
603
- @tokens.each_with_index do |t, index|
604
- # detecting one-liner method definition
605
- if in_oneliner_def.nil?
606
- if t.state.allbits?(Ripper::EXPR_ENDFN)
607
- in_oneliner_def = :ENDFN
608
- end
372
+ # Calculates base indent for pasted code on the line where prev_open_token is located
373
+ # irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0
374
+ # irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2
375
+ # irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4
376
+ if prev_open_token
377
+ base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max
609
378
  else
610
- if t.state.allbits?(Ripper::EXPR_ENDFN)
611
- # continuing
612
- elsif t.state.allbits?(Ripper::EXPR_BEG)
613
- if t.tok == '='
614
- in_oneliner_def = :BODY
615
- end
616
- else
617
- if in_oneliner_def == :BODY
618
- # one-liner method definition
619
- if is_first_printable_of_line
620
- corresponding_token_depth = spaces_of_nest.pop
621
- else
622
- spaces_of_nest.pop
623
- corresponding_token_depth = nil
624
- end
625
- end
626
- in_oneliner_def = nil
627
- end
379
+ base_indent = 0
628
380
  end
629
381
 
630
- case t.event
631
- when :on_ignored_nl, :on_nl, :on_comment, :on_heredoc_end, :on_embdoc_end
632
- if in_oneliner_def != :BODY
633
- corresponding_token_depth = nil
634
- spaces_at_line_head = 0
635
- is_first_spaces_of_line = true
636
- is_first_printable_of_line = true
637
- open_brace_on_line = 0
382
+ if free_indent_token?(prev_open_token)
383
+ if is_newline && prev_open_token.pos[0] == line_index
384
+ # First newline inside free-indent token
385
+ base_indent + indent
386
+ else
387
+ # Accept any number of indent inside free-indent token
388
+ preserve_indent
638
389
  end
639
- next
640
- when :on_sp
641
- spaces_at_line_head = t.tok.count(' ') if is_first_spaces_of_line
642
- is_first_spaces_of_line = false
643
- next
644
- end
645
-
646
- case t.event
647
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
648
- spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2)
649
- open_brace_on_line += 1
650
- when :on_rbracket, :on_rbrace, :on_rparen
651
- if is_first_printable_of_line
652
- corresponding_token_depth = spaces_of_nest.pop
390
+ elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg
391
+ if prev_open_token&.event == next_open_token&.event
392
+ # Accept any number of indent inside embdoc content
393
+ preserve_indent
653
394
  else
654
- spaces_of_nest.pop
655
- corresponding_token_depth = nil
395
+ # =begin or =end
396
+ 0
656
397
  end
657
- open_brace_on_line -= 1
658
- when :on_kw
659
- next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
660
- case t.tok
661
- when 'do'
662
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
663
- if syntax_of_do == :method_calling
664
- spaces_of_nest.push(spaces_at_line_head)
665
- end
666
- when 'def', 'case', 'for', 'begin', 'class', 'module'
667
- spaces_of_nest.push(spaces_at_line_head)
668
- when 'rescue'
669
- unless t.state.allbits?(Ripper::EXPR_LABEL)
670
- corresponding_token_depth = spaces_of_nest.last
671
- end
672
- when 'if', 'unless', 'while', 'until'
673
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
674
- unless t.state.allbits?(Ripper::EXPR_LABEL)
675
- spaces_of_nest.push(spaces_at_line_head)
676
- end
677
- when 'else', 'elsif', 'ensure', 'when'
678
- corresponding_token_depth = spaces_of_nest.last
679
- when 'in'
680
- if in_keyword_case_scope?
681
- corresponding_token_depth = spaces_of_nest.last
682
- end
683
- when 'end'
684
- if is_first_printable_of_line
685
- corresponding_token_depth = spaces_of_nest.pop
398
+ elsif prev_open_token&.event == :on_heredoc_beg
399
+ tok = prev_open_token.tok
400
+ if prev_opens.size <= next_opens.size
401
+ if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token
402
+ # First line in heredoc
403
+ tok.match?(/^<<[-~]/) ? base_indent + indent : indent
404
+ elsif tok.match?(/^<<~/)
405
+ # Accept extra indent spaces inside `<<~` heredoc
406
+ [base_indent + indent, preserve_indent].max
686
407
  else
687
- spaces_of_nest.pop
688
- corresponding_token_depth = nil
408
+ # Accept any number of indent inside other heredoc
409
+ preserve_indent
689
410
  end
411
+ else
412
+ # Heredoc close
413
+ prev_line_indent_level = calc_indent_level(prev_opens)
414
+ tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
690
415
  end
416
+ else
417
+ base_indent + indent
691
418
  end
692
- is_first_spaces_of_line = false
693
- is_first_printable_of_line = false
694
419
  end
695
- corresponding_token_depth
696
- end
697
420
 
698
- def check_string_literal(tokens)
699
- i = 0
700
- start_token = []
701
- end_type = []
702
- pending_heredocs = []
703
- while i < tokens.size
704
- t = tokens[i]
705
- case t.event
706
- when *end_type.last
707
- start_token.pop
708
- end_type.pop
709
- when :on_tstring_beg
710
- start_token << t
711
- end_type << [:on_tstring_end, :on_label_end]
712
- when :on_regexp_beg
713
- start_token << t
714
- end_type << :on_regexp_end
715
- when :on_symbeg
716
- acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick}
717
- if (i + 1) < tokens.size
718
- if acceptable_single_tokens.all?{ |st| tokens[i + 1].event != st }
719
- start_token << t
720
- end_type << :on_tstring_end
721
- else
722
- i += 1
723
- end
724
- end
725
- when :on_backtick
726
- if t.state.allbits?(Ripper::EXPR_BEG)
727
- start_token << t
728
- end_type << :on_tstring_end
729
- end
730
- when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
731
- start_token << t
732
- end_type << :on_tstring_end
733
- when :on_heredoc_beg
734
- pending_heredocs << t
735
- end
736
-
737
- if pending_heredocs.any? && t.tok.include?("\n")
738
- pending_heredocs.reverse_each do |t|
739
- start_token << t
740
- end_type << :on_heredoc_end
741
- end
742
- pending_heredocs = []
743
- end
744
- i += 1
745
- end
746
- pending_heredocs.first || start_token.last
747
- end
421
+ LTYPE_TOKENS = %i[
422
+ on_heredoc_beg on_tstring_beg
423
+ on_regexp_beg on_symbeg on_backtick
424
+ on_symbols_beg on_qsymbols_beg
425
+ on_words_beg on_qwords_beg
426
+ ]
748
427
 
749
- def process_literal_type(tokens)
750
- start_token = check_string_literal(tokens)
751
- return nil if start_token == ""
752
-
753
- case start_token&.event
754
- when :on_tstring_beg
755
- case start_token&.tok
756
- when ?" then ?"
757
- when /^%.$/ then ?"
758
- when /^%Q.$/ then ?"
759
- when ?' then ?'
760
- when /^%q.$/ then ?'
428
+ def ltype_from_open_tokens(opens)
429
+ start_token = opens.reverse_each.find do |tok|
430
+ LTYPE_TOKENS.include?(tok.event)
761
431
  end
762
- when :on_regexp_beg then ?/
763
- when :on_symbeg then ?:
764
- when :on_backtick then ?`
765
- when :on_qwords_beg then ?]
766
- when :on_words_beg then ?]
767
- when :on_qsymbols_beg then ?]
768
- when :on_symbols_beg then ?]
769
- when :on_heredoc_beg
770
- start_token&.tok =~ /<<[-~]?(['"`])\w+\1/
771
- $1 || ?"
772
- else
773
- nil
774
- end
775
- end
432
+ return nil unless start_token
776
433
 
777
- def check_termination_in_prev_line(code)
778
- tokens = self.class.ripper_lex_without_warning(code, context: @context)
779
- past_first_newline = false
780
- index = tokens.rindex do |t|
781
- # traverse first token before last line
782
- if past_first_newline
783
- if t.tok.include?("\n")
784
- true
434
+ case start_token&.event
435
+ when :on_tstring_beg
436
+ case start_token&.tok
437
+ when ?" then ?"
438
+ when /^%.$/ then ?"
439
+ when /^%Q.$/ then ?"
440
+ when ?' then ?'
441
+ when /^%q.$/ then ?'
785
442
  end
786
- elsif t.tok.include?("\n")
787
- past_first_newline = true
788
- false
443
+ when :on_regexp_beg then ?/
444
+ when :on_symbeg then ?:
445
+ when :on_backtick then ?`
446
+ when :on_qwords_beg then ?]
447
+ when :on_words_beg then ?]
448
+ when :on_qsymbols_beg then ?]
449
+ when :on_symbols_beg then ?]
450
+ when :on_heredoc_beg
451
+ start_token&.tok =~ /<<[-~]?(['"`])\w+\1/
452
+ $1 || ?"
789
453
  else
790
- false
454
+ nil
791
455
  end
792
456
  end
793
457
 
794
- if index
795
- first_token = nil
796
- last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
797
- last_line_tokens.each do |t|
798
- unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
799
- first_token = t
800
- break
801
- end
802
- end
803
-
804
- if first_token.nil?
805
- return false
806
- elsif first_token && first_token.state == Ripper::EXPR_DOT
807
- return false
808
- else
809
- tokens_without_last_line = tokens[0..index]
810
- ltype = process_literal_type(tokens_without_last_line)
811
- indent = process_nesting_level(tokens_without_last_line)
812
- continue = process_continue(tokens_without_last_line)
813
- code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line)
814
- if ltype or indent > 0 or continue or code_block_open
815
- return false
458
+ def check_termination_in_prev_line(code)
459
+ tokens = self.class.ripper_lex_without_warning(code, context: @context)
460
+ past_first_newline = false
461
+ index = tokens.rindex do |t|
462
+ # traverse first token before last line
463
+ if past_first_newline
464
+ if t.tok.include?("\n")
465
+ true
466
+ end
467
+ elsif t.tok.include?("\n")
468
+ past_first_newline = true
469
+ false
816
470
  else
817
- return last_line_tokens.map(&:tok).join('')
471
+ false
818
472
  end
819
473
  end
820
- end
821
- false
822
- end
823
474
 
824
- private
825
-
826
- def heredoc_scope?
827
- heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) }
828
- heredoc_tokens[-1]&.event == :on_heredoc_beg
829
- end
475
+ if index
476
+ first_token = nil
477
+ last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
478
+ last_line_tokens.each do |t|
479
+ unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
480
+ first_token = t
481
+ break
482
+ end
483
+ end
830
484
 
831
- def in_keyword_case_scope?
832
- kw_tokens = @tokens.select { |t| t.event == :on_kw && ['case', 'for', 'end'].include?(t.tok) }
833
- counter = 0
834
- kw_tokens.reverse.each do |t|
835
- if t.tok == 'case'
836
- return true if counter.zero?
837
- counter += 1
838
- elsif t.tok == 'for'
839
- counter += 1
840
- elsif t.tok == 'end'
841
- counter -= 1
485
+ if first_token && first_token.state != Ripper::EXPR_DOT
486
+ tokens_without_last_line = tokens[0..index]
487
+ code_without_last_line = tokens_without_last_line.map(&:tok).join
488
+ opens_without_last_line = NestingParser.open_tokens(tokens_without_last_line)
489
+ if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line)
490
+ return last_line_tokens.map(&:tok).join
491
+ end
492
+ end
842
493
  end
494
+ false
843
495
  end
844
- false
845
496
  end
497
+ # :startdoc:
846
498
  end
847
- # :startdoc:
499
+
500
+ RubyLex = IRB::RubyLex
501
+ Object.deprecate_constant(:RubyLex)