irb 1.6.4 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/irb/ruby-lex.rb CHANGED
@@ -6,6 +6,7 @@
6
6
 
7
7
  require "ripper"
8
8
  require "jruby" if RUBY_ENGINE == "jruby"
9
+ require_relative "nesting_parser"
9
10
 
10
11
  # :stopdoc:
11
12
  class RubyLex
@@ -18,10 +19,7 @@ class RubyLex
18
19
 
19
20
  def initialize(context)
20
21
  @context = context
21
- @exp_line_no = @line_no = 1
22
- @indent = 0
23
- @continue = false
24
- @line = ""
22
+ @line_no = 1
25
23
  @prompt = nil
26
24
  end
27
25
 
@@ -42,14 +40,22 @@ class RubyLex
42
40
  result
43
41
  end
44
42
 
43
+ def single_line_command?(code)
44
+ command = code.split(/\s/, 2).first
45
+ @context.symbol_alias?(command) || @context.transform_args?(command)
46
+ end
47
+
45
48
  # io functions
46
- def set_input(io, &block)
49
+ def set_input(&block)
50
+ @input = block
51
+ end
52
+
53
+ def configure_io(io)
47
54
  @io = io
48
55
  if @io.respond_to?(:check_termination)
49
56
  @io.check_termination do |code|
50
57
  if Reline::IOGate.in_pasting?
51
- lex = RubyLex.new(@context)
52
- rest = lex.check_termination_in_prev_line(code)
58
+ rest = check_termination_in_prev_line(code)
53
59
  if rest
54
60
  Reline.delete_text
55
61
  rest.bytes.reverse_each do |c|
@@ -61,61 +67,39 @@ class RubyLex
61
67
  end
62
68
  else
63
69
  # Accept any single-line input for symbol aliases or commands that transform args
64
- command = code.split(/\s/, 2).first
65
- if @context.symbol_alias?(command) || @context.transform_args?(command)
66
- next true
67
- end
70
+ next true if single_line_command?(code)
68
71
 
69
- code.gsub!(/\s*\z/, '').concat("\n")
70
- tokens = self.class.ripper_lex_without_warning(code, context: @context)
71
- ltype, indent, continue, code_block_open = check_state(code, tokens)
72
- if ltype or indent > 0 or continue or code_block_open
73
- false
74
- else
75
- true
76
- end
72
+ _tokens, _opens, terminated = check_code_state(code)
73
+ terminated
77
74
  end
78
75
  end
79
76
  end
80
77
  if @io.respond_to?(:dynamic_prompt)
81
78
  @io.dynamic_prompt do |lines|
82
79
  lines << '' if lines.empty?
83
- result = []
84
80
  tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: @context)
85
- code = String.new
86
- partial_tokens = []
87
- unprocessed_tokens = []
88
- line_num_offset = 0
89
- tokens.each do |t|
90
- partial_tokens << t
91
- unprocessed_tokens << t
92
- if t.tok.include?("\n")
93
- t_str = t.tok
94
- t_str.each_line("\n") do |s|
95
- code << s
96
- next unless s.include?("\n")
97
- ltype, indent, continue, code_block_open = check_state(code, partial_tokens)
98
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
99
- line_num_offset += 1
100
- end
101
- unprocessed_tokens = []
102
- else
103
- code << t.tok
81
+ line_results = IRB::NestingParser.parse_by_line(tokens)
82
+ tokens_until_line = []
83
+ line_results.map.with_index do |(line_tokens, _prev_opens, next_opens, _min_depth), line_num_offset|
84
+ line_tokens.each do |token, _s|
85
+ # Avoid appending duplicated token. Tokens that include "\n" like multiline tstring_content can exist in multiple lines.
86
+ tokens_until_line << token if token != tokens_until_line.last
104
87
  end
88
+ continue = should_continue?(tokens_until_line)
89
+ prompt(next_opens, continue, line_num_offset)
105
90
  end
106
-
107
- unless unprocessed_tokens.empty?
108
- ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens)
109
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
110
- end
111
- result
112
91
  end
113
92
  end
114
93
 
115
- if block_given?
116
- @input = block
117
- else
118
- @input = Proc.new{@io.gets}
94
+ if @io.respond_to?(:auto_indent) and @context.auto_indent_mode
95
+ @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
96
+ next nil if lines == [nil] # Workaround for exit IRB with CTRL+d
97
+ next nil if !is_newline && lines[line_index]&.byteslice(0, byte_pointer)&.match?(/\A\s*\z/)
98
+
99
+ code = lines[0..line_index].map { |l| "#{l}\n" }.join
100
+ tokens = self.class.ripper_lex_without_warning(code, context: @context)
101
+ process_indent_level(tokens, lines, line_index, is_newline)
102
+ end
119
103
  end
120
104
  end
121
105
 
@@ -136,9 +120,42 @@ class RubyLex
136
120
  "#{local_variables.join('=')}=nil;" unless local_variables.empty?
137
121
  end
138
122
 
123
+ # Some part of the code is not included in Ripper's token.
124
+ # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr.
125
+ # With interpolated tokens, tokens.map(&:tok).join will be equal to code.
126
+ def self.interpolate_ripper_ignored_tokens(code, tokens)
127
+ line_positions = [0]
128
+ code.lines.each do |line|
129
+ line_positions << line_positions.last + line.bytesize
130
+ end
131
+ prev_byte_pos = 0
132
+ interpolated = []
133
+ prev_line = 1
134
+ tokens.each do |t|
135
+ line, col = t.pos
136
+ byte_pos = line_positions[line - 1] + col
137
+ if prev_byte_pos < byte_pos
138
+ tok = code.byteslice(prev_byte_pos...byte_pos)
139
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
140
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
141
+ prev_line += tok.count("\n")
142
+ end
143
+ interpolated << t
144
+ prev_byte_pos = byte_pos + t.tok.bytesize
145
+ prev_line += t.tok.count("\n")
146
+ end
147
+ if prev_byte_pos < code.bytesize
148
+ tok = code.byteslice(prev_byte_pos..)
149
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
150
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
151
+ end
152
+ interpolated
153
+ end
154
+
139
155
  def self.ripper_lex_without_warning(code, context: nil)
140
156
  verbose, $VERBOSE = $VERBOSE, nil
141
157
  lvars_code = generate_local_variables_assign_code(context&.local_variables || [])
158
+ original_code = code
142
159
  if lvars_code
143
160
  code = "#{lvars_code}\n#{code}"
144
161
  line_no = 0
@@ -148,7 +165,8 @@ class RubyLex
148
165
 
149
166
  compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
150
167
  lexer = Ripper::Lexer.new(inner_code, '-', line_no)
151
- lexer.scan.each_with_object([]) do |t, tokens|
168
+ tokens = []
169
+ lexer.scan.each do |t|
152
170
  next if t.pos.first == 0
153
171
  prev_tk = tokens.last
154
172
  position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
@@ -158,151 +176,112 @@ class RubyLex
158
176
  tokens << t
159
177
  end
160
178
  end
179
+ interpolate_ripper_ignored_tokens(original_code, tokens)
161
180
  end
162
181
  ensure
163
182
  $VERBOSE = verbose
164
183
  end
165
184
 
166
- def find_prev_spaces(line_index)
167
- return 0 if @tokens.size == 0
168
- md = @tokens[0].tok.match(/(\A +)/)
169
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
170
- line_count = 0
171
- @tokens.each_with_index do |t, i|
172
- if t.tok.include?("\n")
173
- line_count += t.tok.count("\n")
174
- if line_count >= line_index
175
- return prev_spaces
176
- end
177
- next if t.event == :on_tstring_content || t.event == :on_words_sep
178
- if (@tokens.size - 1) > i
179
- md = @tokens[i + 1].tok.match(/(\A +)/)
180
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
181
- end
182
- end
183
- end
184
- prev_spaces
185
+ def prompt(opens, continue, line_num_offset)
186
+ ltype = ltype_from_open_tokens(opens)
187
+ indent_level = calc_indent_level(opens)
188
+ @prompt&.call(ltype, indent_level, opens.any? || continue, @line_no + line_num_offset)
185
189
  end
186
190
 
187
- def set_auto_indent
188
- if @io.respond_to?(:auto_indent) and @context.auto_indent_mode
189
- @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
190
- if is_newline
191
- @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context)
192
- prev_spaces = find_prev_spaces(line_index)
193
- depth_difference = check_newline_depth_difference
194
- depth_difference = 0 if depth_difference < 0
195
- prev_spaces + depth_difference * 2
196
- else
197
- code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
198
- last_line = lines[line_index]&.byteslice(0, byte_pointer)
199
- code += last_line if last_line
200
- @tokens = self.class.ripper_lex_without_warning(code, context: @context)
201
- check_corresponding_token_depth(lines, line_index)
202
- end
203
- end
204
- end
191
+ def check_code_state(code)
192
+ check_target_code = code.gsub(/\s*\z/, '').concat("\n")
193
+ tokens = self.class.ripper_lex_without_warning(check_target_code, context: @context)
194
+ opens = IRB::NestingParser.open_tokens(tokens)
195
+ [tokens, opens, code_terminated?(code, tokens, opens)]
205
196
  end
206
197
 
207
- def check_state(code, tokens)
208
- ltype = process_literal_type(tokens)
209
- indent = process_nesting_level(tokens)
210
- continue = process_continue(tokens)
211
- lvars_code = self.class.generate_local_variables_assign_code(@context.local_variables)
212
- code = "#{lvars_code}\n#{code}" if lvars_code
213
- code_block_open = check_code_block(code, tokens)
214
- [ltype, indent, continue, code_block_open]
215
- end
216
-
217
- def prompt
218
- if @prompt
219
- @prompt.call(@ltype, @indent, @continue, @line_no)
198
+ def code_terminated?(code, tokens, opens)
199
+ case check_code_syntax(code)
200
+ when :unrecoverable_error
201
+ true
202
+ when :recoverable_error
203
+ false
204
+ when :other_error
205
+ opens.empty? && !should_continue?(tokens)
206
+ when :valid
207
+ !should_continue?(tokens)
220
208
  end
221
209
  end
222
210
 
223
- def initialize_input
224
- @ltype = nil
225
- @indent = 0
226
- @continue = false
227
- @line = ""
228
- @exp_line_no = @line_no
229
- @code_block_open = false
211
+ def save_prompt_to_context_io(opens, continue, line_num_offset)
212
+ # Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`.
213
+ prompt(opens, continue, line_num_offset)
230
214
  end
231
215
 
232
- def each_top_level_statement
233
- initialize_input
234
- catch(:TERM_INPUT) do
235
- loop do
236
- begin
237
- prompt
238
- unless l = lex
239
- throw :TERM_INPUT if @line == ''
240
- else
241
- @line_no += l.count("\n")
242
- if l == "\n"
243
- @exp_line_no += 1
244
- next
245
- end
246
- @line.concat l
247
- if @code_block_open or @ltype or @continue or @indent > 0
248
- next
249
- end
250
- end
251
- if @line != "\n"
252
- @line.force_encoding(@io.encoding)
253
- yield @line, @exp_line_no
254
- end
255
- raise TerminateLineInput if @io.eof?
256
- @line = ''
257
- @exp_line_no = @line_no
258
-
259
- @indent = 0
260
- rescue TerminateLineInput
261
- initialize_input
262
- prompt
263
- end
216
+ def readmultiline
217
+ save_prompt_to_context_io([], false, 0)
218
+
219
+ # multiline
220
+ return @input.call if @io.respond_to?(:check_termination)
221
+
222
+ # nomultiline
223
+ code = ''
224
+ line_offset = 0
225
+ loop do
226
+ line = @input.call
227
+ unless line
228
+ return code.empty? ? nil : code
264
229
  end
230
+
231
+ code << line
232
+ # Accept any single-line input for symbol aliases or commands that transform args
233
+ return code if single_line_command?(code)
234
+
235
+ tokens, opens, terminated = check_code_state(code)
236
+ return code if terminated
237
+
238
+ line_offset += 1
239
+ continue = should_continue?(tokens)
240
+ save_prompt_to_context_io(opens, continue, line_offset)
265
241
  end
266
242
  end
267
243
 
268
- def lex
269
- line = @input.call
270
- if @io.respond_to?(:check_termination)
271
- return line # multiline
244
+ def each_top_level_statement
245
+ loop do
246
+ code = readmultiline
247
+ break unless code
248
+
249
+ if code != "\n"
250
+ code.force_encoding(@io.encoding)
251
+ yield code, @line_no
252
+ end
253
+ @line_no += code.count("\n")
254
+ rescue TerminateLineInput
272
255
  end
273
- code = @line + (line.nil? ? '' : line)
274
- code.gsub!(/\s*\z/, '').concat("\n")
275
- @tokens = self.class.ripper_lex_without_warning(code, context: @context)
276
- @ltype, @indent, @continue, @code_block_open = check_state(code, @tokens)
277
- line
278
256
  end
279
257
 
280
- def process_continue(tokens)
281
- # last token is always newline
282
- if tokens.size >= 2 and tokens[-2].event == :on_regexp_end
283
- # end of regexp literal
284
- return false
285
- elsif tokens.size >= 2 and tokens[-2].event == :on_semicolon
286
- return false
287
- elsif tokens.size >= 2 and tokens[-2].event == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2].tok)
288
- return false
289
- elsif !tokens.empty? and tokens.last.tok == "\\\n"
290
- return true
291
- elsif tokens.size >= 1 and tokens[-1].event == :on_heredoc_end # "EOH\n"
292
- return false
293
- elsif tokens.size >= 2 and tokens[-2].state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2].tok !~ /\A\.\.\.?\z/
294
- # end of literal except for regexp
295
- # endless range at end of line is not a continue
296
- return true
258
+ def should_continue?(tokens)
259
+ # Look at the last token and check if IRB need to continue reading next line.
260
+ # Example code that should continue: `a\` `a +` `a.`
261
+ # Trailing spaces, newline, comments are skipped
262
+ return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n"
263
+
264
+ tokens.reverse_each do |token|
265
+ case token.event
266
+ when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end
267
+ # Skip
268
+ when :on_regexp_end, :on_heredoc_end, :on_semicolon
269
+ # State is EXPR_BEG but should not continue
270
+ return false
271
+ else
272
+ # Endless range should not continue
273
+ return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/)
274
+
275
+ # EXPR_DOT and most of the EXPR_BEG should continue
276
+ return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
277
+ end
297
278
  end
298
279
  false
299
280
  end
300
281
 
301
- def check_code_block(code, tokens)
302
- return true if tokens.empty?
303
- if tokens.last.event == :on_heredoc_beg
304
- return true
305
- end
282
+ def check_code_syntax(code)
283
+ lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables)
284
+ code = "#{lvars_code}\n#{code}"
306
285
 
307
286
  begin # check if parser error are available
308
287
  verbose, $VERBOSE = $VERBOSE, nil
@@ -321,6 +300,7 @@ class RubyLex
321
300
  end
322
301
  rescue EncodingError
323
302
  # This is for a hash with invalid encoding symbol, {"\xAE": 1}
303
+ :unrecoverable_error
324
304
  rescue SyntaxError => e
325
305
  case e.message
326
306
  when /unterminated (?:string|regexp) meets end of file/
@@ -333,7 +313,7 @@ class RubyLex
333
313
  #
334
314
  # example:
335
315
  # '
336
- return true
316
+ return :recoverable_error
337
317
  when /syntax error, unexpected end-of-input/
338
318
  # "syntax error, unexpected end-of-input, expecting keyword_end"
339
319
  #
@@ -343,7 +323,7 @@ class RubyLex
343
323
  # if false
344
324
  # fuga
345
325
  # end
346
- return true
326
+ return :recoverable_error
347
327
  when /syntax error, unexpected keyword_end/
348
328
  # "syntax error, unexpected keyword_end"
349
329
  #
@@ -353,402 +333,160 @@ class RubyLex
353
333
  #
354
334
  # example:
355
335
  # end
356
- return false
336
+ return :unrecoverable_error
357
337
  when /syntax error, unexpected '\.'/
358
338
  # "syntax error, unexpected '.'"
359
339
  #
360
340
  # example:
361
341
  # .
362
- return false
342
+ return :unrecoverable_error
363
343
  when /unexpected tREGEXP_BEG/
364
344
  # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
365
345
  #
366
346
  # example:
367
347
  # method / f /
368
- return false
348
+ return :unrecoverable_error
349
+ else
350
+ return :other_error
369
351
  end
370
352
  ensure
371
353
  $VERBOSE = verbose
372
354
  end
373
-
374
- last_lex_state = tokens.last.state
375
-
376
- if last_lex_state.allbits?(Ripper::EXPR_BEG)
377
- return false
378
- elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
379
- return true
380
- elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
381
- return true
382
- elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
383
- return true
384
- elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
385
- return true
386
- elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
387
- return false
388
- end
389
-
390
- false
355
+ :valid
391
356
  end
392
357
 
393
- def process_nesting_level(tokens)
394
- indent = 0
395
- in_oneliner_def = nil
396
- tokens.each_with_index { |t, index|
397
- # detecting one-liner method definition
398
- if in_oneliner_def.nil?
399
- if t.state.allbits?(Ripper::EXPR_ENDFN)
400
- in_oneliner_def = :ENDFN
401
- end
402
- else
403
- if t.state.allbits?(Ripper::EXPR_ENDFN)
404
- # continuing
405
- elsif t.state.allbits?(Ripper::EXPR_BEG)
406
- if t.tok == '='
407
- in_oneliner_def = :BODY
408
- end
409
- else
410
- if in_oneliner_def == :BODY
411
- # one-liner method definition
412
- indent -= 1
413
- end
414
- in_oneliner_def = nil
415
- end
416
- end
417
-
358
+ def calc_indent_level(opens)
359
+ indent_level = 0
360
+ opens.each_with_index do |t, index|
418
361
  case t.event
419
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
420
- indent += 1
421
- when :on_rbracket, :on_rbrace, :on_rparen
422
- indent -= 1
423
- when :on_kw
424
- next if index > 0 and tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
425
- case t.tok
426
- when 'do'
427
- syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index)
428
- indent += 1 if syntax_of_do == :method_calling
429
- when 'def', 'case', 'for', 'begin', 'class', 'module'
430
- indent += 1
431
- when 'if', 'unless', 'while', 'until'
432
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
433
- indent += 1 unless t.state.allbits?(Ripper::EXPR_LABEL)
434
- when 'end'
435
- indent -= 1
362
+ when :on_heredoc_beg
363
+ if opens[index + 1]&.event != :on_heredoc_beg
364
+ if t.tok.match?(/^<<[~-]/)
365
+ indent_level += 1
366
+ else
367
+ indent_level = 0
368
+ end
436
369
  end
370
+ when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
371
+ # can be indented if t.tok starts with `%`
372
+ when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_embexpr_beg
373
+ # can be indented but not indented in current implementation
374
+ when :on_embdoc_beg
375
+ indent_level = 0
376
+ else
377
+ indent_level += 1
437
378
  end
438
- # percent literals are not indented
439
- }
440
- indent
379
+ end
380
+ indent_level
441
381
  end
442
382
 
443
- def is_method_calling?(tokens, index)
444
- tk = tokens[index]
445
- if tk.state.anybits?(Ripper::EXPR_CMDARG) and tk.event == :on_ident
446
- # The target method call to pass the block with "do".
447
- return true
448
- elsif tk.state.anybits?(Ripper::EXPR_ARG) and tk.event == :on_ident
449
- non_sp_index = tokens[0..(index - 1)].rindex{ |t| t.event != :on_sp }
450
- if non_sp_index
451
- prev_tk = tokens[non_sp_index]
452
- if prev_tk.state.anybits?(Ripper::EXPR_DOT) and prev_tk.event == :on_period
453
- # The target method call with receiver to pass the block with "do".
454
- return true
455
- end
456
- end
457
- end
458
- false
383
+ FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg]
384
+
385
+ def free_indent_token?(token)
386
+ FREE_INDENT_TOKENS.include?(token&.event)
459
387
  end
460
388
 
461
- def take_corresponding_syntax_to_kw_do(tokens, index)
462
- syntax_of_do = nil
463
- # Finding a syntax corresponding to "do".
464
- index.downto(0) do |i|
465
- tk = tokens[i]
466
- # In "continue", the token isn't the corresponding syntax to "do".
467
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
468
- first_in_fomula = false
469
- if non_sp_index.nil?
470
- first_in_fomula = true
471
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
472
- first_in_fomula = true
473
- end
474
- if is_method_calling?(tokens, i)
475
- syntax_of_do = :method_calling
476
- break if first_in_fomula
477
- elsif tk.event == :on_kw && %w{while until for}.include?(tk.tok)
478
- # A loop syntax in front of "do" found.
479
- #
480
- # while cond do # also "until" or "for"
481
- # end
482
- #
483
- # This "do" doesn't increment indent because the loop syntax already
484
- # incremented.
485
- syntax_of_do = :loop_syntax
486
- break if first_in_fomula
389
+ # Calculates the difference of pasted code's indent and indent calculated from tokens
390
+ def indent_difference(lines, line_results, line_index)
391
+ loop do
392
+ _tokens, prev_opens, _next_opens, min_depth = line_results[line_index]
393
+ open_token = prev_opens.last
394
+ if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token))
395
+ # If the leading whitespace is an indent, return the difference
396
+ indent_level = calc_indent_level(prev_opens.take(min_depth))
397
+ calculated_indent = 2 * indent_level
398
+ actual_indent = lines[line_index][/^ */].size
399
+ return actual_indent - calculated_indent
400
+ elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/)
401
+ return 0
487
402
  end
403
+ # If the leading whitespace is not an indent but part of a multiline token
404
+ # Calculate base_indent of the multiline token's beginning line
405
+ line_index = open_token.pos[0] - 1
488
406
  end
489
- syntax_of_do
490
407
  end
491
408
 
492
- def is_the_in_correspond_to_a_for(tokens, index)
493
- syntax_of_in = nil
494
- # Finding a syntax corresponding to "do".
495
- index.downto(0) do |i|
496
- tk = tokens[i]
497
- # In "continue", the token isn't the corresponding syntax to "do".
498
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
499
- first_in_fomula = false
500
- if non_sp_index.nil?
501
- first_in_fomula = true
502
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
503
- first_in_fomula = true
504
- end
505
- if tk.event == :on_kw && tk.tok == 'for'
506
- # A loop syntax in front of "do" found.
507
- #
508
- # while cond do # also "until" or "for"
509
- # end
510
- #
511
- # This "do" doesn't increment indent because the loop syntax already
512
- # incremented.
513
- syntax_of_in = :for
514
- end
515
- break if first_in_fomula
409
+ def process_indent_level(tokens, lines, line_index, is_newline)
410
+ line_results = IRB::NestingParser.parse_by_line(tokens)
411
+ result = line_results[line_index]
412
+ if result
413
+ _tokens, prev_opens, next_opens, min_depth = result
414
+ else
415
+ # When last line is empty
416
+ prev_opens = next_opens = line_results.last[2]
417
+ min_depth = next_opens.size
516
418
  end
517
- syntax_of_in
518
- end
519
419
 
520
- def check_newline_depth_difference
521
- depth_difference = 0
522
- open_brace_on_line = 0
523
- in_oneliner_def = nil
524
- @tokens.each_with_index do |t, index|
525
- # detecting one-liner method definition
526
- if in_oneliner_def.nil?
527
- if t.state.allbits?(Ripper::EXPR_ENDFN)
528
- in_oneliner_def = :ENDFN
529
- end
530
- else
531
- if t.state.allbits?(Ripper::EXPR_ENDFN)
532
- # continuing
533
- elsif t.state.allbits?(Ripper::EXPR_BEG)
534
- if t.tok == '='
535
- in_oneliner_def = :BODY
536
- end
537
- else
538
- if in_oneliner_def == :BODY
539
- # one-liner method definition
540
- depth_difference -= 1
541
- end
542
- in_oneliner_def = nil
543
- end
544
- end
420
+ # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
421
+ # Shortest open tokens can be calculated by `opens.take(min_depth)`
422
+ indent = 2 * calc_indent_level(prev_opens.take(min_depth))
545
423
 
546
- case t.event
547
- when :on_ignored_nl, :on_nl, :on_comment
548
- if index != (@tokens.size - 1) and in_oneliner_def != :BODY
549
- depth_difference = 0
550
- open_brace_on_line = 0
551
- end
552
- next
553
- when :on_sp
554
- next
555
- end
424
+ preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size
556
425
 
557
- case t.event
558
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
559
- depth_difference += 1
560
- open_brace_on_line += 1
561
- when :on_rbracket, :on_rbrace, :on_rparen
562
- depth_difference -= 1 if open_brace_on_line > 0
563
- when :on_kw
564
- next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
565
- case t.tok
566
- when 'do'
567
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
568
- depth_difference += 1 if syntax_of_do == :method_calling
569
- when 'def', 'case', 'for', 'begin', 'class', 'module'
570
- depth_difference += 1
571
- when 'if', 'unless', 'while', 'until', 'rescue'
572
- # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
573
- unless t.state.allbits?(Ripper::EXPR_LABEL)
574
- depth_difference += 1
575
- end
576
- when 'else', 'elsif', 'ensure', 'when'
577
- depth_difference += 1
578
- when 'in'
579
- unless is_the_in_correspond_to_a_for(@tokens, index)
580
- depth_difference += 1
581
- end
582
- when 'end'
583
- depth_difference -= 1
584
- end
585
- end
586
- end
587
- depth_difference
588
- end
426
+ prev_open_token = prev_opens.last
427
+ next_open_token = next_opens.last
589
428
 
590
- def check_corresponding_token_depth(lines, line_index)
591
- corresponding_token_depth = nil
592
- is_first_spaces_of_line = true
593
- is_first_printable_of_line = true
594
- spaces_of_nest = []
595
- spaces_at_line_head = 0
596
- open_brace_on_line = 0
597
- in_oneliner_def = nil
598
-
599
- if heredoc_scope?
600
- return lines[line_index][/^ */].length
429
+ # Calculates base indent for pasted code on the line where prev_open_token is located
430
+ # irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0
431
+ # irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2
432
+ # irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4
433
+ if prev_open_token
434
+ base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max
435
+ else
436
+ base_indent = 0
601
437
  end
602
438
 
603
- @tokens.each_with_index do |t, index|
604
- # detecting one-liner method definition
605
- if in_oneliner_def.nil?
606
- if t.state.allbits?(Ripper::EXPR_ENDFN)
607
- in_oneliner_def = :ENDFN
608
- end
439
+ if free_indent_token?(prev_open_token)
440
+ if is_newline && prev_open_token.pos[0] == line_index
441
+ # First newline inside free-indent token
442
+ base_indent + indent
609
443
  else
610
- if t.state.allbits?(Ripper::EXPR_ENDFN)
611
- # continuing
612
- elsif t.state.allbits?(Ripper::EXPR_BEG)
613
- if t.tok == '='
614
- in_oneliner_def = :BODY
615
- end
616
- else
617
- if in_oneliner_def == :BODY
618
- # one-liner method definition
619
- if is_first_printable_of_line
620
- corresponding_token_depth = spaces_of_nest.pop
621
- else
622
- spaces_of_nest.pop
623
- corresponding_token_depth = nil
624
- end
625
- end
626
- in_oneliner_def = nil
627
- end
444
+ # Accept any number of indent inside free-indent token
445
+ preserve_indent
628
446
  end
629
-
630
- case t.event
631
- when :on_ignored_nl, :on_nl, :on_comment, :on_heredoc_end, :on_embdoc_end
632
- if in_oneliner_def != :BODY
633
- corresponding_token_depth = nil
634
- spaces_at_line_head = 0
635
- is_first_spaces_of_line = true
636
- is_first_printable_of_line = true
637
- open_brace_on_line = 0
638
- end
639
- next
640
- when :on_sp
641
- spaces_at_line_head = t.tok.count(' ') if is_first_spaces_of_line
642
- is_first_spaces_of_line = false
643
- next
447
+ elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg
448
+ if prev_open_token&.event == next_open_token&.event
449
+ # Accept any number of indent inside embdoc content
450
+ preserve_indent
451
+ else
452
+ # =begin or =end
453
+ 0
644
454
  end
645
-
646
- case t.event
647
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
648
- spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2)
649
- open_brace_on_line += 1
650
- when :on_rbracket, :on_rbrace, :on_rparen
651
- if is_first_printable_of_line
652
- corresponding_token_depth = spaces_of_nest.pop
455
+ elsif prev_open_token&.event == :on_heredoc_beg
456
+ tok = prev_open_token.tok
457
+ if prev_opens.size <= next_opens.size
458
+ if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token
459
+ # First line in heredoc
460
+ tok.match?(/^<<[-~]/) ? base_indent + indent : indent
461
+ elsif tok.match?(/^<<~/)
462
+ # Accept extra indent spaces inside `<<~` heredoc
463
+ [base_indent + indent, preserve_indent].max
653
464
  else
654
- spaces_of_nest.pop
655
- corresponding_token_depth = nil
656
- end
657
- open_brace_on_line -= 1
658
- when :on_kw
659
- next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
660
- case t.tok
661
- when 'do'
662
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
663
- if syntax_of_do == :method_calling
664
- spaces_of_nest.push(spaces_at_line_head)
665
- end
666
- when 'def', 'case', 'for', 'begin', 'class', 'module'
667
- spaces_of_nest.push(spaces_at_line_head)
668
- when 'rescue'
669
- unless t.state.allbits?(Ripper::EXPR_LABEL)
670
- corresponding_token_depth = spaces_of_nest.last
671
- end
672
- when 'if', 'unless', 'while', 'until'
673
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
674
- unless t.state.allbits?(Ripper::EXPR_LABEL)
675
- spaces_of_nest.push(spaces_at_line_head)
676
- end
677
- when 'else', 'elsif', 'ensure', 'when'
678
- corresponding_token_depth = spaces_of_nest.last
679
- when 'in'
680
- if in_keyword_case_scope?
681
- corresponding_token_depth = spaces_of_nest.last
682
- end
683
- when 'end'
684
- if is_first_printable_of_line
685
- corresponding_token_depth = spaces_of_nest.pop
686
- else
687
- spaces_of_nest.pop
688
- corresponding_token_depth = nil
689
- end
465
+ # Accept any number of indent inside other heredoc
466
+ preserve_indent
690
467
  end
468
+ else
469
+ # Heredoc close
470
+ prev_line_indent_level = calc_indent_level(prev_opens)
471
+ tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
691
472
  end
692
- is_first_spaces_of_line = false
693
- is_first_printable_of_line = false
473
+ else
474
+ base_indent + indent
694
475
  end
695
- corresponding_token_depth
696
476
  end
697
477
 
698
- def check_string_literal(tokens)
699
- i = 0
700
- start_token = []
701
- end_type = []
702
- pending_heredocs = []
703
- while i < tokens.size
704
- t = tokens[i]
705
- case t.event
706
- when *end_type.last
707
- start_token.pop
708
- end_type.pop
709
- when :on_tstring_beg
710
- start_token << t
711
- end_type << [:on_tstring_end, :on_label_end]
712
- when :on_regexp_beg
713
- start_token << t
714
- end_type << :on_regexp_end
715
- when :on_symbeg
716
- acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick}
717
- if (i + 1) < tokens.size
718
- if acceptable_single_tokens.all?{ |st| tokens[i + 1].event != st }
719
- start_token << t
720
- end_type << :on_tstring_end
721
- else
722
- i += 1
723
- end
724
- end
725
- when :on_backtick
726
- if t.state.allbits?(Ripper::EXPR_BEG)
727
- start_token << t
728
- end_type << :on_tstring_end
729
- end
730
- when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
731
- start_token << t
732
- end_type << :on_tstring_end
733
- when :on_heredoc_beg
734
- pending_heredocs << t
735
- end
478
+ LTYPE_TOKENS = %i[
479
+ on_heredoc_beg on_tstring_beg
480
+ on_regexp_beg on_symbeg on_backtick
481
+ on_symbols_beg on_qsymbols_beg
482
+ on_words_beg on_qwords_beg
483
+ ]
736
484
 
737
- if pending_heredocs.any? && t.tok.include?("\n")
738
- pending_heredocs.reverse_each do |t|
739
- start_token << t
740
- end_type << :on_heredoc_end
741
- end
742
- pending_heredocs = []
743
- end
744
- i += 1
485
+ def ltype_from_open_tokens(opens)
486
+ start_token = opens.reverse_each.find do |tok|
487
+ LTYPE_TOKENS.include?(tok.event)
745
488
  end
746
- pending_heredocs.first || start_token.last
747
- end
748
-
749
- def process_literal_type(tokens)
750
- start_token = check_string_literal(tokens)
751
- return nil if start_token == ""
489
+ return nil unless start_token
752
490
 
753
491
  case start_token&.event
754
492
  when :on_tstring_beg
@@ -801,47 +539,16 @@ class RubyLex
801
539
  end
802
540
  end
803
541
 
804
- if first_token.nil?
805
- return false
806
- elsif first_token && first_token.state == Ripper::EXPR_DOT
807
- return false
808
- else
542
+ if first_token && first_token.state != Ripper::EXPR_DOT
809
543
  tokens_without_last_line = tokens[0..index]
810
- ltype = process_literal_type(tokens_without_last_line)
811
- indent = process_nesting_level(tokens_without_last_line)
812
- continue = process_continue(tokens_without_last_line)
813
- code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line)
814
- if ltype or indent > 0 or continue or code_block_open
815
- return false
816
- else
817
- return last_line_tokens.map(&:tok).join('')
544
+ code_without_last_line = tokens_without_last_line.map(&:tok).join
545
+ opens_without_last_line = IRB::NestingParser.open_tokens(tokens_without_last_line)
546
+ if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line)
547
+ return last_line_tokens.map(&:tok).join
818
548
  end
819
549
  end
820
550
  end
821
551
  false
822
552
  end
823
-
824
- private
825
-
826
- def heredoc_scope?
827
- heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) }
828
- heredoc_tokens[-1]&.event == :on_heredoc_beg
829
- end
830
-
831
- def in_keyword_case_scope?
832
- kw_tokens = @tokens.select { |t| t.event == :on_kw && ['case', 'for', 'end'].include?(t.tok) }
833
- counter = 0
834
- kw_tokens.reverse.each do |t|
835
- if t.tok == 'case'
836
- return true if counter.zero?
837
- counter += 1
838
- elsif t.tok == 'for'
839
- counter += 1
840
- elsif t.tok == 'end'
841
- counter -= 1
842
- end
843
- end
844
- false
845
- end
846
553
  end
847
554
  # :startdoc: