irb 1.0.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.document +4 -0
  3. data/Gemfile +10 -2
  4. data/LICENSE.txt +3 -3
  5. data/README.md +3 -3
  6. data/Rakefile +17 -1
  7. data/doc/irb/irb-tools.rd.ja +184 -0
  8. data/doc/irb/irb.rd.ja +427 -0
  9. data/irb.gemspec +18 -4
  10. data/lib/irb/cmd/fork.rb +2 -4
  11. data/lib/irb/cmd/help.rb +10 -5
  12. data/lib/irb/cmd/info.rb +32 -0
  13. data/lib/irb/cmd/ls.rb +101 -0
  14. data/lib/irb/cmd/measure.rb +43 -0
  15. data/lib/irb/cmd/nop.rb +10 -4
  16. data/lib/irb/cmd/pushws.rb +0 -1
  17. data/lib/irb/cmd/show_source.rb +93 -0
  18. data/lib/irb/cmd/whereami.rb +20 -0
  19. data/lib/irb/color.rb +246 -0
  20. data/lib/irb/color_printer.rb +47 -0
  21. data/lib/irb/completion.rb +254 -55
  22. data/lib/irb/context.rb +165 -72
  23. data/lib/irb/easter-egg.rb +138 -0
  24. data/lib/irb/ext/change-ws.rb +0 -1
  25. data/lib/irb/ext/history.rb +47 -11
  26. data/lib/irb/ext/loader.rb +46 -20
  27. data/lib/irb/ext/multi-irb.rb +7 -7
  28. data/lib/irb/ext/save-history.rb +36 -11
  29. data/lib/irb/ext/tracer.rb +14 -2
  30. data/lib/irb/ext/use-loader.rb +4 -3
  31. data/lib/irb/ext/workspaces.rb +0 -1
  32. data/lib/irb/extend-command.rb +113 -63
  33. data/lib/irb/frame.rb +12 -7
  34. data/lib/irb/help.rb +0 -1
  35. data/lib/irb/init.rb +146 -26
  36. data/lib/irb/input-method.rb +287 -9
  37. data/lib/irb/inspector.rb +15 -11
  38. data/lib/irb/lc/error.rb +55 -16
  39. data/lib/irb/lc/help-message +25 -13
  40. data/lib/irb/lc/ja/error.rb +55 -14
  41. data/lib/irb/lc/ja/help-message +11 -6
  42. data/lib/irb/locale.rb +13 -4
  43. data/lib/irb/notifier.rb +12 -8
  44. data/lib/irb/output-method.rb +6 -6
  45. data/lib/irb/ruby-lex.rb +673 -992
  46. data/lib/irb/ruby_logo.aa +37 -0
  47. data/lib/irb/version.rb +2 -2
  48. data/lib/irb/workspace.rb +65 -21
  49. data/lib/irb/xmp.rb +1 -1
  50. data/lib/irb.rb +276 -96
  51. data/man/irb.1 +229 -0
  52. metadata +25 -31
  53. data/.gitignore +0 -9
  54. data/.travis.yml +0 -6
  55. data/lib/irb/lc/.document +0 -4
  56. data/lib/irb/ruby-token.rb +0 -267
  57. data/lib/irb/slex.rb +0 -282
data/lib/irb/ruby-lex.rb CHANGED
@@ -10,74 +10,105 @@
10
10
  #
11
11
  #
12
12
 
13
- require "e2mmap"
14
- require_relative "slex"
15
- require_relative "ruby-token"
13
+ require "ripper"
14
+ require "jruby" if RUBY_ENGINE == "jruby"
16
15
 
17
16
  # :stopdoc:
18
17
  class RubyLex
19
18
 
20
- extend Exception2MessageMapper
21
- def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
22
- def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
23
- def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
24
- def_exception(:TkReading2TokenDuplicateError,
25
- "key duplicate(token_n='%s', key='%s')")
26
- def_exception(:SyntaxError, "%s")
27
-
28
- def_exception(:TerminateLineInput, "Terminate Line Input")
29
-
30
- include RubyToken
31
-
32
- class << self
33
- attr_accessor :debug_level
34
- def debug?
35
- @debug_level > 0
19
+ class TerminateLineInput < StandardError
20
+ def initialize
21
+ super("Terminate Line Input")
36
22
  end
37
23
  end
38
- @debug_level = 0
39
24
 
40
25
  def initialize
41
- lex_init
42
- set_input(STDIN)
43
-
44
- @seek = 0
45
26
  @exp_line_no = @line_no = 1
46
- @base_char_no = 0
47
- @char_no = 0
48
- @rests = []
49
- @readed = []
50
- @here_readed = []
51
-
52
27
  @indent = 0
53
- @indent_stack = []
54
- @lex_state = EXPR_BEG
55
- @space_seen = false
56
- @here_header = false
57
- @post_symbeg = false
58
-
59
28
  @continue = false
60
29
  @line = ""
61
-
62
- @skip_space = false
63
- @readed_auto_clean_up = false
64
- @exception_on_syntax_error = true
65
-
66
30
  @prompt = nil
67
31
  end
68
32
 
69
- attr_accessor :skip_space
70
- attr_accessor :readed_auto_clean_up
71
- attr_accessor :exception_on_syntax_error
72
-
73
- attr_reader :seek
74
- attr_reader :char_no
75
- attr_reader :line_no
76
- attr_reader :indent
33
+ def self.compile_with_errors_suppressed(code, line_no: 1)
34
+ begin
35
+ result = yield code, line_no
36
+ rescue ArgumentError
37
+ # Ruby can issue an error for the code if there is an
38
+ # incomplete magic comment for encoding in it. Force an
39
+ # expression with a new line before the code in this
40
+ # case to prevent magic comment handling. To make sure
41
+ # line numbers in the lexed code remain the same,
42
+ # decrease the line number by one.
43
+ code = ";\n#{code}"
44
+ line_no -= 1
45
+ result = yield code, line_no
46
+ end
47
+ result
48
+ end
77
49
 
78
50
  # io functions
79
- def set_input(io, p = nil, &block)
51
+ def set_input(io, p = nil, context: nil, &block)
80
52
  @io = io
53
+ if @io.respond_to?(:check_termination)
54
+ @io.check_termination do |code|
55
+ if Reline::IOGate.in_pasting?
56
+ lex = RubyLex.new
57
+ rest = lex.check_termination_in_prev_line(code, context: context)
58
+ if rest
59
+ Reline.delete_text
60
+ rest.bytes.reverse_each do |c|
61
+ Reline.ungetc(c)
62
+ end
63
+ true
64
+ else
65
+ false
66
+ end
67
+ else
68
+ code.gsub!(/\s*\z/, '').concat("\n")
69
+ ltype, indent, continue, code_block_open = check_state(code, context: context)
70
+ if ltype or indent > 0 or continue or code_block_open
71
+ false
72
+ else
73
+ true
74
+ end
75
+ end
76
+ end
77
+ end
78
+ if @io.respond_to?(:dynamic_prompt)
79
+ @io.dynamic_prompt do |lines|
80
+ lines << '' if lines.empty?
81
+ result = []
82
+ tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: context)
83
+ code = String.new
84
+ partial_tokens = []
85
+ unprocessed_tokens = []
86
+ line_num_offset = 0
87
+ tokens.each do |t|
88
+ partial_tokens << t
89
+ unprocessed_tokens << t
90
+ if t.tok.include?("\n")
91
+ t_str = t.tok
92
+ t_str.each_line("\n") do |s|
93
+ code << s << "\n"
94
+ ltype, indent, continue, code_block_open = check_state(code, partial_tokens, context: context)
95
+ result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
96
+ line_num_offset += 1
97
+ end
98
+ unprocessed_tokens = []
99
+ else
100
+ code << t.tok
101
+ end
102
+ end
103
+
104
+ unless unprocessed_tokens.empty?
105
+ ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens, context: context)
106
+ result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
107
+ end
108
+ result
109
+ end
110
+ end
111
+
81
112
  if p.respond_to?(:call)
82
113
  @input = p
83
114
  elsif block_given?
@@ -87,119 +118,115 @@ class RubyLex
87
118
  end
88
119
  end
89
120
 
90
- def get_readed
91
- if idx = @readed.rindex("\n")
92
- @base_char_no = @readed.size - (idx + 1)
93
- else
94
- @base_char_no += @readed.size
95
- end
96
-
97
- readed = @readed.join("")
98
- @readed = []
99
- readed
100
- end
101
-
102
- def getc
103
- while @rests.empty?
104
- @rests.push nil unless buf_input
105
- end
106
- c = @rests.shift
107
- if @here_header
108
- @here_readed.push c
109
- else
110
- @readed.push c
111
- end
112
- @seek += 1
113
- if c == "\n"
114
- @line_no += 1
115
- @char_no = 0
121
+ def set_prompt(p = nil, &block)
122
+ p = block if block_given?
123
+ if p.respond_to?(:call)
124
+ @prompt = p
116
125
  else
117
- @char_no += 1
118
- end
119
- c
120
- end
121
-
122
- def gets
123
- l = ""
124
- while c = getc
125
- l.concat(c)
126
- break if c == "\n"
126
+ @prompt = Proc.new{print p}
127
127
  end
128
- return nil if l == "" and c.nil?
129
- l
130
128
  end
131
129
 
132
- def eof?
133
- @io.eof?
134
- end
135
-
136
- def getc_of_rests
137
- if @rests.empty?
138
- nil
139
- else
140
- getc
141
- end
142
- end
130
+ ERROR_TOKENS = [
131
+ :on_parse_error,
132
+ :compile_error,
133
+ :on_assign_error,
134
+ :on_alias_error,
135
+ :on_class_name_error,
136
+ :on_param_error
137
+ ]
143
138
 
144
- def ungetc(c = nil)
145
- if @here_readed.empty?
146
- c2 = @readed.pop
147
- else
148
- c2 = @here_readed.pop
149
- end
150
- c = c2 unless c
151
- @rests.unshift c #c =
152
- @seek -= 1
153
- if c == "\n"
154
- @line_no -= 1
155
- if idx = @readed.rindex("\n")
156
- @char_no = idx + 1
139
+ def self.ripper_lex_without_warning(code, context: nil)
140
+ verbose, $VERBOSE = $VERBOSE, nil
141
+ if context
142
+ lvars = context&.workspace&.binding&.local_variables
143
+ if lvars && !lvars.empty?
144
+ code = "#{lvars.join('=')}=nil\n#{code}"
145
+ line_no = 0
146
+ else
147
+ line_no = 1
148
+ end
149
+ end
150
+ tokens = nil
151
+ compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
152
+ lexer = Ripper::Lexer.new(inner_code, '-', line_no)
153
+ if lexer.respond_to?(:scan) # Ruby 2.7+
154
+ tokens = []
155
+ pos_to_index = {}
156
+ lexer.scan.each do |t|
157
+ next if t.pos.first == 0
158
+ if pos_to_index.has_key?(t.pos)
159
+ index = pos_to_index[t.pos]
160
+ found_tk = tokens[index]
161
+ if ERROR_TOKENS.include?(found_tk.event) && !ERROR_TOKENS.include?(t.event)
162
+ tokens[index] = t
163
+ end
164
+ else
165
+ pos_to_index[t.pos] = tokens.size
166
+ tokens << t
167
+ end
168
+ end
157
169
  else
158
- @char_no = @base_char_no + @readed.size
170
+ tokens = lexer.parse.reject { |it| it.pos.first == 0 }
159
171
  end
160
- else
161
- @char_no -= 1
162
172
  end
173
+ tokens
174
+ ensure
175
+ $VERBOSE = verbose
163
176
  end
164
177
 
165
- def peek_equal?(str)
166
- chrs = str.split(//)
167
- until @rests.size >= chrs.size
168
- return false unless buf_input
169
- end
170
- @rests[0, chrs.size] == chrs
171
- end
172
-
173
- def peek_match?(regexp)
174
- while @rests.empty?
175
- return false unless buf_input
178
+ def find_prev_spaces(line_index)
179
+ return 0 if @tokens.size == 0
180
+ md = @tokens[0].tok.match(/(\A +)/)
181
+ prev_spaces = md.nil? ? 0 : md[1].count(' ')
182
+ line_count = 0
183
+ @tokens.each_with_index do |t, i|
184
+ if t.tok.include?("\n")
185
+ line_count += t.tok.count("\n")
186
+ if line_count >= line_index
187
+ return prev_spaces
188
+ end
189
+ if (@tokens.size - 1) > i
190
+ md = @tokens[i + 1].tok.match(/(\A +)/)
191
+ prev_spaces = md.nil? ? 0 : md[1].count(' ')
192
+ end
193
+ end
176
194
  end
177
- regexp =~ @rests.join("")
195
+ prev_spaces
178
196
  end
179
197
 
180
- def peek(i = 0)
181
- while @rests.size <= i
182
- return nil unless buf_input
198
+ def set_auto_indent(context)
199
+ if @io.respond_to?(:auto_indent) and context.auto_indent_mode
200
+ @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
201
+ if is_newline
202
+ @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: context)
203
+ prev_spaces = find_prev_spaces(line_index)
204
+ depth_difference = check_newline_depth_difference
205
+ depth_difference = 0 if depth_difference < 0
206
+ prev_spaces + depth_difference * 2
207
+ else
208
+ code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
209
+ last_line = lines[line_index]&.byteslice(0, byte_pointer)
210
+ code += last_line if last_line
211
+ @tokens = self.class.ripper_lex_without_warning(code, context: context)
212
+ corresponding_token_depth = check_corresponding_token_depth(lines, line_index)
213
+ if corresponding_token_depth
214
+ corresponding_token_depth
215
+ else
216
+ nil
217
+ end
218
+ end
219
+ end
183
220
  end
184
- @rests[i]
185
221
  end
186
222
 
187
- def buf_input
188
- prompt
189
- line = @input.call
190
- return nil unless line
191
- @rests.concat line.chars.to_a
192
- true
193
- end
194
- private :buf_input
195
-
196
- def set_prompt(p = nil, &block)
197
- p = block if block_given?
198
- if p.respond_to?(:call)
199
- @prompt = p
200
- else
201
- @prompt = Proc.new{print p}
202
- end
223
+ def check_state(code, tokens = nil, context: nil)
224
+ tokens = self.class.ripper_lex_without_warning(code, context: context) unless tokens
225
+ ltype = process_literal_type(tokens)
226
+ indent = process_nesting_level(tokens)
227
+ continue = process_continue(tokens)
228
+ code_block_open = check_code_block(code, tokens)
229
+ [ltype, indent, continue, code_block_open]
203
230
  end
204
231
 
205
232
  def prompt
@@ -210,20 +237,11 @@ class RubyLex
210
237
 
211
238
  def initialize_input
212
239
  @ltype = nil
213
- @quoted = nil
214
240
  @indent = 0
215
- @indent_stack = []
216
- @lex_state = EXPR_BEG
217
- @space_seen = false
218
- @here_header = false
219
-
220
241
  @continue = false
221
- @post_symbeg = false
222
-
223
- prompt
224
-
225
242
  @line = ""
226
243
  @exp_line_no = @line_no
244
+ @code_block_open = false
227
245
  end
228
246
 
229
247
  def each_top_level_statement
@@ -231,13 +249,17 @@ class RubyLex
231
249
  catch(:TERM_INPUT) do
232
250
  loop do
233
251
  begin
234
- @continue = false
235
252
  prompt
236
253
  unless l = lex
237
254
  throw :TERM_INPUT if @line == ''
238
255
  else
256
+ @line_no += l.count("\n")
257
+ if l == "\n"
258
+ @exp_line_no += 1
259
+ next
260
+ end
239
261
  @line.concat l
240
- if @ltype or @continue or @indent > 0
262
+ if @code_block_open or @ltype or @continue or @indent > 0
241
263
  next
242
264
  end
243
265
  end
@@ -245,936 +267,595 @@ class RubyLex
245
267
  @line.force_encoding(@io.encoding)
246
268
  yield @line, @exp_line_no
247
269
  end
248
- break unless l
270
+ raise TerminateLineInput if @io.eof?
249
271
  @line = ''
250
272
  @exp_line_no = @line_no
251
273
 
252
274
  @indent = 0
253
- @indent_stack = []
254
- prompt
255
275
  rescue TerminateLineInput
256
276
  initialize_input
257
277
  prompt
258
- get_readed
259
278
  end
260
279
  end
261
280
  end
262
281
  end
263
282
 
264
283
  def lex
265
- continue = @continue
266
- while tk = token
267
- case tk
268
- when TkNL, TkEND_OF_SCRIPT
269
- @continue = continue unless continue.nil?
270
- break unless @continue
271
- when TkSPACE, TkCOMMENT
272
- when TkSEMICOLON, TkBEGIN, TkELSE
273
- @continue = continue = false
274
- else
275
- continue = nil
276
- end
277
- end
278
- line = get_readed
279
- if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
280
- nil
281
- else
282
- line
283
- end
284
+ line = @input.call
285
+ if @io.respond_to?(:check_termination)
286
+ return line # multiline
287
+ end
288
+ code = @line + (line.nil? ? '' : line)
289
+ code.gsub!(/\s*\z/, '').concat("\n")
290
+ @tokens = self.class.ripper_lex_without_warning(code)
291
+ @continue = process_continue
292
+ @code_block_open = check_code_block(code)
293
+ @indent = process_nesting_level
294
+ @ltype = process_literal_type
295
+ line
284
296
  end
285
297
 
286
- def token
287
- @prev_seek = @seek
288
- @prev_line_no = @line_no
289
- @prev_char_no = @char_no
290
- begin
291
- begin
292
- tk = @OP.match(self)
293
- @space_seen = tk.kind_of?(TkSPACE)
294
- @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
295
- @post_symbeg = tk.kind_of?(TkSYMBEG)
296
- rescue SyntaxError
297
- raise if @exception_on_syntax_error
298
- tk = TkError.new(@seek, @line_no, @char_no)
299
- end
300
- end while @skip_space and tk.kind_of?(TkSPACE)
301
- if @readed_auto_clean_up
302
- get_readed
303
- end
304
- tk
298
+ def process_continue(tokens = @tokens)
299
+ # last token is always newline
300
+ if tokens.size >= 2 and tokens[-2].event == :on_regexp_end
301
+ # end of regexp literal
302
+ return false
303
+ elsif tokens.size >= 2 and tokens[-2].event == :on_semicolon
304
+ return false
305
+ elsif tokens.size >= 2 and tokens[-2].event == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2].tok)
306
+ return false
307
+ elsif !tokens.empty? and tokens.last.tok == "\\\n"
308
+ return true
309
+ elsif tokens.size >= 1 and tokens[-1].event == :on_heredoc_end # "EOH\n"
310
+ return false
311
+ elsif tokens.size >= 2 and defined?(Ripper::EXPR_BEG) and tokens[-2].state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2].tok !~ /\A\.\.\.?\z/
312
+ # end of literal except for regexp
313
+ # endless range at end of line is not a continue
314
+ return true
315
+ end
316
+ false
305
317
  end
306
318
 
307
- ENINDENT_CLAUSE = [
308
- "case", "class", "def", "do", "for", "if",
309
- "module", "unless", "until", "while", "begin"
310
- ]
311
- DEINDENT_CLAUSE = ["end"
312
- ]
313
-
314
- PERCENT_LTYPE = {
315
- "q" => "\'",
316
- "Q" => "\"",
317
- "x" => "\`",
318
- "r" => "/",
319
- "w" => "]",
320
- "W" => "]",
321
- "i" => "]",
322
- "I" => "]",
323
- "s" => ":"
324
- }
325
-
326
- PERCENT_PAREN = {
327
- "{" => "}",
328
- "[" => "]",
329
- "<" => ">",
330
- "(" => ")"
331
- }
332
-
333
- Ltype2Token = {
334
- "\'" => TkSTRING,
335
- "\"" => TkSTRING,
336
- "\`" => TkXSTRING,
337
- "/" => TkREGEXP,
338
- "]" => TkDSTRING,
339
- ":" => TkSYMBOL
340
- }
341
- DLtype2Token = {
342
- "\"" => TkDSTRING,
343
- "\`" => TkDXSTRING,
344
- "/" => TkDREGEXP,
345
- }
346
-
347
- def lex_init()
348
- @OP = IRB::SLex.new
349
- @OP.def_rules("\0", "\004", "\032") do |op, io|
350
- Token(TkEND_OF_SCRIPT)
351
- end
352
-
353
- @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
354
- @space_seen = true
355
- while getc =~ /[ \t\f\r\13]/; end
356
- ungetc
357
- Token(TkSPACE)
358
- end
359
-
360
- @OP.def_rule("#") do |op, io|
361
- identify_comment
319
+ def check_code_block(code, tokens = @tokens)
320
+ return true if tokens.empty?
321
+ if tokens.last.event == :on_heredoc_beg
322
+ return true
362
323
  end
363
324
 
364
- @OP.def_rule("=begin",
365
- proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
366
- |op, io|
367
- @ltype = "="
368
- until getc == "\n"; end
369
- until peek_equal?("=end") && peek(4) =~ /\s/
370
- until getc == "\n"; end
371
- end
372
- gets
373
- @ltype = nil
374
- Token(TkRD_COMMENT)
375
- end
376
-
377
- @OP.def_rule("\n") do |op, io|
378
- print "\\n\n" if RubyLex.debug?
379
- case @lex_state
380
- when EXPR_BEG, EXPR_FNAME, EXPR_DOT
381
- @continue = true
382
- else
383
- @continue = false
384
- @lex_state = EXPR_BEG
385
- until (@indent_stack.empty? ||
386
- [TkLPAREN, TkLBRACK, TkLBRACE,
387
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
388
- @indent_stack.pop
325
+ begin # check if parser errors are available
326
+ verbose, $VERBOSE = $VERBOSE, nil
327
+ case RUBY_ENGINE
328
+ when 'ruby'
329
+ self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
330
+ RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
389
331
  end
390
- end
391
- @here_header = false
392
- @here_readed = []
393
- Token(TkNL)
394
- end
395
-
396
- @OP.def_rules("*", "**",
397
- "=", "==", "===",
398
- "=~", "<=>",
399
- "<", "<=",
400
- ">", ">=", ">>",
401
- "!", "!=", "!~") do
402
- |op, io|
403
- case @lex_state
404
- when EXPR_FNAME, EXPR_DOT
405
- @lex_state = EXPR_ARG
332
+ when 'jruby'
333
+ JRuby.compile_ir(code)
406
334
  else
407
- @lex_state = EXPR_BEG
408
- end
409
- Token(op)
410
- end
411
-
412
- @OP.def_rules("<<") do
413
- |op, io|
414
- tk = nil
415
- if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
416
- (@lex_state != EXPR_ARG || @space_seen)
417
- c = peek(0)
418
- if /[-~"'`\w]/ =~ c
419
- tk = identify_here_document
335
+ catch(:valid) do
336
+ eval("BEGIN { throw :valid, true }\n#{code}")
337
+ false
420
338
  end
421
339
  end
422
- unless tk
423
- tk = Token(op)
424
- case @lex_state
425
- when EXPR_FNAME, EXPR_DOT
426
- @lex_state = EXPR_ARG
427
- else
428
- @lex_state = EXPR_BEG
429
- end
340
+ rescue EncodingError
341
+ # This is for a hash with invalid encoding symbol, {"\xAE": 1}
342
+ rescue SyntaxError => e
343
+ case e.message
344
+ when /unterminated (?:string|regexp) meets end of file/
345
+ # "unterminated regexp meets end of file"
346
+ #
347
+ # example:
348
+ # /
349
+ #
350
+ # "unterminated string meets end of file"
351
+ #
352
+ # example:
353
+ # '
354
+ return true
355
+ when /syntax error, unexpected end-of-input/
356
+ # "syntax error, unexpected end-of-input, expecting keyword_end"
357
+ #
358
+ # example:
359
+ # if true
360
+ # hoge
361
+ # if false
362
+ # fuga
363
+ # end
364
+ return true
365
+ when /syntax error, unexpected keyword_end/
366
+ # "syntax error, unexpected keyword_end"
367
+ #
368
+ # example:
369
+ # if (
370
+ # end
371
+ #
372
+ # example:
373
+ # end
374
+ return false
375
+ when /syntax error, unexpected '\.'/
376
+ # "syntax error, unexpected '.'"
377
+ #
378
+ # example:
379
+ # .
380
+ return false
381
+ when /unexpected tREGEXP_BEG/
382
+ # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
383
+ #
384
+ # example:
385
+ # method / f /
386
+ return false
430
387
  end
431
- tk
432
- end
433
-
434
- @OP.def_rules("'", '"') do
435
- |op, io|
436
- identify_string(op)
388
+ ensure
389
+ $VERBOSE = verbose
437
390
  end
438
391
 
439
- @OP.def_rules("`") do
440
- |op, io|
441
- if @lex_state == EXPR_FNAME
442
- @lex_state = EXPR_END
443
- Token(op)
444
- else
445
- identify_string(op)
392
+ if defined?(Ripper::EXPR_BEG)
393
+ last_lex_state = tokens.last.state
394
+ if last_lex_state.allbits?(Ripper::EXPR_BEG)
395
+ return false
396
+ elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
397
+ return true
398
+ elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
399
+ return true
400
+ elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
401
+ return true
402
+ elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
403
+ return true
404
+ elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
405
+ return false
446
406
  end
447
407
  end
448
408
 
449
- @OP.def_rules('?') do
450
- |op, io|
451
- if @lex_state == EXPR_END
452
- @lex_state = EXPR_BEG
453
- Token(TkQUESTION)
409
+ false
410
+ end
411
+
412
+ def process_nesting_level(tokens = @tokens)
413
+ indent = 0
414
+ in_oneliner_def = nil
415
+ tokens.each_with_index { |t, index|
416
+ # detecting one-liner method definition
417
+ if in_oneliner_def.nil?
418
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
419
+ in_oneliner_def = :ENDFN
420
+ end
454
421
  else
455
- ch = getc
456
- if @lex_state == EXPR_ARG && ch =~ /\s/
457
- ungetc
458
- @lex_state = EXPR_BEG;
459
- Token(TkQUESTION)
422
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
423
+ # continuing
424
+ elsif t.state.allbits?(Ripper::EXPR_BEG)
425
+ if t.tok == '='
426
+ in_oneliner_def = :BODY
427
+ end
460
428
  else
461
- if (ch == '\\')
462
- read_escape
429
+ if in_oneliner_def == :BODY
430
+ # one-liner method definition
431
+ indent -= 1
463
432
  end
464
- @lex_state = EXPR_END
465
- Token(TkINTEGER)
433
+ in_oneliner_def = nil
466
434
  end
467
435
  end
468
- end
469
436
 
470
- @OP.def_rules("&", "&&", "|", "||") do
471
- |op, io|
472
- @lex_state = EXPR_BEG
473
- Token(op)
474
- end
475
-
476
- @OP.def_rules("+=", "-=", "*=", "**=",
477
- "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
478
- |op, io|
479
- @lex_state = EXPR_BEG
480
- op =~ /^(.*)=$/
481
- Token(TkOPASGN, $1)
482
- end
483
-
484
- @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
485
- |op, io|
486
- @lex_state = EXPR_ARG
487
- Token(op)
488
- end
489
-
490
- @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
491
- |op, io|
492
- @lex_state = EXPR_ARG
493
- Token(op)
494
- end
495
-
496
- @OP.def_rules("+", "-") do
497
- |op, io|
498
- catch(:RET) do
499
- if @lex_state == EXPR_ARG
500
- if @space_seen and peek(0) =~ /[0-9]/
501
- throw :RET, identify_number
502
- else
503
- @lex_state = EXPR_BEG
504
- end
505
- elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
506
- throw :RET, identify_number
507
- else
508
- @lex_state = EXPR_BEG
437
+ case t.event
438
+ when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
439
+ indent += 1
440
+ when :on_rbracket, :on_rbrace, :on_rparen
441
+ indent -= 1
442
+ when :on_kw
443
+ next if index > 0 and tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
444
+ case t.tok
445
+ when 'do'
446
+ syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index)
447
+ indent += 1 if syntax_of_do == :method_calling
448
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
449
+ indent += 1
450
+ when 'if', 'unless', 'while', 'until'
451
+ # postfix if/unless/while/until must be Ripper::EXPR_LABEL
452
+ indent += 1 unless t.state.allbits?(Ripper::EXPR_LABEL)
453
+ when 'end'
454
+ indent -= 1
509
455
  end
510
- Token(op)
511
- end
512
- end
513
-
514
- @OP.def_rule(".") do
515
- |op, io|
516
- @lex_state = EXPR_BEG
517
- if peek(0) =~ /[0-9]/
518
- ungetc
519
- identify_number
520
- else
521
- # for "obj.if" etc.
522
- @lex_state = EXPR_DOT
523
- Token(TkDOT)
524
456
  end
525
- end
526
-
527
- @OP.def_rules("..", "...") do
528
- |op, io|
529
- @lex_state = EXPR_BEG
530
- Token(op)
531
- end
532
-
533
- lex_int2
457
+ # percent literals are not indented
458
+ }
459
+ indent
534
460
  end
535
461
 
536
- def lex_int2
537
- @OP.def_rules("]", "}", ")") do
538
- |op, io|
539
- @lex_state = EXPR_END
540
- @indent -= 1
541
- @indent_stack.pop
542
- Token(op)
543
- end
544
-
545
- @OP.def_rule(":") do
546
- |op, io|
547
- if @lex_state == EXPR_END || peek(0) =~ /\s/
548
- @lex_state = EXPR_BEG
549
- Token(TkCOLON)
550
- else
551
- @lex_state = EXPR_FNAME
552
- Token(TkSYMBEG)
553
- end
554
- end
555
-
556
- @OP.def_rule("::") do
557
- |op, io|
558
- if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
559
- @lex_state = EXPR_BEG
560
- Token(TkCOLON3)
561
- else
562
- @lex_state = EXPR_DOT
563
- Token(TkCOLON2)
564
- end
565
- end
566
-
567
- @OP.def_rule("/") do
568
- |op, io|
569
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
570
- identify_string(op)
571
- elsif peek(0) == '='
572
- getc
573
- @lex_state = EXPR_BEG
574
- Token(TkOPASGN, "/") #/)
575
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
576
- identify_string(op)
577
- else
578
- @lex_state = EXPR_BEG
579
- Token("/") #/)
580
- end
581
- end
582
-
583
- @OP.def_rules("^") do
584
- |op, io|
585
- @lex_state = EXPR_BEG
586
- Token("^")
587
- end
588
-
589
- @OP.def_rules(",") do
590
- |op, io|
591
- @lex_state = EXPR_BEG
592
- Token(op)
593
- end
594
-
595
- @OP.def_rules(";") do
596
- |op, io|
597
- @lex_state = EXPR_BEG
598
- until (@indent_stack.empty? ||
599
- [TkLPAREN, TkLBRACK, TkLBRACE,
600
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
601
- @indent_stack.pop
602
- end
603
- Token(op)
604
- end
605
-
606
- @OP.def_rule("~") do
607
- |op, io|
608
- @lex_state = EXPR_BEG
609
- Token("~")
610
- end
611
-
612
- @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
613
- |op, io|
614
- @lex_state = EXPR_BEG
615
- Token("~")
616
- end
617
-
618
- @OP.def_rule("(") do
619
- |op, io|
620
- @indent += 1
621
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
622
- @lex_state = EXPR_BEG
623
- tk_c = TkfLPAREN
624
- else
625
- @lex_state = EXPR_BEG
626
- tk_c = TkLPAREN
627
- end
628
- @indent_stack.push tk_c
629
- Token(tk_c)
630
- end
631
-
632
- @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
633
- |op, io|
634
- @lex_state = EXPR_ARG
635
- Token("[]")
636
- end
637
-
638
- @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
639
- |op, io|
640
- @lex_state = EXPR_ARG
641
- Token("[]=")
642
- end
643
-
644
- @OP.def_rule("[") do
645
- |op, io|
646
- @indent += 1
647
- if @lex_state == EXPR_FNAME
648
- tk_c = TkfLBRACK
649
- else
650
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
651
- tk_c = TkLBRACK
652
- elsif @lex_state == EXPR_ARG && @space_seen
653
- tk_c = TkLBRACK
654
- else
655
- tk_c = TkfLBRACK
462
+ def is_method_calling?(tokens, index)
463
+ tk = tokens[index]
464
+ if tk.state.anybits?(Ripper::EXPR_CMDARG) and tk.event == :on_ident
465
+ # The target method call to pass the block with "do".
466
+ return true
467
+ elsif tk.state.anybits?(Ripper::EXPR_ARG) and tk.event == :on_ident
468
+ non_sp_index = tokens[0..(index - 1)].rindex{ |t| t.event != :on_sp }
469
+ if non_sp_index
470
+ prev_tk = tokens[non_sp_index]
471
+ if prev_tk.state.anybits?(Ripper::EXPR_DOT) and prev_tk.event == :on_period
472
+ # The target method call with receiver to pass the block with "do".
473
+ return true
656
474
  end
657
- @lex_state = EXPR_BEG
658
- end
659
- @indent_stack.push tk_c
660
- Token(tk_c)
661
- end
662
-
663
- @OP.def_rule("{") do
664
- |op, io|
665
- @indent += 1
666
- if @lex_state != EXPR_END && @lex_state != EXPR_ARG
667
- tk_c = TkLBRACE
668
- else
669
- tk_c = TkfLBRACE
670
- end
671
- @lex_state = EXPR_BEG
672
- @indent_stack.push tk_c
673
- Token(tk_c)
674
- end
675
-
676
- @OP.def_rule('\\') do
677
- |op, io|
678
- if getc == "\n"
679
- @space_seen = true
680
- @continue = true
681
- Token(TkSPACE)
682
- else
683
- read_escape
684
- Token("\\")
685
- end
686
- end
687
-
688
- @OP.def_rule('%') do
689
- |op, io|
690
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
691
- identify_quotation
692
- elsif peek(0) == '='
693
- getc
694
- Token(TkOPASGN, :%)
695
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
696
- identify_quotation
697
- else
698
- @lex_state = EXPR_BEG
699
- Token("%") #))
700
475
  end
701
476
  end
702
-
703
- @OP.def_rule('$') do
704
- |op, io|
705
- identify_gvar
706
- end
707
-
708
- @OP.def_rule('@') do
709
- |op, io|
710
- if peek(0) =~ /[\w@]/
711
- ungetc
712
- identify_identifier
713
- else
714
- Token("@")
715
- end
716
- end
717
-
718
- @OP.def_rule("") do
719
- |op, io|
720
- printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
721
- if peek(0) =~ /[0-9]/
722
- t = identify_number
723
- elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
724
- t = identify_identifier
725
- end
726
- printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
727
- t
728
- end
729
-
730
- p @OP if RubyLex.debug?
477
+ false
731
478
  end
732
479
 
733
- def identify_gvar
734
- @lex_state = EXPR_END
735
-
736
- case ch = getc
737
- when /[~_*$?!@\/\\;,=:<>".]/ #"
738
- Token(TkGVAR, "$" + ch)
739
- when "-"
740
- Token(TkGVAR, "$-" + getc)
741
- when "&", "`", "'", "+"
742
- Token(TkBACK_REF, "$"+ch)
743
- when /[1-9]/
744
- while getc =~ /[0-9]/; end
745
- ungetc
746
- Token(TkNTH_REF)
747
- when /\w/
748
- ungetc
749
- ungetc
750
- identify_identifier
751
- else
752
- ungetc
753
- Token("$")
754
- end
480
+ def take_corresponding_syntax_to_kw_do(tokens, index)
481
+ syntax_of_do = nil
482
+ # Finding a syntax corresponding to "do".
483
+ index.downto(0) do |i|
484
+ tk = tokens[i]
485
+ # In "continue", the token isn't the corresponding syntax to "do".
486
+ non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
487
+ first_in_fomula = false
488
+ if non_sp_index.nil?
489
+ first_in_fomula = true
490
+ elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
491
+ first_in_fomula = true
492
+ end
493
+ if is_method_calling?(tokens, i)
494
+ syntax_of_do = :method_calling
495
+ break if first_in_fomula
496
+ elsif tk.event == :on_kw && %w{while until for}.include?(tk.tok)
497
+ # A loop syntax in front of "do" found.
498
+ #
499
+ # while cond do # also "until" or "for"
500
+ # end
501
+ #
502
+ # This "do" doesn't increment indent because the loop syntax already
503
+ # incremented.
504
+ syntax_of_do = :loop_syntax
505
+ break if first_in_fomula
506
+ end
507
+ end
508
+ syntax_of_do
755
509
  end
756
510
 
757
- def identify_identifier
758
- token = ""
759
- if peek(0) =~ /[$@]/
760
- token.concat(c = getc)
761
- if c == "@" and peek(0) == "@"
762
- token.concat getc
763
- end
764
- end
765
-
766
- while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
767
- print ":", ch, ":" if RubyLex.debug?
768
- token.concat ch
769
- end
770
- ungetc
771
-
772
- if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
773
- token.concat getc
774
- end
775
-
776
- # almost fix token
777
-
778
- case token
779
- when /^\$/
780
- return Token(TkGVAR, token)
781
- when /^\@\@/
782
- @lex_state = EXPR_END
783
- # p Token(TkCVAR, token)
784
- return Token(TkCVAR, token)
785
- when /^\@/
786
- @lex_state = EXPR_END
787
- return Token(TkIVAR, token)
788
- end
789
-
790
- if @lex_state != EXPR_DOT
791
- print token, "\n" if RubyLex.debug?
792
-
793
- token_c, *trans = TkReading2Token[token]
794
- if token_c
795
- # reserved word?
511
+ def is_the_in_correspond_to_a_for(tokens, index)
512
+ syntax_of_in = nil
513
+ # Finding a syntax corresponding to "do".
514
+ index.downto(0) do |i|
515
+ tk = tokens[i]
516
+ # In "continue", the token isn't the corresponding syntax to "do".
517
+ non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
518
+ first_in_fomula = false
519
+ if non_sp_index.nil?
520
+ first_in_fomula = true
521
+ elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
522
+ first_in_fomula = true
523
+ end
524
+ if tk.event == :on_kw && tk.tok == 'for'
525
+ # A loop syntax in front of "do" found.
526
+ #
527
+ # while cond do # also "until" or "for"
528
+ # end
529
+ #
530
+ # This "do" doesn't increment indent because the loop syntax already
531
+ # incremented.
532
+ syntax_of_in = :for
533
+ end
534
+ break if first_in_fomula
535
+ end
536
+ syntax_of_in
537
+ end
796
538
 
797
- if (@lex_state != EXPR_BEG &&
798
- @lex_state != EXPR_FNAME &&
799
- trans[1])
800
- # modifiers
801
- token_c = TkSymbol2Token[trans[1]]
802
- @lex_state = trans[0]
539
+ def check_newline_depth_difference
540
+ depth_difference = 0
541
+ open_brace_on_line = 0
542
+ in_oneliner_def = nil
543
+ @tokens.each_with_index do |t, index|
544
+ # detecting one-liner method definition
545
+ if in_oneliner_def.nil?
546
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
547
+ in_oneliner_def = :ENDFN
548
+ end
549
+ else
550
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
551
+ # continuing
552
+ elsif t.state.allbits?(Ripper::EXPR_BEG)
553
+ if t.tok == '='
554
+ in_oneliner_def = :BODY
555
+ end
803
556
  else
804
- if @lex_state != EXPR_FNAME and peek(0) != ':'
805
- if ENINDENT_CLAUSE.include?(token)
806
- # check for ``class = val'' etc.
807
- valid = true
808
- case token
809
- when "class"
810
- valid = false unless peek_match?(/^\s*(<<|\w|::)/)
811
- when "def"
812
- valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
813
- when "do"
814
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
815
- when *ENINDENT_CLAUSE
816
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
817
- else
818
- # no nothing
819
- end
820
- if valid
821
- if token == "do"
822
- if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
823
- @indent += 1
824
- @indent_stack.push token_c
825
- end
826
- else
827
- @indent += 1
828
- @indent_stack.push token_c
829
- end
830
- end
831
-
832
- elsif DEINDENT_CLAUSE.include?(token)
833
- @indent -= 1
834
- @indent_stack.pop
835
- end
836
- @lex_state = trans[0]
837
- else
838
- @lex_state = EXPR_END
557
+ if in_oneliner_def == :BODY
558
+ # one-liner method definition
559
+ depth_difference -= 1
839
560
  end
561
+ in_oneliner_def = nil
840
562
  end
841
- return Token(token_c, token)
842
563
  end
843
- end
844
564
 
845
- if @lex_state == EXPR_FNAME
846
- @lex_state = EXPR_END
847
- if peek(0) == '='
848
- token.concat getc
849
- end
850
- elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
851
- @lex_state = EXPR_ARG
852
- else
853
- @lex_state = EXPR_END
854
- end
855
-
856
- if token[0, 1] =~ /[A-Z]/
857
- return Token(TkCONSTANT, token)
858
- elsif token[token.size - 1, 1] =~ /[!?]/
859
- return Token(TkFID, token)
860
- else
861
- return Token(TkIDENTIFIER, token)
862
- end
863
- end
864
-
865
- def identify_here_document
866
- ch = getc
867
- if ch == "-" || ch == "~"
868
- ch = getc
869
- indent = true
870
- end
871
- if /['"`]/ =~ ch
872
- lt = ch
873
- quoted = ""
874
- while (c = getc) && c != lt
875
- quoted.concat c
876
- end
877
- else
878
- lt = '"'
879
- quoted = ch.dup
880
- while (c = getc) && c =~ /\w/
881
- quoted.concat c
882
- end
883
- ungetc
884
- end
885
-
886
- ltback, @ltype = @ltype, lt
887
- reserve = []
888
- while ch = getc
889
- reserve.push ch
890
- if ch == "\\"
891
- reserve.push ch = getc
892
- elsif ch == "\n"
893
- break
894
- end
895
- end
896
-
897
- @here_header = false
898
-
899
- line = ""
900
- while ch = getc
901
- if ch == "\n"
902
- if line == quoted
903
- break
565
+ case t.event
566
+ when :on_ignored_nl, :on_nl, :on_comment
567
+ if index != (@tokens.size - 1) and in_oneliner_def != :BODY
568
+ depth_difference = 0
569
+ open_brace_on_line = 0
904
570
  end
905
- line = ""
906
- else
907
- line.concat ch unless indent && line == "" && /\s/ =~ ch
908
- if @ltype != "'" && ch == "#" && peek(0) == "{"
909
- identify_string_dvar
571
+ next
572
+ when :on_sp
573
+ next
574
+ end
575
+
576
+ case t.event
577
+ when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
578
+ depth_difference += 1
579
+ open_brace_on_line += 1
580
+ when :on_rbracket, :on_rbrace, :on_rparen
581
+ depth_difference -= 1 if open_brace_on_line > 0
582
+ when :on_kw
583
+ next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
584
+ case t.tok
585
+ when 'do'
586
+ syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
587
+ depth_difference += 1 if syntax_of_do == :method_calling
588
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
589
+ depth_difference += 1
590
+ when 'if', 'unless', 'while', 'until', 'rescue'
591
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
592
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
593
+ depth_difference += 1
594
+ end
595
+ when 'else', 'elsif', 'ensure', 'when'
596
+ depth_difference += 1
597
+ when 'in'
598
+ unless is_the_in_correspond_to_a_for(@tokens, index)
599
+ depth_difference += 1
600
+ end
601
+ when 'end'
602
+ depth_difference -= 1
910
603
  end
911
604
  end
912
605
  end
913
-
914
- @here_header = true
915
- @here_readed.concat reserve
916
- while ch = reserve.pop
917
- ungetc ch
918
- end
919
-
920
- @ltype = ltback
921
- @lex_state = EXPR_END
922
- Token(Ltype2Token[lt])
606
+ depth_difference
923
607
  end
924
608
 
925
- def identify_quotation
926
- ch = getc
927
- if lt = PERCENT_LTYPE[ch]
928
- ch = getc
929
- elsif ch =~ /\W/
930
- lt = "\""
931
- else
932
- RubyLex.fail SyntaxError, "unknown type of %string"
933
- end
934
- @quoted = ch unless @quoted = PERCENT_PAREN[ch]
935
- identify_string(lt, @quoted)
936
- end
609
+ def check_corresponding_token_depth(lines, line_index)
610
+ corresponding_token_depth = nil
611
+ is_first_spaces_of_line = true
612
+ is_first_printable_of_line = true
613
+ spaces_of_nest = []
614
+ spaces_at_line_head = 0
615
+ open_brace_on_line = 0
616
+ in_oneliner_def = nil
937
617
 
938
- def identify_number
939
- @lex_state = EXPR_END
618
+ if heredoc_scope?
619
+ return lines[line_index][/^ */].length
620
+ end
940
621
 
941
- if peek(0) == "0" && peek(1) !~ /[.eE]/
942
- getc
943
- case peek(0)
944
- when /[xX]/
945
- ch = getc
946
- match = /[0-9a-fA-F_]/
947
- when /[bB]/
948
- ch = getc
949
- match = /[01_]/
950
- when /[oO]/
951
- ch = getc
952
- match = /[0-7_]/
953
- when /[dD]/
954
- ch = getc
955
- match = /[0-9_]/
956
- when /[0-7]/
957
- match = /[0-7_]/
958
- when /[89]/
959
- RubyLex.fail SyntaxError, "Invalid octal digit"
622
+ @tokens.each_with_index do |t, index|
623
+ # detecting one-liner method definition
624
+ if in_oneliner_def.nil?
625
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
626
+ in_oneliner_def = :ENDFN
627
+ end
960
628
  else
961
- return Token(TkINTEGER)
962
- end
963
-
964
- len0 = true
965
- non_digit = false
966
- while ch = getc
967
- if match =~ ch
968
- if ch == "_"
969
- if non_digit
970
- RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
971
- else
972
- non_digit = ch
973
- end
974
- else
975
- non_digit = false
976
- len0 = false
629
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
630
+ # continuing
631
+ elsif t.state.allbits?(Ripper::EXPR_BEG)
632
+ if t.tok == '='
633
+ in_oneliner_def = :BODY
977
634
  end
978
635
  else
979
- ungetc
980
- if len0
981
- RubyLex.fail SyntaxError, "numeric literal without digits"
982
- end
983
- if non_digit
984
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
636
+ if in_oneliner_def == :BODY
637
+ # one-liner method definition
638
+ if is_first_printable_of_line
639
+ corresponding_token_depth = spaces_of_nest.pop
640
+ else
641
+ spaces_of_nest.pop
642
+ corresponding_token_depth = nil
643
+ end
985
644
  end
986
- break
645
+ in_oneliner_def = nil
987
646
  end
988
647
  end
989
- return Token(TkINTEGER)
990
- end
991
648
 
992
- type = TkINTEGER
993
- allow_point = true
994
- allow_e = true
995
- non_digit = false
996
- while ch = getc
997
- case ch
998
- when /[0-9]/
999
- non_digit = false
1000
- when "_"
1001
- non_digit = ch
1002
- when allow_point && "."
1003
- if non_digit
1004
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
649
+ case t.event
650
+ when :on_ignored_nl, :on_nl, :on_comment
651
+ if in_oneliner_def != :BODY
652
+ corresponding_token_depth = nil
653
+ spaces_at_line_head = 0
654
+ is_first_spaces_of_line = true
655
+ is_first_printable_of_line = true
656
+ open_brace_on_line = 0
1005
657
  end
1006
- type = TkFLOAT
1007
- if peek(0) !~ /[0-9]/
1008
- type = TkINTEGER
1009
- ungetc
1010
- break
1011
- end
1012
- allow_point = false
1013
- when allow_e && "e", allow_e && "E"
1014
- if non_digit
1015
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1016
- end
1017
- type = TkFLOAT
1018
- if peek(0) =~ /[+-]/
1019
- getc
658
+ next
659
+ when :on_sp
660
+ spaces_at_line_head = t.tok.count(' ') if is_first_spaces_of_line
661
+ is_first_spaces_of_line = false
662
+ next
663
+ end
664
+
665
+ case t.event
666
+ when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
667
+ spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2)
668
+ open_brace_on_line += 1
669
+ when :on_rbracket, :on_rbrace, :on_rparen
670
+ if is_first_printable_of_line
671
+ corresponding_token_depth = spaces_of_nest.pop
672
+ else
673
+ spaces_of_nest.pop
674
+ corresponding_token_depth = nil
1020
675
  end
1021
- allow_e = false
1022
- allow_point = false
1023
- non_digit = ch
1024
- else
1025
- if non_digit
1026
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
676
+ open_brace_on_line -= 1
677
+ when :on_kw
678
+ next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
679
+ case t.tok
680
+ when 'do'
681
+ syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
682
+ if syntax_of_do == :method_calling
683
+ spaces_of_nest.push(spaces_at_line_head)
684
+ end
685
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
686
+ spaces_of_nest.push(spaces_at_line_head)
687
+ when 'rescue'
688
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
689
+ corresponding_token_depth = spaces_of_nest.last
690
+ end
691
+ when 'if', 'unless', 'while', 'until'
692
+ # postfix if/unless/while/until must be Ripper::EXPR_LABEL
693
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
694
+ spaces_of_nest.push(spaces_at_line_head)
695
+ end
696
+ when 'else', 'elsif', 'ensure', 'when'
697
+ corresponding_token_depth = spaces_of_nest.last
698
+ when 'in'
699
+ if in_keyword_case_scope?
700
+ corresponding_token_depth = spaces_of_nest.last
701
+ end
702
+ when 'end'
703
+ if is_first_printable_of_line
704
+ corresponding_token_depth = spaces_of_nest.pop
705
+ else
706
+ spaces_of_nest.pop
707
+ corresponding_token_depth = nil
708
+ end
1027
709
  end
1028
- ungetc
1029
- break
1030
710
  end
711
+ is_first_spaces_of_line = false
712
+ is_first_printable_of_line = false
1031
713
  end
1032
- Token(type)
714
+ corresponding_token_depth
1033
715
  end
1034
716
 
1035
- def identify_string(ltype, quoted = ltype)
1036
- @ltype = ltype
1037
- @quoted = quoted
1038
- subtype = nil
1039
- begin
1040
- nest = 0
1041
- while ch = getc
1042
- if @quoted == ch and nest == 0
1043
- break
1044
- elsif @ltype != "'" && ch == "#" && peek(0) == "{"
1045
- identify_string_dvar
1046
- elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
1047
- subtype = true
1048
- elsif ch == '\\' and @ltype == "'" #'
1049
- case ch = getc
1050
- when "\\", "\n", "'"
717
+ def check_string_literal(tokens)
718
+ i = 0
719
+ start_token = []
720
+ end_type = []
721
+ while i < tokens.size
722
+ t = tokens[i]
723
+ case t.event
724
+ when *end_type.last
725
+ start_token.pop
726
+ end_type.pop
727
+ when :on_tstring_beg
728
+ start_token << t
729
+ end_type << [:on_tstring_end, :on_label_end]
730
+ when :on_regexp_beg
731
+ start_token << t
732
+ end_type << :on_regexp_end
733
+ when :on_symbeg
734
+ acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick}
735
+ if (i + 1) < tokens.size
736
+ if acceptable_single_tokens.all?{ |st| tokens[i + 1].event != st }
737
+ start_token << t
738
+ end_type << :on_tstring_end
1051
739
  else
1052
- ungetc
1053
- end
1054
- elsif ch == '\\' #'
1055
- read_escape
1056
- end
1057
- if PERCENT_PAREN.values.include?(@quoted)
1058
- if PERCENT_PAREN[ch] == @quoted
1059
- nest += 1
1060
- elsif ch == @quoted
1061
- nest -= 1
740
+ i += 1
1062
741
  end
1063
742
  end
1064
- end
1065
- if @ltype == "/"
1066
- while /[imxoesun]/ =~ peek(0)
1067
- getc
1068
- end
1069
- end
1070
- if subtype
1071
- Token(DLtype2Token[ltype])
1072
- else
1073
- Token(Ltype2Token[ltype])
1074
- end
1075
- ensure
1076
- @ltype = nil
1077
- @quoted = nil
1078
- @lex_state = EXPR_END
1079
- end
743
+ when :on_backtick
744
+ start_token << t
745
+ end_type << :on_tstring_end
746
+ when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
747
+ start_token << t
748
+ end_type << :on_tstring_end
749
+ when :on_heredoc_beg
750
+ start_token << t
751
+ end_type << :on_heredoc_end
752
+ end
753
+ i += 1
754
+ end
755
+ start_token.last.nil? ? nil : start_token.last
1080
756
  end
1081
757
 
1082
- def identify_string_dvar
1083
- begin
1084
- getc
1085
-
1086
- reserve_continue = @continue
1087
- reserve_ltype = @ltype
1088
- reserve_indent = @indent
1089
- reserve_indent_stack = @indent_stack
1090
- reserve_state = @lex_state
1091
- reserve_quoted = @quoted
1092
-
1093
- @ltype = nil
1094
- @quoted = nil
1095
- @indent = 0
1096
- @indent_stack = []
1097
- @lex_state = EXPR_BEG
1098
-
1099
- loop do
1100
- @continue = false
1101
- prompt
1102
- tk = token
1103
- if @ltype or @continue or @indent >= 0
1104
- next
1105
- end
1106
- break if tk.kind_of?(TkRBRACE)
758
+ def process_literal_type(tokens = @tokens)
759
+ start_token = check_string_literal(tokens)
760
+ return nil if start_token == ""
761
+
762
+ case start_token&.event
763
+ when :on_tstring_beg
764
+ case start_token&.tok
765
+ when ?" then ?"
766
+ when /^%.$/ then ?"
767
+ when /^%Q.$/ then ?"
768
+ when ?' then ?'
769
+ when /^%q.$/ then ?'
770
+ end
771
+ when :on_regexp_beg then ?/
772
+ when :on_symbeg then ?:
773
+ when :on_backtick then ?`
774
+ when :on_qwords_beg then ?]
775
+ when :on_words_beg then ?]
776
+ when :on_qsymbols_beg then ?]
777
+ when :on_symbols_beg then ?]
778
+ when :on_heredoc_beg
779
+ start_token&.tok =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
780
+ case $1
781
+ when ?" then ?"
782
+ when ?' then ?'
783
+ when ?` then ?`
784
+ else ?"
1107
785
  end
1108
- ensure
1109
- @continue = reserve_continue
1110
- @ltype = reserve_ltype
1111
- @indent = reserve_indent
1112
- @indent_stack = reserve_indent_stack
1113
- @lex_state = reserve_state
1114
- @quoted = reserve_quoted
786
+ else
787
+ nil
1115
788
  end
1116
789
  end
1117
790
 
1118
- def identify_comment
1119
- @ltype = "#"
1120
-
1121
- while ch = getc
1122
- if ch == "\n"
1123
- @ltype = nil
1124
- ungetc
1125
- break
791
+ def check_termination_in_prev_line(code, context: nil)
792
+ tokens = self.class.ripper_lex_without_warning(code, context: context)
793
+ past_first_newline = false
794
+ index = tokens.rindex do |t|
795
+ # traverse first token before last line
796
+ if past_first_newline
797
+ if t.tok.include?("\n")
798
+ true
799
+ end
800
+ elsif t.tok.include?("\n")
801
+ past_first_newline = true
802
+ false
803
+ else
804
+ false
1126
805
  end
1127
806
  end
1128
- return Token(TkCOMMENT)
1129
- end
1130
807
 
1131
- def read_escape
1132
- case ch = getc
1133
- when "\n", "\r", "\f"
1134
- when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
1135
- when /[0-7]/
1136
- ungetc ch
1137
- 3.times do
1138
- case ch = getc
1139
- when /[0-7]/
1140
- when nil
1141
- break
1142
- else
1143
- ungetc
808
+ if index
809
+ first_token = nil
810
+ last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
811
+ last_line_tokens.each do |t|
812
+ unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
813
+ first_token = t
1144
814
  break
1145
815
  end
1146
816
  end
1147
817
 
1148
- when "x"
1149
- 2.times do
1150
- case ch = getc
1151
- when /[0-9a-fA-F]/
1152
- when nil
1153
- break
818
+ if first_token.nil?
819
+ return false
820
+ elsif first_token && first_token.state == Ripper::EXPR_DOT
821
+ return false
822
+ else
823
+ tokens_without_last_line = tokens[0..index]
824
+ ltype = process_literal_type(tokens_without_last_line)
825
+ indent = process_nesting_level(tokens_without_last_line)
826
+ continue = process_continue(tokens_without_last_line)
827
+ code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line)
828
+ if ltype or indent > 0 or continue or code_block_open
829
+ return false
1154
830
  else
1155
- ungetc
1156
- break
831
+ return last_line_tokens.map(&:tok).join('')
1157
832
  end
1158
833
  end
834
+ end
835
+ false
836
+ end
1159
837
 
1160
- when "M"
1161
- if (ch = getc) != '-'
1162
- ungetc
1163
- else
1164
- if (ch = getc) == "\\" #"
1165
- read_escape
1166
- end
1167
- end
838
+ private
839
+
840
+ def heredoc_scope?
841
+ heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) }
842
+ heredoc_tokens[-1]&.event == :on_heredoc_beg
843
+ end
1168
844
 
1169
- when "C", "c" #, "^"
1170
- if ch == "C" and (ch = getc) != "-"
1171
- ungetc
1172
- elsif (ch = getc) == "\\" #"
1173
- read_escape
845
+ def in_keyword_case_scope?
846
+ kw_tokens = @tokens.select { |t| t.event == :on_kw && ['case', 'for', 'end'].include?(t.tok) }
847
+ counter = 0
848
+ kw_tokens.reverse.each do |t|
849
+ if t.tok == 'case'
850
+ return true if counter.zero?
851
+ counter += 1
852
+ elsif t.tok == 'for'
853
+ counter += 1
854
+ elsif t.tok == 'end'
855
+ counter -= 1
1174
856
  end
1175
- else
1176
- # other characters
1177
857
  end
858
+ false
1178
859
  end
1179
860
  end
1180
861
  # :startdoc: