irb 1.0.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.document +4 -0
  3. data/Gemfile +10 -2
  4. data/LICENSE.txt +3 -3
  5. data/README.md +3 -3
  6. data/Rakefile +17 -1
  7. data/doc/irb/irb-tools.rd.ja +184 -0
  8. data/doc/irb/irb.rd.ja +427 -0
  9. data/irb.gemspec +18 -4
  10. data/lib/irb/cmd/fork.rb +2 -4
  11. data/lib/irb/cmd/help.rb +10 -5
  12. data/lib/irb/cmd/info.rb +32 -0
  13. data/lib/irb/cmd/ls.rb +101 -0
  14. data/lib/irb/cmd/measure.rb +43 -0
  15. data/lib/irb/cmd/nop.rb +10 -4
  16. data/lib/irb/cmd/pushws.rb +0 -1
  17. data/lib/irb/cmd/show_source.rb +93 -0
  18. data/lib/irb/cmd/whereami.rb +20 -0
  19. data/lib/irb/color.rb +246 -0
  20. data/lib/irb/color_printer.rb +47 -0
  21. data/lib/irb/completion.rb +254 -55
  22. data/lib/irb/context.rb +165 -72
  23. data/lib/irb/easter-egg.rb +138 -0
  24. data/lib/irb/ext/change-ws.rb +0 -1
  25. data/lib/irb/ext/history.rb +47 -11
  26. data/lib/irb/ext/loader.rb +46 -20
  27. data/lib/irb/ext/multi-irb.rb +7 -7
  28. data/lib/irb/ext/save-history.rb +36 -11
  29. data/lib/irb/ext/tracer.rb +14 -2
  30. data/lib/irb/ext/use-loader.rb +4 -3
  31. data/lib/irb/ext/workspaces.rb +0 -1
  32. data/lib/irb/extend-command.rb +113 -63
  33. data/lib/irb/frame.rb +12 -7
  34. data/lib/irb/help.rb +0 -1
  35. data/lib/irb/init.rb +146 -26
  36. data/lib/irb/input-method.rb +287 -9
  37. data/lib/irb/inspector.rb +15 -11
  38. data/lib/irb/lc/error.rb +55 -16
  39. data/lib/irb/lc/help-message +25 -13
  40. data/lib/irb/lc/ja/error.rb +55 -14
  41. data/lib/irb/lc/ja/help-message +11 -6
  42. data/lib/irb/locale.rb +13 -4
  43. data/lib/irb/notifier.rb +12 -8
  44. data/lib/irb/output-method.rb +6 -6
  45. data/lib/irb/ruby-lex.rb +673 -992
  46. data/lib/irb/ruby_logo.aa +37 -0
  47. data/lib/irb/version.rb +2 -2
  48. data/lib/irb/workspace.rb +65 -21
  49. data/lib/irb/xmp.rb +1 -1
  50. data/lib/irb.rb +276 -96
  51. data/man/irb.1 +229 -0
  52. metadata +25 -31
  53. data/.gitignore +0 -9
  54. data/.travis.yml +0 -6
  55. data/lib/irb/lc/.document +0 -4
  56. data/lib/irb/ruby-token.rb +0 -267
  57. data/lib/irb/slex.rb +0 -282
data/lib/irb/ruby-lex.rb CHANGED
@@ -10,74 +10,105 @@
10
10
  #
11
11
  #
12
12
 
13
- require "e2mmap"
14
- require_relative "slex"
15
- require_relative "ruby-token"
13
+ require "ripper"
14
+ require "jruby" if RUBY_ENGINE == "jruby"
16
15
 
17
16
  # :stopdoc:
18
17
  class RubyLex
19
18
 
20
- extend Exception2MessageMapper
21
- def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
22
- def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
23
- def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
24
- def_exception(:TkReading2TokenDuplicateError,
25
- "key duplicate(token_n='%s', key='%s')")
26
- def_exception(:SyntaxError, "%s")
27
-
28
- def_exception(:TerminateLineInput, "Terminate Line Input")
29
-
30
- include RubyToken
31
-
32
- class << self
33
- attr_accessor :debug_level
34
- def debug?
35
- @debug_level > 0
19
+ class TerminateLineInput < StandardError
20
+ def initialize
21
+ super("Terminate Line Input")
36
22
  end
37
23
  end
38
- @debug_level = 0
39
24
 
40
25
  def initialize
41
- lex_init
42
- set_input(STDIN)
43
-
44
- @seek = 0
45
26
  @exp_line_no = @line_no = 1
46
- @base_char_no = 0
47
- @char_no = 0
48
- @rests = []
49
- @readed = []
50
- @here_readed = []
51
-
52
27
  @indent = 0
53
- @indent_stack = []
54
- @lex_state = EXPR_BEG
55
- @space_seen = false
56
- @here_header = false
57
- @post_symbeg = false
58
-
59
28
  @continue = false
60
29
  @line = ""
61
-
62
- @skip_space = false
63
- @readed_auto_clean_up = false
64
- @exception_on_syntax_error = true
65
-
66
30
  @prompt = nil
67
31
  end
68
32
 
69
- attr_accessor :skip_space
70
- attr_accessor :readed_auto_clean_up
71
- attr_accessor :exception_on_syntax_error
72
-
73
- attr_reader :seek
74
- attr_reader :char_no
75
- attr_reader :line_no
76
- attr_reader :indent
33
+ def self.compile_with_errors_suppressed(code, line_no: 1)
34
+ begin
35
+ result = yield code, line_no
36
+ rescue ArgumentError
37
+ # Ruby can issue an error for the code if there is an
38
+ # incomplete magic comment for encoding in it. Force an
39
+ # expression with a new line before the code in this
40
+ # case to prevent magic comment handling. To make sure
41
+ # line numbers in the lexed code remain the same,
42
+ # decrease the line number by one.
43
+ code = ";\n#{code}"
44
+ line_no -= 1
45
+ result = yield code, line_no
46
+ end
47
+ result
48
+ end
77
49
 
78
50
  # io functions
79
- def set_input(io, p = nil, &block)
51
+ def set_input(io, p = nil, context: nil, &block)
80
52
  @io = io
53
+ if @io.respond_to?(:check_termination)
54
+ @io.check_termination do |code|
55
+ if Reline::IOGate.in_pasting?
56
+ lex = RubyLex.new
57
+ rest = lex.check_termination_in_prev_line(code, context: context)
58
+ if rest
59
+ Reline.delete_text
60
+ rest.bytes.reverse_each do |c|
61
+ Reline.ungetc(c)
62
+ end
63
+ true
64
+ else
65
+ false
66
+ end
67
+ else
68
+ code.gsub!(/\s*\z/, '').concat("\n")
69
+ ltype, indent, continue, code_block_open = check_state(code, context: context)
70
+ if ltype or indent > 0 or continue or code_block_open
71
+ false
72
+ else
73
+ true
74
+ end
75
+ end
76
+ end
77
+ end
78
+ if @io.respond_to?(:dynamic_prompt)
79
+ @io.dynamic_prompt do |lines|
80
+ lines << '' if lines.empty?
81
+ result = []
82
+ tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: context)
83
+ code = String.new
84
+ partial_tokens = []
85
+ unprocessed_tokens = []
86
+ line_num_offset = 0
87
+ tokens.each do |t|
88
+ partial_tokens << t
89
+ unprocessed_tokens << t
90
+ if t.tok.include?("\n")
91
+ t_str = t.tok
92
+ t_str.each_line("\n") do |s|
93
+ code << s << "\n"
94
+ ltype, indent, continue, code_block_open = check_state(code, partial_tokens, context: context)
95
+ result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
96
+ line_num_offset += 1
97
+ end
98
+ unprocessed_tokens = []
99
+ else
100
+ code << t.tok
101
+ end
102
+ end
103
+
104
+ unless unprocessed_tokens.empty?
105
+ ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens, context: context)
106
+ result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
107
+ end
108
+ result
109
+ end
110
+ end
111
+
81
112
  if p.respond_to?(:call)
82
113
  @input = p
83
114
  elsif block_given?
@@ -87,119 +118,115 @@ class RubyLex
87
118
  end
88
119
  end
89
120
 
90
- def get_readed
91
- if idx = @readed.rindex("\n")
92
- @base_char_no = @readed.size - (idx + 1)
93
- else
94
- @base_char_no += @readed.size
95
- end
96
-
97
- readed = @readed.join("")
98
- @readed = []
99
- readed
100
- end
101
-
102
- def getc
103
- while @rests.empty?
104
- @rests.push nil unless buf_input
105
- end
106
- c = @rests.shift
107
- if @here_header
108
- @here_readed.push c
109
- else
110
- @readed.push c
111
- end
112
- @seek += 1
113
- if c == "\n"
114
- @line_no += 1
115
- @char_no = 0
121
+ def set_prompt(p = nil, &block)
122
+ p = block if block_given?
123
+ if p.respond_to?(:call)
124
+ @prompt = p
116
125
  else
117
- @char_no += 1
118
- end
119
- c
120
- end
121
-
122
- def gets
123
- l = ""
124
- while c = getc
125
- l.concat(c)
126
- break if c == "\n"
126
+ @prompt = Proc.new{print p}
127
127
  end
128
- return nil if l == "" and c.nil?
129
- l
130
128
  end
131
129
 
132
- def eof?
133
- @io.eof?
134
- end
135
-
136
- def getc_of_rests
137
- if @rests.empty?
138
- nil
139
- else
140
- getc
141
- end
142
- end
130
+ ERROR_TOKENS = [
131
+ :on_parse_error,
132
+ :compile_error,
133
+ :on_assign_error,
134
+ :on_alias_error,
135
+ :on_class_name_error,
136
+ :on_param_error
137
+ ]
143
138
 
144
- def ungetc(c = nil)
145
- if @here_readed.empty?
146
- c2 = @readed.pop
147
- else
148
- c2 = @here_readed.pop
149
- end
150
- c = c2 unless c
151
- @rests.unshift c #c =
152
- @seek -= 1
153
- if c == "\n"
154
- @line_no -= 1
155
- if idx = @readed.rindex("\n")
156
- @char_no = idx + 1
139
+ def self.ripper_lex_without_warning(code, context: nil)
140
+ verbose, $VERBOSE = $VERBOSE, nil
141
+ if context
142
+ lvars = context&.workspace&.binding&.local_variables
143
+ if lvars && !lvars.empty?
144
+ code = "#{lvars.join('=')}=nil\n#{code}"
145
+ line_no = 0
146
+ else
147
+ line_no = 1
148
+ end
149
+ end
150
+ tokens = nil
151
+ compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
152
+ lexer = Ripper::Lexer.new(inner_code, '-', line_no)
153
+ if lexer.respond_to?(:scan) # Ruby 2.7+
154
+ tokens = []
155
+ pos_to_index = {}
156
+ lexer.scan.each do |t|
157
+ next if t.pos.first == 0
158
+ if pos_to_index.has_key?(t.pos)
159
+ index = pos_to_index[t.pos]
160
+ found_tk = tokens[index]
161
+ if ERROR_TOKENS.include?(found_tk.event) && !ERROR_TOKENS.include?(t.event)
162
+ tokens[index] = t
163
+ end
164
+ else
165
+ pos_to_index[t.pos] = tokens.size
166
+ tokens << t
167
+ end
168
+ end
157
169
  else
158
- @char_no = @base_char_no + @readed.size
170
+ tokens = lexer.parse.reject { |it| it.pos.first == 0 }
159
171
  end
160
- else
161
- @char_no -= 1
162
172
  end
173
+ tokens
174
+ ensure
175
+ $VERBOSE = verbose
163
176
  end
164
177
 
165
- def peek_equal?(str)
166
- chrs = str.split(//)
167
- until @rests.size >= chrs.size
168
- return false unless buf_input
169
- end
170
- @rests[0, chrs.size] == chrs
171
- end
172
-
173
- def peek_match?(regexp)
174
- while @rests.empty?
175
- return false unless buf_input
178
+ def find_prev_spaces(line_index)
179
+ return 0 if @tokens.size == 0
180
+ md = @tokens[0].tok.match(/(\A +)/)
181
+ prev_spaces = md.nil? ? 0 : md[1].count(' ')
182
+ line_count = 0
183
+ @tokens.each_with_index do |t, i|
184
+ if t.tok.include?("\n")
185
+ line_count += t.tok.count("\n")
186
+ if line_count >= line_index
187
+ return prev_spaces
188
+ end
189
+ if (@tokens.size - 1) > i
190
+ md = @tokens[i + 1].tok.match(/(\A +)/)
191
+ prev_spaces = md.nil? ? 0 : md[1].count(' ')
192
+ end
193
+ end
176
194
  end
177
- regexp =~ @rests.join("")
195
+ prev_spaces
178
196
  end
179
197
 
180
- def peek(i = 0)
181
- while @rests.size <= i
182
- return nil unless buf_input
198
+ def set_auto_indent(context)
199
+ if @io.respond_to?(:auto_indent) and context.auto_indent_mode
200
+ @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
201
+ if is_newline
202
+ @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: context)
203
+ prev_spaces = find_prev_spaces(line_index)
204
+ depth_difference = check_newline_depth_difference
205
+ depth_difference = 0 if depth_difference < 0
206
+ prev_spaces + depth_difference * 2
207
+ else
208
+ code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
209
+ last_line = lines[line_index]&.byteslice(0, byte_pointer)
210
+ code += last_line if last_line
211
+ @tokens = self.class.ripper_lex_without_warning(code, context: context)
212
+ corresponding_token_depth = check_corresponding_token_depth(lines, line_index)
213
+ if corresponding_token_depth
214
+ corresponding_token_depth
215
+ else
216
+ nil
217
+ end
218
+ end
219
+ end
183
220
  end
184
- @rests[i]
185
221
  end
186
222
 
187
- def buf_input
188
- prompt
189
- line = @input.call
190
- return nil unless line
191
- @rests.concat line.chars.to_a
192
- true
193
- end
194
- private :buf_input
195
-
196
- def set_prompt(p = nil, &block)
197
- p = block if block_given?
198
- if p.respond_to?(:call)
199
- @prompt = p
200
- else
201
- @prompt = Proc.new{print p}
202
- end
223
+ def check_state(code, tokens = nil, context: nil)
224
+ tokens = self.class.ripper_lex_without_warning(code, context: context) unless tokens
225
+ ltype = process_literal_type(tokens)
226
+ indent = process_nesting_level(tokens)
227
+ continue = process_continue(tokens)
228
+ code_block_open = check_code_block(code, tokens)
229
+ [ltype, indent, continue, code_block_open]
203
230
  end
204
231
 
205
232
  def prompt
@@ -210,20 +237,11 @@ class RubyLex
210
237
 
211
238
  def initialize_input
212
239
  @ltype = nil
213
- @quoted = nil
214
240
  @indent = 0
215
- @indent_stack = []
216
- @lex_state = EXPR_BEG
217
- @space_seen = false
218
- @here_header = false
219
-
220
241
  @continue = false
221
- @post_symbeg = false
222
-
223
- prompt
224
-
225
242
  @line = ""
226
243
  @exp_line_no = @line_no
244
+ @code_block_open = false
227
245
  end
228
246
 
229
247
  def each_top_level_statement
@@ -231,13 +249,17 @@ class RubyLex
231
249
  catch(:TERM_INPUT) do
232
250
  loop do
233
251
  begin
234
- @continue = false
235
252
  prompt
236
253
  unless l = lex
237
254
  throw :TERM_INPUT if @line == ''
238
255
  else
256
+ @line_no += l.count("\n")
257
+ if l == "\n"
258
+ @exp_line_no += 1
259
+ next
260
+ end
239
261
  @line.concat l
240
- if @ltype or @continue or @indent > 0
262
+ if @code_block_open or @ltype or @continue or @indent > 0
241
263
  next
242
264
  end
243
265
  end
@@ -245,936 +267,595 @@ class RubyLex
245
267
  @line.force_encoding(@io.encoding)
246
268
  yield @line, @exp_line_no
247
269
  end
248
- break unless l
270
+ raise TerminateLineInput if @io.eof?
249
271
  @line = ''
250
272
  @exp_line_no = @line_no
251
273
 
252
274
  @indent = 0
253
- @indent_stack = []
254
- prompt
255
275
  rescue TerminateLineInput
256
276
  initialize_input
257
277
  prompt
258
- get_readed
259
278
  end
260
279
  end
261
280
  end
262
281
  end
263
282
 
264
283
  def lex
265
- continue = @continue
266
- while tk = token
267
- case tk
268
- when TkNL, TkEND_OF_SCRIPT
269
- @continue = continue unless continue.nil?
270
- break unless @continue
271
- when TkSPACE, TkCOMMENT
272
- when TkSEMICOLON, TkBEGIN, TkELSE
273
- @continue = continue = false
274
- else
275
- continue = nil
276
- end
277
- end
278
- line = get_readed
279
- if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
280
- nil
281
- else
282
- line
283
- end
284
+ line = @input.call
285
+ if @io.respond_to?(:check_termination)
286
+ return line # multiline
287
+ end
288
+ code = @line + (line.nil? ? '' : line)
289
+ code.gsub!(/\s*\z/, '').concat("\n")
290
+ @tokens = self.class.ripper_lex_without_warning(code)
291
+ @continue = process_continue
292
+ @code_block_open = check_code_block(code)
293
+ @indent = process_nesting_level
294
+ @ltype = process_literal_type
295
+ line
284
296
  end
285
297
 
286
- def token
287
- @prev_seek = @seek
288
- @prev_line_no = @line_no
289
- @prev_char_no = @char_no
290
- begin
291
- begin
292
- tk = @OP.match(self)
293
- @space_seen = tk.kind_of?(TkSPACE)
294
- @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
295
- @post_symbeg = tk.kind_of?(TkSYMBEG)
296
- rescue SyntaxError
297
- raise if @exception_on_syntax_error
298
- tk = TkError.new(@seek, @line_no, @char_no)
299
- end
300
- end while @skip_space and tk.kind_of?(TkSPACE)
301
- if @readed_auto_clean_up
302
- get_readed
303
- end
304
- tk
298
+ def process_continue(tokens = @tokens)
299
+ # last token is always newline
300
+ if tokens.size >= 2 and tokens[-2].event == :on_regexp_end
301
+ # end of regexp literal
302
+ return false
303
+ elsif tokens.size >= 2 and tokens[-2].event == :on_semicolon
304
+ return false
305
+ elsif tokens.size >= 2 and tokens[-2].event == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2].tok)
306
+ return false
307
+ elsif !tokens.empty? and tokens.last.tok == "\\\n"
308
+ return true
309
+ elsif tokens.size >= 1 and tokens[-1].event == :on_heredoc_end # "EOH\n"
310
+ return false
311
+ elsif tokens.size >= 2 and defined?(Ripper::EXPR_BEG) and tokens[-2].state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2].tok !~ /\A\.\.\.?\z/
312
+ # end of literal except for regexp
313
+ # endless range at end of line is not a continue
314
+ return true
315
+ end
316
+ false
305
317
  end
306
318
 
307
- ENINDENT_CLAUSE = [
308
- "case", "class", "def", "do", "for", "if",
309
- "module", "unless", "until", "while", "begin"
310
- ]
311
- DEINDENT_CLAUSE = ["end"
312
- ]
313
-
314
- PERCENT_LTYPE = {
315
- "q" => "\'",
316
- "Q" => "\"",
317
- "x" => "\`",
318
- "r" => "/",
319
- "w" => "]",
320
- "W" => "]",
321
- "i" => "]",
322
- "I" => "]",
323
- "s" => ":"
324
- }
325
-
326
- PERCENT_PAREN = {
327
- "{" => "}",
328
- "[" => "]",
329
- "<" => ">",
330
- "(" => ")"
331
- }
332
-
333
- Ltype2Token = {
334
- "\'" => TkSTRING,
335
- "\"" => TkSTRING,
336
- "\`" => TkXSTRING,
337
- "/" => TkREGEXP,
338
- "]" => TkDSTRING,
339
- ":" => TkSYMBOL
340
- }
341
- DLtype2Token = {
342
- "\"" => TkDSTRING,
343
- "\`" => TkDXSTRING,
344
- "/" => TkDREGEXP,
345
- }
346
-
347
- def lex_init()
348
- @OP = IRB::SLex.new
349
- @OP.def_rules("\0", "\004", "\032") do |op, io|
350
- Token(TkEND_OF_SCRIPT)
351
- end
352
-
353
- @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
354
- @space_seen = true
355
- while getc =~ /[ \t\f\r\13]/; end
356
- ungetc
357
- Token(TkSPACE)
358
- end
359
-
360
- @OP.def_rule("#") do |op, io|
361
- identify_comment
319
+ def check_code_block(code, tokens = @tokens)
320
+ return true if tokens.empty?
321
+ if tokens.last.event == :on_heredoc_beg
322
+ return true
362
323
  end
363
324
 
364
- @OP.def_rule("=begin",
365
- proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
366
- |op, io|
367
- @ltype = "="
368
- until getc == "\n"; end
369
- until peek_equal?("=end") && peek(4) =~ /\s/
370
- until getc == "\n"; end
371
- end
372
- gets
373
- @ltype = nil
374
- Token(TkRD_COMMENT)
375
- end
376
-
377
- @OP.def_rule("\n") do |op, io|
378
- print "\\n\n" if RubyLex.debug?
379
- case @lex_state
380
- when EXPR_BEG, EXPR_FNAME, EXPR_DOT
381
- @continue = true
382
- else
383
- @continue = false
384
- @lex_state = EXPR_BEG
385
- until (@indent_stack.empty? ||
386
- [TkLPAREN, TkLBRACK, TkLBRACE,
387
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
388
- @indent_stack.pop
325
+ begin # check if parser error are available
326
+ verbose, $VERBOSE = $VERBOSE, nil
327
+ case RUBY_ENGINE
328
+ when 'ruby'
329
+ self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
330
+ RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
389
331
  end
390
- end
391
- @here_header = false
392
- @here_readed = []
393
- Token(TkNL)
394
- end
395
-
396
- @OP.def_rules("*", "**",
397
- "=", "==", "===",
398
- "=~", "<=>",
399
- "<", "<=",
400
- ">", ">=", ">>",
401
- "!", "!=", "!~") do
402
- |op, io|
403
- case @lex_state
404
- when EXPR_FNAME, EXPR_DOT
405
- @lex_state = EXPR_ARG
332
+ when 'jruby'
333
+ JRuby.compile_ir(code)
406
334
  else
407
- @lex_state = EXPR_BEG
408
- end
409
- Token(op)
410
- end
411
-
412
- @OP.def_rules("<<") do
413
- |op, io|
414
- tk = nil
415
- if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
416
- (@lex_state != EXPR_ARG || @space_seen)
417
- c = peek(0)
418
- if /[-~"'`\w]/ =~ c
419
- tk = identify_here_document
335
+ catch(:valid) do
336
+ eval("BEGIN { throw :valid, true }\n#{code}")
337
+ false
420
338
  end
421
339
  end
422
- unless tk
423
- tk = Token(op)
424
- case @lex_state
425
- when EXPR_FNAME, EXPR_DOT
426
- @lex_state = EXPR_ARG
427
- else
428
- @lex_state = EXPR_BEG
429
- end
340
+ rescue EncodingError
341
+ # This is for a hash with invalid encoding symbol, {"\xAE": 1}
342
+ rescue SyntaxError => e
343
+ case e.message
344
+ when /unterminated (?:string|regexp) meets end of file/
345
+ # "unterminated regexp meets end of file"
346
+ #
347
+ # example:
348
+ # /
349
+ #
350
+ # "unterminated string meets end of file"
351
+ #
352
+ # example:
353
+ # '
354
+ return true
355
+ when /syntax error, unexpected end-of-input/
356
+ # "syntax error, unexpected end-of-input, expecting keyword_end"
357
+ #
358
+ # example:
359
+ # if true
360
+ # hoge
361
+ # if false
362
+ # fuga
363
+ # end
364
+ return true
365
+ when /syntax error, unexpected keyword_end/
366
+ # "syntax error, unexpected keyword_end"
367
+ #
368
+ # example:
369
+ # if (
370
+ # end
371
+ #
372
+ # example:
373
+ # end
374
+ return false
375
+ when /syntax error, unexpected '\.'/
376
+ # "syntax error, unexpected '.'"
377
+ #
378
+ # example:
379
+ # .
380
+ return false
381
+ when /unexpected tREGEXP_BEG/
382
+ # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
383
+ #
384
+ # example:
385
+ # method / f /
386
+ return false
430
387
  end
431
- tk
432
- end
433
-
434
- @OP.def_rules("'", '"') do
435
- |op, io|
436
- identify_string(op)
388
+ ensure
389
+ $VERBOSE = verbose
437
390
  end
438
391
 
439
- @OP.def_rules("`") do
440
- |op, io|
441
- if @lex_state == EXPR_FNAME
442
- @lex_state = EXPR_END
443
- Token(op)
444
- else
445
- identify_string(op)
392
+ if defined?(Ripper::EXPR_BEG)
393
+ last_lex_state = tokens.last.state
394
+ if last_lex_state.allbits?(Ripper::EXPR_BEG)
395
+ return false
396
+ elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
397
+ return true
398
+ elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
399
+ return true
400
+ elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
401
+ return true
402
+ elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
403
+ return true
404
+ elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
405
+ return false
446
406
  end
447
407
  end
448
408
 
449
- @OP.def_rules('?') do
450
- |op, io|
451
- if @lex_state == EXPR_END
452
- @lex_state = EXPR_BEG
453
- Token(TkQUESTION)
409
+ false
410
+ end
411
+
412
+ def process_nesting_level(tokens = @tokens)
413
+ indent = 0
414
+ in_oneliner_def = nil
415
+ tokens.each_with_index { |t, index|
416
+ # detecting one-liner method definition
417
+ if in_oneliner_def.nil?
418
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
419
+ in_oneliner_def = :ENDFN
420
+ end
454
421
  else
455
- ch = getc
456
- if @lex_state == EXPR_ARG && ch =~ /\s/
457
- ungetc
458
- @lex_state = EXPR_BEG;
459
- Token(TkQUESTION)
422
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
423
+ # continuing
424
+ elsif t.state.allbits?(Ripper::EXPR_BEG)
425
+ if t.tok == '='
426
+ in_oneliner_def = :BODY
427
+ end
460
428
  else
461
- if (ch == '\\')
462
- read_escape
429
+ if in_oneliner_def == :BODY
430
+ # one-liner method definition
431
+ indent -= 1
463
432
  end
464
- @lex_state = EXPR_END
465
- Token(TkINTEGER)
433
+ in_oneliner_def = nil
466
434
  end
467
435
  end
468
- end
469
436
 
470
- @OP.def_rules("&", "&&", "|", "||") do
471
- |op, io|
472
- @lex_state = EXPR_BEG
473
- Token(op)
474
- end
475
-
476
- @OP.def_rules("+=", "-=", "*=", "**=",
477
- "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
478
- |op, io|
479
- @lex_state = EXPR_BEG
480
- op =~ /^(.*)=$/
481
- Token(TkOPASGN, $1)
482
- end
483
-
484
- @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
485
- |op, io|
486
- @lex_state = EXPR_ARG
487
- Token(op)
488
- end
489
-
490
- @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
491
- |op, io|
492
- @lex_state = EXPR_ARG
493
- Token(op)
494
- end
495
-
496
- @OP.def_rules("+", "-") do
497
- |op, io|
498
- catch(:RET) do
499
- if @lex_state == EXPR_ARG
500
- if @space_seen and peek(0) =~ /[0-9]/
501
- throw :RET, identify_number
502
- else
503
- @lex_state = EXPR_BEG
504
- end
505
- elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
506
- throw :RET, identify_number
507
- else
508
- @lex_state = EXPR_BEG
437
+ case t.event
438
+ when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
439
+ indent += 1
440
+ when :on_rbracket, :on_rbrace, :on_rparen
441
+ indent -= 1
442
+ when :on_kw
443
+ next if index > 0 and tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
444
+ case t.tok
445
+ when 'do'
446
+ syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index)
447
+ indent += 1 if syntax_of_do == :method_calling
448
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
449
+ indent += 1
450
+ when 'if', 'unless', 'while', 'until'
451
+ # postfix if/unless/while/until must be Ripper::EXPR_LABEL
452
+ indent += 1 unless t.state.allbits?(Ripper::EXPR_LABEL)
453
+ when 'end'
454
+ indent -= 1
509
455
  end
510
- Token(op)
511
- end
512
- end
513
-
514
- @OP.def_rule(".") do
515
- |op, io|
516
- @lex_state = EXPR_BEG
517
- if peek(0) =~ /[0-9]/
518
- ungetc
519
- identify_number
520
- else
521
- # for "obj.if" etc.
522
- @lex_state = EXPR_DOT
523
- Token(TkDOT)
524
456
  end
525
- end
526
-
527
- @OP.def_rules("..", "...") do
528
- |op, io|
529
- @lex_state = EXPR_BEG
530
- Token(op)
531
- end
532
-
533
- lex_int2
457
+ # percent literals are not indented
458
+ }
459
+ indent
534
460
  end
535
461
 
536
- def lex_int2
537
- @OP.def_rules("]", "}", ")") do
538
- |op, io|
539
- @lex_state = EXPR_END
540
- @indent -= 1
541
- @indent_stack.pop
542
- Token(op)
543
- end
544
-
545
- @OP.def_rule(":") do
546
- |op, io|
547
- if @lex_state == EXPR_END || peek(0) =~ /\s/
548
- @lex_state = EXPR_BEG
549
- Token(TkCOLON)
550
- else
551
- @lex_state = EXPR_FNAME
552
- Token(TkSYMBEG)
553
- end
554
- end
555
-
556
- @OP.def_rule("::") do
557
- |op, io|
558
- if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
559
- @lex_state = EXPR_BEG
560
- Token(TkCOLON3)
561
- else
562
- @lex_state = EXPR_DOT
563
- Token(TkCOLON2)
564
- end
565
- end
566
-
567
- @OP.def_rule("/") do
568
- |op, io|
569
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
570
- identify_string(op)
571
- elsif peek(0) == '='
572
- getc
573
- @lex_state = EXPR_BEG
574
- Token(TkOPASGN, "/") #/)
575
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
576
- identify_string(op)
577
- else
578
- @lex_state = EXPR_BEG
579
- Token("/") #/)
580
- end
581
- end
582
-
583
- @OP.def_rules("^") do
584
- |op, io|
585
- @lex_state = EXPR_BEG
586
- Token("^")
587
- end
588
-
589
- @OP.def_rules(",") do
590
- |op, io|
591
- @lex_state = EXPR_BEG
592
- Token(op)
593
- end
594
-
595
- @OP.def_rules(";") do
596
- |op, io|
597
- @lex_state = EXPR_BEG
598
- until (@indent_stack.empty? ||
599
- [TkLPAREN, TkLBRACK, TkLBRACE,
600
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
601
- @indent_stack.pop
602
- end
603
- Token(op)
604
- end
605
-
606
- @OP.def_rule("~") do
607
- |op, io|
608
- @lex_state = EXPR_BEG
609
- Token("~")
610
- end
611
-
612
- @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
613
- |op, io|
614
- @lex_state = EXPR_BEG
615
- Token("~")
616
- end
617
-
618
- @OP.def_rule("(") do
619
- |op, io|
620
- @indent += 1
621
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
622
- @lex_state = EXPR_BEG
623
- tk_c = TkfLPAREN
624
- else
625
- @lex_state = EXPR_BEG
626
- tk_c = TkLPAREN
627
- end
628
- @indent_stack.push tk_c
629
- Token(tk_c)
630
- end
631
-
632
- @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
633
- |op, io|
634
- @lex_state = EXPR_ARG
635
- Token("[]")
636
- end
637
-
638
- @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
639
- |op, io|
640
- @lex_state = EXPR_ARG
641
- Token("[]=")
642
- end
643
-
644
- @OP.def_rule("[") do
645
- |op, io|
646
- @indent += 1
647
- if @lex_state == EXPR_FNAME
648
- tk_c = TkfLBRACK
649
- else
650
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
651
- tk_c = TkLBRACK
652
- elsif @lex_state == EXPR_ARG && @space_seen
653
- tk_c = TkLBRACK
654
- else
655
- tk_c = TkfLBRACK
462
+ def is_method_calling?(tokens, index)
463
+ tk = tokens[index]
464
+ if tk.state.anybits?(Ripper::EXPR_CMDARG) and tk.event == :on_ident
465
+ # The target method call to pass the block with "do".
466
+ return true
467
+ elsif tk.state.anybits?(Ripper::EXPR_ARG) and tk.event == :on_ident
468
+ non_sp_index = tokens[0..(index - 1)].rindex{ |t| t.event != :on_sp }
469
+ if non_sp_index
470
+ prev_tk = tokens[non_sp_index]
471
+ if prev_tk.state.anybits?(Ripper::EXPR_DOT) and prev_tk.event == :on_period
472
+ # The target method call with receiver to pass the block with "do".
473
+ return true
656
474
  end
657
- @lex_state = EXPR_BEG
658
- end
659
- @indent_stack.push tk_c
660
- Token(tk_c)
661
- end
662
-
663
- @OP.def_rule("{") do
664
- |op, io|
665
- @indent += 1
666
- if @lex_state != EXPR_END && @lex_state != EXPR_ARG
667
- tk_c = TkLBRACE
668
- else
669
- tk_c = TkfLBRACE
670
- end
671
- @lex_state = EXPR_BEG
672
- @indent_stack.push tk_c
673
- Token(tk_c)
674
- end
675
-
676
- @OP.def_rule('\\') do
677
- |op, io|
678
- if getc == "\n"
679
- @space_seen = true
680
- @continue = true
681
- Token(TkSPACE)
682
- else
683
- read_escape
684
- Token("\\")
685
- end
686
- end
687
-
688
- @OP.def_rule('%') do
689
- |op, io|
690
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
691
- identify_quotation
692
- elsif peek(0) == '='
693
- getc
694
- Token(TkOPASGN, :%)
695
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
696
- identify_quotation
697
- else
698
- @lex_state = EXPR_BEG
699
- Token("%") #))
700
475
  end
701
476
  end
702
-
703
- @OP.def_rule('$') do
704
- |op, io|
705
- identify_gvar
706
- end
707
-
708
- @OP.def_rule('@') do
709
- |op, io|
710
- if peek(0) =~ /[\w@]/
711
- ungetc
712
- identify_identifier
713
- else
714
- Token("@")
715
- end
716
- end
717
-
718
- @OP.def_rule("") do
719
- |op, io|
720
- printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
721
- if peek(0) =~ /[0-9]/
722
- t = identify_number
723
- elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
724
- t = identify_identifier
725
- end
726
- printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
727
- t
728
- end
729
-
730
- p @OP if RubyLex.debug?
477
+ false
731
478
  end
732
479
 
733
- def identify_gvar
734
- @lex_state = EXPR_END
735
-
736
- case ch = getc
737
- when /[~_*$?!@\/\\;,=:<>".]/ #"
738
- Token(TkGVAR, "$" + ch)
739
- when "-"
740
- Token(TkGVAR, "$-" + getc)
741
- when "&", "`", "'", "+"
742
- Token(TkBACK_REF, "$"+ch)
743
- when /[1-9]/
744
- while getc =~ /[0-9]/; end
745
- ungetc
746
- Token(TkNTH_REF)
747
- when /\w/
748
- ungetc
749
- ungetc
750
- identify_identifier
751
- else
752
- ungetc
753
- Token("$")
754
- end
480
+ def take_corresponding_syntax_to_kw_do(tokens, index)
481
+ syntax_of_do = nil
482
+ # Finding a syntax corresponding to "do".
483
+ index.downto(0) do |i|
484
+ tk = tokens[i]
485
+ # In "continue", the token isn't the corresponding syntax to "do".
486
+ non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
487
+ first_in_fomula = false
488
+ if non_sp_index.nil?
489
+ first_in_fomula = true
490
+ elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
491
+ first_in_fomula = true
492
+ end
493
+ if is_method_calling?(tokens, i)
494
+ syntax_of_do = :method_calling
495
+ break if first_in_fomula
496
+ elsif tk.event == :on_kw && %w{while until for}.include?(tk.tok)
497
+ # A loop syntax in front of "do" found.
498
+ #
499
+ # while cond do # also "until" or "for"
500
+ # end
501
+ #
502
+ # This "do" doesn't increment indent because the loop syntax already
503
+ # incremented.
504
+ syntax_of_do = :loop_syntax
505
+ break if first_in_fomula
506
+ end
507
+ end
508
+ syntax_of_do
755
509
  end
756
510
 
757
- def identify_identifier
758
- token = ""
759
- if peek(0) =~ /[$@]/
760
- token.concat(c = getc)
761
- if c == "@" and peek(0) == "@"
762
- token.concat getc
763
- end
764
- end
765
-
766
- while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
767
- print ":", ch, ":" if RubyLex.debug?
768
- token.concat ch
769
- end
770
- ungetc
771
-
772
- if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
773
- token.concat getc
774
- end
775
-
776
- # almost fix token
777
-
778
- case token
779
- when /^\$/
780
- return Token(TkGVAR, token)
781
- when /^\@\@/
782
- @lex_state = EXPR_END
783
- # p Token(TkCVAR, token)
784
- return Token(TkCVAR, token)
785
- when /^\@/
786
- @lex_state = EXPR_END
787
- return Token(TkIVAR, token)
788
- end
789
-
790
- if @lex_state != EXPR_DOT
791
- print token, "\n" if RubyLex.debug?
792
-
793
- token_c, *trans = TkReading2Token[token]
794
- if token_c
795
- # reserved word?
511
+ def is_the_in_correspond_to_a_for(tokens, index)
512
+ syntax_of_in = nil
513
+ # Finding a syntax corresponding to "do".
514
+ index.downto(0) do |i|
515
+ tk = tokens[i]
516
+ # In "continue", the token isn't the corresponding syntax to "do".
517
+ non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
518
+ first_in_fomula = false
519
+ if non_sp_index.nil?
520
+ first_in_fomula = true
521
+ elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
522
+ first_in_fomula = true
523
+ end
524
+ if tk.event == :on_kw && tk.tok == 'for'
525
+ # A loop syntax in front of "do" found.
526
+ #
527
+ # while cond do # also "until" or "for"
528
+ # end
529
+ #
530
+ # This "do" doesn't increment indent because the loop syntax already
531
+ # incremented.
532
+ syntax_of_in = :for
533
+ end
534
+ break if first_in_fomula
535
+ end
536
+ syntax_of_in
537
+ end
796
538
 
797
- if (@lex_state != EXPR_BEG &&
798
- @lex_state != EXPR_FNAME &&
799
- trans[1])
800
- # modifiers
801
- token_c = TkSymbol2Token[trans[1]]
802
- @lex_state = trans[0]
539
+ def check_newline_depth_difference
540
+ depth_difference = 0
541
+ open_brace_on_line = 0
542
+ in_oneliner_def = nil
543
+ @tokens.each_with_index do |t, index|
544
+ # detecting one-liner method definition
545
+ if in_oneliner_def.nil?
546
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
547
+ in_oneliner_def = :ENDFN
548
+ end
549
+ else
550
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
551
+ # continuing
552
+ elsif t.state.allbits?(Ripper::EXPR_BEG)
553
+ if t.tok == '='
554
+ in_oneliner_def = :BODY
555
+ end
803
556
  else
804
- if @lex_state != EXPR_FNAME and peek(0) != ':'
805
- if ENINDENT_CLAUSE.include?(token)
806
- # check for ``class = val'' etc.
807
- valid = true
808
- case token
809
- when "class"
810
- valid = false unless peek_match?(/^\s*(<<|\w|::)/)
811
- when "def"
812
- valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
813
- when "do"
814
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
815
- when *ENINDENT_CLAUSE
816
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
817
- else
818
- # no nothing
819
- end
820
- if valid
821
- if token == "do"
822
- if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
823
- @indent += 1
824
- @indent_stack.push token_c
825
- end
826
- else
827
- @indent += 1
828
- @indent_stack.push token_c
829
- end
830
- end
831
-
832
- elsif DEINDENT_CLAUSE.include?(token)
833
- @indent -= 1
834
- @indent_stack.pop
835
- end
836
- @lex_state = trans[0]
837
- else
838
- @lex_state = EXPR_END
557
+ if in_oneliner_def == :BODY
558
+ # one-liner method definition
559
+ depth_difference -= 1
839
560
  end
561
+ in_oneliner_def = nil
840
562
  end
841
- return Token(token_c, token)
842
563
  end
843
- end
844
564
 
845
- if @lex_state == EXPR_FNAME
846
- @lex_state = EXPR_END
847
- if peek(0) == '='
848
- token.concat getc
849
- end
850
- elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
851
- @lex_state = EXPR_ARG
852
- else
853
- @lex_state = EXPR_END
854
- end
855
-
856
- if token[0, 1] =~ /[A-Z]/
857
- return Token(TkCONSTANT, token)
858
- elsif token[token.size - 1, 1] =~ /[!?]/
859
- return Token(TkFID, token)
860
- else
861
- return Token(TkIDENTIFIER, token)
862
- end
863
- end
864
-
865
- def identify_here_document
866
- ch = getc
867
- if ch == "-" || ch == "~"
868
- ch = getc
869
- indent = true
870
- end
871
- if /['"`]/ =~ ch
872
- lt = ch
873
- quoted = ""
874
- while (c = getc) && c != lt
875
- quoted.concat c
876
- end
877
- else
878
- lt = '"'
879
- quoted = ch.dup
880
- while (c = getc) && c =~ /\w/
881
- quoted.concat c
882
- end
883
- ungetc
884
- end
885
-
886
- ltback, @ltype = @ltype, lt
887
- reserve = []
888
- while ch = getc
889
- reserve.push ch
890
- if ch == "\\"
891
- reserve.push ch = getc
892
- elsif ch == "\n"
893
- break
894
- end
895
- end
896
-
897
- @here_header = false
898
-
899
- line = ""
900
- while ch = getc
901
- if ch == "\n"
902
- if line == quoted
903
- break
565
+ case t.event
566
+ when :on_ignored_nl, :on_nl, :on_comment
567
+ if index != (@tokens.size - 1) and in_oneliner_def != :BODY
568
+ depth_difference = 0
569
+ open_brace_on_line = 0
904
570
  end
905
- line = ""
906
- else
907
- line.concat ch unless indent && line == "" && /\s/ =~ ch
908
- if @ltype != "'" && ch == "#" && peek(0) == "{"
909
- identify_string_dvar
571
+ next
572
+ when :on_sp
573
+ next
574
+ end
575
+
576
+ case t.event
577
+ when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
578
+ depth_difference += 1
579
+ open_brace_on_line += 1
580
+ when :on_rbracket, :on_rbrace, :on_rparen
581
+ depth_difference -= 1 if open_brace_on_line > 0
582
+ when :on_kw
583
+ next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
584
+ case t.tok
585
+ when 'do'
586
+ syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
587
+ depth_difference += 1 if syntax_of_do == :method_calling
588
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
589
+ depth_difference += 1
590
+ when 'if', 'unless', 'while', 'until', 'rescue'
591
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
592
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
593
+ depth_difference += 1
594
+ end
595
+ when 'else', 'elsif', 'ensure', 'when'
596
+ depth_difference += 1
597
+ when 'in'
598
+ unless is_the_in_correspond_to_a_for(@tokens, index)
599
+ depth_difference += 1
600
+ end
601
+ when 'end'
602
+ depth_difference -= 1
910
603
  end
911
604
  end
912
605
  end
913
-
914
- @here_header = true
915
- @here_readed.concat reserve
916
- while ch = reserve.pop
917
- ungetc ch
918
- end
919
-
920
- @ltype = ltback
921
- @lex_state = EXPR_END
922
- Token(Ltype2Token[lt])
606
+ depth_difference
923
607
  end
924
608
 
925
- def identify_quotation
926
- ch = getc
927
- if lt = PERCENT_LTYPE[ch]
928
- ch = getc
929
- elsif ch =~ /\W/
930
- lt = "\""
931
- else
932
- RubyLex.fail SyntaxError, "unknown type of %string"
933
- end
934
- @quoted = ch unless @quoted = PERCENT_PAREN[ch]
935
- identify_string(lt, @quoted)
936
- end
609
+ def check_corresponding_token_depth(lines, line_index)
610
+ corresponding_token_depth = nil
611
+ is_first_spaces_of_line = true
612
+ is_first_printable_of_line = true
613
+ spaces_of_nest = []
614
+ spaces_at_line_head = 0
615
+ open_brace_on_line = 0
616
+ in_oneliner_def = nil
937
617
 
938
- def identify_number
939
- @lex_state = EXPR_END
618
+ if heredoc_scope?
619
+ return lines[line_index][/^ */].length
620
+ end
940
621
 
941
- if peek(0) == "0" && peek(1) !~ /[.eE]/
942
- getc
943
- case peek(0)
944
- when /[xX]/
945
- ch = getc
946
- match = /[0-9a-fA-F_]/
947
- when /[bB]/
948
- ch = getc
949
- match = /[01_]/
950
- when /[oO]/
951
- ch = getc
952
- match = /[0-7_]/
953
- when /[dD]/
954
- ch = getc
955
- match = /[0-9_]/
956
- when /[0-7]/
957
- match = /[0-7_]/
958
- when /[89]/
959
- RubyLex.fail SyntaxError, "Invalid octal digit"
622
+ @tokens.each_with_index do |t, index|
623
+ # detecting one-liner method definition
624
+ if in_oneliner_def.nil?
625
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
626
+ in_oneliner_def = :ENDFN
627
+ end
960
628
  else
961
- return Token(TkINTEGER)
962
- end
963
-
964
- len0 = true
965
- non_digit = false
966
- while ch = getc
967
- if match =~ ch
968
- if ch == "_"
969
- if non_digit
970
- RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
971
- else
972
- non_digit = ch
973
- end
974
- else
975
- non_digit = false
976
- len0 = false
629
+ if t.state.allbits?(Ripper::EXPR_ENDFN)
630
+ # continuing
631
+ elsif t.state.allbits?(Ripper::EXPR_BEG)
632
+ if t.tok == '='
633
+ in_oneliner_def = :BODY
977
634
  end
978
635
  else
979
- ungetc
980
- if len0
981
- RubyLex.fail SyntaxError, "numeric literal without digits"
982
- end
983
- if non_digit
984
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
636
+ if in_oneliner_def == :BODY
637
+ # one-liner method definition
638
+ if is_first_printable_of_line
639
+ corresponding_token_depth = spaces_of_nest.pop
640
+ else
641
+ spaces_of_nest.pop
642
+ corresponding_token_depth = nil
643
+ end
985
644
  end
986
- break
645
+ in_oneliner_def = nil
987
646
  end
988
647
  end
989
- return Token(TkINTEGER)
990
- end
991
648
 
992
- type = TkINTEGER
993
- allow_point = true
994
- allow_e = true
995
- non_digit = false
996
- while ch = getc
997
- case ch
998
- when /[0-9]/
999
- non_digit = false
1000
- when "_"
1001
- non_digit = ch
1002
- when allow_point && "."
1003
- if non_digit
1004
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
649
+ case t.event
650
+ when :on_ignored_nl, :on_nl, :on_comment
651
+ if in_oneliner_def != :BODY
652
+ corresponding_token_depth = nil
653
+ spaces_at_line_head = 0
654
+ is_first_spaces_of_line = true
655
+ is_first_printable_of_line = true
656
+ open_brace_on_line = 0
1005
657
  end
1006
- type = TkFLOAT
1007
- if peek(0) !~ /[0-9]/
1008
- type = TkINTEGER
1009
- ungetc
1010
- break
1011
- end
1012
- allow_point = false
1013
- when allow_e && "e", allow_e && "E"
1014
- if non_digit
1015
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1016
- end
1017
- type = TkFLOAT
1018
- if peek(0) =~ /[+-]/
1019
- getc
658
+ next
659
+ when :on_sp
660
+ spaces_at_line_head = t.tok.count(' ') if is_first_spaces_of_line
661
+ is_first_spaces_of_line = false
662
+ next
663
+ end
664
+
665
+ case t.event
666
+ when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
667
+ spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2)
668
+ open_brace_on_line += 1
669
+ when :on_rbracket, :on_rbrace, :on_rparen
670
+ if is_first_printable_of_line
671
+ corresponding_token_depth = spaces_of_nest.pop
672
+ else
673
+ spaces_of_nest.pop
674
+ corresponding_token_depth = nil
1020
675
  end
1021
- allow_e = false
1022
- allow_point = false
1023
- non_digit = ch
1024
- else
1025
- if non_digit
1026
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
676
+ open_brace_on_line -= 1
677
+ when :on_kw
678
+ next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
679
+ case t.tok
680
+ when 'do'
681
+ syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
682
+ if syntax_of_do == :method_calling
683
+ spaces_of_nest.push(spaces_at_line_head)
684
+ end
685
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
686
+ spaces_of_nest.push(spaces_at_line_head)
687
+ when 'rescue'
688
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
689
+ corresponding_token_depth = spaces_of_nest.last
690
+ end
691
+ when 'if', 'unless', 'while', 'until'
692
+ # postfix if/unless/while/until must be Ripper::EXPR_LABEL
693
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
694
+ spaces_of_nest.push(spaces_at_line_head)
695
+ end
696
+ when 'else', 'elsif', 'ensure', 'when'
697
+ corresponding_token_depth = spaces_of_nest.last
698
+ when 'in'
699
+ if in_keyword_case_scope?
700
+ corresponding_token_depth = spaces_of_nest.last
701
+ end
702
+ when 'end'
703
+ if is_first_printable_of_line
704
+ corresponding_token_depth = spaces_of_nest.pop
705
+ else
706
+ spaces_of_nest.pop
707
+ corresponding_token_depth = nil
708
+ end
1027
709
  end
1028
- ungetc
1029
- break
1030
710
  end
711
+ is_first_spaces_of_line = false
712
+ is_first_printable_of_line = false
1031
713
  end
1032
- Token(type)
714
+ corresponding_token_depth
1033
715
  end
1034
716
 
1035
- def identify_string(ltype, quoted = ltype)
1036
- @ltype = ltype
1037
- @quoted = quoted
1038
- subtype = nil
1039
- begin
1040
- nest = 0
1041
- while ch = getc
1042
- if @quoted == ch and nest == 0
1043
- break
1044
- elsif @ltype != "'" && ch == "#" && peek(0) == "{"
1045
- identify_string_dvar
1046
- elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
1047
- subtype = true
1048
- elsif ch == '\\' and @ltype == "'" #'
1049
- case ch = getc
1050
- when "\\", "\n", "'"
717
+ def check_string_literal(tokens)
718
+ i = 0
719
+ start_token = []
720
+ end_type = []
721
+ while i < tokens.size
722
+ t = tokens[i]
723
+ case t.event
724
+ when *end_type.last
725
+ start_token.pop
726
+ end_type.pop
727
+ when :on_tstring_beg
728
+ start_token << t
729
+ end_type << [:on_tstring_end, :on_label_end]
730
+ when :on_regexp_beg
731
+ start_token << t
732
+ end_type << :on_regexp_end
733
+ when :on_symbeg
734
+ acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick}
735
+ if (i + 1) < tokens.size
736
+ if acceptable_single_tokens.all?{ |st| tokens[i + 1].event != st }
737
+ start_token << t
738
+ end_type << :on_tstring_end
1051
739
  else
1052
- ungetc
1053
- end
1054
- elsif ch == '\\' #'
1055
- read_escape
1056
- end
1057
- if PERCENT_PAREN.values.include?(@quoted)
1058
- if PERCENT_PAREN[ch] == @quoted
1059
- nest += 1
1060
- elsif ch == @quoted
1061
- nest -= 1
740
+ i += 1
1062
741
  end
1063
742
  end
1064
- end
1065
- if @ltype == "/"
1066
- while /[imxoesun]/ =~ peek(0)
1067
- getc
1068
- end
1069
- end
1070
- if subtype
1071
- Token(DLtype2Token[ltype])
1072
- else
1073
- Token(Ltype2Token[ltype])
1074
- end
1075
- ensure
1076
- @ltype = nil
1077
- @quoted = nil
1078
- @lex_state = EXPR_END
1079
- end
743
+ when :on_backtick
744
+ start_token << t
745
+ end_type << :on_tstring_end
746
+ when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
747
+ start_token << t
748
+ end_type << :on_tstring_end
749
+ when :on_heredoc_beg
750
+ start_token << t
751
+ end_type << :on_heredoc_end
752
+ end
753
+ i += 1
754
+ end
755
+ start_token.last.nil? ? nil : start_token.last
1080
756
  end
1081
757
 
1082
- def identify_string_dvar
1083
- begin
1084
- getc
1085
-
1086
- reserve_continue = @continue
1087
- reserve_ltype = @ltype
1088
- reserve_indent = @indent
1089
- reserve_indent_stack = @indent_stack
1090
- reserve_state = @lex_state
1091
- reserve_quoted = @quoted
1092
-
1093
- @ltype = nil
1094
- @quoted = nil
1095
- @indent = 0
1096
- @indent_stack = []
1097
- @lex_state = EXPR_BEG
1098
-
1099
- loop do
1100
- @continue = false
1101
- prompt
1102
- tk = token
1103
- if @ltype or @continue or @indent >= 0
1104
- next
1105
- end
1106
- break if tk.kind_of?(TkRBRACE)
758
+ def process_literal_type(tokens = @tokens)
759
+ start_token = check_string_literal(tokens)
760
+ return nil if start_token == ""
761
+
762
+ case start_token&.event
763
+ when :on_tstring_beg
764
+ case start_token&.tok
765
+ when ?" then ?"
766
+ when /^%.$/ then ?"
767
+ when /^%Q.$/ then ?"
768
+ when ?' then ?'
769
+ when /^%q.$/ then ?'
770
+ end
771
+ when :on_regexp_beg then ?/
772
+ when :on_symbeg then ?:
773
+ when :on_backtick then ?`
774
+ when :on_qwords_beg then ?]
775
+ when :on_words_beg then ?]
776
+ when :on_qsymbols_beg then ?]
777
+ when :on_symbols_beg then ?]
778
+ when :on_heredoc_beg
779
+ start_token&.tok =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
780
+ case $1
781
+ when ?" then ?"
782
+ when ?' then ?'
783
+ when ?` then ?`
784
+ else ?"
1107
785
  end
1108
- ensure
1109
- @continue = reserve_continue
1110
- @ltype = reserve_ltype
1111
- @indent = reserve_indent
1112
- @indent_stack = reserve_indent_stack
1113
- @lex_state = reserve_state
1114
- @quoted = reserve_quoted
786
+ else
787
+ nil
1115
788
  end
1116
789
  end
1117
790
 
1118
- def identify_comment
1119
- @ltype = "#"
1120
-
1121
- while ch = getc
1122
- if ch == "\n"
1123
- @ltype = nil
1124
- ungetc
1125
- break
791
+ def check_termination_in_prev_line(code, context: nil)
792
+ tokens = self.class.ripper_lex_without_warning(code, context: context)
793
+ past_first_newline = false
794
+ index = tokens.rindex do |t|
795
+ # traverse first token before last line
796
+ if past_first_newline
797
+ if t.tok.include?("\n")
798
+ true
799
+ end
800
+ elsif t.tok.include?("\n")
801
+ past_first_newline = true
802
+ false
803
+ else
804
+ false
1126
805
  end
1127
806
  end
1128
- return Token(TkCOMMENT)
1129
- end
1130
807
 
1131
- def read_escape
1132
- case ch = getc
1133
- when "\n", "\r", "\f"
1134
- when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
1135
- when /[0-7]/
1136
- ungetc ch
1137
- 3.times do
1138
- case ch = getc
1139
- when /[0-7]/
1140
- when nil
1141
- break
1142
- else
1143
- ungetc
808
+ if index
809
+ first_token = nil
810
+ last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
811
+ last_line_tokens.each do |t|
812
+ unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
813
+ first_token = t
1144
814
  break
1145
815
  end
1146
816
  end
1147
817
 
1148
- when "x"
1149
- 2.times do
1150
- case ch = getc
1151
- when /[0-9a-fA-F]/
1152
- when nil
1153
- break
818
+ if first_token.nil?
819
+ return false
820
+ elsif first_token && first_token.state == Ripper::EXPR_DOT
821
+ return false
822
+ else
823
+ tokens_without_last_line = tokens[0..index]
824
+ ltype = process_literal_type(tokens_without_last_line)
825
+ indent = process_nesting_level(tokens_without_last_line)
826
+ continue = process_continue(tokens_without_last_line)
827
+ code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line)
828
+ if ltype or indent > 0 or continue or code_block_open
829
+ return false
1154
830
  else
1155
- ungetc
1156
- break
831
+ return last_line_tokens.map(&:tok).join('')
1157
832
  end
1158
833
  end
834
+ end
835
+ false
836
+ end
1159
837
 
1160
- when "M"
1161
- if (ch = getc) != '-'
1162
- ungetc
1163
- else
1164
- if (ch = getc) == "\\" #"
1165
- read_escape
1166
- end
1167
- end
838
+ private
839
+
840
+ def heredoc_scope?
841
+ heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) }
842
+ heredoc_tokens[-1]&.event == :on_heredoc_beg
843
+ end
1168
844
 
1169
- when "C", "c" #, "^"
1170
- if ch == "C" and (ch = getc) != "-"
1171
- ungetc
1172
- elsif (ch = getc) == "\\" #"
1173
- read_escape
845
+ def in_keyword_case_scope?
846
+ kw_tokens = @tokens.select { |t| t.event == :on_kw && ['case', 'for', 'end'].include?(t.tok) }
847
+ counter = 0
848
+ kw_tokens.reverse.each do |t|
849
+ if t.tok == 'case'
850
+ return true if counter.zero?
851
+ counter += 1
852
+ elsif t.tok == 'for'
853
+ counter += 1
854
+ elsif t.tok == 'end'
855
+ counter -= 1
1174
856
  end
1175
- else
1176
- # other characters
1177
857
  end
858
+ false
1178
859
  end
1179
860
  end
1180
861
  # :startdoc: