irb 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,21 +10,21 @@
10
10
  #
11
11
  #
12
12
 
13
- require "e2mmap"
14
-
15
13
  module IRB
16
14
  # An abstract output class for IO in irb. This is mainly used internally by
17
15
  # IRB::Notifier. You can define your own output method to use with Irb.new,
18
16
  # or Context.new
19
17
  class OutputMethod
20
- extend Exception2MessageMapper
21
- def_exception :NotImplementedError, "Need to define `%s'"
22
-
18
+ class NotImplementedError < StandardError
19
+ def initialize(val)
20
+ super("Need to define `#{val}'")
21
+ end
22
+ end
23
23
 
24
24
  # Open this method to implement your own output method, raises a
25
25
  # NotImplementedError if you don't define #print in your own class.
26
26
  def print(*opts)
27
- OutputMethod.Raise NotImplementedError, "print"
27
+ raise NotImplementedError, "print"
28
28
  end
29
29
 
30
30
  # Prints the given +opts+, with a newline delimiter.
@@ -10,74 +10,51 @@
10
10
  #
11
11
  #
12
12
 
13
- require "e2mmap"
14
- require_relative "slex"
15
- require_relative "ruby-token"
13
+ require "ripper"
16
14
 
17
15
  # :stopdoc:
18
16
  class RubyLex
19
17
 
20
- extend Exception2MessageMapper
21
- def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
22
- def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
23
- def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
24
- def_exception(:TkReading2TokenDuplicateError,
25
- "key duplicate(token_n='%s', key='%s')")
26
- def_exception(:SyntaxError, "%s")
27
-
28
- def_exception(:TerminateLineInput, "Terminate Line Input")
29
-
30
- include RubyToken
31
-
32
- class << self
33
- attr_accessor :debug_level
34
- def debug?
35
- @debug_level > 0
18
+ class TerminateLineInput < StandardError
19
+ def initialize
20
+ super("Terminate Line Input")
36
21
  end
37
22
  end
38
- @debug_level = 0
39
23
 
40
24
  def initialize
41
- lex_init
42
- set_input(STDIN)
43
-
44
- @seek = 0
45
25
  @exp_line_no = @line_no = 1
46
- @base_char_no = 0
47
- @char_no = 0
48
- @rests = []
49
- @readed = []
50
- @here_readed = []
51
-
52
26
  @indent = 0
53
- @indent_stack = []
54
- @lex_state = EXPR_BEG
55
- @space_seen = false
56
- @here_header = false
57
- @post_symbeg = false
58
-
59
27
  @continue = false
60
28
  @line = ""
61
-
62
- @skip_space = false
63
- @readed_auto_clean_up = false
64
- @exception_on_syntax_error = true
65
-
66
29
  @prompt = nil
67
30
  end
68
31
 
69
- attr_accessor :skip_space
70
- attr_accessor :readed_auto_clean_up
71
- attr_accessor :exception_on_syntax_error
72
-
73
- attr_reader :seek
74
- attr_reader :char_no
75
- attr_reader :line_no
76
- attr_reader :indent
77
-
78
32
  # io functions
79
33
  def set_input(io, p = nil, &block)
80
34
  @io = io
35
+ if @io.respond_to?(:check_termination)
36
+ @io.check_termination do |code|
37
+ code.gsub!(/\s*\z/, '').concat("\n")
38
+ ltype, indent, continue, code_block_open = check_state(code)
39
+ if ltype or indent > 0 or continue or code_block_open
40
+ false
41
+ else
42
+ true
43
+ end
44
+ end
45
+ end
46
+ if @io.respond_to?(:dynamic_prompt)
47
+ @io.dynamic_prompt do |lines|
48
+ lines << '' if lines.empty?
49
+ result = []
50
+ lines.each_index { |i|
51
+ c = lines[0..i].map{ |l| l + "\n" }.join
52
+ ltype, indent, continue, code_block_open = check_state(c)
53
+ result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + i)
54
+ }
55
+ result
56
+ end
57
+ end
81
58
  if p.respond_to?(:call)
82
59
  @input = p
83
60
  elsif block_given?
@@ -87,119 +64,54 @@ class RubyLex
87
64
  end
88
65
  end
89
66
 
90
- def get_readed
91
- if idx = @readed.rindex("\n")
92
- @base_char_no = @readed.size - (idx + 1)
93
- else
94
- @base_char_no += @readed.size
95
- end
96
-
97
- readed = @readed.join("")
98
- @readed = []
99
- readed
100
- end
101
-
102
- def getc
103
- while @rests.empty?
104
- @rests.push nil unless buf_input
105
- end
106
- c = @rests.shift
107
- if @here_header
108
- @here_readed.push c
109
- else
110
- @readed.push c
111
- end
112
- @seek += 1
113
- if c == "\n"
114
- @line_no += 1
115
- @char_no = 0
67
+ def set_prompt(p = nil, &block)
68
+ p = block if block_given?
69
+ if p.respond_to?(:call)
70
+ @prompt = p
116
71
  else
117
- @char_no += 1
118
- end
119
- c
120
- end
121
-
122
- def gets
123
- l = ""
124
- while c = getc
125
- l.concat(c)
126
- break if c == "\n"
72
+ @prompt = Proc.new{print p}
127
73
  end
128
- return nil if l == "" and c.nil?
129
- l
130
74
  end
131
75
 
132
- def eof?
133
- @io.eof?
134
- end
135
-
136
- def getc_of_rests
137
- if @rests.empty?
138
- nil
139
- else
140
- getc
141
- end
76
+ def ripper_lex_without_warning(code)
77
+ verbose, $VERBOSE = $VERBOSE, nil
78
+ tokens = Ripper.lex(code)
79
+ $VERBOSE = verbose
80
+ tokens
142
81
  end
143
82
 
144
- def ungetc(c = nil)
145
- if @here_readed.empty?
146
- c2 = @readed.pop
147
- else
148
- c2 = @here_readed.pop
149
- end
150
- c = c2 unless c
151
- @rests.unshift c #c =
152
- @seek -= 1
153
- if c == "\n"
154
- @line_no -= 1
155
- if idx = @readed.rindex("\n")
156
- @char_no = idx + 1
157
- else
158
- @char_no = @base_char_no + @readed.size
83
+ def set_auto_indent(context)
84
+ if @io.respond_to?(:auto_indent) and context.auto_indent_mode
85
+ @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
86
+ if is_newline
87
+ md = lines[line_index - 1].match(/(\A +)/)
88
+ prev_spaces = md.nil? ? 0 : md[1].count(' ')
89
+ @tokens = ripper_lex_without_warning(lines[0..line_index].join("\n"))
90
+ depth_difference = check_newline_depth_difference
91
+ prev_spaces + depth_difference * 2
92
+ else
93
+ code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
94
+ last_line = lines[line_index]&.byteslice(0, byte_pointer)
95
+ code += last_line if last_line
96
+ @tokens = ripper_lex_without_warning(code)
97
+ corresponding_token_depth = check_corresponding_token_depth
98
+ if corresponding_token_depth
99
+ corresponding_token_depth
100
+ else
101
+ nil
102
+ end
103
+ end
159
104
  end
160
- else
161
- @char_no -= 1
162
105
  end
163
106
  end
164
107
 
165
- def peek_equal?(str)
166
- chrs = str.split(//)
167
- until @rests.size >= chrs.size
168
- return false unless buf_input
169
- end
170
- @rests[0, chrs.size] == chrs
171
- end
172
-
173
- def peek_match?(regexp)
174
- while @rests.empty?
175
- return false unless buf_input
176
- end
177
- regexp =~ @rests.join("")
178
- end
179
-
180
- def peek(i = 0)
181
- while @rests.size <= i
182
- return nil unless buf_input
183
- end
184
- @rests[i]
185
- end
186
-
187
- def buf_input
188
- prompt
189
- line = @input.call
190
- return nil unless line
191
- @rests.concat line.chars.to_a
192
- true
193
- end
194
- private :buf_input
195
-
196
- def set_prompt(p = nil, &block)
197
- p = block if block_given?
198
- if p.respond_to?(:call)
199
- @prompt = p
200
- else
201
- @prompt = Proc.new{print p}
202
- end
108
+ def check_state(code)
109
+ @tokens = ripper_lex_without_warning(code)
110
+ ltype = process_literal_type
111
+ indent = process_nesting_level
112
+ continue = process_continue
113
+ code_block_open = check_code_block(code)
114
+ [ltype, indent, continue, code_block_open]
203
115
  end
204
116
 
205
117
  def prompt
@@ -210,20 +122,11 @@ class RubyLex
210
122
 
211
123
  def initialize_input
212
124
  @ltype = nil
213
- @quoted = nil
214
125
  @indent = 0
215
- @indent_stack = []
216
- @lex_state = EXPR_BEG
217
- @space_seen = false
218
- @here_header = false
219
-
220
126
  @continue = false
221
- @post_symbeg = false
222
-
223
- prompt
224
-
225
127
  @line = ""
226
128
  @exp_line_no = @line_no
129
+ @code_block_open = false
227
130
  end
228
131
 
229
132
  def each_top_level_statement
@@ -231,13 +134,14 @@ class RubyLex
231
134
  catch(:TERM_INPUT) do
232
135
  loop do
233
136
  begin
234
- @continue = false
235
137
  prompt
236
138
  unless l = lex
237
139
  throw :TERM_INPUT if @line == ''
238
140
  else
141
+ @line_no += l.count("\n")
142
+ next if l == "\n"
239
143
  @line.concat l
240
- if @ltype or @continue or @indent > 0
144
+ if @code_block_open or @ltype or @continue or @indent > 0
241
145
  next
242
146
  end
243
147
  end
@@ -245,935 +149,337 @@ class RubyLex
245
149
  @line.force_encoding(@io.encoding)
246
150
  yield @line, @exp_line_no
247
151
  end
248
- break unless l
152
+ break if @io.eof?
249
153
  @line = ''
250
154
  @exp_line_no = @line_no
251
155
 
252
156
  @indent = 0
253
- @indent_stack = []
254
- prompt
255
157
  rescue TerminateLineInput
256
158
  initialize_input
257
159
  prompt
258
- get_readed
259
160
  end
260
161
  end
261
162
  end
262
163
  end
263
164
 
264
165
  def lex
265
- continue = @continue
266
- while tk = token
267
- case tk
268
- when TkNL, TkEND_OF_SCRIPT
269
- @continue = continue unless continue.nil?
270
- break unless @continue
271
- when TkSPACE, TkCOMMENT
272
- when TkSEMICOLON, TkBEGIN, TkELSE
273
- @continue = continue = false
274
- else
275
- continue = nil
276
- end
277
- end
278
- line = get_readed
279
- if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
280
- nil
281
- else
282
- line
283
- end
284
- end
285
-
286
- def token
287
- @prev_seek = @seek
288
- @prev_line_no = @line_no
289
- @prev_char_no = @char_no
290
- begin
291
- begin
292
- tk = @OP.match(self)
293
- @space_seen = tk.kind_of?(TkSPACE)
294
- @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
295
- @post_symbeg = tk.kind_of?(TkSYMBEG)
296
- rescue SyntaxError
297
- raise if @exception_on_syntax_error
298
- tk = TkError.new(@seek, @line_no, @char_no)
299
- end
300
- end while @skip_space and tk.kind_of?(TkSPACE)
301
- if @readed_auto_clean_up
302
- get_readed
303
- end
304
- tk
166
+ line = @input.call
167
+ if @io.respond_to?(:check_termination)
168
+ return line # multiline
169
+ end
170
+ code = @line + (line.nil? ? '' : line)
171
+ code.gsub!(/\s*\z/, '').concat("\n")
172
+ @tokens = ripper_lex_without_warning(code)
173
+ @continue = process_continue
174
+ @code_block_open = check_code_block(code)
175
+ @indent = process_nesting_level
176
+ @ltype = process_literal_type
177
+ line
305
178
  end
306
179
 
307
- ENINDENT_CLAUSE = [
308
- "case", "class", "def", "do", "for", "if",
309
- "module", "unless", "until", "while", "begin"
310
- ]
311
- DEINDENT_CLAUSE = ["end"
312
- ]
313
-
314
- PERCENT_LTYPE = {
315
- "q" => "\'",
316
- "Q" => "\"",
317
- "x" => "\`",
318
- "r" => "/",
319
- "w" => "]",
320
- "W" => "]",
321
- "i" => "]",
322
- "I" => "]",
323
- "s" => ":"
324
- }
325
-
326
- PERCENT_PAREN = {
327
- "{" => "}",
328
- "[" => "]",
329
- "<" => ">",
330
- "(" => ")"
331
- }
332
-
333
- Ltype2Token = {
334
- "\'" => TkSTRING,
335
- "\"" => TkSTRING,
336
- "\`" => TkXSTRING,
337
- "/" => TkREGEXP,
338
- "]" => TkDSTRING,
339
- ":" => TkSYMBOL
340
- }
341
- DLtype2Token = {
342
- "\"" => TkDSTRING,
343
- "\`" => TkDXSTRING,
344
- "/" => TkDREGEXP,
345
- }
346
-
347
- def lex_init()
348
- @OP = IRB::SLex.new
349
- @OP.def_rules("\0", "\004", "\032") do |op, io|
350
- Token(TkEND_OF_SCRIPT)
351
- end
352
-
353
- @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
354
- @space_seen = true
355
- while getc =~ /[ \t\f\r\13]/; end
356
- ungetc
357
- Token(TkSPACE)
358
- end
359
-
360
- @OP.def_rule("#") do |op, io|
361
- identify_comment
362
- end
363
-
364
- @OP.def_rule("=begin",
365
- proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
366
- |op, io|
367
- @ltype = "="
368
- until getc == "\n"; end
369
- until peek_equal?("=end") && peek(4) =~ /\s/
370
- until getc == "\n"; end
371
- end
372
- gets
373
- @ltype = nil
374
- Token(TkRD_COMMENT)
375
- end
376
-
377
- @OP.def_rule("\n") do |op, io|
378
- print "\\n\n" if RubyLex.debug?
379
- case @lex_state
380
- when EXPR_BEG, EXPR_FNAME, EXPR_DOT
381
- @continue = true
382
- else
383
- @continue = false
384
- @lex_state = EXPR_BEG
385
- until (@indent_stack.empty? ||
386
- [TkLPAREN, TkLBRACK, TkLBRACE,
387
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
388
- @indent_stack.pop
389
- end
390
- end
391
- @here_header = false
392
- @here_readed = []
393
- Token(TkNL)
394
- end
395
-
396
- @OP.def_rules("*", "**",
397
- "=", "==", "===",
398
- "=~", "<=>",
399
- "<", "<=",
400
- ">", ">=", ">>",
401
- "!", "!=", "!~") do
402
- |op, io|
403
- case @lex_state
404
- when EXPR_FNAME, EXPR_DOT
405
- @lex_state = EXPR_ARG
406
- else
407
- @lex_state = EXPR_BEG
408
- end
409
- Token(op)
410
- end
411
-
412
- @OP.def_rules("<<") do
413
- |op, io|
414
- tk = nil
415
- if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
416
- (@lex_state != EXPR_ARG || @space_seen)
417
- c = peek(0)
418
- if /[-~"'`\w]/ =~ c
419
- tk = identify_here_document
420
- end
421
- end
422
- unless tk
423
- tk = Token(op)
424
- case @lex_state
425
- when EXPR_FNAME, EXPR_DOT
426
- @lex_state = EXPR_ARG
427
- else
428
- @lex_state = EXPR_BEG
429
- end
430
- end
431
- tk
432
- end
433
-
434
- @OP.def_rules("'", '"') do
435
- |op, io|
436
- identify_string(op)
437
- end
438
-
439
- @OP.def_rules("`") do
440
- |op, io|
441
- if @lex_state == EXPR_FNAME
442
- @lex_state = EXPR_END
443
- Token(op)
444
- else
445
- identify_string(op)
446
- end
447
- end
448
-
449
- @OP.def_rules('?') do
450
- |op, io|
451
- if @lex_state == EXPR_END
452
- @lex_state = EXPR_BEG
453
- Token(TkQUESTION)
454
- else
455
- ch = getc
456
- if @lex_state == EXPR_ARG && ch =~ /\s/
457
- ungetc
458
- @lex_state = EXPR_BEG;
459
- Token(TkQUESTION)
460
- else
461
- if (ch == '\\')
462
- read_escape
463
- end
464
- @lex_state = EXPR_END
465
- Token(TkINTEGER)
466
- end
467
- end
468
- end
469
-
470
- @OP.def_rules("&", "&&", "|", "||") do
471
- |op, io|
472
- @lex_state = EXPR_BEG
473
- Token(op)
474
- end
475
-
476
- @OP.def_rules("+=", "-=", "*=", "**=",
477
- "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
478
- |op, io|
479
- @lex_state = EXPR_BEG
480
- op =~ /^(.*)=$/
481
- Token(TkOPASGN, $1)
482
- end
483
-
484
- @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
485
- |op, io|
486
- @lex_state = EXPR_ARG
487
- Token(op)
488
- end
489
-
490
- @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
491
- |op, io|
492
- @lex_state = EXPR_ARG
493
- Token(op)
494
- end
495
-
496
- @OP.def_rules("+", "-") do
497
- |op, io|
498
- catch(:RET) do
499
- if @lex_state == EXPR_ARG
500
- if @space_seen and peek(0) =~ /[0-9]/
501
- throw :RET, identify_number
502
- else
503
- @lex_state = EXPR_BEG
504
- end
505
- elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
506
- throw :RET, identify_number
507
- else
508
- @lex_state = EXPR_BEG
509
- end
510
- Token(op)
511
- end
512
- end
513
-
514
- @OP.def_rule(".") do
515
- |op, io|
516
- @lex_state = EXPR_BEG
517
- if peek(0) =~ /[0-9]/
518
- ungetc
519
- identify_number
520
- else
521
- # for "obj.if" etc.
522
- @lex_state = EXPR_DOT
523
- Token(TkDOT)
524
- end
525
- end
526
-
527
- @OP.def_rules("..", "...") do
528
- |op, io|
529
- @lex_state = EXPR_BEG
530
- Token(op)
531
- end
532
-
533
- lex_int2
180
+ def process_continue
181
+ # last token is always newline
182
+ if @tokens.size >= 2 and @tokens[-2][1] == :on_regexp_end
183
+ # end of regexp literal
184
+ return false
185
+ elsif @tokens.size >= 2 and @tokens[-2][1] == :on_semicolon
186
+ return false
187
+ elsif @tokens.size >= 2 and @tokens[-2][1] == :on_kw and ['begin', 'else', 'ensure'].include?(@tokens[-2][2])
188
+ return false
189
+ elsif !@tokens.empty? and @tokens.last[2] == "\\\n"
190
+ return true
191
+ elsif @tokens.size >= 1 and @tokens[-1][1] == :on_heredoc_end # "EOH\n"
192
+ return false
193
+ elsif @tokens.size >= 2 and defined?(Ripper::EXPR_BEG) and @tokens[-2][3].anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME)
194
+ # end of literal except for regexp
195
+ return true
196
+ end
197
+ false
534
198
  end
535
199
 
536
- def lex_int2
537
- @OP.def_rules("]", "}", ")") do
538
- |op, io|
539
- @lex_state = EXPR_END
540
- @indent -= 1
541
- @indent_stack.pop
542
- Token(op)
543
- end
544
-
545
- @OP.def_rule(":") do
546
- |op, io|
547
- if @lex_state == EXPR_END || peek(0) =~ /\s/
548
- @lex_state = EXPR_BEG
549
- Token(TkCOLON)
550
- else
551
- @lex_state = EXPR_FNAME
552
- Token(TkSYMBEG)
553
- end
554
- end
555
-
556
- @OP.def_rule("::") do
557
- |op, io|
558
- if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
559
- @lex_state = EXPR_BEG
560
- Token(TkCOLON3)
561
- else
562
- @lex_state = EXPR_DOT
563
- Token(TkCOLON2)
564
- end
565
- end
566
-
567
- @OP.def_rule("/") do
568
- |op, io|
569
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
570
- identify_string(op)
571
- elsif peek(0) == '='
572
- getc
573
- @lex_state = EXPR_BEG
574
- Token(TkOPASGN, "/") #/)
575
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
576
- identify_string(op)
577
- else
578
- @lex_state = EXPR_BEG
579
- Token("/") #/)
580
- end
581
- end
582
-
583
- @OP.def_rules("^") do
584
- |op, io|
585
- @lex_state = EXPR_BEG
586
- Token("^")
587
- end
588
-
589
- @OP.def_rules(",") do
590
- |op, io|
591
- @lex_state = EXPR_BEG
592
- Token(op)
593
- end
594
-
595
- @OP.def_rules(";") do
596
- |op, io|
597
- @lex_state = EXPR_BEG
598
- until (@indent_stack.empty? ||
599
- [TkLPAREN, TkLBRACK, TkLBRACE,
600
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
601
- @indent_stack.pop
602
- end
603
- Token(op)
604
- end
605
-
606
- @OP.def_rule("~") do
607
- |op, io|
608
- @lex_state = EXPR_BEG
609
- Token("~")
610
- end
611
-
612
- @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
613
- |op, io|
614
- @lex_state = EXPR_BEG
615
- Token("~")
616
- end
617
-
618
- @OP.def_rule("(") do
619
- |op, io|
620
- @indent += 1
621
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
622
- @lex_state = EXPR_BEG
623
- tk_c = TkfLPAREN
624
- else
625
- @lex_state = EXPR_BEG
626
- tk_c = TkLPAREN
627
- end
628
- @indent_stack.push tk_c
629
- Token(tk_c)
630
- end
631
-
632
- @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
633
- |op, io|
634
- @lex_state = EXPR_ARG
635
- Token("[]")
636
- end
637
-
638
- @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
639
- |op, io|
640
- @lex_state = EXPR_ARG
641
- Token("[]=")
642
- end
643
-
644
- @OP.def_rule("[") do
645
- |op, io|
646
- @indent += 1
647
- if @lex_state == EXPR_FNAME
648
- tk_c = TkfLBRACK
649
- else
650
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
651
- tk_c = TkLBRACK
652
- elsif @lex_state == EXPR_ARG && @space_seen
653
- tk_c = TkLBRACK
654
- else
655
- tk_c = TkfLBRACK
656
- end
657
- @lex_state = EXPR_BEG
658
- end
659
- @indent_stack.push tk_c
660
- Token(tk_c)
661
- end
662
-
663
- @OP.def_rule("{") do
664
- |op, io|
665
- @indent += 1
666
- if @lex_state != EXPR_END && @lex_state != EXPR_ARG
667
- tk_c = TkLBRACE
668
- else
669
- tk_c = TkfLBRACE
670
- end
671
- @lex_state = EXPR_BEG
672
- @indent_stack.push tk_c
673
- Token(tk_c)
674
- end
675
-
676
- @OP.def_rule('\\') do
677
- |op, io|
678
- if getc == "\n"
679
- @space_seen = true
680
- @continue = true
681
- Token(TkSPACE)
682
- else
683
- read_escape
684
- Token("\\")
685
- end
686
- end
687
-
688
- @OP.def_rule('%') do
689
- |op, io|
690
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
691
- identify_quotation
692
- elsif peek(0) == '='
693
- getc
694
- Token(TkOPASGN, :%)
695
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
696
- identify_quotation
697
- else
698
- @lex_state = EXPR_BEG
699
- Token("%") #))
700
- end
701
- end
702
-
703
- @OP.def_rule('$') do
704
- |op, io|
705
- identify_gvar
200
+ def check_code_block(code)
201
+ return true if @tokens.empty?
202
+ if @tokens.last[1] == :on_heredoc_beg
203
+ return true
706
204
  end
707
205
 
708
- @OP.def_rule('@') do
709
- |op, io|
710
- if peek(0) =~ /[\w@]/
711
- ungetc
712
- identify_identifier
206
+ begin # check if parser errors are available
207
+ verbose, $VERBOSE = $VERBOSE, nil
208
+ case RUBY_ENGINE
209
+ when 'jruby'
210
+ JRuby.compile_ir(code)
713
211
  else
714
- Token("@")
212
+ RubyVM::InstructionSequence.compile(code)
213
+ end
214
+ rescue SyntaxError => e
215
+ case e.message
216
+ when /unterminated (?:string|regexp) meets end of file/
217
+ # "unterminated regexp meets end of file"
218
+ #
219
+ # example:
220
+ # /
221
+ #
222
+ # "unterminated string meets end of file"
223
+ #
224
+ # example:
225
+ # '
226
+ return true
227
+ when /syntax error, unexpected end-of-input/
228
+ # "syntax error, unexpected end-of-input, expecting keyword_end"
229
+ #
230
+ # example:
231
+ # if true
232
+ # hoge
233
+ # if false
234
+ # fuga
235
+ # end
236
+ return true
237
+ when /syntax error, unexpected keyword_end/
238
+ # "syntax error, unexpected keyword_end"
239
+ #
240
+ # example:
241
+ # if (
242
+ # end
243
+ #
244
+ # example:
245
+ # end
246
+ return false
247
+ when /syntax error, unexpected '\.'/
248
+ # "syntax error, unexpected '.'"
249
+ #
250
+ # example:
251
+ # .
252
+ return false
253
+ when /unexpected tREGEXP_BEG/
254
+ # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
255
+ #
256
+ # example:
257
+ # method / f /
258
+ return false
715
259
  end
260
+ ensure
261
+ $VERBOSE = verbose
716
262
  end
717
263
 
718
- @OP.def_rule("") do
719
- |op, io|
720
- printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
721
- if peek(0) =~ /[0-9]/
722
- t = identify_number
723
- elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
724
- t = identify_identifier
264
+ if defined?(Ripper::EXPR_BEG)
265
+ last_lex_state = @tokens.last[3]
266
+ if last_lex_state.allbits?(Ripper::EXPR_BEG)
267
+ return false
268
+ elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
269
+ return true
270
+ elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
271
+ return true
272
+ elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
273
+ return true
274
+ elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
275
+ return true
276
+ elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
277
+ return false
725
278
  end
726
- printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
727
- t
728
279
  end
729
280
 
730
- p @OP if RubyLex.debug?
281
+ false
731
282
  end
732
283
 
733
- def identify_gvar
734
- @lex_state = EXPR_END
735
-
736
- case ch = getc
737
- when /[~_*$?!@\/\\;,=:<>".]/ #"
738
- Token(TkGVAR, "$" + ch)
739
- when "-"
740
- Token(TkGVAR, "$-" + getc)
741
- when "&", "`", "'", "+"
742
- Token(TkBACK_REF, "$"+ch)
743
- when /[1-9]/
744
- while getc =~ /[0-9]/; end
745
- ungetc
746
- Token(TkNTH_REF)
747
- when /\w/
748
- ungetc
749
- ungetc
750
- identify_identifier
751
- else
752
- ungetc
753
- Token("$")
754
- end
755
- end
756
-
757
- def identify_identifier
758
- token = ""
759
- if peek(0) =~ /[$@]/
760
- token.concat(c = getc)
761
- if c == "@" and peek(0) == "@"
762
- token.concat getc
763
- end
764
- end
765
-
766
- while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
767
- print ":", ch, ":" if RubyLex.debug?
768
- token.concat ch
769
- end
770
- ungetc
771
-
772
- if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
773
- token.concat getc
774
- end
775
-
776
- # almost fix token
777
-
778
- case token
779
- when /^\$/
780
- return Token(TkGVAR, token)
781
- when /^\@\@/
782
- @lex_state = EXPR_END
783
- # p Token(TkCVAR, token)
784
- return Token(TkCVAR, token)
785
- when /^\@/
786
- @lex_state = EXPR_END
787
- return Token(TkIVAR, token)
788
- end
789
-
790
- if @lex_state != EXPR_DOT
791
- print token, "\n" if RubyLex.debug?
792
-
793
- token_c, *trans = TkReading2Token[token]
794
- if token_c
795
- # reserved word?
796
-
797
- if (@lex_state != EXPR_BEG &&
798
- @lex_state != EXPR_FNAME &&
799
- trans[1])
800
- # modifiers
801
- token_c = TkSymbol2Token[trans[1]]
802
- @lex_state = trans[0]
803
- else
804
- if @lex_state != EXPR_FNAME and peek(0) != ':'
805
- if ENINDENT_CLAUSE.include?(token)
806
- # check for ``class = val'' etc.
807
- valid = true
808
- case token
809
- when "class"
810
- valid = false unless peek_match?(/^\s*(<<|\w|::)/)
811
- when "def"
812
- valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
813
- when "do"
814
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
815
- when *ENINDENT_CLAUSE
816
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
817
- else
818
- # no nothing
819
- end
820
- if valid
821
- if token == "do"
822
- if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
823
- @indent += 1
824
- @indent_stack.push token_c
825
- end
826
- else
827
- @indent += 1
828
- @indent_stack.push token_c
829
- end
830
- end
831
-
832
- elsif DEINDENT_CLAUSE.include?(token)
833
- @indent -= 1
834
- @indent_stack.pop
835
- end
836
- @lex_state = trans[0]
284
+ def process_nesting_level
285
+ indent = 0
286
+ @tokens.each_with_index { |t, index|
287
+ case t[1]
288
+ when :on_lbracket, :on_lbrace, :on_lparen
289
+ indent += 1
290
+ when :on_rbracket, :on_rbrace, :on_rparen
291
+ indent -= 1
292
+ when :on_kw
293
+ next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
294
+ case t[2]
295
+ when 'do'
296
+ if index > 0 and @tokens[index - 1][3].anybits?(Ripper::EXPR_CMDARG | Ripper::EXPR_ENDFN)
297
+ # method_with_block do; end
298
+ indent += 1
837
299
  else
838
- @lex_state = EXPR_END
300
+ # while cond do; end # also "until" or "for"
301
+ # This "do" doesn't increment indent because "while" already
302
+ # incremented.
839
303
  end
304
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
305
+ indent += 1
306
+ when 'if', 'unless', 'while', 'until'
307
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
308
+ indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL)
309
+ when 'end'
310
+ indent -= 1
840
311
  end
841
- return Token(token_c, token)
842
312
  end
843
- end
844
-
845
- if @lex_state == EXPR_FNAME
846
- @lex_state = EXPR_END
847
- if peek(0) == '='
848
- token.concat getc
849
- end
850
- elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
851
- @lex_state = EXPR_ARG
852
- else
853
- @lex_state = EXPR_END
854
- end
855
-
856
- if token[0, 1] =~ /[A-Z]/
857
- return Token(TkCONSTANT, token)
858
- elsif token[token.size - 1, 1] =~ /[!?]/
859
- return Token(TkFID, token)
860
- else
861
- return Token(TkIDENTIFIER, token)
862
- end
313
+ # percent literals are not indented
314
+ }
315
+ indent
863
316
  end
864
317
 
865
- def identify_here_document
866
- ch = getc
867
- if ch == "-" || ch == "~"
868
- ch = getc
869
- indent = true
870
- end
871
- if /['"`]/ =~ ch
872
- lt = ch
873
- quoted = ""
874
- while (c = getc) && c != lt
875
- quoted.concat c
876
- end
877
- else
878
- lt = '"'
879
- quoted = ch.dup
880
- while (c = getc) && c =~ /\w/
881
- quoted.concat c
882
- end
883
- ungetc
884
- end
885
-
886
- ltback, @ltype = @ltype, lt
887
- reserve = []
888
- while ch = getc
889
- reserve.push ch
890
- if ch == "\\"
891
- reserve.push ch = getc
892
- elsif ch == "\n"
893
- break
894
- end
895
- end
896
-
897
- @here_header = false
898
-
899
- line = ""
900
- while ch = getc
901
- if ch == "\n"
902
- if line == quoted
903
- break
318
+ def check_newline_depth_difference
319
+ depth_difference = 0
320
+ @tokens.each_with_index do |t, index|
321
+ case t[1]
322
+ when :on_ignored_nl, :on_nl, :on_comment
323
+ if index != (@tokens.size - 1)
324
+ depth_difference = 0
904
325
  end
905
- line = ""
906
- else
907
- line.concat ch unless indent && line == "" && /\s/ =~ ch
908
- if @ltype != "'" && ch == "#" && peek(0) == "{"
909
- identify_string_dvar
910
- end
911
- end
912
- end
913
-
914
- @here_header = true
915
- @here_readed.concat reserve
916
- while ch = reserve.pop
917
- ungetc ch
918
- end
919
-
920
- @ltype = ltback
921
- @lex_state = EXPR_END
922
- Token(Ltype2Token[lt])
923
- end
924
-
925
- def identify_quotation
926
- ch = getc
927
- if lt = PERCENT_LTYPE[ch]
928
- ch = getc
929
- elsif ch =~ /\W/
930
- lt = "\""
931
- else
932
- RubyLex.fail SyntaxError, "unknown type of %string"
933
- end
934
- @quoted = ch unless @quoted = PERCENT_PAREN[ch]
935
- identify_string(lt, @quoted)
936
- end
937
-
938
- def identify_number
939
- @lex_state = EXPR_END
940
-
941
- if peek(0) == "0" && peek(1) !~ /[.eE]/
942
- getc
943
- case peek(0)
944
- when /[xX]/
945
- ch = getc
946
- match = /[0-9a-fA-F_]/
947
- when /[bB]/
948
- ch = getc
949
- match = /[01_]/
950
- when /[oO]/
951
- ch = getc
952
- match = /[0-7_]/
953
- when /[dD]/
954
- ch = getc
955
- match = /[0-9_]/
956
- when /[0-7]/
957
- match = /[0-7_]/
958
- when /[89]/
959
- RubyLex.fail SyntaxError, "Invalid octal digit"
960
- else
961
- return Token(TkINTEGER)
962
- end
963
-
964
- len0 = true
965
- non_digit = false
966
- while ch = getc
967
- if match =~ ch
968
- if ch == "_"
969
- if non_digit
970
- RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
971
- else
972
- non_digit = ch
973
- end
326
+ next
327
+ when :on_sp
328
+ next
329
+ end
330
+ case t[1]
331
+ when :on_lbracket, :on_lbrace, :on_lparen
332
+ depth_difference += 1
333
+ when :on_rbracket, :on_rbrace, :on_rparen
334
+ depth_difference -= 1
335
+ when :on_kw
336
+ next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
337
+ case t[2]
338
+ when 'do'
339
+ if index > 0 and @tokens[index - 1][3].anybits?(Ripper::EXPR_CMDARG | Ripper::EXPR_ENDFN)
340
+ # method_with_block do; end
341
+ depth_difference += 1
974
342
  else
975
- non_digit = false
976
- len0 = false
343
+ # while cond do; end # also "until" or "for"
344
+ # This "do" doesn't increment indent because "while" already
345
+ # incremented.
977
346
  end
978
- else
979
- ungetc
980
- if len0
981
- RubyLex.fail SyntaxError, "numeric literal without digits"
982
- end
983
- if non_digit
984
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
347
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
348
+ depth_difference += 1
349
+ when 'if', 'unless', 'while', 'until'
350
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
351
+ unless t[3].allbits?(Ripper::EXPR_LABEL)
352
+ depth_difference += 1
985
353
  end
986
- break
987
- end
988
- end
989
- return Token(TkINTEGER)
990
- end
991
-
992
- type = TkINTEGER
993
- allow_point = true
994
- allow_e = true
995
- non_digit = false
996
- while ch = getc
997
- case ch
998
- when /[0-9]/
999
- non_digit = false
1000
- when "_"
1001
- non_digit = ch
1002
- when allow_point && "."
1003
- if non_digit
1004
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1005
- end
1006
- type = TkFLOAT
1007
- if peek(0) !~ /[0-9]/
1008
- type = TkINTEGER
1009
- ungetc
1010
- break
1011
- end
1012
- allow_point = false
1013
- when allow_e && "e", allow_e && "E"
1014
- if non_digit
1015
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1016
- end
1017
- type = TkFLOAT
1018
- if peek(0) =~ /[+-]/
1019
- getc
354
+ when 'else', 'elsif', 'rescue', 'ensure', 'when', 'in'
355
+ depth_difference += 1
1020
356
  end
1021
- allow_e = false
1022
- allow_point = false
1023
- non_digit = ch
1024
- else
1025
- if non_digit
1026
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1027
- end
1028
- ungetc
1029
- break
1030
357
  end
1031
358
  end
1032
- Token(type)
359
+ depth_difference
1033
360
  end
1034
361
 
1035
- def identify_string(ltype, quoted = ltype)
1036
- @ltype = ltype
1037
- @quoted = quoted
1038
- subtype = nil
1039
- begin
1040
- nest = 0
1041
- while ch = getc
1042
- if @quoted == ch and nest == 0
1043
- break
1044
- elsif @ltype != "'" && ch == "#" && peek(0) == "{"
1045
- identify_string_dvar
1046
- elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
1047
- subtype = true
1048
- elsif ch == '\\' and @ltype == "'" #'
1049
- case ch = getc
1050
- when "\\", "\n", "'"
1051
- else
1052
- ungetc
1053
- end
1054
- elsif ch == '\\' #'
1055
- read_escape
362
+ def check_corresponding_token_depth
363
+ corresponding_token_depth = nil
364
+ is_first_spaces_of_line = true
365
+ is_first_printable_of_line = true
366
+ spaces_of_nest = []
367
+ spaces_at_line_head = 0
368
+ @tokens.each_with_index do |t, index|
369
+ case t[1]
370
+ when :on_ignored_nl, :on_nl, :on_comment
371
+ corresponding_token_depth = nil
372
+ spaces_at_line_head = 0
373
+ is_first_spaces_of_line = true
374
+ is_first_printable_of_line = true
375
+ next
376
+ when :on_sp
377
+ spaces_at_line_head = t[2].count(' ') if is_first_spaces_of_line
378
+ is_first_spaces_of_line = false
379
+ next
380
+ end
381
+ case t[1]
382
+ when :on_lbracket, :on_lbrace, :on_lparen
383
+ spaces_of_nest.push(spaces_at_line_head)
384
+ when :on_rbracket, :on_rbrace, :on_rparen
385
+ if is_first_printable_of_line
386
+ corresponding_token_depth = spaces_of_nest.pop
387
+ else
388
+ spaces_of_nest.pop
389
+ corresponding_token_depth = nil
1056
390
  end
1057
- if PERCENT_PAREN.values.include?(@quoted)
1058
- if PERCENT_PAREN[ch] == @quoted
1059
- nest += 1
1060
- elsif ch == @quoted
1061
- nest -= 1
391
+ when :on_kw
392
+ next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
393
+ case t[2]
394
+ when 'def', 'do', 'case', 'for', 'begin', 'class', 'module'
395
+ spaces_of_nest.push(spaces_at_line_head)
396
+ when 'if', 'unless', 'while', 'until'
397
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
398
+ unless t[3].allbits?(Ripper::EXPR_LABEL)
399
+ spaces_of_nest.push(spaces_at_line_head)
400
+ end
401
+ when 'else', 'elsif', 'rescue', 'ensure', 'when', 'in'
402
+ corresponding_token_depth = spaces_of_nest.last
403
+ when 'end'
404
+ if is_first_printable_of_line
405
+ corresponding_token_depth = spaces_of_nest.pop
406
+ else
407
+ spaces_of_nest.pop
408
+ corresponding_token_depth = nil
1062
409
  end
1063
410
  end
1064
411
  end
1065
- if @ltype == "/"
1066
- while /[imxoesun]/ =~ peek(0)
1067
- getc
1068
- end
1069
- end
1070
- if subtype
1071
- Token(DLtype2Token[ltype])
1072
- else
1073
- Token(Ltype2Token[ltype])
1074
- end
1075
- ensure
1076
- @ltype = nil
1077
- @quoted = nil
1078
- @lex_state = EXPR_END
412
+ is_first_spaces_of_line = false
413
+ is_first_printable_of_line = false
1079
414
  end
415
+ corresponding_token_depth
1080
416
  end
1081
417
 
1082
- def identify_string_dvar
1083
- begin
1084
- getc
1085
-
1086
- reserve_continue = @continue
1087
- reserve_ltype = @ltype
1088
- reserve_indent = @indent
1089
- reserve_indent_stack = @indent_stack
1090
- reserve_state = @lex_state
1091
- reserve_quoted = @quoted
1092
-
1093
- @ltype = nil
1094
- @quoted = nil
1095
- @indent = 0
1096
- @indent_stack = []
1097
- @lex_state = EXPR_BEG
1098
-
1099
- loop do
1100
- @continue = false
1101
- prompt
1102
- tk = token
1103
- if @ltype or @continue or @indent >= 0
1104
- next
418
+ def check_string_literal
419
+ i = 0
420
+ start_token = []
421
+ end_type = []
422
+ while i < @tokens.size
423
+ t = @tokens[i]
424
+ case t[1]
425
+ when :on_tstring_beg
426
+ start_token << t
427
+ end_type << [:on_tstring_end, :on_label_end]
428
+ when :on_regexp_beg
429
+ start_token << t
430
+ end_type << :on_regexp_end
431
+ when :on_symbeg
432
+ acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw}
433
+ if (i + 1) < @tokens.size and acceptable_single_tokens.all?{ |t| @tokens[i + 1][1] != t }
434
+ start_token << t
435
+ end_type << :on_tstring_end
1105
436
  end
1106
- break if tk.kind_of?(TkRBRACE)
1107
- end
1108
- ensure
1109
- @continue = reserve_continue
1110
- @ltype = reserve_ltype
1111
- @indent = reserve_indent
1112
- @indent_stack = reserve_indent_stack
1113
- @lex_state = reserve_state
1114
- @quoted = reserve_quoted
1115
- end
437
+ when :on_backtick
438
+ start_token << t
439
+ end_type << :on_tstring_end
440
+ when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
441
+ start_token << t
442
+ end_type << :on_tstring_end
443
+ when :on_heredoc_beg
444
+ start_token << t
445
+ end_type << :on_heredoc_end
446
+ when *end_type.last
447
+ start_token.pop
448
+ end_type.pop
449
+ end
450
+ i += 1
451
+ end
452
+ start_token.last.nil? ? '' : start_token.last
1116
453
  end
1117
454
 
1118
- def identify_comment
1119
- @ltype = "#"
1120
-
1121
- while ch = getc
1122
- if ch == "\n"
1123
- @ltype = nil
1124
- ungetc
1125
- break
1126
- end
1127
- end
1128
- return Token(TkCOMMENT)
1129
- end
1130
-
1131
- def read_escape
1132
- case ch = getc
1133
- when "\n", "\r", "\f"
1134
- when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
1135
- when /[0-7]/
1136
- ungetc ch
1137
- 3.times do
1138
- case ch = getc
1139
- when /[0-7]/
1140
- when nil
1141
- break
1142
- else
1143
- ungetc
1144
- break
1145
- end
1146
- end
1147
-
1148
- when "x"
1149
- 2.times do
1150
- case ch = getc
1151
- when /[0-9a-fA-F]/
1152
- when nil
1153
- break
1154
- else
1155
- ungetc
1156
- break
1157
- end
1158
- end
1159
-
1160
- when "M"
1161
- if (ch = getc) != '-'
1162
- ungetc
1163
- else
1164
- if (ch = getc) == "\\" #"
1165
- read_escape
1166
- end
1167
- end
1168
-
1169
- when "C", "c" #, "^"
1170
- if ch == "C" and (ch = getc) != "-"
1171
- ungetc
1172
- elsif (ch = getc) == "\\" #"
1173
- read_escape
455
+ def process_literal_type
456
+ start_token = check_string_literal
457
+ case start_token[1]
458
+ when :on_tstring_beg
459
+ case start_token[2]
460
+ when ?" then ?"
461
+ when /^%.$/ then ?"
462
+ when /^%Q.$/ then ?"
463
+ when ?' then ?'
464
+ when /^%q.$/ then ?'
465
+ end
466
+ when :on_regexp_beg then ?/
467
+ when :on_symbeg then ?:
468
+ when :on_backtick then ?`
469
+ when :on_qwords_beg then ?]
470
+ when :on_words_beg then ?]
471
+ when :on_qsymbols_beg then ?]
472
+ when :on_symbols_beg then ?]
473
+ when :on_heredoc_beg
474
+ start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
475
+ case $1
476
+ when ?" then ?"
477
+ when ?' then ?'
478
+ when ?` then ?`
479
+ else ?"
1174
480
  end
1175
481
  else
1176
- # other characters
482
+ nil
1177
483
  end
1178
484
  end
1179
485
  end