irb 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,21 +10,21 @@
10
10
  #
11
11
  #
12
12
 
13
- require "e2mmap"
14
-
15
13
  module IRB
16
14
  # An abstract output class for IO in irb. This is mainly used internally by
17
15
  # IRB::Notifier. You can define your own output method to use with Irb.new,
18
16
  # or Context.new
19
17
  class OutputMethod
20
- extend Exception2MessageMapper
21
- def_exception :NotImplementedError, "Need to define `%s'"
22
-
18
+ class NotImplementedError < StandardError
19
+ def initialize(val)
20
+ super("Need to define `#{val}'")
21
+ end
22
+ end
23
23
 
24
24
  # Open this method to implement your own output method, raises a
25
25
  # NotImplementedError if you don't define #print in your own class.
26
26
  def print(*opts)
27
- OutputMethod.Raise NotImplementedError, "print"
27
+ raise NotImplementedError, "print"
28
28
  end
29
29
 
30
30
  # Prints the given +opts+, with a newline delimiter.
@@ -10,74 +10,51 @@
10
10
  #
11
11
  #
12
12
 
13
- require "e2mmap"
14
- require_relative "slex"
15
- require_relative "ruby-token"
13
+ require "ripper"
16
14
 
17
15
  # :stopdoc:
18
16
  class RubyLex
19
17
 
20
- extend Exception2MessageMapper
21
- def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
22
- def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
23
- def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
24
- def_exception(:TkReading2TokenDuplicateError,
25
- "key duplicate(token_n='%s', key='%s')")
26
- def_exception(:SyntaxError, "%s")
27
-
28
- def_exception(:TerminateLineInput, "Terminate Line Input")
29
-
30
- include RubyToken
31
-
32
- class << self
33
- attr_accessor :debug_level
34
- def debug?
35
- @debug_level > 0
18
+ class TerminateLineInput < StandardError
19
+ def initialize
20
+ super("Terminate Line Input")
36
21
  end
37
22
  end
38
- @debug_level = 0
39
23
 
40
24
  def initialize
41
- lex_init
42
- set_input(STDIN)
43
-
44
- @seek = 0
45
25
  @exp_line_no = @line_no = 1
46
- @base_char_no = 0
47
- @char_no = 0
48
- @rests = []
49
- @readed = []
50
- @here_readed = []
51
-
52
26
  @indent = 0
53
- @indent_stack = []
54
- @lex_state = EXPR_BEG
55
- @space_seen = false
56
- @here_header = false
57
- @post_symbeg = false
58
-
59
27
  @continue = false
60
28
  @line = ""
61
-
62
- @skip_space = false
63
- @readed_auto_clean_up = false
64
- @exception_on_syntax_error = true
65
-
66
29
  @prompt = nil
67
30
  end
68
31
 
69
- attr_accessor :skip_space
70
- attr_accessor :readed_auto_clean_up
71
- attr_accessor :exception_on_syntax_error
72
-
73
- attr_reader :seek
74
- attr_reader :char_no
75
- attr_reader :line_no
76
- attr_reader :indent
77
-
78
32
  # io functions
79
33
  def set_input(io, p = nil, &block)
80
34
  @io = io
35
+ if @io.respond_to?(:check_termination)
36
+ @io.check_termination do |code|
37
+ code.gsub!(/\s*\z/, '').concat("\n")
38
+ ltype, indent, continue, code_block_open = check_state(code)
39
+ if ltype or indent > 0 or continue or code_block_open
40
+ false
41
+ else
42
+ true
43
+ end
44
+ end
45
+ end
46
+ if @io.respond_to?(:dynamic_prompt)
47
+ @io.dynamic_prompt do |lines|
48
+ lines << '' if lines.empty?
49
+ result = []
50
+ lines.each_index { |i|
51
+ c = lines[0..i].map{ |l| l + "\n" }.join
52
+ ltype, indent, continue, code_block_open = check_state(c)
53
+ result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + i)
54
+ }
55
+ result
56
+ end
57
+ end
81
58
  if p.respond_to?(:call)
82
59
  @input = p
83
60
  elsif block_given?
@@ -87,119 +64,54 @@ class RubyLex
87
64
  end
88
65
  end
89
66
 
90
- def get_readed
91
- if idx = @readed.rindex("\n")
92
- @base_char_no = @readed.size - (idx + 1)
93
- else
94
- @base_char_no += @readed.size
95
- end
96
-
97
- readed = @readed.join("")
98
- @readed = []
99
- readed
100
- end
101
-
102
- def getc
103
- while @rests.empty?
104
- @rests.push nil unless buf_input
105
- end
106
- c = @rests.shift
107
- if @here_header
108
- @here_readed.push c
109
- else
110
- @readed.push c
111
- end
112
- @seek += 1
113
- if c == "\n"
114
- @line_no += 1
115
- @char_no = 0
67
+ def set_prompt(p = nil, &block)
68
+ p = block if block_given?
69
+ if p.respond_to?(:call)
70
+ @prompt = p
116
71
  else
117
- @char_no += 1
118
- end
119
- c
120
- end
121
-
122
- def gets
123
- l = ""
124
- while c = getc
125
- l.concat(c)
126
- break if c == "\n"
72
+ @prompt = Proc.new{print p}
127
73
  end
128
- return nil if l == "" and c.nil?
129
- l
130
74
  end
131
75
 
132
- def eof?
133
- @io.eof?
134
- end
135
-
136
- def getc_of_rests
137
- if @rests.empty?
138
- nil
139
- else
140
- getc
141
- end
76
+ def ripper_lex_without_warning(code)
77
+ verbose, $VERBOSE = $VERBOSE, nil
78
+ tokens = Ripper.lex(code)
79
+ $VERBOSE = verbose
80
+ tokens
142
81
  end
143
82
 
144
- def ungetc(c = nil)
145
- if @here_readed.empty?
146
- c2 = @readed.pop
147
- else
148
- c2 = @here_readed.pop
149
- end
150
- c = c2 unless c
151
- @rests.unshift c #c =
152
- @seek -= 1
153
- if c == "\n"
154
- @line_no -= 1
155
- if idx = @readed.rindex("\n")
156
- @char_no = idx + 1
157
- else
158
- @char_no = @base_char_no + @readed.size
83
+ def set_auto_indent(context)
84
+ if @io.respond_to?(:auto_indent) and context.auto_indent_mode
85
+ @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
86
+ if is_newline
87
+ md = lines[line_index - 1].match(/(\A +)/)
88
+ prev_spaces = md.nil? ? 0 : md[1].count(' ')
89
+ @tokens = ripper_lex_without_warning(lines[0..line_index].join("\n"))
90
+ depth_difference = check_newline_depth_difference
91
+ prev_spaces + depth_difference * 2
92
+ else
93
+ code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
94
+ last_line = lines[line_index]&.byteslice(0, byte_pointer)
95
+ code += last_line if last_line
96
+ @tokens = ripper_lex_without_warning(code)
97
+ corresponding_token_depth = check_corresponding_token_depth
98
+ if corresponding_token_depth
99
+ corresponding_token_depth
100
+ else
101
+ nil
102
+ end
103
+ end
159
104
  end
160
- else
161
- @char_no -= 1
162
105
  end
163
106
  end
164
107
 
165
- def peek_equal?(str)
166
- chrs = str.split(//)
167
- until @rests.size >= chrs.size
168
- return false unless buf_input
169
- end
170
- @rests[0, chrs.size] == chrs
171
- end
172
-
173
- def peek_match?(regexp)
174
- while @rests.empty?
175
- return false unless buf_input
176
- end
177
- regexp =~ @rests.join("")
178
- end
179
-
180
- def peek(i = 0)
181
- while @rests.size <= i
182
- return nil unless buf_input
183
- end
184
- @rests[i]
185
- end
186
-
187
- def buf_input
188
- prompt
189
- line = @input.call
190
- return nil unless line
191
- @rests.concat line.chars.to_a
192
- true
193
- end
194
- private :buf_input
195
-
196
- def set_prompt(p = nil, &block)
197
- p = block if block_given?
198
- if p.respond_to?(:call)
199
- @prompt = p
200
- else
201
- @prompt = Proc.new{print p}
202
- end
108
+ def check_state(code)
109
+ @tokens = ripper_lex_without_warning(code)
110
+ ltype = process_literal_type
111
+ indent = process_nesting_level
112
+ continue = process_continue
113
+ code_block_open = check_code_block(code)
114
+ [ltype, indent, continue, code_block_open]
203
115
  end
204
116
 
205
117
  def prompt
@@ -210,20 +122,11 @@ class RubyLex
210
122
 
211
123
  def initialize_input
212
124
  @ltype = nil
213
- @quoted = nil
214
125
  @indent = 0
215
- @indent_stack = []
216
- @lex_state = EXPR_BEG
217
- @space_seen = false
218
- @here_header = false
219
-
220
126
  @continue = false
221
- @post_symbeg = false
222
-
223
- prompt
224
-
225
127
  @line = ""
226
128
  @exp_line_no = @line_no
129
+ @code_block_open = false
227
130
  end
228
131
 
229
132
  def each_top_level_statement
@@ -231,13 +134,14 @@ class RubyLex
231
134
  catch(:TERM_INPUT) do
232
135
  loop do
233
136
  begin
234
- @continue = false
235
137
  prompt
236
138
  unless l = lex
237
139
  throw :TERM_INPUT if @line == ''
238
140
  else
141
+ @line_no += l.count("\n")
142
+ next if l == "\n"
239
143
  @line.concat l
240
- if @ltype or @continue or @indent > 0
144
+ if @code_block_open or @ltype or @continue or @indent > 0
241
145
  next
242
146
  end
243
147
  end
@@ -245,935 +149,337 @@ class RubyLex
245
149
  @line.force_encoding(@io.encoding)
246
150
  yield @line, @exp_line_no
247
151
  end
248
- break unless l
152
+ break if @io.eof?
249
153
  @line = ''
250
154
  @exp_line_no = @line_no
251
155
 
252
156
  @indent = 0
253
- @indent_stack = []
254
- prompt
255
157
  rescue TerminateLineInput
256
158
  initialize_input
257
159
  prompt
258
- get_readed
259
160
  end
260
161
  end
261
162
  end
262
163
  end
263
164
 
264
165
  def lex
265
- continue = @continue
266
- while tk = token
267
- case tk
268
- when TkNL, TkEND_OF_SCRIPT
269
- @continue = continue unless continue.nil?
270
- break unless @continue
271
- when TkSPACE, TkCOMMENT
272
- when TkSEMICOLON, TkBEGIN, TkELSE
273
- @continue = continue = false
274
- else
275
- continue = nil
276
- end
277
- end
278
- line = get_readed
279
- if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
280
- nil
281
- else
282
- line
283
- end
284
- end
285
-
286
- def token
287
- @prev_seek = @seek
288
- @prev_line_no = @line_no
289
- @prev_char_no = @char_no
290
- begin
291
- begin
292
- tk = @OP.match(self)
293
- @space_seen = tk.kind_of?(TkSPACE)
294
- @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
295
- @post_symbeg = tk.kind_of?(TkSYMBEG)
296
- rescue SyntaxError
297
- raise if @exception_on_syntax_error
298
- tk = TkError.new(@seek, @line_no, @char_no)
299
- end
300
- end while @skip_space and tk.kind_of?(TkSPACE)
301
- if @readed_auto_clean_up
302
- get_readed
303
- end
304
- tk
166
+ line = @input.call
167
+ if @io.respond_to?(:check_termination)
168
+ return line # multiline
169
+ end
170
+ code = @line + (line.nil? ? '' : line)
171
+ code.gsub!(/\s*\z/, '').concat("\n")
172
+ @tokens = ripper_lex_without_warning(code)
173
+ @continue = process_continue
174
+ @code_block_open = check_code_block(code)
175
+ @indent = process_nesting_level
176
+ @ltype = process_literal_type
177
+ line
305
178
  end
306
179
 
307
- ENINDENT_CLAUSE = [
308
- "case", "class", "def", "do", "for", "if",
309
- "module", "unless", "until", "while", "begin"
310
- ]
311
- DEINDENT_CLAUSE = ["end"
312
- ]
313
-
314
- PERCENT_LTYPE = {
315
- "q" => "\'",
316
- "Q" => "\"",
317
- "x" => "\`",
318
- "r" => "/",
319
- "w" => "]",
320
- "W" => "]",
321
- "i" => "]",
322
- "I" => "]",
323
- "s" => ":"
324
- }
325
-
326
- PERCENT_PAREN = {
327
- "{" => "}",
328
- "[" => "]",
329
- "<" => ">",
330
- "(" => ")"
331
- }
332
-
333
- Ltype2Token = {
334
- "\'" => TkSTRING,
335
- "\"" => TkSTRING,
336
- "\`" => TkXSTRING,
337
- "/" => TkREGEXP,
338
- "]" => TkDSTRING,
339
- ":" => TkSYMBOL
340
- }
341
- DLtype2Token = {
342
- "\"" => TkDSTRING,
343
- "\`" => TkDXSTRING,
344
- "/" => TkDREGEXP,
345
- }
346
-
347
- def lex_init()
348
- @OP = IRB::SLex.new
349
- @OP.def_rules("\0", "\004", "\032") do |op, io|
350
- Token(TkEND_OF_SCRIPT)
351
- end
352
-
353
- @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
354
- @space_seen = true
355
- while getc =~ /[ \t\f\r\13]/; end
356
- ungetc
357
- Token(TkSPACE)
358
- end
359
-
360
- @OP.def_rule("#") do |op, io|
361
- identify_comment
362
- end
363
-
364
- @OP.def_rule("=begin",
365
- proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
366
- |op, io|
367
- @ltype = "="
368
- until getc == "\n"; end
369
- until peek_equal?("=end") && peek(4) =~ /\s/
370
- until getc == "\n"; end
371
- end
372
- gets
373
- @ltype = nil
374
- Token(TkRD_COMMENT)
375
- end
376
-
377
- @OP.def_rule("\n") do |op, io|
378
- print "\\n\n" if RubyLex.debug?
379
- case @lex_state
380
- when EXPR_BEG, EXPR_FNAME, EXPR_DOT
381
- @continue = true
382
- else
383
- @continue = false
384
- @lex_state = EXPR_BEG
385
- until (@indent_stack.empty? ||
386
- [TkLPAREN, TkLBRACK, TkLBRACE,
387
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
388
- @indent_stack.pop
389
- end
390
- end
391
- @here_header = false
392
- @here_readed = []
393
- Token(TkNL)
394
- end
395
-
396
- @OP.def_rules("*", "**",
397
- "=", "==", "===",
398
- "=~", "<=>",
399
- "<", "<=",
400
- ">", ">=", ">>",
401
- "!", "!=", "!~") do
402
- |op, io|
403
- case @lex_state
404
- when EXPR_FNAME, EXPR_DOT
405
- @lex_state = EXPR_ARG
406
- else
407
- @lex_state = EXPR_BEG
408
- end
409
- Token(op)
410
- end
411
-
412
- @OP.def_rules("<<") do
413
- |op, io|
414
- tk = nil
415
- if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
416
- (@lex_state != EXPR_ARG || @space_seen)
417
- c = peek(0)
418
- if /[-~"'`\w]/ =~ c
419
- tk = identify_here_document
420
- end
421
- end
422
- unless tk
423
- tk = Token(op)
424
- case @lex_state
425
- when EXPR_FNAME, EXPR_DOT
426
- @lex_state = EXPR_ARG
427
- else
428
- @lex_state = EXPR_BEG
429
- end
430
- end
431
- tk
432
- end
433
-
434
- @OP.def_rules("'", '"') do
435
- |op, io|
436
- identify_string(op)
437
- end
438
-
439
- @OP.def_rules("`") do
440
- |op, io|
441
- if @lex_state == EXPR_FNAME
442
- @lex_state = EXPR_END
443
- Token(op)
444
- else
445
- identify_string(op)
446
- end
447
- end
448
-
449
- @OP.def_rules('?') do
450
- |op, io|
451
- if @lex_state == EXPR_END
452
- @lex_state = EXPR_BEG
453
- Token(TkQUESTION)
454
- else
455
- ch = getc
456
- if @lex_state == EXPR_ARG && ch =~ /\s/
457
- ungetc
458
- @lex_state = EXPR_BEG;
459
- Token(TkQUESTION)
460
- else
461
- if (ch == '\\')
462
- read_escape
463
- end
464
- @lex_state = EXPR_END
465
- Token(TkINTEGER)
466
- end
467
- end
468
- end
469
-
470
- @OP.def_rules("&", "&&", "|", "||") do
471
- |op, io|
472
- @lex_state = EXPR_BEG
473
- Token(op)
474
- end
475
-
476
- @OP.def_rules("+=", "-=", "*=", "**=",
477
- "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
478
- |op, io|
479
- @lex_state = EXPR_BEG
480
- op =~ /^(.*)=$/
481
- Token(TkOPASGN, $1)
482
- end
483
-
484
- @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
485
- |op, io|
486
- @lex_state = EXPR_ARG
487
- Token(op)
488
- end
489
-
490
- @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
491
- |op, io|
492
- @lex_state = EXPR_ARG
493
- Token(op)
494
- end
495
-
496
- @OP.def_rules("+", "-") do
497
- |op, io|
498
- catch(:RET) do
499
- if @lex_state == EXPR_ARG
500
- if @space_seen and peek(0) =~ /[0-9]/
501
- throw :RET, identify_number
502
- else
503
- @lex_state = EXPR_BEG
504
- end
505
- elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
506
- throw :RET, identify_number
507
- else
508
- @lex_state = EXPR_BEG
509
- end
510
- Token(op)
511
- end
512
- end
513
-
514
- @OP.def_rule(".") do
515
- |op, io|
516
- @lex_state = EXPR_BEG
517
- if peek(0) =~ /[0-9]/
518
- ungetc
519
- identify_number
520
- else
521
- # for "obj.if" etc.
522
- @lex_state = EXPR_DOT
523
- Token(TkDOT)
524
- end
525
- end
526
-
527
- @OP.def_rules("..", "...") do
528
- |op, io|
529
- @lex_state = EXPR_BEG
530
- Token(op)
531
- end
532
-
533
- lex_int2
180
+ def process_continue
181
+ # last token is always newline
182
+ if @tokens.size >= 2 and @tokens[-2][1] == :on_regexp_end
183
+ # end of regexp literal
184
+ return false
185
+ elsif @tokens.size >= 2 and @tokens[-2][1] == :on_semicolon
186
+ return false
187
+ elsif @tokens.size >= 2 and @tokens[-2][1] == :on_kw and ['begin', 'else', 'ensure'].include?(@tokens[-2][2])
188
+ return false
189
+ elsif !@tokens.empty? and @tokens.last[2] == "\\\n"
190
+ return true
191
+ elsif @tokens.size >= 1 and @tokens[-1][1] == :on_heredoc_end # "EOH\n"
192
+ return false
193
+ elsif @tokens.size >= 2 and defined?(Ripper::EXPR_BEG) and @tokens[-2][3].anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME)
194
+ # end of literal except for regexp
195
+ return true
196
+ end
197
+ false
534
198
  end
535
199
 
536
- def lex_int2
537
- @OP.def_rules("]", "}", ")") do
538
- |op, io|
539
- @lex_state = EXPR_END
540
- @indent -= 1
541
- @indent_stack.pop
542
- Token(op)
543
- end
544
-
545
- @OP.def_rule(":") do
546
- |op, io|
547
- if @lex_state == EXPR_END || peek(0) =~ /\s/
548
- @lex_state = EXPR_BEG
549
- Token(TkCOLON)
550
- else
551
- @lex_state = EXPR_FNAME
552
- Token(TkSYMBEG)
553
- end
554
- end
555
-
556
- @OP.def_rule("::") do
557
- |op, io|
558
- if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
559
- @lex_state = EXPR_BEG
560
- Token(TkCOLON3)
561
- else
562
- @lex_state = EXPR_DOT
563
- Token(TkCOLON2)
564
- end
565
- end
566
-
567
- @OP.def_rule("/") do
568
- |op, io|
569
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
570
- identify_string(op)
571
- elsif peek(0) == '='
572
- getc
573
- @lex_state = EXPR_BEG
574
- Token(TkOPASGN, "/") #/)
575
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
576
- identify_string(op)
577
- else
578
- @lex_state = EXPR_BEG
579
- Token("/") #/)
580
- end
581
- end
582
-
583
- @OP.def_rules("^") do
584
- |op, io|
585
- @lex_state = EXPR_BEG
586
- Token("^")
587
- end
588
-
589
- @OP.def_rules(",") do
590
- |op, io|
591
- @lex_state = EXPR_BEG
592
- Token(op)
593
- end
594
-
595
- @OP.def_rules(";") do
596
- |op, io|
597
- @lex_state = EXPR_BEG
598
- until (@indent_stack.empty? ||
599
- [TkLPAREN, TkLBRACK, TkLBRACE,
600
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
601
- @indent_stack.pop
602
- end
603
- Token(op)
604
- end
605
-
606
- @OP.def_rule("~") do
607
- |op, io|
608
- @lex_state = EXPR_BEG
609
- Token("~")
610
- end
611
-
612
- @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
613
- |op, io|
614
- @lex_state = EXPR_BEG
615
- Token("~")
616
- end
617
-
618
- @OP.def_rule("(") do
619
- |op, io|
620
- @indent += 1
621
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
622
- @lex_state = EXPR_BEG
623
- tk_c = TkfLPAREN
624
- else
625
- @lex_state = EXPR_BEG
626
- tk_c = TkLPAREN
627
- end
628
- @indent_stack.push tk_c
629
- Token(tk_c)
630
- end
631
-
632
- @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
633
- |op, io|
634
- @lex_state = EXPR_ARG
635
- Token("[]")
636
- end
637
-
638
- @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
639
- |op, io|
640
- @lex_state = EXPR_ARG
641
- Token("[]=")
642
- end
643
-
644
- @OP.def_rule("[") do
645
- |op, io|
646
- @indent += 1
647
- if @lex_state == EXPR_FNAME
648
- tk_c = TkfLBRACK
649
- else
650
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
651
- tk_c = TkLBRACK
652
- elsif @lex_state == EXPR_ARG && @space_seen
653
- tk_c = TkLBRACK
654
- else
655
- tk_c = TkfLBRACK
656
- end
657
- @lex_state = EXPR_BEG
658
- end
659
- @indent_stack.push tk_c
660
- Token(tk_c)
661
- end
662
-
663
- @OP.def_rule("{") do
664
- |op, io|
665
- @indent += 1
666
- if @lex_state != EXPR_END && @lex_state != EXPR_ARG
667
- tk_c = TkLBRACE
668
- else
669
- tk_c = TkfLBRACE
670
- end
671
- @lex_state = EXPR_BEG
672
- @indent_stack.push tk_c
673
- Token(tk_c)
674
- end
675
-
676
- @OP.def_rule('\\') do
677
- |op, io|
678
- if getc == "\n"
679
- @space_seen = true
680
- @continue = true
681
- Token(TkSPACE)
682
- else
683
- read_escape
684
- Token("\\")
685
- end
686
- end
687
-
688
- @OP.def_rule('%') do
689
- |op, io|
690
- if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
691
- identify_quotation
692
- elsif peek(0) == '='
693
- getc
694
- Token(TkOPASGN, :%)
695
- elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
696
- identify_quotation
697
- else
698
- @lex_state = EXPR_BEG
699
- Token("%") #))
700
- end
701
- end
702
-
703
- @OP.def_rule('$') do
704
- |op, io|
705
- identify_gvar
200
+ def check_code_block(code)
201
+ return true if @tokens.empty?
202
+ if @tokens.last[1] == :on_heredoc_beg
203
+ return true
706
204
  end
707
205
 
708
- @OP.def_rule('@') do
709
- |op, io|
710
- if peek(0) =~ /[\w@]/
711
- ungetc
712
- identify_identifier
206
+ begin # check if parser error are available
207
+ verbose, $VERBOSE = $VERBOSE, nil
208
+ case RUBY_ENGINE
209
+ when 'jruby'
210
+ JRuby.compile_ir(code)
713
211
  else
714
- Token("@")
212
+ RubyVM::InstructionSequence.compile(code)
213
+ end
214
+ rescue SyntaxError => e
215
+ case e.message
216
+ when /unterminated (?:string|regexp) meets end of file/
217
+ # "unterminated regexp meets end of file"
218
+ #
219
+ # example:
220
+ # /
221
+ #
222
+ # "unterminated string meets end of file"
223
+ #
224
+ # example:
225
+ # '
226
+ return true
227
+ when /syntax error, unexpected end-of-input/
228
+ # "syntax error, unexpected end-of-input, expecting keyword_end"
229
+ #
230
+ # example:
231
+ # if ture
232
+ # hoge
233
+ # if false
234
+ # fuga
235
+ # end
236
+ return true
237
+ when /syntax error, unexpected keyword_end/
238
+ # "syntax error, unexpected keyword_end"
239
+ #
240
+ # example:
241
+ # if (
242
+ # end
243
+ #
244
+ # example:
245
+ # end
246
+ return false
247
+ when /syntax error, unexpected '\.'/
248
+ # "syntax error, unexpected '.'"
249
+ #
250
+ # example:
251
+ # .
252
+ return false
253
+ when /unexpected tREGEXP_BEG/
254
+ # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
255
+ #
256
+ # example:
257
+ # method / f /
258
+ return false
715
259
  end
260
+ ensure
261
+ $VERBOSE = verbose
716
262
  end
717
263
 
718
- @OP.def_rule("") do
719
- |op, io|
720
- printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
721
- if peek(0) =~ /[0-9]/
722
- t = identify_number
723
- elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
724
- t = identify_identifier
264
+ if defined?(Ripper::EXPR_BEG)
265
+ last_lex_state = @tokens.last[3]
266
+ if last_lex_state.allbits?(Ripper::EXPR_BEG)
267
+ return false
268
+ elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
269
+ return true
270
+ elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
271
+ return true
272
+ elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
273
+ return true
274
+ elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
275
+ return true
276
+ elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
277
+ return false
725
278
  end
726
- printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
727
- t
728
279
  end
729
280
 
730
- p @OP if RubyLex.debug?
281
+ false
731
282
  end
732
283
 
733
- def identify_gvar
734
- @lex_state = EXPR_END
735
-
736
- case ch = getc
737
- when /[~_*$?!@\/\\;,=:<>".]/ #"
738
- Token(TkGVAR, "$" + ch)
739
- when "-"
740
- Token(TkGVAR, "$-" + getc)
741
- when "&", "`", "'", "+"
742
- Token(TkBACK_REF, "$"+ch)
743
- when /[1-9]/
744
- while getc =~ /[0-9]/; end
745
- ungetc
746
- Token(TkNTH_REF)
747
- when /\w/
748
- ungetc
749
- ungetc
750
- identify_identifier
751
- else
752
- ungetc
753
- Token("$")
754
- end
755
- end
756
-
757
- def identify_identifier
758
- token = ""
759
- if peek(0) =~ /[$@]/
760
- token.concat(c = getc)
761
- if c == "@" and peek(0) == "@"
762
- token.concat getc
763
- end
764
- end
765
-
766
- while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
767
- print ":", ch, ":" if RubyLex.debug?
768
- token.concat ch
769
- end
770
- ungetc
771
-
772
- if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
773
- token.concat getc
774
- end
775
-
776
- # almost fix token
777
-
778
- case token
779
- when /^\$/
780
- return Token(TkGVAR, token)
781
- when /^\@\@/
782
- @lex_state = EXPR_END
783
- # p Token(TkCVAR, token)
784
- return Token(TkCVAR, token)
785
- when /^\@/
786
- @lex_state = EXPR_END
787
- return Token(TkIVAR, token)
788
- end
789
-
790
- if @lex_state != EXPR_DOT
791
- print token, "\n" if RubyLex.debug?
792
-
793
- token_c, *trans = TkReading2Token[token]
794
- if token_c
795
- # reserved word?
796
-
797
- if (@lex_state != EXPR_BEG &&
798
- @lex_state != EXPR_FNAME &&
799
- trans[1])
800
- # modifiers
801
- token_c = TkSymbol2Token[trans[1]]
802
- @lex_state = trans[0]
803
- else
804
- if @lex_state != EXPR_FNAME and peek(0) != ':'
805
- if ENINDENT_CLAUSE.include?(token)
806
- # check for ``class = val'' etc.
807
- valid = true
808
- case token
809
- when "class"
810
- valid = false unless peek_match?(/^\s*(<<|\w|::)/)
811
- when "def"
812
- valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
813
- when "do"
814
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
815
- when *ENINDENT_CLAUSE
816
- valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
817
- else
818
- # no nothing
819
- end
820
- if valid
821
- if token == "do"
822
- if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
823
- @indent += 1
824
- @indent_stack.push token_c
825
- end
826
- else
827
- @indent += 1
828
- @indent_stack.push token_c
829
- end
830
- end
831
-
832
- elsif DEINDENT_CLAUSE.include?(token)
833
- @indent -= 1
834
- @indent_stack.pop
835
- end
836
- @lex_state = trans[0]
284
+ def process_nesting_level
285
+ indent = 0
286
+ @tokens.each_with_index { |t, index|
287
+ case t[1]
288
+ when :on_lbracket, :on_lbrace, :on_lparen
289
+ indent += 1
290
+ when :on_rbracket, :on_rbrace, :on_rparen
291
+ indent -= 1
292
+ when :on_kw
293
+ next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
294
+ case t[2]
295
+ when 'do'
296
+ if index > 0 and @tokens[index - 1][3].anybits?(Ripper::EXPR_CMDARG | Ripper::EXPR_ENDFN)
297
+ # method_with_block do; end
298
+ indent += 1
837
299
  else
838
- @lex_state = EXPR_END
300
+ # while cond do; end # also "until" or "for"
301
+ # This "do" doesn't increment indent because "while" already
302
+ # incremented.
839
303
  end
304
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
305
+ indent += 1
306
+ when 'if', 'unless', 'while', 'until'
307
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
308
+ indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL)
309
+ when 'end'
310
+ indent -= 1
840
311
  end
841
- return Token(token_c, token)
842
312
  end
843
- end
844
-
845
- if @lex_state == EXPR_FNAME
846
- @lex_state = EXPR_END
847
- if peek(0) == '='
848
- token.concat getc
849
- end
850
- elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
851
- @lex_state = EXPR_ARG
852
- else
853
- @lex_state = EXPR_END
854
- end
855
-
856
- if token[0, 1] =~ /[A-Z]/
857
- return Token(TkCONSTANT, token)
858
- elsif token[token.size - 1, 1] =~ /[!?]/
859
- return Token(TkFID, token)
860
- else
861
- return Token(TkIDENTIFIER, token)
862
- end
313
+ # percent literals are not indented
314
+ }
315
+ indent
863
316
  end
864
317
 
865
- def identify_here_document
866
- ch = getc
867
- if ch == "-" || ch == "~"
868
- ch = getc
869
- indent = true
870
- end
871
- if /['"`]/ =~ ch
872
- lt = ch
873
- quoted = ""
874
- while (c = getc) && c != lt
875
- quoted.concat c
876
- end
877
- else
878
- lt = '"'
879
- quoted = ch.dup
880
- while (c = getc) && c =~ /\w/
881
- quoted.concat c
882
- end
883
- ungetc
884
- end
885
-
886
- ltback, @ltype = @ltype, lt
887
- reserve = []
888
- while ch = getc
889
- reserve.push ch
890
- if ch == "\\"
891
- reserve.push ch = getc
892
- elsif ch == "\n"
893
- break
894
- end
895
- end
896
-
897
- @here_header = false
898
-
899
- line = ""
900
- while ch = getc
901
- if ch == "\n"
902
- if line == quoted
903
- break
318
+ def check_newline_depth_difference
319
+ depth_difference = 0
320
+ @tokens.each_with_index do |t, index|
321
+ case t[1]
322
+ when :on_ignored_nl, :on_nl, :on_comment
323
+ if index != (@tokens.size - 1)
324
+ depth_difference = 0
904
325
  end
905
- line = ""
906
- else
907
- line.concat ch unless indent && line == "" && /\s/ =~ ch
908
- if @ltype != "'" && ch == "#" && peek(0) == "{"
909
- identify_string_dvar
910
- end
911
- end
912
- end
913
-
914
- @here_header = true
915
- @here_readed.concat reserve
916
- while ch = reserve.pop
917
- ungetc ch
918
- end
919
-
920
- @ltype = ltback
921
- @lex_state = EXPR_END
922
- Token(Ltype2Token[lt])
923
- end
924
-
925
- def identify_quotation
926
- ch = getc
927
- if lt = PERCENT_LTYPE[ch]
928
- ch = getc
929
- elsif ch =~ /\W/
930
- lt = "\""
931
- else
932
- RubyLex.fail SyntaxError, "unknown type of %string"
933
- end
934
- @quoted = ch unless @quoted = PERCENT_PAREN[ch]
935
- identify_string(lt, @quoted)
936
- end
937
-
938
- def identify_number
939
- @lex_state = EXPR_END
940
-
941
- if peek(0) == "0" && peek(1) !~ /[.eE]/
942
- getc
943
- case peek(0)
944
- when /[xX]/
945
- ch = getc
946
- match = /[0-9a-fA-F_]/
947
- when /[bB]/
948
- ch = getc
949
- match = /[01_]/
950
- when /[oO]/
951
- ch = getc
952
- match = /[0-7_]/
953
- when /[dD]/
954
- ch = getc
955
- match = /[0-9_]/
956
- when /[0-7]/
957
- match = /[0-7_]/
958
- when /[89]/
959
- RubyLex.fail SyntaxError, "Invalid octal digit"
960
- else
961
- return Token(TkINTEGER)
962
- end
963
-
964
- len0 = true
965
- non_digit = false
966
- while ch = getc
967
- if match =~ ch
968
- if ch == "_"
969
- if non_digit
970
- RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
971
- else
972
- non_digit = ch
973
- end
326
+ next
327
+ when :on_sp
328
+ next
329
+ end
330
+ case t[1]
331
+ when :on_lbracket, :on_lbrace, :on_lparen
332
+ depth_difference += 1
333
+ when :on_rbracket, :on_rbrace, :on_rparen
334
+ depth_difference -= 1
335
+ when :on_kw
336
+ next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
337
+ case t[2]
338
+ when 'do'
339
+ if index > 0 and @tokens[index - 1][3].anybits?(Ripper::EXPR_CMDARG | Ripper::EXPR_ENDFN)
340
+ # method_with_block do; end
341
+ depth_difference += 1
974
342
  else
975
- non_digit = false
976
- len0 = false
343
+ # while cond do; end # also "until" or "for"
344
+ # This "do" doesn't increment indent because "while" already
345
+ # incremented.
977
346
  end
978
- else
979
- ungetc
980
- if len0
981
- RubyLex.fail SyntaxError, "numeric literal without digits"
982
- end
983
- if non_digit
984
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
347
+ when 'def', 'case', 'for', 'begin', 'class', 'module'
348
+ depth_difference += 1
349
+ when 'if', 'unless', 'while', 'until'
350
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
351
+ unless t[3].allbits?(Ripper::EXPR_LABEL)
352
+ depth_difference += 1
985
353
  end
986
- break
987
- end
988
- end
989
- return Token(TkINTEGER)
990
- end
991
-
992
- type = TkINTEGER
993
- allow_point = true
994
- allow_e = true
995
- non_digit = false
996
- while ch = getc
997
- case ch
998
- when /[0-9]/
999
- non_digit = false
1000
- when "_"
1001
- non_digit = ch
1002
- when allow_point && "."
1003
- if non_digit
1004
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1005
- end
1006
- type = TkFLOAT
1007
- if peek(0) !~ /[0-9]/
1008
- type = TkINTEGER
1009
- ungetc
1010
- break
1011
- end
1012
- allow_point = false
1013
- when allow_e && "e", allow_e && "E"
1014
- if non_digit
1015
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1016
- end
1017
- type = TkFLOAT
1018
- if peek(0) =~ /[+-]/
1019
- getc
354
+ when 'else', 'elsif', 'rescue', 'ensure', 'when', 'in'
355
+ depth_difference += 1
1020
356
  end
1021
- allow_e = false
1022
- allow_point = false
1023
- non_digit = ch
1024
- else
1025
- if non_digit
1026
- RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
1027
- end
1028
- ungetc
1029
- break
1030
357
  end
1031
358
  end
1032
- Token(type)
359
+ depth_difference
1033
360
  end
1034
361
 
1035
- def identify_string(ltype, quoted = ltype)
1036
- @ltype = ltype
1037
- @quoted = quoted
1038
- subtype = nil
1039
- begin
1040
- nest = 0
1041
- while ch = getc
1042
- if @quoted == ch and nest == 0
1043
- break
1044
- elsif @ltype != "'" && ch == "#" && peek(0) == "{"
1045
- identify_string_dvar
1046
- elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
1047
- subtype = true
1048
- elsif ch == '\\' and @ltype == "'" #'
1049
- case ch = getc
1050
- when "\\", "\n", "'"
1051
- else
1052
- ungetc
1053
- end
1054
- elsif ch == '\\' #'
1055
- read_escape
362
+ def check_corresponding_token_depth
363
+ corresponding_token_depth = nil
364
+ is_first_spaces_of_line = true
365
+ is_first_printable_of_line = true
366
+ spaces_of_nest = []
367
+ spaces_at_line_head = 0
368
+ @tokens.each_with_index do |t, index|
369
+ case t[1]
370
+ when :on_ignored_nl, :on_nl, :on_comment
371
+ corresponding_token_depth = nil
372
+ spaces_at_line_head = 0
373
+ is_first_spaces_of_line = true
374
+ is_first_printable_of_line = true
375
+ next
376
+ when :on_sp
377
+ spaces_at_line_head = t[2].count(' ') if is_first_spaces_of_line
378
+ is_first_spaces_of_line = false
379
+ next
380
+ end
381
+ case t[1]
382
+ when :on_lbracket, :on_lbrace, :on_lparen
383
+ spaces_of_nest.push(spaces_at_line_head)
384
+ when :on_rbracket, :on_rbrace, :on_rparen
385
+ if is_first_printable_of_line
386
+ corresponding_token_depth = spaces_of_nest.pop
387
+ else
388
+ spaces_of_nest.pop
389
+ corresponding_token_depth = nil
1056
390
  end
1057
- if PERCENT_PAREN.values.include?(@quoted)
1058
- if PERCENT_PAREN[ch] == @quoted
1059
- nest += 1
1060
- elsif ch == @quoted
1061
- nest -= 1
391
+ when :on_kw
392
+ next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
393
+ case t[2]
394
+ when 'def', 'do', 'case', 'for', 'begin', 'class', 'module'
395
+ spaces_of_nest.push(spaces_at_line_head)
396
+ when 'if', 'unless', 'while', 'until'
397
+ # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
398
+ unless t[3].allbits?(Ripper::EXPR_LABEL)
399
+ spaces_of_nest.push(spaces_at_line_head)
400
+ end
401
+ when 'else', 'elsif', 'rescue', 'ensure', 'when', 'in'
402
+ corresponding_token_depth = spaces_of_nest.last
403
+ when 'end'
404
+ if is_first_printable_of_line
405
+ corresponding_token_depth = spaces_of_nest.pop
406
+ else
407
+ spaces_of_nest.pop
408
+ corresponding_token_depth = nil
1062
409
  end
1063
410
  end
1064
411
  end
1065
- if @ltype == "/"
1066
- while /[imxoesun]/ =~ peek(0)
1067
- getc
1068
- end
1069
- end
1070
- if subtype
1071
- Token(DLtype2Token[ltype])
1072
- else
1073
- Token(Ltype2Token[ltype])
1074
- end
1075
- ensure
1076
- @ltype = nil
1077
- @quoted = nil
1078
- @lex_state = EXPR_END
412
+ is_first_spaces_of_line = false
413
+ is_first_printable_of_line = false
1079
414
  end
415
+ corresponding_token_depth
1080
416
  end
1081
417
 
1082
- def identify_string_dvar
1083
- begin
1084
- getc
1085
-
1086
- reserve_continue = @continue
1087
- reserve_ltype = @ltype
1088
- reserve_indent = @indent
1089
- reserve_indent_stack = @indent_stack
1090
- reserve_state = @lex_state
1091
- reserve_quoted = @quoted
1092
-
1093
- @ltype = nil
1094
- @quoted = nil
1095
- @indent = 0
1096
- @indent_stack = []
1097
- @lex_state = EXPR_BEG
1098
-
1099
- loop do
1100
- @continue = false
1101
- prompt
1102
- tk = token
1103
- if @ltype or @continue or @indent >= 0
1104
- next
418
+ def check_string_literal
419
+ i = 0
420
+ start_token = []
421
+ end_type = []
422
+ while i < @tokens.size
423
+ t = @tokens[i]
424
+ case t[1]
425
+ when :on_tstring_beg
426
+ start_token << t
427
+ end_type << [:on_tstring_end, :on_label_end]
428
+ when :on_regexp_beg
429
+ start_token << t
430
+ end_type << :on_regexp_end
431
+ when :on_symbeg
432
+ acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw}
433
+ if (i + 1) < @tokens.size and acceptable_single_tokens.all?{ |t| @tokens[i + 1][1] != t }
434
+ start_token << t
435
+ end_type << :on_tstring_end
1105
436
  end
1106
- break if tk.kind_of?(TkRBRACE)
1107
- end
1108
- ensure
1109
- @continue = reserve_continue
1110
- @ltype = reserve_ltype
1111
- @indent = reserve_indent
1112
- @indent_stack = reserve_indent_stack
1113
- @lex_state = reserve_state
1114
- @quoted = reserve_quoted
1115
- end
437
+ when :on_backtick
438
+ start_token << t
439
+ end_type << :on_tstring_end
440
+ when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
441
+ start_token << t
442
+ end_type << :on_tstring_end
443
+ when :on_heredoc_beg
444
+ start_token << t
445
+ end_type << :on_heredoc_end
446
+ when *end_type.last
447
+ start_token.pop
448
+ end_type.pop
449
+ end
450
+ i += 1
451
+ end
452
+ start_token.last.nil? ? '' : start_token.last
1116
453
  end
1117
454
 
1118
- def identify_comment
1119
- @ltype = "#"
1120
-
1121
- while ch = getc
1122
- if ch == "\n"
1123
- @ltype = nil
1124
- ungetc
1125
- break
1126
- end
1127
- end
1128
- return Token(TkCOMMENT)
1129
- end
1130
-
1131
- def read_escape
1132
- case ch = getc
1133
- when "\n", "\r", "\f"
1134
- when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
1135
- when /[0-7]/
1136
- ungetc ch
1137
- 3.times do
1138
- case ch = getc
1139
- when /[0-7]/
1140
- when nil
1141
- break
1142
- else
1143
- ungetc
1144
- break
1145
- end
1146
- end
1147
-
1148
- when "x"
1149
- 2.times do
1150
- case ch = getc
1151
- when /[0-9a-fA-F]/
1152
- when nil
1153
- break
1154
- else
1155
- ungetc
1156
- break
1157
- end
1158
- end
1159
-
1160
- when "M"
1161
- if (ch = getc) != '-'
1162
- ungetc
1163
- else
1164
- if (ch = getc) == "\\" #"
1165
- read_escape
1166
- end
1167
- end
1168
-
1169
- when "C", "c" #, "^"
1170
- if ch == "C" and (ch = getc) != "-"
1171
- ungetc
1172
- elsif (ch = getc) == "\\" #"
1173
- read_escape
455
+ def process_literal_type
456
+ start_token = check_string_literal
457
+ case start_token[1]
458
+ when :on_tstring_beg
459
+ case start_token[2]
460
+ when ?" then ?"
461
+ when /^%.$/ then ?"
462
+ when /^%Q.$/ then ?"
463
+ when ?' then ?'
464
+ when /^%q.$/ then ?'
465
+ end
466
+ when :on_regexp_beg then ?/
467
+ when :on_symbeg then ?:
468
+ when :on_backtick then ?`
469
+ when :on_qwords_beg then ?]
470
+ when :on_words_beg then ?]
471
+ when :on_qsymbols_beg then ?]
472
+ when :on_symbols_beg then ?]
473
+ when :on_heredoc_beg
474
+ start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
475
+ case $1
476
+ when ?" then ?"
477
+ when ?' then ?'
478
+ when ?` then ?`
479
+ else ?"
1174
480
  end
1175
481
  else
1176
- # other characters
482
+ nil
1177
483
  end
1178
484
  end
1179
485
  end