ruby_parser 3.12.0 → 3.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/History.rdoc +47 -0
- data/Manifest.txt +5 -4
- data/Rakefile +41 -28
- data/compare/normalize.rb +29 -2
- data/debugging.md +18 -0
- data/lib/rp_extensions.rb +0 -7
- data/lib/ruby20_parser.rb +3689 -3502
- data/lib/ruby20_parser.y +284 -201
- data/lib/ruby21_parser.rb +3755 -3570
- data/lib/ruby21_parser.y +281 -197
- data/lib/ruby22_parser.rb +3780 -3600
- data/lib/ruby22_parser.y +281 -202
- data/lib/ruby23_parser.rb +3755 -3591
- data/lib/ruby23_parser.y +282 -203
- data/lib/ruby24_parser.rb +3755 -3591
- data/lib/ruby24_parser.y +282 -203
- data/lib/ruby25_parser.rb +3754 -3591
- data/lib/ruby25_parser.y +282 -203
- data/lib/ruby26_parser.rb +6999 -0
- data/lib/{ruby19_parser.y → ruby26_parser.y} +658 -305
- data/lib/ruby_lexer.rb +116 -118
- data/lib/ruby_lexer.rex +10 -8
- data/lib/ruby_lexer.rex.rb +8 -8
- data/lib/ruby_parser.rb +5 -7
- data/lib/ruby_parser.yy +308 -218
- data/lib/ruby_parser_extras.rb +88 -106
- data/test/test_ruby_lexer.rb +68 -121
- data/test/test_ruby_parser.rb +173 -277
- data/tools/munge.rb +216 -0
- data/tools/ripper.rb +23 -0
- metadata +18 -17
- metadata.gz.sig +1 -1
- data/lib/ruby18_parser.rb +0 -5793
- data/lib/ruby18_parser.y +0 -1908
- data/lib/ruby19_parser.rb +0 -6185
data/lib/ruby_lexer.rb
CHANGED
@@ -34,6 +34,10 @@ class RubyLexer
|
|
34
34
|
STR_SSYM = STR_FUNC_SYMBOL
|
35
35
|
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
36
36
|
|
37
|
+
EXPR_BEG_ANY = [:expr_beg, :expr_mid, :expr_class ]
|
38
|
+
EXPR_ARG_ANY = [:expr_arg, :expr_cmdarg, ]
|
39
|
+
EXPR_END_ANY = [:expr_end, :expr_endarg, :expr_endfn]
|
40
|
+
|
37
41
|
ESCAPES = {
|
38
42
|
"a" => "\007",
|
39
43
|
"b" => "\010",
|
@@ -77,7 +81,7 @@ class RubyLexer
|
|
77
81
|
attr_accessor :brace_nest
|
78
82
|
attr_accessor :cmdarg
|
79
83
|
attr_accessor :command_start
|
80
|
-
attr_accessor :
|
84
|
+
attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
|
81
85
|
attr_accessor :last_state
|
82
86
|
attr_accessor :cond
|
83
87
|
attr_accessor :extra_lineno
|
@@ -109,20 +113,13 @@ class RubyLexer
|
|
109
113
|
# Last token read via next_token.
|
110
114
|
attr_accessor :token
|
111
115
|
|
112
|
-
##
|
113
|
-
# What version of ruby to parse. 18 and 19 are the only valid values
|
114
|
-
# currently supported.
|
115
|
-
|
116
|
-
attr_accessor :version
|
117
|
-
|
118
116
|
attr_writer :comments
|
119
117
|
|
120
|
-
def initialize
|
121
|
-
self.version = v
|
118
|
+
def initialize _ = nil
|
122
119
|
@lex_state = :expr_none
|
123
120
|
|
124
|
-
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
125
121
|
self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
|
122
|
+
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
126
123
|
|
127
124
|
reset
|
128
125
|
end
|
@@ -343,23 +340,30 @@ class RubyLexer
|
|
343
340
|
end
|
344
341
|
|
345
342
|
def is_arg?
|
346
|
-
in_lex_state?
|
343
|
+
in_lex_state?(*EXPR_ARG_ANY)
|
347
344
|
end
|
348
345
|
|
349
346
|
def is_beg?
|
350
|
-
in_lex_state?
|
347
|
+
# TODO: in_lex_state?(*EXPR_BEG_ANY) || lex_state == [:expr_arg, :expr_labeled]
|
348
|
+
in_lex_state?(*EXPR_BEG_ANY, :expr_value, :expr_labeled)
|
351
349
|
end
|
352
350
|
|
353
351
|
def is_end?
|
354
|
-
in_lex_state?
|
352
|
+
in_lex_state?(*EXPR_END_ANY)
|
355
353
|
end
|
356
354
|
|
355
|
+
def lvar_defined? id
|
356
|
+
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
357
|
+
self.parser.env[id.to_sym] == :lvar
|
358
|
+
end
|
359
|
+
|
360
|
+
|
357
361
|
def ruby22_label?
|
358
362
|
ruby22plus? and is_label_possible?
|
359
363
|
end
|
360
364
|
|
361
365
|
def is_label_possible?
|
362
|
-
(in_lex_state?(:expr_beg, :expr_endfn) && !
|
366
|
+
(in_lex_state?(:expr_beg, :expr_endfn) && !cmd_state) || is_arg?
|
363
367
|
end
|
364
368
|
|
365
369
|
def is_label_suffix?
|
@@ -370,6 +374,10 @@ class RubyLexer
|
|
370
374
|
is_arg? and space_seen and c !~ /\s/
|
371
375
|
end
|
372
376
|
|
377
|
+
def lambda_beginning?
|
378
|
+
lpar_beg && lpar_beg == paren_nest
|
379
|
+
end
|
380
|
+
|
373
381
|
def matched
|
374
382
|
ss.matched
|
375
383
|
end
|
@@ -411,20 +419,17 @@ class RubyLexer
|
|
411
419
|
nil # TODO
|
412
420
|
end
|
413
421
|
|
414
|
-
def
|
422
|
+
def process_brace_close text
|
423
|
+
# matching compare/parse23.y:8561
|
415
424
|
cond.lexpop
|
416
425
|
cmdarg.lexpop
|
417
426
|
|
418
427
|
case matched
|
419
428
|
when "}" then
|
420
429
|
self.brace_nest -= 1
|
421
|
-
self.lex_state = :expr_endarg
|
422
|
-
|
423
|
-
# TODO
|
424
|
-
# if (c == '}') {
|
425
|
-
# if (!brace_nest--) c = tSTRING_DEND;
|
426
|
-
# }
|
430
|
+
self.lex_state = :expr_endarg # TODO: :expr_end ? Look at 2.6
|
427
431
|
|
432
|
+
return :tSTRING_DEND, matched if brace_nest < 0
|
428
433
|
return :tRCURLY, matched
|
429
434
|
when "]" then
|
430
435
|
self.paren_nest -= 1
|
@@ -463,24 +468,30 @@ class RubyLexer
|
|
463
468
|
end
|
464
469
|
end
|
465
470
|
|
466
|
-
def
|
471
|
+
def process_brace_open text
|
472
|
+
# matching compare/parse23.y:8694
|
467
473
|
self.brace_nest += 1
|
468
|
-
|
474
|
+
|
475
|
+
if lambda_beginning? then
|
469
476
|
self.lpar_beg = nil
|
470
|
-
self.paren_nest -= 1
|
477
|
+
self.paren_nest -= 1 # close arg list when lambda opens body
|
471
478
|
|
472
479
|
return expr_result(:tLAMBEG, "{")
|
473
480
|
end
|
474
481
|
|
475
|
-
token =
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
+
token = case lex_state
|
483
|
+
when :expr_labeled then
|
484
|
+
:tLBRACE # hash
|
485
|
+
when *EXPR_ARG_ANY, :expr_end, :expr_endfn then
|
486
|
+
:tLCURLY # block (primary)
|
487
|
+
when :expr_endarg
|
488
|
+
:tLBRACE_ARG # block (expr)
|
489
|
+
else
|
490
|
+
:tLBRACE # hash
|
491
|
+
end
|
482
492
|
|
483
|
-
self.
|
493
|
+
# TODO: self.lex_state |= :expr_label if token != :tLBRACE_ARG
|
494
|
+
self.command_start = true if token != :tLBRACE
|
484
495
|
|
485
496
|
return expr_result(token, "{")
|
486
497
|
end
|
@@ -519,12 +530,19 @@ class RubyLexer
|
|
519
530
|
def process_lchevron text
|
520
531
|
if (!in_lex_state?(:expr_dot, :expr_class) &&
|
521
532
|
!is_end? &&
|
522
|
-
(!is_arg? || space_seen)) then
|
533
|
+
(!is_arg? || space_seen)) then # TODO: || in_state(:expr_labeled)
|
523
534
|
tok = self.heredoc_identifier
|
524
535
|
return tok if tok
|
525
536
|
end
|
526
537
|
|
527
|
-
|
538
|
+
if in_arg_state? then
|
539
|
+
self.lex_state = :expr_arg
|
540
|
+
else
|
541
|
+
self.command_start = true if lex_state == :expr_class
|
542
|
+
self.lex_state = :expr_beg
|
543
|
+
end
|
544
|
+
|
545
|
+
return result(lex_state, :tLSHFT, "\<\<")
|
528
546
|
end
|
529
547
|
|
530
548
|
def process_newline_or_comment text
|
@@ -534,6 +552,7 @@ class RubyLexer
|
|
534
552
|
if c == '#' then
|
535
553
|
ss.pos -= 1
|
536
554
|
|
555
|
+
# TODO: handle magic comments
|
537
556
|
while scan(/\s*\#.*(\n+|\z)/) do
|
538
557
|
hit = true
|
539
558
|
self.lineno += matched.lines.to_a.size
|
@@ -548,8 +567,21 @@ class RubyLexer
|
|
548
567
|
# Replace a string of newlines with a single one
|
549
568
|
self.lineno += matched.lines.to_a.size if scan(/\n+/)
|
550
569
|
|
551
|
-
|
552
|
-
|
570
|
+
# TODO: remove :expr_value -- audit all uses of it
|
571
|
+
c = in_lex_state?(:expr_beg, :expr_value, :expr_class,
|
572
|
+
:expr_fname, :expr_dot) && !in_lex_state?(:expr_labeled)
|
573
|
+
|
574
|
+
# TODO: figure out what token_seen is for
|
575
|
+
# TODO: if c || self.lex_state == [:expr_beg, :expr_labeled] then
|
576
|
+
if c || self.lex_state == :expr_labeled then
|
577
|
+
# ignore if !fallthrough?
|
578
|
+
if !c && parser.in_kwarg then
|
579
|
+
# normal newline
|
580
|
+
return result(:expr_beg, :tNL, nil)
|
581
|
+
else
|
582
|
+
return # skip
|
583
|
+
end
|
584
|
+
end
|
553
585
|
|
554
586
|
if scan(/([\ \t\r\f\v]*)(\.|&)/) then
|
555
587
|
self.space_seen = true unless ss[1].empty?
|
@@ -569,11 +601,7 @@ class RubyLexer
|
|
569
601
|
end
|
570
602
|
|
571
603
|
def process_paren text
|
572
|
-
token =
|
573
|
-
process_paren18
|
574
|
-
else
|
575
|
-
process_paren19
|
576
|
-
end
|
604
|
+
token = process_paren19
|
577
605
|
|
578
606
|
self.paren_nest += 1
|
579
607
|
|
@@ -581,25 +609,6 @@ class RubyLexer
|
|
581
609
|
return expr_result(token, "(")
|
582
610
|
end
|
583
611
|
|
584
|
-
def process_paren18
|
585
|
-
self.command_start = true
|
586
|
-
token = :tLPAREN2
|
587
|
-
|
588
|
-
if in_lex_state? :expr_beg, :expr_mid then
|
589
|
-
token = :tLPAREN
|
590
|
-
elsif space_seen then
|
591
|
-
if in_lex_state? :expr_cmdarg then
|
592
|
-
token = :tLPAREN_ARG
|
593
|
-
elsif in_lex_state? :expr_arg then
|
594
|
-
warning "don't put space before argument parentheses"
|
595
|
-
end
|
596
|
-
else
|
597
|
-
# not a ternary -- do nothing?
|
598
|
-
end
|
599
|
-
|
600
|
-
token
|
601
|
-
end
|
602
|
-
|
603
612
|
def process_paren19
|
604
613
|
if is_beg? then
|
605
614
|
:tLPAREN
|
@@ -654,8 +663,7 @@ class RubyLexer
|
|
654
663
|
|
655
664
|
def process_questionmark text
|
656
665
|
if is_end? then
|
657
|
-
|
658
|
-
return result(state, :tEH, "?")
|
666
|
+
return result(:expr_value, :tEH, "?")
|
659
667
|
end
|
660
668
|
|
661
669
|
if end_of_stream? then
|
@@ -677,8 +685,7 @@ class RubyLexer
|
|
677
685
|
end
|
678
686
|
|
679
687
|
# ternary
|
680
|
-
|
681
|
-
return result(state, :tEH, "?")
|
688
|
+
return result(:expr_value, :tEH, "?")
|
682
689
|
elsif check(/\w(?=\w)/) then # ternary, also
|
683
690
|
return result(:expr_beg, :tEH, "?")
|
684
691
|
end
|
@@ -689,11 +696,7 @@ class RubyLexer
|
|
689
696
|
ss.getch
|
690
697
|
end
|
691
698
|
|
692
|
-
|
693
|
-
return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
|
694
|
-
else
|
695
|
-
return result(:expr_end, :tSTRING, c)
|
696
|
-
end
|
699
|
+
return result(:expr_end, :tSTRING, c)
|
697
700
|
end
|
698
701
|
|
699
702
|
def process_slash text
|
@@ -742,7 +745,9 @@ class RubyLexer
|
|
742
745
|
token = :tLBRACK2
|
743
746
|
end
|
744
747
|
|
745
|
-
|
748
|
+
# TODO: this is done by expr_result except "|EXPR_LABEL")
|
749
|
+
# SET_LEX_STATE(EXPR_BEG|EXPR_LABEL);
|
750
|
+
expr_result token, "["
|
746
751
|
end
|
747
752
|
|
748
753
|
def possibly_escape_string text, check
|
@@ -758,9 +763,6 @@ class RubyLexer
|
|
758
763
|
def process_symbol text
|
759
764
|
symbol = possibly_escape_string text, /^:"/
|
760
765
|
|
761
|
-
rb_compile_error "symbol cannot contain '\\0'" if
|
762
|
-
ruby18 && symbol =~ /\0/
|
763
|
-
|
764
766
|
return result(:expr_end, :tSYMBOL, symbol)
|
765
767
|
end
|
766
768
|
|
@@ -784,11 +786,14 @@ class RubyLexer
|
|
784
786
|
def process_label text
|
785
787
|
symbol = possibly_escape_string text, /^"/
|
786
788
|
|
787
|
-
result(:
|
789
|
+
result(:expr_labeled, :tLABEL, [symbol, self.lineno]) # TODO: expr_arg|expr_labeled
|
788
790
|
end
|
789
791
|
|
790
792
|
def process_token text
|
793
|
+
# matching: parse_ident in compare/parse23.y:7989
|
791
794
|
# TODO: make this always return [token, lineno]
|
795
|
+
self.last_state = lex_state
|
796
|
+
|
792
797
|
token = self.token = text
|
793
798
|
token << matched if scan(/[\!\?](?!=)/)
|
794
799
|
|
@@ -807,36 +812,33 @@ class RubyLexer
|
|
807
812
|
:tIDENTIFIER
|
808
813
|
end
|
809
814
|
|
810
|
-
if
|
815
|
+
if is_label_possible? and is_label_suffix? then
|
811
816
|
scan(/:/)
|
812
|
-
|
817
|
+
# TODO: :expr_arg|:expr_labeled
|
818
|
+
return result :expr_labeled, :tLABEL, [token, self.lineno]
|
813
819
|
end
|
814
820
|
|
821
|
+
# TODO: mb == ENC_CODERANGE_7BIT && !in_lex_state?(:expr_dot)
|
815
822
|
unless in_lex_state? :expr_dot then
|
816
823
|
# See if it is a reserved word.
|
817
|
-
keyword =
|
818
|
-
RubyParserStuff::Keyword.keyword18 token
|
819
|
-
else
|
820
|
-
RubyParserStuff::Keyword.keyword19 token
|
821
|
-
end
|
824
|
+
keyword = RubyParserStuff::Keyword.keyword token
|
822
825
|
|
823
826
|
return process_token_keyword keyword if keyword
|
824
827
|
end # unless in_lex_state? :expr_dot
|
825
828
|
|
826
|
-
#
|
827
|
-
# if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
|
828
|
-
|
829
|
+
# matching: compare/parse23.y:8079
|
829
830
|
state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
|
830
|
-
|
831
|
-
elsif
|
831
|
+
cmd_state ? :expr_cmdarg : :expr_arg
|
832
|
+
elsif in_lex_state? :expr_fname then
|
832
833
|
:expr_endfn
|
833
834
|
else
|
834
835
|
:expr_end
|
835
836
|
end
|
836
837
|
|
837
838
|
if not [:expr_dot, :expr_fname].include? last_state and
|
838
|
-
|
839
|
-
|
839
|
+
(tok_id == :tIDENTIFIER) and # not :expr_fname, not attrasgn
|
840
|
+
lvar_defined?(token) then
|
841
|
+
state = :expr_end # TODO: EXPR_END|EXPR_LABEL
|
840
842
|
end
|
841
843
|
|
842
844
|
token.lineno = self.lineno # yes, on a string. I know... I know...
|
@@ -845,38 +847,38 @@ class RubyLexer
|
|
845
847
|
end
|
846
848
|
|
847
849
|
def process_token_keyword keyword
|
848
|
-
|
850
|
+
# matching MIDDLE of parse_ident in compare/parse23.y:8046
|
851
|
+
state = lex_state
|
852
|
+
self.lex_state = keyword.state
|
849
853
|
|
850
854
|
value = [token, self.lineno]
|
851
855
|
|
852
|
-
|
856
|
+
return result(lex_state, keyword.id0, value) if state == :expr_fname
|
857
|
+
|
858
|
+
self.command_start = true if lex_state == :expr_beg
|
853
859
|
|
854
860
|
case
|
855
|
-
when lex_state == :expr_fname then
|
856
|
-
result(state, keyword.id0, keyword.name)
|
857
861
|
when keyword.id0 == :kDO then
|
858
862
|
case
|
859
|
-
when
|
860
|
-
self.lpar_beg = nil
|
863
|
+
when lambda_beginning? then
|
864
|
+
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
861
865
|
self.paren_nest -= 1
|
862
|
-
|
866
|
+
result(lex_state, :kDO_LAMBDA, value)
|
863
867
|
when cond.is_in_state then
|
864
|
-
result(
|
865
|
-
when cmdarg.is_in_state &&
|
866
|
-
result(
|
867
|
-
when
|
868
|
-
result(
|
869
|
-
when lex_state == :expr_end # eg: a -> do end do end
|
870
|
-
result(state, :kDO_BLOCK, value)
|
868
|
+
result(lex_state, :kDO_COND, value)
|
869
|
+
when cmdarg.is_in_state && state != :expr_cmdarg then
|
870
|
+
result(lex_state, :kDO_BLOCK, value)
|
871
|
+
when [:expr_beg, :expr_endarg].include?(state) then
|
872
|
+
result(lex_state, :kDO_BLOCK, value)
|
871
873
|
else
|
872
|
-
result(
|
874
|
+
result(lex_state, :kDO, value)
|
873
875
|
end
|
874
|
-
when
|
875
|
-
result(
|
876
|
+
when [:expr_beg, :expr_labeled].include?(state) then
|
877
|
+
result(lex_state, keyword.id0, value)
|
876
878
|
when keyword.id0 != keyword.id1 then
|
877
|
-
result(:expr_beg, keyword.id1, value)
|
879
|
+
result(:expr_beg, keyword.id1, value) # TODO: :expr_beg|:expr_label
|
878
880
|
else
|
879
|
-
result(
|
881
|
+
result(lex_state, keyword.id1, value)
|
880
882
|
end
|
881
883
|
end
|
882
884
|
|
@@ -982,8 +984,8 @@ class RubyLexer
|
|
982
984
|
self.token = nil
|
983
985
|
self.extra_lineno = 0
|
984
986
|
|
985
|
-
self.cmdarg.reset
|
986
987
|
self.cond.reset
|
988
|
+
self.cmdarg.reset
|
987
989
|
end
|
988
990
|
|
989
991
|
def result lex_state, token, text # :nodoc:
|
@@ -992,10 +994,6 @@ class RubyLexer
|
|
992
994
|
[token, text]
|
993
995
|
end
|
994
996
|
|
995
|
-
def ruby18
|
996
|
-
RubyParser::V18 === parser
|
997
|
-
end
|
998
|
-
|
999
997
|
def scan re
|
1000
998
|
ss.scan re
|
1001
999
|
end
|
@@ -1133,11 +1131,7 @@ class RubyLexer
|
|
1133
1131
|
t = Regexp.escape term
|
1134
1132
|
x = Regexp.escape(paren) if paren && paren != "\000"
|
1135
1133
|
re = if qwords then
|
1136
|
-
|
1137
|
-
/[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
|
1138
|
-
else
|
1139
|
-
/[^#{t}#{x}\#\0\\\s\v]+|./ # argh. 1.8's \s doesn't pick up \v
|
1140
|
-
end
|
1134
|
+
/[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
|
1141
1135
|
else
|
1142
1136
|
/[^#{t}#{x}\#\0\\]+|./
|
1143
1137
|
end
|
@@ -1201,6 +1195,7 @@ class RubyLexer
|
|
1201
1195
|
end
|
1202
1196
|
|
1203
1197
|
def process_string # TODO: rewrite / remove
|
1198
|
+
# matches top of parser_yylex in compare/parse23.y:8113
|
1204
1199
|
token = if lex_strterm[0] == :heredoc then
|
1205
1200
|
self.heredoc lex_strterm
|
1206
1201
|
else
|
@@ -1209,6 +1204,7 @@ class RubyLexer
|
|
1209
1204
|
|
1210
1205
|
token_type, c = token
|
1211
1206
|
|
1207
|
+
# matches parser_string_term
|
1212
1208
|
if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
|
1213
1209
|
if (([:expr_beg, :expr_endfn].include?(lex_state) &&
|
1214
1210
|
!cond.is_in_state) || is_arg?) &&
|
@@ -1220,7 +1216,8 @@ class RubyLexer
|
|
1220
1216
|
|
1221
1217
|
if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
|
1222
1218
|
self.lex_strterm = nil
|
1223
|
-
|
1219
|
+
# TODO: :expr_beg|:expr_label
|
1220
|
+
self.lex_state = (token_type == :tLABEL_END) ? :expr_label : :expr_end
|
1224
1221
|
end
|
1225
1222
|
|
1226
1223
|
return token
|
@@ -1326,6 +1323,7 @@ class RubyLexer
|
|
1326
1323
|
# TODO: !ISASCII
|
1327
1324
|
return :tSTRING_DVAR, nil
|
1328
1325
|
when scan(/#[{]/) then
|
1326
|
+
self.command_start = true
|
1329
1327
|
return :tSTRING_DBEG, nil
|
1330
1328
|
when scan(/#/) then
|
1331
1329
|
string_buffer << '#'
|