ruby_parser 3.14.0 → 3.16.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ruby_lexer.rb CHANGED
@@ -25,11 +25,10 @@ class RubyLexer
25
25
 
26
26
  HAS_ENC = "".respond_to? :encoding
27
27
 
28
- IDENT_CHAR = if HAS_ENC then
29
- /[\w\u0080-\u{10ffff}]/u
30
- else
31
- /[\w\x80-\xFF]/n
32
- end
28
+ BTOKENS = {
29
+ ".." => :tBDOT2,
30
+ "..." => :tBDOT3,
31
+ }
33
32
 
34
33
  TOKENS = {
35
34
  "!" => :tBANG,
@@ -137,6 +136,10 @@ class RubyLexer
137
136
  ss.eos?
138
137
  end
139
138
 
139
+ def expr_beg?
140
+ lex_state =~ EXPR_BEG
141
+ end
142
+
140
143
  def expr_dot?
141
144
  lex_state =~ EXPR_DOT
142
145
  end
@@ -162,7 +165,7 @@ class RubyLexer
162
165
  indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
163
166
  expand = func =~ STR_FUNC_EXPAND
164
167
  eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
165
- eos_re = /#{indent}#{Regexp.escape eos}(#{eol}|\z)/
168
+ eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
166
169
  err_msg = "can't match #{eos_re.inspect} anywhere in "
167
170
 
168
171
  rb_compile_error err_msg if end_of_stream?
@@ -177,10 +180,15 @@ class RubyLexer
177
180
 
178
181
  if expand then
179
182
  case
180
- when scan(/#[$@]/) then
181
- ss.pos -= 1 # FIX omg stupid
183
+ when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
184
+ # TODO: !ISASCII
185
+ # ?! see parser_peek_variable_name
186
+ return :tSTRING_DVAR, matched
187
+ when scan(/#(?=\@\@?[a-zA-Z_])/) then
188
+ # TODO: !ISASCII
182
189
  return :tSTRING_DVAR, matched
183
190
  when scan(/#[{]/) then
191
+ self.command_start = true
184
192
  return :tSTRING_DBEG, matched
185
193
  when scan(/#/) then
186
194
  string_buffer << "#"
@@ -320,6 +328,11 @@ class RubyLexer
320
328
  lpar_beg && lpar_beg == paren_nest
321
329
  end
322
330
 
331
+ def is_local_id id
332
+ # maybe just make this false for now
333
+ self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
334
+ end
335
+
323
336
  def lvar_defined? id
324
337
  # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
325
338
  self.parser.env[id.to_sym] == :lvar
@@ -338,9 +351,9 @@ class RubyLexer
338
351
 
339
352
  if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
340
353
  rb_compile_error "unknown type of %string" if ss.matched_size == 2
341
- c, beg, short_hand = matched, ss.getch, false
354
+ c, beg, short_hand = matched, getch, false
342
355
  else # Short-hand (e.g. %{, %., %!, etc)
343
- c, beg, short_hand = "Q", ss.getch, true
356
+ c, beg, short_hand = "Q", getch, true
344
357
  end
345
358
 
346
359
  if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
@@ -457,7 +470,7 @@ class RubyLexer
457
470
  if text =~ check then
458
471
  content.gsub(ESC) { unescape $1 }
459
472
  else
460
- content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
473
+ content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
461
474
  end
462
475
  end
463
476
 
@@ -495,16 +508,19 @@ class RubyLexer
495
508
  end
496
509
 
497
510
  def process_brace_close text
498
- # matching compare/parse23.y:8561
499
- cond.lexpop
500
- cmdarg.lexpop
501
-
502
511
  case matched
503
512
  when "}" then
504
513
  self.brace_nest -= 1
505
- self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
506
-
507
514
  return :tSTRING_DEND, matched if brace_nest < 0
515
+ end
516
+
517
+ # matching compare/parse26.y:8099
518
+ cond.pop
519
+ cmdarg.pop
520
+
521
+ case matched
522
+ when "}" then
523
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
508
524
  return :tRCURLY, matched
509
525
  when "]" then
510
526
  self.paren_nest -= 1
@@ -573,6 +589,12 @@ class RubyLexer
573
589
  end
574
590
  end
575
591
 
592
+ def process_dots text
593
+ tokens = ruby27plus? && expr_beg? ? BTOKENS : TOKENS
594
+
595
+ result EXPR_BEG, tokens[text], text
596
+ end
597
+
576
598
  def process_float text
577
599
  rb_compile_error "Invalid numeric format" if text =~ /__/
578
600
 
@@ -605,7 +627,7 @@ class RubyLexer
605
627
  end
606
628
 
607
629
  def process_label text
608
- symbol = possibly_escape_string text, /^"/
630
+ symbol = possibly_escape_string text, /^\"/
609
631
 
610
632
  result EXPR_LAB, :tLABEL, [symbol, self.lineno]
611
633
  end
@@ -619,7 +641,7 @@ class RubyLexer
619
641
  text = text[0..-2]
620
642
  end
621
643
 
622
- result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
644
+ result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
623
645
  end
624
646
 
625
647
  def process_lchevron text
@@ -791,12 +813,22 @@ class RubyLexer
791
813
  c = if scan(/\\/) then
792
814
  self.read_escape
793
815
  else
794
- ss.getch
816
+ getch
795
817
  end
796
818
 
797
819
  result EXPR_END, :tSTRING, c
798
820
  end
799
821
 
822
+ def process_simple_string text
823
+ replacement = text[1..-2].gsub(ESC) {
824
+ unescape($1).b.force_encoding Encoding::UTF_8
825
+ }
826
+
827
+ replacement = replacement.b unless replacement.valid_encoding?
828
+
829
+ result EXPR_END, :tSTRING, replacement
830
+ end
831
+
800
832
  def process_slash text
801
833
  if is_beg? then
802
834
  string STR_REGEXP
@@ -870,16 +902,16 @@ class RubyLexer
870
902
 
871
903
  if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
872
904
  self.lex_strterm = nil
873
- self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END|EXPR_ENDARG
905
+ self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
874
906
  end
875
907
 
876
908
  return token
877
909
  end
878
910
 
879
911
  def process_symbol text
880
- symbol = possibly_escape_string text, /^:"/
912
+ symbol = possibly_escape_string text, /^:\"/ # stupid emacs
881
913
 
882
- result EXPR_END|EXPR_ENDARG, :tSYMBOL, symbol
914
+ result EXPR_LIT, :tSYMBOL, symbol
883
915
  end
884
916
 
885
917
  def process_token text
@@ -928,6 +960,8 @@ class RubyLexer
928
960
  EXPR_END
929
961
  end
930
962
 
963
+ tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
964
+
931
965
  if last_state !~ EXPR_DOT|EXPR_FNAME and
932
966
  (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
933
967
  lvar_defined?(token) then
@@ -951,18 +985,16 @@ class RubyLexer
951
985
  self.command_start = true if lex_state =~ EXPR_BEG
952
986
 
953
987
  case
954
- when keyword.id0 == :kDO then
988
+ when keyword.id0 == :kDO then # parse26.y line 7591
955
989
  case
956
990
  when lambda_beginning? then
957
991
  self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
958
- self.paren_nest -= 1
992
+ self.paren_nest -= 1 # TODO: question this?
959
993
  result lex_state, :kDO_LAMBDA, value
960
994
  when cond.is_in_state then
961
995
  result lex_state, :kDO_COND, value
962
996
  when cmdarg.is_in_state && state != EXPR_CMDARG then
963
997
  result lex_state, :kDO_BLOCK, value
964
- when state =~ EXPR_BEG|EXPR_ENDARG then
965
- result lex_state, :kDO_BLOCK, value
966
998
  else
967
999
  result lex_state, :kDO, value
968
1000
  end
@@ -979,9 +1011,9 @@ class RubyLexer
979
1011
  ss.unscan # put back "_"
980
1012
 
981
1013
  if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
982
- return [RubyLexer::EOF, RubyLexer::EOF]
983
- elsif scan(/\_\w*/) then
984
- return process_token matched
1014
+ [RubyLexer::EOF, RubyLexer::EOF]
1015
+ elsif scan(/#{IDENT_CHAR}+/) then
1016
+ process_token matched
985
1017
  end
986
1018
  end
987
1019
 
@@ -1018,7 +1050,7 @@ class RubyLexer
1018
1050
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
1019
1051
  # TODO: force encode everything to UTF-8?
1020
1052
  ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
1021
- when check(/M-\\[\\MCc]/) then
1053
+ when check(/M-\\./) then
1022
1054
  scan(/M-\\/) # eat it
1023
1055
  c = self.read_escape
1024
1056
  c[0] = (c[0].ord | 0x80).chr
@@ -1032,6 +1064,11 @@ class RubyLexer
1032
1064
  c = self.read_escape
1033
1065
  c[0] = (c[0].ord & 0x9f).chr
1034
1066
  c
1067
+ when check(/(C-|c)\\(?!u|\\)/) then
1068
+ scan(/(C-|c)\\/) # eat it
1069
+ c = read_escape
1070
+ c[0] = (c[0].ord & 0x9f).chr
1071
+ c
1035
1072
  when scan(/C-\?|c\?/) then
1036
1073
  127.chr
1037
1074
  when scan(/(C-|c)(.)/) then
@@ -1040,17 +1077,25 @@ class RubyLexer
1040
1077
  c
1041
1078
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
1042
1079
  matched
1043
- when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
1044
- [ss[1].delete("{}").to_i(16)].pack("U")
1045
- when scan(/u([0-9a-fA-F]{1,3})/) then
1080
+ when scan(/u(\h{4})/) then
1081
+ [ss[1].to_i(16)].pack("U")
1082
+ when scan(/u(\h{1,3})/) then
1046
1083
  rb_compile_error "Invalid escape character syntax"
1084
+ when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
1085
+ ss[1].split.map { |s| s.to_i(16) }.pack("U*")
1047
1086
  when scan(/[McCx0-9]/) || end_of_stream? then
1048
1087
  rb_compile_error("Invalid escape character syntax")
1049
1088
  else
1050
- ss.getch
1089
+ getch
1051
1090
  end.dup
1052
1091
  end
1053
1092
 
1093
+ def getch
1094
+ c = ss.getch
1095
+ c = ss.getch if c == "\r" && ss.peek(1) == "\n"
1096
+ c
1097
+ end
1098
+
1054
1099
  def regx_options # TODO: rewrite / remove
1055
1100
  good, bad = [], []
1056
1101
 
@@ -1106,6 +1151,10 @@ class RubyLexer
1106
1151
  parser.class.version <= 24
1107
1152
  end
1108
1153
 
1154
+ def ruby27plus?
1155
+ parser.class.version >= 27
1156
+ end
1157
+
1109
1158
  def scan re
1110
1159
  ss.scan re
1111
1160
  end
@@ -1177,8 +1226,6 @@ class RubyLexer
1177
1226
  handled = true
1178
1227
 
1179
1228
  case
1180
- when paren_re && scan(paren_re) then
1181
- self.string_nest += 1
1182
1229
  when scan(term_re) then
1183
1230
  if self.string_nest == 0 then
1184
1231
  ss.pos -= 1
@@ -1186,6 +1233,8 @@ class RubyLexer
1186
1233
  else
1187
1234
  self.string_nest -= 1
1188
1235
  end
1236
+ when paren_re && scan(paren_re) then
1237
+ self.string_nest += 1
1189
1238
  when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
1190
1239
  ss.pos -= 1
1191
1240
  break
@@ -1232,9 +1281,9 @@ class RubyLexer
1232
1281
  end
1233
1282
  x = Regexp.escape paren if paren && paren != "\000"
1234
1283
  re = if qwords then
1235
- /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
1284
+ /[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
1236
1285
  else
1237
- /[^#{t}#{x}\#\0\\]+|./
1286
+ /[^#{t}#{x}\#\\]+|./
1238
1287
  end
1239
1288
 
1240
1289
  scan re
@@ -1274,10 +1323,12 @@ class RubyLexer
1274
1323
  s
1275
1324
  when /^[McCx0-9]/ then
1276
1325
  rb_compile_error("Invalid escape character syntax")
1277
- when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/ then
1326
+ when /u(\h{4})/ then
1278
1327
  [$1.delete("{}").to_i(16)].pack("U")
1279
- when /u([0-9a-fA-F]{1,3})/ then
1328
+ when /u(\h{1,3})/ then
1280
1329
  rb_compile_error("Invalid escape character syntax")
1330
+ when /u\{(\h+(?:\s+\h+)*)\}/ then
1331
+ $1.split.map { |s| s.to_i(16) }.pack("U*")
1281
1332
  else
1282
1333
  s
1283
1334
  end
@@ -1355,11 +1406,11 @@ class RubyLexer
1355
1406
  # extra fake lex_state names to make things a bit cleaner
1356
1407
 
1357
1408
  EXPR_LAB = EXPR_ARG|EXPR_LABELED
1358
- EXPR_NUM = EXPR_END|EXPR_ENDARG
1409
+ EXPR_LIT = EXPR_END|EXPR_ENDARG
1359
1410
  EXPR_PAR = EXPR_BEG|EXPR_LABEL
1360
1411
  EXPR_PAD = EXPR_BEG|EXPR_LABELED
1361
1412
 
1362
- EXPR_LIT = EXPR_NUM # TODO: migrate to EXPR_LIT
1413
+ EXPR_NUM = EXPR_LIT
1363
1414
 
1364
1415
  expr_names.merge!(EXPR_NONE => "EXPR_NONE",
1365
1416
  EXPR_BEG => "EXPR_BEG",
data/lib/ruby_lexer.rex CHANGED
@@ -6,9 +6,9 @@ class RubyLexer
6
6
 
7
7
  macro
8
8
 
9
- IDENT /^#{IDENT_CHAR}+/o
9
+ IDENT_CHAR /[a-zA-Z0-9_[:^ascii:]]/
10
10
 
11
- ESC /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]{1,4}|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
11
+ ESC /\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
12
12
  SIMPLE_STRING /((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
13
13
  SSTRING /((\\.|[^\'])*)/
14
14
 
@@ -48,7 +48,7 @@ rule
48
48
  | /\![=~]?/ { result :arg_state, TOKENS[text], text }
49
49
 
50
50
  : /\./
51
- | /\.\.\.?/ { result EXPR_BEG, TOKENS[text], text }
51
+ | /\.\.\.?/ process_dots
52
52
  | /\.\d/ { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
53
53
  | /\./ { self.lex_state = EXPR_BEG; result EXPR_DOT, :tDOT, "." }
54
54
 
@@ -62,7 +62,7 @@ rule
62
62
  | /\=(?=begin\b)/ { result arg_state, TOKENS[text], text }
63
63
 
64
64
  ruby22_label? /\"#{SIMPLE_STRING}\":/o process_label
65
- /\"(#{SIMPLE_STRING})\"/o { result EXPR_END, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
65
+ /\"(#{SIMPLE_STRING})\"/o process_simple_string
66
66
  /\"/ { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
67
67
 
68
68
  /\@\@?\d/ { rb_compile_error "`#{text}` is not allowed as a variable name" }
@@ -164,13 +164,12 @@ was_label? /\'#{SSTRING}\':?/o process_label_or_string
164
164
  | in_fname? /\$([1-9]\d*)/ process_gvar
165
165
  | /\$([1-9]\d*)/ process_nthref
166
166
  | /\$0/ process_gvar
167
- | /\$[^[:ascii:]]+/ process_gvar
167
+ | /\$#{IDENT_CHAR}+/ process_gvar
168
168
  | /\$\W|\$\z/ process_gvar_oddity
169
- | /\$\w+/ process_gvar
170
169
 
171
170
  /\_/ process_underscore
172
171
 
173
- /#{IDENT}/o process_token
172
+ /#{IDENT_CHAR}+/o process_token
174
173
 
175
174
  /\004|\032|\000|\Z/ { [RubyLexer::EOF, RubyLexer::EOF] }
176
175
 
data/lib/ruby_lexer.rex.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # encoding: UTF-8
2
2
  #--
3
3
  # This file is automatically generated. Do not modify it.
4
- # Generated by: oedipus_lex version 2.5.1.
4
+ # Generated by: oedipus_lex version 2.5.2.
5
5
  # Source: lib/ruby_lexer.rex
6
6
  #++
7
7
 
@@ -16,8 +16,8 @@ class RubyLexer
16
16
  require 'strscan'
17
17
 
18
18
  # :stopdoc:
19
- IDENT = /^#{IDENT_CHAR}+/o
20
- ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]{1,4}|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
19
+ IDENT_CHAR = /[a-zA-Z0-9_[:^ascii:]]/
20
+ ESC = /\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
21
21
  SIMPLE_STRING = /((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
22
22
  SSTRING = /((\\.|[^\'])*)/
23
23
  INT_DEC = /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
@@ -138,7 +138,7 @@ class RubyLexer
138
138
  when ss.match?(/\./) then
139
139
  case
140
140
  when text = ss.scan(/\.\.\.?/) then
141
- action { result EXPR_BEG, TOKENS[text], text }
141
+ process_dots text
142
142
  when ss.skip(/\.\d/) then
143
143
  action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
144
144
  when ss.skip(/\./) then
@@ -160,7 +160,7 @@ class RubyLexer
160
160
  when ruby22_label? && (text = ss.scan(/\"#{SIMPLE_STRING}\":/o)) then
161
161
  process_label text
162
162
  when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
163
- action { result EXPR_END, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
163
+ process_simple_string text
164
164
  when text = ss.scan(/\"/) then
165
165
  action { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
166
166
  when text = ss.scan(/\@\@?\d/) then
@@ -328,16 +328,14 @@ class RubyLexer
328
328
  process_nthref text
329
329
  when text = ss.scan(/\$0/) then
330
330
  process_gvar text
331
- when text = ss.scan(/\$[^[:ascii:]]+/) then
331
+ when text = ss.scan(/\$#{IDENT_CHAR}+/) then
332
332
  process_gvar text
333
333
  when text = ss.scan(/\$\W|\$\z/) then
334
334
  process_gvar_oddity text
335
- when text = ss.scan(/\$\w+/) then
336
- process_gvar text
337
335
  end # group /\$/
338
336
  when text = ss.scan(/\_/) then
339
337
  process_underscore text
340
- when text = ss.scan(/#{IDENT}/o) then
338
+ when text = ss.scan(/#{IDENT_CHAR}+/o) then
341
339
  process_token text
342
340
  when ss.skip(/\004|\032|\000|\Z/) then
343
341
  action { [RubyLexer::EOF, RubyLexer::EOF] }
data/lib/ruby_parser.rb CHANGED
@@ -78,10 +78,14 @@ require "ruby23_parser"
78
78
  require "ruby24_parser"
79
79
  require "ruby25_parser"
80
80
  require "ruby26_parser"
81
+ require "ruby27_parser"
82
+ require "ruby30_parser"
81
83
 
82
84
  class RubyParser # HACK
83
85
  VERSIONS.clear # also a HACK caused by racc namespace issues
84
86
 
87
+ class V30 < ::Ruby30Parser; end
88
+ class V27 < ::Ruby27Parser; end
85
89
  class V26 < ::Ruby26Parser; end
86
90
  class V25 < ::Ruby25Parser; end
87
91
  class V24 < ::Ruby24Parser; end