ruby_parser 3.14.0 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ruby_lexer.rb CHANGED
@@ -25,11 +25,10 @@ class RubyLexer
25
25
 
26
26
  HAS_ENC = "".respond_to? :encoding
27
27
 
28
- IDENT_CHAR = if HAS_ENC then
29
- /[\w\u0080-\u{10ffff}]/u
30
- else
31
- /[\w\x80-\xFF]/n
32
- end
28
+ BTOKENS = {
29
+ ".." => :tBDOT2,
30
+ "..." => :tBDOT3,
31
+ }
33
32
 
34
33
  TOKENS = {
35
34
  "!" => :tBANG,
@@ -137,6 +136,10 @@ class RubyLexer
137
136
  ss.eos?
138
137
  end
139
138
 
139
+ def expr_beg?
140
+ lex_state =~ EXPR_BEG
141
+ end
142
+
140
143
  def expr_dot?
141
144
  lex_state =~ EXPR_DOT
142
145
  end
@@ -162,7 +165,7 @@ class RubyLexer
162
165
  indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
163
166
  expand = func =~ STR_FUNC_EXPAND
164
167
  eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
165
- eos_re = /#{indent}#{Regexp.escape eos}(#{eol}|\z)/
168
+ eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
166
169
  err_msg = "can't match #{eos_re.inspect} anywhere in "
167
170
 
168
171
  rb_compile_error err_msg if end_of_stream?
@@ -177,10 +180,15 @@ class RubyLexer
177
180
 
178
181
  if expand then
179
182
  case
180
- when scan(/#[$@]/) then
181
- ss.pos -= 1 # FIX omg stupid
183
+ when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
184
+ # TODO: !ISASCII
185
+ # ?! see parser_peek_variable_name
186
+ return :tSTRING_DVAR, matched
187
+ when scan(/#(?=\@\@?[a-zA-Z_])/) then
188
+ # TODO: !ISASCII
182
189
  return :tSTRING_DVAR, matched
183
190
  when scan(/#[{]/) then
191
+ self.command_start = true
184
192
  return :tSTRING_DBEG, matched
185
193
  when scan(/#/) then
186
194
  string_buffer << "#"
@@ -320,6 +328,11 @@ class RubyLexer
320
328
  lpar_beg && lpar_beg == paren_nest
321
329
  end
322
330
 
331
+ def is_local_id id
332
+ # maybe just make this false for now
333
+ self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
334
+ end
335
+
323
336
  def lvar_defined? id
324
337
  # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
325
338
  self.parser.env[id.to_sym] == :lvar
@@ -338,9 +351,9 @@ class RubyLexer
338
351
 
339
352
  if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
340
353
  rb_compile_error "unknown type of %string" if ss.matched_size == 2
341
- c, beg, short_hand = matched, ss.getch, false
354
+ c, beg, short_hand = matched, getch, false
342
355
  else # Short-hand (e.g. %{, %., %!, etc)
343
- c, beg, short_hand = "Q", ss.getch, true
356
+ c, beg, short_hand = "Q", getch, true
344
357
  end
345
358
 
346
359
  if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
@@ -457,7 +470,7 @@ class RubyLexer
457
470
  if text =~ check then
458
471
  content.gsub(ESC) { unescape $1 }
459
472
  else
460
- content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
473
+ content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
461
474
  end
462
475
  end
463
476
 
@@ -495,16 +508,19 @@ class RubyLexer
495
508
  end
496
509
 
497
510
  def process_brace_close text
498
- # matching compare/parse23.y:8561
499
- cond.lexpop
500
- cmdarg.lexpop
501
-
502
511
  case matched
503
512
  when "}" then
504
513
  self.brace_nest -= 1
505
- self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
506
-
507
514
  return :tSTRING_DEND, matched if brace_nest < 0
515
+ end
516
+
517
+ # matching compare/parse26.y:8099
518
+ cond.pop
519
+ cmdarg.pop
520
+
521
+ case matched
522
+ when "}" then
523
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
508
524
  return :tRCURLY, matched
509
525
  when "]" then
510
526
  self.paren_nest -= 1
@@ -573,6 +589,12 @@ class RubyLexer
573
589
  end
574
590
  end
575
591
 
592
+ def process_dots text
593
+ tokens = ruby27plus? && expr_beg? ? BTOKENS : TOKENS
594
+
595
+ result EXPR_BEG, tokens[text], text
596
+ end
597
+
576
598
  def process_float text
577
599
  rb_compile_error "Invalid numeric format" if text =~ /__/
578
600
 
@@ -605,7 +627,7 @@ class RubyLexer
605
627
  end
606
628
 
607
629
  def process_label text
608
- symbol = possibly_escape_string text, /^"/
630
+ symbol = possibly_escape_string text, /^\"/
609
631
 
610
632
  result EXPR_LAB, :tLABEL, [symbol, self.lineno]
611
633
  end
@@ -619,7 +641,7 @@ class RubyLexer
619
641
  text = text[0..-2]
620
642
  end
621
643
 
622
- result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
644
+ result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
623
645
  end
624
646
 
625
647
  def process_lchevron text
@@ -791,12 +813,22 @@ class RubyLexer
791
813
  c = if scan(/\\/) then
792
814
  self.read_escape
793
815
  else
794
- ss.getch
816
+ getch
795
817
  end
796
818
 
797
819
  result EXPR_END, :tSTRING, c
798
820
  end
799
821
 
822
+ def process_simple_string text
823
+ replacement = text[1..-2].gsub(ESC) {
824
+ unescape($1).b.force_encoding Encoding::UTF_8
825
+ }
826
+
827
+ replacement = replacement.b unless replacement.valid_encoding?
828
+
829
+ result EXPR_END, :tSTRING, replacement
830
+ end
831
+
800
832
  def process_slash text
801
833
  if is_beg? then
802
834
  string STR_REGEXP
@@ -870,16 +902,16 @@ class RubyLexer
870
902
 
871
903
  if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
872
904
  self.lex_strterm = nil
873
- self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END|EXPR_ENDARG
905
+ self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
874
906
  end
875
907
 
876
908
  return token
877
909
  end
878
910
 
879
911
  def process_symbol text
880
- symbol = possibly_escape_string text, /^:"/
912
+ symbol = possibly_escape_string text, /^:\"/ # stupid emacs
881
913
 
882
- result EXPR_END|EXPR_ENDARG, :tSYMBOL, symbol
914
+ result EXPR_LIT, :tSYMBOL, symbol
883
915
  end
884
916
 
885
917
  def process_token text
@@ -928,6 +960,8 @@ class RubyLexer
928
960
  EXPR_END
929
961
  end
930
962
 
963
+ tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
964
+
931
965
  if last_state !~ EXPR_DOT|EXPR_FNAME and
932
966
  (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
933
967
  lvar_defined?(token) then
@@ -951,18 +985,16 @@ class RubyLexer
951
985
  self.command_start = true if lex_state =~ EXPR_BEG
952
986
 
953
987
  case
954
- when keyword.id0 == :kDO then
988
+ when keyword.id0 == :kDO then # parse26.y line 7591
955
989
  case
956
990
  when lambda_beginning? then
957
991
  self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
958
- self.paren_nest -= 1
992
+ self.paren_nest -= 1 # TODO: question this?
959
993
  result lex_state, :kDO_LAMBDA, value
960
994
  when cond.is_in_state then
961
995
  result lex_state, :kDO_COND, value
962
996
  when cmdarg.is_in_state && state != EXPR_CMDARG then
963
997
  result lex_state, :kDO_BLOCK, value
964
- when state =~ EXPR_BEG|EXPR_ENDARG then
965
- result lex_state, :kDO_BLOCK, value
966
998
  else
967
999
  result lex_state, :kDO, value
968
1000
  end
@@ -979,9 +1011,9 @@ class RubyLexer
979
1011
  ss.unscan # put back "_"
980
1012
 
981
1013
  if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
982
- return [RubyLexer::EOF, RubyLexer::EOF]
983
- elsif scan(/\_\w*/) then
984
- return process_token matched
1014
+ [RubyLexer::EOF, RubyLexer::EOF]
1015
+ elsif scan(/#{IDENT_CHAR}+/) then
1016
+ process_token matched
985
1017
  end
986
1018
  end
987
1019
 
@@ -1018,7 +1050,7 @@ class RubyLexer
1018
1050
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
1019
1051
  # TODO: force encode everything to UTF-8?
1020
1052
  ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
1021
- when check(/M-\\[\\MCc]/) then
1053
+ when check(/M-\\./) then
1022
1054
  scan(/M-\\/) # eat it
1023
1055
  c = self.read_escape
1024
1056
  c[0] = (c[0].ord | 0x80).chr
@@ -1032,6 +1064,11 @@ class RubyLexer
1032
1064
  c = self.read_escape
1033
1065
  c[0] = (c[0].ord & 0x9f).chr
1034
1066
  c
1067
+ when check(/(C-|c)\\(?!u|\\)/) then
1068
+ scan(/(C-|c)\\/) # eat it
1069
+ c = read_escape
1070
+ c[0] = (c[0].ord & 0x9f).chr
1071
+ c
1035
1072
  when scan(/C-\?|c\?/) then
1036
1073
  127.chr
1037
1074
  when scan(/(C-|c)(.)/) then
@@ -1040,17 +1077,25 @@ class RubyLexer
1040
1077
  c
1041
1078
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
1042
1079
  matched
1043
- when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
1044
- [ss[1].delete("{}").to_i(16)].pack("U")
1045
- when scan(/u([0-9a-fA-F]{1,3})/) then
1080
+ when scan(/u(\h{4})/) then
1081
+ [ss[1].to_i(16)].pack("U")
1082
+ when scan(/u(\h{1,3})/) then
1046
1083
  rb_compile_error "Invalid escape character syntax"
1084
+ when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
1085
+ ss[1].split.map { |s| s.to_i(16) }.pack("U*")
1047
1086
  when scan(/[McCx0-9]/) || end_of_stream? then
1048
1087
  rb_compile_error("Invalid escape character syntax")
1049
1088
  else
1050
- ss.getch
1089
+ getch
1051
1090
  end.dup
1052
1091
  end
1053
1092
 
1093
+ def getch
1094
+ c = ss.getch
1095
+ c = ss.getch if c == "\r" && ss.peek(1) == "\n"
1096
+ c
1097
+ end
1098
+
1054
1099
  def regx_options # TODO: rewrite / remove
1055
1100
  good, bad = [], []
1056
1101
 
@@ -1106,6 +1151,10 @@ class RubyLexer
1106
1151
  parser.class.version <= 24
1107
1152
  end
1108
1153
 
1154
+ def ruby27plus?
1155
+ parser.class.version >= 27
1156
+ end
1157
+
1109
1158
  def scan re
1110
1159
  ss.scan re
1111
1160
  end
@@ -1177,8 +1226,6 @@ class RubyLexer
1177
1226
  handled = true
1178
1227
 
1179
1228
  case
1180
- when paren_re && scan(paren_re) then
1181
- self.string_nest += 1
1182
1229
  when scan(term_re) then
1183
1230
  if self.string_nest == 0 then
1184
1231
  ss.pos -= 1
@@ -1186,6 +1233,8 @@ class RubyLexer
1186
1233
  else
1187
1234
  self.string_nest -= 1
1188
1235
  end
1236
+ when paren_re && scan(paren_re) then
1237
+ self.string_nest += 1
1189
1238
  when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
1190
1239
  ss.pos -= 1
1191
1240
  break
@@ -1232,9 +1281,9 @@ class RubyLexer
1232
1281
  end
1233
1282
  x = Regexp.escape paren if paren && paren != "\000"
1234
1283
  re = if qwords then
1235
- /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
1284
+ /[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
1236
1285
  else
1237
- /[^#{t}#{x}\#\0\\]+|./
1286
+ /[^#{t}#{x}\#\\]+|./
1238
1287
  end
1239
1288
 
1240
1289
  scan re
@@ -1274,10 +1323,12 @@ class RubyLexer
1274
1323
  s
1275
1324
  when /^[McCx0-9]/ then
1276
1325
  rb_compile_error("Invalid escape character syntax")
1277
- when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/ then
1326
+ when /u(\h{4})/ then
1278
1327
  [$1.delete("{}").to_i(16)].pack("U")
1279
- when /u([0-9a-fA-F]{1,3})/ then
1328
+ when /u(\h{1,3})/ then
1280
1329
  rb_compile_error("Invalid escape character syntax")
1330
+ when /u\{(\h+(?:\s+\h+)*)\}/ then
1331
+ $1.split.map { |s| s.to_i(16) }.pack("U*")
1281
1332
  else
1282
1333
  s
1283
1334
  end
@@ -1355,11 +1406,11 @@ class RubyLexer
1355
1406
  # extra fake lex_state names to make things a bit cleaner
1356
1407
 
1357
1408
  EXPR_LAB = EXPR_ARG|EXPR_LABELED
1358
- EXPR_NUM = EXPR_END|EXPR_ENDARG
1409
+ EXPR_LIT = EXPR_END|EXPR_ENDARG
1359
1410
  EXPR_PAR = EXPR_BEG|EXPR_LABEL
1360
1411
  EXPR_PAD = EXPR_BEG|EXPR_LABELED
1361
1412
 
1362
- EXPR_LIT = EXPR_NUM # TODO: migrate to EXPR_LIT
1413
+ EXPR_NUM = EXPR_LIT
1363
1414
 
1364
1415
  expr_names.merge!(EXPR_NONE => "EXPR_NONE",
1365
1416
  EXPR_BEG => "EXPR_BEG",
data/lib/ruby_lexer.rex CHANGED
@@ -6,9 +6,9 @@ class RubyLexer
6
6
 
7
7
  macro
8
8
 
9
- IDENT /^#{IDENT_CHAR}+/o
9
+ IDENT_CHAR /[a-zA-Z0-9_[:^ascii:]]/
10
10
 
11
- ESC /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]{1,4}|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
11
+ ESC /\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
12
12
  SIMPLE_STRING /((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
13
13
  SSTRING /((\\.|[^\'])*)/
14
14
 
@@ -48,7 +48,7 @@ rule
48
48
  | /\![=~]?/ { result :arg_state, TOKENS[text], text }
49
49
 
50
50
  : /\./
51
- | /\.\.\.?/ { result EXPR_BEG, TOKENS[text], text }
51
+ | /\.\.\.?/ process_dots
52
52
  | /\.\d/ { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
53
53
  | /\./ { self.lex_state = EXPR_BEG; result EXPR_DOT, :tDOT, "." }
54
54
 
@@ -62,7 +62,7 @@ rule
62
62
  | /\=(?=begin\b)/ { result arg_state, TOKENS[text], text }
63
63
 
64
64
  ruby22_label? /\"#{SIMPLE_STRING}\":/o process_label
65
- /\"(#{SIMPLE_STRING})\"/o { result EXPR_END, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
65
+ /\"(#{SIMPLE_STRING})\"/o process_simple_string
66
66
  /\"/ { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
67
67
 
68
68
  /\@\@?\d/ { rb_compile_error "`#{text}` is not allowed as a variable name" }
@@ -164,13 +164,12 @@ was_label? /\'#{SSTRING}\':?/o process_label_or_string
164
164
  | in_fname? /\$([1-9]\d*)/ process_gvar
165
165
  | /\$([1-9]\d*)/ process_nthref
166
166
  | /\$0/ process_gvar
167
- | /\$[^[:ascii:]]+/ process_gvar
167
+ | /\$#{IDENT_CHAR}+/ process_gvar
168
168
  | /\$\W|\$\z/ process_gvar_oddity
169
- | /\$\w+/ process_gvar
170
169
 
171
170
  /\_/ process_underscore
172
171
 
173
- /#{IDENT}/o process_token
172
+ /#{IDENT_CHAR}+/o process_token
174
173
 
175
174
  /\004|\032|\000|\Z/ { [RubyLexer::EOF, RubyLexer::EOF] }
176
175
 
@@ -1,7 +1,7 @@
1
1
  # encoding: UTF-8
2
2
  #--
3
3
  # This file is automatically generated. Do not modify it.
4
- # Generated by: oedipus_lex version 2.5.1.
4
+ # Generated by: oedipus_lex version 2.5.2.
5
5
  # Source: lib/ruby_lexer.rex
6
6
  #++
7
7
 
@@ -16,8 +16,8 @@ class RubyLexer
16
16
  require 'strscan'
17
17
 
18
18
  # :stopdoc:
19
- IDENT = /^#{IDENT_CHAR}+/o
20
- ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]{1,4}|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
19
+ IDENT_CHAR = /[a-zA-Z0-9_[:^ascii:]]/
20
+ ESC = /\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
21
21
  SIMPLE_STRING = /((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
22
22
  SSTRING = /((\\.|[^\'])*)/
23
23
  INT_DEC = /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
@@ -138,7 +138,7 @@ class RubyLexer
138
138
  when ss.match?(/\./) then
139
139
  case
140
140
  when text = ss.scan(/\.\.\.?/) then
141
- action { result EXPR_BEG, TOKENS[text], text }
141
+ process_dots text
142
142
  when ss.skip(/\.\d/) then
143
143
  action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
144
144
  when ss.skip(/\./) then
@@ -160,7 +160,7 @@ class RubyLexer
160
160
  when ruby22_label? && (text = ss.scan(/\"#{SIMPLE_STRING}\":/o)) then
161
161
  process_label text
162
162
  when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
163
- action { result EXPR_END, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
163
+ process_simple_string text
164
164
  when text = ss.scan(/\"/) then
165
165
  action { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
166
166
  when text = ss.scan(/\@\@?\d/) then
@@ -328,16 +328,14 @@ class RubyLexer
328
328
  process_nthref text
329
329
  when text = ss.scan(/\$0/) then
330
330
  process_gvar text
331
- when text = ss.scan(/\$[^[:ascii:]]+/) then
331
+ when text = ss.scan(/\$#{IDENT_CHAR}+/) then
332
332
  process_gvar text
333
333
  when text = ss.scan(/\$\W|\$\z/) then
334
334
  process_gvar_oddity text
335
- when text = ss.scan(/\$\w+/) then
336
- process_gvar text
337
335
  end # group /\$/
338
336
  when text = ss.scan(/\_/) then
339
337
  process_underscore text
340
- when text = ss.scan(/#{IDENT}/o) then
338
+ when text = ss.scan(/#{IDENT_CHAR}+/o) then
341
339
  process_token text
342
340
  when ss.skip(/\004|\032|\000|\Z/) then
343
341
  action { [RubyLexer::EOF, RubyLexer::EOF] }
data/lib/ruby_parser.rb CHANGED
@@ -78,10 +78,14 @@ require "ruby23_parser"
78
78
  require "ruby24_parser"
79
79
  require "ruby25_parser"
80
80
  require "ruby26_parser"
81
+ require "ruby27_parser"
82
+ require "ruby30_parser"
81
83
 
82
84
  class RubyParser # HACK
83
85
  VERSIONS.clear # also a HACK caused by racc namespace issues
84
86
 
87
+ class V30 < ::Ruby30Parser; end
88
+ class V27 < ::Ruby27Parser; end
85
89
  class V26 < ::Ruby26Parser; end
86
90
  class V25 < ::Ruby25Parser; end
87
91
  class V24 < ::Ruby24Parser; end