ruby_parser 3.14.0 → 3.16.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ruby_parser.yy CHANGED
@@ -14,6 +14,10 @@ class Ruby24Parser
14
14
  class Ruby25Parser
15
15
  #elif V == 26
16
16
  class Ruby26Parser
17
+ #elif V == 27
18
+ class Ruby27Parser
19
+ #elif V == 30
20
+ class Ruby30Parser
17
21
  #else
18
22
  fail "version not specified or supported on code generation"
19
23
  #endif
@@ -44,6 +48,9 @@ token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS
44
48
  #if V >= 23
45
49
  tLONELY
46
50
  #endif
51
+ #if V >= 26
52
+ tBDOT2 tBDOT3
53
+ #endif
47
54
 
48
55
  preclow
49
56
  nonassoc tLOWEST
@@ -55,7 +62,7 @@ preclow
55
62
  right tEQL tOP_ASGN
56
63
  left kRESCUE_MOD
57
64
  right tEH tCOLON
58
- nonassoc tDOT2 tDOT3
65
+ nonassoc tDOT2 tDOT3 tBDOT2 tBDOT3
59
66
  left tOROP
60
67
  left tANDOP
61
68
  nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
@@ -78,6 +85,9 @@ rule
78
85
  top_compstmt
79
86
  {
80
87
  result = new_compstmt val
88
+
89
+ lexer.cond.pop # local_pop
90
+ lexer.cmdarg.pop
81
91
  }
82
92
 
83
93
  top_compstmt: top_stmts opt_terms
@@ -286,13 +296,15 @@ rule
286
296
  }
287
297
  | primary_value tCOLON2 tCONSTANT tOP_ASGN command_rhs
288
298
  {
289
- result = s(:op_asgn, val[0], val[4], val[2], val[3])
290
- debug20 4, val, result
299
+ lhs1, _, lhs2, op, rhs = val
300
+
301
+ result = s(:op_asgn, lhs1, rhs, lhs2.to_sym, op.to_sym)
291
302
  }
292
303
  | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_rhs
293
304
  {
294
- result = s(:op_asgn, val[0], val[4], val[2], val[3])
295
- debug20 5, val, result
305
+ lhs1, _, lhs2, op, rhs = val
306
+
307
+ result = s(:op_asgn, lhs1, rhs, lhs2.to_sym, op.to_sym)
296
308
  }
297
309
  | backref tOP_ASGN command_rhs
298
310
  {
@@ -852,6 +864,24 @@ rule
852
864
  result = s(:dot3, v1, v2).line v1.line
853
865
  }
854
866
  #endif
867
+
868
+ #if V >= 27
869
+ | tBDOT2 arg
870
+ {
871
+ _, v2, = val
872
+ v1 = nil
873
+
874
+ result = s(:dot2, v1, v2).line v2.line
875
+ }
876
+ | tBDOT3 arg
877
+ {
878
+ _, v2 = val
879
+ v1 = nil
880
+
881
+ result = s(:dot3, v1, v2).line v2.line
882
+ }
883
+ #endif
884
+
855
885
  | arg tPLUS arg
856
886
  {
857
887
  result = new_call val[0], :+, argl(val[2])
@@ -1063,17 +1093,14 @@ rule
1063
1093
  | args opt_block_arg
1064
1094
  {
1065
1095
  result = call_args val
1066
- result = self.arg_blk_pass val[0], val[1]
1067
1096
  }
1068
1097
  | assocs opt_block_arg
1069
1098
  {
1070
- result = call_args [array_to_hash(val[0])]
1071
- result = self.arg_blk_pass result, val[1]
1099
+ result = call_args [array_to_hash(val[0]), val[1]]
1072
1100
  }
1073
1101
  | args tCOMMA assocs opt_block_arg
1074
1102
  {
1075
- result = call_args [val[0], array_to_hash(val[2])]
1076
- result = self.arg_blk_pass result, val[3]
1103
+ result = call_args [val[0], array_to_hash(val[2]), val[3]]
1077
1104
  }
1078
1105
  | block_arg
1079
1106
  {
@@ -1081,12 +1108,39 @@ rule
1081
1108
  }
1082
1109
 
1083
1110
  command_args: {
1084
- result = lexer.cmdarg.store true
1111
+ # parse26.y line 2200
1112
+
1113
+ # If call_args starts with an open paren '(' or
1114
+ # '[', look-ahead reading of the letters calls
1115
+ # CMDARG_PUSH(0), but the push must be done
1116
+ # after CMDARG_PUSH(1). So this code makes them
1117
+ # consistent by first cancelling the premature
1118
+ # CMDARG_PUSH(0), doing CMDARG_PUSH(1), and
1119
+ # finally redoing CMDARG_PUSH(0).
1120
+
1121
+ result = yychar = self.last_token_type.first
1122
+ lookahead = [:tLPAREN, :tLPAREN_ARG, :tLPAREN2, :tLBRACK, :tLBRACK2].include?(yychar)
1123
+ lexer.cmdarg.pop if lookahead
1124
+ lexer.cmdarg.push true
1125
+ lexer.cmdarg.push false if lookahead
1085
1126
  }
1086
1127
  call_args
1087
1128
  {
1088
- lexer.cmdarg.restore val[0]
1089
- result = val[1]
1129
+ yychar, args = val
1130
+
1131
+ # call_args can be followed by tLBRACE_ARG (that
1132
+ # does CMDARG_PUSH(0) in the lexer) but the push
1133
+ # must be done after CMDARG_POP() in the parser.
1134
+ # So this code does CMDARG_POP() to pop 0 pushed
1135
+ # by tLBRACE_ARG, CMDARG_POP() to pop 1 pushed
1136
+ # by command_args, and CMDARG_PUSH(0) to restore
1137
+ # back the flag set by tLBRACE_ARG.
1138
+
1139
+ lookahead = [:tLBRACE_ARG].include?(yychar)
1140
+ lexer.cmdarg.pop if lookahead
1141
+ lexer.cmdarg.pop
1142
+ lexer.cmdarg.push false if lookahead
1143
+ result = args
1090
1144
  }
1091
1145
 
1092
1146
  block_arg: tAMPER arg_value
@@ -1104,8 +1158,9 @@ rule
1104
1158
  args: arg_value
1105
1159
  {
1106
1160
  arg, = val
1161
+ lineno = arg.line || lexer.lineno # HACK
1107
1162
 
1108
- result = s(:array, arg).line arg.line
1163
+ result = s(:array, arg).line lineno
1109
1164
  }
1110
1165
  | tSTAR arg_value
1111
1166
  {
@@ -1117,9 +1172,11 @@ rule
1117
1172
  args, _, id = val
1118
1173
  result = self.list_append args, id
1119
1174
  }
1120
- | args tCOMMA tSTAR { result = lexer.lineno } arg_value
1175
+ | args tCOMMA tSTAR arg_value
1121
1176
  {
1122
- args, _, _, line, id = val
1177
+ # TODO: the line number from tSTAR has been dropped
1178
+ args, _, _, id = val
1179
+ line = lexer.lineno
1123
1180
  result = self.list_append args, s(:splat, id).line(line)
1124
1181
  }
1125
1182
 
@@ -1140,7 +1197,6 @@ rule
1140
1197
  }
1141
1198
  | args tCOMMA tSTAR arg_value
1142
1199
  {
1143
- # FIX: bad shift/reduce conflict with rhs' comma star prod
1144
1200
  # TODO: make all tXXXX terminals include lexer.lineno
1145
1201
  arg, _, _, splat = val
1146
1202
  result = self.arg_concat arg, splat
@@ -1168,21 +1224,13 @@ rule
1168
1224
  }
1169
1225
  | k_begin
1170
1226
  {
1227
+ lexer.cmdarg.push false
1171
1228
  result = self.lexer.lineno
1172
- # TODO:
1173
- # $<val>1 = cmdarg_stack;
1174
- # CMDARG_SET(0);
1175
1229
  }
1176
1230
  bodystmt k_end
1177
1231
  {
1178
- # TODO: CMDARG_SET($<val>1);
1179
- unless val[2] then
1180
- result = s(:nil)
1181
- else
1182
- result = s(:begin, val[2])
1183
- end
1184
-
1185
- result.line = val[1]
1232
+ lexer.cmdarg.pop
1233
+ result = new_begin val
1186
1234
  }
1187
1235
  | tLPAREN_ARG
1188
1236
  {
@@ -1195,18 +1243,14 @@ rule
1195
1243
  result = s(:begin).line line
1196
1244
  }
1197
1245
  | tLPAREN_ARG
1198
- {
1199
- result = lexer.cmdarg.store false
1200
- }
1201
1246
  stmt
1202
1247
  {
1203
1248
  lexer.lex_state = EXPR_ENDARG
1204
1249
  }
1205
1250
  rparen
1206
1251
  {
1207
- _, cmdarg, stmt, _, _, = val
1208
- warning "(...) interpreted as grouped expression"
1209
- lexer.cmdarg.restore cmdarg
1252
+ _, stmt, _, _, = val
1253
+ # warning "(...) interpreted as grouped expression"
1210
1254
  result = stmt
1211
1255
  }
1212
1256
  | tLPAREN compstmt tRPAREN
@@ -1389,48 +1433,61 @@ rule
1389
1433
  }
1390
1434
  | k_def fname
1391
1435
  {
1392
- result = [self.in_def, self.lexer.cmdarg.stack.dup]
1436
+ result = self.in_def
1393
1437
 
1394
- self.comments.push self.lexer.comments
1395
- self.in_def = true
1438
+ self.in_def = true # group = local_push
1396
1439
  self.env.extend
1397
- # TODO: local->cmdargs = cmdarg_stack;
1398
- # TODO: port local_push_gen and local_pop_gen
1399
- lexer.cmdarg.stack.replace [false]
1440
+ lexer.cmdarg.push false
1441
+ lexer.cond.push false
1442
+
1443
+ self.comments.push self.lexer.comments
1400
1444
  }
1401
1445
  f_arglist bodystmt { result = lexer.lineno } k_end
1402
1446
  {
1403
- in_def, cmdarg = val[2]
1447
+ in_def = val[2]
1404
1448
 
1405
1449
  result = new_defn val
1406
1450
 
1407
- lexer.cmdarg.stack.replace cmdarg
1451
+ lexer.cond.pop # group = local_pop
1452
+ lexer.cmdarg.pop
1408
1453
  self.env.unextend
1409
1454
  self.in_def = in_def
1455
+
1410
1456
  self.lexer.comments # we don't care about comments in the body
1411
1457
  }
1412
1458
  | k_def singleton dot_or_colon
1413
1459
  {
1414
- self.comments.push self.lexer.comments
1415
1460
  lexer.lex_state = EXPR_FNAME
1416
1461
  }
1417
1462
  fname
1418
1463
  {
1419
- self.in_single += 1
1464
+ result = [self.in_def, lexer.lineno]
1465
+
1466
+ self.in_single += 1 # TODO: remove?
1467
+
1468
+ self.in_def = true # local_push
1420
1469
  self.env.extend
1421
- lexer.lex_state = EXPR_ENDFN # force for args
1422
- result = [lexer.lineno, self.lexer.cmdarg.stack.dup]
1423
- lexer.cmdarg.stack.replace [false]
1470
+ lexer.cmdarg.push false
1471
+ lexer.cond.push false
1472
+
1473
+ lexer.lex_state = EXPR_ENDFN|EXPR_LABEL
1474
+ self.comments.push self.lexer.comments
1424
1475
  }
1425
1476
  f_arglist bodystmt k_end
1426
1477
  {
1427
- _, cmdarg = val[5]
1428
- result = new_defs val
1478
+ _, _recv, _, _, _name, (in_def, _lineno), _args, _body, _ = val
1429
1479
 
1430
- lexer.cmdarg.stack.replace cmdarg
1480
+ result = new_defs val
1431
1481
 
1482
+ lexer.cond.pop # group = local_pop
1483
+ lexer.cmdarg.pop
1432
1484
  self.env.unextend
1485
+ self.in_def = in_def
1486
+
1433
1487
  self.in_single -= 1
1488
+
1489
+ # TODO: restore cur_arg ? what's cur_arg?
1490
+
1434
1491
  self.lexer.comments # we don't care about comments in the body
1435
1492
  }
1436
1493
  | kBREAK
@@ -1715,20 +1772,19 @@ opt_block_args_tail: tCOMMA block_args_tail
1715
1772
  }
1716
1773
  f_larglist
1717
1774
  {
1718
- result = lexer.cmdarg.store(false)
1775
+ lexer.cmdarg.push false
1719
1776
  }
1720
1777
  lambda_body
1721
1778
  {
1722
- (line, lpar), args, cmdarg, body = val
1779
+ (line, lpar), args, _cmdarg, body = val
1723
1780
  lexer.lpar_beg = lpar
1724
1781
 
1725
- lexer.cmdarg.restore cmdarg
1726
- lexer.cmdarg.lexpop
1782
+ lexer.cmdarg.pop
1727
1783
 
1728
1784
  call = s(:lambda).line line
1729
1785
  result = new_iter call, args, body
1730
1786
  result.line = line
1731
- self.env.unextend
1787
+ self.env.unextend # TODO: dynapush & dynapop
1732
1788
  }
1733
1789
 
1734
1790
  f_larglist: tLPAREN2 f_args opt_bv_decl rparen
@@ -1881,7 +1937,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1881
1937
  }
1882
1938
 
1883
1939
  do_body: { self.env.extend :dynamic; result = self.lexer.lineno }
1884
- { result = lexer.cmdarg.store(false) }
1940
+ { lexer.cmdarg.push false }
1885
1941
  opt_block_param
1886
1942
  #if V >= 25
1887
1943
  bodystmt
@@ -1889,11 +1945,11 @@ opt_block_args_tail: tCOMMA block_args_tail
1889
1945
  compstmt
1890
1946
  #endif
1891
1947
  {
1892
- line, cmdarg, param, cmpstmt = val
1948
+ line, _cmdarg, param, cmpstmt = val
1893
1949
 
1894
1950
  result = new_do_body param, cmpstmt, line
1951
+ lexer.cmdarg.pop
1895
1952
  self.env.unextend
1896
- lexer.cmdarg.restore cmdarg
1897
1953
  }
1898
1954
 
1899
1955
  case_body: k_when
@@ -1993,7 +2049,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1993
2049
 
1994
2050
  xstring: tXSTRING_BEG xstring_contents tSTRING_END
1995
2051
  {
1996
- result = new_xstring val[1]
2052
+ result = new_xstring val
1997
2053
  # TODO: dedent?!?! SERIOUSLY?!?
1998
2054
  }
1999
2055
 
@@ -2135,12 +2191,13 @@ regexp_contents: none
2135
2191
  result = [lexer.lex_strterm,
2136
2192
  lexer.brace_nest,
2137
2193
  lexer.string_nest, # TODO: remove
2138
- lexer.cond.store,
2139
- lexer.cmdarg.store,
2140
2194
  lexer.lex_state,
2141
2195
  lexer.lineno,
2142
2196
  ]
2143
2197
 
2198
+ lexer.cmdarg.push false
2199
+ lexer.cond.push false
2200
+
2144
2201
  lexer.lex_strterm = nil
2145
2202
  lexer.brace_nest = 0
2146
2203
  lexer.string_nest = 0
@@ -2152,14 +2209,15 @@ regexp_contents: none
2152
2209
  {
2153
2210
  _, memo, stmt, _ = val
2154
2211
 
2155
- lex_strterm, brace_nest, string_nest, oldcond, oldcmdarg, oldlex_state, line = memo
2212
+ lex_strterm, brace_nest, string_nest, oldlex_state, line = memo
2213
+ # TODO: heredoc_indent
2156
2214
 
2157
2215
  lexer.lex_strterm = lex_strterm
2158
2216
  lexer.brace_nest = brace_nest
2159
2217
  lexer.string_nest = string_nest
2160
2218
 
2161
- lexer.cond.restore oldcond
2162
- lexer.cmdarg.restore oldcmdarg
2219
+ lexer.cmdarg.pop
2220
+ lexer.cond.pop
2163
2221
 
2164
2222
  lexer.lex_state = oldlex_state
2165
2223
 
@@ -1,4 +1,5 @@
1
1
  # encoding: ASCII-8BIT
2
+ # TODO: remove
2
3
 
3
4
  require "sexp"
4
5
  require "ruby_lexer"
@@ -28,7 +29,7 @@ class Sexp
28
29
  end
29
30
 
30
31
  module RubyParserStuff
31
- VERSION = "3.14.0"
32
+ VERSION = "3.16.0"
32
33
 
33
34
  attr_accessor :lexer, :in_def, :in_single, :file
34
35
  attr_accessor :in_kwarg
@@ -45,6 +46,11 @@ module RubyParserStuff
45
46
 
46
47
  attr_accessor :canonicalize_conditions
47
48
 
49
+ ##
50
+ # The last token type returned from #next_token
51
+
52
+ attr_accessor :last_token_type
53
+
48
54
  $good20 = []
49
55
 
50
56
  %w[
@@ -109,7 +115,7 @@ module RubyParserStuff
109
115
  def initialize(options = {})
110
116
  super()
111
117
 
112
- v = self.class.name[/2\d/]
118
+ v = self.class.name[/[23]\d/]
113
119
  raise "Bad Class name #{self.class}" unless v
114
120
 
115
121
  self.lexer = RubyLexer.new v && v.to_i
@@ -124,12 +130,6 @@ module RubyParserStuff
124
130
  self.reset
125
131
  end
126
132
 
127
- def arg_blk_pass node1, node2 # TODO: nuke
128
- node1 = s(:arglist, node1) unless ARG_TYPES[node1.sexp_type]
129
- node1 << node2 if node2
130
- node1
131
- end
132
-
133
133
  def arg_concat node1, node2 # TODO: nuke
134
134
  raise "huh" unless node2
135
135
 
@@ -499,6 +499,8 @@ module RubyParserStuff
499
499
  str.encode! Encoding::UTF_8
500
500
  break
501
501
  end
502
+ rescue ArgumentError # unknown encoding name
503
+ # do nothing
502
504
  rescue Encoding::InvalidByteSequenceError
503
505
  # do nothing
504
506
  rescue Encoding::UndefinedConversionError
@@ -538,7 +540,7 @@ module RubyParserStuff
538
540
  header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
539
541
 
540
542
  first = header.first || ""
541
- encoding, str = "utf-8", str[3..-1] if first =~ /\A\xEF\xBB\xBF/
543
+ encoding, str = "utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/
542
544
 
543
545
  encoding = $1.strip if header.find { |s|
544
546
  s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
@@ -598,7 +600,9 @@ module RubyParserStuff
598
600
  case ttype
599
601
  when :str then
600
602
  if htype == :str
601
- head.last << tail.last
603
+ a, b = head.last, tail.last
604
+ b = b.dup.force_encoding a.encoding unless Encoding.compatible?(a, b)
605
+ a << b
602
606
  elsif htype == :dstr and head.size == 2 then
603
607
  head.last << tail.last
604
608
  else
@@ -702,6 +706,15 @@ module RubyParserStuff
702
706
  result
703
707
  end
704
708
 
709
+ def new_begin val
710
+ _, lineno, body, _ = val
711
+
712
+ result = body ? s(:begin, body) : s(:nil)
713
+ result.line lineno
714
+
715
+ result
716
+ end
717
+
705
718
  def new_body val
706
719
  body, resbody, elsebody, ensurebody = val
707
720
 
@@ -729,7 +742,10 @@ module RubyParserStuff
729
742
  result = block_append(result, elsebody)
730
743
  end
731
744
 
732
- result = s(:ensure, result, ensurebody).compact.line result.line if ensurebody
745
+ if ensurebody
746
+ lineno = (result || ensurebody).line
747
+ result = s(:ensure, result, ensurebody).compact.line lineno
748
+ end
733
749
 
734
750
  result
735
751
  end
@@ -831,6 +847,8 @@ module RubyParserStuff
831
847
  (_, line), name, _, args, body, nil_body_line, * = val
832
848
  body ||= s(:nil).line nil_body_line
833
849
 
850
+ args.line line
851
+
834
852
  result = s(:defn, name.to_sym, args).line line
835
853
 
836
854
  if body then
@@ -847,14 +865,17 @@ module RubyParserStuff
847
865
  end
848
866
 
849
867
  def new_defs val
850
- recv, (name, _line), args, body = val[1], val[4], val[6], val[7]
851
- line, _ = val[5]
868
+ _, recv, _, _, name, (_in_def, line), args, body, _ = val
869
+
852
870
  body ||= s(:nil).line line
853
871
 
854
872
  args.line line
855
873
 
856
874
  result = s(:defs, recv, name.to_sym, args)
857
875
 
876
+ # TODO: remove_begin
877
+ # TODO: reduce_nodes
878
+
858
879
  if body then
859
880
  if body.sexp_type == :block then
860
881
  result.push(*body.sexp_body)
@@ -879,7 +900,9 @@ module RubyParserStuff
879
900
  end
880
901
 
881
902
  def new_hash val
882
- s(:hash, *val[2].values).line(val[1])
903
+ _, line, assocs = val
904
+
905
+ s(:hash).line(line).concat assocs.values
883
906
  end
884
907
 
885
908
  def new_if c, t, f
@@ -1138,6 +1161,7 @@ module RubyParserStuff
1138
1161
  def new_string val
1139
1162
  str, = val
1140
1163
  str.force_encoding("UTF-8")
1164
+ # TODO: remove:
1141
1165
  str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
1142
1166
  result = s(:str, str).line lexer.lineno
1143
1167
  self.lexer.fixup_lineno str.count("\n")
@@ -1232,20 +1256,23 @@ module RubyParserStuff
1232
1256
  result
1233
1257
  end
1234
1258
 
1235
- def new_xstring str
1236
- if str then
1237
- case str.sexp_type
1259
+ def new_xstring val
1260
+ _, node = val
1261
+
1262
+ node ||= s(:str, "").line lexer.lineno
1263
+
1264
+ if node then
1265
+ case node.sexp_type
1238
1266
  when :str
1239
- str.sexp_type = :xstr
1267
+ node.sexp_type = :xstr
1240
1268
  when :dstr
1241
- str.sexp_type = :dxstr
1269
+ node.sexp_type = :dxstr
1242
1270
  else
1243
- str = s(:dxstr, "", str)
1271
+ node = s(:dxstr, "", node).line node.line
1244
1272
  end
1245
- str
1246
- else
1247
- s(:xstr, "")
1248
1273
  end
1274
+
1275
+ node
1249
1276
  end
1250
1277
 
1251
1278
  def new_yield args = nil
@@ -1266,6 +1293,7 @@ module RubyParserStuff
1266
1293
  token = self.lexer.next_token
1267
1294
 
1268
1295
  if token and token.first != RubyLexer::EOF then
1296
+ self.last_token_type = token
1269
1297
  return token
1270
1298
  else
1271
1299
  return [false, false]
@@ -1324,6 +1352,7 @@ module RubyParserStuff
1324
1352
  self.in_single = 0
1325
1353
  self.env.reset
1326
1354
  self.comments.clear
1355
+ self.last_token_type = nil
1327
1356
  end
1328
1357
 
1329
1358
  def ret_args node