ruby_parser 3.14.0 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ruby_parser.yy CHANGED
@@ -14,6 +14,10 @@ class Ruby24Parser
14
14
  class Ruby25Parser
15
15
  #elif V == 26
16
16
  class Ruby26Parser
17
+ #elif V == 27
18
+ class Ruby27Parser
19
+ #elif V == 30
20
+ class Ruby30Parser
17
21
  #else
18
22
  fail "version not specified or supported on code generation"
19
23
  #endif
@@ -44,6 +48,9 @@ token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS
44
48
  #if V >= 23
45
49
  tLONELY
46
50
  #endif
51
+ #if V >= 26
52
+ tBDOT2 tBDOT3
53
+ #endif
47
54
 
48
55
  preclow
49
56
  nonassoc tLOWEST
@@ -55,7 +62,7 @@ preclow
55
62
  right tEQL tOP_ASGN
56
63
  left kRESCUE_MOD
57
64
  right tEH tCOLON
58
- nonassoc tDOT2 tDOT3
65
+ nonassoc tDOT2 tDOT3 tBDOT2 tBDOT3
59
66
  left tOROP
60
67
  left tANDOP
61
68
  nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
@@ -78,6 +85,9 @@ rule
78
85
  top_compstmt
79
86
  {
80
87
  result = new_compstmt val
88
+
89
+ lexer.cond.pop # local_pop
90
+ lexer.cmdarg.pop
81
91
  }
82
92
 
83
93
  top_compstmt: top_stmts opt_terms
@@ -286,13 +296,15 @@ rule
286
296
  }
287
297
  | primary_value tCOLON2 tCONSTANT tOP_ASGN command_rhs
288
298
  {
289
- result = s(:op_asgn, val[0], val[4], val[2], val[3])
290
- debug20 4, val, result
299
+ lhs1, _, lhs2, op, rhs = val
300
+
301
+ result = s(:op_asgn, lhs1, rhs, lhs2.to_sym, op.to_sym)
291
302
  }
292
303
  | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_rhs
293
304
  {
294
- result = s(:op_asgn, val[0], val[4], val[2], val[3])
295
- debug20 5, val, result
305
+ lhs1, _, lhs2, op, rhs = val
306
+
307
+ result = s(:op_asgn, lhs1, rhs, lhs2.to_sym, op.to_sym)
296
308
  }
297
309
  | backref tOP_ASGN command_rhs
298
310
  {
@@ -852,6 +864,24 @@ rule
852
864
  result = s(:dot3, v1, v2).line v1.line
853
865
  }
854
866
  #endif
867
+
868
+ #if V >= 27
869
+ | tBDOT2 arg
870
+ {
871
+ _, v2, = val
872
+ v1 = nil
873
+
874
+ result = s(:dot2, v1, v2).line v2.line
875
+ }
876
+ | tBDOT3 arg
877
+ {
878
+ _, v2 = val
879
+ v1 = nil
880
+
881
+ result = s(:dot3, v1, v2).line v2.line
882
+ }
883
+ #endif
884
+
855
885
  | arg tPLUS arg
856
886
  {
857
887
  result = new_call val[0], :+, argl(val[2])
@@ -1063,17 +1093,14 @@ rule
1063
1093
  | args opt_block_arg
1064
1094
  {
1065
1095
  result = call_args val
1066
- result = self.arg_blk_pass val[0], val[1]
1067
1096
  }
1068
1097
  | assocs opt_block_arg
1069
1098
  {
1070
- result = call_args [array_to_hash(val[0])]
1071
- result = self.arg_blk_pass result, val[1]
1099
+ result = call_args [array_to_hash(val[0]), val[1]]
1072
1100
  }
1073
1101
  | args tCOMMA assocs opt_block_arg
1074
1102
  {
1075
- result = call_args [val[0], array_to_hash(val[2])]
1076
- result = self.arg_blk_pass result, val[3]
1103
+ result = call_args [val[0], array_to_hash(val[2]), val[3]]
1077
1104
  }
1078
1105
  | block_arg
1079
1106
  {
@@ -1081,12 +1108,39 @@ rule
1081
1108
  }
1082
1109
 
1083
1110
  command_args: {
1084
- result = lexer.cmdarg.store true
1111
+ # parse26.y line 2200
1112
+
1113
+ # If call_args starts with a open paren '(' or
1114
+ # '[', look-ahead reading of the letters calls
1115
+ # CMDARG_PUSH(0), but the push must be done
1116
+ # after CMDARG_PUSH(1). So this code makes them
1117
+ # consistent by first cancelling the premature
1118
+ # CMDARG_PUSH(0), doing CMDARG_PUSH(1), and
1119
+ # finally redoing CMDARG_PUSH(0).
1120
+
1121
+ result = yychar = self.last_token_type.first
1122
+ lookahead = [:tLPAREN, :tLPAREN_ARG, :tLPAREN2, :tLBRACK, :tLBRACK2].include?(yychar)
1123
+ lexer.cmdarg.pop if lookahead
1124
+ lexer.cmdarg.push true
1125
+ lexer.cmdarg.push false if lookahead
1085
1126
  }
1086
1127
  call_args
1087
1128
  {
1088
- lexer.cmdarg.restore val[0]
1089
- result = val[1]
1129
+ yychar, args = val
1130
+
1131
+ # call_args can be followed by tLBRACE_ARG (that
1132
+ # does CMDARG_PUSH(0) in the lexer) but the push
1133
+ # must be done after CMDARG_POP() in the parser.
1134
+ # So this code does CMDARG_POP() to pop 0 pushed
1135
+ # by tLBRACE_ARG, CMDARG_POP() to pop 1 pushed
1136
+ # by command_args, and CMDARG_PUSH(0) to restore
1137
+ # back the flag set by tLBRACE_ARG.
1138
+
1139
+ lookahead = [:tLBRACE_ARG].include?(yychar)
1140
+ lexer.cmdarg.pop if lookahead
1141
+ lexer.cmdarg.pop
1142
+ lexer.cmdarg.push false if lookahead
1143
+ result = args
1090
1144
  }
1091
1145
 
1092
1146
  block_arg: tAMPER arg_value
@@ -1104,8 +1158,9 @@ rule
1104
1158
  args: arg_value
1105
1159
  {
1106
1160
  arg, = val
1161
+ lineno = arg.line || lexer.lineno # HACK
1107
1162
 
1108
- result = s(:array, arg).line arg.line
1163
+ result = s(:array, arg).line lineno
1109
1164
  }
1110
1165
  | tSTAR arg_value
1111
1166
  {
@@ -1117,9 +1172,11 @@ rule
1117
1172
  args, _, id = val
1118
1173
  result = self.list_append args, id
1119
1174
  }
1120
- | args tCOMMA tSTAR { result = lexer.lineno } arg_value
1175
+ | args tCOMMA tSTAR arg_value
1121
1176
  {
1122
- args, _, _, line, id = val
1177
+ # TODO: the line number from tSTAR has been dropped
1178
+ args, _, _, id = val
1179
+ line = lexer.lineno
1123
1180
  result = self.list_append args, s(:splat, id).line(line)
1124
1181
  }
1125
1182
 
@@ -1140,7 +1197,6 @@ rule
1140
1197
  }
1141
1198
  | args tCOMMA tSTAR arg_value
1142
1199
  {
1143
- # FIX: bad shift/reduce conflict with rhs' comma star prod
1144
1200
  # TODO: make all tXXXX terminals include lexer.lineno
1145
1201
  arg, _, _, splat = val
1146
1202
  result = self.arg_concat arg, splat
@@ -1168,21 +1224,13 @@ rule
1168
1224
  }
1169
1225
  | k_begin
1170
1226
  {
1227
+ lexer.cmdarg.push false
1171
1228
  result = self.lexer.lineno
1172
- # TODO:
1173
- # $<val>1 = cmdarg_stack;
1174
- # CMDARG_SET(0);
1175
1229
  }
1176
1230
  bodystmt k_end
1177
1231
  {
1178
- # TODO: CMDARG_SET($<val>1);
1179
- unless val[2] then
1180
- result = s(:nil)
1181
- else
1182
- result = s(:begin, val[2])
1183
- end
1184
-
1185
- result.line = val[1]
1232
+ lexer.cmdarg.pop
1233
+ result = new_begin val
1186
1234
  }
1187
1235
  | tLPAREN_ARG
1188
1236
  {
@@ -1195,18 +1243,14 @@ rule
1195
1243
  result = s(:begin).line line
1196
1244
  }
1197
1245
  | tLPAREN_ARG
1198
- {
1199
- result = lexer.cmdarg.store false
1200
- }
1201
1246
  stmt
1202
1247
  {
1203
1248
  lexer.lex_state = EXPR_ENDARG
1204
1249
  }
1205
1250
  rparen
1206
1251
  {
1207
- _, cmdarg, stmt, _, _, = val
1208
- warning "(...) interpreted as grouped expression"
1209
- lexer.cmdarg.restore cmdarg
1252
+ _, stmt, _, _, = val
1253
+ # warning "(...) interpreted as grouped expression"
1210
1254
  result = stmt
1211
1255
  }
1212
1256
  | tLPAREN compstmt tRPAREN
@@ -1389,48 +1433,61 @@ rule
1389
1433
  }
1390
1434
  | k_def fname
1391
1435
  {
1392
- result = [self.in_def, self.lexer.cmdarg.stack.dup]
1436
+ result = self.in_def
1393
1437
 
1394
- self.comments.push self.lexer.comments
1395
- self.in_def = true
1438
+ self.in_def = true # group = local_push
1396
1439
  self.env.extend
1397
- # TODO: local->cmdargs = cmdarg_stack;
1398
- # TODO: port local_push_gen and local_pop_gen
1399
- lexer.cmdarg.stack.replace [false]
1440
+ lexer.cmdarg.push false
1441
+ lexer.cond.push false
1442
+
1443
+ self.comments.push self.lexer.comments
1400
1444
  }
1401
1445
  f_arglist bodystmt { result = lexer.lineno } k_end
1402
1446
  {
1403
- in_def, cmdarg = val[2]
1447
+ in_def = val[2]
1404
1448
 
1405
1449
  result = new_defn val
1406
1450
 
1407
- lexer.cmdarg.stack.replace cmdarg
1451
+ lexer.cond.pop # group = local_pop
1452
+ lexer.cmdarg.pop
1408
1453
  self.env.unextend
1409
1454
  self.in_def = in_def
1455
+
1410
1456
  self.lexer.comments # we don't care about comments in the body
1411
1457
  }
1412
1458
  | k_def singleton dot_or_colon
1413
1459
  {
1414
- self.comments.push self.lexer.comments
1415
1460
  lexer.lex_state = EXPR_FNAME
1416
1461
  }
1417
1462
  fname
1418
1463
  {
1419
- self.in_single += 1
1464
+ result = [self.in_def, lexer.lineno]
1465
+
1466
+ self.in_single += 1 # TODO: remove?
1467
+
1468
+ self.in_def = true # local_push
1420
1469
  self.env.extend
1421
- lexer.lex_state = EXPR_ENDFN # force for args
1422
- result = [lexer.lineno, self.lexer.cmdarg.stack.dup]
1423
- lexer.cmdarg.stack.replace [false]
1470
+ lexer.cmdarg.push false
1471
+ lexer.cond.push false
1472
+
1473
+ lexer.lex_state = EXPR_ENDFN|EXPR_LABEL
1474
+ self.comments.push self.lexer.comments
1424
1475
  }
1425
1476
  f_arglist bodystmt k_end
1426
1477
  {
1427
- _, cmdarg = val[5]
1428
- result = new_defs val
1478
+ _, _recv, _, _, _name, (in_def, _lineno), _args, _body, _ = val
1429
1479
 
1430
- lexer.cmdarg.stack.replace cmdarg
1480
+ result = new_defs val
1431
1481
 
1482
+ lexer.cond.pop # group = local_pop
1483
+ lexer.cmdarg.pop
1432
1484
  self.env.unextend
1485
+ self.in_def = in_def
1486
+
1433
1487
  self.in_single -= 1
1488
+
1489
+ # TODO: restore cur_arg ? what's cur_arg?
1490
+
1434
1491
  self.lexer.comments # we don't care about comments in the body
1435
1492
  }
1436
1493
  | kBREAK
@@ -1715,20 +1772,19 @@ opt_block_args_tail: tCOMMA block_args_tail
1715
1772
  }
1716
1773
  f_larglist
1717
1774
  {
1718
- result = lexer.cmdarg.store(false)
1775
+ lexer.cmdarg.push false
1719
1776
  }
1720
1777
  lambda_body
1721
1778
  {
1722
- (line, lpar), args, cmdarg, body = val
1779
+ (line, lpar), args, _cmdarg, body = val
1723
1780
  lexer.lpar_beg = lpar
1724
1781
 
1725
- lexer.cmdarg.restore cmdarg
1726
- lexer.cmdarg.lexpop
1782
+ lexer.cmdarg.pop
1727
1783
 
1728
1784
  call = s(:lambda).line line
1729
1785
  result = new_iter call, args, body
1730
1786
  result.line = line
1731
- self.env.unextend
1787
+ self.env.unextend # TODO: dynapush & dynapop
1732
1788
  }
1733
1789
 
1734
1790
  f_larglist: tLPAREN2 f_args opt_bv_decl rparen
@@ -1881,7 +1937,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1881
1937
  }
1882
1938
 
1883
1939
  do_body: { self.env.extend :dynamic; result = self.lexer.lineno }
1884
- { result = lexer.cmdarg.store(false) }
1940
+ { lexer.cmdarg.push false }
1885
1941
  opt_block_param
1886
1942
  #if V >= 25
1887
1943
  bodystmt
@@ -1889,11 +1945,11 @@ opt_block_args_tail: tCOMMA block_args_tail
1889
1945
  compstmt
1890
1946
  #endif
1891
1947
  {
1892
- line, cmdarg, param, cmpstmt = val
1948
+ line, _cmdarg, param, cmpstmt = val
1893
1949
 
1894
1950
  result = new_do_body param, cmpstmt, line
1951
+ lexer.cmdarg.pop
1895
1952
  self.env.unextend
1896
- lexer.cmdarg.restore cmdarg
1897
1953
  }
1898
1954
 
1899
1955
  case_body: k_when
@@ -1993,7 +2049,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1993
2049
 
1994
2050
  xstring: tXSTRING_BEG xstring_contents tSTRING_END
1995
2051
  {
1996
- result = new_xstring val[1]
2052
+ result = new_xstring val
1997
2053
  # TODO: dedent?!?! SERIOUSLY?!?
1998
2054
  }
1999
2055
 
@@ -2135,12 +2191,13 @@ regexp_contents: none
2135
2191
  result = [lexer.lex_strterm,
2136
2192
  lexer.brace_nest,
2137
2193
  lexer.string_nest, # TODO: remove
2138
- lexer.cond.store,
2139
- lexer.cmdarg.store,
2140
2194
  lexer.lex_state,
2141
2195
  lexer.lineno,
2142
2196
  ]
2143
2197
 
2198
+ lexer.cmdarg.push false
2199
+ lexer.cond.push false
2200
+
2144
2201
  lexer.lex_strterm = nil
2145
2202
  lexer.brace_nest = 0
2146
2203
  lexer.string_nest = 0
@@ -2152,14 +2209,15 @@ regexp_contents: none
2152
2209
  {
2153
2210
  _, memo, stmt, _ = val
2154
2211
 
2155
- lex_strterm, brace_nest, string_nest, oldcond, oldcmdarg, oldlex_state, line = memo
2212
+ lex_strterm, brace_nest, string_nest, oldlex_state, line = memo
2213
+ # TODO: heredoc_indent
2156
2214
 
2157
2215
  lexer.lex_strterm = lex_strterm
2158
2216
  lexer.brace_nest = brace_nest
2159
2217
  lexer.string_nest = string_nest
2160
2218
 
2161
- lexer.cond.restore oldcond
2162
- lexer.cmdarg.restore oldcmdarg
2219
+ lexer.cmdarg.pop
2220
+ lexer.cond.pop
2163
2221
 
2164
2222
  lexer.lex_state = oldlex_state
2165
2223
 
@@ -1,4 +1,5 @@
1
1
  # encoding: ASCII-8BIT
2
+ # TODO: remove
2
3
 
3
4
  require "sexp"
4
5
  require "ruby_lexer"
@@ -28,7 +29,7 @@ class Sexp
28
29
  end
29
30
 
30
31
  module RubyParserStuff
31
- VERSION = "3.14.0"
32
+ VERSION = "3.16.0"
32
33
 
33
34
  attr_accessor :lexer, :in_def, :in_single, :file
34
35
  attr_accessor :in_kwarg
@@ -45,6 +46,11 @@ module RubyParserStuff
45
46
 
46
47
  attr_accessor :canonicalize_conditions
47
48
 
49
+ ##
50
+ # The last token type returned from #next_token
51
+
52
+ attr_accessor :last_token_type
53
+
48
54
  $good20 = []
49
55
 
50
56
  %w[
@@ -109,7 +115,7 @@ module RubyParserStuff
109
115
  def initialize(options = {})
110
116
  super()
111
117
 
112
- v = self.class.name[/2\d/]
118
+ v = self.class.name[/[23]\d/]
113
119
  raise "Bad Class name #{self.class}" unless v
114
120
 
115
121
  self.lexer = RubyLexer.new v && v.to_i
@@ -124,12 +130,6 @@ module RubyParserStuff
124
130
  self.reset
125
131
  end
126
132
 
127
- def arg_blk_pass node1, node2 # TODO: nuke
128
- node1 = s(:arglist, node1) unless ARG_TYPES[node1.sexp_type]
129
- node1 << node2 if node2
130
- node1
131
- end
132
-
133
133
  def arg_concat node1, node2 # TODO: nuke
134
134
  raise "huh" unless node2
135
135
 
@@ -499,6 +499,8 @@ module RubyParserStuff
499
499
  str.encode! Encoding::UTF_8
500
500
  break
501
501
  end
502
+ rescue ArgumentError # unknown encoding name
503
+ # do nothing
502
504
  rescue Encoding::InvalidByteSequenceError
503
505
  # do nothing
504
506
  rescue Encoding::UndefinedConversionError
@@ -538,7 +540,7 @@ module RubyParserStuff
538
540
  header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
539
541
 
540
542
  first = header.first || ""
541
- encoding, str = "utf-8", str[3..-1] if first =~ /\A\xEF\xBB\xBF/
543
+ encoding, str = "utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/
542
544
 
543
545
  encoding = $1.strip if header.find { |s|
544
546
  s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
@@ -598,7 +600,9 @@ module RubyParserStuff
598
600
  case ttype
599
601
  when :str then
600
602
  if htype == :str
601
- head.last << tail.last
603
+ a, b = head.last, tail.last
604
+ b = b.dup.force_encoding a.encoding unless Encoding.compatible?(a, b)
605
+ a << b
602
606
  elsif htype == :dstr and head.size == 2 then
603
607
  head.last << tail.last
604
608
  else
@@ -702,6 +706,15 @@ module RubyParserStuff
702
706
  result
703
707
  end
704
708
 
709
+ def new_begin val
710
+ _, lineno, body, _ = val
711
+
712
+ result = body ? s(:begin, body) : s(:nil)
713
+ result.line lineno
714
+
715
+ result
716
+ end
717
+
705
718
  def new_body val
706
719
  body, resbody, elsebody, ensurebody = val
707
720
 
@@ -729,7 +742,10 @@ module RubyParserStuff
729
742
  result = block_append(result, elsebody)
730
743
  end
731
744
 
732
- result = s(:ensure, result, ensurebody).compact.line result.line if ensurebody
745
+ if ensurebody
746
+ lineno = (result || ensurebody).line
747
+ result = s(:ensure, result, ensurebody).compact.line lineno
748
+ end
733
749
 
734
750
  result
735
751
  end
@@ -831,6 +847,8 @@ module RubyParserStuff
831
847
  (_, line), name, _, args, body, nil_body_line, * = val
832
848
  body ||= s(:nil).line nil_body_line
833
849
 
850
+ args.line line
851
+
834
852
  result = s(:defn, name.to_sym, args).line line
835
853
 
836
854
  if body then
@@ -847,14 +865,17 @@ module RubyParserStuff
847
865
  end
848
866
 
849
867
  def new_defs val
850
- recv, (name, _line), args, body = val[1], val[4], val[6], val[7]
851
- line, _ = val[5]
868
+ _, recv, _, _, name, (_in_def, line), args, body, _ = val
869
+
852
870
  body ||= s(:nil).line line
853
871
 
854
872
  args.line line
855
873
 
856
874
  result = s(:defs, recv, name.to_sym, args)
857
875
 
876
+ # TODO: remove_begin
877
+ # TODO: reduce_nodes
878
+
858
879
  if body then
859
880
  if body.sexp_type == :block then
860
881
  result.push(*body.sexp_body)
@@ -879,7 +900,9 @@ module RubyParserStuff
879
900
  end
880
901
 
881
902
  def new_hash val
882
- s(:hash, *val[2].values).line(val[1])
903
+ _, line, assocs = val
904
+
905
+ s(:hash).line(line).concat assocs.values
883
906
  end
884
907
 
885
908
  def new_if c, t, f
@@ -1138,6 +1161,7 @@ module RubyParserStuff
1138
1161
  def new_string val
1139
1162
  str, = val
1140
1163
  str.force_encoding("UTF-8")
1164
+ # TODO: remove:
1141
1165
  str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
1142
1166
  result = s(:str, str).line lexer.lineno
1143
1167
  self.lexer.fixup_lineno str.count("\n")
@@ -1232,20 +1256,23 @@ module RubyParserStuff
1232
1256
  result
1233
1257
  end
1234
1258
 
1235
- def new_xstring str
1236
- if str then
1237
- case str.sexp_type
1259
+ def new_xstring val
1260
+ _, node = val
1261
+
1262
+ node ||= s(:str, "").line lexer.lineno
1263
+
1264
+ if node then
1265
+ case node.sexp_type
1238
1266
  when :str
1239
- str.sexp_type = :xstr
1267
+ node.sexp_type = :xstr
1240
1268
  when :dstr
1241
- str.sexp_type = :dxstr
1269
+ node.sexp_type = :dxstr
1242
1270
  else
1243
- str = s(:dxstr, "", str)
1271
+ node = s(:dxstr, "", node).line node.line
1244
1272
  end
1245
- str
1246
- else
1247
- s(:xstr, "")
1248
1273
  end
1274
+
1275
+ node
1249
1276
  end
1250
1277
 
1251
1278
  def new_yield args = nil
@@ -1266,6 +1293,7 @@ module RubyParserStuff
1266
1293
  token = self.lexer.next_token
1267
1294
 
1268
1295
  if token and token.first != RubyLexer::EOF then
1296
+ self.last_token_type = token
1269
1297
  return token
1270
1298
  else
1271
1299
  return [false, false]
@@ -1324,6 +1352,7 @@ module RubyParserStuff
1324
1352
  self.in_single = 0
1325
1353
  self.env.reset
1326
1354
  self.comments.clear
1355
+ self.last_token_type = nil
1327
1356
  end
1328
1357
 
1329
1358
  def ret_args node