ruby_parser 3.8.2 → 3.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,12 +19,8 @@ token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS
19
19
  tWORDS_BEG tQWORDS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END
20
20
  tSTRING tSYMBOL tNL tEH tCOLON tCOMMA tSPACE tSEMI tLAMBDA
21
21
  tLAMBEG tDSTAR tCHAR tSYMBOLS_BEG tQSYMBOLS_BEG tSTRING_DEND tUBANG
22
- #if defined(RUBY21) || defined(RUBY22) || defined(RUBY23))
23
22
  tRATIONAL tIMAGINARY
24
- #endif
25
- #if defined(RUBY22 || defined(RUBY23))
26
23
  tLABEL_END
27
- #endif
28
24
  tLONELY
29
25
 
30
26
  prechigh
@@ -695,9 +691,7 @@ rule
695
691
  {
696
692
  result = new_call val[0], :**, argl(val[2])
697
693
  }
698
- #if defined(RUBY21) || defined(RUBY22 || defined(RUBY23))
699
694
  | tUMINUS_NUM simple_numeric tPOW arg
700
- #endif
701
695
  {
702
696
  result = new_call(new_call(s(:lit, val[1]), :"**", argl(val[3])), :"-@")
703
697
  }
@@ -1729,12 +1723,11 @@ opt_block_args_tail: tCOMMA block_args_tail
1729
1723
 
1730
1724
  word_list: none
1731
1725
  {
1732
- result = s(:array)
1726
+ result = new_word_list
1733
1727
  }
1734
1728
  | word_list word tSPACE
1735
1729
  {
1736
- word = val[1][0] == :evstr ? s(:dstr, "", val[1]) : val[1]
1737
- result = val[0].dup << word
1730
+ result = val[0].dup << new_word_list_entry(val)
1738
1731
  }
1739
1732
 
1740
1733
  word: string_content
@@ -1754,23 +1747,11 @@ opt_block_args_tail: tCOMMA block_args_tail
1754
1747
 
1755
1748
  symbol_list: none
1756
1749
  {
1757
- result = s(:array)
1750
+ result = new_symbol_list
1758
1751
  }
1759
1752
  | symbol_list word tSPACE
1760
1753
  {
1761
- list, sym, _ = val
1762
-
1763
- case sym[0]
1764
- when :dstr then
1765
- sym[0] = :dsym
1766
- when :str then
1767
- sym = s(:lit, sym.last.to_sym)
1768
- else
1769
- debug20 24
1770
- sym = s(:dsym, "", result)
1771
- end
1772
-
1773
- result = list.dup << sym
1754
+ result = val[0].dup << new_symbol_list_entry(val)
1774
1755
  }
1775
1756
 
1776
1757
  qwords: tQWORDS_BEG tSPACE tSTRING_END
@@ -1793,20 +1774,20 @@ opt_block_args_tail: tCOMMA block_args_tail
1793
1774
 
1794
1775
  qword_list: none
1795
1776
  {
1796
- result = s(:array)
1777
+ result = new_qword_list
1797
1778
  }
1798
1779
  | qword_list tSTRING_CONTENT tSPACE
1799
1780
  {
1800
- result = val[0].dup << s(:str, val[1])
1781
+ result = val[0].dup << new_qword_list_entry(val)
1801
1782
  }
1802
1783
 
1803
1784
  qsym_list: none
1804
1785
  {
1805
- result = s(:array)
1786
+ result = new_qsym_list
1806
1787
  }
1807
1788
  | qsym_list tSTRING_CONTENT tSPACE
1808
1789
  {
1809
- result = val[0].dup << s(:lit, val[1].to_sym)
1790
+ result = val[0].dup << new_qsym_list_entry(val)
1810
1791
  }
1811
1792
 
1812
1793
  string_contents: none
@@ -1854,10 +1835,10 @@ regexp_contents: none
1854
1835
  }
1855
1836
  | tSTRING_DBEG
1856
1837
  {
1857
- result = [lexer.lex_strterm,
1858
- lexer.brace_nest,
1838
+ result = [lexer.lex_strterm,
1839
+ lexer.brace_nest,
1859
1840
  lexer.string_nest, # TODO: remove
1860
- lexer.cond.store,
1841
+ lexer.cond.store,
1861
1842
  lexer.cmdarg.store,
1862
1843
  lexer.lex_state,
1863
1844
  ]
@@ -1938,21 +1919,17 @@ regexp_contents: none
1938
1919
  end
1939
1920
  }
1940
1921
 
1941
- #if defined(RUBY21) || defined(RUBY22 || defined(RUBY23))
1942
1922
  numeric: simple_numeric
1943
1923
  | tUMINUS_NUM simple_numeric
1944
- #endif
1945
1924
  {
1946
1925
  result = -val[1] # TODO: pt_testcase
1947
1926
  }
1948
1927
 
1949
- #if defined(RUBY21) || defined(RUBY22) || defined(RUBY23))
1950
1928
  simple_numeric: tINTEGER
1951
1929
  | tFLOAT
1952
1930
  | tRATIONAL
1953
1931
  | tIMAGINARY
1954
1932
 
1955
- #endif
1956
1933
  user_variable: tIDENTIFIER
1957
1934
  | tIVAR
1958
1935
  | tGVAR
@@ -2039,15 +2016,15 @@ keyword_variable: kNIL { result = s(:nil) }
2039
2016
  {
2040
2017
  result = args val
2041
2018
  }
2042
- | f_kwarg opt_f_block_arg
2019
+ | f_kwarg opt_f_block_arg
2043
2020
  {
2044
2021
  result = args val
2045
2022
  }
2046
- | f_kwrest opt_f_block_arg
2023
+ | f_kwrest opt_f_block_arg
2047
2024
  {
2048
2025
  result = args val
2049
2026
  }
2050
- | f_block_arg
2027
+ | f_block_arg
2051
2028
 
2052
2029
  opt_args_tail: tCOMMA args_tail
2053
2030
  {
@@ -2145,7 +2122,6 @@ keyword_variable: kNIL { result = s(:nil) }
2145
2122
  result = identifier
2146
2123
  }
2147
2124
 
2148
- #if defined(RUBY22) || defined(RUBY23))
2149
2125
  f_arg_asgn: f_norm_arg
2150
2126
 
2151
2127
  f_arg_item: f_arg_asgn
@@ -2153,13 +2129,6 @@ keyword_variable: kNIL { result = s(:nil) }
2153
2129
  {
2154
2130
  result = val[1]
2155
2131
  }
2156
- #else
2157
- f_arg_item: f_norm_arg
2158
- | tLPAREN f_margs rparen
2159
- {
2160
- result = val[1]
2161
- }
2162
- #endif
2163
2132
 
2164
2133
  f_arg: f_arg_item
2165
2134
  {
@@ -258,7 +258,18 @@ class RubyLexer
258
258
 
259
259
  def int_with_base base
260
260
  rb_compile_error "Invalid numeric format" if matched =~ /__/
261
- return result(:expr_end, :tINTEGER, matched.to_i(base))
261
+
262
+ text = matched
263
+ case
264
+ when text.end_with?('ri')
265
+ return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
266
+ when text.end_with?('r')
267
+ return result(:expr_end, :tRATIONAL, Rational(text.chop.to_i(base)))
268
+ when text.end_with?('i')
269
+ return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
270
+ else
271
+ return result(:expr_end, :tINTEGER, text.to_i(base))
272
+ end
262
273
  end
263
274
 
264
275
  def is_arg?
@@ -406,7 +417,17 @@ class RubyLexer
406
417
 
407
418
  def process_float text
408
419
  rb_compile_error "Invalid numeric format" if text =~ /__/
409
- return result(:expr_end, :tFLOAT, text.to_f)
420
+
421
+ case
422
+ when text.end_with?('ri')
423
+ return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop)))
424
+ when text.end_with?('r')
425
+ return result(:expr_end, :tRATIONAL, Rational(text.chop))
426
+ when text.end_with?('i')
427
+ return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_f))
428
+ else
429
+ return result(:expr_end, :tFLOAT, text.to_f)
430
+ end
410
431
  end
411
432
 
412
433
  def process_gvar text
@@ -903,6 +924,17 @@ class RubyLexer
903
924
  ss.check re
904
925
  end
905
926
 
927
+ def eat_whitespace
928
+ r = scan(/\s+/)
929
+ self.extra_lineno += r.count("\n") if r
930
+ r
931
+ end
932
+
933
+ def fixup_lineno extra = 0
934
+ self.lineno += self.extra_lineno + extra
935
+ self.extra_lineno = 0
936
+ end
937
+
906
938
  def scanner_class # TODO: design this out of oedipus_lex. or something.
907
939
  RPStringScanner
908
940
  end
@@ -1043,7 +1075,8 @@ class RubyLexer
1043
1075
  def unescape s
1044
1076
  r = ESCAPES[s]
1045
1077
 
1046
- self.extra_lineno -= 1 if r && s == "n"
1078
+ self.extra_lineno += 1 if s == "\n" # eg backslash newline strings
1079
+ self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
1047
1080
 
1048
1081
  return r if r
1049
1082
 
@@ -1131,10 +1164,10 @@ class RubyLexer
1131
1164
  when 'q' then
1132
1165
  [:tSTRING_BEG, STR_SQUOTE]
1133
1166
  when 'W' then
1134
- scan(/\s*/)
1167
+ eat_whitespace
1135
1168
  [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1136
1169
  when 'w' then
1137
- scan(/\s*/)
1170
+ eat_whitespace
1138
1171
  [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1139
1172
  when 'x' then
1140
1173
  [:tXSTRING_BEG, STR_XQUOTE]
@@ -1144,10 +1177,10 @@ class RubyLexer
1144
1177
  self.lex_state = :expr_fname
1145
1178
  [:tSYMBEG, STR_SSYM]
1146
1179
  when 'I' then
1147
- scan(/\s*/)
1180
+ eat_whitespace
1148
1181
  [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1149
1182
  when 'i' then
1150
- scan(/\s*/)
1183
+ eat_whitespace
1151
1184
  [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1152
1185
  end
1153
1186
 
@@ -1177,7 +1210,7 @@ class RubyLexer
1177
1210
  return :tSTRING_END, nil
1178
1211
  end
1179
1212
 
1180
- space = true if qwords and scan(/\s+/)
1213
+ space = true if qwords and eat_whitespace
1181
1214
 
1182
1215
  if self.string_nest == 0 && scan(/#{term_re}/) then
1183
1216
  if qwords then
@@ -12,12 +12,12 @@ macro
12
12
  SIMPLE_STRING /(#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
13
13
  SSTRING /(\\.|[^\'])*/
14
14
 
15
- INT_DEC /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0d[0-9_]+)/i
16
- INT_HEX /[+]?0x[a-f0-9_]+/i
17
- INT_BIN /[+]?0b[01_]+/i
18
- INT_OCT /[+]?0o?[0-7_]+|0o/i
19
- FLOAT /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?\b|[+]?[\d_]+e[+-]?[\d_]+\b/i
20
- INT_DEC2 /[+]?\d[0-9_]*(?![e])/i
15
+ INT_DEC /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
16
+ INT_HEX /[+]?0x[a-f0-9_]+(ri|r|i)?/i
17
+ INT_BIN /[+]?0b[01_]+(ri|r|i)?/i
18
+ INT_OCT /[+]?0o?[0-7_]+(ri|r|i)?|0o(ri|r|i)?/i
19
+ FLOAT /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?(?:(ri|r|i)\b)?|[+]?[\d_]+e[+-]?[\d_]+(?:(ri|r|i)\b)?/i
20
+ INT_DEC2 /[+]?\d[0-9_]*(?![e])((ri|r|i)\b)?/i
21
21
 
22
22
  NUM_BAD /[+]?0[xbd]\b/i
23
23
  INT_OCT_BAD /[+]?0o?[0-7_]*[89]/i
@@ -15,17 +15,18 @@ class RubyLexer
15
15
  ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
16
16
  SIMPLE_STRING = /(#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
17
17
  SSTRING = /(\\.|[^\'])*/
18
- INT_DEC = /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0d[0-9_]+)/i
19
- INT_HEX = /[+]?0x[a-f0-9_]+/i
20
- INT_BIN = /[+]?0b[01_]+/i
21
- INT_OCT = /[+]?0o?[0-7_]+|0o/i
22
- FLOAT = /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?\b|[+]?[\d_]+e[+-]?[\d_]+\b/i
23
- INT_DEC2 = /[+]?\d[0-9_]*(?![e])/i
18
+ INT_DEC = /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
19
+ INT_HEX = /[+]?0x[a-f0-9_]+(ri|r|i)?/i
20
+ INT_BIN = /[+]?0b[01_]+(ri|r|i)?/i
21
+ INT_OCT = /[+]?0o?[0-7_]+(ri|r|i)?|0o(ri|r|i)?/i
22
+ FLOAT = /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?(?:(ri|r|i)\b)?|[+]?[\d_]+e[+-]?[\d_]+(?:(ri|r|i)\b)?/i
23
+ INT_DEC2 = /[+]?\d[0-9_]*(?![e])((ri|r|i)\b)?/i
24
24
  NUM_BAD = /[+]?0[xbd]\b/i
25
25
  INT_OCT_BAD = /[+]?0o?[0-7_]*[89]/i
26
26
  FLOAT_BAD = /[+]?\d[\d_]*_(e|\.)/i
27
27
 
28
- class ScanError < StandardError ; end
28
+ class LexerError < StandardError ; end
29
+ class ScanError < LexerError ; end
29
30
 
30
31
  attr_accessor :filename
31
32
  attr_accessor :ss
@@ -43,7 +44,6 @@ class RubyLexer
43
44
  yield
44
45
  end
45
46
 
46
-
47
47
  def scanner_class
48
48
  StringScanner
49
49
  end unless instance_methods(false).map(&:to_s).include?("scanner_class")
@@ -62,6 +62,12 @@ class RubyLexer
62
62
  end
63
63
  end
64
64
 
65
+ def location
66
+ [
67
+ (filename || "<input>"),
68
+ ].compact.join(":")
69
+ end
70
+
65
71
  def next_token
66
72
  return process_string if lex_strterm
67
73
  self.command_state = self.command_start
@@ -297,16 +303,16 @@ class RubyLexer
297
303
  action { rb_compile_error "Invalid char #{text.inspect} in expression" }
298
304
  else
299
305
  text = ss.string[ss.pos .. -1]
300
- raise ScanError, "can not match (#{state.inspect}): '#{text}'"
306
+ raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
301
307
  end
302
308
  else
303
- raise ScanError, "undefined state: '#{state}'"
309
+ raise ScanError, "undefined state at #{location}: '#{state}'"
304
310
  end # token = case state
305
311
 
306
312
  next unless token # allow functions to trigger redo w/ nil
307
313
  end # while
308
314
 
309
- raise "bad lexical result: #{token.inspect}" unless
315
+ raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
310
316
  token.nil? || (Array === token && token.size >= 2)
311
317
 
312
318
  # auto-switch state
@@ -27,10 +27,10 @@ token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS
27
27
  tWORDS_BEG tQWORDS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END
28
28
  tSTRING tSYMBOL tNL tEH tCOLON tCOMMA tSPACE tSEMI tLAMBDA
29
29
  tLAMBEG tDSTAR tCHAR tSYMBOLS_BEG tQSYMBOLS_BEG tSTRING_DEND tUBANG
30
- #if defined(RUBY21) || defined(RUBY22) || defined(RUBY23))
30
+ #if defined(RUBY21) || defined(RUBY22) || defined(RUBY23)
31
31
  tRATIONAL tIMAGINARY
32
32
  #endif
33
- #if defined(RUBY22 || defined(RUBY23))
33
+ #if defined(RUBY22) || defined(RUBY23)
34
34
  tLABEL_END
35
35
  #endif
36
36
  #if defined(RUBY23)
@@ -720,7 +720,7 @@ rule
720
720
  result = new_call(new_call(s(:lit, val[1]), :"**", argl(val[3])), :"-@")
721
721
  }
722
722
  | tUMINUS_NUM tFLOAT tPOW arg
723
- #elif defined(RUBY21) || defined(RUBY22 || defined(RUBY23))
723
+ #elif defined(RUBY21) || defined(RUBY22) || defined(RUBY23)
724
724
  | tUMINUS_NUM simple_numeric tPOW arg
725
725
  #endif
726
726
  {
@@ -1758,12 +1758,11 @@ opt_block_args_tail: tCOMMA block_args_tail
1758
1758
 
1759
1759
  word_list: none
1760
1760
  {
1761
- result = s(:array)
1761
+ result = new_word_list
1762
1762
  }
1763
1763
  | word_list word tSPACE
1764
1764
  {
1765
- word = val[1][0] == :evstr ? s(:dstr, "", val[1]) : val[1]
1766
- result = val[0].dup << word
1765
+ result = val[0].dup << new_word_list_entry(val)
1767
1766
  }
1768
1767
 
1769
1768
  word: string_content
@@ -1783,23 +1782,11 @@ opt_block_args_tail: tCOMMA block_args_tail
1783
1782
 
1784
1783
  symbol_list: none
1785
1784
  {
1786
- result = s(:array)
1785
+ result = new_symbol_list
1787
1786
  }
1788
1787
  | symbol_list word tSPACE
1789
1788
  {
1790
- list, sym, _ = val
1791
-
1792
- case sym[0]
1793
- when :dstr then
1794
- sym[0] = :dsym
1795
- when :str then
1796
- sym = s(:lit, sym.last.to_sym)
1797
- else
1798
- debug20 24
1799
- sym = s(:dsym, "", result)
1800
- end
1801
-
1802
- result = list.dup << sym
1789
+ result = val[0].dup << new_symbol_list_entry(val)
1803
1790
  }
1804
1791
 
1805
1792
  qwords: tQWORDS_BEG tSPACE tSTRING_END
@@ -1822,20 +1809,20 @@ opt_block_args_tail: tCOMMA block_args_tail
1822
1809
 
1823
1810
  qword_list: none
1824
1811
  {
1825
- result = s(:array)
1812
+ result = new_qword_list
1826
1813
  }
1827
1814
  | qword_list tSTRING_CONTENT tSPACE
1828
1815
  {
1829
- result = val[0].dup << s(:str, val[1])
1816
+ result = val[0].dup << new_qword_list_entry(val)
1830
1817
  }
1831
1818
 
1832
1819
  qsym_list: none
1833
1820
  {
1834
- result = s(:array)
1821
+ result = new_qsym_list
1835
1822
  }
1836
1823
  | qsym_list tSTRING_CONTENT tSPACE
1837
1824
  {
1838
- result = val[0].dup << s(:lit, val[1].to_sym)
1825
+ result = val[0].dup << new_qsym_list_entry(val)
1839
1826
  }
1840
1827
 
1841
1828
  string_contents: none
@@ -1883,10 +1870,10 @@ regexp_contents: none
1883
1870
  }
1884
1871
  | tSTRING_DBEG
1885
1872
  {
1886
- result = [lexer.lex_strterm,
1887
- lexer.brace_nest,
1873
+ result = [lexer.lex_strterm,
1874
+ lexer.brace_nest,
1888
1875
  lexer.string_nest, # TODO: remove
1889
- lexer.cond.store,
1876
+ lexer.cond.store,
1890
1877
  lexer.cmdarg.store,
1891
1878
  lexer.lex_state,
1892
1879
  ]
@@ -1973,7 +1960,7 @@ regexp_contents: none
1973
1960
  numeric: tINTEGER
1974
1961
  | tFLOAT
1975
1962
  | tUMINUS_NUM tINTEGER =tLOWEST
1976
- #elif defined(RUBY21) || defined(RUBY22 || defined(RUBY23))
1963
+ #elif defined(RUBY21) || defined(RUBY22) || defined(RUBY23)
1977
1964
  numeric: simple_numeric
1978
1965
  | tUMINUS_NUM simple_numeric
1979
1966
  #endif
@@ -1987,7 +1974,7 @@ regexp_contents: none
1987
1974
  #endif
1988
1975
  }
1989
1976
 
1990
- #if defined(RUBY21) || defined(RUBY22) || defined(RUBY23))
1977
+ #if defined(RUBY21) || defined(RUBY22) || defined(RUBY23)
1991
1978
  simple_numeric: tINTEGER
1992
1979
  | tFLOAT
1993
1980
  | tRATIONAL
@@ -2080,15 +2067,15 @@ keyword_variable: kNIL { result = s(:nil) }
2080
2067
  {
2081
2068
  result = args val
2082
2069
  }
2083
- | f_kwarg opt_f_block_arg
2070
+ | f_kwarg opt_f_block_arg
2084
2071
  {
2085
2072
  result = args val
2086
2073
  }
2087
- | f_kwrest opt_f_block_arg
2074
+ | f_kwrest opt_f_block_arg
2088
2075
  {
2089
2076
  result = args val
2090
2077
  }
2091
- | f_block_arg
2078
+ | f_block_arg
2092
2079
 
2093
2080
  opt_args_tail: tCOMMA args_tail
2094
2081
  {
@@ -2186,7 +2173,7 @@ keyword_variable: kNIL { result = s(:nil) }
2186
2173
  result = identifier
2187
2174
  }
2188
2175
 
2189
- #if defined(RUBY22) || defined(RUBY23))
2176
+ #if defined(RUBY22) || defined(RUBY23)
2190
2177
  f_arg_asgn: f_norm_arg
2191
2178
 
2192
2179
  f_arg_item: f_arg_asgn