brakeman 4.5.0 → 4.5.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of brakeman might be problematic. Click here for more details.

Files changed (159) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +15 -0
  3. data/README.md +6 -6
  4. data/bundle/load.rb +3 -3
  5. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/AUTHORS +0 -0
  6. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/COPYING +0 -0
  7. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/Changelog.md +211 -15
  8. data/bundle/ruby/2.5.0/gems/highline-2.0.2/Gemfile +22 -0
  9. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/LICENSE +0 -0
  10. data/bundle/ruby/2.5.0/gems/highline-2.0.2/README.md +202 -0
  11. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/TODO +0 -0
  12. data/bundle/ruby/2.5.0/gems/highline-2.0.2/appveyor.yml +37 -0
  13. data/bundle/ruby/2.5.0/gems/highline-2.0.2/highline.gemspec +35 -0
  14. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline.rb +650 -0
  15. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/builtin_styles.rb +129 -0
  16. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/lib/highline/color_scheme.rb +49 -32
  17. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/compatibility.rb +23 -0
  18. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/custom_errors.rb +57 -0
  19. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/import.rb +48 -0
  20. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/io_console_compatible.rb +37 -0
  21. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/list.rb +177 -0
  22. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/list_renderer.rb +261 -0
  23. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/menu.rb +576 -0
  24. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/menu/item.rb +32 -0
  25. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/paginator.rb +52 -0
  26. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/lib/highline/question.rb +281 -131
  27. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/question/answer_converter.rb +103 -0
  28. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/question_asker.rb +150 -0
  29. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/simulate.rb +59 -0
  30. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/statement.rb +88 -0
  31. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/string.rb +36 -0
  32. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/string_extensions.rb +130 -0
  33. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/style.rb +325 -0
  34. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/template_renderer.rb +62 -0
  35. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/terminal.rb +190 -0
  36. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/terminal/io_console.rb +36 -0
  37. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/terminal/ncurses.rb +38 -0
  38. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/terminal/unix_stty.rb +51 -0
  39. data/bundle/ruby/2.5.0/gems/{highline-1.7.10 → highline-2.0.2}/lib/highline/version.rb +3 -1
  40. data/bundle/ruby/2.5.0/gems/highline-2.0.2/lib/highline/wrapper.rb +53 -0
  41. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/History.rdoc +32 -0
  42. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/Manifest.txt +0 -0
  43. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/README.rdoc +0 -0
  44. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/compare/normalize.rb +0 -0
  45. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/debugging.md +0 -0
  46. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/rp_extensions.rb +1 -1
  47. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/rp_stringscanner.rb +0 -0
  48. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby20_parser.rb +2427 -2432
  49. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby20_parser.y +32 -29
  50. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby21_parser.rb +2101 -2109
  51. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby21_parser.y +32 -29
  52. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby22_parser.rb +2080 -2095
  53. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby22_parser.y +32 -29
  54. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0/lib/ruby25_parser.rb → ruby_parser-3.13.1/lib/ruby23_parser.rb} +2339 -2333
  55. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby23_parser.y +32 -29
  56. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby24_parser.rb +2347 -2335
  57. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby24_parser.y +32 -23
  58. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0/lib/ruby23_parser.rb → ruby_parser-3.13.1/lib/ruby25_parser.rb} +2349 -2337
  59. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby25_parser.y +32 -23
  60. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby26_parser.rb +2351 -2338
  61. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby26_parser.y +32 -23
  62. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby_lexer.rb +253 -161
  63. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby_lexer.rex +25 -25
  64. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby_lexer.rex.rb +68 -26
  65. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby_parser.rb +3 -1
  66. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby_parser.yy +34 -23
  67. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/lib/ruby_parser_extras.rb +64 -43
  68. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/tools/munge.rb +2 -1
  69. data/bundle/ruby/2.5.0/gems/{ruby_parser-3.13.0 → ruby_parser-3.13.1}/tools/ripper.rb +6 -1
  70. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/CHANGELOG.md +4 -0
  71. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/MIT-LICENSE.txt +0 -0
  72. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/README.md +1 -1
  73. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/data/display_width.marshal.gz +0 -0
  74. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/lib/unicode/display_width.rb +0 -0
  75. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/lib/unicode/display_width/constants.rb +2 -2
  76. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/lib/unicode/display_width/index.rb +0 -0
  77. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/lib/unicode/display_width/no_string_ext.rb +0 -0
  78. data/bundle/ruby/2.5.0/gems/{unicode-display_width-1.5.0 → unicode-display_width-1.6.0}/lib/unicode/display_width/string_ext.rb +0 -0
  79. data/lib/brakeman.rb +7 -0
  80. data/lib/brakeman/app_tree.rb +34 -22
  81. data/lib/brakeman/checks.rb +7 -7
  82. data/lib/brakeman/checks/base_check.rb +9 -9
  83. data/lib/brakeman/checks/check_cross_site_scripting.rb +5 -0
  84. data/lib/brakeman/checks/check_default_routes.rb +5 -0
  85. data/lib/brakeman/checks/check_deserialize.rb +52 -0
  86. data/lib/brakeman/checks/check_dynamic_finders.rb +1 -1
  87. data/lib/brakeman/checks/check_force_ssl.rb +27 -0
  88. data/lib/brakeman/checks/check_json_parsing.rb +5 -0
  89. data/lib/brakeman/checks/check_link_to_href.rb +6 -1
  90. data/lib/brakeman/checks/check_mail_to.rb +1 -1
  91. data/lib/brakeman/checks/check_model_attr_accessible.rb +1 -1
  92. data/lib/brakeman/checks/check_model_attributes.rb +12 -50
  93. data/lib/brakeman/checks/check_model_serialize.rb +1 -1
  94. data/lib/brakeman/checks/check_nested_attributes_bypass.rb +3 -3
  95. data/lib/brakeman/checks/check_secrets.rb +1 -1
  96. data/lib/brakeman/checks/check_session_settings.rb +10 -10
  97. data/lib/brakeman/checks/check_simple_format.rb +5 -0
  98. data/lib/brakeman/checks/check_skip_before_filter.rb +1 -1
  99. data/lib/brakeman/checks/check_sql.rb +15 -17
  100. data/lib/brakeman/checks/check_validation_regex.rb +1 -1
  101. data/lib/brakeman/file_parser.rb +6 -8
  102. data/lib/brakeman/file_path.rb +71 -0
  103. data/lib/brakeman/options.rb +7 -0
  104. data/lib/brakeman/parsers/template_parser.rb +3 -3
  105. data/lib/brakeman/processor.rb +3 -4
  106. data/lib/brakeman/processors/alias_processor.rb +12 -6
  107. data/lib/brakeman/processors/base_processor.rb +8 -7
  108. data/lib/brakeman/processors/controller_alias_processor.rb +10 -7
  109. data/lib/brakeman/processors/controller_processor.rb +5 -9
  110. data/lib/brakeman/processors/haml_template_processor.rb +5 -0
  111. data/lib/brakeman/processors/lib/module_helper.rb +8 -8
  112. data/lib/brakeman/processors/lib/processor_helper.rb +3 -3
  113. data/lib/brakeman/processors/lib/rails2_config_processor.rb +3 -3
  114. data/lib/brakeman/processors/lib/rails2_route_processor.rb +2 -2
  115. data/lib/brakeman/processors/lib/rails3_config_processor.rb +3 -3
  116. data/lib/brakeman/processors/lib/rails3_route_processor.rb +2 -2
  117. data/lib/brakeman/processors/lib/render_helper.rb +2 -2
  118. data/lib/brakeman/processors/lib/render_path.rb +18 -1
  119. data/lib/brakeman/processors/library_processor.rb +5 -5
  120. data/lib/brakeman/processors/model_processor.rb +4 -5
  121. data/lib/brakeman/processors/output_processor.rb +5 -0
  122. data/lib/brakeman/processors/template_alias_processor.rb +4 -5
  123. data/lib/brakeman/processors/template_processor.rb +4 -4
  124. data/lib/brakeman/report.rb +3 -3
  125. data/lib/brakeman/report/ignore/config.rb +2 -3
  126. data/lib/brakeman/report/ignore/interactive.rb +2 -2
  127. data/lib/brakeman/report/pager.rb +1 -0
  128. data/lib/brakeman/report/report_base.rb +51 -6
  129. data/lib/brakeman/report/report_codeclimate.rb +3 -3
  130. data/lib/brakeman/report/report_hash.rb +1 -1
  131. data/lib/brakeman/report/report_html.rb +2 -2
  132. data/lib/brakeman/report/report_json.rb +1 -24
  133. data/lib/brakeman/report/report_table.rb +20 -4
  134. data/lib/brakeman/report/report_tabs.rb +1 -1
  135. data/lib/brakeman/report/report_text.rb +2 -2
  136. data/lib/brakeman/rescanner.rb +9 -12
  137. data/lib/brakeman/scanner.rb +19 -14
  138. data/lib/brakeman/tracker.rb +4 -4
  139. data/lib/brakeman/tracker/collection.rb +4 -3
  140. data/lib/brakeman/tracker/config.rb +6 -0
  141. data/lib/brakeman/util.rb +1 -147
  142. data/lib/brakeman/version.rb +1 -1
  143. data/lib/brakeman/warning.rb +23 -13
  144. data/lib/brakeman/warning_codes.rb +1 -0
  145. data/lib/ruby_parser/bm_sexp_processor.rb +1 -0
  146. metadata +78 -61
  147. data/bundle/ruby/2.5.0/gems/highline-1.7.10/Gemfile +0 -11
  148. data/bundle/ruby/2.5.0/gems/highline-1.7.10/INSTALL +0 -59
  149. data/bundle/ruby/2.5.0/gems/highline-1.7.10/README.rdoc +0 -74
  150. data/bundle/ruby/2.5.0/gems/highline-1.7.10/highline.gemspec +0 -37
  151. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline.rb +0 -1048
  152. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline/compatibility.rb +0 -16
  153. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline/import.rb +0 -41
  154. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline/menu.rb +0 -381
  155. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline/simulate.rb +0 -48
  156. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline/string_extensions.rb +0 -111
  157. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline/style.rb +0 -192
  158. data/bundle/ruby/2.5.0/gems/highline-1.7.10/lib/highline/system_extensions.rb +0 -254
  159. data/bundle/ruby/2.5.0/gems/highline-1.7.10/setup.rb +0 -1360
@@ -51,7 +51,7 @@ preclow
51
51
  rule
52
52
 
53
53
  program: {
54
- self.lexer.lex_state = :expr_beg
54
+ self.lexer.lex_state = EXPR_BEG
55
55
  }
56
56
  top_compstmt
57
57
  {
@@ -153,7 +153,7 @@ rule
153
153
 
154
154
  stmt: kALIAS fitem
155
155
  {
156
- lexer.lex_state = :expr_fname
156
+ lexer.lex_state = EXPR_FNAME
157
157
  result = self.lexer.lineno
158
158
  }
159
159
  fitem
@@ -613,14 +613,14 @@ rule
613
613
  fname: tIDENTIFIER | tCONSTANT | tFID
614
614
  | op
615
615
  {
616
- lexer.lex_state = :expr_end
616
+ lexer.lex_state = EXPR_END
617
617
  result = val[0]
618
618
  }
619
619
 
620
620
  | reswords
621
621
  {
622
622
  (sym, _line), = val
623
- lexer.lex_state = :expr_end
623
+ lexer.lex_state = EXPR_END
624
624
  result = sym
625
625
  }
626
626
 
@@ -639,7 +639,7 @@ rule
639
639
  |
640
640
  undef_list tCOMMA
641
641
  {
642
- lexer.lex_state = :expr_fname
642
+ lexer.lex_state = EXPR_FNAME
643
643
  }
644
644
  fitem
645
645
  {
@@ -690,13 +690,21 @@ rule
690
690
  }
691
691
  | primary_value tCOLON2 tCONSTANT tOP_ASGN arg_rhs
692
692
  {
693
- # TODO: assignment
694
- raise "not yet: %p" % [val]
693
+ lhs1, _, lhs2, op, rhs = val
694
+
695
+ lhs = s(:colon2, lhs1, lhs2.to_sym).line lhs1.line
696
+ result = new_const_op_asgn [lhs, op, rhs]
697
+ }
698
+ | tCOLON3 tCONSTANT
699
+ {
700
+ result = self.lexer.lineno
695
701
  }
696
- | tCOLON3 tCONSTANT tOP_ASGN arg_rhs
702
+ tOP_ASGN arg_rhs
697
703
  {
698
- # TODO: assignment
699
- raise "not yet: %p" % [val]
704
+ _, lhs, line, op, rhs = val
705
+
706
+ lhs = s(:colon3, lhs.to_sym).line line
707
+ result = new_const_op_asgn [lhs, op, rhs]
700
708
  }
701
709
  | backref tOP_ASGN arg_rhs
702
710
  {
@@ -1037,7 +1045,7 @@ rule
1037
1045
  }
1038
1046
  | tLPAREN_ARG rparen
1039
1047
  {
1040
- # TODO: lex_state = :expr_endarg in between
1048
+ # TODO: lex_state = EXPR_ENDARG in between
1041
1049
  debug20 13, val, result
1042
1050
  }
1043
1051
  | tLPAREN_ARG
@@ -1048,7 +1056,7 @@ rule
1048
1056
  }
1049
1057
  stmt
1050
1058
  {
1051
- lexer.lex_state = :expr_endarg
1059
+ lexer.lex_state = EXPR_ENDARG
1052
1060
  }
1053
1061
  rparen
1054
1062
  {
@@ -1248,13 +1256,13 @@ rule
1248
1256
  | k_def singleton dot_or_colon
1249
1257
  {
1250
1258
  self.comments.push self.lexer.comments
1251
- lexer.lex_state = :expr_fname
1259
+ lexer.lex_state = EXPR_FNAME
1252
1260
  }
1253
1261
  fname
1254
1262
  {
1255
1263
  self.in_single += 1
1256
1264
  self.env.extend
1257
- lexer.lex_state = :expr_endfn # force for args
1265
+ lexer.lex_state = EXPR_ENDFN # force for args
1258
1266
  result = [lexer.lineno, self.lexer.cmdarg.stack.dup]
1259
1267
  lexer.cmdarg.stack.replace [false]
1260
1268
  }
@@ -1924,7 +1932,7 @@ regexp_contents: none
1924
1932
  result = lexer.lex_strterm
1925
1933
 
1926
1934
  lexer.lex_strterm = nil
1927
- lexer.lex_state = :expr_beg
1935
+ lexer.lex_state = EXPR_BEG
1928
1936
  }
1929
1937
  string_dvar
1930
1938
  {
@@ -1945,7 +1953,7 @@ regexp_contents: none
1945
1953
  lexer.brace_nest = 0
1946
1954
  lexer.string_nest = 0
1947
1955
 
1948
- lexer.lex_state = :expr_beg
1956
+ lexer.lex_state = EXPR_BEG
1949
1957
  }
1950
1958
  compstmt
1951
1959
  tSTRING_DEND
@@ -1986,7 +1994,7 @@ regexp_contents: none
1986
1994
 
1987
1995
  symbol: tSYMBEG sym
1988
1996
  {
1989
- lexer.lex_state = :expr_end
1997
+ lexer.lex_state = EXPR_END
1990
1998
  result = val[1].to_sym
1991
1999
  }
1992
2000
  | tSYMBOL
@@ -1998,7 +2006,7 @@ regexp_contents: none
1998
2006
 
1999
2007
  dsym: tSYMBEG xstring_contents tSTRING_END
2000
2008
  {
2001
- lexer.lex_state = :expr_end
2009
+ lexer.lex_state = EXPR_END
2002
2010
  result = val[1]
2003
2011
 
2004
2012
  result ||= s(:str, "")
@@ -2074,7 +2082,7 @@ keyword_variable: kNIL { result = s(:nil) }
2074
2082
 
2075
2083
  superclass: tLT
2076
2084
  {
2077
- lexer.lex_state = :expr_beg
2085
+ lexer.lex_state = EXPR_BEG
2078
2086
  lexer.command_start = true
2079
2087
  }
2080
2088
  expr_value term
@@ -2089,13 +2097,13 @@ keyword_variable: kNIL { result = s(:nil) }
2089
2097
  f_arglist: tLPAREN2 f_args rparen
2090
2098
  {
2091
2099
  result = val[1]
2092
- self.lexer.lex_state = :expr_beg
2100
+ self.lexer.lex_state = EXPR_BEG
2093
2101
  self.lexer.command_start = true
2094
2102
  }
2095
2103
  | {
2096
2104
  result = self.in_kwarg
2097
2105
  self.in_kwarg = true
2098
- # TODO: self.lexer.lex_state |= :expr_label
2106
+ self.lexer.lex_state |= EXPR_LABEL
2099
2107
  }
2100
2108
  f_args term
2101
2109
  {
@@ -2103,7 +2111,7 @@ keyword_variable: kNIL { result = s(:nil) }
2103
2111
 
2104
2112
  self.in_kwarg = kwarg
2105
2113
  result = args
2106
- lexer.lex_state = :expr_beg
2114
+ lexer.lex_state = EXPR_BEG
2107
2115
  lexer.command_start = true
2108
2116
  }
2109
2117
 
@@ -2384,7 +2392,7 @@ keyword_variable: kNIL { result = s(:nil) }
2384
2392
  singleton: var_ref
2385
2393
  | tLPAREN2
2386
2394
  {
2387
- lexer.lex_state = :expr_beg
2395
+ lexer.lex_state = EXPR_BEG
2388
2396
  }
2389
2397
  expr rparen
2390
2398
  {
@@ -2461,6 +2469,7 @@ end
2461
2469
 
2462
2470
  require "ruby_lexer"
2463
2471
  require "ruby_parser_extras"
2472
+ include RubyLexer::State::Values
2464
2473
 
2465
2474
  # :stopdoc:
2466
2475
 
@@ -34,9 +34,104 @@ class RubyLexer
34
34
  STR_SSYM = STR_FUNC_SYMBOL
35
35
  STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
36
36
 
37
- EXPR_BEG_ANY = [:expr_beg, :expr_mid, :expr_class ]
38
- EXPR_ARG_ANY = [:expr_arg, :expr_cmdarg, ]
39
- EXPR_END_ANY = [:expr_end, :expr_endarg, :expr_endfn]
37
+ class State
38
+ attr_accessor :n
39
+
40
+ def initialize o
41
+ raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
42
+
43
+ self.n = o
44
+ end
45
+
46
+ def == o
47
+ o.class == self.class && o.n == self.n
48
+ end
49
+
50
+ def =~ v
51
+ (self.n & v.n) != 0
52
+ end
53
+
54
+ def | v
55
+ self.class.new(self.n | v.n)
56
+ end
57
+
58
+ def inspect
59
+ return "EXPR_NONE" if n.zero?
60
+ NAMES.map { |v,k| k if self =~ v }.compact.join "|"
61
+ end
62
+
63
+ module Values
64
+ EXPR_NONE = State.new 0x0
65
+ EXPR_BEG = State.new 0x1
66
+ EXPR_END = State.new 0x2
67
+ EXPR_ENDARG = State.new 0x4
68
+ EXPR_ENDFN = State.new 0x8
69
+ EXPR_ARG = State.new 0x10
70
+ EXPR_CMDARG = State.new 0x20
71
+ EXPR_MID = State.new 0x40
72
+ EXPR_FNAME = State.new 0x80
73
+ EXPR_DOT = State.new 0x100
74
+ EXPR_CLASS = State.new 0x200
75
+ EXPR_LABEL = State.new 0x400
76
+ EXPR_LABELED = State.new 0x800
77
+ EXPR_FITEM = State.new 0x1000
78
+
79
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
80
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
81
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
82
+
83
+ # extra fake lex_state names to make things a bit cleaner
84
+
85
+ EXPR_LAB = EXPR_ARG|EXPR_LABELED
86
+ EXPR_NUM = EXPR_END|EXPR_ENDARG
87
+ EXPR_PAR = EXPR_BEG|EXPR_LABEL
88
+ EXPR_PAD = EXPR_BEG|EXPR_LABELED
89
+ end
90
+
91
+ include Values
92
+
93
+ NAMES = {
94
+ EXPR_NONE => "EXPR_NONE",
95
+ EXPR_BEG => "EXPR_BEG",
96
+ EXPR_END => "EXPR_END",
97
+ EXPR_ENDARG => "EXPR_ENDARG",
98
+ EXPR_ENDFN => "EXPR_ENDFN",
99
+ EXPR_ARG => "EXPR_ARG",
100
+ EXPR_CMDARG => "EXPR_CMDARG",
101
+ EXPR_MID => "EXPR_MID",
102
+ EXPR_FNAME => "EXPR_FNAME",
103
+ EXPR_DOT => "EXPR_DOT",
104
+ EXPR_CLASS => "EXPR_CLASS",
105
+ EXPR_LABEL => "EXPR_LABEL",
106
+ EXPR_LABELED => "EXPR_LABELED",
107
+ EXPR_FITEM => "EXPR_FITEM",
108
+ }
109
+ end
110
+
111
+ include State::Values
112
+
113
+ if $DEBUG then
114
+ def lex_state= o
115
+ return if @lex_state == o
116
+ raise ArgumentError, "bad state: %p" % [o] unless State === o
117
+ if ENV["V"] then
118
+ c = caller[0]
119
+ c = caller[1] if c =~ /\b(expr_)?result\b/
120
+ c = caller[2] if c =~ /\b(expr_)?result\b/
121
+ warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
122
+ else
123
+ warn "lex_state: %p -> %p" % [lex_state, o]
124
+ end
125
+ @lex_state = o
126
+ end
127
+ else
128
+ def lex_state= o
129
+ raise ArgumentError, "bad state: %p" % [o] unless State === o
130
+ @lex_state = o
131
+ end
132
+ end
133
+
134
+ attr_reader :lex_state
40
135
 
41
136
  ESCAPES = {
42
137
  "a" => "\007",
@@ -90,7 +185,6 @@ class RubyLexer
90
185
  # Additional context surrounding tokens that both the lexer and
91
186
  # grammar use.
92
187
 
93
- attr_accessor :lex_state
94
188
  attr_accessor :lex_strterm
95
189
  attr_accessor :lpar_beg
96
190
  attr_accessor :paren_nest
@@ -99,24 +193,14 @@ class RubyLexer
99
193
  attr_accessor :string_buffer
100
194
  attr_accessor :string_nest
101
195
 
102
- if $DEBUG then
103
- alias lex_state= lex_state=
104
- def lex_state=o
105
- return if @lex_state == o
106
- c = caller.first
107
- c = caller[1] if c =~ /\bresult\b/
108
- warn "lex_state: %p -> %p from %s" % [@lex_state, o, c.clean_caller]
109
- @lex_state = o
110
- end
111
- end
112
-
113
196
  # Last token read via next_token.
114
197
  attr_accessor :token
115
198
 
116
199
  attr_writer :comments
117
200
 
118
201
  def initialize _ = nil
119
- @lex_state = :expr_none
202
+ @lex_state = nil # remove one warning under $DEBUG
203
+ self.lex_state = EXPR_NONE
120
204
 
121
205
  self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
122
206
  self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
@@ -129,7 +213,7 @@ class RubyLexer
129
213
  end
130
214
 
131
215
  def arg_state
132
- in_arg_state? ? :expr_arg : :expr_beg
216
+ is_after_operator? ? EXPR_ARG : EXPR_BEG
133
217
  end
134
218
 
135
219
  def beginning_of_line?
@@ -148,17 +232,17 @@ class RubyLexer
148
232
  end
149
233
 
150
234
  def expr_dot?
151
- lex_state == :expr_dot
235
+ lex_state =~ EXPR_DOT
152
236
  end
153
237
 
154
- def expr_fname?
155
- lex_state == :expr_fname
238
+ def expr_fname? # REFACTOR
239
+ lex_state =~ EXPR_FNAME
156
240
  end
157
241
 
158
242
  def expr_result token, text
159
243
  cond.push false
160
244
  cmdarg.push false
161
- result :expr_beg, token, text
245
+ result EXPR_BEG, token, text
162
246
  end
163
247
 
164
248
  def heredoc here # TODO: rewrite / remove
@@ -214,7 +298,12 @@ class RubyLexer
214
298
 
215
299
  self.lex_strterm = [:heredoc, eos, func, last_line]
216
300
 
217
- string_content = string_buffer.join.delete("\r")
301
+ string_content = begin
302
+ s = string_buffer.join
303
+ s.delete "\r"
304
+ rescue ArgumentError
305
+ s.b.delete("\r").force_encoding Encoding::UTF_8
306
+ end
218
307
 
219
308
  string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
220
309
 
@@ -311,16 +400,12 @@ class RubyLexer
311
400
  end
312
401
  end
313
402
 
314
- def in_fname?
315
- in_lex_state? :expr_fname
316
- end
317
-
318
- def in_arg_state? # TODO: rename is_after_operator?
319
- in_lex_state? :expr_fname, :expr_dot
403
+ def in_fname? # REFACTOR
404
+ lex_state =~ EXPR_FNAME
320
405
  end
321
406
 
322
- def in_lex_state?(*states)
323
- states.include? lex_state
407
+ def is_after_operator?
408
+ lex_state =~ EXPR_FNAME|EXPR_DOT
324
409
  end
325
410
 
326
411
  def int_with_base base
@@ -329,27 +414,26 @@ class RubyLexer
329
414
  text = matched
330
415
  case
331
416
  when text.end_with?('ri')
332
- return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
417
+ return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
333
418
  when text.end_with?('r')
334
- return result(:expr_end, :tRATIONAL, Rational(text.chop.to_i(base)))
419
+ return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
335
420
  when text.end_with?('i')
336
- return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
421
+ return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
337
422
  else
338
- return result(:expr_end, :tINTEGER, text.to_i(base))
423
+ return result(EXPR_NUM, :tINTEGER, text.to_i(base))
339
424
  end
340
425
  end
341
426
 
342
427
  def is_arg?
343
- in_lex_state?(*EXPR_ARG_ANY)
428
+ lex_state =~ EXPR_ARG_ANY
344
429
  end
345
430
 
346
431
  def is_beg?
347
- # TODO: in_lex_state?(*EXPR_BEG_ANY) || lex_state == [:expr_arg, :expr_labeled]
348
- in_lex_state?(*EXPR_BEG_ANY, :expr_value, :expr_labeled)
432
+ lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
349
433
  end
350
434
 
351
435
  def is_end?
352
- in_lex_state?(*EXPR_END_ANY)
436
+ lex_state =~ EXPR_END_ANY
353
437
  end
354
438
 
355
439
  def lvar_defined? id
@@ -357,13 +441,12 @@ class RubyLexer
357
441
  self.parser.env[id.to_sym] == :lvar
358
442
  end
359
443
 
360
-
361
444
  def ruby22_label?
362
445
  ruby22plus? and is_label_possible?
363
446
  end
364
447
 
365
448
  def is_label_possible?
366
- (in_lex_state?(:expr_beg, :expr_endfn) && !cmd_state) || is_arg?
449
+ (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
367
450
  end
368
451
 
369
452
  def is_label_suffix?
@@ -390,7 +473,7 @@ class RubyLexer
390
473
  token = if is_arg? && space_seen && !check(/\s/) then
391
474
  warning("`&' interpreted as argument prefix")
392
475
  :tAMPER
393
- elsif in_lex_state? :expr_beg, :expr_mid then
476
+ elsif lex_state =~ EXPR_BEG|EXPR_MID then
394
477
  :tAMPER
395
478
  else
396
479
  :tAMPER2
@@ -402,7 +485,7 @@ class RubyLexer
402
485
  def process_backref text
403
486
  token = ss[1].to_sym
404
487
  # TODO: can't do lineno hack w/ symbol
405
- result :expr_end, :tBACK_REF, token
488
+ result EXPR_END, :tBACK_REF, token
406
489
  end
407
490
 
408
491
  def process_begin text
@@ -427,17 +510,17 @@ class RubyLexer
427
510
  case matched
428
511
  when "}" then
429
512
  self.brace_nest -= 1
430
- self.lex_state = :expr_endarg # TODO: :expr_end ? Look at 2.6
513
+ self.lex_state = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6
431
514
 
432
515
  return :tSTRING_DEND, matched if brace_nest < 0
433
516
  return :tRCURLY, matched
434
517
  when "]" then
435
518
  self.paren_nest -= 1
436
- self.lex_state = :expr_endarg
519
+ self.lex_state = EXPR_ENDARG
437
520
  return :tRBRACK, matched
438
521
  when ")" then
439
522
  self.paren_nest -= 1
440
- self.lex_state = :expr_endfn
523
+ self.lex_state = EXPR_ENDFN
441
524
  return :tRPAREN, matched
442
525
  else
443
526
  raise "Unknown bracing: #{matched.inspect}"
@@ -447,7 +530,7 @@ class RubyLexer
447
530
  def process_colon1 text
448
531
  # ?: / then / when
449
532
  if is_end? || check(/\s/) then
450
- return result :expr_beg, :tCOLON, text
533
+ return result EXPR_BEG, :tCOLON, text
451
534
  end
452
535
 
453
536
  case
@@ -457,14 +540,14 @@ class RubyLexer
457
540
  string STR_DSYM
458
541
  end
459
542
 
460
- result :expr_fname, :tSYMBEG, text
543
+ result EXPR_FNAME, :tSYMBEG, text
461
544
  end
462
545
 
463
546
  def process_colon2 text
464
- if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
465
- result :expr_beg, :tCOLON3, text
547
+ if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
548
+ result EXPR_BEG, :tCOLON3, text
466
549
  else
467
- result :expr_dot, :tCOLON2, text
550
+ result EXPR_DOT, :tCOLON2, text
468
551
  end
469
552
  end
470
553
 
@@ -479,21 +562,23 @@ class RubyLexer
479
562
  return expr_result(:tLAMBEG, "{")
480
563
  end
481
564
 
482
- token = case lex_state
483
- when :expr_labeled then
565
+ token = case
566
+ when lex_state =~ EXPR_LABELED then
484
567
  :tLBRACE # hash
485
- when *EXPR_ARG_ANY, :expr_end, :expr_endfn then
486
- :tLCURLY # block (primary)
487
- when :expr_endarg
568
+ when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
569
+ :tLCURLY # block (primary) '{' in parse.y
570
+ when lex_state =~ EXPR_ENDARG then
488
571
  :tLBRACE_ARG # block (expr)
489
572
  else
490
573
  :tLBRACE # hash
491
574
  end
492
575
 
493
- # TODO: self.lex_state |= :expr_label if token != :tLBRACE_ARG
576
+ state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
494
577
  self.command_start = true if token != :tLBRACE
495
578
 
496
- return expr_result(token, "{")
579
+ cond.push false
580
+ cmdarg.push false
581
+ result state, token, text
497
582
  end
498
583
 
499
584
  def process_float text
@@ -501,45 +586,45 @@ class RubyLexer
501
586
 
502
587
  case
503
588
  when text.end_with?('ri')
504
- return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop)))
505
- when text.end_with?('r')
506
- return result(:expr_end, :tRATIONAL, Rational(text.chop))
589
+ return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
507
590
  when text.end_with?('i')
508
- return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_f))
591
+ return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
592
+ when text.end_with?('r')
593
+ return result EXPR_NUM, :tRATIONAL, Rational(text.chop)
509
594
  else
510
- return result(:expr_end, :tFLOAT, text.to_f)
595
+ return result EXPR_NUM, :tFLOAT, text.to_f
511
596
  end
512
597
  end
513
598
 
514
599
  def process_gvar text
515
600
  text.lineno = self.lineno
516
- result(:expr_end, :tGVAR, text)
601
+ result EXPR_END, :tGVAR, text
517
602
  end
518
603
 
519
604
  def process_gvar_oddity text
520
- return result :expr_end, "$", "$" if text == "$" # TODO: wtf is this?
605
+ return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
521
606
  rb_compile_error "#{text.inspect} is not allowed as a global variable name"
522
607
  end
523
608
 
524
609
  def process_ivar text
525
610
  tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
526
611
  text.lineno = self.lineno
527
- return result(:expr_end, tok_id, text)
612
+ result EXPR_END, tok_id, text
528
613
  end
529
614
 
530
615
  def process_lchevron text
531
- if (!in_lex_state?(:expr_dot, :expr_class) &&
616
+ if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
532
617
  !is_end? &&
533
- (!is_arg? || space_seen)) then # TODO: || in_state(:expr_labeled)
618
+ (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
534
619
  tok = self.heredoc_identifier
535
620
  return tok if tok
536
621
  end
537
622
 
538
- if in_arg_state? then
539
- self.lex_state = :expr_arg
623
+ if is_after_operator? then
624
+ self.lex_state = EXPR_ARG
540
625
  else
541
- self.command_start = true if lex_state == :expr_class
542
- self.lex_state = :expr_beg
626
+ self.command_start = true if lex_state =~ EXPR_CLASS
627
+ self.lex_state = EXPR_BEG
543
628
  end
544
629
 
545
630
  return result(lex_state, :tLSHFT, "\<\<")
@@ -567,17 +652,15 @@ class RubyLexer
567
652
  # Replace a string of newlines with a single one
568
653
  self.lineno += matched.lines.to_a.size if scan(/\n+/)
569
654
 
570
- # TODO: remove :expr_value -- audit all uses of it
571
- c = in_lex_state?(:expr_beg, :expr_value, :expr_class,
572
- :expr_fname, :expr_dot) && !in_lex_state?(:expr_labeled)
573
-
655
+ c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
656
+ lex_state !~ EXPR_LABELED)
574
657
  # TODO: figure out what token_seen is for
575
- # TODO: if c || self.lex_state == [:expr_beg, :expr_labeled] then
576
- if c || self.lex_state == :expr_labeled then
658
+ if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
577
659
  # ignore if !fallthrough?
578
660
  if !c && parser.in_kwarg then
579
661
  # normal newline
580
- return result(:expr_beg, :tNL, nil)
662
+ self.command_start = true
663
+ return result EXPR_BEG, :tNL, nil
581
664
  else
582
665
  return # skip
583
666
  end
@@ -592,41 +675,46 @@ class RubyLexer
592
675
 
593
676
  self.command_start = true
594
677
 
595
- return result(:expr_beg, :tNL, nil)
678
+ return result(EXPR_BEG, :tNL, nil)
596
679
  end
597
680
 
598
681
  def process_nthref text
599
682
  # TODO: can't do lineno hack w/ number
600
- result :expr_end, :tNTH_REF, ss[1].to_i
683
+ result EXPR_END, :tNTH_REF, ss[1].to_i
601
684
  end
602
685
 
603
686
  def process_paren text
604
- token = process_paren19
687
+ token = if is_beg? then
688
+ :tLPAREN
689
+ elsif !space_seen then
690
+ # foo( ... ) => method call, no ambiguity
691
+ :tLPAREN2
692
+ elsif is_space_arg? then
693
+ :tLPAREN_ARG
694
+ elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
695
+ # TODO:
696
+ # warn("parentheses after method name is interpreted as " \
697
+ # "an argument list, not a decomposed argument")
698
+ :tLPAREN2
699
+ else
700
+ :tLPAREN2 # plain '(' in parse.y
701
+ end
605
702
 
606
703
  self.paren_nest += 1
607
704
 
608
- # TODO: add :expr_label to :expr_beg (set in expr_result below)
609
- return expr_result(token, "(")
610
- end
611
-
612
- def process_paren19
613
- if is_beg? then
614
- :tLPAREN
615
- elsif is_space_arg? then
616
- :tLPAREN_ARG
617
- else
618
- :tLPAREN2 # plain '(' in parse.y
619
- end
705
+ cond.push false
706
+ cmdarg.push false
707
+ result EXPR_PAR, token, text
620
708
  end
621
709
 
622
710
  def process_percent text
623
711
  return parse_quote if is_beg?
624
712
 
625
- return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
713
+ return result EXPR_BEG, :tOP_ASGN, "%" if scan(/\=/)
626
714
 
627
- return parse_quote if is_arg? && space_seen && ! check(/\s/)
715
+ return parse_quote if is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
628
716
 
629
- return result(:arg_state, :tPERCENT, "%")
717
+ return result :arg_state, :tPERCENT, "%"
630
718
  end
631
719
 
632
720
  def process_plus_minus text
@@ -637,33 +725,33 @@ class RubyLexer
637
725
  [:tUMINUS, :tMINUS]
638
726
  end
639
727
 
640
- if in_arg_state? then
728
+ if is_after_operator? then
641
729
  if scan(/@/) then
642
- return result(:expr_arg, utype, "#{sign}@")
730
+ return result(EXPR_ARG, utype, "#{sign}@")
643
731
  else
644
- return result(:expr_arg, type, sign)
732
+ return result(EXPR_ARG, type, sign)
645
733
  end
646
734
  end
647
735
 
648
- return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
736
+ return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
649
737
 
650
738
  if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
651
739
  arg_ambiguous if is_arg?
652
740
 
653
741
  if check(/\d/) then
654
742
  return nil if utype == :tUPLUS
655
- return result(:expr_beg, :tUMINUS_NUM, sign)
743
+ return result EXPR_BEG, :tUMINUS_NUM, sign
656
744
  end
657
745
 
658
- return result(:expr_beg, utype, sign)
746
+ return result EXPR_BEG, utype, sign
659
747
  end
660
748
 
661
- return result(:expr_beg, type, sign)
749
+ result EXPR_BEG, type, sign
662
750
  end
663
751
 
664
752
  def process_questionmark text
665
753
  if is_end? then
666
- return result(:expr_value, :tEH, "?")
754
+ return result EXPR_BEG, :tEH, "?"
667
755
  end
668
756
 
669
757
  if end_of_stream? then
@@ -685,9 +773,9 @@ class RubyLexer
685
773
  end
686
774
 
687
775
  # ternary
688
- return result(:expr_value, :tEH, "?")
776
+ return result EXPR_BEG, :tEH, "?"
689
777
  elsif check(/\w(?=\w)/) then # ternary, also
690
- return result(:expr_beg, :tEH, "?")
778
+ return result EXPR_BEG, :tEH, "?"
691
779
  end
692
780
 
693
781
  c = if scan(/\\/) then
@@ -696,7 +784,7 @@ class RubyLexer
696
784
  ss.getch
697
785
  end
698
786
 
699
- return result(:expr_end, :tSTRING, c)
787
+ result EXPR_END, :tSTRING, c
700
788
  end
701
789
 
702
790
  def process_slash text
@@ -707,7 +795,7 @@ class RubyLexer
707
795
  end
708
796
 
709
797
  if scan(/\=/) then
710
- return result(:expr_beg, :tOP_ASGN, "/")
798
+ return result(EXPR_BEG, :tOP_ASGN, "/")
711
799
  end
712
800
 
713
801
  if is_arg? && space_seen then
@@ -726,28 +814,28 @@ class RubyLexer
726
814
 
727
815
  token = nil
728
816
 
729
- if in_arg_state? then
817
+ if is_after_operator? then
730
818
  case
731
819
  when scan(/\]\=/) then
732
820
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
733
- return result(:expr_arg, :tASET, "[]=")
821
+ return result EXPR_ARG, :tASET, "[]="
734
822
  when scan(/\]/) then
735
823
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
736
- return result(:expr_arg, :tAREF, "[]")
824
+ return result EXPR_ARG, :tAREF, "[]"
737
825
  else
738
826
  rb_compile_error "unexpected '['"
739
827
  end
740
828
  elsif is_beg? then
741
829
  token = :tLBRACK
742
- elsif is_arg? && space_seen then
830
+ elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
743
831
  token = :tLBRACK
744
832
  else
745
833
  token = :tLBRACK2
746
834
  end
747
835
 
748
- # TODO: this is done by expr_result except "|EXPR_LABEL")
749
- # SET_LEX_STATE(EXPR_BEG|EXPR_LABEL);
750
- expr_result token, "["
836
+ cond.push false
837
+ cmdarg.push false
838
+ result EXPR_PAR, token, text
751
839
  end
752
840
 
753
841
  def possibly_escape_string text, check
@@ -763,7 +851,7 @@ class RubyLexer
763
851
  def process_symbol text
764
852
  symbol = possibly_escape_string text, /^:"/
765
853
 
766
- return result(:expr_end, :tSYMBOL, symbol)
854
+ result EXPR_END, :tSYMBOL, symbol
767
855
  end
768
856
 
769
857
  def was_label?
@@ -780,19 +868,19 @@ class RubyLexer
780
868
  text = text[0..-2]
781
869
  end
782
870
 
783
- result :expr_end, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
871
+ result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
784
872
  end
785
873
 
786
874
  def process_label text
787
875
  symbol = possibly_escape_string text, /^"/
788
876
 
789
- result(:expr_labeled, :tLABEL, [symbol, self.lineno]) # TODO: expr_arg|expr_labeled
877
+ result EXPR_LAB, :tLABEL, [symbol, self.lineno]
790
878
  end
791
879
 
792
880
  def process_token text
793
881
  # matching: parse_ident in compare/parse23.y:7989
794
882
  # TODO: make this always return [token, lineno]
795
- self.last_state = lex_state
883
+ # FIX: remove: self.last_state = lex_state
796
884
 
797
885
  token = self.token = text
798
886
  token << matched if scan(/[\!\?](?!=)/)
@@ -801,7 +889,7 @@ class RubyLexer
801
889
  case
802
890
  when token =~ /[!?]$/ then
803
891
  :tFID
804
- when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
892
+ when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
805
893
  # ident=, not =~ => == or followed by =>
806
894
  # TODO test lexing of a=>b vs a==>b
807
895
  token << matched
@@ -814,31 +902,30 @@ class RubyLexer
814
902
 
815
903
  if is_label_possible? and is_label_suffix? then
816
904
  scan(/:/)
817
- # TODO: :expr_arg|:expr_labeled
818
- return result :expr_labeled, :tLABEL, [token, self.lineno]
905
+ return result EXPR_LAB, :tLABEL, [token, self.lineno]
819
906
  end
820
907
 
821
- # TODO: mb == ENC_CODERANGE_7BIT && !in_lex_state?(:expr_dot)
822
- unless in_lex_state? :expr_dot then
908
+ # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
909
+ if lex_state !~ EXPR_DOT then
823
910
  # See if it is a reserved word.
824
911
  keyword = RubyParserStuff::Keyword.keyword token
825
912
 
826
913
  return process_token_keyword keyword if keyword
827
- end # unless in_lex_state? :expr_dot
914
+ end
828
915
 
829
916
  # matching: compare/parse23.y:8079
830
- state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
831
- cmd_state ? :expr_cmdarg : :expr_arg
832
- elsif in_lex_state? :expr_fname then
833
- :expr_endfn
917
+ state = if is_beg? or is_arg? or lex_state =~ EXPR_DOT then
918
+ cmd_state ? EXPR_CMDARG : EXPR_ARG
919
+ elsif lex_state =~ EXPR_FNAME then
920
+ EXPR_ENDFN
834
921
  else
835
- :expr_end
922
+ EXPR_END
836
923
  end
837
924
 
838
- if not [:expr_dot, :expr_fname].include? last_state and
839
- (tok_id == :tIDENTIFIER) and # not :expr_fname, not attrasgn
925
+ if last_state !~ EXPR_DOT|EXPR_FNAME and
926
+ (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
840
927
  lvar_defined?(token) then
841
- state = :expr_end # TODO: EXPR_END|EXPR_LABEL
928
+ state = EXPR_END|EXPR_LABEL
842
929
  end
843
930
 
844
931
  token.lineno = self.lineno # yes, on a string. I know... I know...
@@ -853,9 +940,9 @@ class RubyLexer
853
940
 
854
941
  value = [token, self.lineno]
855
942
 
856
- return result(lex_state, keyword.id0, value) if state == :expr_fname
943
+ return result(lex_state, keyword.id0, value) if state =~ EXPR_FNAME
857
944
 
858
- self.command_start = true if lex_state == :expr_beg
945
+ self.command_start = true if lex_state =~ EXPR_BEG
859
946
 
860
947
  case
861
948
  when keyword.id0 == :kDO then
@@ -863,22 +950,22 @@ class RubyLexer
863
950
  when lambda_beginning? then
864
951
  self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
865
952
  self.paren_nest -= 1
866
- result(lex_state, :kDO_LAMBDA, value)
953
+ result lex_state, :kDO_LAMBDA, value
867
954
  when cond.is_in_state then
868
- result(lex_state, :kDO_COND, value)
869
- when cmdarg.is_in_state && state != :expr_cmdarg then
870
- result(lex_state, :kDO_BLOCK, value)
871
- when [:expr_beg, :expr_endarg].include?(state) then
872
- result(lex_state, :kDO_BLOCK, value)
955
+ result lex_state, :kDO_COND, value
956
+ when cmdarg.is_in_state && state != EXPR_CMDARG then
957
+ result lex_state, :kDO_BLOCK, value
958
+ when state =~ EXPR_BEG|EXPR_ENDARG then
959
+ result lex_state, :kDO_BLOCK, value
873
960
  else
874
- result(lex_state, :kDO, value)
961
+ result lex_state, :kDO, value
875
962
  end
876
- when [:expr_beg, :expr_labeled].include?(state) then
877
- result(lex_state, keyword.id0, value)
963
+ when state =~ EXPR_PAD then
964
+ result lex_state, keyword.id0, value
878
965
  when keyword.id0 != keyword.id1 then
879
- result(:expr_beg, keyword.id1, value) # TODO: :expr_beg|:expr_label
966
+ result EXPR_PAR, keyword.id1, value
880
967
  else
881
- result(lex_state, keyword.id1, value)
968
+ result lex_state, keyword.id1, value
882
969
  end
883
970
  end
884
971
 
@@ -921,9 +1008,10 @@ class RubyLexer
921
1008
  when scan(/s/) then # space
922
1009
  " "
923
1010
  when scan(/[0-7]{1,3}/) then # octal constant
924
- (matched.to_i(8) & 0xFF).chr
1011
+ (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
925
1012
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
926
- ss[1].to_i(16).chr
1013
+ # TODO: force encode everything to UTF-8?
1014
+ ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
927
1015
  when check(/M-\\[\\MCc]/) then
928
1016
  scan(/M-\\/) # eat it
929
1017
  c = self.read_escape
@@ -946,8 +1034,10 @@ class RubyLexer
946
1034
  c
947
1035
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
948
1036
  matched
949
- when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
1037
+ when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
950
1038
  [ss[1].delete("{}").to_i(16)].pack("U")
1039
+ when scan(/u([0-9a-fA-F]{1,3})/) then
1040
+ rb_compile_error "Invalid escape character syntax"
951
1041
  when scan(/[McCx0-9]/) || end_of_stream? then
952
1042
  rb_compile_error("Invalid escape character syntax")
953
1043
  else
@@ -974,7 +1064,7 @@ class RubyLexer
974
1064
  self.brace_nest = 0
975
1065
  self.command_start = true
976
1066
  self.comments = []
977
- self.lex_state = :expr_none
1067
+ self.lex_state = EXPR_NONE
978
1068
  self.lex_strterm = nil
979
1069
  self.lineno = 1
980
1070
  self.lpar_beg = nil
@@ -988,9 +1078,9 @@ class RubyLexer
988
1078
  self.cmdarg.reset
989
1079
  end
990
1080
 
991
- def result lex_state, token, text # :nodoc:
992
- lex_state = self.arg_state if lex_state == :arg_state
993
- self.lex_state = lex_state if lex_state
1081
+ def result new_state, token, text # :nodoc:
1082
+ new_state = self.arg_state if new_state == :arg_state
1083
+ self.lex_state = new_state if new_state
994
1084
  [token, text]
995
1085
  end
996
1086
 
@@ -1057,8 +1147,10 @@ class RubyLexer
1057
1147
  prev = self.string_buffer.last
1058
1148
  if term == chr && prev && prev.end_with?("(?") then
1059
1149
  self.string_buffer << chr
1150
+ elsif term == chr || chr.ascii_only? then
1151
+ self.string_buffer << matched # dunno why we keep them for ascii
1060
1152
  else
1061
- self.string_buffer << matched
1153
+ self.string_buffer << chr # HACK? this is such a rat's nest
1062
1154
  end
1063
1155
  else
1064
1156
  rb_compile_error "Invalid escape character syntax"
@@ -1089,7 +1181,7 @@ class RubyLexer
1089
1181
  else
1090
1182
  self.string_nest -= 1
1091
1183
  end
1092
- when expand && scan(/#(?=[\$\@\{])/) then
1184
+ when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
1093
1185
  ss.pos -= 1
1094
1186
  break
1095
1187
  when qwords && scan(/\s/) then
@@ -1173,12 +1265,13 @@ class RubyLexer
1173
1265
  s
1174
1266
  when /^[McCx0-9]/ then
1175
1267
  rb_compile_error("Invalid escape character syntax")
1176
- when /u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
1268
+ when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/ then
1177
1269
  [$1.delete("{}").to_i(16)].pack("U")
1270
+ when /u([0-9a-fA-F]{1,3})/ then
1271
+ rb_compile_error("Invalid escape character syntax")
1178
1272
  else
1179
1273
  s
1180
1274
  end
1181
- x.force_encoding "UTF-8" if HAS_ENC
1182
1275
  x
1183
1276
  end
1184
1277
 
@@ -1206,7 +1299,7 @@ class RubyLexer
1206
1299
 
1207
1300
  # matches parser_string_term
1208
1301
  if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
1209
- if (([:expr_beg, :expr_endfn].include?(lex_state) &&
1302
+ if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
1210
1303
  !cond.is_in_state) || is_arg?) &&
1211
1304
  is_label_suffix? then
1212
1305
  scan(/:/)
@@ -1216,8 +1309,7 @@ class RubyLexer
1216
1309
 
1217
1310
  if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
1218
1311
  self.lex_strterm = nil
1219
- # TODO: :expr_beg|:expr_label
1220
- self.lex_state = (token_type == :tLABEL_END) ? :expr_label : :expr_end
1312
+ self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
1221
1313
  end
1222
1314
 
1223
1315
  return token
@@ -1260,7 +1352,7 @@ class RubyLexer
1260
1352
  when 'r' then
1261
1353
  [:tREGEXP_BEG, STR_REGEXP]
1262
1354
  when 's' then
1263
- self.lex_state = :expr_fname
1355
+ self.lex_state = EXPR_FNAME
1264
1356
  [:tSYMBEG, STR_SSYM]
1265
1357
  when 'I' then
1266
1358
  eat_whitespace