ruby_parser 3.2.2 → 3.3.0

@@ -263,7 +263,7 @@ rule
  }
  opt_block_param
  {
- result = self.env.dynamic.keys
+ result = nil # self.env.dynamic.keys
  }
  compstmt tRCURLY
  {
@@ -785,7 +785,6 @@ rule
  }
  | arg tEH arg opt_nl tCOLON arg
  {
- lexer.tern.pop
  result = s(:if, val[0], val[2], val[5])
  }
  | primary
@@ -1136,7 +1135,7 @@ rule
  }
  | kDEF fname
  {
- result = [lexer.lineno, self.in_def]
+ result = self.in_def

  self.comments.push self.lexer.comments
  self.in_def = true
@@ -1144,10 +1143,9 @@ rule
  }
  f_arglist bodystmt kEND
  {
- line, in_def = val[2]
+ in_def = val[2]

  result = new_defn val
- result[2].line line

  self.env.unextend
  self.in_def = in_def
@@ -1429,7 +1427,9 @@ opt_block_args_tail: tCOMMA block_args_tail
  | f_bad_arg

  lambda: {
- # TODO: dyna_push ? hrm
+ self.env.extend :dynamic
+ result = self.lexer.lineno
+
  result = lexer.lpar_beg
  lexer.paren_nest += 1
  lexer.lpar_beg = lexer.paren_nest
@@ -1443,6 +1443,7 @@ opt_block_args_tail: tCOMMA block_args_tail

  call = new_call nil, :lambda
  result = new_iter call, args, body
+ self.env.unextend
  }

  f_larglist: tLPAREN2 f_args opt_bv_decl rparen
@@ -1470,7 +1471,7 @@ opt_block_args_tail: tCOMMA block_args_tail
  }
  opt_block_param
  {
- result = self.env.dynamic.keys
+ result = nil # self.env.dynamic.keys
  }
  compstmt kEND
  {
@@ -1556,7 +1557,7 @@ opt_block_args_tail: tCOMMA block_args_tail
  }
  opt_block_param
  {
- result = self.env.dynamic.keys
+ result = nil # self.env.dynamic.keys
  }
  compstmt tRCURLY
  {
@@ -1574,7 +1575,7 @@ opt_block_args_tail: tCOMMA block_args_tail
  }
  opt_block_param
  {
- result = self.env.dynamic.keys
+ result = nil # self.env.dynamic.keys
  }
  compstmt kEND
  {
@@ -1972,13 +1973,14 @@ keyword_variable: kNIL { result = s(:nil) }
  f_arglist: tLPAREN2 f_args rparen
  {
  result = val[1]
- lexer.lex_state = :expr_beg
+ self.lexer.lex_state = :expr_beg
  self.lexer.command_start = true
  }
  | f_args term
  {
- self.lexer.lex_state = :expr_beg
  result = val[0]
+ self.lexer.lex_state = :expr_beg
+ self.lexer.command_start = true
  }

  args_tail: f_kwarg tCOMMA f_kwrest opt_f_block_arg
@@ -2127,14 +2129,20 @@ keyword_variable: kNIL { result = s(:nil) }
  {
  # TODO: call_args
  label, _ = val[0] # TODO: fix lineno?
- result = s(:array, s(:kwarg, label.to_sym, val[1]))
+ identifier = label.to_sym
+ self.env[identifier] = :lvar
+
+ result = s(:array, s(:kwarg, identifier, val[1]))
  }

  f_block_kw: tLABEL primary_value
  {
  # TODO: call_args
  label, _ = val[0] # TODO: fix lineno?
- result = s(:array, s(:kwarg, label.to_sym, val[1]))
+ identifier = label.to_sym
+ self.env[identifier] = :lvar
+
+ result = s(:array, s(:kwarg, identifier, val[1]))
  }

  f_block_kwarg: f_block_kw
@@ -5,61 +5,21 @@ class RubyLexer
5
5
  # :stopdoc:
6
6
  RUBY19 = "".respond_to? :encoding
7
7
 
8
- IDENT_CHAR_RE = if RUBY19 then
9
- /[\w\u0080-\u{10ffff}]/u
10
- else
11
- /[\w\x80-\xFF]/n
12
- end
13
-
14
- IDENT_RE = /^#{IDENT_CHAR_RE}+/o
15
-
16
- attr_accessor :command_start
17
- attr_accessor :cmdarg
18
- attr_accessor :cond
19
- attr_accessor :tern # TODO: rename ternary damnit... wtf
20
- attr_accessor :string_nest
21
-
22
- ESC_RE = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc]))/u
23
- # :startdoc:
24
-
25
- ##
26
- # What version of ruby to parse. 18 and 19 are the only valid values
27
- # currently supported.
28
-
29
- attr_accessor :version
30
-
31
- # Additional context surrounding tokens that both the lexer and
32
- # grammar use.
33
- attr_reader :lex_state
34
-
35
- attr_accessor :lex_strterm
36
-
37
- attr_accessor :parser # HACK for very end of lexer... *sigh*
38
-
39
- # Stream of data that yylex examines.
40
- attr_reader :src
41
-
42
- # Last token read via yylex.
43
- attr_accessor :token
44
-
45
- attr_accessor :string_buffer
46
-
47
- # Value of last token which had a value associated with it.
48
- attr_accessor :yacc_value
49
-
50
- # What handles warnings
51
- attr_accessor :warnings
8
+ IDENT_CHAR = if RUBY19 then
9
+ /[\w\u0080-\u{10ffff}]/u
10
+ else
11
+ /[\w\x80-\xFF]/n
12
+ end
52
13
 
53
- attr_accessor :space_seen
54
- attr_accessor :paren_nest
55
- attr_accessor :brace_nest
56
- attr_accessor :lpar_beg
14
+ IDENT = /^#{IDENT_CHAR}+/o
15
+ ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/u
16
+ SIMPLE_STRING = /(#{ESC}|#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
17
+ SIMPLE_SSTRING = /(\\.|[^\'])*/
57
18
 
58
19
  EOF = :eof_haha!
59
20
 
60
21
  # ruby constants for strings (should this be moved somewhere else?)
61
22
 
62
- # :stopdoc:
63
23
  STR_FUNC_BORING = 0x00
64
24
  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
65
25
  STR_FUNC_EXPAND = 0x02
@@ -75,6 +35,22 @@ class RubyLexer
75
35
  STR_SSYM = STR_FUNC_SYMBOL
76
36
  STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
77
37
 
38
+ ESCAPES = {
39
+ "a" => "\007",
40
+ "b" => "\010",
41
+ "e" => "\033",
42
+ "f" => "\f",
43
+ "n" => "\n",
44
+ "r" => "\r",
45
+ "s" => " ",
46
+ "t" => "\t",
47
+ "v" => "\13",
48
+ "\\" => '\\',
49
+ "\n" => "",
50
+ "C-\?" => 127.chr,
51
+ "c\?" => 127.chr,
52
+ }
53
+
78
54
  TOKENS = {
79
55
  "!" => :tBANG,
80
56
  "!=" => :tNEQ,
@@ -90,8 +66,58 @@ class RubyLexer
90
66
  "=~" => :tMATCH,
91
67
  "->" => :tLAMBDA,
92
68
  }
69
+
70
+ @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
71
+ @@regexp_cache[nil] = nil
72
+
93
73
  # :startdoc:
94
74
 
75
+ attr_accessor :brace_nest
76
+ attr_accessor :cmdarg
77
+ attr_accessor :command_start
78
+ attr_accessor :cond
79
+
80
+ ##
81
+ # Additional context surrounding tokens that both the lexer and
82
+ # grammar use.
83
+
84
+ attr_accessor :lex_state
85
+
86
+ attr_accessor :lex_strterm
87
+ attr_accessor :lpar_beg
88
+ attr_accessor :paren_nest
89
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
90
+ attr_accessor :space_seen
91
+ attr_accessor :string_buffer
92
+ attr_accessor :string_nest
93
+
94
+ # Stream of data that yylex examines.
95
+ attr_reader :src
96
+ alias :ss :src
97
+
98
+ # Last token read via yylex.
99
+ attr_accessor :token
100
+
101
+ ##
102
+ # What version of ruby to parse. 18 and 19 are the only valid values
103
+ # currently supported.
104
+
105
+ attr_accessor :version
106
+
107
+ # Value of last token which had a value associated with it.
108
+ attr_accessor :yacc_value
109
+
110
+ attr_writer :lineno # reader is lazy initalizer
111
+
112
+ attr_writer :comments
113
+
114
+ def initialize v = 18
115
+ self.version = v
116
+
117
+ reset
118
+ end
119
+
120
+ ##
95
121
  # How the parser advances to the next token.
96
122
  #
97
123
  # @return true if not at end of file (EOF).
@@ -100,7 +126,7 @@ class RubyLexer
100
126
  r = yylex
101
127
  self.token = r
102
128
 
103
- raise "yylex returned nil" unless r
129
+ raise "yylex returned nil, near #{ss.rest[0,10].inspect}" unless r
104
130
 
105
131
  return RubyLexer::EOF != r
106
132
  end
@@ -109,28 +135,35 @@ class RubyLexer
109
135
  self.warning("Ambiguous first argument. make sure.")
110
136
  end
111
137
 
112
- def comments
138
+ def arg_state
139
+ in_arg_state? ? :expr_arg : :expr_beg
140
+ end
141
+
142
+ def beginning_of_line?
143
+ ss.bol?
144
+ end
145
+
146
+ def check re
147
+ ss.check re
148
+ end
149
+
150
+ def comments # TODO: remove this... maybe comment_string + attr_accessor
113
151
  c = @comments.join
114
152
  @comments.clear
115
153
  c
116
154
  end
117
155
 
118
- def expr_beg_push val
119
- cond.push false
120
- cmdarg.push false
121
- self.lex_state = :expr_beg
122
- self.yacc_value = val
156
+ def end_of_stream?
157
+ ss.eos?
123
158
  end
124
159
 
125
- def fix_arg_lex_state
126
- self.lex_state = if in_lex_state? :expr_fname, :expr_dot then
127
- :expr_arg
128
- else
129
- :expr_beg
130
- end
160
+ def expr_result token, text
161
+ cond.push false
162
+ cmdarg.push false
163
+ result :expr_beg, token, text
131
164
  end
132
165
 
133
- def heredoc here # 63 lines
166
+ def heredoc here # TODO: rewrite / remove
134
167
  _, eos, func, last_line = here
135
168
 
136
169
  indent = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
@@ -138,11 +171,10 @@ class RubyLexer
138
171
  eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
139
172
  err_msg = "can't match #{eos_re.inspect} anywhere in "
140
173
 
141
- rb_compile_error err_msg if
142
- src.eos?
174
+ rb_compile_error err_msg if end_of_stream?
143
175
 
144
- if src.beginning_of_line? && src.scan(eos_re) then
145
- src.unread_many last_line # TODO: figure out how to remove this
176
+ if beginning_of_line? && scan(eos_re) then
177
+ ss.unread_many last_line # TODO: figure out how to remove this
146
178
  self.yacc_value = eos
147
179
  return :tSTRING_END
148
180
  end
@@ -151,14 +183,14 @@ class RubyLexer
151
183
 
152
184
  if expand then
153
185
  case
154
- when src.scan(/#[$@]/) then
155
- src.pos -= 1 # FIX omg stupid
156
- self.yacc_value = src.matched
186
+ when scan(/#[$@]/) then
187
+ ss.pos -= 1 # FIX omg stupid
188
+ self.yacc_value = matched
157
189
  return :tSTRING_DVAR
158
- when src.scan(/#[{]/) then
159
- self.yacc_value = src.matched
190
+ when scan(/#[{]/) then
191
+ self.yacc_value = matched
160
192
  return :tSTRING_DBEG
161
- when src.scan(/#/) then
193
+ when scan(/#/) then
162
194
  string_buffer << '#'
163
195
  end
164
196
 
@@ -172,34 +204,32 @@ class RubyLexer
172
204
  self.yacc_value = string_buffer.join.delete("\r")
173
205
  return :tSTRING_CONTENT
174
206
  else
175
- string_buffer << src.scan(/\n/)
207
+ string_buffer << scan(/\n/)
176
208
  end
177
209
 
178
- rb_compile_error err_msg if
179
- src.eos?
180
- end until src.check(eos_re)
210
+ rb_compile_error err_msg if end_of_stream?
211
+ end until check(eos_re)
181
212
  else
182
- until src.check(eos_re) do
183
- string_buffer << src.scan(/.*(\n|\z)/)
184
- rb_compile_error err_msg if
185
- src.eos?
213
+ until check(eos_re) do
214
+ string_buffer << scan(/.*(\n|\z)/)
215
+ rb_compile_error err_msg if end_of_stream?
186
216
  end
187
217
  end
188
218
 
189
219
  self.lex_strterm = [:heredoc, eos, func, last_line]
190
- self.yacc_value = string_buffer.join.delete("\r")
191
220
 
221
+ self.yacc_value = string_buffer.join.delete("\r")
192
222
  return :tSTRING_CONTENT
193
223
  end
194
224
 
195
- def heredoc_identifier # 51 lines
225
+ def heredoc_identifier # TODO: remove / rewrite
196
226
  term, func = nil, STR_FUNC_BORING
197
227
  self.string_buffer = []
198
228
 
199
229
  case
200
- when src.scan(/(-?)([\'\"\`])(.*?)\2/) then
201
- term = src[2]
202
- func |= STR_FUNC_INDENT unless src[1].empty?
230
+ when scan(/(-?)([\'\"\`])(.*?)\2/) then
231
+ term = ss[2]
232
+ func |= STR_FUNC_INDENT unless ss[1].empty?
203
233
  func |= case term
204
234
  when "\'" then
205
235
  STR_SQUOTE
@@ -208,24 +238,24 @@ class RubyLexer
208
238
  else
209
239
  STR_XQUOTE
210
240
  end
211
- string_buffer << src[3]
212
- when src.scan(/-?([\'\"\`])(?!\1*\Z)/) then
241
+ string_buffer << ss[3]
242
+ when scan(/-?([\'\"\`])(?!\1*\Z)/) then
213
243
  rb_compile_error "unterminated here document identifier"
214
- when src.scan(/(-?)(#{IDENT_CHAR_RE}+)/) then
244
+ when scan(/(-?)(#{IDENT_CHAR}+)/) then
215
245
  term = '"'
216
246
  func |= STR_DQUOTE
217
- unless src[1].empty? then
247
+ unless ss[1].empty? then
218
248
  func |= STR_FUNC_INDENT
219
249
  end
220
- string_buffer << src[2]
250
+ string_buffer << ss[2]
221
251
  else
222
252
  return nil
223
253
  end
224
254
 
225
- if src.scan(/.*\n/) then
255
+ if scan(/.*\n/) then
226
256
  # TODO: think about storing off the char range instead
227
- line = src.matched
228
- src.extra_lines_added += 1
257
+ line = matched
258
+ ss.extra_lines_added += 1 # FIX: ugh
229
259
  else
230
260
  line = nil
231
261
  end
@@ -241,41 +271,47 @@ class RubyLexer
241
271
  end
242
272
  end
243
273
 
274
+ def in_arg_state? # TODO: rename is_after_operator?
275
+ in_lex_state? :expr_fname, :expr_dot
276
+ end
277
+
244
278
  def in_lex_state?(*states)
245
279
  states.include? lex_state
246
280
  end
247
281
 
248
- def initialize v = 18
249
- self.version = v
250
- self.cond = RubyParserStuff::StackState.new(:cond)
251
- self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
252
- self.tern = RubyParserStuff::StackState.new(:tern)
253
- self.string_nest = 0
254
- self.paren_nest = 0
255
- self.brace_nest = 0
256
- self.lpar_beg = nil
282
+ def int_with_base base
283
+ rb_compile_error "Invalid numeric format" if matched =~ /__/
257
284
 
258
- @comments = []
285
+ self.yacc_value = matched.to_i(base)
286
+ return :tINTEGER
287
+ end
259
288
 
260
- reset
289
+ def is_arg?
290
+ in_lex_state? :expr_arg, :expr_cmdarg
261
291
  end
262
292
 
263
- def int_with_base base
264
- rb_compile_error "Invalid numeric format" if src.matched =~ /__/
293
+ def is_beg?
294
+ in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class
295
+ end
265
296
 
266
- self.yacc_value = src.matched.to_i(base)
267
- return :tINTEGER
297
+ def is_end?
298
+ in_lex_state? :expr_end, :expr_endarg, :expr_endfn
268
299
  end
269
300
 
270
- def lex_state= o
271
- # warn "wtf lex_state = #{o.inspect} from #{caller.first}"
272
- raise "wtf\?" unless Symbol === o
273
- @lex_state = o
301
+ def is_label_possible? command_state
302
+ (in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
303
+ end
304
+
305
+ def is_space_arg? c = "x"
306
+ is_arg? and space_seen and c !~ /\s/
274
307
  end
275
308
 
276
- attr_writer :lineno
277
309
  def lineno
278
- @lineno ||= src.lineno
310
+ @lineno ||= ss.lineno
311
+ end
312
+
313
+ def matched
314
+ ss.matched
279
315
  end
280
316
 
281
317
  ##
@@ -288,45 +324,45 @@ class RubyLexer
288
324
  self.lex_state = :expr_end
289
325
 
290
326
  case
291
- when src.scan(/[+-]?0[xXbBdD]\b/) then
327
+ when scan(/[+-]?0[xXbBdD]\b/) then
292
328
  rb_compile_error "Invalid numeric format"
293
- when src.scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
329
+ when scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
294
330
  int_with_base(10)
295
- when src.scan(/[+-]?0x[a-f0-9_]+/i) then
331
+ when scan(/[+-]?0x[a-f0-9_]+/i) then
296
332
  int_with_base(16)
297
- when src.scan(/[+-]?0[Bb][01_]+/) then
333
+ when scan(/[+-]?0[Bb][01_]+/) then
298
334
  int_with_base(2)
299
- when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
335
+ when scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
300
336
  rb_compile_error "Illegal octal digit."
301
- when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
337
+ when scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
302
338
  int_with_base(8)
303
- when src.scan(/[+-]?[\d_]+_(e|\.)/) then
339
+ when scan(/[+-]?[\d_]+_(e|\.)/) then
304
340
  rb_compile_error "Trailing '_' in number."
305
- when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
306
- number = src.matched
341
+ when scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
342
+ number = matched
307
343
  if number =~ /__/ then
308
344
  rb_compile_error "Invalid numeric format"
309
345
  end
310
346
  self.yacc_value = number.to_f
311
347
  :tFLOAT
312
- when src.scan(/[+-]?[0-9_]+(?![e])/) then
348
+ when scan(/[+-]?[0-9_]+(?![e])/) then
313
349
  int_with_base(10)
314
350
  else
315
351
  rb_compile_error "Bad number format"
316
352
  end
317
353
  end
318
354
 
319
- def parse_quote # 58 lines
355
+ def parse_quote # TODO: remove / rewrite
320
356
  beg, nnd, short_hand, c = nil, nil, false, nil
321
357
 
322
- if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
323
- rb_compile_error "unknown type of %string" if src.matched_size == 2
324
- c, beg, short_hand = src.matched, src.getch, false
358
+ if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
359
+ rb_compile_error "unknown type of %string" if ss.matched_size == 2
360
+ c, beg, short_hand = matched, ss.getch, false
325
361
  else # Short-hand (e.g. %{, %., %!, etc)
326
- c, beg, short_hand = 'Q', src.getch, true
362
+ c, beg, short_hand = 'Q', ss.getch, true
327
363
  end
328
364
 
329
- if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
365
+ if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
330
366
  rb_compile_error "unterminated quoted string meets end of file"
331
367
  end
332
368
 
@@ -334,19 +370,19 @@ class RubyLexer
334
370
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
335
371
  nnd, beg = beg, "\0" if nnd.nil?
336
372
 
337
- token_type, self.yacc_value = nil, "%#{c}#{beg}"
373
+ token_type, text = nil, "%#{c}#{beg}"
338
374
  token_type, string_type = case c
339
375
  when 'Q' then
340
376
  ch = short_hand ? nnd : c + beg
341
- self.yacc_value = "%#{ch}"
377
+ text = "%#{ch}"
342
378
  [:tSTRING_BEG, STR_DQUOTE]
343
379
  when 'q' then
344
380
  [:tSTRING_BEG, STR_SQUOTE]
345
381
  when 'W' then
346
- src.scan(/\s*/)
382
+ scan(/\s*/)
347
383
  [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
348
384
  when 'w' then
349
- src.scan(/\s*/)
385
+ scan(/\s*/)
350
386
  [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
351
387
  when 'x' then
352
388
  [:tXSTRING_BEG, STR_XQUOTE]
@@ -356,20 +392,25 @@ class RubyLexer
356
392
  self.lex_state = :expr_fname
357
393
  [:tSYMBEG, STR_SSYM]
358
394
  when 'I' then
395
+ src.scan(/\s*/)
359
396
  [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
360
397
  when 'i' then
398
+ src.scan(/\s*/)
361
399
  [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
362
400
  end
363
401
 
364
- rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
402
+ rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
365
403
  token_type.nil?
366
404
 
367
- self.lex_strterm = [:strterm, string_type, nnd, beg]
405
+ raise "huh" unless string_type
406
+
407
+ string string_type, nnd, beg
368
408
 
409
+ self.yacc_value = text
369
410
  return token_type
370
411
  end
371
412
 
372
- def parse_string(quote) # 65 lines
413
+ def parse_string quote # TODO: rewrite / remove
373
414
  _, string_type, term, open = quote
374
415
 
375
416
  space = false # FIX: remove these
@@ -381,41 +422,39 @@ class RubyLexer
381
422
  regexp = (func & STR_FUNC_REGEXP) != 0
382
423
  expand = (func & STR_FUNC_EXPAND) != 0
383
424
 
384
- unless func then # FIX: impossible, prolly needs == 0
425
+ unless func then # nil'ed from qwords below. *sigh*
385
426
  self.lineno = nil
386
427
  return :tSTRING_END
387
428
  end
388
429
 
389
- space = true if qwords and src.scan(/\s+/)
430
+ space = true if qwords and scan(/\s+/)
390
431
 
391
- if self.string_nest == 0 && src.scan(/#{term_re}/) then
432
+ if self.string_nest == 0 && scan(/#{term_re}/) then
392
433
  if qwords then
393
- quote[1] = nil # TODO: make struct
434
+ quote[1] = nil
394
435
  return :tSPACE
395
436
  elsif regexp then
396
- self.yacc_value = self.regx_options
397
437
  self.lineno = nil
438
+ self.yacc_value = self.regx_options
398
439
  return :tREGEXP_END
399
440
  else
400
- self.yacc_value = term
401
441
  self.lineno = nil
442
+ self.yacc_value = term
402
443
  return :tSTRING_END
403
444
  end
404
445
  end
405
446
 
406
- if space then
407
- return :tSPACE
408
- end
447
+ return :tSPACE if space
409
448
 
410
449
  self.string_buffer = []
411
450
 
412
451
  if expand
413
452
  case
414
- when src.scan(/#(?=[$@])/) then
453
+ when scan(/#(?=[$@])/) then
415
454
  return :tSTRING_DVAR
416
- when src.scan(/#[{]/) then
455
+ when scan(/#[{]/) then
417
456
  return :tSTRING_DBEG
418
- when src.scan(/#/) then
457
+ when scan(/#/) then
419
458
  string_buffer << '#'
420
459
  end
421
460
  end
@@ -429,71 +468,159 @@ class RubyLexer
429
468
  return :tSTRING_CONTENT
430
469
  end
431
470
 
471
+ def process_token command_state, last_state
472
+ token = self.token
473
+ token << matched if scan(/[\!\?](?!=)/)
474
+
475
+ tok_id =
476
+ case
477
+ when token =~ /[!?]$/ then
478
+ :tFID
479
+ when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
480
+ # ident=, not =~ => == or followed by =>
481
+ # TODO test lexing of a=>b vs a==>b
482
+ token << matched
483
+ :tIDENTIFIER
484
+ when token =~ /^[A-Z]/ then
485
+ :tCONSTANT
486
+ else
487
+ :tIDENTIFIER
488
+ end
489
+
490
+ if !ruby18 and is_label_possible?(command_state) and scan(/:(?!:)/) then
491
+ return result(:expr_beg, :tLABEL, [token, ss.lineno]) # HACK: array? TODO: self.lineno
492
+ end
493
+
494
+ unless in_lex_state? :expr_dot then
495
+ # See if it is a reserved word.
496
+ keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
497
+ RubyParserStuff::Keyword.keyword18 token
498
+ else
499
+ RubyParserStuff::Keyword.keyword19 token
500
+ end
501
+
502
+ return process_token_keyword keyword if keyword
503
+ end # unless in_lex_state? :expr_dot
504
+
505
+ # TODO:
506
+ # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
507
+
508
+ state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
509
+ command_state ? :expr_cmdarg : :expr_arg
510
+ elsif not ruby18 and in_lex_state? :expr_fname then
511
+ :expr_endfn
512
+ else
513
+ :expr_end
514
+ end
515
+
516
+ if not [:expr_dot, :expr_fname].include? last_state and
517
+ self.parser.env[token.to_sym] == :lvar then
518
+ state = :expr_end
519
+ end
520
+
521
+ return result(state, tok_id, token)
522
+ end
523
+
524
+ def process_token_keyword keyword
525
+ state = keyword.state
526
+ value = [token, ss.lineno] # TODO: use self.lineno ?
527
+
528
+ self.command_start = true if state == :expr_beg and lex_state != :expr_fname
529
+
530
+ case
531
+ when lex_state == :expr_fname then
532
+ result(state, keyword.id0, keyword.name)
533
+ when keyword.id0 == :kDO then
534
+ case
535
+ when lpar_beg && lpar_beg == paren_nest then
536
+ self.lpar_beg = nil
537
+ self.paren_nest -= 1
538
+ result(state, :kDO_LAMBDA, value)
539
+ when cond.is_in_state then
540
+ result(state, :kDO_COND, value)
541
+ when cmdarg.is_in_state && lex_state != :expr_cmdarg then
542
+ result(state, :kDO_BLOCK, value)
543
+ when in_lex_state?(:expr_beg, :expr_endarg) then
544
+ result(state, :kDO_BLOCK, value)
545
+ else
546
+ result(state, :kDO, value)
547
+ end
548
+ when in_lex_state?(:expr_beg, :expr_value) then
549
+ result(state, keyword.id0, value)
550
+ when keyword.id0 != keyword.id1 then
551
+ result(:expr_beg, keyword.id1, value)
552
+ else
553
+ result(state, keyword.id1, value)
554
+ end
555
+ end
556
+
432
557
  def rb_compile_error msg
433
- msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
558
+ msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
434
559
  raise RubyParser::SyntaxError, msg
435
560
  end
436
561
 
437
- def read_escape # 51 lines
562
+ def read_escape # TODO: remove / rewrite
438
563
  case
439
- when src.scan(/\\/) then # Backslash
564
+ when scan(/\\/) then # Backslash
440
565
  '\\'
441
- when src.scan(/n/) then # newline
566
+ when scan(/n/) then # newline
442
567
  "\n"
443
- when src.scan(/t/) then # horizontal tab
568
+ when scan(/t/) then # horizontal tab
444
569
  "\t"
445
- when src.scan(/r/) then # carriage-return
570
+ when scan(/r/) then # carriage-return
446
571
  "\r"
447
- when src.scan(/f/) then # form-feed
572
+ when scan(/f/) then # form-feed
448
573
  "\f"
449
- when src.scan(/v/) then # vertical tab
574
+ when scan(/v/) then # vertical tab
450
575
  "\13"
451
- when src.scan(/a/) then # alarm(bell)
576
+ when scan(/a/) then # alarm(bell)
452
577
  "\007"
453
- when src.scan(/e/) then # escape
578
+ when scan(/e/) then # escape
454
579
  "\033"
455
- when src.scan(/b/) then # backspace
580
+ when scan(/b/) then # backspace
456
581
  "\010"
457
- when src.scan(/s/) then # space
582
+ when scan(/s/) then # space
458
583
  " "
459
- when src.scan(/[0-7]{1,3}/) then # octal constant
460
- (src.matched.to_i(8) & 0xFF).chr
461
- when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
462
- src[1].to_i(16).chr
463
- when src.check(/M-\\[\\MCc]/) then
464
- src.scan(/M-\\/) # eat it
584
+ when scan(/[0-7]{1,3}/) then # octal constant
585
+ (matched.to_i(8) & 0xFF).chr
586
+ when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
587
+ ss[1].to_i(16).chr
588
+ when check(/M-\\[\\MCc]/) then
589
+ scan(/M-\\/) # eat it
465
590
  c = self.read_escape
466
591
  c[0] = (c[0].ord | 0x80).chr
467
592
  c
468
- when src.scan(/M-(.)/) then
469
- c = src[1]
593
+ when scan(/M-(.)/) then
594
+ c = ss[1]
470
595
  c[0] = (c[0].ord | 0x80).chr
471
596
  c
472
- when src.check(/(C-|c)\\[\\MCc]/) then
473
- src.scan(/(C-|c)\\/) # eat it
597
+ when check(/(C-|c)\\[\\MCc]/) then
598
+ scan(/(C-|c)\\/) # eat it
474
599
  c = self.read_escape
475
600
  c[0] = (c[0].ord & 0x9f).chr
476
601
  c
477
- when src.scan(/C-\?|c\?/) then
602
+ when scan(/C-\?|c\?/) then
478
603
  127.chr
479
- when src.scan(/(C-|c)(.)/) then
480
- c = src[2]
604
+ when scan(/(C-|c)(.)/) then
605
+ c = ss[2]
481
606
  c[0] = (c[0].ord & 0x9f).chr
482
607
  c
483
- when src.scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
484
- src.matched
485
- when src.scan(/[McCx0-9]/) || src.eos? then
608
+ when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
609
+ matched
610
+ when scan(/u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/) then
611
+ [ss[1].delete("{}").to_i(16)].pack("U")
612
+ when scan(/[McCx0-9]/) || end_of_stream? then
486
613
  rb_compile_error("Invalid escape character syntax")
487
614
  else
488
- src.getch
615
+ ss.getch
489
616
  end
490
617
  end
491
618
 
492
- def regx_options # 15 lines
619
+ def regx_options # TODO: rewrite / remove
493
620
  good, bad = [], []
494
621
 
495
- if src.scan(/[a-z]+/) then
496
- good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
622
+ if scan(/[a-z]+/) then
623
+ good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
497
624
  end
498
625
 
499
626
  unless bad.empty? then
@@ -505,13 +632,30 @@ class RubyLexer
505
632
  end
506
633
 
507
634
  def reset
635
+ self.brace_nest = 0
508
636
  self.command_start = true
637
+ self.comments = []
638
+ self.lex_state = nil
509
639
  self.lex_strterm = nil
640
+ self.lineno = 1
641
+ self.lpar_beg = nil
642
+ self.paren_nest = 0
643
+ self.space_seen = false
644
+ self.string_nest = 0
510
645
  self.token = nil
511
646
  self.yacc_value = nil
512
647
 
513
- @src = nil
514
- @lex_state = nil
648
+ self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
649
+ self.cond = RubyParserStuff::StackState.new(:cond)
650
+
651
+ @src = nil
652
+ end
653
+
654
+ def result lex_state, token, text # :nodoc:
655
+ lex_state = self.arg_state if lex_state == :arg_state
656
+ self.lex_state = lex_state if lex_state
657
+ self.yacc_value = text
658
+ token
515
659
  end
516
660
 
517
661
  def ruby18
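
Most of the yylex rewrites in the hunks below are the same mechanical transformation: the old three-step sequence (set lex_state, set yacc_value, return the token) is folded into the result helper introduced just above, where a nil state leaves lex_state untouched and :arg_state resolves through arg_state. A minimal, self-contained sketch of that helper's behavior (the Demo class here is hypothetical; its two methods copy the definitions added in this diff):

class Demo
  attr_accessor :lex_state, :yacc_value

  def arg_state                      # mirrors the new RubyLexer#arg_state
    [:expr_fname, :expr_dot].include?(lex_state) ? :expr_arg : :expr_beg
  end

  def result lex_state, token, text  # mirrors the new RubyLexer#result
    lex_state = self.arg_state if lex_state == :arg_state
    self.lex_state = lex_state if lex_state
    self.yacc_value = text
    token
  end
end

d = Demo.new
d.lex_state = :expr_fname
p d.result(:arg_state, :tPIPE, "|")  # => :tPIPE; lex_state becomes :expr_arg
p d.yacc_value                       # => "|"
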
@@ -522,35 +666,52 @@ class RubyLexer
522
666
  Ruby19Parser === parser
523
667
  end
524
668
 
669
+ def scan re
670
+ ss.scan re
671
+ end
672
+
673
+ def space_vs_beginning space_type, beg_type, fallback
674
+ if is_space_arg? check(/./m) then
675
+ warning "`**' interpreted as argument prefix"
676
+ space_type
677
+ elsif is_beg? then
678
+ beg_type
679
+ else
680
+ # TODO: warn_balanced("**", "argument prefix");
681
+ fallback
682
+ end
683
+ end
684
+
685
+ def string type, beg = matched, nnd = "\0"
686
+ self.lex_strterm = [:strterm, type, beg, nnd]
687
+ end
688
+
525
689
  def src= src
526
690
  raise "bad src: #{src.inspect}" unless String === src
527
691
  @src = RPStringScanner.new(src)
528
692
  end
529
693
 
530
- def tokadd_escape term # 20 lines
694
+ def tokadd_escape term # TODO: rewrite / remove
531
695
  case
532
- when src.scan(/\\\n/) then
696
+ when scan(/\\\n/) then
533
697
  # just ignore
534
- when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
535
- self.string_buffer << src.matched
536
- when src.scan(/\\([MC]-|c)(?=\\)/) then
537
- self.string_buffer << src.matched
698
+ when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
699
+ self.string_buffer << matched
700
+ when scan(/\\([MC]-|c)(?=\\)/) then
701
+ self.string_buffer << matched
538
702
  self.tokadd_escape term
539
- when src.scan(/\\([MC]-|c)(.)/) then
540
- self.string_buffer << src.matched
541
- when src.scan(/\\[McCx]/) then
703
+ when scan(/\\([MC]-|c)(.)/) then
704
+ self.string_buffer << matched
705
+ when scan(/\\[McCx]/) then
542
706
  rb_compile_error "Invalid escape character syntax"
543
- when src.scan(/\\(.)/m) then
544
- self.string_buffer << src.matched
707
+ when scan(/\\(.)/m) then
708
+ self.string_buffer << matched
545
709
  else
546
710
  rb_compile_error "Invalid escape character syntax"
547
711
  end
548
712
  end
549
713
 
550
- @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
551
- @@regexp_cache[nil] = nil
552
-
553
- def tokadd_string(func, term, paren) # 105 lines
714
+ def tokadd_string(func, term, paren) # TODO: rewrite / remove
554
715
  qwords = (func & STR_FUNC_QWORDS) != 0
555
716
  escape = (func & STR_FUNC_ESCAPE) != 0
556
717
  expand = (func & STR_FUNC_EXPAND) != 0
@@ -560,49 +721,49 @@ class RubyLexer
560
721
  paren_re = @@regexp_cache[paren]
561
722
  term_re = @@regexp_cache[term]
562
723
 
563
- until src.eos? do
724
+ until end_of_stream? do
564
725
  c = nil
565
726
  handled = true
566
727
 
567
728
  case
568
- when paren_re && src.scan(paren_re) then
729
+ when paren_re && scan(paren_re) then
569
730
  self.string_nest += 1
570
- when src.scan(term_re) then
731
+ when scan(term_re) then
571
732
  if self.string_nest == 0 then
572
- src.pos -= 1
733
+ ss.pos -= 1
573
734
  break
574
735
  else
575
736
  self.string_nest -= 1
576
737
  end
577
- when expand && src.scan(/#(?=[\$\@\{])/) then
578
- src.pos -= 1
738
+ when expand && scan(/#(?=[\$\@\{])/) then
739
+ ss.pos -= 1
579
740
  break
580
- when qwords && src.scan(/\s/) then
581
- src.pos -= 1
741
+ when qwords && scan(/\s/) then
742
+ ss.pos -= 1
582
743
  break
583
- when expand && src.scan(/#(?!\n)/) then
744
+ when expand && scan(/#(?!\n)/) then
584
745
  # do nothing
585
- when src.check(/\\/) then
746
+ when check(/\\/) then
586
747
  case
587
- when qwords && src.scan(/\\\n/) then
748
+ when qwords && scan(/\\\n/) then
588
749
  string_buffer << "\n"
589
750
  next
590
- when qwords && src.scan(/\\\s/) then
751
+ when qwords && scan(/\\\s/) then
591
752
  c = ' '
592
- when expand && src.scan(/\\\n/) then
753
+ when expand && scan(/\\\n/) then
593
754
  next
594
- when regexp && src.check(/\\/) then
755
+ when regexp && check(/\\/) then
595
756
  self.tokadd_escape term
596
757
  next
597
- when expand && src.scan(/\\/) then
758
+ when expand && scan(/\\/) then
598
759
  c = self.read_escape
599
- when src.scan(/\\\n/) then
760
+ when scan(/\\\n/) then
600
761
  # do nothing
601
- when src.scan(/\\\\/) then
762
+ when scan(/\\\\/) then
602
763
  string_buffer << '\\' if escape
603
764
  c = '\\'
604
- when src.scan(/\\/) then
605
- unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
765
+ when scan(/\\/) then
766
+ unless scan(term_re) || paren.nil? || scan(paren_re) then
606
767
  string_buffer << "\\"
607
768
  end
608
769
  else
@@ -625,38 +786,22 @@ class RubyLexer
625
786
  /[^#{t}#{x}\#\0\\]+|./
626
787
  end
627
788
 
628
- src.scan re
629
- c = src.matched
789
+ scan re
790
+ c = matched
630
791
 
631
792
  rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
632
793
  end # unless handled
633
794
 
634
- c ||= src.matched
795
+ c ||= matched
635
796
  string_buffer << c
636
797
  end # until
637
798
 
638
- c ||= src.matched
639
- c = RubyLexer::EOF if src.eos?
799
+ c ||= matched
800
+ c = RubyLexer::EOF if end_of_stream?
640
801
 
641
802
  return c
642
803
  end
643
804
 
644
- ESCAPES = {
645
- "a" => "\007",
646
- "b" => "\010",
647
- "e" => "\033",
648
- "f" => "\f",
649
- "n" => "\n",
650
- "r" => "\r",
651
- "s" => " ",
652
- "t" => "\t",
653
- "v" => "\13",
654
- "\\" => '\\',
655
- "\n" => "",
656
- "C-\?" => 127.chr,
657
- "c\?" => 127.chr,
658
- }
659
-
660
805
  def unescape s
661
806
  r = ESCAPES[s]
662
807
 
@@ -675,6 +820,8 @@ class RubyLexer
675
820
  s
676
821
  when /^[McCx0-9]/ then
677
822
  rb_compile_error("Invalid escape character syntax")
823
+ when /u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/ then
824
+ [$1.delete("{}").to_i(16)].pack("U")
678
825
  else
679
826
  s
680
827
  end
@@ -691,11 +838,11 @@ class RubyLexer
691
838
  #
692
839
  # @return Description of the Returned Value
693
840
 
694
- def yylex # 826 lines
841
+ def yylex # 461 lines
695
842
  c = ''
696
843
  self.space_seen = false
697
844
  command_state = false
698
- src = self.src
845
+ ss = self.src
699
846
 
700
847
  self.token = nil
701
848
  self.yacc_value = nil
@@ -708,41 +855,42 @@ class RubyLexer
708
855
  last_state = lex_state
709
856
 
710
857
  loop do # START OF CASE
711
- if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
858
+ if scan(/[\ \t\r\f\v]/) then # \s - \n + \v
712
859
  self.space_seen = true
713
860
  next
714
- elsif src.check(/[^a-zA-Z]/) then
715
- if src.scan(/\n|#/) then
861
+ elsif check(/[^a-zA-Z]/) then
862
+ if scan(/\n|\#/) then
716
863
  self.lineno = nil
717
- c = src.matched
864
+ c = matched
718
865
  if c == '#' then
719
- src.pos -= 1
866
+ ss.pos -= 1
720
867
 
721
- while src.scan(/\s*#.*(\n+|\z)/) do
722
- @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
868
+ while scan(/\s*#.*(\n+|\z)/) do
869
+ # TODO: self.lineno += matched.lines.to_a.size
870
+ @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
723
871
  end
724
872
 
725
- return RubyLexer::EOF if src.eos?
873
+ return RubyLexer::EOF if end_of_stream?
726
874
  end
727
875
 
728
876
  # Replace a string of newlines with a single one
729
- src.scan(/\n+/)
877
+ scan(/\n+/)
730
878
 
731
879
  next if in_lex_state?(:expr_beg, :expr_value, :expr_class,
732
880
  :expr_fname, :expr_dot)
733
881
 
734
- if src.scan(/([\ \t\r\f\v]*)\./) then
735
- self.space_seen = true unless src[1].empty?
882
+ if scan(/([\ \t\r\f\v]*)\./) then
883
+ self.space_seen = true unless ss[1].empty?
736
884
 
737
- src.pos -= 1
738
- next unless src.check(/\.\./)
885
+ ss.pos -= 1
886
+ next unless check(/\.\./)
739
887
  end
740
888
 
741
889
  self.command_start = true
742
- self.lex_state = :expr_beg
743
- return :tNL
744
- elsif src.scan(/[\]\)\}]/) then
745
- if src.matched == "}" then
890
+
891
+ return result(:expr_beg, :tNL, nil)
892
+ elsif scan(/[\]\)\}]/) then
893
+ if matched == "}" then
746
894
  self.brace_nest -= 1
747
895
  else
748
896
  self.paren_nest -= 1
@@ -750,54 +898,34 @@ class RubyLexer
750
898
 
751
899
  cond.lexpop
752
900
  cmdarg.lexpop
753
- tern.lexpop
754
-
755
- self.lex_state = if src.matched == ")" then
756
- :expr_endfn
757
- else
758
- :expr_endarg
759
- end
760
901
 
761
- self.yacc_value = src.matched
762
- result = {
902
+ text = matched
903
+ state = text == ")" ? :expr_endfn : :expr_endarg
904
+ token = {
763
905
  ")" => :tRPAREN,
764
906
  "]" => :tRBRACK,
765
907
  "}" => :tRCURLY
766
- }[src.matched]
767
- return result
768
- elsif src.scan(/\!/) then
769
- if in_lex_state?(:expr_fname, :expr_dot) then
770
- self.lex_state = :expr_arg
771
-
772
- if src.scan(/@/) then
773
- self.yacc_value = "!@"
774
- return :tUBANG
775
- end
776
- else
777
- self.lex_state = :expr_beg
778
- end
908
+ }[text]
779
909
 
780
- if src.scan(/[=~]/) then
781
- self.yacc_value = "!#{src.matched}"
782
- else
783
- self.yacc_value = "!"
910
+ return result(state, token, text)
911
+ elsif scan(/\!/) then
912
+ if in_arg_state? then
913
+ return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
784
914
  end
785
915
 
786
- return TOKENS[self.yacc_value]
787
- elsif src.scan(/\.\.\.?|,|![=~]?/) then
788
- self.lex_state = :expr_beg
789
- tok = self.yacc_value = src.matched
790
- return TOKENS[tok]
791
- elsif src.check(/\./) then
792
- if src.scan(/\.\d/) then
916
+ text = scan(/[=~]/) ? "!#{matched}" : "!"
917
+
918
+ return result(arg_state, TOKENS[text], text)
919
+ elsif scan(/\.\.\.?|,|![=~]?/) then
920
+ return result(:expr_beg, TOKENS[matched], matched)
921
+ elsif check(/\./) then
922
+ if scan(/\.\d/) then
793
923
  rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
794
- elsif src.scan(/\./) then
795
- self.lex_state = :expr_dot
796
- self.yacc_value = "."
797
- return :tDOT
924
+ elsif scan(/\./) then
925
+ return result(:expr_dot, :tDOT, ".")
798
926
  end
799
- elsif src.scan(/\(/) then
800
- result = if ruby18 then
927
+ elsif scan(/\(/) then
928
+ token = if ruby18 then
801
929
  yylex_paren18
802
930
  else
803
931
  yylex_paren19
@@ -805,259 +933,188 @@ class RubyLexer
805
933
 
806
934
  self.paren_nest += 1
807
935
 
808
- self.expr_beg_push "("
936
+ return expr_result(token, "(")
937
+ elsif check(/\=/) then
938
+ if scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
939
+ tok = matched
940
+ return result(:arg_state, TOKENS[tok], tok)
941
+ elsif beginning_of_line? and scan(/\=begin(?=\s)/) then
942
+ @comments << matched
809
943
 
810
- return result
811
- elsif src.check(/\=/) then
812
- if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
813
- self.fix_arg_lex_state
814
- tok = self.yacc_value = src.matched
815
- return TOKENS[tok]
816
- elsif src.scan(/\=begin(?=\s)/) then
817
- @comments << src.matched
818
-
819
- unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
944
+ unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
820
945
  @comments.clear
821
946
  rb_compile_error("embedded document meets end of file")
822
947
  end
823
948
 
824
- @comments << src.matched
949
+ @comments << matched
825
950
 
826
951
  next
952
+ elsif scan(/\=(?=begin\b)/) then # h[k]=begin ... end
953
+ tok = matched
954
+ return result(:arg_state, TOKENS[tok], tok)
827
955
  else
828
956
  raise "you shouldn't be able to get here"
829
957
  end
830
- elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
831
- self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
832
- self.lex_state = :expr_end
833
- return :tSTRING
834
- elsif src.scan(/\"/) then # FALLBACK
835
- self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
836
- self.yacc_value = "\""
837
- return :tSTRING_BEG
838
- elsif src.scan(/\@\@?#{IDENT_CHAR_RE}+/o) then
839
- self.token = src.matched
840
-
841
- rb_compile_error "`#{token}` is not allowed as a variable name" if
842
- token =~ /\@\d/
843
-
844
- return process_token(command_state)
845
- elsif src.scan(/\:\:/) then
958
+ elsif scan(/\"(#{SIMPLE_STRING})\"/o) then
959
+ string = matched[1..-2].gsub(ESC) { unescape $1 }
960
+ return result(:expr_end, :tSTRING, string)
961
+ elsif scan(/\"/) then # FALLBACK
962
+ string STR_DQUOTE, '"' # TODO: question this
963
+ return result(nil, :tSTRING_BEG, '"')
964
+ elsif scan(/\@\@?#{IDENT_CHAR}+/o) then
965
+ self.token = matched
966
+
967
+ rb_compile_error "`#{self.token}` is not allowed as a variable name" if
968
+ self.token =~ /\@\d/
969
+
970
+ tok_id = matched =~ /^@@/ ? :tCVAR : :tIVAR
971
+ return result(:expr_end, tok_id, self.token)
972
+ elsif scan(/\:\:/) then
846
973
  if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
847
- self.lex_state = :expr_beg
848
- self.yacc_value = "::"
849
- return :tCOLON3
974
+ return result(:expr_beg, :tCOLON3, "::")
850
975
  end
851
976
 
852
- self.lex_state = :expr_dot
853
- self.yacc_value = "::"
854
- return :tCOLON2
855
- elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
977
+ return result(:expr_dot, :tCOLON2, "::")
978
+ elsif ! is_end? && scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
856
979
  # scanning shortcut to symbols
857
- self.yacc_value = src[1]
858
- self.lex_state = :expr_end
859
- return :tSYMBOL
860
- elsif src.scan(/\:/) then
980
+ return result(:expr_end, :tSYMBOL, ss[1])
981
+ elsif ! is_end? && (scan(/\:\"(#{SIMPLE_STRING})\"/) ||
982
+ scan(/\:\'(#{SIMPLE_SSTRING})\'/)) then
983
+ symbol = ss[1].gsub(ESC) { unescape $1 }
984
+
985
+ rb_compile_error "symbol cannot contain '\\0'" if
986
+ ruby18 && symbol =~ /\0/
987
+
988
+ return result(:expr_end, :tSYMBOL, symbol)
989
+ elsif scan(/\:/) then
861
990
  # ?: / then / when
862
- if is_end? || src.check(/\s/) then
863
- self.lex_state = :expr_beg
991
+ if is_end? || check(/\s/) then
864
992
  # TODO warn_balanced(":", "symbol literal");
865
- self.yacc_value = ":"
866
- return :tCOLON
993
+ return result(:expr_beg, :tCOLON, ":")
867
994
  end
868
995
 
869
996
  case
870
- when src.scan(/\'/) then
871
- self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
872
- when src.scan(/\"/) then
873
- self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
997
+ when scan(/\'/) then
998
+ string STR_SSYM, matched
999
+ when scan(/\"/) then
1000
+ string STR_DSYM, matched
874
1001
  end
875
1002
 
876
- self.lex_state = :expr_fname
877
- self.yacc_value = ":"
878
- return :tSYMBEG
879
- elsif src.check(/[0-9]/) then
1003
+ return result(:expr_fname, :tSYMBEG, ":")
1004
+ elsif check(/[0-9]/) then
880
1005
  return parse_number
881
- elsif src.scan(/\[/) then
1006
+ elsif scan(/\[/) then
882
1007
  self.paren_nest += 1
883
1008
 
884
- result = src.matched
1009
+ token = nil
885
1010
 
886
1011
  if in_lex_state? :expr_fname, :expr_dot then
887
- self.lex_state = :expr_arg
888
1012
  case
889
- when src.scan(/\]\=/) then
1013
+ when scan(/\]\=/) then
890
1014
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
891
- self.yacc_value = "[]="
892
- return :tASET
893
- when src.scan(/\]/) then
1015
+ return result(:expr_arg, :tASET, "[]=")
1016
+ when scan(/\]/) then
894
1017
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
895
- self.yacc_value = "[]"
896
- return :tAREF
1018
+ return result(:expr_arg, :tAREF, "[]")
897
1019
  else
898
1020
  rb_compile_error "unexpected '['"
899
1021
  end
900
1022
  elsif is_beg? then
901
- self.tern.push false
902
- result = :tLBRACK
1023
+ token = :tLBRACK
903
1024
  elsif is_arg? && space_seen then
904
- self.tern.push false
905
- result = :tLBRACK
1025
+ token = :tLBRACK
906
1026
  else
907
- result = :tLBRACK2
1027
+ token = :tLBRACK2
908
1028
  end
909
1029
 
910
- self.expr_beg_push "["
911
-
912
- return result
913
- elsif src.scan(/\'(\\.|[^\'])*\'/) then
914
- self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
915
- self.lex_state = :expr_end
916
- return :tSTRING
917
- elsif src.check(/\|/) then
918
- if src.scan(/\|\|\=/) then
919
- self.lex_state = :expr_beg
920
- self.yacc_value = "||"
921
- return :tOP_ASGN
922
- elsif src.scan(/\|\|/) then
923
- self.lex_state = :expr_beg
924
- self.yacc_value = "||"
925
- return :tOROP
926
- elsif src.scan(/\|\=/) then
927
- self.lex_state = :expr_beg
928
- self.yacc_value = "|"
929
- return :tOP_ASGN
930
- elsif src.scan(/\|/) then
931
- self.fix_arg_lex_state
932
- self.yacc_value = "|"
933
- return :tPIPE
1030
+ return expr_result(token, "[")
1031
+ elsif scan(/\'#{SIMPLE_SSTRING}\'/) then
1032
+ text = matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
1033
+ return result(:expr_end, :tSTRING, text)
1034
+ elsif check(/\|/) then
1035
+ if scan(/\|\|\=/) then
1036
+ return result(:expr_beg, :tOP_ASGN, "||")
1037
+ elsif scan(/\|\|/) then
1038
+ return result(:expr_beg, :tOROP, "||")
1039
+ elsif scan(/\|\=/) then
1040
+ return result(:expr_beg, :tOP_ASGN, "|")
1041
+ elsif scan(/\|/) then
1042
+ return result(:arg_state, :tPIPE, "|")
934
1043
  end
935
- elsif src.scan(/\{/) then
1044
+ elsif scan(/\{/) then
936
1045
  self.brace_nest += 1
937
1046
  if lpar_beg && lpar_beg == paren_nest then
938
1047
  self.lpar_beg = nil
939
1048
  self.paren_nest -= 1
940
1049
 
941
- expr_beg_push "{"
942
-
943
- return :tLAMBEG
1050
+ return expr_result(:tLAMBEG, "{")
944
1051
  end
945
1052
 
946
- result = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
1053
+ token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
947
1054
  :tLCURLY # block (primary)
948
1055
  elsif in_lex_state?(:expr_endarg) then
949
1056
  :tLBRACE_ARG # block (expr)
950
1057
  else
951
- self.tern.push false
952
1058
  :tLBRACE # hash
953
1059
  end
954
1060
 
955
- self.expr_beg_push "{"
956
- self.command_start = true unless result == :tLBRACE
1061
+ self.command_start = true unless token == :tLBRACE
957
1062
 
958
- return result
959
- elsif src.scan(/->/) then
960
- self.lex_state = :expr_endfn
961
- return :tLAMBDA
962
- elsif src.scan(/[+-]/) then
963
- sign = src.matched
1063
+ return expr_result(token, "{")
1064
+ elsif scan(/->/) then
1065
+ return result(:expr_endfn, :tLAMBDA, nil)
1066
+ elsif scan(/[+-]/) then
1067
+ sign = matched
964
1068
  utype, type = if sign == "+" then
965
1069
  [:tUPLUS, :tPLUS]
966
1070
  else
967
1071
  [:tUMINUS, :tMINUS]
968
1072
  end
969
1073
 
970
- if in_lex_state? :expr_fname, :expr_dot then
971
- self.lex_state = :expr_arg
972
- if src.scan(/@/) then
973
- self.yacc_value = "#{sign}@"
974
- return utype
1074
+ if in_arg_state? then
1075
+ if scan(/@/) then
1076
+ return result(:expr_arg, utype, "#{sign}@")
975
1077
  else
976
- self.yacc_value = sign
977
- return type
1078
+ return result(:expr_arg, type, sign)
978
1079
  end
979
1080
  end
980
1081
 
981
- if src.scan(/\=/) then
982
- self.lex_state = :expr_beg
983
- self.yacc_value = sign
984
- return :tOP_ASGN
985
- end
1082
+ return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
986
1083
 
987
- if (is_beg? || (is_arg? && space_seen && !src.check(/\s/))) then
988
- if is_arg? then
989
- arg_ambiguous
990
- end
1084
+ if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
1085
+ arg_ambiguous if is_arg?
991
1086
 
992
- self.lex_state = :expr_beg
993
- self.yacc_value = sign
994
-
995
- if src.check(/\d/) then
996
- if utype == :tUPLUS then
997
- return self.parse_number
998
- else
999
- return :tUMINUS_NUM
1000
- end
1087
+ if check(/\d/) then
1088
+ return self.parse_number if utype == :tUPLUS
1089
+ return result(:expr_beg, :tUMINUS_NUM, sign)
1001
1090
  end
1002
1091
 
1003
- return utype
1092
+ return result(:expr_beg, utype, sign)
1004
1093
  end
1005
1094
 
1006
- self.lex_state = :expr_beg
1007
- self.yacc_value = sign
1008
- return type
1009
- elsif src.check(/\*/) then
1010
- if src.scan(/\*\*=/) then
1011
- self.lex_state = :expr_beg
1012
- self.yacc_value = "**"
1013
- return :tOP_ASGN
1014
- elsif src.scan(/\*\*/) then
1015
- result = if is_space_arg? src.check(/./m) then
1016
- warning "`**' interpreted as argument prefix"
1017
- :tDSTAR
1018
- elsif is_beg? then
1019
- :tDSTAR
1020
- else
1021
- # TODO: warn_balanced("**", "argument prefix");
1022
- :tPOW
1023
- end
1024
- self.yacc_value = "**"
1025
- self.fix_arg_lex_state
1026
- return result
1027
- elsif src.scan(/\*\=/) then
1028
- self.lex_state = :expr_beg
1029
- self.yacc_value = "*"
1030
- return :tOP_ASGN
1031
- elsif src.scan(/\*/) then
1032
- result = if is_space_arg? src.check(/./m) then
1033
- warning("`*' interpreted as argument prefix")
1034
- :tSTAR
1035
- elsif is_beg? then
1036
- :tSTAR
1037
- else
1038
- # TODO: warn_balanced("*", "argument prefix");
1039
- :tSTAR2 # TODO: rename
1040
- end
1095
+ return result(:expr_beg, type, sign)
1096
+ elsif check(/\*/) then
1097
+ if scan(/\*\*=/) then
1098
+ return result(:expr_beg, :tOP_ASGN, "**")
1099
+ elsif scan(/\*\*/) then
1100
+ token = space_vs_beginning :tDSTAR, :tDSTAR, :tPOW
1101
+
1102
+ return result(:arg_state, token, "**")
1103
+ elsif scan(/\*\=/) then
1104
+ return result(:expr_beg, :tOP_ASGN, "*")
1105
+ elsif scan(/\*/) then
1106
+ token = space_vs_beginning :tSTAR, :tSTAR, :tSTAR2
1041
1107
 
1042
- self.yacc_value = "*"
1043
- self.fix_arg_lex_state
1044
- return result
1108
+ return result(:arg_state, token, "*")
1045
1109
  end
1046
- elsif src.check(/\</) then
1047
- if src.scan(/\<\=\>/) then
1048
- self.fix_arg_lex_state
1049
- self.yacc_value = "<=>"
1050
- return :tCMP
1051
- elsif src.scan(/\<\=/) then
1052
- self.fix_arg_lex_state
1053
- self.yacc_value = "<="
1054
- return :tLEQ
1055
- elsif src.scan(/\<\<\=/) then
1056
- self.fix_arg_lex_state
1057
- self.lex_state = :expr_beg
1058
- self.yacc_value = "\<\<"
1059
- return :tOP_ASGN
1060
- elsif src.scan(/\<\</) then
1110
+ elsif check(/\</) then
1111
+ if scan(/\<\=\>/) then
1112
+ return result(:arg_state, :tCMP, "<=>")
1113
+ elsif scan(/\<\=/) then
1114
+ return result(:arg_state, :tLEQ, "<=")
1115
+ elsif scan(/\<\<\=/) then
1116
+ return result(:arg_state, :tOP_ASGN, "<<")
1117
+ elsif scan(/\<\</) then
1061
1118
  if (!in_lex_state?(:expr_dot, :expr_class) &&
1062
1119
  !is_end? &&
1063
1120
  (!is_arg? || space_seen)) then
@@ -1065,70 +1122,49 @@ class RubyLexer
1065
1122
  return tok if tok
1066
1123
  end
1067
1124
 
1068
- self.fix_arg_lex_state
1069
- self.yacc_value = "\<\<"
1070
- return :tLSHFT
1071
- elsif src.scan(/\</) then
1072
- self.fix_arg_lex_state
1073
- self.yacc_value = "<"
1074
- return :tLT
1125
+ return result(:arg_state, :tLSHFT, "\<\<")
1126
+ elsif scan(/\</) then
1127
+ return result(:arg_state, :tLT, "<")
1075
1128
  end
1076
- elsif src.check(/\>/) then
1077
- if src.scan(/\>\=/) then
1078
- self.fix_arg_lex_state
1079
- self.yacc_value = ">="
1080
- return :tGEQ
1081
- elsif src.scan(/\>\>=/) then
1082
- self.fix_arg_lex_state
1083
- self.lex_state = :expr_beg
1084
- self.yacc_value = ">>"
1085
- return :tOP_ASGN
1086
- elsif src.scan(/\>\>/) then
1087
- self.fix_arg_lex_state
1088
- self.yacc_value = ">>"
1089
- return :tRSHFT
1090
- elsif src.scan(/\>/) then
1091
- self.fix_arg_lex_state
1092
- self.yacc_value = ">"
1093
- return :tGT
1129
+ elsif check(/\>/) then
1130
+ if scan(/\>\=/) then
1131
+ return result(:arg_state, :tGEQ, ">=")
1132
+ elsif scan(/\>\>=/) then
1133
+ return result(:arg_state, :tOP_ASGN, ">>")
1134
+ elsif scan(/\>\>/) then
1135
+ return result(:arg_state, :tRSHFT, ">>")
1136
+ elsif scan(/\>/) then
1137
+ return result(:arg_state, :tGT, ">")
1094
1138
  end
1095
- elsif src.scan(/\`/) then
1096
- self.yacc_value = "`"
1139
+ elsif scan(/\`/) then
1097
1140
  case lex_state
1098
1141
  when :expr_fname then
1099
- self.lex_state = :expr_end
1100
- return :tBACK_REF2
1142
+ return result(:expr_end, :tBACK_REF2, "`")
1101
1143
  when :expr_dot then
1102
- self.lex_state = if command_state then
1103
- :expr_cmdarg
1104
- else
1105
- :expr_arg
1106
- end
1107
- return :tBACK_REF2
1144
+ state = command_state ? :expr_cmdarg : :expr_arg
1145
+ return result(state, :tBACK_REF2, "`")
1146
+ else
1147
+ string STR_XQUOTE, '`'
1148
+ return result(nil, :tXSTRING_BEG, "`")
1108
1149
  end
1109
- self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
1110
- return :tXSTRING_BEG
1111
- elsif src.scan(/\?/) then
1112
-
1150
+ elsif scan(/\?/) then
1113
1151
  if is_end? then
1114
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1115
- self.tern.push true
1116
- self.yacc_value = "?"
1117
- return :tEH
1152
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
1153
+ return result(state, :tEH, "?")
1118
1154
  end
1119
1155
 
1120
- if src.eos? then
1156
+ if end_of_stream? then
1121
1157
  rb_compile_error "incomplete character syntax"
1122
1158
  end
1123
1159
 
1124
- if src.check(/\s|\v/) then
1160
+ if check(/\s|\v/) then
1125
1161
  unless is_arg? then
1126
1162
  c2 = { " " => 's',
1127
1163
  "\n" => 'n',
1128
1164
  "\t" => 't',
1129
1165
  "\v" => 'v',
1130
1166
  "\r" => 'r',
1131
- "\f" => 'f' }[src.matched]
1167
+ "\f" => 'f' }[matched]
1132
1168
 
1133
1169
  if c2 then
1134
1170
  warning("invalid character syntax; use ?\\" + c2)
@@ -1136,47 +1172,32 @@ class RubyLexer
1136
1172
  end
1137
1173
 
1138
1174
  # ternary
1139
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1140
- self.tern.push true
1141
- self.yacc_value = "?"
1142
- return :tEH
1143
- elsif src.check(/\w(?=\w)/) then # ternary, also
1144
- self.lex_state = :expr_beg
1145
- self.tern.push true
1146
- self.yacc_value = "?"
1147
- return :tEH
1175
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
1176
+ return result(state, :tEH, "?")
1177
+ elsif check(/\w(?=\w)/) then # ternary, also
1178
+ return result(:expr_beg, :tEH, "?")
1148
1179
  end
1149
1180
 
1150
- c = if src.scan(/\\/) then
1181
+ c = if scan(/\\/) then
1151
1182
  self.read_escape
1152
1183
  else
1153
- src.getch
1184
+ ss.getch
1154
1185
  end
1155
- self.lex_state = :expr_end
1156
1186
 
1157
1187
  if version == 18 then
1158
- self.yacc_value = c[0].ord & 0xff
1159
- return :tINTEGER
1188
+ return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
1160
1189
  else
1161
- self.yacc_value = c
1162
- return :tSTRING
1190
+ return result(:expr_end, :tSTRING, c)
1163
1191
  end
1164
- elsif src.check(/\&/) then
1165
- if src.scan(/\&\&\=/) then
1166
- self.yacc_value = "&&"
1167
- self.lex_state = :expr_beg
1168
- return :tOP_ASGN
1169
- elsif src.scan(/\&\&/) then
1170
- self.lex_state = :expr_beg
1171
- self.yacc_value = "&&"
1172
- return :tANDOP
1173
- elsif src.scan(/\&\=/) then
1174
- self.yacc_value = "&"
1175
- self.lex_state = :expr_beg
1176
- return :tOP_ASGN
1177
- elsif src.scan(/&/) then
1178
- result = if is_arg? && space_seen &&
1179
- !src.check(/\s/) then
1192
+ elsif check(/\&/) then
1193
+ if scan(/\&\&\=/) then
1194
+ return result(:expr_beg, :tOP_ASGN, "&&")
1195
+ elsif scan(/\&\&/) then
1196
+ return result(:expr_beg, :tANDOP, "&&")
1197
+ elsif scan(/\&\=/) then
1198
+ return result(:expr_beg, :tOP_ASGN, "&")
1199
+ elsif scan(/&/) then
1200
+ token = if is_arg? && space_seen && !check(/\s/) then
1180
1201
  warning("`&' interpreted as argument prefix")
1181
1202
  :tAMPER
1182
1203
  elsif in_lex_state? :expr_beg, :expr_mid then
@@ -1185,170 +1206,121 @@ class RubyLexer
1185
1206
  :tAMPER2
1186
1207
  end
1187
1208
 
1188
- self.fix_arg_lex_state
1189
- self.yacc_value = "&"
1190
- return result
1209
+ return result(:arg_state, token, "&")
1191
1210
  end
1192
- elsif src.scan(/\//) then
1211
+ elsif scan(/\//) then
1193
1212
  if is_beg? then
1194
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1195
- self.yacc_value = "/"
1196
- return :tREGEXP_BEG
1213
+ string STR_REGEXP, '/'
1214
+ return result(nil, :tREGEXP_BEG, "/")
1197
1215
  end
1198
1216
 
1199
- if src.scan(/\=/) then
1200
- self.yacc_value = "/"
1201
- self.lex_state = :expr_beg
1202
- return :tOP_ASGN
1217
+ if scan(/\=/) then
1218
+ return result(:expr_beg, :tOP_ASGN, "/")
1203
1219
  end
1204
1220
 
1205
1221
  if is_arg? && space_seen then
1206
- unless src.scan(/\s/) then
1222
+ unless scan(/\s/) then
1207
1223
  arg_ambiguous
1208
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1209
- self.yacc_value = "/"
1210
- return :tREGEXP_BEG
1224
+ string STR_REGEXP, '/'
1225
+ return result(nil, :tREGEXP_BEG, "/")
1211
1226
  end
1212
1227
  end
1213
1228
 
1214
- self.fix_arg_lex_state
1215
- self.yacc_value = "/"
1216
-
1217
- return :tDIVIDE
1218
- elsif src.scan(/\^=/) then
1219
- self.lex_state = :expr_beg
1220
- self.yacc_value = "^"
1221
- return :tOP_ASGN
1222
- elsif src.scan(/\^/) then
1223
- self.fix_arg_lex_state
1224
- self.yacc_value = "^"
1225
- return :tCARET
1226
- elsif src.scan(/\;/) then
1229
+ return result(:arg_state, :tDIVIDE, "/")
1230
+ elsif scan(/\^=/) then
1231
+ return result(:expr_beg, :tOP_ASGN, "^")
1232
+ elsif scan(/\^/) then
1233
+ return result(:arg_state, :tCARET, "^")
1234
+ elsif scan(/\;/) then
1227
1235
  self.command_start = true
1228
- self.lex_state = :expr_beg
1229
- self.yacc_value = ";"
1230
- return :tSEMI
1231
- elsif src.scan(/\~/) then
1232
- if in_lex_state? :expr_fname, :expr_dot then
1233
- src.scan(/@/)
1234
- end
1235
-
1236
- self.fix_arg_lex_state
1237
- self.yacc_value = "~"
1238
-
1239
- return :tTILDE
1240
- elsif src.scan(/\\/) then
1241
- if src.scan(/\r?\n/) then
1236
+ return result(:expr_beg, :tSEMI, ";")
1237
+ elsif scan(/\~/) then
1238
+ scan(/@/) if in_lex_state? :expr_fname, :expr_dot
1239
+ return result(:arg_state, :tTILDE, "~")
1240
+ elsif scan(/\\/) then
1241
+ if scan(/\r?\n/) then
1242
1242
  self.lineno = nil
1243
1243
  self.space_seen = true
1244
1244
  next
1245
1245
  end
1246
1246
  rb_compile_error "bare backslash only allowed before newline"
1247
- elsif src.scan(/\%/) then
1248
- if is_beg? then
1249
- return parse_quote
1250
- end
1251
-
1252
- if src.scan(/\=/) then
1253
- self.lex_state = :expr_beg
1254
- self.yacc_value = "%"
1255
- return :tOP_ASGN
1256
- end
1257
-
1258
- return parse_quote if is_arg? && space_seen && ! src.check(/\s/)
1259
-
1260
- self.fix_arg_lex_state
1261
- self.yacc_value = "%"
1262
-
1263
- return :tPERCENT
1264
- elsif src.check(/\$/) then
1265
- if src.scan(/(\$_)(\w+)/) then
1266
- self.lex_state = :expr_end
1267
- self.token = src.matched
1268
- return process_token(command_state)
1269
- elsif src.scan(/\$_/) then
1270
- self.lex_state = :expr_end
1271
- self.token = src.matched
1272
- self.yacc_value = src.matched
1273
- return :tGVAR
1274
- elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1275
- self.lex_state = :expr_end
1276
- self.yacc_value = src.matched
1277
- return :tGVAR
1278
- elsif src.scan(/\$([\&\`\'\+])/) then
1279
- self.lex_state = :expr_end
1247
+ elsif scan(/\%/) then
1248
+ return parse_quote if is_beg?
1249
+
1250
+ return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
1251
+
1252
+ return parse_quote if is_arg? && space_seen && ! check(/\s/)
1253
+
1254
+ return result(:arg_state, :tPERCENT, "%")
1255
+ elsif check(/\$/) then
1256
+ if scan(/(\$_)(\w+)/) then
1257
+ self.token = matched
1258
+ return result(:expr_end, :tGVAR, matched)
1259
+ elsif scan(/\$_/) then
1260
+ return result(:expr_end, :tGVAR, matched)
1261
+ elsif scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1262
+ return result(:expr_end, :tGVAR, matched)
1263
+ elsif scan(/\$([\&\`\'\+])/) then
1280
1264
  # Explicit reference to these vars as symbols...
1281
- if last_state == :expr_fname then
1282
- self.yacc_value = src.matched
1283
- return :tGVAR
1265
+ if lex_state == :expr_fname then
1266
+ return result(:expr_end, :tGVAR, matched)
1284
1267
  else
1285
- self.yacc_value = src[1].to_sym
1286
- return :tBACK_REF
1268
+ return result(:expr_end, :tBACK_REF, ss[1].to_sym)
1287
1269
  end
1288
- elsif src.scan(/\$([1-9]\d*)/) then
1289
- self.lex_state = :expr_end
1290
- if last_state == :expr_fname then
1291
- self.yacc_value = src.matched
1292
- return :tGVAR
1270
+ elsif scan(/\$([1-9]\d*)/) then
1271
+ if lex_state == :expr_fname then
1272
+ return result(:expr_end, :tGVAR, matched)
1293
1273
  else
1294
- self.yacc_value = src[1].to_i
1295
- return :tNTH_REF
1274
+ return result(:expr_end, :tNTH_REF, ss[1].to_i)
1296
1275
  end
1297
- elsif src.scan(/\$0/) then
1298
- self.lex_state = :expr_end
1299
- self.token = src.matched
1300
- return process_token(command_state)
1301
- elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1302
- self.lex_state = :expr_end
1303
- self.yacc_value = "$"
1304
- return "$"
1305
- elsif src.scan(/\$\w+/)
1306
- self.lex_state = :expr_end
1307
- self.token = src.matched
1308
- return process_token(command_state)
1276
+ elsif scan(/\$0/) then
1277
+ return result(:expr_end, :tGVAR, matched)
1278
+ elsif scan(/\$\W|\$\z/) then # TODO: remove?
1279
+ return result(:expr_end, "$", "$") # FIX: "$"??
1280
+ elsif scan(/\$\w+/)
1281
+ return result(:expr_end, :tGVAR, matched)
1309
1282
  end
1310
- elsif src.check(/\_/) then
1311
- if src.beginning_of_line? && src.scan(/\__END__(\r?\n|\Z)/) then
1283
+ elsif check(/\_/) then
1284
+ if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
1312
1285
  self.lineno = nil
1313
1286
  return RubyLexer::EOF
1314
- elsif src.scan(/\_\w*/) then
1315
- self.token = src.matched
1316
- return process_token(command_state)
1287
+ elsif scan(/\_\w*/) then
1288
+ self.token = matched
1289
+ return process_token command_state, last_state
1317
1290
  end
1318
1291
  end
1319
1292
  end # END OF CASE
1320
1293
 
1321
- if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1294
+ if scan(/\004|\032|\000/) || end_of_stream? then # ^D, ^Z, EOF
1322
1295
  return RubyLexer::EOF
1323
1296
  else # alpha check
1324
- rb_compile_error "Invalid char #{src.rest[0].chr} in expression" unless
1325
- src.check IDENT_RE
1297
+ rb_compile_error "Invalid char #{ss.rest[0].chr} in expression" unless
1298
+ check IDENT
1326
1299
  end
1327
1300
 
1328
- self.token = src.matched if self.src.scan IDENT_RE
1301
+ self.token = matched if self.scan IDENT
1329
1302
 
1330
- return process_token(command_state)
1303
+ return process_token command_state, last_state
1331
1304
  end
1332
1305
  end
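
The rewritten branches above collapse the old yacc_value / lex_state / return-token triples into single result(...) calls, and replace the hand-built lex_strterm arrays with a string helper. A minimal sketch of what those helpers are assumed to look like; their real definitions live elsewhere in ruby_lexer.rb and are not part of this hunk, and the :arg_state mapping is only inferred from the removed fix_arg_lex_state calls.

  # Sketch, assumed: result replaces the yacc_value / lex_state /
  # return-token triples above; string replaces the raw lex_strterm arrays.
  def result new_state, token, text
    if new_state == :arg_state then
      # assumed stand-in for the removed fix_arg_lex_state calls
      new_state = in_lex_state?(:expr_fname, :expr_dot) ? :expr_arg : :expr_beg
    end
    self.lex_state = new_state if new_state
    self.yacc_value = text
    token
  end

  def string type, beg, nnd = "\0"
    self.lex_strterm = [:strterm, type, beg, nnd]
  end
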
1333
1306
 
1334
1307
  def yylex_paren18
1335
1308
  self.command_start = true
1336
- result = :tLPAREN2
1309
+ token = :tLPAREN2
1337
1310
 
1338
1311
  if in_lex_state? :expr_beg, :expr_mid then
1339
- result = :tLPAREN
1312
+ token = :tLPAREN
1340
1313
  elsif space_seen then
1341
1314
  if in_lex_state? :expr_cmdarg then
1342
- result = :tLPAREN_ARG
1315
+ token = :tLPAREN_ARG
1343
1316
  elsif in_lex_state? :expr_arg then
1344
- self.tern.push false
1345
1317
  warning "don't put space before argument parentheses"
1346
1318
  end
1347
1319
  else
1348
- self.tern.push false
1320
+ # not a ternary -- do nothing?
1349
1321
  end
1350
1322
 
1351
- result
1323
+ token
1352
1324
  end
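
In yylex_paren18 the local is renamed from result to token, presumably so it no longer shadows the new result(...) helper; the method now returns a bare token and the caller supplies state and value. A purely hypothetical call site, not shown in this hunk (the wrapper name and the :expr_beg state are assumptions):

  # Hypothetical caller, for illustration only.
  def yylex_open_paren
    token = ruby18 ? yylex_paren18 : yylex_paren19
    result(:expr_beg, token, "(")   # :expr_beg after "(" is an assumption
  end
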
1353
1325
 
1354
1326
  def yylex_paren19
@@ -1361,146 +1333,7 @@ class RubyLexer
1361
1333
  end
1362
1334
  end
1363
1335
 
1364
- def is_arg?
1365
- in_lex_state? :expr_arg, :expr_cmdarg
1366
- end
1367
-
1368
- def is_end?
1369
- in_lex_state? :expr_end, :expr_endarg, :expr_endfn
1370
- end
1371
-
1372
- def is_beg?
1373
- in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class
1374
- end
1375
-
1376
- # TODO #define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT)
1377
-
1378
- def is_space_arg? c = "x"
1379
- is_arg? and space_seen and c !~ /\s/
1380
- end
1381
-
1382
- def is_label_possible? command_state
1383
- (in_lex_state?(:expr_beg) && !command_state) || is_arg?
1384
- end
1385
-
1386
- def process_token(command_state)
1387
- token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)
1388
-
1389
- result = nil
1390
- last_state = lex_state
1391
-
1392
- case token
1393
- when /^\$/ then
1394
- self.lex_state, result = :expr_end, :tGVAR
1395
- when /^@@/ then
1396
- self.lex_state, result = :expr_end, :tCVAR
1397
- when /^@/ then
1398
- self.lex_state, result = :expr_end, :tIVAR
1399
- else
1400
- if token =~ /[!?]$/ then
1401
- result = :tFID
1402
- else
1403
- if in_lex_state? :expr_fname then
1404
- # ident=, not =~ => == or followed by =>
1405
- # TODO test lexing of a=>b vs a==>b
1406
- if src.scan(/=(?:(?![~>=])|(?==>))/) then
1407
- result = :tIDENTIFIER
1408
- token << src.matched
1409
- end
1410
- end
1411
-
1412
- result ||= if token =~ /^[A-Z]/ then
1413
- :tCONSTANT
1414
- else
1415
- :tIDENTIFIER
1416
- end
1417
- end
1418
-
1419
- unless ruby18
1420
- if is_label_possible? command_state then
1421
- colon = src.scan(/:/)
1422
-
1423
- if colon && src.peek(1) != ":" then
1424
- self.lex_state = :expr_beg
1425
- self.yacc_value = [token, src.lineno]
1426
- return :tLABEL
1427
- end
1428
-
1429
- src.unscan if colon
1430
- end
1431
- end
1432
-
1433
- unless in_lex_state? :expr_dot then
1434
- # See if it is a reserved word.
1435
- keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
1436
- RubyParserStuff::Keyword.keyword18 token
1437
- else
1438
- RubyParserStuff::Keyword.keyword19 token
1439
- end
1440
-
1441
- if keyword then
1442
- state = lex_state
1443
- self.lex_state = keyword.state
1444
- self.yacc_value = [token, src.lineno]
1445
-
1446
- if state == :expr_fname then
1447
- self.yacc_value = keyword.name
1448
- return keyword.id0
1449
- end
1450
-
1451
- self.command_start = true if lex_state == :expr_beg
1452
-
1453
- if keyword.id0 == :kDO then
1454
- if lpar_beg && lpar_beg == paren_nest then
1455
- self.lpar_beg = nil
1456
- self.paren_nest -= 1
1457
-
1458
- return :kDO_LAMBDA
1459
- end
1460
-
1461
- return :kDO_COND if cond.is_in_state
1462
- return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1463
- return :kDO_BLOCK if [:expr_beg, :expr_endarg].include? state
1464
- return :kDO
1465
- end
1466
-
1467
- return keyword.id0 if [:expr_beg, :expr_value].include? state
1468
-
1469
- self.lex_state = :expr_beg if keyword.id0 != keyword.id1
1470
-
1471
- return keyword.id1
1472
- end
1473
- end
1474
-
1475
- # TODO:
1476
- # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
1477
-
1478
- self.lex_state =
1479
- if is_beg? || is_arg? || in_lex_state?(:expr_dot) then
1480
- if command_state then
1481
- :expr_cmdarg
1482
- else
1483
- :expr_arg
1484
- end
1485
- elsif !ruby18 && in_lex_state?(:expr_fname) then
1486
- :expr_endfn
1487
- else
1488
- :expr_end
1489
- end
1490
-
1491
- end
1492
-
1493
- self.yacc_value = token
1494
-
1495
- if (![:expr_dot, :expr_fname].include?(last_state) &&
1496
- self.parser.env[token.to_sym] == :lvar) then
1497
- self.lex_state = :expr_end
1498
- end
1499
-
1500
- return result
1501
- end
1502
-
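
The predicate helpers deleted here (is_arg?, is_end?, is_beg?, is_space_arg?, is_label_possible?) and process_token are still called by the code above, so they have presumably been relocated rather than dropped, with process_token now also receiving last_state from its callers. They all rest on in_lex_state?, assumed to be the usual inclusion check:

  # Assumed definition; not part of this hunk.
  def in_lex_state? *states
    states.include? lex_state
  end
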
1503
- def yylex_string # 23 lines
1336
+ def yylex_string # TODO: rewrite / remove
1504
1337
  token = if lex_strterm[0] == :heredoc then
1505
1338
  self.heredoc lex_strterm
1506
1339
  else