ruby_parser 3.2.2 → 3.3.0

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -263,7 +263,7 @@ rule
263
263
  }
264
264
  opt_block_param
265
265
  {
266
- result = self.env.dynamic.keys
266
+ result = nil # self.env.dynamic.keys
267
267
  }
268
268
  compstmt tRCURLY
269
269
  {
@@ -785,7 +785,6 @@ rule
785
785
  }
786
786
  | arg tEH arg opt_nl tCOLON arg
787
787
  {
788
- lexer.tern.pop
789
788
  result = s(:if, val[0], val[2], val[5])
790
789
  }
791
790
  | primary
@@ -1136,7 +1135,7 @@ rule
1136
1135
  }
1137
1136
  | kDEF fname
1138
1137
  {
1139
- result = [lexer.lineno, self.in_def]
1138
+ result = self.in_def
1140
1139
 
1141
1140
  self.comments.push self.lexer.comments
1142
1141
  self.in_def = true
@@ -1144,10 +1143,9 @@ rule
1144
1143
  }
1145
1144
  f_arglist bodystmt kEND
1146
1145
  {
1147
- line, in_def = val[2]
1146
+ in_def = val[2]
1148
1147
 
1149
1148
  result = new_defn val
1150
- result[2].line line
1151
1149
 
1152
1150
  self.env.unextend
1153
1151
  self.in_def = in_def
@@ -1429,7 +1427,9 @@ opt_block_args_tail: tCOMMA block_args_tail
1429
1427
  | f_bad_arg
1430
1428
 
1431
1429
  lambda: {
1432
- # TODO: dyna_push ? hrm
1430
+ self.env.extend :dynamic
1431
+ result = self.lexer.lineno
1432
+
1433
1433
  result = lexer.lpar_beg
1434
1434
  lexer.paren_nest += 1
1435
1435
  lexer.lpar_beg = lexer.paren_nest
@@ -1443,6 +1443,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1443
1443
 
1444
1444
  call = new_call nil, :lambda
1445
1445
  result = new_iter call, args, body
1446
+ self.env.unextend
1446
1447
  }
1447
1448
 
1448
1449
  f_larglist: tLPAREN2 f_args opt_bv_decl rparen
@@ -1470,7 +1471,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1470
1471
  }
1471
1472
  opt_block_param
1472
1473
  {
1473
- result = self.env.dynamic.keys
1474
+ result = nil # self.env.dynamic.keys
1474
1475
  }
1475
1476
  compstmt kEND
1476
1477
  {
@@ -1556,7 +1557,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1556
1557
  }
1557
1558
  opt_block_param
1558
1559
  {
1559
- result = self.env.dynamic.keys
1560
+ result = nil # self.env.dynamic.keys
1560
1561
  }
1561
1562
  compstmt tRCURLY
1562
1563
  {
@@ -1574,7 +1575,7 @@ opt_block_args_tail: tCOMMA block_args_tail
1574
1575
  }
1575
1576
  opt_block_param
1576
1577
  {
1577
- result = self.env.dynamic.keys
1578
+ result = nil # self.env.dynamic.keys
1578
1579
  }
1579
1580
  compstmt kEND
1580
1581
  {
@@ -1972,13 +1973,14 @@ keyword_variable: kNIL { result = s(:nil) }
1972
1973
  f_arglist: tLPAREN2 f_args rparen
1973
1974
  {
1974
1975
  result = val[1]
1975
- lexer.lex_state = :expr_beg
1976
+ self.lexer.lex_state = :expr_beg
1976
1977
  self.lexer.command_start = true
1977
1978
  }
1978
1979
  | f_args term
1979
1980
  {
1980
- self.lexer.lex_state = :expr_beg
1981
1981
  result = val[0]
1982
+ self.lexer.lex_state = :expr_beg
1983
+ self.lexer.command_start = true
1982
1984
  }
1983
1985
 
1984
1986
  args_tail: f_kwarg tCOMMA f_kwrest opt_f_block_arg
@@ -2127,14 +2129,20 @@ keyword_variable: kNIL { result = s(:nil) }
2127
2129
  {
2128
2130
  # TODO: call_args
2129
2131
  label, _ = val[0] # TODO: fix lineno?
2130
- result = s(:array, s(:kwarg, label.to_sym, val[1]))
2132
+ identifier = label.to_sym
2133
+ self.env[identifier] = :lvar
2134
+
2135
+ result = s(:array, s(:kwarg, identifier, val[1]))
2131
2136
  }
2132
2137
 
2133
2138
  f_block_kw: tLABEL primary_value
2134
2139
  {
2135
2140
  # TODO: call_args
2136
2141
  label, _ = val[0] # TODO: fix lineno?
2137
- result = s(:array, s(:kwarg, label.to_sym, val[1]))
2142
+ identifier = label.to_sym
2143
+ self.env[identifier] = :lvar
2144
+
2145
+ result = s(:array, s(:kwarg, identifier, val[1]))
2138
2146
  }
2139
2147
 
2140
2148
  f_block_kwarg: f_block_kw
@@ -5,61 +5,21 @@ class RubyLexer
5
5
  # :stopdoc:
6
6
  RUBY19 = "".respond_to? :encoding
7
7
 
8
- IDENT_CHAR_RE = if RUBY19 then
9
- /[\w\u0080-\u{10ffff}]/u
10
- else
11
- /[\w\x80-\xFF]/n
12
- end
13
-
14
- IDENT_RE = /^#{IDENT_CHAR_RE}+/o
15
-
16
- attr_accessor :command_start
17
- attr_accessor :cmdarg
18
- attr_accessor :cond
19
- attr_accessor :tern # TODO: rename ternary damnit... wtf
20
- attr_accessor :string_nest
21
-
22
- ESC_RE = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc]))/u
23
- # :startdoc:
24
-
25
- ##
26
- # What version of ruby to parse. 18 and 19 are the only valid values
27
- # currently supported.
28
-
29
- attr_accessor :version
30
-
31
- # Additional context surrounding tokens that both the lexer and
32
- # grammar use.
33
- attr_reader :lex_state
34
-
35
- attr_accessor :lex_strterm
36
-
37
- attr_accessor :parser # HACK for very end of lexer... *sigh*
38
-
39
- # Stream of data that yylex examines.
40
- attr_reader :src
41
-
42
- # Last token read via yylex.
43
- attr_accessor :token
44
-
45
- attr_accessor :string_buffer
46
-
47
- # Value of last token which had a value associated with it.
48
- attr_accessor :yacc_value
49
-
50
- # What handles warnings
51
- attr_accessor :warnings
8
+ IDENT_CHAR = if RUBY19 then
9
+ /[\w\u0080-\u{10ffff}]/u
10
+ else
11
+ /[\w\x80-\xFF]/n
12
+ end
52
13
 
53
- attr_accessor :space_seen
54
- attr_accessor :paren_nest
55
- attr_accessor :brace_nest
56
- attr_accessor :lpar_beg
14
+ IDENT = /^#{IDENT_CHAR}+/o
15
+ ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/u
16
+ SIMPLE_STRING = /(#{ESC}|#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
17
+ SIMPLE_SSTRING = /(\\.|[^\'])*/
57
18
 
58
19
  EOF = :eof_haha!
59
20
 
60
21
  # ruby constants for strings (should this be moved somewhere else?)
61
22
 
62
- # :stopdoc:
63
23
  STR_FUNC_BORING = 0x00
64
24
  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
65
25
  STR_FUNC_EXPAND = 0x02
@@ -75,6 +35,22 @@ class RubyLexer
75
35
  STR_SSYM = STR_FUNC_SYMBOL
76
36
  STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
77
37
 
38
+ ESCAPES = {
39
+ "a" => "\007",
40
+ "b" => "\010",
41
+ "e" => "\033",
42
+ "f" => "\f",
43
+ "n" => "\n",
44
+ "r" => "\r",
45
+ "s" => " ",
46
+ "t" => "\t",
47
+ "v" => "\13",
48
+ "\\" => '\\',
49
+ "\n" => "",
50
+ "C-\?" => 127.chr,
51
+ "c\?" => 127.chr,
52
+ }
53
+
78
54
  TOKENS = {
79
55
  "!" => :tBANG,
80
56
  "!=" => :tNEQ,
@@ -90,8 +66,58 @@ class RubyLexer
90
66
  "=~" => :tMATCH,
91
67
  "->" => :tLAMBDA,
92
68
  }
69
+
70
+ @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
71
+ @@regexp_cache[nil] = nil
72
+
93
73
  # :startdoc:
94
74
 
75
+ attr_accessor :brace_nest
76
+ attr_accessor :cmdarg
77
+ attr_accessor :command_start
78
+ attr_accessor :cond
79
+
80
+ ##
81
+ # Additional context surrounding tokens that both the lexer and
82
+ # grammar use.
83
+
84
+ attr_accessor :lex_state
85
+
86
+ attr_accessor :lex_strterm
87
+ attr_accessor :lpar_beg
88
+ attr_accessor :paren_nest
89
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
90
+ attr_accessor :space_seen
91
+ attr_accessor :string_buffer
92
+ attr_accessor :string_nest
93
+
94
+ # Stream of data that yylex examines.
95
+ attr_reader :src
96
+ alias :ss :src
97
+
98
+ # Last token read via yylex.
99
+ attr_accessor :token
100
+
101
+ ##
102
+ # What version of ruby to parse. 18 and 19 are the only valid values
103
+ # currently supported.
104
+
105
+ attr_accessor :version
106
+
107
+ # Value of last token which had a value associated with it.
108
+ attr_accessor :yacc_value
109
+
110
+ attr_writer :lineno # reader is lazy initalizer
111
+
112
+ attr_writer :comments
113
+
114
+ def initialize v = 18
115
+ self.version = v
116
+
117
+ reset
118
+ end
119
+
120
+ ##
95
121
  # How the parser advances to the next token.
96
122
  #
97
123
  # @return true if not at end of file (EOF).
@@ -100,7 +126,7 @@ class RubyLexer
100
126
  r = yylex
101
127
  self.token = r
102
128
 
103
- raise "yylex returned nil" unless r
129
+ raise "yylex returned nil, near #{ss.rest[0,10].inspect}" unless r
104
130
 
105
131
  return RubyLexer::EOF != r
106
132
  end
@@ -109,28 +135,35 @@ class RubyLexer
109
135
  self.warning("Ambiguous first argument. make sure.")
110
136
  end
111
137
 
112
- def comments
138
+ def arg_state
139
+ in_arg_state? ? :expr_arg : :expr_beg
140
+ end
141
+
142
+ def beginning_of_line?
143
+ ss.bol?
144
+ end
145
+
146
+ def check re
147
+ ss.check re
148
+ end
149
+
150
+ def comments # TODO: remove this... maybe comment_string + attr_accessor
113
151
  c = @comments.join
114
152
  @comments.clear
115
153
  c
116
154
  end
117
155
 
118
- def expr_beg_push val
119
- cond.push false
120
- cmdarg.push false
121
- self.lex_state = :expr_beg
122
- self.yacc_value = val
156
+ def end_of_stream?
157
+ ss.eos?
123
158
  end
124
159
 
125
- def fix_arg_lex_state
126
- self.lex_state = if in_lex_state? :expr_fname, :expr_dot then
127
- :expr_arg
128
- else
129
- :expr_beg
130
- end
160
+ def expr_result token, text
161
+ cond.push false
162
+ cmdarg.push false
163
+ result :expr_beg, token, text
131
164
  end
132
165
 
133
- def heredoc here # 63 lines
166
+ def heredoc here # TODO: rewrite / remove
134
167
  _, eos, func, last_line = here
135
168
 
136
169
  indent = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
@@ -138,11 +171,10 @@ class RubyLexer
138
171
  eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
139
172
  err_msg = "can't match #{eos_re.inspect} anywhere in "
140
173
 
141
- rb_compile_error err_msg if
142
- src.eos?
174
+ rb_compile_error err_msg if end_of_stream?
143
175
 
144
- if src.beginning_of_line? && src.scan(eos_re) then
145
- src.unread_many last_line # TODO: figure out how to remove this
176
+ if beginning_of_line? && scan(eos_re) then
177
+ ss.unread_many last_line # TODO: figure out how to remove this
146
178
  self.yacc_value = eos
147
179
  return :tSTRING_END
148
180
  end
@@ -151,14 +183,14 @@ class RubyLexer
151
183
 
152
184
  if expand then
153
185
  case
154
- when src.scan(/#[$@]/) then
155
- src.pos -= 1 # FIX omg stupid
156
- self.yacc_value = src.matched
186
+ when scan(/#[$@]/) then
187
+ ss.pos -= 1 # FIX omg stupid
188
+ self.yacc_value = matched
157
189
  return :tSTRING_DVAR
158
- when src.scan(/#[{]/) then
159
- self.yacc_value = src.matched
190
+ when scan(/#[{]/) then
191
+ self.yacc_value = matched
160
192
  return :tSTRING_DBEG
161
- when src.scan(/#/) then
193
+ when scan(/#/) then
162
194
  string_buffer << '#'
163
195
  end
164
196
 
@@ -172,34 +204,32 @@ class RubyLexer
172
204
  self.yacc_value = string_buffer.join.delete("\r")
173
205
  return :tSTRING_CONTENT
174
206
  else
175
- string_buffer << src.scan(/\n/)
207
+ string_buffer << scan(/\n/)
176
208
  end
177
209
 
178
- rb_compile_error err_msg if
179
- src.eos?
180
- end until src.check(eos_re)
210
+ rb_compile_error err_msg if end_of_stream?
211
+ end until check(eos_re)
181
212
  else
182
- until src.check(eos_re) do
183
- string_buffer << src.scan(/.*(\n|\z)/)
184
- rb_compile_error err_msg if
185
- src.eos?
213
+ until check(eos_re) do
214
+ string_buffer << scan(/.*(\n|\z)/)
215
+ rb_compile_error err_msg if end_of_stream?
186
216
  end
187
217
  end
188
218
 
189
219
  self.lex_strterm = [:heredoc, eos, func, last_line]
190
- self.yacc_value = string_buffer.join.delete("\r")
191
220
 
221
+ self.yacc_value = string_buffer.join.delete("\r")
192
222
  return :tSTRING_CONTENT
193
223
  end
194
224
 
195
- def heredoc_identifier # 51 lines
225
+ def heredoc_identifier # TODO: remove / rewrite
196
226
  term, func = nil, STR_FUNC_BORING
197
227
  self.string_buffer = []
198
228
 
199
229
  case
200
- when src.scan(/(-?)([\'\"\`])(.*?)\2/) then
201
- term = src[2]
202
- func |= STR_FUNC_INDENT unless src[1].empty?
230
+ when scan(/(-?)([\'\"\`])(.*?)\2/) then
231
+ term = ss[2]
232
+ func |= STR_FUNC_INDENT unless ss[1].empty?
203
233
  func |= case term
204
234
  when "\'" then
205
235
  STR_SQUOTE
@@ -208,24 +238,24 @@ class RubyLexer
208
238
  else
209
239
  STR_XQUOTE
210
240
  end
211
- string_buffer << src[3]
212
- when src.scan(/-?([\'\"\`])(?!\1*\Z)/) then
241
+ string_buffer << ss[3]
242
+ when scan(/-?([\'\"\`])(?!\1*\Z)/) then
213
243
  rb_compile_error "unterminated here document identifier"
214
- when src.scan(/(-?)(#{IDENT_CHAR_RE}+)/) then
244
+ when scan(/(-?)(#{IDENT_CHAR}+)/) then
215
245
  term = '"'
216
246
  func |= STR_DQUOTE
217
- unless src[1].empty? then
247
+ unless ss[1].empty? then
218
248
  func |= STR_FUNC_INDENT
219
249
  end
220
- string_buffer << src[2]
250
+ string_buffer << ss[2]
221
251
  else
222
252
  return nil
223
253
  end
224
254
 
225
- if src.scan(/.*\n/) then
255
+ if scan(/.*\n/) then
226
256
  # TODO: think about storing off the char range instead
227
- line = src.matched
228
- src.extra_lines_added += 1
257
+ line = matched
258
+ ss.extra_lines_added += 1 # FIX: ugh
229
259
  else
230
260
  line = nil
231
261
  end
@@ -241,41 +271,47 @@ class RubyLexer
241
271
  end
242
272
  end
243
273
 
274
+ def in_arg_state? # TODO: rename is_after_operator?
275
+ in_lex_state? :expr_fname, :expr_dot
276
+ end
277
+
244
278
  def in_lex_state?(*states)
245
279
  states.include? lex_state
246
280
  end
247
281
 
248
- def initialize v = 18
249
- self.version = v
250
- self.cond = RubyParserStuff::StackState.new(:cond)
251
- self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
252
- self.tern = RubyParserStuff::StackState.new(:tern)
253
- self.string_nest = 0
254
- self.paren_nest = 0
255
- self.brace_nest = 0
256
- self.lpar_beg = nil
282
+ def int_with_base base
283
+ rb_compile_error "Invalid numeric format" if matched =~ /__/
257
284
 
258
- @comments = []
285
+ self.yacc_value = matched.to_i(base)
286
+ return :tINTEGER
287
+ end
259
288
 
260
- reset
289
+ def is_arg?
290
+ in_lex_state? :expr_arg, :expr_cmdarg
261
291
  end
262
292
 
263
- def int_with_base base
264
- rb_compile_error "Invalid numeric format" if src.matched =~ /__/
293
+ def is_beg?
294
+ in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class
295
+ end
265
296
 
266
- self.yacc_value = src.matched.to_i(base)
267
- return :tINTEGER
297
+ def is_end?
298
+ in_lex_state? :expr_end, :expr_endarg, :expr_endfn
268
299
  end
269
300
 
270
- def lex_state= o
271
- # warn "wtf lex_state = #{o.inspect} from #{caller.first}"
272
- raise "wtf\?" unless Symbol === o
273
- @lex_state = o
301
+ def is_label_possible? command_state
302
+ (in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
303
+ end
304
+
305
+ def is_space_arg? c = "x"
306
+ is_arg? and space_seen and c !~ /\s/
274
307
  end
275
308
 
276
- attr_writer :lineno
277
309
  def lineno
278
- @lineno ||= src.lineno
310
+ @lineno ||= ss.lineno
311
+ end
312
+
313
+ def matched
314
+ ss.matched
279
315
  end
280
316
 
281
317
  ##
@@ -288,45 +324,45 @@ class RubyLexer
288
324
  self.lex_state = :expr_end
289
325
 
290
326
  case
291
- when src.scan(/[+-]?0[xXbBdD]\b/) then
327
+ when scan(/[+-]?0[xXbBdD]\b/) then
292
328
  rb_compile_error "Invalid numeric format"
293
- when src.scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
329
+ when scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
294
330
  int_with_base(10)
295
- when src.scan(/[+-]?0x[a-f0-9_]+/i) then
331
+ when scan(/[+-]?0x[a-f0-9_]+/i) then
296
332
  int_with_base(16)
297
- when src.scan(/[+-]?0[Bb][01_]+/) then
333
+ when scan(/[+-]?0[Bb][01_]+/) then
298
334
  int_with_base(2)
299
- when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
335
+ when scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
300
336
  rb_compile_error "Illegal octal digit."
301
- when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
337
+ when scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
302
338
  int_with_base(8)
303
- when src.scan(/[+-]?[\d_]+_(e|\.)/) then
339
+ when scan(/[+-]?[\d_]+_(e|\.)/) then
304
340
  rb_compile_error "Trailing '_' in number."
305
- when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
306
- number = src.matched
341
+ when scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
342
+ number = matched
307
343
  if number =~ /__/ then
308
344
  rb_compile_error "Invalid numeric format"
309
345
  end
310
346
  self.yacc_value = number.to_f
311
347
  :tFLOAT
312
- when src.scan(/[+-]?[0-9_]+(?![e])/) then
348
+ when scan(/[+-]?[0-9_]+(?![e])/) then
313
349
  int_with_base(10)
314
350
  else
315
351
  rb_compile_error "Bad number format"
316
352
  end
317
353
  end
318
354
 
319
- def parse_quote # 58 lines
355
+ def parse_quote # TODO: remove / rewrite
320
356
  beg, nnd, short_hand, c = nil, nil, false, nil
321
357
 
322
- if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
323
- rb_compile_error "unknown type of %string" if src.matched_size == 2
324
- c, beg, short_hand = src.matched, src.getch, false
358
+ if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
359
+ rb_compile_error "unknown type of %string" if ss.matched_size == 2
360
+ c, beg, short_hand = matched, ss.getch, false
325
361
  else # Short-hand (e.g. %{, %., %!, etc)
326
- c, beg, short_hand = 'Q', src.getch, true
362
+ c, beg, short_hand = 'Q', ss.getch, true
327
363
  end
328
364
 
329
- if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
365
+ if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
330
366
  rb_compile_error "unterminated quoted string meets end of file"
331
367
  end
332
368
 
@@ -334,19 +370,19 @@ class RubyLexer
334
370
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
335
371
  nnd, beg = beg, "\0" if nnd.nil?
336
372
 
337
- token_type, self.yacc_value = nil, "%#{c}#{beg}"
373
+ token_type, text = nil, "%#{c}#{beg}"
338
374
  token_type, string_type = case c
339
375
  when 'Q' then
340
376
  ch = short_hand ? nnd : c + beg
341
- self.yacc_value = "%#{ch}"
377
+ text = "%#{ch}"
342
378
  [:tSTRING_BEG, STR_DQUOTE]
343
379
  when 'q' then
344
380
  [:tSTRING_BEG, STR_SQUOTE]
345
381
  when 'W' then
346
- src.scan(/\s*/)
382
+ scan(/\s*/)
347
383
  [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
348
384
  when 'w' then
349
- src.scan(/\s*/)
385
+ scan(/\s*/)
350
386
  [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
351
387
  when 'x' then
352
388
  [:tXSTRING_BEG, STR_XQUOTE]
@@ -356,20 +392,25 @@ class RubyLexer
356
392
  self.lex_state = :expr_fname
357
393
  [:tSYMBEG, STR_SSYM]
358
394
  when 'I' then
395
+ src.scan(/\s*/)
359
396
  [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
360
397
  when 'i' then
398
+ src.scan(/\s*/)
361
399
  [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
362
400
  end
363
401
 
364
- rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
402
+ rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
365
403
  token_type.nil?
366
404
 
367
- self.lex_strterm = [:strterm, string_type, nnd, beg]
405
+ raise "huh" unless string_type
406
+
407
+ string string_type, nnd, beg
368
408
 
409
+ self.yacc_value = text
369
410
  return token_type
370
411
  end
371
412
 
372
- def parse_string(quote) # 65 lines
413
+ def parse_string quote # TODO: rewrite / remove
373
414
  _, string_type, term, open = quote
374
415
 
375
416
  space = false # FIX: remove these
@@ -381,41 +422,39 @@ class RubyLexer
381
422
  regexp = (func & STR_FUNC_REGEXP) != 0
382
423
  expand = (func & STR_FUNC_EXPAND) != 0
383
424
 
384
- unless func then # FIX: impossible, prolly needs == 0
425
+ unless func then # nil'ed from qwords below. *sigh*
385
426
  self.lineno = nil
386
427
  return :tSTRING_END
387
428
  end
388
429
 
389
- space = true if qwords and src.scan(/\s+/)
430
+ space = true if qwords and scan(/\s+/)
390
431
 
391
- if self.string_nest == 0 && src.scan(/#{term_re}/) then
432
+ if self.string_nest == 0 && scan(/#{term_re}/) then
392
433
  if qwords then
393
- quote[1] = nil # TODO: make struct
434
+ quote[1] = nil
394
435
  return :tSPACE
395
436
  elsif regexp then
396
- self.yacc_value = self.regx_options
397
437
  self.lineno = nil
438
+ self.yacc_value = self.regx_options
398
439
  return :tREGEXP_END
399
440
  else
400
- self.yacc_value = term
401
441
  self.lineno = nil
442
+ self.yacc_value = term
402
443
  return :tSTRING_END
403
444
  end
404
445
  end
405
446
 
406
- if space then
407
- return :tSPACE
408
- end
447
+ return :tSPACE if space
409
448
 
410
449
  self.string_buffer = []
411
450
 
412
451
  if expand
413
452
  case
414
- when src.scan(/#(?=[$@])/) then
453
+ when scan(/#(?=[$@])/) then
415
454
  return :tSTRING_DVAR
416
- when src.scan(/#[{]/) then
455
+ when scan(/#[{]/) then
417
456
  return :tSTRING_DBEG
418
- when src.scan(/#/) then
457
+ when scan(/#/) then
419
458
  string_buffer << '#'
420
459
  end
421
460
  end
@@ -429,71 +468,159 @@ class RubyLexer
429
468
  return :tSTRING_CONTENT
430
469
  end
431
470
 
471
+ def process_token command_state, last_state
472
+ token = self.token
473
+ token << matched if scan(/[\!\?](?!=)/)
474
+
475
+ tok_id =
476
+ case
477
+ when token =~ /[!?]$/ then
478
+ :tFID
479
+ when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
480
+ # ident=, not =~ => == or followed by =>
481
+ # TODO test lexing of a=>b vs a==>b
482
+ token << matched
483
+ :tIDENTIFIER
484
+ when token =~ /^[A-Z]/ then
485
+ :tCONSTANT
486
+ else
487
+ :tIDENTIFIER
488
+ end
489
+
490
+ if !ruby18 and is_label_possible?(command_state) and scan(/:(?!:)/) then
491
+ return result(:expr_beg, :tLABEL, [token, ss.lineno]) # HACK: array? TODO: self.lineno
492
+ end
493
+
494
+ unless in_lex_state? :expr_dot then
495
+ # See if it is a reserved word.
496
+ keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
497
+ RubyParserStuff::Keyword.keyword18 token
498
+ else
499
+ RubyParserStuff::Keyword.keyword19 token
500
+ end
501
+
502
+ return process_token_keyword keyword if keyword
503
+ end # unless in_lex_state? :expr_dot
504
+
505
+ # TODO:
506
+ # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
507
+
508
+ state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
509
+ command_state ? :expr_cmdarg : :expr_arg
510
+ elsif not ruby18 and in_lex_state? :expr_fname then
511
+ :expr_endfn
512
+ else
513
+ :expr_end
514
+ end
515
+
516
+ if not [:expr_dot, :expr_fname].include? last_state and
517
+ self.parser.env[token.to_sym] == :lvar then
518
+ state = :expr_end
519
+ end
520
+
521
+ return result(state, tok_id, token)
522
+ end
523
+
524
+ def process_token_keyword keyword
525
+ state = keyword.state
526
+ value = [token, ss.lineno] # TODO: use self.lineno ?
527
+
528
+ self.command_start = true if state == :expr_beg and lex_state != :expr_fname
529
+
530
+ case
531
+ when lex_state == :expr_fname then
532
+ result(state, keyword.id0, keyword.name)
533
+ when keyword.id0 == :kDO then
534
+ case
535
+ when lpar_beg && lpar_beg == paren_nest then
536
+ self.lpar_beg = nil
537
+ self.paren_nest -= 1
538
+ result(state, :kDO_LAMBDA, value)
539
+ when cond.is_in_state then
540
+ result(state, :kDO_COND, value)
541
+ when cmdarg.is_in_state && lex_state != :expr_cmdarg then
542
+ result(state, :kDO_BLOCK, value)
543
+ when in_lex_state?(:expr_beg, :expr_endarg) then
544
+ result(state, :kDO_BLOCK, value)
545
+ else
546
+ result(state, :kDO, value)
547
+ end
548
+ when in_lex_state?(:expr_beg, :expr_value) then
549
+ result(state, keyword.id0, value)
550
+ when keyword.id0 != keyword.id1 then
551
+ result(:expr_beg, keyword.id1, value)
552
+ else
553
+ result(state, keyword.id1, value)
554
+ end
555
+ end
556
+
432
557
  def rb_compile_error msg
433
- msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
558
+ msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
434
559
  raise RubyParser::SyntaxError, msg
435
560
  end
436
561
 
437
- def read_escape # 51 lines
562
+ def read_escape # TODO: remove / rewrite
438
563
  case
439
- when src.scan(/\\/) then # Backslash
564
+ when scan(/\\/) then # Backslash
440
565
  '\\'
441
- when src.scan(/n/) then # newline
566
+ when scan(/n/) then # newline
442
567
  "\n"
443
- when src.scan(/t/) then # horizontal tab
568
+ when scan(/t/) then # horizontal tab
444
569
  "\t"
445
- when src.scan(/r/) then # carriage-return
570
+ when scan(/r/) then # carriage-return
446
571
  "\r"
447
- when src.scan(/f/) then # form-feed
572
+ when scan(/f/) then # form-feed
448
573
  "\f"
449
- when src.scan(/v/) then # vertical tab
574
+ when scan(/v/) then # vertical tab
450
575
  "\13"
451
- when src.scan(/a/) then # alarm(bell)
576
+ when scan(/a/) then # alarm(bell)
452
577
  "\007"
453
- when src.scan(/e/) then # escape
578
+ when scan(/e/) then # escape
454
579
  "\033"
455
- when src.scan(/b/) then # backspace
580
+ when scan(/b/) then # backspace
456
581
  "\010"
457
- when src.scan(/s/) then # space
582
+ when scan(/s/) then # space
458
583
  " "
459
- when src.scan(/[0-7]{1,3}/) then # octal constant
460
- (src.matched.to_i(8) & 0xFF).chr
461
- when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
462
- src[1].to_i(16).chr
463
- when src.check(/M-\\[\\MCc]/) then
464
- src.scan(/M-\\/) # eat it
584
+ when scan(/[0-7]{1,3}/) then # octal constant
585
+ (matched.to_i(8) & 0xFF).chr
586
+ when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
587
+ ss[1].to_i(16).chr
588
+ when check(/M-\\[\\MCc]/) then
589
+ scan(/M-\\/) # eat it
465
590
  c = self.read_escape
466
591
  c[0] = (c[0].ord | 0x80).chr
467
592
  c
468
- when src.scan(/M-(.)/) then
469
- c = src[1]
593
+ when scan(/M-(.)/) then
594
+ c = ss[1]
470
595
  c[0] = (c[0].ord | 0x80).chr
471
596
  c
472
- when src.check(/(C-|c)\\[\\MCc]/) then
473
- src.scan(/(C-|c)\\/) # eat it
597
+ when check(/(C-|c)\\[\\MCc]/) then
598
+ scan(/(C-|c)\\/) # eat it
474
599
  c = self.read_escape
475
600
  c[0] = (c[0].ord & 0x9f).chr
476
601
  c
477
- when src.scan(/C-\?|c\?/) then
602
+ when scan(/C-\?|c\?/) then
478
603
  127.chr
479
- when src.scan(/(C-|c)(.)/) then
480
- c = src[2]
604
+ when scan(/(C-|c)(.)/) then
605
+ c = ss[2]
481
606
  c[0] = (c[0].ord & 0x9f).chr
482
607
  c
483
- when src.scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
484
- src.matched
485
- when src.scan(/[McCx0-9]/) || src.eos? then
608
+ when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
609
+ matched
610
+ when scan(/u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/) then
611
+ [ss[1].delete("{}").to_i(16)].pack("U")
612
+ when scan(/[McCx0-9]/) || end_of_stream? then
486
613
  rb_compile_error("Invalid escape character syntax")
487
614
  else
488
- src.getch
615
+ ss.getch
489
616
  end
490
617
  end
491
618
 
492
- def regx_options # 15 lines
619
+ def regx_options # TODO: rewrite / remove
493
620
  good, bad = [], []
494
621
 
495
- if src.scan(/[a-z]+/) then
496
- good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
622
+ if scan(/[a-z]+/) then
623
+ good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
497
624
  end
498
625
 
499
626
  unless bad.empty? then
@@ -505,13 +632,30 @@ class RubyLexer
505
632
  end
506
633
 
507
634
  def reset
635
+ self.brace_nest = 0
508
636
  self.command_start = true
637
+ self.comments = []
638
+ self.lex_state = nil
509
639
  self.lex_strterm = nil
640
+ self.lineno = 1
641
+ self.lpar_beg = nil
642
+ self.paren_nest = 0
643
+ self.space_seen = false
644
+ self.string_nest = 0
510
645
  self.token = nil
511
646
  self.yacc_value = nil
512
647
 
513
- @src = nil
514
- @lex_state = nil
648
+ self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
649
+ self.cond = RubyParserStuff::StackState.new(:cond)
650
+
651
+ @src = nil
652
+ end
653
+
654
+ def result lex_state, token, text # :nodoc:
655
+ lex_state = self.arg_state if lex_state == :arg_state
656
+ self.lex_state = lex_state if lex_state
657
+ self.yacc_value = text
658
+ token
515
659
  end
516
660
 
517
661
  def ruby18
@@ -522,35 +666,52 @@ class RubyLexer
522
666
  Ruby19Parser === parser
523
667
  end
524
668
 
669
+ def scan re
670
+ ss.scan re
671
+ end
672
+
673
+ def space_vs_beginning space_type, beg_type, fallback
674
+ if is_space_arg? check(/./m) then
675
+ warning "`**' interpreted as argument prefix"
676
+ space_type
677
+ elsif is_beg? then
678
+ beg_type
679
+ else
680
+ # TODO: warn_balanced("**", "argument prefix");
681
+ fallback
682
+ end
683
+ end
684
+
685
+ def string type, beg = matched, nnd = "\0"
686
+ self.lex_strterm = [:strterm, type, beg, nnd]
687
+ end
688
+
525
689
  def src= src
526
690
  raise "bad src: #{src.inspect}" unless String === src
527
691
  @src = RPStringScanner.new(src)
528
692
  end
529
693
 
530
- def tokadd_escape term # 20 lines
694
+ def tokadd_escape term # TODO: rewrite / remove
531
695
  case
532
- when src.scan(/\\\n/) then
696
+ when scan(/\\\n/) then
533
697
  # just ignore
534
- when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
535
- self.string_buffer << src.matched
536
- when src.scan(/\\([MC]-|c)(?=\\)/) then
537
- self.string_buffer << src.matched
698
+ when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
699
+ self.string_buffer << matched
700
+ when scan(/\\([MC]-|c)(?=\\)/) then
701
+ self.string_buffer << matched
538
702
  self.tokadd_escape term
539
- when src.scan(/\\([MC]-|c)(.)/) then
540
- self.string_buffer << src.matched
541
- when src.scan(/\\[McCx]/) then
703
+ when scan(/\\([MC]-|c)(.)/) then
704
+ self.string_buffer << matched
705
+ when scan(/\\[McCx]/) then
542
706
  rb_compile_error "Invalid escape character syntax"
543
- when src.scan(/\\(.)/m) then
544
- self.string_buffer << src.matched
707
+ when scan(/\\(.)/m) then
708
+ self.string_buffer << matched
545
709
  else
546
710
  rb_compile_error "Invalid escape character syntax"
547
711
  end
548
712
  end
549
713
 
550
- @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
551
- @@regexp_cache[nil] = nil
552
-
553
- def tokadd_string(func, term, paren) # 105 lines
714
+ def tokadd_string(func, term, paren) # TODO: rewrite / remove
554
715
  qwords = (func & STR_FUNC_QWORDS) != 0
555
716
  escape = (func & STR_FUNC_ESCAPE) != 0
556
717
  expand = (func & STR_FUNC_EXPAND) != 0
@@ -560,49 +721,49 @@ class RubyLexer
560
721
  paren_re = @@regexp_cache[paren]
561
722
  term_re = @@regexp_cache[term]
562
723
 
563
- until src.eos? do
724
+ until end_of_stream? do
564
725
  c = nil
565
726
  handled = true
566
727
 
567
728
  case
568
- when paren_re && src.scan(paren_re) then
729
+ when paren_re && scan(paren_re) then
569
730
  self.string_nest += 1
570
- when src.scan(term_re) then
731
+ when scan(term_re) then
571
732
  if self.string_nest == 0 then
572
- src.pos -= 1
733
+ ss.pos -= 1
573
734
  break
574
735
  else
575
736
  self.string_nest -= 1
576
737
  end
577
- when expand && src.scan(/#(?=[\$\@\{])/) then
578
- src.pos -= 1
738
+ when expand && scan(/#(?=[\$\@\{])/) then
739
+ ss.pos -= 1
579
740
  break
580
- when qwords && src.scan(/\s/) then
581
- src.pos -= 1
741
+ when qwords && scan(/\s/) then
742
+ ss.pos -= 1
582
743
  break
583
- when expand && src.scan(/#(?!\n)/) then
744
+ when expand && scan(/#(?!\n)/) then
584
745
  # do nothing
585
- when src.check(/\\/) then
746
+ when check(/\\/) then
586
747
  case
587
- when qwords && src.scan(/\\\n/) then
748
+ when qwords && scan(/\\\n/) then
588
749
  string_buffer << "\n"
589
750
  next
590
- when qwords && src.scan(/\\\s/) then
751
+ when qwords && scan(/\\\s/) then
591
752
  c = ' '
592
- when expand && src.scan(/\\\n/) then
753
+ when expand && scan(/\\\n/) then
593
754
  next
594
- when regexp && src.check(/\\/) then
755
+ when regexp && check(/\\/) then
595
756
  self.tokadd_escape term
596
757
  next
597
- when expand && src.scan(/\\/) then
758
+ when expand && scan(/\\/) then
598
759
  c = self.read_escape
599
- when src.scan(/\\\n/) then
760
+ when scan(/\\\n/) then
600
761
  # do nothing
601
- when src.scan(/\\\\/) then
762
+ when scan(/\\\\/) then
602
763
  string_buffer << '\\' if escape
603
764
  c = '\\'
604
- when src.scan(/\\/) then
605
- unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
765
+ when scan(/\\/) then
766
+ unless scan(term_re) || paren.nil? || scan(paren_re) then
606
767
  string_buffer << "\\"
607
768
  end
608
769
  else
@@ -625,38 +786,22 @@ class RubyLexer
625
786
  /[^#{t}#{x}\#\0\\]+|./
626
787
  end
627
788
 
628
- src.scan re
629
- c = src.matched
789
+ scan re
790
+ c = matched
630
791
 
631
792
  rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
632
793
  end # unless handled
633
794
 
634
- c ||= src.matched
795
+ c ||= matched
635
796
  string_buffer << c
636
797
  end # until
637
798
 
638
- c ||= src.matched
639
- c = RubyLexer::EOF if src.eos?
799
+ c ||= matched
800
+ c = RubyLexer::EOF if end_of_stream?
640
801
 
641
802
  return c
642
803
  end
643
804
 
644
- ESCAPES = {
645
- "a" => "\007",
646
- "b" => "\010",
647
- "e" => "\033",
648
- "f" => "\f",
649
- "n" => "\n",
650
- "r" => "\r",
651
- "s" => " ",
652
- "t" => "\t",
653
- "v" => "\13",
654
- "\\" => '\\',
655
- "\n" => "",
656
- "C-\?" => 127.chr,
657
- "c\?" => 127.chr,
658
- }
659
-
660
805
  def unescape s
661
806
  r = ESCAPES[s]
662
807
 
@@ -675,6 +820,8 @@ class RubyLexer
675
820
  s
676
821
  when /^[McCx0-9]/ then
677
822
  rb_compile_error("Invalid escape character syntax")
823
+ when /u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/ then
824
+ [$1.delete("{}").to_i(16)].pack("U")
678
825
  else
679
826
  s
680
827
  end
@@ -691,11 +838,11 @@ class RubyLexer
691
838
  #
692
839
  # @return Description of the Returned Value
693
840
 
694
- def yylex # 826 lines
841
+ def yylex # 461 lines
695
842
  c = ''
696
843
  self.space_seen = false
697
844
  command_state = false
698
- src = self.src
845
+ ss = self.src
699
846
 
700
847
  self.token = nil
701
848
  self.yacc_value = nil
@@ -708,41 +855,42 @@ class RubyLexer
708
855
  last_state = lex_state
709
856
 
710
857
  loop do # START OF CASE
711
- if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
858
+ if scan(/[\ \t\r\f\v]/) then # \s - \n + \v
712
859
  self.space_seen = true
713
860
  next
714
- elsif src.check(/[^a-zA-Z]/) then
715
- if src.scan(/\n|#/) then
861
+ elsif check(/[^a-zA-Z]/) then
862
+ if scan(/\n|\#/) then
716
863
  self.lineno = nil
717
- c = src.matched
864
+ c = matched
718
865
  if c == '#' then
719
- src.pos -= 1
866
+ ss.pos -= 1
720
867
 
721
- while src.scan(/\s*#.*(\n+|\z)/) do
722
- @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
868
+ while scan(/\s*#.*(\n+|\z)/) do
869
+ # TODO: self.lineno += matched.lines.to_a.size
870
+ @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
723
871
  end
724
872
 
725
- return RubyLexer::EOF if src.eos?
873
+ return RubyLexer::EOF if end_of_stream?
726
874
  end
727
875
 
728
876
  # Replace a string of newlines with a single one
729
- src.scan(/\n+/)
877
+ scan(/\n+/)
730
878
 
731
879
  next if in_lex_state?(:expr_beg, :expr_value, :expr_class,
732
880
  :expr_fname, :expr_dot)
733
881
 
734
- if src.scan(/([\ \t\r\f\v]*)\./) then
735
- self.space_seen = true unless src[1].empty?
882
+ if scan(/([\ \t\r\f\v]*)\./) then
883
+ self.space_seen = true unless ss[1].empty?
736
884
 
737
- src.pos -= 1
738
- next unless src.check(/\.\./)
885
+ ss.pos -= 1
886
+ next unless check(/\.\./)
739
887
  end
740
888
 
741
889
  self.command_start = true
742
- self.lex_state = :expr_beg
743
- return :tNL
744
- elsif src.scan(/[\]\)\}]/) then
745
- if src.matched == "}" then
890
+
891
+ return result(:expr_beg, :tNL, nil)
892
+ elsif scan(/[\]\)\}]/) then
893
+ if matched == "}" then
746
894
  self.brace_nest -= 1
747
895
  else
748
896
  self.paren_nest -= 1
@@ -750,54 +898,34 @@ class RubyLexer
750
898
 
751
899
  cond.lexpop
752
900
  cmdarg.lexpop
753
- tern.lexpop
754
-
755
- self.lex_state = if src.matched == ")" then
756
- :expr_endfn
757
- else
758
- :expr_endarg
759
- end
760
901
 
761
- self.yacc_value = src.matched
762
- result = {
902
+ text = matched
903
+ state = text == ")" ? :expr_endfn : :expr_endarg
904
+ token = {
763
905
  ")" => :tRPAREN,
764
906
  "]" => :tRBRACK,
765
907
  "}" => :tRCURLY
766
- }[src.matched]
767
- return result
768
- elsif src.scan(/\!/) then
769
- if in_lex_state?(:expr_fname, :expr_dot) then
770
- self.lex_state = :expr_arg
771
-
772
- if src.scan(/@/) then
773
- self.yacc_value = "!@"
774
- return :tUBANG
775
- end
776
- else
777
- self.lex_state = :expr_beg
778
- end
908
+ }[text]
779
909
 
780
- if src.scan(/[=~]/) then
781
- self.yacc_value = "!#{src.matched}"
782
- else
783
- self.yacc_value = "!"
910
+ return result(state, token, text)
911
+ elsif scan(/\!/) then
912
+ if in_arg_state? then
913
+ return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
784
914
  end
785
915
 
786
- return TOKENS[self.yacc_value]
787
- elsif src.scan(/\.\.\.?|,|![=~]?/) then
788
- self.lex_state = :expr_beg
789
- tok = self.yacc_value = src.matched
790
- return TOKENS[tok]
791
- elsif src.check(/\./) then
792
- if src.scan(/\.\d/) then
916
+ text = scan(/[=~]/) ? "!#{matched}" : "!"
917
+
918
+ return result(arg_state, TOKENS[text], text)
919
+ elsif scan(/\.\.\.?|,|![=~]?/) then
920
+ return result(:expr_beg, TOKENS[matched], matched)
921
+ elsif check(/\./) then
922
+ if scan(/\.\d/) then
793
923
  rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
794
- elsif src.scan(/\./) then
795
- self.lex_state = :expr_dot
796
- self.yacc_value = "."
797
- return :tDOT
924
+ elsif scan(/\./) then
925
+ return result(:expr_dot, :tDOT, ".")
798
926
  end
799
- elsif src.scan(/\(/) then
800
- result = if ruby18 then
927
+ elsif scan(/\(/) then
928
+ token = if ruby18 then
801
929
  yylex_paren18
802
930
  else
803
931
  yylex_paren19
@@ -805,259 +933,188 @@ class RubyLexer
805
933
 
806
934
  self.paren_nest += 1
807
935
 
808
- self.expr_beg_push "("
936
+ return expr_result(token, "(")
937
+ elsif check(/\=/) then
938
+ if scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
939
+ tok = matched
940
+ return result(:arg_state, TOKENS[tok], tok)
941
+ elsif beginning_of_line? and scan(/\=begin(?=\s)/) then
942
+ @comments << matched
809
943
 
810
- return result
811
- elsif src.check(/\=/) then
812
- if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
813
- self.fix_arg_lex_state
814
- tok = self.yacc_value = src.matched
815
- return TOKENS[tok]
816
- elsif src.scan(/\=begin(?=\s)/) then
817
- @comments << src.matched
818
-
819
- unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
944
+ unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
820
945
  @comments.clear
821
946
  rb_compile_error("embedded document meets end of file")
822
947
  end
823
948
 
824
- @comments << src.matched
949
+ @comments << matched
825
950
 
826
951
  next
952
+ elsif scan(/\=(?=begin\b)/) then # h[k]=begin ... end
953
+ tok = matched
954
+ return result(:arg_state, TOKENS[tok], tok)
827
955
  else
828
956
  raise "you shouldn't be able to get here"
829
957
  end
830
- elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
831
- self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
832
- self.lex_state = :expr_end
833
- return :tSTRING
834
- elsif src.scan(/\"/) then # FALLBACK
835
- self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
836
- self.yacc_value = "\""
837
- return :tSTRING_BEG
838
- elsif src.scan(/\@\@?#{IDENT_CHAR_RE}+/o) then
839
- self.token = src.matched
840
-
841
- rb_compile_error "`#{token}` is not allowed as a variable name" if
842
- token =~ /\@\d/
843
-
844
- return process_token(command_state)
845
- elsif src.scan(/\:\:/) then
958
+ elsif scan(/\"(#{SIMPLE_STRING})\"/o) then
959
+ string = matched[1..-2].gsub(ESC) { unescape $1 }
960
+ return result(:expr_end, :tSTRING, string)
961
+ elsif scan(/\"/) then # FALLBACK
962
+ string STR_DQUOTE, '"' # TODO: question this
963
+ return result(nil, :tSTRING_BEG, '"')
964
+ elsif scan(/\@\@?#{IDENT_CHAR}+/o) then
965
+ self.token = matched
966
+
967
+ rb_compile_error "`#{self.token}` is not allowed as a variable name" if
968
+ self.token =~ /\@\d/
969
+
970
+ tok_id = matched =~ /^@@/ ? :tCVAR : :tIVAR
971
+ return result(:expr_end, tok_id, self.token)
972
+ elsif scan(/\:\:/) then
846
973
  if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
847
- self.lex_state = :expr_beg
848
- self.yacc_value = "::"
849
- return :tCOLON3
974
+ return result(:expr_beg, :tCOLON3, "::")
850
975
  end
851
976
 
852
- self.lex_state = :expr_dot
853
- self.yacc_value = "::"
854
- return :tCOLON2
855
- elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
977
+ return result(:expr_dot, :tCOLON2, "::")
978
+ elsif ! is_end? && scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
856
979
  # scanning shortcut to symbols
857
- self.yacc_value = src[1]
858
- self.lex_state = :expr_end
859
- return :tSYMBOL
860
- elsif src.scan(/\:/) then
980
+ return result(:expr_end, :tSYMBOL, ss[1])
981
+ elsif ! is_end? && (scan(/\:\"(#{SIMPLE_STRING})\"/) ||
982
+ scan(/\:\'(#{SIMPLE_SSTRING})\'/)) then
983
+ symbol = ss[1].gsub(ESC) { unescape $1 }
984
+
985
+ rb_compile_error "symbol cannot contain '\\0'" if
986
+ ruby18 && symbol =~ /\0/
987
+
988
+ return result(:expr_end, :tSYMBOL, symbol)
989
+ elsif scan(/\:/) then
861
990
  # ?: / then / when
862
- if is_end? || src.check(/\s/) then
863
- self.lex_state = :expr_beg
991
+ if is_end? || check(/\s/) then
864
992
  # TODO warn_balanced(":", "symbol literal");
865
- self.yacc_value = ":"
866
- return :tCOLON
993
+ return result(:expr_beg, :tCOLON, ":")
867
994
  end
868
995
 
869
996
  case
870
- when src.scan(/\'/) then
871
- self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
872
- when src.scan(/\"/) then
873
- self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
997
+ when scan(/\'/) then
998
+ string STR_SSYM, matched
999
+ when scan(/\"/) then
1000
+ string STR_DSYM, matched
874
1001
  end
875
1002
 
876
- self.lex_state = :expr_fname
877
- self.yacc_value = ":"
878
- return :tSYMBEG
879
- elsif src.check(/[0-9]/) then
1003
+ return result(:expr_fname, :tSYMBEG, ":")
1004
+ elsif check(/[0-9]/) then
880
1005
  return parse_number
881
- elsif src.scan(/\[/) then
1006
+ elsif scan(/\[/) then
882
1007
  self.paren_nest += 1
883
1008
 
884
- result = src.matched
1009
+ token = nil
885
1010
 
886
1011
  if in_lex_state? :expr_fname, :expr_dot then
887
- self.lex_state = :expr_arg
888
1012
  case
889
- when src.scan(/\]\=/) then
1013
+ when scan(/\]\=/) then
890
1014
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
891
- self.yacc_value = "[]="
892
- return :tASET
893
- when src.scan(/\]/) then
1015
+ return result(:expr_arg, :tASET, "[]=")
1016
+ when scan(/\]/) then
894
1017
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
895
- self.yacc_value = "[]"
896
- return :tAREF
1018
+ return result(:expr_arg, :tAREF, "[]")
897
1019
  else
898
1020
  rb_compile_error "unexpected '['"
899
1021
  end
900
1022
  elsif is_beg? then
901
- self.tern.push false
902
- result = :tLBRACK
1023
+ token = :tLBRACK
903
1024
  elsif is_arg? && space_seen then
904
- self.tern.push false
905
- result = :tLBRACK
1025
+ token = :tLBRACK
906
1026
  else
907
- result = :tLBRACK2
1027
+ token = :tLBRACK2
908
1028
  end
909
1029
 
910
- self.expr_beg_push "["
911
-
912
- return result
913
- elsif src.scan(/\'(\\.|[^\'])*\'/) then
914
- self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
915
- self.lex_state = :expr_end
916
- return :tSTRING
917
- elsif src.check(/\|/) then
918
- if src.scan(/\|\|\=/) then
919
- self.lex_state = :expr_beg
920
- self.yacc_value = "||"
921
- return :tOP_ASGN
922
- elsif src.scan(/\|\|/) then
923
- self.lex_state = :expr_beg
924
- self.yacc_value = "||"
925
- return :tOROP
926
- elsif src.scan(/\|\=/) then
927
- self.lex_state = :expr_beg
928
- self.yacc_value = "|"
929
- return :tOP_ASGN
930
- elsif src.scan(/\|/) then
931
- self.fix_arg_lex_state
932
- self.yacc_value = "|"
933
- return :tPIPE
1030
+ return expr_result(token, "[")
1031
+ elsif scan(/\'#{SIMPLE_SSTRING}\'/) then
1032
+ text = matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
1033
+ return result(:expr_end, :tSTRING, text)
1034
+ elsif check(/\|/) then
1035
+ if scan(/\|\|\=/) then
1036
+ return result(:expr_beg, :tOP_ASGN, "||")
1037
+ elsif scan(/\|\|/) then
1038
+ return result(:expr_beg, :tOROP, "||")
1039
+ elsif scan(/\|\=/) then
1040
+ return result(:expr_beg, :tOP_ASGN, "|")
1041
+ elsif scan(/\|/) then
1042
+ return result(:arg_state, :tPIPE, "|")
934
1043
  end
935
- elsif src.scan(/\{/) then
1044
+ elsif scan(/\{/) then
936
1045
  self.brace_nest += 1
937
1046
  if lpar_beg && lpar_beg == paren_nest then
938
1047
  self.lpar_beg = nil
939
1048
  self.paren_nest -= 1
940
1049
 
941
- expr_beg_push "{"
942
-
943
- return :tLAMBEG
1050
+ return expr_result(:tLAMBEG, "{")
944
1051
  end
945
1052
 
946
- result = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
1053
+ token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
947
1054
  :tLCURLY # block (primary)
948
1055
  elsif in_lex_state?(:expr_endarg) then
949
1056
  :tLBRACE_ARG # block (expr)
950
1057
  else
951
- self.tern.push false
952
1058
  :tLBRACE # hash
953
1059
  end
954
1060
 
955
- self.expr_beg_push "{"
956
- self.command_start = true unless result == :tLBRACE
1061
+ self.command_start = true unless token == :tLBRACE
957
1062
 
958
- return result
959
- elsif src.scan(/->/) then
960
- self.lex_state = :expr_endfn
961
- return :tLAMBDA
962
- elsif src.scan(/[+-]/) then
963
- sign = src.matched
1063
+ return expr_result(token, "{")
1064
+ elsif scan(/->/) then
1065
+ return result(:expr_endfn, :tLAMBDA, nil)
1066
+ elsif scan(/[+-]/) then
1067
+ sign = matched
964
1068
  utype, type = if sign == "+" then
965
1069
  [:tUPLUS, :tPLUS]
966
1070
  else
967
1071
  [:tUMINUS, :tMINUS]
968
1072
  end
969
1073
 
970
- if in_lex_state? :expr_fname, :expr_dot then
971
- self.lex_state = :expr_arg
972
- if src.scan(/@/) then
973
- self.yacc_value = "#{sign}@"
974
- return utype
1074
+ if in_arg_state? then
1075
+ if scan(/@/) then
1076
+ return result(:expr_arg, utype, "#{sign}@")
975
1077
  else
976
- self.yacc_value = sign
977
- return type
1078
+ return result(:expr_arg, type, sign)
978
1079
  end
979
1080
  end
980
1081
 
981
- if src.scan(/\=/) then
982
- self.lex_state = :expr_beg
983
- self.yacc_value = sign
984
- return :tOP_ASGN
985
- end
1082
+ return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
986
1083
 
987
- if (is_beg? || (is_arg? && space_seen && !src.check(/\s/))) then
988
- if is_arg? then
989
- arg_ambiguous
990
- end
1084
+ if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
1085
+ arg_ambiguous if is_arg?
991
1086
 
992
- self.lex_state = :expr_beg
993
- self.yacc_value = sign
994
-
995
- if src.check(/\d/) then
996
- if utype == :tUPLUS then
997
- return self.parse_number
998
- else
999
- return :tUMINUS_NUM
1000
- end
1087
+ if check(/\d/) then
1088
+ return self.parse_number if utype == :tUPLUS
1089
+ return result(:expr_beg, :tUMINUS_NUM, sign)
1001
1090
  end
1002
1091
 
1003
- return utype
1092
+ return result(:expr_beg, utype, sign)
1004
1093
  end
1005
1094
 
1006
- self.lex_state = :expr_beg
1007
- self.yacc_value = sign
1008
- return type
1009
- elsif src.check(/\*/) then
1010
- if src.scan(/\*\*=/) then
1011
- self.lex_state = :expr_beg
1012
- self.yacc_value = "**"
1013
- return :tOP_ASGN
1014
- elsif src.scan(/\*\*/) then
1015
- result = if is_space_arg? src.check(/./m) then
1016
- warning "`**' interpreted as argument prefix"
1017
- :tDSTAR
1018
- elsif is_beg? then
1019
- :tDSTAR
1020
- else
1021
- # TODO: warn_balanced("**", "argument prefix");
1022
- :tPOW
1023
- end
1024
- self.yacc_value = "**"
1025
- self.fix_arg_lex_state
1026
- return result
1027
- elsif src.scan(/\*\=/) then
1028
- self.lex_state = :expr_beg
1029
- self.yacc_value = "*"
1030
- return :tOP_ASGN
1031
- elsif src.scan(/\*/) then
1032
- result = if is_space_arg? src.check(/./m) then
1033
- warning("`*' interpreted as argument prefix")
1034
- :tSTAR
1035
- elsif is_beg? then
1036
- :tSTAR
1037
- else
1038
- # TODO: warn_balanced("*", "argument prefix");
1039
- :tSTAR2 # TODO: rename
1040
- end
1095
+ return result(:expr_beg, type, sign)
1096
+ elsif check(/\*/) then
1097
+ if scan(/\*\*=/) then
1098
+ return result(:expr_beg, :tOP_ASGN, "**")
1099
+ elsif scan(/\*\*/) then
1100
+ token = space_vs_beginning :tDSTAR, :tDSTAR, :tPOW
1101
+
1102
+ return result(:arg_state, token, "**")
1103
+ elsif scan(/\*\=/) then
1104
+ return result(:expr_beg, :tOP_ASGN, "*")
1105
+ elsif scan(/\*/) then
1106
+ token = space_vs_beginning :tSTAR, :tSTAR, :tSTAR2
1041
1107
 
1042
- self.yacc_value = "*"
1043
- self.fix_arg_lex_state
1044
- return result
1108
+ return result(:arg_state, token, "*")
1045
1109
  end
1046
- elsif src.check(/\</) then
1047
- if src.scan(/\<\=\>/) then
1048
- self.fix_arg_lex_state
1049
- self.yacc_value = "<=>"
1050
- return :tCMP
1051
- elsif src.scan(/\<\=/) then
1052
- self.fix_arg_lex_state
1053
- self.yacc_value = "<="
1054
- return :tLEQ
1055
- elsif src.scan(/\<\<\=/) then
1056
- self.fix_arg_lex_state
1057
- self.lex_state = :expr_beg
1058
- self.yacc_value = "\<\<"
1059
- return :tOP_ASGN
1060
- elsif src.scan(/\<\</) then
1110
+ elsif check(/\</) then
1111
+ if scan(/\<\=\>/) then
1112
+ return result(:arg_state, :tCMP, "<=>")
1113
+ elsif scan(/\<\=/) then
1114
+ return result(:arg_state, :tLEQ, "<=")
1115
+ elsif scan(/\<\<\=/) then
1116
+ return result(:arg_state, :tOP_ASGN, "<<")
1117
+ elsif scan(/\<\</) then
1061
1118
  if (!in_lex_state?(:expr_dot, :expr_class) &&
1062
1119
  !is_end? &&
1063
1120
  (!is_arg? || space_seen)) then
@@ -1065,70 +1122,49 @@ class RubyLexer
1065
1122
  return tok if tok
1066
1123
  end
1067
1124
 
1068
- self.fix_arg_lex_state
1069
- self.yacc_value = "\<\<"
1070
- return :tLSHFT
1071
- elsif src.scan(/\</) then
1072
- self.fix_arg_lex_state
1073
- self.yacc_value = "<"
1074
- return :tLT
1125
+ return result(:arg_state, :tLSHFT, "\<\<")
1126
+ elsif scan(/\</) then
1127
+ return result(:arg_state, :tLT, "<")
1075
1128
  end
1076
- elsif src.check(/\>/) then
1077
- if src.scan(/\>\=/) then
1078
- self.fix_arg_lex_state
1079
- self.yacc_value = ">="
1080
- return :tGEQ
1081
- elsif src.scan(/\>\>=/) then
1082
- self.fix_arg_lex_state
1083
- self.lex_state = :expr_beg
1084
- self.yacc_value = ">>"
1085
- return :tOP_ASGN
1086
- elsif src.scan(/\>\>/) then
1087
- self.fix_arg_lex_state
1088
- self.yacc_value = ">>"
1089
- return :tRSHFT
1090
- elsif src.scan(/\>/) then
1091
- self.fix_arg_lex_state
1092
- self.yacc_value = ">"
1093
- return :tGT
1129
+ elsif check(/\>/) then
1130
+ if scan(/\>\=/) then
1131
+ return result(:arg_state, :tGEQ, ">=")
1132
+ elsif scan(/\>\>=/) then
1133
+ return result(:arg_state, :tOP_ASGN, ">>")
1134
+ elsif scan(/\>\>/) then
1135
+ return result(:arg_state, :tRSHFT, ">>")
1136
+ elsif scan(/\>/) then
1137
+ return result(:arg_state, :tGT, ">")
1094
1138
  end
1095
- elsif src.scan(/\`/) then
1096
- self.yacc_value = "`"
1139
+ elsif scan(/\`/) then
1097
1140
  case lex_state
1098
1141
  when :expr_fname then
1099
- self.lex_state = :expr_end
1100
- return :tBACK_REF2
1142
+ return result(:expr_end, :tBACK_REF2, "`")
1101
1143
  when :expr_dot then
1102
- self.lex_state = if command_state then
1103
- :expr_cmdarg
1104
- else
1105
- :expr_arg
1106
- end
1107
- return :tBACK_REF2
1144
+ state = command_state ? :expr_cmdarg : :expr_arg
1145
+ return result(state, :tBACK_REF2, "`")
1146
+ else
1147
+ string STR_XQUOTE, '`'
1148
+ return result(nil, :tXSTRING_BEG, "`")
1108
1149
  end
1109
- self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
1110
- return :tXSTRING_BEG
1111
- elsif src.scan(/\?/) then
1112
-
1150
+ elsif scan(/\?/) then
1113
1151
  if is_end? then
1114
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1115
- self.tern.push true
1116
- self.yacc_value = "?"
1117
- return :tEH
1152
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
1153
+ return result(state, :tEH, "?")
1118
1154
  end
1119
1155
 
1120
- if src.eos? then
1156
+ if end_of_stream? then
1121
1157
  rb_compile_error "incomplete character syntax"
1122
1158
  end
1123
1159
 
1124
- if src.check(/\s|\v/) then
1160
+ if check(/\s|\v/) then
1125
1161
  unless is_arg? then
1126
1162
  c2 = { " " => 's',
1127
1163
  "\n" => 'n',
1128
1164
  "\t" => 't',
1129
1165
  "\v" => 'v',
1130
1166
  "\r" => 'r',
1131
- "\f" => 'f' }[src.matched]
1167
+ "\f" => 'f' }[matched]
1132
1168
 
1133
1169
  if c2 then
1134
1170
  warning("invalid character syntax; use ?\\" + c2)
@@ -1136,47 +1172,32 @@ class RubyLexer
1136
1172
  end
1137
1173
 
1138
1174
  # ternary
1139
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1140
- self.tern.push true
1141
- self.yacc_value = "?"
1142
- return :tEH
1143
- elsif src.check(/\w(?=\w)/) then # ternary, also
1144
- self.lex_state = :expr_beg
1145
- self.tern.push true
1146
- self.yacc_value = "?"
1147
- return :tEH
1175
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
1176
+ return result(state, :tEH, "?")
1177
+ elsif check(/\w(?=\w)/) then # ternary, also
1178
+ return result(:expr_beg, :tEH, "?")
1148
1179
  end
1149
1180
 
1150
- c = if src.scan(/\\/) then
+ c = if scan(/\\/) then
  self.read_escape
  else
- src.getch
+ ss.getch
  end
- self.lex_state = :expr_end

  if version == 18 then
- self.yacc_value = c[0].ord & 0xff
- return :tINTEGER
+ return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
  else
- self.yacc_value = c
- return :tSTRING
+ return result(:expr_end, :tSTRING, c)
  end
- elsif src.check(/\&/) then
- if src.scan(/\&\&\=/) then
- self.yacc_value = "&&"
- self.lex_state = :expr_beg
- return :tOP_ASGN
- elsif src.scan(/\&\&/) then
- self.lex_state = :expr_beg
- self.yacc_value = "&&"
- return :tANDOP
- elsif src.scan(/\&\=/) then
- self.yacc_value = "&"
- self.lex_state = :expr_beg
- return :tOP_ASGN
- elsif src.scan(/&/) then
- result = if is_arg? && space_seen &&
- !src.check(/\s/) then
+ elsif check(/\&/) then
+ if scan(/\&\&\=/) then
+ return result(:expr_beg, :tOP_ASGN, "&&")
+ elsif scan(/\&\&/) then
+ return result(:expr_beg, :tANDOP, "&&")
+ elsif scan(/\&\=/) then
+ return result(:expr_beg, :tOP_ASGN, "&")
+ elsif scan(/&/) then
+ token = if is_arg? && space_seen && !check(/\s/) then
  warning("`&' interpreted as argument prefix")
  :tAMPER
  elsif in_lex_state? :expr_beg, :expr_mid then
@@ -1185,170 +1206,121 @@ class RubyLexer
  :tAMPER2
  end

- self.fix_arg_lex_state
- self.yacc_value = "&"
- return result
+ return result(:arg_state, token, "&")
  end
- elsif src.scan(/\//) then
+ elsif scan(/\//) then
  if is_beg? then
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
- self.yacc_value = "/"
- return :tREGEXP_BEG
+ string STR_REGEXP, '/'
+ return result(nil, :tREGEXP_BEG, "/")
  end

- if src.scan(/\=/) then
- self.yacc_value = "/"
- self.lex_state = :expr_beg
- return :tOP_ASGN
+ if scan(/\=/) then
+ return result(:expr_beg, :tOP_ASGN, "/")
  end

  if is_arg? && space_seen then
- unless src.scan(/\s/) then
+ unless scan(/\s/) then
  arg_ambiguous
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
- self.yacc_value = "/"
- return :tREGEXP_BEG
+ string STR_REGEXP, '/'
+ return result(nil, :tREGEXP_BEG, "/")
  end
  end

- self.fix_arg_lex_state
- self.yacc_value = "/"
-
- return :tDIVIDE
- elsif src.scan(/\^=/) then
- self.lex_state = :expr_beg
- self.yacc_value = "^"
- return :tOP_ASGN
- elsif src.scan(/\^/) then
- self.fix_arg_lex_state
- self.yacc_value = "^"
- return :tCARET
- elsif src.scan(/\;/) then
+ return result(:arg_state, :tDIVIDE, "/")
+ elsif scan(/\^=/) then
+ return result(:expr_beg, :tOP_ASGN, "^")
+ elsif scan(/\^/) then
+ return result(:arg_state, :tCARET, "^")
+ elsif scan(/\;/) then
  self.command_start = true
- self.lex_state = :expr_beg
- self.yacc_value = ";"
- return :tSEMI
- elsif src.scan(/\~/) then
- if in_lex_state? :expr_fname, :expr_dot then
- src.scan(/@/)
- end
-
- self.fix_arg_lex_state
- self.yacc_value = "~"
-
- return :tTILDE
- elsif src.scan(/\\/) then
- if src.scan(/\r?\n/) then
+ return result(:expr_beg, :tSEMI, ";")
+ elsif scan(/\~/) then
+ scan(/@/) if in_lex_state? :expr_fname, :expr_dot
+ return result(:arg_state, :tTILDE, "~")
+ elsif scan(/\\/) then
+ if scan(/\r?\n/) then
  self.lineno = nil
  self.space_seen = true
  next
  end
  rb_compile_error "bare backslash only allowed before newline"
- elsif src.scan(/\%/) then
- if is_beg? then
- return parse_quote
- end
-
- if src.scan(/\=/) then
- self.lex_state = :expr_beg
- self.yacc_value = "%"
- return :tOP_ASGN
- end
-
- return parse_quote if is_arg? && space_seen && ! src.check(/\s/)
-
- self.fix_arg_lex_state
- self.yacc_value = "%"
-
- return :tPERCENT
- elsif src.check(/\$/) then
- if src.scan(/(\$_)(\w+)/) then
- self.lex_state = :expr_end
- self.token = src.matched
- return process_token(command_state)
- elsif src.scan(/\$_/) then
- self.lex_state = :expr_end
- self.token = src.matched
- self.yacc_value = src.matched
- return :tGVAR
- elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
- self.lex_state = :expr_end
- self.yacc_value = src.matched
- return :tGVAR
- elsif src.scan(/\$([\&\`\'\+])/) then
- self.lex_state = :expr_end
+ elsif scan(/\%/) then
+ return parse_quote if is_beg?
+
+ return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
+
+ return parse_quote if is_arg? && space_seen && ! check(/\s/)
+
+ return result(:arg_state, :tPERCENT, "%")
+ elsif check(/\$/) then
+ if scan(/(\$_)(\w+)/) then
+ self.token = matched
+ return result(:expr_end, :tGVAR, matched)
+ elsif scan(/\$_/) then
+ return result(:expr_end, :tGVAR, matched)
+ elsif scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
+ return result(:expr_end, :tGVAR, matched)
+ elsif scan(/\$([\&\`\'\+])/) then
  # Explicit reference to these vars as symbols...
- if last_state == :expr_fname then
- self.yacc_value = src.matched
- return :tGVAR
+ if lex_state == :expr_fname then
+ return result(:expr_end, :tGVAR, matched)
  else
- self.yacc_value = src[1].to_sym
- return :tBACK_REF
+ return result(:expr_end, :tBACK_REF, ss[1].to_sym)
  end
- elsif src.scan(/\$([1-9]\d*)/) then
- self.lex_state = :expr_end
- if last_state == :expr_fname then
- self.yacc_value = src.matched
- return :tGVAR
+ elsif scan(/\$([1-9]\d*)/) then
+ if lex_state == :expr_fname then
+ return result(:expr_end, :tGVAR, matched)
  else
- self.yacc_value = src[1].to_i
- return :tNTH_REF
+ return result(:expr_end, :tNTH_REF, ss[1].to_i)
  end
- elsif src.scan(/\$0/) then
- self.lex_state = :expr_end
- self.token = src.matched
- return process_token(command_state)
- elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
- self.lex_state = :expr_end
- self.yacc_value = "$"
- return "$"
- elsif src.scan(/\$\w+/)
- self.lex_state = :expr_end
- self.token = src.matched
- return process_token(command_state)
+ elsif scan(/\$0/) then
+ return result(:expr_end, :tGVAR, matched)
+ elsif scan(/\$\W|\$\z/) then # TODO: remove?
+ return result(:expr_end, "$", "$") # FIX: "$"??
+ elsif scan(/\$\w+/)
+ return result(:expr_end, :tGVAR, matched)
  end
- elsif src.check(/\_/) then
- if src.beginning_of_line? && src.scan(/\__END__(\r?\n|\Z)/) then
+ elsif check(/\_/) then
+ if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
  self.lineno = nil
  return RubyLexer::EOF
- elsif src.scan(/\_\w*/) then
- self.token = src.matched
- return process_token(command_state)
+ elsif scan(/\_\w*/) then
+ self.token = matched
+ return process_token command_state, last_state
  end
  end
  end # END OF CASE

- if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
+ if scan(/\004|\032|\000/) || end_of_stream? then # ^D, ^Z, EOF
  return RubyLexer::EOF
  else # alpha check
- rb_compile_error "Invalid char #{src.rest[0].chr} in expression" unless
- src.check IDENT_RE
+ rb_compile_error "Invalid char #{ss.rest[0].chr} in expression" unless
+ check IDENT
  end

- self.token = src.matched if self.src.scan IDENT_RE
+ self.token = matched if self.scan IDENT

- return process_token(command_state)
+ return process_token command_state, last_state
  end
  end

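Note on the rewritten branches above: where the 3.2.2 lexer set self.lex_state and self.yacc_value by hand and then returned a token symbol, 3.3.0 funnels each branch through a result(new_state, token, value) helper, and the inline lex_strterm assignments for regexp openers become a string(type, beg) helper. Neither definition appears in this hunk; what follows is only a sketch of what the call sites imply, written as RubyLexer instance methods, with the :arg_state and nil conventions inferred from the code they replace (:arg_state standing in for the old fix_arg_lex_state behaviour, nil meaning lex_state is left untouched):

    # Sketch only, inferred from the call sites above; not the gem's actual definition.
    def result new_state, token, text
      if new_state == :arg_state then
        # what fix_arg_lex_state used to do
        new_state = in_lex_state?(:expr_fname, :expr_dot) ? :expr_arg : :expr_beg
      end
      self.lex_state = new_state if new_state # nil leaves lex_state alone
      self.yacc_value = text
      token
    end

    # Sketch only: wraps the old [:strterm, type, beg, "\0"] lex_strterm assignment.
    def string type, beg, nnd = "\0"
      self.lex_strterm = [:strterm, type, beg, nnd]
    end
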
  def yylex_paren18
  self.command_start = true
- result = :tLPAREN2
+ token = :tLPAREN2

  if in_lex_state? :expr_beg, :expr_mid then
- result = :tLPAREN
+ token = :tLPAREN
  elsif space_seen then
  if in_lex_state? :expr_cmdarg then
- result = :tLPAREN_ARG
+ token = :tLPAREN_ARG
  elsif in_lex_state? :expr_arg then
- self.tern.push false
  warning "don't put space before argument parentheses"
  end
  else
- self.tern.push false
+ # not a ternary -- do nothing?
  end

- result
+ token
  end

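Two small points in yylex_paren18 above: the local is renamed from result to token, presumably so it no longer reads like a call to the new result() helper, and both self.tern.push false calls are gone, since 3.3.0 drops the lexer-side ternary tracking stack. Ternaries still parse through the public API as before; a quick check, with the output shown as an expectation in ruby_parser 3.x sexp style rather than a captured run:

    require "ruby_parser"

    RubyParser.new.parse "a ? b : c"
    # expected: s(:if, s(:call, nil, :a), s(:call, nil, :b), s(:call, nil, :c))
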
  def yylex_paren19
@@ -1361,146 +1333,7 @@ class RubyLexer
  end
  end

- def is_arg?
- in_lex_state? :expr_arg, :expr_cmdarg
- end
-
- def is_end?
- in_lex_state? :expr_end, :expr_endarg, :expr_endfn
- end
-
- def is_beg?
- in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class
- end
-
- # TODO #define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT)
-
- def is_space_arg? c = "x"
- is_arg? and space_seen and c !~ /\s/
- end
-
- def is_label_possible? command_state
- (in_lex_state?(:expr_beg) && !command_state) || is_arg?
- end
-
- def process_token(command_state)
- token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)
-
- result = nil
- last_state = lex_state
-
- case token
- when /^\$/ then
- self.lex_state, result = :expr_end, :tGVAR
- when /^@@/ then
- self.lex_state, result = :expr_end, :tCVAR
- when /^@/ then
- self.lex_state, result = :expr_end, :tIVAR
- else
- if token =~ /[!?]$/ then
- result = :tFID
- else
- if in_lex_state? :expr_fname then
- # ident=, not =~ => == or followed by =>
- # TODO test lexing of a=>b vs a==>b
- if src.scan(/=(?:(?![~>=])|(?==>))/) then
- result = :tIDENTIFIER
- token << src.matched
- end
- end
-
- result ||= if token =~ /^[A-Z]/ then
- :tCONSTANT
- else
- :tIDENTIFIER
- end
- end
-
- unless ruby18
- if is_label_possible? command_state then
- colon = src.scan(/:/)
-
- if colon && src.peek(1) != ":" then
- self.lex_state = :expr_beg
- self.yacc_value = [token, src.lineno]
- return :tLABEL
- end
-
- src.unscan if colon
- end
- end
-
- unless in_lex_state? :expr_dot then
- # See if it is a reserved word.
- keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
- RubyParserStuff::Keyword.keyword18 token
- else
- RubyParserStuff::Keyword.keyword19 token
- end
-
- if keyword then
- state = lex_state
- self.lex_state = keyword.state
- self.yacc_value = [token, src.lineno]
-
- if state == :expr_fname then
- self.yacc_value = keyword.name
- return keyword.id0
- end
-
- self.command_start = true if lex_state == :expr_beg
-
- if keyword.id0 == :kDO then
- if lpar_beg && lpar_beg == paren_nest then
- self.lpar_beg = nil
- self.paren_nest -= 1
-
- return :kDO_LAMBDA
- end
-
- return :kDO_COND if cond.is_in_state
- return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
- return :kDO_BLOCK if [:expr_beg, :expr_endarg].include? state
- return :kDO
- end
-
- return keyword.id0 if [:expr_beg, :expr_value].include? state
-
- self.lex_state = :expr_beg if keyword.id0 != keyword.id1
-
- return keyword.id1
- end
- end
-
- # TODO:
- # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
-
- self.lex_state =
- if is_beg? || is_arg? || in_lex_state?(:expr_dot) then
- if command_state then
- :expr_cmdarg
- else
- :expr_arg
- end
- elsif !ruby18 && in_lex_state?(:expr_fname) then
- :expr_endfn
- else
- :expr_end
- end
-
- end
-
- self.yacc_value = token
-
- if (![:expr_dot, :expr_fname].include?(last_state) &&
- self.parser.env[token.to_sym] == :lvar) then
- self.lex_state = :expr_end
- end
-
- return result
- end
-
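The predicate helpers (is_arg?, is_end?, is_beg?, is_space_arg?, is_label_possible?) and process_token are removed from this spot but not from the lexer: the rewritten yylex above still calls them, and it now passes process_token two arguments, handing over the caller's saved last_state instead of letting process_token recompute it from lex_state as the removed version did. Their 3.3.0 definitions presumably live elsewhere in the file or gem, outside this hunk. The public parsing API appears unaffected by the reshuffle; typical usage is still:

    require "ruby_parser"

    RubyParser.new.parse "1+1"
    # => s(:call, s(:lit, 1), :+, s(:lit, 1))
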
- def yylex_string # 23 lines
+ def yylex_string # TODO: rewrite / remove
  token = if lex_strterm[0] == :heredoc then
  self.heredoc lex_strterm
  else