ruby_parser 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ruby_parser might be problematic. Click here for more details.

data/lib/ruby_lexer.rb CHANGED
@@ -1,2751 +1,1332 @@
1
- require 'pp'
2
- require 'stringio'
3
- require 'racc/parser'
4
1
  $: << File.expand_path("~/Work/p4/zss/src/ParseTree/dev/lib") # for me, not you.
5
2
  require 'sexp'
3
+ require 'ruby_parser_extras'
6
4
 
7
- ############################################################
8
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
9
-
10
- class Module
11
- def kill *methods
12
- methods.each do |method|
13
- define_method method do |*args|
14
- c = caller
15
- raise "#{method} is dead - called from #{c[0]}"
16
- end
17
- end
18
- end
19
- end
20
-
21
- # END HACK
22
- ############################################################
23
-
24
- class RubyParser < Racc::Parser
25
- VERSION = '1.0.0'
26
-
27
- attr_accessor :lexer, :in_def, :in_single, :file
28
- attr_reader :env, :warnings
5
+ class RubyLexer
6
+ attr_accessor :command_start
7
+ attr_accessor :cmdarg
8
+ attr_accessor :cond
9
+ attr_accessor :nest
29
10
 
30
- def initialize
31
- super
32
- self.lexer = RubyLexer.new
33
- self.in_def = false
34
- self.in_single = 0
35
- @env = Environment.new
36
- end
11
+ ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-.|(C-|c)\?|(C-|c).|[^0-7xMCc])/
37
12
 
38
- alias :old_yyerror :yyerror
39
- def yyerror msg=nil
40
- warn msg if msg
41
- old_yyerror
42
- end
13
+ # Additional context surrounding tokens that both the lexer and
14
+ # grammar use.
15
+ attr_reader :lex_state
43
16
 
44
- def parse(str, file = "(string)")
45
- raise "bad val: #{str.inspect}" unless String === str
17
+ attr_accessor :lex_strterm
46
18
 
47
- self.file = file
48
- self.lexer.src = StringIO.new(str)
19
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
49
20
 
50
- @yydebug = ENV.has_key? 'DEBUG'
21
+ # Stream of data that yylex examines.
22
+ attr_reader :src
51
23
 
52
- do_parse
53
- end
24
+ # Last token read via yylex.
25
+ attr_accessor :token
54
26
 
55
- def do_parse
56
- _racc_do_parse_rb(_racc_setup, false)
57
- end
27
+ attr_accessor :string_buffer
58
28
 
59
- def yyparse(recv, mid)
60
- _racc_yyparse_rb(recv, mid, _racc_setup, true)
61
- end
29
+ # Value of last token which had a value associated with it.
30
+ attr_accessor :yacc_value
62
31
 
63
- def on_error( error_token_id, error_value, value_stack )
64
- p :error => [ error_token_id, error_value, value_stack ]
65
- raise "boom"
66
- end if ENV["DEBUG"]
32
+ # What handles warnings
33
+ attr_accessor :warnings
67
34
 
68
- def next_token
69
- if self.lexer.advance then
70
- [self.lexer.token, self.lexer.yacc_value]
71
- else
72
- return [false, '$end']
73
- end
74
- end
35
+ EOF = :eof_haha!
75
36
 
76
- def assignable(lhs, value = nil)
77
- id = lhs.to_sym
78
- id = id.to_sym if Token === id
79
-
80
- raise SyntaxError, "Can't change the value of #{id}" if
81
- id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
82
-
83
- result = case id.to_s
84
- when /^@@/ then
85
- asgn = in_def || in_single > 0
86
- s((asgn ? :cvasgn : :cvdecl), id)
87
- when /^@/ then
88
- s(:iasgn, id)
89
- when /^\$/ then
90
- s(:gasgn, id)
91
- when /^[A-Z]/ then
92
- s(:cdecl, id)
93
- else
37
+ # ruby constants for strings (should this be moved somewhere else?)
38
+ STR_FUNC_BORING = 0x00
39
+ STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
40
+ STR_FUNC_EXPAND = 0x02
41
+ STR_FUNC_REGEXP = 0x04
42
+ STR_FUNC_AWORDS = 0x08
43
+ STR_FUNC_SYMBOL = 0x10
44
+ STR_FUNC_INDENT = 0x20 # <<-HEREDOC
45
+
46
+ STR_SQUOTE = STR_FUNC_BORING
47
+ STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
48
+ STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
49
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
50
+ STR_SSYM = STR_FUNC_SYMBOL
51
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
94
52
 
95
- case self.env[id]
96
- when :lvar then
97
- s(:lasgn, id)
98
- when :dvar, nil then
99
- if self.env.current[id] == :dvar then
100
- s(:dasgn_curr, id)
101
- elsif self.env[id] == :dvar then
102
- self.env.use(id)
103
- s(:dasgn, id)
104
- elsif ! self.env.dynamic? then
105
- s(:lasgn, id)
106
- else
107
- s(:dasgn_curr, id)
108
- end
109
- # if env.dynamic? then
110
- # if env.dasgn_curr? id then
111
- # s(:dasgn_curr, id)
112
- # else
113
- # s(:dasgn, id)
114
- # end
115
- # else
116
- # s(:lasgn, id)
117
- # end
118
- else
119
- raise "wtf?"
120
- end
121
- end
53
+ # How the parser advances to the next token.
54
+ #
55
+ # @return true if not at end of file (EOF).
122
56
 
123
- self.env[id] = (self.env.dynamic? ? :dvar : :lvar) unless self.env[id]
57
+ def advance
58
+ r = yylex
59
+ self.token = r
124
60
 
125
- result << value if value
61
+ raise "yylex returned nil" unless r
126
62
 
127
- return result
63
+ return RubyLexer::EOF != r
128
64
  end
129
65
 
130
- def warnings= warnings
131
- @warnings = warnings
132
-
133
- self.lexer.warnings = warnings
66
+ def arg_ambiguous
67
+ self.warning("Ambiguous first argument. make sure.")
134
68
  end
135
69
 
136
- def arg_add(node1, node2)
137
- return s(:array, node2) unless node1
138
- return node1 << node2 if node1[0] == :array
139
- return s(:argspush, node1, node2)
70
+ def comments
71
+ c = @comments.join
72
+ @comments.clear
73
+ c
140
74
  end
141
75
 
142
- def node_assign(lhs, rhs)
143
- return nil unless lhs
144
-
145
- rhs = value_expr rhs
146
-
147
- case lhs[0]
148
- when :gasgn, :iasgn, :lasgn, :dasgn, :dasgn_curr,
149
- :masgn, :cdecl, :cvdecl, :cvasgn then
150
- lhs << rhs
151
- when :attrasgn, :call then
152
- args = lhs.array(true) || lhs.argscat(true) || lhs.splat(true) # FIX: fragile
153
- # args = case lhs[1][1]
154
- # when :array, :argscat, :splat then
155
- # lhs.delete_at 1
156
- # else
157
- # nil # TODO: check - no clue what it should be, or even if
158
- # end
159
-
160
- lhs << arg_add(args, rhs)
161
- end
162
-
163
- lhs
76
+ def expr_beg_push val
77
+ cond.push false
78
+ cmdarg.push false
79
+ self.lex_state = :expr_beg
80
+ self.yacc_value = val
164
81
  end
165
82
 
166
- def gettable(id)
167
- id = id.to_sym if Token === id # HACK
168
- id = id.last.to_sym if Sexp === id # HACK
169
- id = id.to_sym if String === id # HACK
170
-
171
- return s(:self) if id == :self
172
- return s(:nil) if id == :nil
173
- return s(:true) if id == :true
174
- return s(:false) if id == :false
175
- return s(:str, self.file) if id == :"__FILE__"
176
- return s(:lit, lexer.src.current_line) if id == :"__LINE__"
177
-
178
- result = case id.to_s
179
- when /^@@/ then
180
- s(:cvar, id)
181
- when /^@/ then
182
- s(:ivar, id)
183
- when /^\$/ then
184
- s(:gvar, id)
185
- when /^[A-Z]/ then
186
- s(:const, id)
187
- else
188
- type = env[id]
189
- if type then
190
- s(type, id)
191
- elsif env.dynamic? and :dvar == env[id] then
192
- s(:dvar, id)
193
- else
194
- s(:vcall, id)
195
- end
196
- end
197
-
198
- return result if result
199
-
200
- raise "identifier #{id.inspect} is not valid"
83
+ def fix_arg_lex_state
84
+ self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
85
+ :expr_arg
86
+ else
87
+ :expr_beg
88
+ end
201
89
  end
202
90
 
203
- def block_append(head, tail, strip_tail_block=false)
204
- return head unless tail
205
- return tail unless head
91
+ def heredoc here # 63 lines
92
+ _, eos, func, last_line = here
206
93
 
207
- case head[0]
208
- when :lit, :str then
209
- return tail
210
- end
94
+ indent = (func & STR_FUNC_INDENT) != 0
95
+ expand = (func & STR_FUNC_EXPAND) != 0
96
+ eos_re = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
97
+ err_msg = "can't match #{eos_re.inspect} anywhere in "
211
98
 
212
- head = remove_begin(head)
213
- head = s(:block, head) unless head[0] == :block
99
+ rb_compile_error err_msg if
100
+ src.eos?
214
101
 
215
- if strip_tail_block and Sexp === tail and tail[0] == :block then
216
- head.push(*tail.values)
217
- else
218
- head << tail
102
+ if src.beginning_of_line? && src.scan(eos_re) then
103
+ src.unread_many last_line # TODO: figure out how to remove this
104
+ self.yacc_value = eos
105
+ return :tSTRING_END
219
106
  end
220
- end
221
107
 
222
- def new_yield(node)
223
- if node then
224
- raise SyntaxError, "Block argument should not be given." if
225
- node.node_type == :block_pass
108
+ self.string_buffer = []
226
109
 
227
- node = node.last if node.node_type == :array and node.size == 2
228
- end
110
+ if expand then
111
+ case
112
+ when src.scan(/#[$@]/) then
113
+ src.pos -= 1 # FIX omg stupid
114
+ self.yacc_value = src.matched
115
+ return :tSTRING_DVAR
116
+ when src.scan(/#[{]/) then
117
+ self.yacc_value = src.matched
118
+ return :tSTRING_DBEG
119
+ when src.scan(/#/) then
120
+ string_buffer << '#'
121
+ end
229
122
 
230
- return s(:yield, node)
231
- end
123
+ until src.scan(eos_re) do
124
+ c = tokadd_string func, "\n", nil
232
125
 
233
- def logop(type, left, right)
234
- left = value_expr left
126
+ rb_compile_error err_msg if
127
+ c == RubyLexer::EOF
235
128
 
236
- if left and left[0] == type and not left.paren then
237
- node, second = left, nil
129
+ if c != "\n" then
130
+ self.yacc_value = string_buffer.join.delete("\r")
131
+ return :tSTRING_CONTENT
132
+ else
133
+ string_buffer << src.scan(/\n/)
134
+ end
238
135
 
239
- while (second = node[2]) && second[0] == type and not second.paren do
240
- node = second
136
+ rb_compile_error err_msg if
137
+ src.eos?
241
138
  end
242
139
 
243
- node[2] = s(type, second, right)
244
-
245
- return left
140
+ # tack on a NL after the heredoc token - FIX NL should not be needed
141
+ src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
142
+ else
143
+ until src.check(eos_re) do
144
+ string_buffer << src.scan(/.*(\n|\z)/)
145
+ rb_compile_error err_msg if
146
+ src.eos?
147
+ end
246
148
  end
247
149
 
248
- return s(type, left, right)
150
+ self.lex_strterm = [:heredoc, eos, func, last_line]
151
+ self.yacc_value = string_buffer.join.delete("\r")
152
+
153
+ return :tSTRING_CONTENT
249
154
  end
250
155
 
251
- def new_call recv, meth, args = nil # REFACTOR - merge with fcall
252
- if args && args[0] == :block_pass then
253
- new_args = args.array(true) || args.argscat(true) || args.splat(true)
254
- call = s(:call, recv, meth)
255
- call << new_args if new_args
256
- args << call
156
+ def heredoc_identifier # 51 lines
157
+ term, func = nil, STR_FUNC_BORING
158
+ self.string_buffer = []
257
159
 
258
- return args
160
+ case
161
+ when src.scan(/(-?)(['"`])(.*?)\2/) then
162
+ term = src[2]
163
+ unless src[1].empty? then
164
+ func |= STR_FUNC_INDENT
165
+ end
166
+ func |= case term
167
+ when "\'" then
168
+ STR_SQUOTE
169
+ when '"' then
170
+ STR_DQUOTE
171
+ else
172
+ STR_XQUOTE
173
+ end
174
+ string_buffer << src[3]
175
+ when src.scan(/-?(['"`])(?!\1*\Z)/) then
176
+ rb_compile_error "unterminated here document identifier"
177
+ when src.scan(/(-?)(\w+)/) then
178
+ term = '"'
179
+ func |= STR_DQUOTE
180
+ unless src[1].empty? then
181
+ func |= STR_FUNC_INDENT
182
+ end
183
+ string_buffer << src[2]
184
+ else
185
+ return nil
259
186
  end
260
- result = s(:call, recv, meth)
261
- result << args if args
262
- result
263
- end
264
187
 
265
- def new_fcall meth, args
266
- if args and args[0] == :block_pass then
267
- new_args = args.array(true) || args.argscat(true) || args.splat(true)
268
- call = s(:fcall, meth)
269
- call << new_args if new_args
270
- args << call
271
- return args
188
+ if src.check(/.*\n/) then
189
+ # TODO: think about storing off the char range instead
190
+ line = src.string[src.pos, src.matched_size]
191
+ src.string[src.pos, src.matched_size] = ''
192
+ else
193
+ line = nil
272
194
  end
273
195
 
274
- r = s(:fcall, meth)
275
- r << args if args and args != s(:array)
276
- r
277
- end
196
+ self.lex_strterm = [:heredoc, string_buffer.join, func, line]
278
197
 
279
- def arg_blk_pass node1, node2
280
- if node2 then
281
- node2.insert 1, node1
282
- return node2
198
+ if term == '`' then
199
+ self.yacc_value = "`"
200
+ return :tXSTRING_BEG
283
201
  else
284
- node1
202
+ self.yacc_value = "\""
203
+ return :tSTRING_BEG
285
204
  end
286
205
  end
287
206
 
288
- def get_match_node lhs, rhs
289
- if lhs then
290
- case lhs[0]
291
- when :dregx, :dregx_once then
292
- return s(:match2, lhs, rhs)
293
- when :lit then
294
- return s(:match2, lhs, rhs) if Regexp === lhs.last
295
- end
296
- end
207
+ def initialize
208
+ self.cond = StackState.new(:cond)
209
+ self.cmdarg = StackState.new(:cmdarg)
210
+ self.nest = 0
211
+ @comments = []
297
212
 
298
- if rhs then
299
- case rhs[0]
300
- when :dregx, :dregx_once then
301
- return s(:match3, rhs, lhs)
302
- when :lit then
303
- return s(:match3, rhs, lhs) if Regexp === rhs.last
304
- end
305
- end
213
+ reset
214
+ end
306
215
 
307
- return s(:call, lhs, :"=~", s(:array, rhs))
216
+ def int_with_base base
217
+ rb_compile_error "Invalid numeric format" if src.matched =~ /__/
218
+ self.yacc_value = src.matched.to_i(base)
219
+ return :tINTEGER
308
220
  end
309
221
 
310
- def cond node
311
- return nil if node.nil?
312
- node = value_expr node
313
-
314
- case node.first
315
- when :dregex then
316
- return s(:match2, node, s(:gvar, "$_".to_sym))
317
- when :regex then
318
- return s(:match, node)
319
- when :lit then
320
- if Regexp === node.last then
321
- return s(:match, node)
322
- else
323
- return node
222
+ def lex_state= o
223
+ raise "wtf?" unless Symbol === o
224
+ @lex_state = o
225
+ end
226
+
227
+ attr_writer :lineno
228
+ def lineno
229
+ @lineno ||= src.lineno
230
+ end
231
+
232
+ ##
233
+ # Parse a number from the input stream.
234
+ #
235
+ # @param c The first character of the number.
236
+ # @return An int constant which represents a token.
237
+
238
+ def parse_number
239
+ self.lex_state = :expr_end
240
+
241
+ case
242
+ when src.scan(/[+-]?0[xbd]\b/) then
243
+ rb_compile_error "Invalid numeric format"
244
+ when src.scan(/[+-]?0x[a-f0-9_]+/i) then
245
+ int_with_base(16)
246
+ when src.scan(/[+-]?0b[01_]+/) then
247
+ int_with_base(2)
248
+ when src.scan(/[+-]?0d[0-9_]+/) then
249
+ int_with_base(10)
250
+ when src.scan(/[+-]?0o?[0-7_]*[89]/) then
251
+ rb_compile_error "Illegal octal digit."
252
+ when src.scan(/[+-]?0o?[0-7_]+|0o/) then
253
+ int_with_base(8)
254
+ when src.scan(/[+-]?[\d_]+_(e|\.)/) then
255
+ rb_compile_error "Trailing '_' in number."
256
+ when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
257
+ number = src.matched
258
+ if number =~ /__/ then
259
+ rb_compile_error "Invalid numeric format"
324
260
  end
325
- when :and then
326
- return s(:and, cond(node[1]), cond(node[2]))
327
- when :or then
328
- return s(:or, cond(node[1]), cond(node[2]))
329
- when :dot2 then
330
- label = "flip#{node.hash}"
331
- env[label] = self.env.dynamic? ? :dvar : :lvar
332
- return s(:flip2, node[1], node[2])
333
- when :dot3 then
334
- label = "flip#{node.hash}"
335
- env[label] = self.env.dynamic? ? :dvar : :lvar
336
- return s(:flip3, node[1], node[2])
261
+ self.yacc_value = number.to_f
262
+ :tFLOAT
263
+ when src.scan(/[+-]?0\b/) then
264
+ int_with_base(10)
265
+ when src.scan(/[+-]?[\d_]+\b/) then
266
+ int_with_base(10)
337
267
  else
338
- return node
268
+ rb_compile_error "Bad number format"
339
269
  end
340
270
  end
341
271
 
342
- def append_to_block head, tail # FIX: wtf is this?!? switch to block_append
343
- return head if tail.nil?
344
- return tail if head.nil?
272
+ def parse_quote # 58 lines
273
+ beg, nnd, short_hand, c = nil, nil, false, nil
345
274
 
346
- head = s(:block, head) unless head.first == :block
347
- head << tail
348
- end
275
+ if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
276
+ rb_compile_error "unknown type of %string" if src.matched_size == 2
277
+ c, beg, short_hand = src.matched, src.getch, false
278
+ else # Short-hand (e.g. %{, %., %!, etc)
279
+ c, beg, short_hand = 'Q', src.getch, true
280
+ end
349
281
 
350
- def new_super args
351
- if args && args.first == :block_pass then
352
- t, body, bp = args
353
- result = s(t, bp, s(:super, body))
354
- else
355
- result = s(:super)
356
- result << args if args and args != s(:array)
282
+ if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
283
+ rb_compile_error "unterminated quoted string meets end of file"
357
284
  end
358
- result
359
- end
360
285
 
361
- def aryset receiver, index
362
- s(:attrasgn, receiver, :"[]=", index)
363
- end
286
+ # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
287
+ nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
288
+ nnd, beg = beg, "\0" if nnd.nil?
289
+
290
+ token_type, self.yacc_value = nil, "%#{c}#{beg}"
291
+ token_type, string_type = case c
292
+ when 'Q' then
293
+ ch = short_hand ? nnd : c + beg
294
+ self.yacc_value = "%#{ch}"
295
+ [:tSTRING_BEG, STR_DQUOTE]
296
+ when 'q' then
297
+ [:tSTRING_BEG, STR_SQUOTE]
298
+ when 'W' then
299
+ src.scan(/\s*/)
300
+ [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
301
+ when 'w' then
302
+ src.scan(/\s*/)
303
+ [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
304
+ when 'x' then
305
+ [:tXSTRING_BEG, STR_XQUOTE]
306
+ when 'r' then
307
+ [:tREGEXP_BEG, STR_REGEXP]
308
+ when 's' then
309
+ self.lex_state = :expr_fname
310
+ [:tSYMBEG, STR_SSYM]
311
+ end
312
+
313
+ rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
314
+ token_type.nil?
315
+
316
+ self.lex_strterm = [:strterm, string_type, nnd, beg]
364
317
 
365
- def arg_concat node1, node2
366
- return node2.nil? ? node1 : s(:argscat, node1, node2)
318
+ return token_type
367
319
  end
368
320
 
369
- def list_append list, item # TODO: nuke me *sigh*
370
- return s(:array, item) unless list
371
- list << item
372
- end
321
+ def parse_string(quote) # 65 lines
322
+ _, string_type, term, open = quote
373
323
 
374
- def literal_concat head, tail
375
- return tail unless head
376
- return head unless tail
324
+ space = false # FIX: remove these
325
+ func = string_type
326
+ paren = open
327
+ term_re = Regexp.escape term
377
328
 
378
- htype, ttype = head[0], tail[0]
329
+ awords = (func & STR_FUNC_AWORDS) != 0
330
+ regexp = (func & STR_FUNC_REGEXP) != 0
331
+ expand = (func & STR_FUNC_EXPAND) != 0
379
332
 
380
- head = s(:dstr, '', head) if htype == :evstr
333
+ unless func then # FIX: impossible, prolly needs == 0
334
+ self.lineno = nil
335
+ return :tSTRING_END
336
+ end
381
337
 
382
- case ttype
383
- when :str then
384
- if htype == :str
385
- head[-1] << tail[-1]
386
- elsif htype == :dstr and head.size == 2 then
387
- head[-1] << tail[-1]
388
- else
389
- head << tail
390
- end
391
- when :dstr then
392
- if htype == :str then
393
- tail[1] = head[-1] + tail[1]
394
- head = tail
395
- else
396
- tail[0] = :array
397
- tail[1] = s(:str, tail[1])
398
- tail.delete_at 1 if tail[1] == s(:str, '')
338
+ space = true if awords and src.scan(/\s+/)
399
339
 
400
- head.push(*tail[1..-1])
401
- end
402
- when :evstr then
403
- head[0] = :dstr if htype == :str
404
- if head.size == 2 and tail[1][0] == :str then
405
- head[-1] << tail[1][-1]
406
- head[0] = :str if head.size == 2 # HACK ?
340
+ if self.nest == 0 && src.scan(/#{term_re}/) then
341
+ if awords then
342
+ quote[1] = nil
343
+ return :tSPACE
344
+ elsif regexp then
345
+ self.yacc_value = self.regx_options
346
+ self.lineno = nil
347
+ return :tREGEXP_END
407
348
  else
408
- head.push(tail)
349
+ self.yacc_value = term
350
+ self.lineno = nil
351
+ return :tSTRING_END
409
352
  end
410
353
  end
411
354
 
412
- return head
413
- end
355
+ if space then
356
+ return :tSPACE
357
+ end
414
358
 
415
- def remove_begin node
416
- node = node[-1] if node and node[0] == :begin and node.size == 2
417
- node
418
- end
359
+ self.string_buffer = []
419
360
 
420
- def ret_args node
421
- if node then
422
- if node[0] == :block_pass then
423
- raise SyntaxError, "block argument should not be given"
361
+ if expand
362
+ case
363
+ when src.scan(/#(?=[$@])/) then
364
+ return :tSTRING_DVAR
365
+ when src.scan(/#[{]/) then
366
+ return :tSTRING_DBEG
367
+ when src.scan(/#/) then
368
+ string_buffer << '#'
424
369
  end
425
-
426
- node = node.last if node[0] == :array && node.size == 2
427
- node = s(:svalue, node) if node[0] == :splat and not node.paren # HACK matz wraps ONE of the FOUR splats in a newline to distinguish. I use paren for now. ugh
428
370
  end
429
371
 
430
- node
431
- end
372
+ if tokadd_string(func, term, paren) == RubyLexer::EOF then
373
+ rb_compile_error "unterminated string meets end of file"
374
+ end
432
375
 
433
- def value_expr node # HACK
434
- node = remove_begin node
435
- node[2] = value_expr(node[2]) if node and node[0] == :if
436
- node
437
- end
376
+ self.yacc_value = string_buffer.join
438
377
 
439
- def void_stmts node
440
- return nil unless node
441
- return node unless node[0] == :block
442
378
 
443
- node[1..-2] = node[1..-2].map { |n| remove_begin(n) }
444
- node
379
+ return :tSTRING_CONTENT
445
380
  end
446
381
 
447
- ############################################################
448
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
382
+ def rb_compile_error msg
383
+ msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
384
+ raise SyntaxError, msg
385
+ end
386
+
387
+ def read_escape # 51 lines
388
+ case
389
+ when src.scan(/\\/) then # Backslash
390
+ '\\'
391
+ when src.scan(/n/) then # newline
392
+ "\n"
393
+ when src.scan(/t/) then # horizontal tab
394
+ "\t"
395
+ when src.scan(/r/) then # carriage-return
396
+ "\r"
397
+ when src.scan(/f/) then # form-feed
398
+ "\f"
399
+ when src.scan(/v/) then # vertical tab
400
+ "\13"
401
+ when src.scan(/a/) then # alarm(bell)
402
+ "\007"
403
+ when src.scan(/e/) then # escape
404
+ "\033"
405
+ when src.scan(/b/) then # backspace
406
+ "\010"
407
+ when src.scan(/s/) then # space
408
+ " "
409
+ when src.scan(/[0-7]{1,3}/) then # octal constant
410
+ src.matched.to_i(8).chr
411
+ when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
412
+ src[1].to_i(16).chr
413
+ when src.scan(/M-\\/) then
414
+ c = self.read_escape
415
+ c[0] = (c[0].ord | 0x80).chr
416
+ c
417
+ when src.scan(/M-(.)/) then
418
+ c = src[1]
419
+ c[0] = (c[0].ord | 0x80).chr
420
+ c
421
+ when src.scan(/C-\\|c\\/) then
422
+ c = self.read_escape
423
+ c[0] = (c[0].ord & 0x9f).chr
424
+ c
425
+ when src.scan(/C-\?|c\?/) then
426
+ 0177.chr
427
+ when src.scan(/(C-|c)(.)/) then
428
+ c = src[2]
429
+ c[0] = (c[0].ord & 0x9f).chr
430
+ c
431
+ when src.scan(/[McCx0-9]/) || src.eos? then
432
+ rb_compile_error("Invalid escape character syntax")
433
+ else
434
+ src.getch
435
+ end
436
+ end
449
437
 
450
- def dyna_init body, known_vars = []
451
- var = nil
452
- vars = self.env.dynamic.keys - known_vars
438
+ def regx_options # 15 lines
439
+ good, bad = [], []
453
440
 
454
- vars.each do |id|
455
- if self.env.used? id then
456
- var = s(:dasgn_curr, id, var).compact
457
- end
441
+ if src.scan(/[a-z]+/) then
442
+ good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
458
443
  end
459
444
 
460
- self.block_append(var, body, body && body[0] == :block)
461
- end
445
+ unless bad.empty? then
446
+ rb_compile_error("unknown regexp option%s - %s" %
447
+ [(bad.size > 1 ? "s" : ""), bad.join.inspect])
448
+ end
462
449
 
463
- def warning s
464
- # do nothing for now
450
+ return good.join
465
451
  end
466
452
 
467
- kill :is_in_def, :is_in_single, :push_local_scope, :pop_local_scope, :support
453
+ def reset
454
+ self.command_start = true
455
+ self.lex_strterm = nil
456
+ self.token = nil
457
+ self.yacc_value = nil
468
458
 
469
- # END HACK
470
- ############################################################$
459
+ @src = nil
460
+ @lex_state = nil
461
+ end
471
462
 
472
- end
463
+ def src= src
464
+ raise "bad src: #{src.inspect}" unless String === src
465
+ @src = RPStringScanner.new(src)
466
+ end
467
+
468
+ def tokadd_escape term # 20 lines
469
+ case
470
+ when src.scan(/\\\n/) then
471
+ # just ignore
472
+ when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
473
+ self.string_buffer << src.matched
474
+ when src.scan(/\\([MC]-|c)(?=\\)/) then
475
+ self.string_buffer << src.matched
476
+ self.tokadd_escape term
477
+ when src.scan(/\\([MC]-|c)(.)/) then
478
+ self.string_buffer << src.matched
479
+ when src.scan(/\\[McCx]/) then
480
+ rb_compile_error "Invalid escape character syntax"
481
+ when src.scan(/\\(.)/m) then
482
+ self.string_buffer << src.matched
483
+ else
484
+ rb_compile_error "Invalid escape character syntax"
485
+ end
486
+ end
473
487
 
474
- class RubyLexer
475
- attr_accessor :command_start
476
- attr_accessor :cmdarg
477
- attr_accessor :cond
478
- attr_accessor :nest
488
+ def tokadd_string(func, term, paren) # 105 lines
489
+ awords = (func & STR_FUNC_AWORDS) != 0
490
+ escape = (func & STR_FUNC_ESCAPE) != 0
491
+ expand = (func & STR_FUNC_EXPAND) != 0
492
+ regexp = (func & STR_FUNC_REGEXP) != 0
493
+ symbol = (func & STR_FUNC_SYMBOL) != 0
479
494
 
480
- # Additional context surrounding tokens that both the lexer and
481
- # grammar use.
482
- attr_reader :lex_state
495
+ paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
496
+ term_re = Regexp.new(Regexp.escape(term))
483
497
 
484
- def lex_state= o
485
- raise "wtf?" unless Symbol === o
486
- @lex_state = o
487
- end
498
+ until src.eos? do
499
+ c = nil
500
+ handled = true
501
+ case
502
+ when self.nest == 0 && src.scan(term_re) then
503
+ src.pos -= 1
504
+ break
505
+ when paren_re && src.scan(paren_re) then
506
+ self.nest += 1
507
+ when src.scan(term_re) then
508
+ self.nest -= 1
509
+ when awords && src.scan(/\s/) then
510
+ src.pos -= 1
511
+ break
512
+ when expand && src.scan(/#(?=[\$\@\{])/) then
513
+ src.pos -= 1
514
+ break
515
+ when expand && src.scan(/#(?!\n)/) then
516
+ # do nothing
517
+ when src.check(/\\/) then
518
+ case
519
+ when awords && src.scan(/\\\n/) then
520
+ string_buffer << "\n"
521
+ next
522
+ when awords && src.scan(/\\\s/) then
523
+ c = ' '
524
+ when expand && src.scan(/\\\n/) then
525
+ next
526
+ when regexp && src.check(/\\/) then
527
+ self.tokadd_escape term
528
+ next
529
+ when expand && src.scan(/\\/) then
530
+ c = self.read_escape
531
+ when src.scan(/\\\n/) then
532
+ # do nothing
533
+ when src.scan(/\\\\/) then
534
+ string_buffer << '\\' if escape
535
+ c = '\\'
536
+ when src.scan(/\\/) then
537
+ unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
538
+ string_buffer << "\\"
539
+ end
540
+ else
541
+ handled = false
542
+ end
543
+ else
544
+ handled = false
545
+ end # case
488
546
 
489
- attr_accessor :end_seen # TODO: figure out if I really need this
547
+ unless handled then
490
548
 
491
- attr_accessor :lex_strterm
549
+ t = Regexp.escape term
550
+ x = Regexp.escape(paren) if paren && paren != "\000"
551
+ re = if awords then
552
+ /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
553
+ else
554
+ /[^#{t}#{x}\#\0\\]+|./
555
+ end
492
556
 
493
- # Used for tiny smidgen of grammar in lexer
494
- attr_accessor :parser_support # TODO: remove
557
+ src.scan re
558
+ c = src.matched
495
559
 
496
- # Stream of data that yylex examines.
497
- attr_accessor :src
560
+ rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
561
+ end # unless handled
498
562
 
499
- # Last token read via yylex.
500
- attr_accessor :token
563
+ c ||= src.matched
564
+ string_buffer << c
565
+ end # until
501
566
 
502
- # Tempory buffer to build up a potential token. Consumer takes
503
- # responsibility to reset this before use.
504
- attr_accessor :token_buffer
567
+ c ||= src.matched
568
+ c = RubyLexer::EOF if src.eos?
505
569
 
506
- # Value of last token which had a value associated with it.
507
- attr_accessor :yacc_value
508
570
 
509
- # What handles warnings
510
- attr_accessor :warnings
571
+ return c
572
+ end
511
573
 
512
- # TODO: remove all of these
513
- alias :source= :src=
514
- alias :str_term :lex_strterm
515
- alias :str_term= :lex_strterm=
516
- alias :state :lex_state
517
- alias :state= :lex_state=
518
- alias :value :yacc_value
519
- alias :value= :yacc_value=
520
- alias :getCmdArgumentState :cmdarg
574
+ def unescape s
575
+
576
+ r = {
577
+ "a" => "\007",
578
+ "b" => "\010",
579
+ "e" => "\033",
580
+ "f" => "\f",
581
+ "n" => "\n",
582
+ "r" => "\r",
583
+ "s" => " ",
584
+ "t" => "\t",
585
+ "v" => "\13",
586
+ "\\" => '\\',
587
+ "\n" => "",
588
+ "C-\?" => 0177.chr,
589
+ "c\?" => 0177.chr,
590
+ }[s]
591
+
592
+ return r if r
593
+
594
+ case s
595
+ when /^[0-7]{1,3}/ then
596
+ $&.to_i(8).chr
597
+ when /^x([0-9a-fA-F]{1,2})/ then
598
+ $1.to_i(16).chr
599
+ when /^M-(.)/ then
600
+ ($1[0].ord | 0x80).chr
601
+ when /^(C-|c)(.)/ then
602
+ ($2[0].ord & 0x9f).chr
603
+ when /^[McCx0-9]/ then
604
+ rb_compile_error("Invalid escape character syntax")
605
+ else
606
+ s
607
+ end
608
+ end
521
609
 
522
- # Give a name to a value. Enebo: This should be used more.
523
- # HACK OMG HORRIBLE KILL ME NOW. Enebo, no. this shouldn't be used more
524
- EOF = nil # was 0... ugh
610
+ def warning s
611
+ # do nothing for now
612
+ end
525
613
 
526
- # ruby constants for strings (should this be moved somewhere else?)
527
- STR_FUNC_ESCAPE=0x01
528
- STR_FUNC_EXPAND=0x02
529
- STR_FUNC_REGEXP=0x04
530
- STR_FUNC_QWORDS=0x08
531
- STR_FUNC_SYMBOL=0x10
532
- STR_FUNC_INDENT=0x20 # <<-HEREDOC
533
-
534
- STR_SQUOTE = 0
535
- STR_DQUOTE = STR_FUNC_EXPAND
536
- STR_XQUOTE = STR_FUNC_EXPAND
537
- STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
538
- STR_SSYM = STR_FUNC_SYMBOL
539
- STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
614
+ ##
615
+ # Returns the next token. Also sets yacc_value if needed.
616
+ #
617
+ # @return Description of the Returned Value
540
618
 
541
- def initialize
542
- self.parser_support = nil
543
- self.token_buffer = []
544
- self.cond = StackState.new(:cond)
545
- self.cmdarg = StackState.new(:cmdarg)
546
- self.nest = 0
547
- self.end_seen = false
619
+ def yylex # 826 lines
548
620
 
549
- reset
550
- end
621
+ c = ''
622
+ space_seen = false
623
+ command_state = false
624
+ src = self.src
551
625
 
552
- def reset
553
626
  self.token = nil
554
627
  self.yacc_value = nil
555
- self.src = nil
556
- @lex_state = nil
557
- self.lex_strterm = nil
558
- self.command_start = true
559
- end
560
628
 
561
- # How the parser advances to the next token.
562
- #
563
- # @return true if not at end of file (EOF).
629
+ return yylex_string if lex_strterm
564
630
 
565
- def advance
566
- r = yylex
567
- self.token = r
568
- return r != RubyLexer::EOF
569
- end
631
+ command_state = self.command_start
632
+ self.command_start = false
570
633
 
571
- def parse_string(quote)
572
- _, string_type, term, open = quote
634
+ last_state = lex_state
573
635
 
574
- space = false # FIX: remove these
575
- func = string_type
576
- paren = open
636
+ loop do # START OF CASE
637
+ if src.scan(/\ |\t|\r|\f|\13/) then # white spaces, 13 = '\v
638
+ space_seen = true
639
+ next
640
+ elsif src.check(/[^a-zA-Z]/) then
641
+ if src.scan(/\n|#/) then
642
+ self.lineno = nil
643
+ c = src.matched
644
+ if c == '#' then
645
+ src.unread c # ok
646
+
647
+ while src.scan(/\s*#.*(\n+|\z)/) do
648
+ @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
649
+ end
577
650
 
578
- return :tSTRING_END unless func
651
+ if src.eos? then
652
+ return RubyLexer::EOF
653
+ end
654
+ else
655
+ end
579
656
 
580
- c = src.read
657
+ # Replace a string of newlines with a single one
658
+ src.scan(/\n+/)
581
659
 
582
- if (func & STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
583
- begin
584
- c = src.read
585
- break if c == RubyLexer::EOF # HACK UGH
586
- end while String === c and c =~ /\s/
587
- space = true
588
- end
660
+ if [:expr_beg, :expr_fname,
661
+ :expr_dot, :expr_class].include? lex_state then
662
+ next
663
+ end
589
664
 
590
- if c == term && self.nest == 0 then
591
- if func & STR_FUNC_QWORDS != 0 then
592
- quote[1] = nil
593
- return ' '
594
- end
595
- unless func & STR_FUNC_REGEXP != 0 then
596
- self.yacc_value = t(term)
597
- return :tSTRING_END
598
- end
599
- self.yacc_value = self.regx_options
600
- return :tREGEXP_END
601
- end
602
-
603
- if space then
604
- src.unread c
605
- return ' '
606
- end
607
-
608
- self.token_buffer = []
609
-
610
- if (func & STR_FUNC_EXPAND) != 0 && c == '#' then
611
- case c = src.read
612
- when '$', '@' then
613
- src.unread c
614
- return :tSTRING_DVAR
615
- when '{' then
616
- return :tSTRING_DBEG
617
- end
618
- token_buffer << '#'
619
- end
620
-
621
- src.unread c
622
-
623
- if tokadd_string(func, term, paren, token_buffer) == RubyLexer::EOF then
624
- # HACK ruby_sourceline = nd_line(quote)
625
- raise "unterminated string meets end of file"
626
- return :tSTRING_END
627
- end
628
-
629
- self.yacc_value = s(:str, token_buffer.join)
630
- return :tSTRING_CONTENT
631
- end
632
-
633
- def regx_options
634
- options = []
635
- bad = []
636
-
637
- while c = src.read and c =~ /[a-z]/ do
638
- case c
639
- when /^[ixmonesu]$/ then
640
- options << c
641
- else
642
- bad << c
643
- end
644
- end
645
-
646
- src.unread c
647
-
648
- rb_compile_error("unknown regexp option%s - %s" %
649
- [(bad.size > 1 ? "s" : ""), bad.join.inspect]) unless bad.empty?
650
-
651
- return options.join
652
- end
653
-
654
- def tokadd_escape term
655
- case c = src.read
656
- when "\n" then
657
- return false # just ignore
658
- when /0-7/ then # octal constant
659
- tokadd "\\"
660
- tokadd c
661
-
662
- 2.times do |i|
663
- c = src.read
664
- # HACK goto eof if (c == -1)
665
- if c < "0" || "7" < c then
666
- pushback c
667
- break
668
- end
669
- tokadd c
670
- end
671
-
672
- return false
673
- when "x" then # hex constant
674
- tokadd "\\"
675
- tokadd c
676
-
677
- 2.times do
678
- c = src.read
679
- unless c =~ /[0-9a-f]/i then # TODO error case? empty?
680
- src.unread c
681
- break
682
- end
683
- tokadd c
684
- end
685
-
686
- return false
687
- when "M" then
688
- if (c = src.read()) != "-" then
689
- yyerror "Invalid escape character syntax"
690
- pushback c
691
- return false
692
- end
693
- tokadd "\\"
694
- tokadd "M"
695
- tokadd "-"
696
- raise "not yet"
697
- # goto escaped;
698
- when "C" then
699
- if (c = src.read) != "-" then
700
- yyerror "Invalid escape character syntax"
701
- pushback c
702
- return false
703
- end
704
- tokadd "\\"
705
- tokadd "C"
706
- tokadd "-"
707
- raise "not yet"
708
- # HACK goto escaped;
709
- when "c" then
710
- tokadd "\\"
711
- tokadd "c"
712
- # HACK escaped:
713
- if (c = src.read) == "\\" then
714
- return tokadd_escape(term)
715
- elsif c == -1 then
716
- raise "no"
717
- # HACK goto eof
718
- end
719
- tokadd c
720
- return false
721
- # HACK eof
722
- when RubyLexer::EOF then
723
- yyerror "Invalid escape character syntax"
724
- return true
725
- else
726
- if (c != "\\" || c != term)
727
- tokadd "\\"
728
- end
729
- tokadd c
730
- end
731
- return false
732
- end
733
-
734
- def read_escape
735
- case c = src.read
736
- when "\\" then # Backslash
737
- return c
738
- when "n" then # newline
739
- return "\n"
740
- when "t" then # horizontal tab
741
- return "\t"
742
- when "r" then # carriage-return
743
- return "\r"
744
- when "f" then # form-feed
745
- return "\f"
746
- when "v" then # vertical tab
747
- return "\13"
748
- when "a" then # alarm(bell)
749
- return "\007"
750
- when 'e' then # escape
751
- return "\033"
752
- when /[0-7]/ then # octal constant
753
- src.unread c # TODO this seems dumb
754
-
755
- n = 0
756
-
757
- 3.times do
758
- c = src.read
759
- unless c =~ /[0-7]/ then
760
- src.unread c
761
- break
762
- end
763
- n <<= 3
764
- n |= c[0] - ?0
765
- end
766
-
767
- return n.chr
768
- when "x" then # hex constant
769
- n = 0
770
-
771
- 2.times do
772
- c = src.read.downcase
773
- unless c =~ /[0-9a-f]/i then
774
- src.unread c
775
- break
776
- end
777
- n <<= 4
778
- n |= case c[0] # TODO: I'm sure there is a better way... but I'm tired
779
- when ?a..?f then
780
- c[0] - ?a + 10
781
- when ?A..?F then
782
- c[0] - ?A + 10
783
- when ?0..?9 then
784
- c[0] - ?0
785
- else
786
- raise "wtf?: #{c.inspect}"
787
- end
788
- end
789
-
790
- return n.chr
791
- when "b" then # backspace
792
- return "\010"
793
- when "s" then # space
794
- return " "
795
- when "M" then
796
- c = src.read
797
- if c != "-" then
798
- yyerror("Invalid escape character syntax")
799
- src.unread c
800
- return "\0"
801
- end
802
-
803
- c = src.read
804
- case c
805
- when "\\" then
806
- c = self.read_escape
807
- c[0] |= 0x80
808
- return c
809
- when RubyLexer::EOF then
810
- yyerror("Invalid escape character syntax");
811
- return '\0';
812
- else
813
- c[0] |= 0x80
814
- return c
815
- end
816
- when "C", "c" then
817
- if (c = src.read) != "-" then
818
- yyerror("Invalid escape character syntax")
819
- pushback(c)
820
- return "\0"
821
- end if c == "C"
822
-
823
- case c = src.read
824
- when "\\" then
825
- c = read_escape
826
- when "?" then
827
- return 0177
828
- when RubyLexer::EOF then
829
- yyerror("Invalid escape character syntax");
830
- return "\0";
831
- end
832
- c[0] &= 0x9f
833
- return c
834
- when RubyLexer::EOF then
835
- yyerror("Invalid escape character syntax")
836
- return "\0"
837
- else
838
- return c
839
- end
840
- end
841
-
842
- def tokadd_string(func, term, paren, buffer)
843
- until (c = src.read) == RubyLexer::EOF do
844
- if c == paren then
845
- self.nest += 1
846
- elsif c == term then
847
- if self.nest == 0 then
848
- src.unread c
849
- break
850
- end
851
- self.nest -= 1
852
- elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek("\n") then
853
- c2 = src.read
854
665
 
855
- if c2 == '$' || c2 == '@' || c2 == '{' then
856
- src.unread c2
857
- src.unread c
858
- break
859
- end
860
- src.unread(c2)
861
- elsif c == "\\" then
862
- c = src.read
863
- case c
864
- when "\n" then
865
- break if ((func & RubyLexer::STR_FUNC_QWORDS) != 0) # TODO: check break
866
- next if ((func & RubyLexer::STR_FUNC_EXPAND) != 0)
867
-
868
- buffer << "\\"
869
- when "\\" then
870
- buffer << c if (func & RubyLexer::STR_FUNC_ESCAPE) != 0
871
- else
872
- if (func & RubyLexer::STR_FUNC_REGEXP) != 0 then
873
- src.unread c
874
- tokadd_escape term
875
- next
876
- elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 then
877
- src.unread c
878
- if (func & RubyLexer::STR_FUNC_ESCAPE) != 0 then
879
- buffer << "\\"
880
- end
881
- c = read_escape
882
- elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
883
- # ignore backslashed spaces in %w
884
- elsif c != term && !(paren && c == paren) then
885
- buffer << "\\"
886
- end
887
- end
888
- # else if (ismbchar(c)) {
889
- # int i, len = mbclen(c)-1;
890
- # for (i = 0; i < len; i++) {
891
- # tokadd(c);
892
- # c = nextc();
893
- # }
894
- # }
895
- elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
896
- src.unread c
897
- break
898
- end
899
-
900
- if c == "\0" && (func & RubyLexer::STR_FUNC_SYMBOL) != 0 then
901
- raise SyntaxError, "symbol cannot contain '\\0'"
902
- end
903
-
904
- buffer << c # unless c == "\r"
905
- end # while
906
-
907
- return c
908
- end
909
-
910
- def heredoc here
911
- _, eos, func, last_line = here
912
-
913
- eosn = eos + "\n"
914
- err_msg = "can't find string #{eos.inspect} anywhere before EOF"
915
-
916
- indent = (func & RubyLexer::STR_FUNC_INDENT) != 0
917
- str = []
918
-
919
- raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
920
-
921
- if src.begin_of_line? && src.match_string(eosn, indent) then
922
- src.unread_many last_line
923
- self.yacc_value = t(eos)
924
- return :tSTRING_END
925
- end
926
-
927
- if (func & RubyLexer::STR_FUNC_EXPAND) == 0 then
928
- begin
929
- str << src.read_line
930
- raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
931
- end until src.match_string(eosn, indent)
932
- else
933
- c = src.read
934
- buffer = []
935
-
936
- if c == "#" then
937
- case c = src.read
938
- when "$", "@" then
939
- src.unread c
940
- self.yacc_value = t("#" + c)
941
- return :tSTRING_DVAR
942
- when "{" then
943
- self.yacc_value = t("#" + c)
944
- return :tSTRING_DBEG
945
- end
946
- buffer << "#"
947
- end
948
-
949
- src.unread c
950
-
951
- begin
952
- c = tokadd_string func, "\n", nil, buffer
953
-
954
- raise SyntaxError, err_msg if c == RubyLexer::EOF
955
-
956
- if c != "\n" then
957
- self.yacc_value = s(:str, buffer.join)
958
- return :tSTRING_CONTENT
959
- end
960
-
961
- buffer << src.read
962
-
963
- raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
964
- end until src.match_string(eosn, indent)
965
-
966
- str = buffer
967
- end
968
-
969
- src.unread_many eosn
970
-
971
- self.lex_strterm = s(:heredoc, eos, func, last_line)
972
- self.yacc_value = s(:str, str.join)
973
-
974
- return :tSTRING_CONTENT
975
- end
976
-
977
- def parse_quote(c)
978
- beg, nnd = nil, nil
979
- short_hand = false
980
-
981
- # Short-hand (e.g. %{,%.,%!,... versus %Q{).
982
- unless c =~ /[a-z0-9]/i then
983
- beg, c = c, 'Q'
984
- short_hand = true
985
- else # Long-hand (e.g. %Q{}).
986
- short_hand = false
987
- beg = src.read
988
- if beg =~ /[a-z0-9]/i then
989
- raise SyntaxError, "unknown type of %string"
990
- end
991
- end
992
-
993
- if c == RubyLexer::EOF or beg == RubyLexer::EOF then
994
- raise SyntaxError, "unterminated quoted string meets nnd of file"
995
- end
996
-
997
- # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
998
- nnd = case beg
999
- when '(' then
1000
- ')'
1001
- when '[' then
1002
- ']'
1003
- when '{' then
1004
- '}'
1005
- when '<' then
1006
- '>'
1007
- else
1008
- nnd, beg = beg, "\0"
1009
- nnd
1010
- end
1011
-
1012
- string_type, token_type = STR_DQUOTE, :tSTRING_BEG
1013
- self.yacc_value = t("%#{c}#{beg}")
1014
-
1015
- case (c)
1016
- when 'Q' then
1017
- self.yacc_value = t("%#{short_hand ? nnd : c + beg}")
1018
- when 'q' then
1019
- string_type, token_type = STR_SQUOTE, :tSTRING_BEG
1020
- when 'W' then
1021
- string_type, token_type = STR_DQUOTE | STR_FUNC_QWORDS, :tWORDS_BEG
1022
- begin c = src.read end while c =~ /\s/
1023
- src.unread(c)
1024
- when 'w' then
1025
- string_type, token_type = STR_SQUOTE | STR_FUNC_QWORDS, :tQWORDS_BEG
1026
- begin c = src.read end while c =~ /\s/
1027
- src.unread(c)
1028
- when 'x' then
1029
- string_type, token_type = STR_XQUOTE, :tXSTRING_BEG
1030
- when 'r' then
1031
- string_type, token_type = STR_REGEXP, :tREGEXP_BEG
1032
- when 's' then
1033
- string_type, token_type = STR_SSYM, :tSYMBEG
1034
- self.lex_state = :expr_fname
1035
- else
1036
- raise SyntaxError, "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'."
1037
- end
1038
-
1039
- self.lex_strterm = s(:strterm, string_type, nnd, beg)
1040
-
1041
- return token_type
1042
- end
1043
-
1044
- def heredoc_identifier
1045
- c = src.read
1046
- term = 42 # HACK
1047
- func = 0
1048
-
1049
- if c == '-' then
1050
- c = src.read
1051
- func = STR_FUNC_INDENT
1052
- end
1053
-
1054
- if c == "\'" || c == '"' || c == '`' then
1055
- if c == "\'" then
1056
- func |= STR_SQUOTE
1057
- elsif c == '"'
1058
- func |= STR_DQUOTE
1059
- else
1060
- func |= STR_XQUOTE
1061
- end
1062
-
1063
- token_buffer.clear
1064
- term = c
1065
-
1066
- while (c = src.read) != RubyLexer::EOF && c != term
1067
- token_buffer << c
1068
- end
1069
-
1070
- if c == RubyLexer::EOF then
1071
- raise SyntaxError, "unterminated here document identifier"
1072
- end
1073
- else
1074
- unless c =~ /\w/ then
1075
- src.unread c
1076
- src.unread '-' if (func & STR_FUNC_INDENT) != 0
1077
- return 0 # TODO: RubyLexer::EOF?
1078
- end
1079
- token_buffer.clear
1080
- term = '"'
1081
- func |= STR_DQUOTE
1082
- begin
1083
- token_buffer << c
1084
- end while (c = src.read) != RubyLexer::EOF && c =~ /\w/
1085
- src.unread c
1086
- end
1087
-
1088
- line = src.read_line
1089
- tok = token_buffer.join
1090
- self.lex_strterm = s(:heredoc, tok, func, line)
1091
-
1092
- if term == '`' then
1093
- self.yacc_value = t("`")
1094
- return :tXSTRING_BEG
1095
- end
1096
-
1097
- self.yacc_value = t("\"")
1098
- return :tSTRING_BEG
1099
- end
1100
-
1101
- def arg_ambiguous
1102
- self.warning("Ambiguous first argument. make sure.")
1103
- end
1104
-
1105
- ##
1106
- # Read a comment up to end of line. When found each comment will
1107
- # get stored away into the parser result so that any interested
1108
- # party can use them as they seem fit. One idea is that IDE authors
1109
- # can do distance based heuristics to associate these comments to
1110
- # the AST node they think they belong to.
1111
- #
1112
- # @param c last character read from lexer source
1113
- # @return newline or eof value
1114
-
1115
- def read_comment c
1116
- token_buffer.clear
1117
- token_buffer << c
1118
-
1119
- while (c = src.read) != "\n" do
1120
- break if c == RubyLexer::EOF
1121
- token_buffer << c
1122
- end
1123
- src.unread c
1124
-
1125
- # Store away each comment to parser result so IDEs can do whatever
1126
- # they want with them.
1127
- # HACK parser_support.result.add_comment(Node.comment(token_buffer.join))
1128
-
1129
- return c
1130
- end
1131
-
1132
- ##
1133
- # Returns the next token. Also sets yy_val is needed.
1134
- #
1135
- # @return Description of the Returned Value
1136
- # TODO: remove ALL sexps coming from here and move up to grammar
1137
- # TODO: only literal values should come up from the lexer.
1138
-
1139
- def yylex
1140
- c = ''
1141
- space_seen = false
1142
- command_state = false
1143
-
1144
- if lex_strterm then
1145
- token = nil
1146
-
1147
- if lex_strterm[0] == :heredoc then
1148
- token = self.heredoc(lex_strterm)
1149
- if token == :tSTRING_END then
1150
- self.lex_strterm = nil
1151
- self.lex_state = :expr_end
1152
- end
1153
- else
1154
- token = self.parse_string(lex_strterm)
1155
-
1156
- if token == :tSTRING_END || token == :tREGEXP_END then
1157
- self.lex_strterm = nil
666
+ self.command_start = true
667
+ self.lex_state = :expr_beg
668
+ return :tNL
669
+ elsif src.scan(/[\]\)\}]/) then
670
+ cond.lexpop
671
+ cmdarg.lexpop
1158
672
  self.lex_state = :expr_end
1159
- end
1160
- end
1161
-
1162
- return token
1163
- end
1164
-
1165
- command_state = self.command_start
1166
- self.command_start = false
1167
-
1168
- last_state = lex_state
1169
-
1170
- loop do
1171
- c = src.read
1172
- case c
1173
- when /\004|\032|\000/, RubyLexer::EOF then # ^D, ^Z, EOF
1174
- return RubyLexer::EOF
1175
- when /\ |\t|\f|\r|\13/ then # white spaces, 13 = '\v
1176
- space_seen = true
1177
- next
1178
- when /#|\n/ then
1179
- return 0 if c == '#' and read_comment(c) == 0 # FIX 0?
1180
- # Replace a string of newlines with a single one
1181
- while (c = src.read) == "\n"
1182
- # do nothing
1183
- end
1184
-
1185
- src.unread c
1186
-
1187
- if (lex_state == :expr_beg ||
1188
- lex_state == :expr_fname ||
1189
- lex_state == :expr_dot ||
1190
- lex_state == :expr_class) then
1191
- next
1192
- end
1193
-
1194
- self.command_start = true
1195
- self.lex_state = :expr_beg
1196
- return "\n"
1197
- when '*' then
1198
- c = src.read
1199
- if c == '*' then
1200
- c = src.read
1201
- if c == '=' then
673
+ self.yacc_value = src.matched
674
+ result = {
675
+ ")" => :tRPAREN,
676
+ "]" => :tRBRACK,
677
+ "}" => :tRCURLY
678
+ }[src.matched]
679
+ return result
680
+ elsif src.check(/\./) then
681
+ if src.scan(/\.\.\./) then
1202
682
  self.lex_state = :expr_beg
1203
- self.yacc_value = t("**")
1204
- return :tOP_ASGN
1205
- end
1206
- src.unread c
1207
- self.yacc_value = t("**")
1208
- c = :tPOW
1209
- else
1210
- if c == '=' then
683
+ self.yacc_value = "..."
684
+ return :tDOT3
685
+ elsif src.scan(/\.\./) then
1211
686
  self.lex_state = :expr_beg
1212
- self.yacc_value = t("*")
1213
- return :tOP_ASGN
1214
- end
1215
- src.unread c
1216
- if lex_state.is_argument && space_seen && c !~ /\s/ then
1217
- warning("`*' interpreted as argument prefix")
1218
- c = :tSTAR
1219
- elsif lex_state == :expr_beg || lex_state == :expr_mid then
1220
- c = :tSTAR
1221
- else
1222
- c = :tSTAR2
687
+ self.yacc_value = ".."
688
+ return :tDOT2
689
+ elsif src.scan(/\.\d/) then
690
+ rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
691
+ elsif src.scan(/\./) then
692
+ self.lex_state = :expr_dot
693
+ self.yacc_value = "."
694
+ return :tDOT
1223
695
  end
1224
- self.yacc_value = t("*")
1225
- end
1226
-
1227
- if lex_state == :expr_fname || lex_state == :expr_dot then
1228
- self.lex_state = :expr_arg
1229
- else
696
+ elsif src.scan(/\,/) then
1230
697
  self.lex_state = :expr_beg
1231
- end
1232
-
1233
- return c
1234
- when '!' then
1235
- self.lex_state = :expr_beg
1236
- if (c = src.read) == '=' then
1237
- self.yacc_value = t("!=")
1238
- return :tNEQ
1239
- end
1240
- if c == '~' then
1241
- self.yacc_value = t("!~")
1242
- return :tNMATCH
1243
- end
1244
- src.unread(c)
1245
- self.yacc_value = t("!")
1246
- return :tBANG
1247
- when '=' then
1248
- # documentation nodes - FIX: cruby much cleaner w/ lookahead
1249
- if src.was_begin_of_line and src.match_string "begin" then
1250
- self.token_buffer.clear
1251
- self.token_buffer << "begin"
1252
- c = src.read
1253
-
1254
- if c =~ /\s/ then
1255
- # In case last next was the newline.
1256
- src.unread(c)
1257
-
1258
- loop do
1259
- c = src.read
1260
- token_buffer << c
1261
-
1262
- # If a line is followed by a blank line put it back.
1263
- while c == "\n"
1264
- c = src.read
1265
- token_buffer << c
1266
- end
1267
-
1268
- if c == RubyLexer::EOF then
1269
- raise SyntaxError, "embedded document meets end of file"
1270
- end
1271
-
1272
- next unless c == '='
1273
-
1274
- if src.was_begin_of_line && src.match_string("end") then
1275
- token_buffer << "end"
1276
- token_buffer << src.read_line
1277
- src.unread "\n"
1278
- break
1279
- end
698
+ self.yacc_value = ","
699
+ return :tCOMMA
700
+ elsif src.scan(/\(/) then
701
+ result = :tLPAREN2
702
+ self.command_start = true
703
+ if lex_state == :expr_beg || lex_state == :expr_mid then
704
+ result = :tLPAREN
705
+ elsif space_seen then
706
+ if lex_state == :expr_cmdarg then
707
+ result = :tLPAREN_ARG
708
+ elsif lex_state == :expr_arg then
709
+ warning("don't put space before argument parentheses")
710
+ result = :tLPAREN2
1280
711
  end
1281
-
1282
- # parser_support.result.add_comment(Node.comment(token_buffer.join))
1283
- next
1284
712
  end
1285
- src.unread(c)
1286
- end
1287
713
 
714
+ self.expr_beg_push "("
1288
715
 
1289
- if lex_state == :expr_fname || lex_state == :expr_dot then
1290
- self.lex_state = :expr_arg
1291
- else
1292
- self.lex_state = :expr_beg
1293
- end
1294
-
1295
- c = src.read
1296
- if c == '=' then
1297
- c = src.read
1298
- if c == '=' then
1299
- self.yacc_value = t("===")
716
+ return result
717
+ elsif src.check(/\=/) then
718
+ if src.scan(/\=\=\=/) then
719
+ self.fix_arg_lex_state
720
+ self.yacc_value = "==="
1300
721
  return :tEQQ
1301
- end
1302
- src.unread(c)
1303
- self.yacc_value = t("==")
1304
- return :tEQ
1305
- end
1306
- if c == '~' then
1307
- self.yacc_value = t("=~")
1308
- return :tMATCH
1309
- elsif c == '>' then
1310
- self.yacc_value = t("=>")
1311
- return :tASSOC
1312
- end
1313
- src.unread(c)
1314
- self.yacc_value = t("=")
1315
- return '='
1316
- when '<' then
1317
- c = src.read
1318
- if (c == '<' &&
1319
- lex_state != :expr_end &&
1320
- lex_state != :expr_dot &&
1321
- lex_state != :expr_endarg &&
1322
- lex_state != :expr_class &&
1323
- (!lex_state.is_argument || space_seen)) then
1324
- tok = self.heredoc_identifier
1325
- return tok unless tok == 0
1326
- end
1327
- if lex_state == :expr_fname || lex_state == :expr_dot then
1328
- self.lex_state = :expr_arg
1329
- else
1330
- self.lex_state = :expr_beg
1331
- end
1332
- if c == '=' then
1333
- if (c = src.read) == '>' then
1334
- self.yacc_value = t("<=>")
1335
- return :tCMP
1336
- end
1337
- src.unread c
1338
- self.yacc_value = t("<=")
1339
- return :tLEQ
1340
- end
1341
- if c == '<' then
1342
- if (c = src.read) == '=' then
1343
- self.lex_state = :expr_beg
1344
- self.yacc_value = t("\<\<")
1345
- return :tOP_ASGN
1346
- end
1347
- src.unread(c)
1348
- self.yacc_value = t("<<")
1349
- return :tLSHFT
1350
- end
1351
- self.yacc_value = t("<")
1352
- src.unread(c)
1353
- return :tLT
1354
- when '>' then
1355
- if lex_state == :expr_fname || lex_state == :expr_dot then
1356
- self.lex_state = :expr_arg
1357
- else
1358
- self.lex_state = :expr_beg
1359
- end
722
+ elsif src.scan(/\=\=/) then
723
+ self.fix_arg_lex_state
724
+ self.yacc_value = "=="
725
+ return :tEQ
726
+ elsif src.scan(/\=~/) then
727
+ self.fix_arg_lex_state
728
+ self.yacc_value = "=~"
729
+ return :tMATCH
730
+ elsif src.scan(/\=>/) then
731
+ self.fix_arg_lex_state
732
+ self.yacc_value = "=>"
733
+ return :tASSOC
734
+ elsif src.scan(/\=/) then
735
+ if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
736
+ @comments << '=' << src.matched
737
+
738
+ unless src.scan(/.*?\n=end\s*(\n|\z)/m) then
739
+ @comments.clear
740
+ rb_compile_error("embedded document meets end of file")
741
+ end
1360
742
 
1361
- if (c = src.read) == '=' then
1362
- self.yacc_value = t(">=")
1363
- return :tGEQ
1364
- end
1365
- if c == '>' then
1366
- if (c = src.read) == '=' then
1367
- self.lex_state = :expr_beg
1368
- self.yacc_value = t(">>")
1369
- return :tOP_ASGN
1370
- end
1371
- src.unread c
1372
- self.yacc_value = t(">>")
1373
- return :tRSHFT
1374
- end
1375
- src.unread c
1376
- self.yacc_value = t(">")
1377
- return :tGT
1378
- when '"' then
1379
- self.lex_strterm = s(:strterm, STR_DQUOTE, '"', "\0") # TODO: question this
1380
- self.yacc_value = t("\"")
1381
- return :tSTRING_BEG
1382
- when '`' then
1383
- self.yacc_value = t("`")
1384
- if lex_state == :expr_fname then
1385
- self.lex_state = :expr_end
1386
- return :tBACK_REF2
1387
- end
1388
- if lex_state == :expr_dot then
1389
- if command_state then
1390
- self.lex_state = :expr_cmdarg
1391
- else
1392
- self.lex_state = :expr_arg
1393
- end
1394
- return :tBACK_REF2
1395
- end
1396
- self.lex_strterm = s(:strterm, STR_XQUOTE, '`', "\0")
1397
- return :tXSTRING_BEG
1398
- when "\'" then
1399
- self.lex_strterm = s(:strterm, STR_SQUOTE, "\'", "\0")
1400
- self.yacc_value = t("'")
1401
- return :tSTRING_BEG
1402
- when '?' then
1403
- if lex_state == :expr_end || lex_state == :expr_endarg then
1404
- self.lex_state = :expr_beg
1405
- self.yacc_value = t("?")
1406
- return '?'
1407
- end
743
+ @comments << src.matched
1408
744
 
1409
- c = src.read
1410
-
1411
- raise SyntaxError, "incomplete character syntax" if c == RubyLexer::EOF
1412
-
1413
- if c =~ /\s/ then
1414
- if !lex_state.is_argument then
1415
- c2 = 0
1416
- c2 = case c
1417
- when ' ' then
1418
- 's'
1419
- when "\n" then
1420
- 'n'
1421
- when "\t" then
1422
- 't'
1423
- when "\v" then
1424
- 'v'
1425
- when "\r" then
1426
- 'r'
1427
- when "\f" then
1428
- 'f'
1429
- end
1430
-
1431
- if c2 != 0 then
1432
- warning("invalid character syntax; use ?\\" + c2)
745
+ next
746
+ else
747
+ self.fix_arg_lex_state
748
+ self.yacc_value = '='
749
+ return :tEQL
1433
750
  end
1434
751
  end
1435
-
1436
- # ternary
1437
- src.unread c
1438
- self.lex_state = :expr_beg
1439
- self.yacc_value = t("?")
1440
- return '?'
1441
- # elsif ismbchar(c) then # ternary, also
1442
- # rb_warn("multibyte character literal not supported yet; use ?\\" + c)
1443
- # support.unread c
1444
- # self.lex_state = :expr_beg
1445
- # return '?'
1446
- elsif c =~ /\w/ && ! src.peek("\n") && self.is_next_identchar then
1447
- # ternary, also
1448
- src.unread c
1449
- self.lex_state = :expr_beg
1450
- self.yacc_value = t("?")
1451
- return '?'
1452
- elsif c == "\\" then
1453
- c = self.read_escape
1454
- end
1455
- c[0] &= 0xff
1456
- self.lex_state = :expr_end
1457
- self.yacc_value = c[0]
1458
- return :tINTEGER
1459
- when '&' then
1460
- if (c = src.read) == '&' then
1461
- self.lex_state = :expr_beg
1462
- if (c = src.read) == '=' then
1463
- self.yacc_value = t("&&")
1464
- self.lex_state = :expr_beg
1465
- return :tOP_ASGN
1466
- end
1467
- src.unread c
1468
- self.yacc_value = t("&&")
1469
- return :tANDOP
1470
- elsif c == '=' then
1471
- self.yacc_value = t("&")
1472
- self.lex_state = :expr_beg
1473
- return :tOP_ASGN
1474
- end
1475
-
1476
- src.unread c
1477
-
1478
- if lex_state.is_argument && space_seen && c !~ /\s/ then
1479
- warning("`&' interpreted as argument prefix")
1480
- c = :tAMPER
1481
- elsif lex_state == :expr_beg || lex_state == :expr_mid then
1482
- c = :tAMPER
1483
- else
1484
- c = :tAMPER2
1485
- end
1486
-
1487
- if lex_state == :expr_fname || lex_state == :expr_dot then
1488
- self.lex_state = :expr_arg
1489
- else
1490
- self.lex_state = :expr_beg
1491
- end
1492
- self.yacc_value = t("&")
1493
- return c
1494
- when '|' then
1495
- if (c = src.read) == '|' then
1496
- self.lex_state = :expr_beg
1497
- if (c = src.read) == '=' then
1498
- self.lex_state = :expr_beg
1499
- self.yacc_value = t("||")
1500
- return :tOP_ASGN
1501
- end
1502
- src.unread c
1503
- self.yacc_value = t("||")
1504
- return :tOROP
1505
- end
1506
- if c == '=' then
1507
- self.lex_state = :expr_beg
1508
- self.yacc_value = t("|")
1509
- return :tOP_ASGN
1510
- end
1511
- if lex_state == :expr_fname || lex_state == :expr_dot then
1512
- self.lex_state = :expr_arg
1513
- else
1514
- self.lex_state = :expr_beg
1515
- end
1516
- src.unread c
1517
- self.yacc_value = t("|")
1518
- return :tPIPE
1519
- when '+' then
1520
- c = src.read
1521
- if lex_state == :expr_fname || lex_state == :expr_dot then
1522
- self.lex_state = :expr_arg
1523
- if c == '@' then
1524
- self.yacc_value = t("+@")
1525
- return :tUPLUS
1526
- end
1527
- src.unread c
1528
- self.yacc_value = t("+")
1529
- return :tPLUS
1530
- end
1531
-
1532
- if c == '=' then
1533
- self.lex_state = :expr_beg
1534
- self.yacc_value = t("+")
1535
- return :tOP_ASGN
1536
- end
1537
-
1538
- if (lex_state == :expr_beg || lex_state == :expr_mid ||
1539
- (lex_state.is_argument && space_seen && c !~ /\s/)) then
1540
- arg_ambiguous if lex_state.is_argument
1541
- self.lex_state = :expr_beg
1542
- src.unread c
1543
- if c =~ /\d/ then
1544
- c = '+'
1545
- return parse_number(c)
1546
- end
1547
- self.yacc_value = t("+")
1548
- return :tUPLUS
1549
- end
1550
- self.lex_state = :expr_beg
1551
- src.unread c
1552
- self.yacc_value = t("+")
1553
- return :tPLUS
1554
- when '-' then
1555
- c = src.read
1556
- if lex_state == :expr_fname || lex_state == :expr_dot then
1557
- self.lex_state = :expr_arg
1558
- if c == '@' then
1559
- self.yacc_value = t("-@")
1560
- return :tUMINUS
1561
- end
1562
- src.unread c
1563
- self.yacc_value = t("-")
1564
- return :tMINUS
1565
- end
1566
- if c == '=' then
1567
- self.lex_state = :expr_beg
1568
- self.yacc_value = t("-")
1569
- return :tOP_ASGN
1570
- end
1571
- if (lex_state == :expr_beg || lex_state == :expr_mid ||
1572
- (lex_state.is_argument && space_seen && c !~ /\s/)) then
1573
- arg_ambiguous if lex_state.is_argument
1574
- self.lex_state = :expr_beg
1575
- src.unread c
1576
- self.yacc_value = t("-")
1577
- if c =~ /\d/ then
1578
- return :tUMINUS_NUM
1579
- end
1580
- return :tUMINUS
1581
- end
1582
- self.lex_state = :expr_beg
1583
- src.unread c
1584
- self.yacc_value = t("-")
1585
- return :tMINUS
1586
- when '.' then
1587
- self.lex_state = :expr_beg
1588
- if (c = src.read) == '.' then
1589
- if (c = src.read) == '.' then
1590
- self.yacc_value = t("...")
1591
- return :tDOT3
1592
- end
1593
- src.unread c
1594
- self.yacc_value = t("..")
1595
- return :tDOT2
1596
- end
1597
- src.unread c
1598
- if c =~ /\d/ then
1599
- raise SyntaxError, "no .<digit> floating literal anymore put 0 before dot"
1600
- end
1601
- self.lex_state = :expr_dot
1602
- self.yacc_value = t(".")
1603
- return :tDOT
1604
- when /[0-9]/ then
1605
- return parse_number(c)
1606
- when ')' then # REFACTOR: omg this is lame... next 3 are all the same
1607
- cond.lexpop
1608
- cmdarg.lexpop
1609
- self.lex_state = :expr_end
1610
- self.yacc_value = t(")")
1611
- return :tRPAREN
1612
- when ']' then
1613
- cond.lexpop
1614
- cmdarg.lexpop
1615
- self.lex_state = :expr_end
1616
- self.yacc_value = t("]")
1617
- return :tRBRACK
1618
- when '}' then
1619
- cond.lexpop
1620
- cmdarg.lexpop
1621
- self.lex_state = :expr_end
1622
- self.yacc_value = t("end")
1623
- return :tRCURLY
1624
- when ':' then
1625
- c = src.read
1626
- if c == ':' then
752
+ elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
753
+ self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
754
+ self.lex_state = :expr_end
755
+ return :tSTRING
756
+ elsif src.scan(/\"/) then # FALLBACK
757
+ self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
758
+ self.yacc_value = "\""
759
+ return :tSTRING_BEG
760
+ elsif src.scan(/\@\@?\w*/) then
761
+ self.token = src.matched
762
+
763
+ rb_compile_error "`#{token}` is not allowed as a variable name" if
764
+ token =~ /\@\d/
765
+
766
+ return process_token(command_state)
767
+ elsif src.scan(/\:\:/) then
1627
768
  if (lex_state == :expr_beg ||
1628
769
  lex_state == :expr_mid ||
1629
770
  lex_state == :expr_class ||
1630
771
  (lex_state.is_argument && space_seen)) then
1631
772
  self.lex_state = :expr_beg
1632
- self.yacc_value = t("::")
773
+ self.yacc_value = "::"
1633
774
  return :tCOLON3
1634
775
  end
1635
776
 
1636
- self.lex_state = :expr_dot
1637
- self.yacc_value = t(":")
1638
- return :tCOLON2
1639
- end
1640
-
1641
- if lex_state == :expr_end || lex_state == :expr_endarg || c =~ /\s/ then
1642
- src.unread c
1643
- self.lex_state = :expr_beg
1644
- self.yacc_value = t(":")
1645
- return ':'
1646
- end
1647
-
1648
- case c
1649
- when "\'" then
1650
- self.lex_strterm = s(:strterm, STR_SSYM, c, "\0")
1651
- when '"' then
1652
- self.lex_strterm = s(:strterm, STR_DSYM, c, "\0")
1653
- else
1654
- src.unread c
1655
- end
1656
-
1657
- self.lex_state = :expr_fname
1658
- self.yacc_value = t(":")
1659
- return :tSYMBEG
1660
- when '/' then
1661
- if lex_state == :expr_beg || lex_state == :expr_mid then
1662
- self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1663
- self.yacc_value = t("/")
1664
- return :tREGEXP_BEG
1665
- end
1666
-
1667
- if (c = src.read) == '=' then
1668
- self.yacc_value = t("/")
1669
- self.lex_state = :expr_beg
1670
- return :tOP_ASGN
1671
- end
1672
-
1673
- src.unread c
1674
-
1675
- if lex_state.is_argument && space_seen then
1676
- unless c =~ /\s/ then
1677
- arg_ambiguous
1678
- self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1679
- self.yacc_value = t("/")
1680
- return :tREGEXP_BEG
1681
- end
1682
- end
1683
-
1684
- self.lex_state = if (lex_state == :expr_fname ||
1685
- lex_state == :expr_dot) then
1686
- :expr_arg
1687
- else
1688
- :expr_beg
1689
- end
1690
-
1691
- self.yacc_value = t("/")
1692
- return :tDIVIDE
1693
- when '^' then
1694
- if (c = src.read) == '=' then
1695
- self.lex_state = :expr_beg
1696
- self.yacc_value = t("^")
1697
- return :tOP_ASGN
1698
- end
1699
- if lex_state == :expr_fname || self.lex_state == :expr_dot then
1700
- self.lex_state = :expr_arg
1701
- else
1702
- self.lex_state = :expr_beg
1703
- end
1704
- src.unread c
1705
- self.yacc_value = t("^")
1706
- return :tCARET
1707
- when ';' then
1708
- self.command_start = true
1709
- self.lex_state = :expr_beg
1710
- self.yacc_value = t(";")
1711
- return c
1712
- when ',' then
1713
- self.lex_state = :expr_beg
1714
- self.yacc_value = t(",")
1715
- return c
1716
- when '~' then
1717
- if lex_state == :expr_fname || lex_state == :expr_dot then
1718
- if (c = src.read) != '@' then
1719
- src.unread c
1720
- end
1721
- end
1722
- if lex_state == :expr_fname || lex_state == :expr_dot then
1723
- self.lex_state = :expr_arg
1724
- else
1725
- self.lex_state = :expr_beg
1726
- end
1727
- self.yacc_value = t("~")
1728
- return :tTILDE
1729
- when '(' then
1730
- c = :tLPAREN2
1731
- self.command_start = true
1732
- if lex_state == :expr_beg || lex_state == :expr_mid then
1733
- c = :tLPAREN
1734
- elsif space_seen then
1735
- if lex_state == :expr_cmdarg then
1736
- c = :tLPAREN_ARG
1737
- elsif lex_state == :expr_arg then
1738
- warning("don't put space before argument parentheses")
1739
- c = :tLPAREN2
1740
- end
1741
- end
1742
- cond.push false
1743
- cmdarg.push false
1744
- self.lex_state = :expr_beg
1745
- self.yacc_value = t("(")
1746
- return c
1747
- when '[' then
1748
- if lex_state == :expr_fname || lex_state == :expr_dot then
1749
- self.lex_state = :expr_arg
1750
- if (c = src.read) == ']' then
1751
- if src.peek('=') then
1752
- c = src.read
1753
- self.yacc_value = t("[]=")
1754
- return :tASET
1755
- end
1756
- self.yacc_value = t("[]")
1757
- return :tAREF
1758
- end
1759
- src.unread c
1760
- self.yacc_value = t("[")
1761
- return '['
1762
- elsif lex_state == :expr_beg || lex_state == :expr_mid then
1763
- c = :tLBRACK
1764
- elsif lex_state.is_argument && space_seen then
1765
- c = :tLBRACK
1766
- end
1767
- self.lex_state = :expr_beg
1768
- cond.push false
1769
- cmdarg.push false
1770
- self.yacc_value = t("[")
1771
- return c
1772
- when '{' then
1773
- c = :tLCURLY
1774
-
1775
- if lex_state.is_argument || lex_state == :expr_end then
1776
- c = :tLCURLY # block (primary)
1777
- elsif lex_state == :expr_endarg then
1778
- c = :tLBRACE_ARG # block (expr)
1779
- else
1780
- c = :tLBRACE # hash
1781
- end
1782
- cond.push false
1783
- cmdarg.push false
1784
- self.lex_state = :expr_beg
1785
- self.yacc_value = t("{")
1786
- return c
1787
- when "\\" then
1788
- c = src.read
1789
- if c == "\n" then
1790
- space_seen = true
1791
- next # skip \\n
1792
- end
1793
- src.unread c
1794
- self.yacc_value = t("\\")
1795
- return "\\"
1796
- when '%' then
1797
- if lex_state == :expr_beg || lex_state == :expr_mid then
1798
- return parse_quote(src.read)
1799
- end
1800
-
1801
- c = src.read
1802
- if c == '=' then
1803
- self.lex_state = :expr_beg
1804
- self.yacc_value = t("%")
1805
- return :tOP_ASGN
1806
- end
1807
-
1808
- return parse_quote(c) if lex_state.is_argument && space_seen && c !~ /\s/
1809
-
1810
- self.lex_state = case lex_state
1811
- when :expr_fname, :expr_dot then
1812
- :expr_arg
1813
- else
1814
- :expr_beg
1815
- end
1816
-
1817
- src.unread c
1818
- self.yacc_value = t("%")
1819
-
1820
- return :tPERCENT
1821
- when '$' then
1822
- last_state = lex_state
1823
- self.lex_state = :expr_end
1824
- token_buffer.clear
1825
- c = src.read
1826
- case c
1827
- when '_' then # $_: last read line string
1828
- c = src.read
1829
-
1830
- token_buffer << '$'
1831
- token_buffer << '_'
1832
-
1833
- unless c =~ /\w/ then
1834
- src.unread c
1835
- self.yacc_value = t(token_buffer.join)
1836
- return :tGVAR
1837
- end
1838
- when /[~*$?!@\/\\;,.=:<>\"]/ then
1839
- token_buffer << '$'
1840
- token_buffer << c
1841
- self.yacc_value = t(token_buffer.join)
1842
- return :tGVAR
1843
- when '-' then
1844
- token_buffer << '$'
1845
- token_buffer << c
1846
- c = src.read
1847
- if c =~ /\w/ then
1848
- token_buffer << c
1849
- else
1850
- src.unread c
1851
- end
1852
- self.yacc_value = t(token_buffer.join)
1853
- # xxx shouldn't check if valid option variable
1854
- return :tGVAR
1855
- when /[\&\`\'\+]/ then
1856
- # Explicit reference to these vars as symbols...
1857
- if last_state == :expr_fname then
1858
- token_buffer << '$'
1859
- token_buffer << c
1860
- self.yacc_value = t(token_buffer.join)
1861
- return :tGVAR
1862
- end
1863
-
1864
- self.yacc_value = s(:back_ref, c.to_sym)
1865
- return :tBACK_REF
1866
- when /[1-9]/ then
1867
- token_buffer << '$'
1868
- begin
1869
- token_buffer << c
1870
- c = src.read
1871
- end while c =~ /\d/
1872
- src.unread c
1873
- if last_state == :expr_fname then
1874
- self.yacc_value = t(token_buffer.join)
1875
- return :tGVAR
1876
- else
1877
- self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
1878
- return :tNTH_REF
1879
- end
1880
- when '0' then
1881
- token_buffer << '$'
1882
- else
1883
- unless c =~ /\w/ then
1884
- src.unread c
1885
- self.yacc_value = t("$")
1886
- return '$'
1887
- end
1888
- token_buffer << '$'
1889
- end
1890
- when '@' then
1891
- c = src.read
1892
- token_buffer.clear
1893
- token_buffer << '@'
1894
- if c == '@' then
1895
- token_buffer << '@'
1896
- c = src.read
1897
- end
1898
- if c =~ /\d/ then
1899
- if token_buffer.length == 1 then
1900
- raise SyntaxError, "`@" + c + "' is not allowed as an instance variable name"
1901
- else
1902
- raise SyntaxError, "`@@" + c + "' is not allowed as a class variable name"
1903
- end
1904
- end
1905
- unless c =~ /\w/ then
1906
- src.unread c
1907
- self.yacc_value = t("@")
1908
- return '@'
1909
- end
1910
- when '_' then
1911
- if src.was_begin_of_line && src.match_string("_END__\n", false) then
1912
- self.end_seen = true
1913
- return RubyLexer::EOF
1914
- end
1915
- token_buffer.clear
1916
- else
1917
- unless c =~ /\w/ then
1918
- raise SyntaxError, "Invalid char '#{c.inspect}' in expression"
1919
- end
1920
- token_buffer.clear
1921
- end
1922
-
1923
- begin
1924
- token_buffer << c
1925
- # if ismbchar(c) then
1926
- # len = mbclen(c) - 1
1927
- # (0..len).each do
1928
- # c = src.read;
1929
- # token_buffer << c
1930
- # end
1931
- # end
1932
- c = src.read
1933
- end while c =~ /\w/
1934
-
1935
- if c =~ /\!|\?/ && token_buffer[0] =~ /\w/ && src.peek != '=' then
1936
- token_buffer << c
1937
- else
1938
- src.unread c
1939
- end
1940
-
1941
- result = nil
1942
- last_state = lex_state
1943
-
1944
- case token_buffer[0]
1945
- when '$' then
1946
- self.lex_state = :expr_end
1947
- result = :tGVAR
1948
- when '@' then
1949
- self.lex_state = :expr_end
1950
- if token_buffer[1] == '@' then
1951
- result = :tCVAR
1952
- else
1953
- result = :tIVAR
1954
- end
1955
- else
1956
- if token_buffer[-1] =~ /[!?]/ then
1957
- result = :tFID
1958
- else
1959
- if lex_state == :expr_fname then
1960
- if (c = src.read) == '=' then
1961
- c2 = src.read
1962
-
1963
- if c2 != '~' && c2 != '>' && (c2 != '=' || (c2 == "\n" && src.peek('>'))) then
1964
- result = :tIDENTIFIER
1965
- token_buffer << c
1966
- src.unread c2
1967
- else
1968
- src.unread c2
1969
- src.unread c
1970
- end
1971
- else
1972
- src.unread c
1973
- end
1974
- end
1975
- if result.nil? && token_buffer[0] =~ /[A-Z]/ then
1976
- result = :tCONSTANT
1977
- else
1978
- result = :tIDENTIFIER
1979
- end
1980
- end
1981
-
1982
- unless lex_state == :expr_dot then
1983
- # See if it is a reserved word.
1984
- keyword = Keyword.keyword(token_buffer.join, token_buffer.length)
1985
-
1986
- unless keyword.nil? then
1987
- state = lex_state
1988
- self.lex_state = keyword.state
1989
-
1990
- if state == :expr_fname then
1991
- self.yacc_value = t(keyword.name)
1992
- else
1993
- self.yacc_value = t(token_buffer.join)
1994
- end
1995
-
1996
- if keyword.id0 == :kDO then
1997
- self.command_start = true
1998
- return :kDO_COND if cond.is_in_state
1999
- return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
2000
- return :kDO_BLOCK if state == :expr_endarg
2001
- return :kDO
2002
- end
2003
-
2004
- return keyword.id0 if state == :expr_beg
2005
-
2006
- self.lex_state = :expr_beg unless keyword.id0 == keyword.id1
2007
-
2008
- return keyword.id1
2009
- end
2010
- end
2011
-
2012
- if (lex_state == :expr_beg ||
2013
- lex_state == :expr_mid ||
2014
- lex_state == :expr_dot ||
2015
- lex_state == :expr_arg ||
2016
- lex_state == :expr_cmdarg) then
2017
- if command_state then
2018
- self.lex_state = :expr_cmdarg
2019
- else
2020
- self.lex_state = :expr_arg
2021
- end
2022
- else
2023
- self.lex_state = :expr_end
2024
- end
2025
- end
2026
-
2027
-
2028
- temp_val = token_buffer.join
2029
-
2030
- # Lame: parsing logic made it into lexer in ruby...So we
2031
- # are emulating
2032
- # FIXME: I believe this is much simpler now...
2033
- # HACK
2034
- # scope = parser_support.current_scope
2035
- # if (IdUtil.var_type(temp_val) == IdUtil.LOCAL_VAR &&
2036
- # last_state != :expr_dot &&
2037
- # (BlockStaticScope === scope && (scope.is_defined(temp_val) >= 0)) ||
2038
- # (scope.local_scope.is_defined(temp_val) >= 0)) then
2039
- # self.lex_state = :expr_end
2040
- # end
2041
-
2042
- self.yacc_value = t(temp_val)
2043
-
2044
- return result
2045
- end
2046
- end
2047
-
2048
- ##
2049
- # Parse a number from the input stream.
2050
- #
2051
- # @param c The first character of the number.
2052
- # @return A int constant wich represents a token.
2053
-
2054
- def parse_number c
2055
- self.lex_state = :expr_end
2056
-
2057
- token_buffer.clear
2058
-
2059
- if c == '-' then
2060
- token_buffer << c
2061
- c = src.read
2062
- elsif c == '+' then
2063
- # We don't append '+' since Java number parser gets confused FIX
2064
- c = src.read
2065
- end
2066
-
2067
- nondigit = "\0"
777
+ self.lex_state = :expr_dot
778
+ self.yacc_value = "::"
779
+ return :tCOLON2
780
+ elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
781
+ self.yacc_value = src[1]
782
+ self.lex_state = :expr_end
783
+ return :tSYMBOL
784
+ elsif src.scan(/\:/) then
785
+ # ?: / then / when
786
+ if (lex_state == :expr_end || lex_state == :expr_endarg||
787
+ src.check(/\s/)) then
788
+ self.lex_state = :expr_beg
789
+ self.yacc_value = ":"
790
+ return :tCOLON
791
+ end
2068
792
 
2069
- if c == '0' then
2070
- start_len = token_buffer.length
2071
- c = src.read
793
+ case
794
+ when src.scan(/\'/) then
795
+ self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
796
+ when src.scan(/\"/) then
797
+ self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
798
+ end
2072
799
 
2073
- case c
2074
- when /x/i then # hexadecimal
2075
- c = src.read
800
+ self.lex_state = :expr_fname
801
+ self.yacc_value = ":"
802
+ return :tSYMBEG
803
+ elsif src.check(/[0-9]/) then
804
+ return parse_number
805
+ elsif src.scan(/\[/) then
806
+ result = src.matched
2076
807
 
2077
- if c =~ /[a-f0-9]/i then
2078
- loop do
2079
- if c == '_' then
2080
- break unless nondigit == "\0"
2081
- nondigit = c
2082
- elsif c =~ /[a-f0-9]/i then
2083
- nondigit = "\0"
2084
- token_buffer << c
808
+ if lex_state == :expr_fname || lex_state == :expr_dot then
809
+ self.lex_state = :expr_arg
810
+ case
811
+ when src.scan(/\]\=/) then
812
+ self.yacc_value = "[]="
813
+ return :tASET
814
+ when src.scan(/\]/) then
815
+ self.yacc_value = "[]"
816
+ return :tAREF
2085
817
  else
2086
- break
818
+ rb_compile_error "unexpected '['"
2087
819
  end
2088
- c = src.read
820
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
821
+ result = :tLBRACK
822
+ elsif lex_state.is_argument && space_seen then
823
+ result = :tLBRACK
2089
824
  end
2090
- end
2091
825
 
2092
- src.unread c
826
+ self.expr_beg_push "["
2093
827
 
2094
- if token_buffer.length == start_len then
2095
- raise SyntaxError, "Hexadecimal number without hex-digits."
2096
- elsif nondigit != "\0" then
2097
- raise SyntaxError, "Trailing '_' in number."
2098
- end
2099
- self.yacc_value = token_buffer.join.to_i(16)
2100
- return :tINTEGER
2101
- when /b/i # binary
2102
- c = src.read
2103
- if c == '0' or c == '1' then
2104
- loop do
2105
- if c == '_' then
2106
- break if nondigit != "\0"
2107
- nondigit = c
2108
- elsif c == '0' or c == '1' then
2109
- nondigit = "\0"
2110
- token_buffer << c
828
+ return result
829
+ elsif src.scan(/\'(\\.|[^\'])*\'/) then
830
+ self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
831
+ self.lex_state = :expr_end
832
+ return :tSTRING
833
+ elsif src.check(/\|/) then
834
+ if src.scan(/\|\|\=/) then
835
+ self.lex_state = :expr_beg
836
+ self.yacc_value = "||"
837
+ return :tOP_ASGN
838
+ elsif src.scan(/\|\|/) then
839
+ self.lex_state = :expr_beg
840
+ self.yacc_value = "||"
841
+ return :tOROP
842
+ elsif src.scan(/\|\=/) then
843
+ self.lex_state = :expr_beg
844
+ self.yacc_value = "|"
845
+ return :tOP_ASGN
846
+ elsif src.scan(/\|/) then
847
+ self.fix_arg_lex_state
848
+ self.yacc_value = "|"
849
+ return :tPIPE
850
+ end
851
+ elsif src.scan(/\{/) then
852
+ result = if lex_state.is_argument || lex_state == :expr_end then
853
+ :tLCURLY # block (primary)
854
+ elsif lex_state == :expr_endarg then
855
+ :tLBRACE_ARG # block (expr)
856
+ else
857
+ :tLBRACE # hash
858
+ end
859
+
860
+ self.expr_beg_push "{"
861
+
862
+ return result
863
+ elsif src.scan(/[+-]/) then
864
+ sign = src.matched
865
+ utype, type = if sign == "+" then
866
+ [:tUPLUS, :tPLUS]
867
+ else
868
+ [:tUMINUS, :tMINUS]
869
+ end
870
+
871
+ if lex_state == :expr_fname || lex_state == :expr_dot then
872
+ self.lex_state = :expr_arg
873
+ if src.scan(/@/) then
874
+ self.yacc_value = "#{sign}@"
875
+ return utype
2111
876
  else
2112
- break
877
+ self.yacc_value = sign
878
+ return type
2113
879
  end
2114
- c = src.read
2115
880
  end
2116
- end
2117
881
 
2118
- src.unread c
882
+ if src.scan(/\=/) then
883
+ self.lex_state = :expr_beg
884
+ self.yacc_value = sign
885
+ return :tOP_ASGN
886
+ end
2119
887
 
2120
- if token_buffer.length == start_len then
2121
- raise SyntaxError, "Binary number without digits."
2122
- elsif nondigit != "\0" then
2123
- raise SyntaxError, "Trailing '_' in number."
2124
- end
2125
- self.yacc_value = token_buffer.join.to_i(2)
2126
- return :tINTEGER
2127
- when /d/i then # decimal
2128
- c = src.read
2129
- if c =~ /\d/ then
2130
- loop do
2131
- if c == '_' then
2132
- break if nondigit != "\0"
2133
- nondigit = c
2134
- elsif c =~ /\d/ then
2135
- nondigit = "\0"
2136
- token_buffer << c
2137
- else
2138
- break
888
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
889
+ (lex_state.is_argument && space_seen && !src.check(/\s/))) then
890
+ if lex_state.is_argument then
891
+ arg_ambiguous
2139
892
  end
2140
- c = src.read
2141
- end
2142
- end
2143
893
 
2144
- src.unread c
894
+ self.lex_state = :expr_beg
895
+ self.yacc_value = sign
2145
896
 
2146
- if token_buffer.length == start_len then
2147
- raise SyntaxError, "Binary number without digits."
2148
- elsif nondigit != "\0" then
2149
- raise SyntaxError, "Trailing '_' in number."
2150
- end
897
+ if src.check(/\d/) then
898
+ if utype == :tUPLUS then
899
+ return self.parse_number
900
+ else
901
+ return :tUMINUS_NUM
902
+ end
903
+ end
2151
904
 
2152
- self.yacc_value = token_buffer.join.to_i(10)
2153
- return :tINTEGER
2154
- when /o/i, /[0-7_]/ then # octal
2155
- c = src.read if c =~ /o/i # prefixed octal - kill me
2156
- loop do
2157
- if c == '_' then
2158
- break if (nondigit != "\0")
2159
- nondigit = c
2160
- elsif c >= '0' && c <= '7' then
2161
- nondigit = "\0"
2162
- token_buffer << c
2163
- else
2164
- break
905
+ return utype
2165
906
  end
2166
- c = src.read
2167
- end
2168
- if token_buffer.length > start_len then
2169
- src.unread c
2170
907
 
2171
- if nondigit != "\0" then
2172
- raise SyntaxError, "Trailing '_' in number."
908
+ self.lex_state = :expr_beg
909
+ self.yacc_value = sign
910
+ return type
911
+ elsif src.check(/\*/) then
912
+ if src.scan(/\*\*=/) then
913
+ self.lex_state = :expr_beg
914
+ self.yacc_value = "**"
915
+ return :tOP_ASGN
916
+ elsif src.scan(/\*\*/) then
917
+ self.yacc_value = "**"
918
+ self.fix_arg_lex_state
919
+ return :tPOW
920
+ elsif src.scan(/\*\=/) then
921
+ self.lex_state = :expr_beg
922
+ self.yacc_value = "*"
923
+ return :tOP_ASGN
924
+ elsif src.scan(/\*/) then
925
+ result = if lex_state.is_argument && space_seen && src.check(/\S/) then
926
+ warning("`*' interpreted as argument prefix")
927
+ :tSTAR
928
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
929
+ :tSTAR
930
+ else
931
+ :tSTAR2
932
+ end
933
+ self.yacc_value = "*"
934
+ self.fix_arg_lex_state
935
+
936
+ return result
2173
937
  end
2174
-
2175
- self.yacc_value = token_buffer.join.to_i(8)
2176
- return :tINTEGER
2177
- end
2178
- when /[89]/ then
2179
- raise SyntaxError, "Illegal octal digit."
2180
- when /[\.eE]/ then
2181
- token_buffer << '0'
2182
- else
2183
- src.unread c
2184
- self.yacc_value = 0
2185
- return :tINTEGER
2186
- end
2187
- end
2188
-
2189
- seen_point = false
2190
- seen_e = false
2191
-
2192
- loop do
2193
- case c
2194
- when /\d/ then
2195
- nondigit = "\0"
2196
- token_buffer << c
2197
- when '.' then
2198
- if nondigit != "\0" then
2199
- src.unread c
2200
- raise SyntaxError, "Trailing '_' in number."
2201
- elsif seen_point or seen_e then
2202
- src.unread c
2203
- return number_token(token_buffer.join, true, nondigit)
2204
- else
2205
- c2 = src.read
2206
- unless c2 =~ /\d/ then
2207
- src.unread c2
2208
- src.unread '.'
2209
- if c == '_' then
2210
- # Enebo: c can never be antrhign but '.'
2211
- # Why did I put this here?
2212
- else
2213
- self.yacc_value = token_buffer.join.to_i(10)
2214
- return :tINTEGER
938
+ elsif src.check(/\!/) then
939
+ if src.scan(/\!\=/) then
940
+ self.lex_state = :expr_beg
941
+ self.yacc_value = "!="
942
+ return :tNEQ
943
+ elsif src.scan(/\!~/) then
944
+ self.lex_state = :expr_beg
945
+ self.yacc_value = "!~"
946
+ return :tNMATCH
947
+ elsif src.scan(/\!/) then
948
+ self.lex_state = :expr_beg
949
+ self.yacc_value = "!"
950
+ return :tBANG
951
+ end
952
+ elsif src.check(/\</) then
953
+ if src.scan(/\<\=\>/) then
954
+ self.fix_arg_lex_state
955
+ self.yacc_value = "<=>"
956
+ return :tCMP
957
+ elsif src.scan(/\<\=/) then
958
+ self.fix_arg_lex_state
959
+ self.yacc_value = "<="
960
+ return :tLEQ
961
+ elsif src.scan(/\<\<\=/) then
962
+ self.fix_arg_lex_state
963
+ self.lex_state = :expr_beg
964
+ self.yacc_value = "\<\<"
965
+ return :tOP_ASGN
966
+ elsif src.scan(/\<\</) then
967
+ if (! [:expr_end, :expr_dot,
968
+ :expr_endarg, :expr_class].include?(lex_state) &&
969
+ (!lex_state.is_argument || space_seen)) then
970
+ tok = self.heredoc_identifier
971
+ if tok then
972
+ return tok
973
+ end
2215
974
  end
2216
- else
2217
- token_buffer << '.'
2218
- token_buffer << c2
2219
- seen_point = true
2220
- nondigit = "\0"
975
+
976
+ self.fix_arg_lex_state
977
+ self.yacc_value = "\<\<"
978
+ return :tLSHFT
979
+ elsif src.scan(/\</) then
980
+ self.fix_arg_lex_state
981
+ self.yacc_value = "<"
982
+ return :tLT
2221
983
  end
2222
- end
2223
- when /e/i then
2224
- if nondigit != "\0" then
2225
- raise SyntaxError, "Trailing '_' in number."
2226
- elsif seen_e then
2227
- src.unread c
2228
- return number_token(token_buffer.join, true, nondigit)
2229
- else
2230
- token_buffer << c
2231
- seen_e = true
2232
- nondigit = c
2233
- c = src.read
2234
- if c == '-' or c == '+' then
2235
- token_buffer << c
2236
- nondigit = c
2237
- else
2238
- src.unread c
984
+ elsif src.check(/\>/) then
985
+ if src.scan(/\>\=/) then
986
+ self.fix_arg_lex_state
987
+ self.yacc_value = ">="
988
+ return :tGEQ
989
+ elsif src.scan(/\>\>=/) then
990
+ self.fix_arg_lex_state
991
+ self.lex_state = :expr_beg
992
+ self.yacc_value = ">>"
993
+ return :tOP_ASGN
994
+ elsif src.scan(/\>\>/) then
995
+ self.fix_arg_lex_state
996
+ self.yacc_value = ">>"
997
+ return :tRSHFT
998
+ elsif src.scan(/\>/) then
999
+ self.fix_arg_lex_state
1000
+ self.yacc_value = ">"
1001
+ return :tGT
1002
+ end
1003
+ elsif src.scan(/\`/) then
1004
+ self.yacc_value = "`"
1005
+ case lex_state
1006
+ when :expr_fname then
1007
+ self.lex_state = :expr_end
1008
+ return :tBACK_REF2
1009
+ when :expr_dot then
1010
+ self.lex_state = if command_state then
1011
+ :expr_cmdarg
1012
+ else
1013
+ :expr_arg
1014
+ end
1015
+ return :tBACK_REF2
1016
+ end
1017
+ self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
1018
+ return :tXSTRING_BEG
1019
+ elsif src.scan(/\?/) then
1020
+ if lex_state == :expr_end || lex_state == :expr_endarg then
1021
+ self.lex_state = :expr_beg
1022
+ self.yacc_value = "?"
1023
+ return :tEH
2239
1024
  end
2240
- end
2241
- when '_' then # '_' in number just ignored
2242
- if nondigit != "\0" then
2243
- raise SyntaxError, "Trailing '_' in number."
2244
- end
2245
- nondigit = c
2246
- else
2247
- src.unread c
2248
- r = number_token(token_buffer.join, seen_e || seen_point, nondigit)
2249
- return r
2250
- end
2251
- c = src.read
2252
- end
2253
- end
2254
-
2255
- # TODO: remove me
2256
- def number_token(number, is_float, nondigit)
2257
- if nondigit != "\0" then
2258
- raise SyntaxError, "Trailing '_' in number."
2259
- end
2260
-
2261
- if is_float then
2262
- self.yacc_value = number.to_f
2263
- return :tFLOAT
2264
- end
2265
-
2266
- self.yacc_value = number.to_i
2267
- return :tINTEGER
2268
- end
2269
-
2270
- ############################################################
2271
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
2272
-
2273
- def tokadd s # HACK
2274
- self.token_buffer << s
2275
- end
2276
-
2277
- def warning s
2278
- # do nothing for now
2279
- end
2280
-
2281
- def rb_compile_error msg
2282
- raise msg
2283
- end
2284
-
2285
- def is_next_identchar # TODO: ?
2286
- c = src.read
2287
- src.unread c
2288
1025
 
2289
- return c != RubyLexer::EOF && c =~ /\w/
2290
- end
1026
+ if src.eos? then
1027
+ rb_compile_error "incomplete character syntax"
1028
+ end
2291
1029
 
2292
- def is_next_no_case(s) # FIX: replace this whole thing with something clean
2293
- buf = []
2294
- old_pos = src.pos
1030
+ if src.check(/\s|\v/) then
1031
+ unless lex_state.is_argument then
1032
+ c2 = { " " => 's',
1033
+ "\n" => 'n',
1034
+ "\t" => 't',
1035
+ "\v" => 'v',
1036
+ "\r" => 'r',
1037
+ "\f" => 'f' }[src.matched]
1038
+
1039
+ if c2 then
1040
+ warning("invalid character syntax; use ?\\" + c2)
1041
+ end
1042
+ end
2295
1043
 
2296
- s.each_byte do |b|
2297
- c = b.chr
2298
- r = src.read
2299
- buf << r
1044
+ # ternary
1045
+ self.lex_state = :expr_beg
1046
+ self.yacc_value = "?"
1047
+ return :tEH
1048
+ elsif src.check(/\w(?=\w)/) then # ternary, also
1049
+ self.lex_state = :expr_beg
1050
+ self.yacc_value = "?"
1051
+ return :tEH
1052
+ end
2300
1053
 
2301
- if c.downcase != r.downcase then
2302
- src.pos = old_pos
2303
- return nil
2304
- end
2305
- end
1054
+ c = if src.scan(/\\/) then
1055
+ self.read_escape
1056
+ else
1057
+ src.getch
1058
+ end
1059
+ self.lex_state = :expr_end
1060
+ self.yacc_value = c[0].ord & 0xff
1061
+ return :tINTEGER
1062
+ elsif src.check(/\&/) then
1063
+ if src.scan(/\&\&\=/) then
1064
+ self.yacc_value = "&&"
1065
+ self.lex_state = :expr_beg
1066
+ return :tOP_ASGN
1067
+ elsif src.scan(/\&\&/) then
1068
+ self.lex_state = :expr_beg
1069
+ self.yacc_value = "&&"
1070
+ return :tANDOP
1071
+ elsif src.scan(/\&\=/) then
1072
+ self.yacc_value = "&"
1073
+ self.lex_state = :expr_beg
1074
+ return :tOP_ASGN
1075
+ elsif src.scan(/&/) then
1076
+ result = if lex_state.is_argument && space_seen &&
1077
+ !src.check(/\s/) then
1078
+ warning("`&' interpreted as argument prefix")
1079
+ :tAMPER
1080
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
1081
+ :tAMPER
1082
+ else
1083
+ :tAMPER2
1084
+ end
1085
+
1086
+ self.fix_arg_lex_state
1087
+ self.yacc_value = "&"
1088
+ return result
1089
+ end
1090
+ elsif src.scan(/\//) then
1091
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1092
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1093
+ self.yacc_value = "/"
1094
+ return :tREGEXP_BEG
1095
+ end
2306
1096
 
2307
- return buf.join
2308
- end
1097
+ if src.scan(/\=/) then
1098
+ self.yacc_value = "/"
1099
+ self.lex_state = :expr_beg
1100
+ return :tOP_ASGN
1101
+ end
2309
1102
 
2310
- kill :is_hex_char, :is_oct_char, :is_identifier_char, :nextc, :pushback
1103
+ if lex_state.is_argument && space_seen then
1104
+ unless src.scan(/\s/) then
1105
+ arg_ambiguous
1106
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1107
+ self.yacc_value = "/"
1108
+ return :tREGEXP_BEG
1109
+ end
1110
+ end
2311
1111
 
2312
- # END HACK
2313
- ############################################################$
1112
+ self.fix_arg_lex_state
1113
+ self.yacc_value = "/"
2314
1114
 
2315
- end
1115
+ return :tDIVIDE
1116
+ elsif src.scan(/\^=/) then
1117
+ self.lex_state = :expr_beg
1118
+ self.yacc_value = "^"
1119
+ return :tOP_ASGN
1120
+ elsif src.scan(/\^/) then
1121
+ self.fix_arg_lex_state
1122
+ self.yacc_value = "^"
1123
+ return :tCARET
1124
+ elsif src.scan(/\;/) then
1125
+ self.command_start = true
1126
+ self.lex_state = :expr_beg
1127
+ self.yacc_value = ";"
1128
+ return :tSEMI
1129
+ elsif src.scan(/\~/) then
1130
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1131
+ src.scan(/@/)
1132
+ end
2316
1133
 
2317
- class Keyword
2318
- class KWtable
2319
- attr_accessor :name, :id, :state
2320
- def initialize(name, id=[], state=nil)
2321
- @name = name
2322
- @id = id
2323
- @state = state
2324
- end
1134
+ self.fix_arg_lex_state
1135
+ self.yacc_value = "~"
2325
1136
 
2326
- def id0
2327
- self.id.first
2328
- end
1137
+ return :tTILDE
1138
+ elsif src.scan(/\\/) then
1139
+ if src.scan(/\n/) then
1140
+ self.lineno = nil
1141
+ space_seen = true
1142
+ next
1143
+ end
1144
+ rb_compile_error "bare backslash only allowed before newline"
1145
+ elsif src.scan(/\%/) then
1146
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1147
+ return parse_quote
1148
+ end
2329
1149
 
2330
- def id1
2331
- self.id.last
2332
- end
2333
- end
1150
+ if src.scan(/\=/) then
1151
+ self.lex_state = :expr_beg
1152
+ self.yacc_value = "%"
1153
+ return :tOP_ASGN
1154
+ end
2334
1155
 
2335
- TOTAL_KEYWORDS = 40
2336
- MIN_WORD_LENGTH = 2
2337
- MAX_WORD_LENGTH = 8
2338
- MIN_HASH_VALUE = 6
2339
- MAX_HASH_VALUE = 55
2340
- # maximum key range = 50, duplicates = 0
2341
-
2342
- def self.hash_keyword(str, len)
2343
- hval = len
2344
-
2345
- asso_values = [
2346
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2347
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2348
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2349
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2350
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2351
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2352
- 56, 56, 56, 11, 56, 56, 36, 56, 1, 37,
2353
- 31, 1, 56, 56, 56, 56, 29, 56, 1, 56,
2354
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2355
- 56, 56, 56, 56, 56, 1, 56, 32, 1, 2,
2356
- 1, 1, 4, 23, 56, 17, 56, 20, 9, 2,
2357
- 9, 26, 14, 56, 5, 1, 1, 16, 56, 21,
2358
- 20, 9, 56, 56, 56, 56, 56, 56, 56, 56,
2359
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2360
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2361
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2362
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2363
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2364
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2365
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2366
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2367
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2368
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2369
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2370
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2371
- 56, 56, 56, 56, 56, 56
2372
- ]
2373
-
2374
- case hval
2375
- when 2, 1 then
2376
- hval += asso_values[str[0]]
2377
- else
2378
- hval += asso_values[str[2]]
2379
- hval += asso_values[str[0]]
2380
- end
1156
+ if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1157
+ return parse_quote
1158
+ end
2381
1159
 
2382
- hval += asso_values[str[len - 1]]
2383
- return hval
2384
- end
1160
+ self.fix_arg_lex_state
1161
+ self.yacc_value = "%"
1162
+
1163
+ return :tPERCENT
1164
+ elsif src.check(/\$/) then
1165
+ if src.scan(/(\$_)(\w+)/) then
1166
+ self.lex_state = :expr_end
1167
+ self.token = src.matched
1168
+ return process_token(command_state)
1169
+ elsif src.scan(/\$_/) then
1170
+ self.lex_state = :expr_end
1171
+ self.token = src.matched
1172
+ self.yacc_value = src.matched
1173
+ return :tGVAR
1174
+ elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1175
+ self.lex_state = :expr_end
1176
+ self.yacc_value = src.matched
1177
+ return :tGVAR
1178
+ elsif src.scan(/\$([\&\`\'\+])/) then
1179
+ self.lex_state = :expr_end
1180
+ # Explicit reference to these vars as symbols...
1181
+ if last_state == :expr_fname then
1182
+ self.yacc_value = src.matched
1183
+ return :tGVAR
1184
+ else
1185
+ self.yacc_value = src[1].to_sym
1186
+ return :tBACK_REF
1187
+ end
1188
+ elsif src.scan(/\$([1-9]\d*)/) then
1189
+ self.lex_state = :expr_end
1190
+ if last_state == :expr_fname then
1191
+ self.yacc_value = src.matched
1192
+ return :tGVAR
1193
+ else
1194
+ self.yacc_value = src[1].to_i
1195
+ return :tNTH_REF
1196
+ end
1197
+ elsif src.scan(/\$0/) then
1198
+ self.lex_state = :expr_end
1199
+ self.token = src.matched
1200
+ return process_token(command_state)
1201
+ elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1202
+ self.lex_state = :expr_end
1203
+ self.yacc_value = "$"
1204
+ return "$"
1205
+ elsif src.scan(/\$\w+/)
1206
+ self.lex_state = :expr_end
1207
+ self.token = src.matched
1208
+ return process_token(command_state)
1209
+ end
1210
+ elsif src.check(/\_/) then
1211
+ if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1212
+ self.lineno = nil
1213
+ return RubyLexer::EOF
1214
+ elsif src.scan(/\_\w*/) then
1215
+ self.token = src.matched
1216
+ return process_token(command_state)
1217
+ end
1218
+ end
1219
+ end # END OF CASE
2385
1220
 
2386
- ##
2387
- # :expr_beg = ignore newline, +/- is a sign.
2388
- # :expr_end = newline significant, +/- is a operator.
2389
- # :expr_arg = newline significant, +/- is a operator.
2390
- # :expr_cmdarg = newline significant, +/- is a operator.
2391
- # :expr_endarg = newline significant, +/- is a operator.
2392
- # :expr_mid = newline significant, +/- is a operator.
2393
- # :expr_fname = ignore newline, no reserved words.
2394
- # :expr_dot = right after . or ::, no reserved words.
2395
- # :expr_class = immediate after class, no here document.
2396
-
2397
- def self.keyword(str, len = str.size)
2398
- wordlist = [
2399
- [""], [""], [""], [""], [""], [""],
2400
- ["end", [:kEND, :kEND ], :expr_end ],
2401
- ["else", [:kELSE, :kELSE ], :expr_beg ],
2402
- ["case", [:kCASE, :kCASE ], :expr_beg ],
2403
- ["ensure", [:kENSURE, :kENSURE ], :expr_beg ],
2404
- ["module", [:kMODULE, :kMODULE ], :expr_beg ],
2405
- ["elsif", [:kELSIF, :kELSIF ], :expr_beg ],
2406
- ["def", [:kDEF, :kDEF ], :expr_fname ],
2407
- ["rescue", [:kRESCUE, :kRESCUE_MOD ], :expr_mid ],
2408
- ["not", [:kNOT, :kNOT ], :expr_beg ],
2409
- ["then", [:kTHEN, :kTHEN ], :expr_beg ],
2410
- ["yield", [:kYIELD, :kYIELD ], :expr_arg ],
2411
- ["for", [:kFOR, :kFOR ], :expr_beg ],
2412
- ["self", [:kSELF, :kSELF ], :expr_end ],
2413
- ["false", [:kFALSE, :kFALSE ], :expr_end ],
2414
- ["retry", [:kRETRY, :kRETRY ], :expr_end ],
2415
- ["return", [:kRETURN, :kRETURN ], :expr_mid ],
2416
- ["true", [:kTRUE, :kTRUE ], :expr_end ],
2417
- ["if", [:kIF, :kIF_MOD ], :expr_beg ],
2418
- ["defined?", [:kDEFINED, :kDEFINED ], :expr_arg ],
2419
- ["super", [:kSUPER, :kSUPER ], :expr_arg ],
2420
- ["undef", [:kUNDEF, :kUNDEF ], :expr_fname ],
2421
- ["break", [:kBREAK, :kBREAK ], :expr_mid ],
2422
- ["in", [:kIN, :kIN ], :expr_beg ],
2423
- ["do", [:kDO, :kDO ], :expr_beg ],
2424
- ["nil", [:kNIL, :kNIL ], :expr_end ],
2425
- ["until", [:kUNTIL, :kUNTIL_MOD ], :expr_beg ],
2426
- ["unless", [:kUNLESS, :kUNLESS_MOD ], :expr_beg ],
2427
- ["or", [:kOR, :kOR ], :expr_beg ],
2428
- ["next", [:kNEXT, :kNEXT ], :expr_mid ],
2429
- ["when", [:kWHEN, :kWHEN ], :expr_beg ],
2430
- ["redo", [:kREDO, :kREDO ], :expr_end ],
2431
- ["and", [:kAND, :kAND ], :expr_beg ],
2432
- ["begin", [:kBEGIN, :kBEGIN ], :expr_beg ],
2433
- ["__LINE__", [:k__LINE__, :k__LINE__ ], :expr_end ],
2434
- ["class", [:kCLASS, :kCLASS ], :expr_class ],
2435
- ["__FILE__", [:k__FILE__, :k__FILE__ ], :expr_end ],
2436
- ["END", [:klEND, :klEND ], :expr_end ],
2437
- ["BEGIN", [:klBEGIN, :klBEGIN ], :expr_end ],
2438
- ["while", [:kWHILE, :kWHILE_MOD ], :expr_beg ],
2439
- [""], [""], [""], [""], [""], [""], [""], [""], [""],
2440
- [""],
2441
- ["alias", [:kALIAS, :kALIAS ], :expr_fname ],
2442
- ].map { |args| KWtable.new(*args) }
2443
-
2444
- if len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH then
2445
- key = hash_keyword(str, len)
2446
- if key <= MAX_HASH_VALUE && key >= 0 then
2447
- s = wordlist[key].name
2448
- return wordlist[key] if str == s
1221
+ if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1222
+ return RubyLexer::EOF
1223
+ else # alpha check
1224
+ if src.scan(/\W/) then
1225
+ rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1226
+ end
2449
1227
  end
2450
- end
2451
-
2452
- return nil
2453
- end
2454
- end
2455
-
2456
- class Environment
2457
- attr_reader :env, :dyn
2458
- attr_accessor :init
2459
1228
 
2460
- def initialize dyn = false
2461
- @dyn = []
2462
- @env = []
2463
- @use = []
2464
- @init = false
2465
- self.extend
2466
- end
1229
+ self.token = src.matched if self.src.scan(/\w+/)
2467
1230
 
2468
- def use id
2469
- @env.each_with_index do |env, i|
2470
- if env[id] then
2471
- @use[i][id] = true
2472
- end
1231
+ return process_token(command_state)
2473
1232
  end
2474
1233
  end
2475
1234
 
2476
- def used? id
2477
- idx = @dyn.index false # REFACTOR
2478
- u = @use[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
2479
- u[id]
2480
- end
2481
-
2482
- def [] k
2483
- self.all[k]
2484
- end
2485
-
2486
- def []= k, v
2487
- raise "no" if v == true
2488
- self.current[k] = v
2489
- end
2490
-
2491
- def has_key? k
2492
- self.all.has_key? k
2493
- end
2494
-
2495
- def all
2496
- idx = @dyn.index false
2497
- @env[0..idx].reverse.inject { |env, scope| env.merge scope }
2498
- end
2499
-
2500
- def dynamic
2501
- idx = @dyn.index false
2502
- @env[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
2503
- end
2504
-
2505
- def current
2506
- @env.first
2507
- end
2508
-
2509
- def dynamic?
2510
- @dyn[0] != false
2511
- end
2512
-
2513
- def dasgn_curr? name # TODO: I think this is wrong - nuke
2514
- (! has_key?(name) && dynamic?) || current.has_key?(name)
2515
- end
2516
-
2517
- def extend dyn = false
2518
- @dyn.unshift dyn
2519
- @env.unshift({})
2520
- @use.unshift({})
2521
- end
2522
-
2523
- def unextend
2524
- @dyn.shift
2525
- @env.shift
2526
- @use.shift
2527
- raise "You went too far unextending env" if @env.empty?
2528
- end
2529
- end
2530
-
2531
- class StackState
2532
- attr_reader :stack
2533
-
2534
- def inspect
2535
- "StackState(#{@name}, #{@stack.inspect})"
2536
- end
2537
-
2538
- def initialize(name)
2539
- @name = name
2540
- @stack = [false]
2541
- end
2542
-
2543
- def pop
2544
- # raise "#{@name} empty" if @stack.size <= 1
2545
- r = @stack.pop
2546
- @stack.push false if @stack.size == 0
2547
- r
2548
- end
2549
-
2550
- def lexpop
2551
- raise if @stack.size == 0
2552
- a = @stack.pop
2553
- b = @stack.pop
2554
- @stack.push(a || b)
2555
- end
2556
-
2557
- def push val
2558
- raise if val != true and val != false
2559
- @stack.push val
2560
- end
2561
-
2562
- def is_in_state
2563
- @stack.last
2564
- end
2565
- end
2566
-
2567
- def t str
2568
- Token.new str
2569
- end
2570
-
2571
- class Token # TODO: nuke this and use sexps
2572
- attr_accessor :args
2573
- def initialize(token)
2574
- @args = Array(token)
2575
- end
2576
-
2577
- def value # TODO: eventually phase this out (or make it official)
2578
- self.args.first
2579
- end
2580
-
2581
- def first # HACK
2582
- self.args.first
2583
- end
2584
-
2585
- def inspect
2586
- "t(#{args.join.inspect})"
2587
- end
2588
-
2589
- def to_sym
2590
- self.value.to_sym
2591
- end
2592
-
2593
- def == o
2594
- Token === o and self.args == o.args
2595
- end
2596
- end
1235
+ def process_token(command_state)
2597
1236
 
2598
- ############################################################
2599
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
1237
+ token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
2600
1238
 
2601
- class Symbol
2602
- def is_argument # TODO: phase this out
2603
- return self == :expr_arg || self == :expr_cmdarg
2604
- end
2605
- end
1239
+ result = nil
1240
+ last_state = lex_state
2606
1241
 
2607
- class StringIO # HACK: everything in here is a hack
2608
- attr_accessor :begin_of_line, :was_begin_of_line
2609
- alias :begin_of_line? :begin_of_line
2610
- alias :read_all :read
2611
-
2612
- alias :old_initialize :initialize
2613
-
2614
- def initialize(*args)
2615
- self.begin_of_line = true
2616
- self.was_begin_of_line = false
2617
- old_initialize(*args)
2618
- @original_string = self.string.dup
2619
- end
2620
1242
 
2621
- def rest
2622
- self.string[self.pos..-1]
2623
- end
1243
+ case token
1244
+ when /^\$/ then
1245
+ self.lex_state, result = :expr_end, :tGVAR
1246
+ when /^@@/ then
1247
+ self.lex_state, result = :expr_end, :tCVAR
1248
+ when /^@/ then
1249
+ self.lex_state, result = :expr_end, :tIVAR
1250
+ else
1251
+ if token =~ /[!?]$/ then
1252
+ result = :tFID
1253
+ else
1254
+ if lex_state == :expr_fname then
1255
+ # ident=, not =~ => == or followed by =>
1256
+ # TODO test lexing of a=>b vs a==>b
1257
+ if src.scan(/=(?:(?![~>=])|(?==>))/) then
1258
+ result = :tIDENTIFIER
1259
+ token << src.matched
1260
+ end
1261
+ end
2624
1262
 
2625
- def current_line # HAHA fuck you
2626
- @original_string[0..self.pos][/\A.*__LINE__/m].split(/\n/).size
2627
- end
1263
+ result ||= if token =~ /^[A-Z]/ then
1264
+ :tCONSTANT
1265
+ else
1266
+ :tIDENTIFIER
1267
+ end
1268
+ end
2628
1269
 
2629
- def read
2630
- c = self.getc
1270
+ unless lex_state == :expr_dot then
1271
+ # See if it is a reserved word.
1272
+ keyword = Keyword.keyword token
1273
+
1274
+ if keyword then
1275
+ state = lex_state
1276
+ self.lex_state = keyword.state
1277
+ self.yacc_value = token
1278
+
1279
+ if keyword.id0 == :kDO then
1280
+ self.command_start = true
1281
+ return :kDO_COND if cond.is_in_state
1282
+ return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1283
+ return :kDO_BLOCK if state == :expr_endarg
1284
+ return :kDO
1285
+ end
2631
1286
 
2632
- if c == ?\r then
2633
- d = self.getc
2634
- self.ungetc d if d and d != ?\n
2635
- c = ?\n
2636
- end
2637
-
2638
- self.was_begin_of_line = self.begin_of_line
2639
- self.begin_of_line = c == ?\n
2640
- if c and c != 0 then
2641
- c.chr
2642
- else
2643
- ::RubyLexer::EOF
2644
- end
2645
- end
1287
+ return keyword.id0 if state == :expr_beg
2646
1288
 
2647
- def match_string term, indent=false # TODO: add case insensitivity, or just remove
2648
- buffer = []
1289
+ self.lex_state = :expr_beg if keyword.id0 != keyword.id1
2649
1290
 
2650
- if indent
2651
- while c = self.read do
2652
- if c !~ /\s/ or c == "\n" or c == "\r" then
2653
- self.unread c
2654
- break
1291
+ return keyword.id1
2655
1292
  end
2656
- buffer << c
2657
1293
  end
2658
- end
2659
1294
 
2660
- term.each_byte do |c2|
2661
- c = self.read
2662
- c = self.read if c and c == "\r"
2663
- buffer << c
2664
- if c and c2 != c[0] then
2665
- self.unread_many buffer.join # HACK omg
2666
- return false
1295
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
1296
+ lex_state == :expr_dot || lex_state == :expr_arg ||
1297
+ lex_state == :expr_cmdarg) then
1298
+ if command_state then
1299
+ self.lex_state = :expr_cmdarg
1300
+ else
1301
+ self.lex_state = :expr_arg
1302
+ end
1303
+ else
1304
+ self.lex_state = :expr_end
2667
1305
  end
2668
1306
  end
2669
1307
 
2670
- return true
2671
- end
2672
-
2673
- def read_line
2674
- self.begin_of_line = true
2675
- self.was_begin_of_line = false
2676
- gets.sub(/\r\n?$/, "\n") # HACK
2677
- end
2678
-
2679
- def peek expected = nil # FIX: barf
2680
- c = self.getc
2681
- return RubyLexer::EOF if c.nil?
2682
- self.ungetc c if c
2683
- c = c.chr if c
2684
- if expected then
2685
- c == expected
2686
- else
2687
- c
2688
- end
2689
- end
2690
-
2691
- def unread(c)
2692
- return if c.nil? # UGH
2693
-
2694
- # HACK: only depth is 2... who cares? really I want to remove all of this
2695
- self.begin_of_line = self.was_begin_of_line || true
2696
- self.was_begin_of_line = nil
2697
-
2698
- c = c[0] if String === c
2699
- self.ungetc c
2700
- end
1308
+ self.yacc_value = token
2701
1309
 
2702
- def unread_many str
2703
- str.split(//).reverse.each do |c|
2704
- unread c
2705
- end
2706
- end
2707
- end
2708
1310
 
2709
- class Sexp
2710
- attr_writer :paren
1311
+ self.lex_state = :expr_end if
1312
+ last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
2711
1313
 
2712
- def paren
2713
- @paren ||= false
1314
+ return result
2714
1315
  end
2715
1316
 
2716
- def value
2717
- raise "multi item sexp" if size > 2
2718
- last
2719
- end
1317
+ def yylex_string # 23 lines
1318
+ token = if lex_strterm[0] == :heredoc then
1319
+ self.heredoc lex_strterm
1320
+ else
1321
+ self.parse_string lex_strterm
1322
+ end
2720
1323
 
2721
- def values
2722
- self[1..-1]
2723
- end
1324
+ if token == :tSTRING_END || token == :tREGEXP_END then
1325
+ self.lineno = nil
1326
+ self.lex_strterm = nil
1327
+ self.lex_state = :expr_end
1328
+ end
2724
1329
 
2725
- def node_type
2726
- first
1330
+ return token
2727
1331
  end
2728
-
2729
- kill :add, :add_all
2730
- end
2731
-
2732
- def bitch
2733
- c = caller
2734
- m = c[0].split.last
2735
- warn "bitch: you shouldn't be doing #{m}: from #{c[1]}"
2736
1332
  end
2737
-
2738
- # class NilClass
2739
- # def method_missing msg, *args
2740
- # c = caller
2741
- # warn "called #{msg} on nil (args = #{args.inspect}): from #{c[0]}"
2742
- # nil
2743
- # end
2744
- # end
2745
-
2746
- # def d s
2747
- # warn s.inspect
2748
- # end
2749
-
2750
- # END HACK
2751
- ############################################################