ruby_parser 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ruby_parser might be problematic. Click here for more details.

data/lib/ruby_lexer.rb CHANGED
@@ -1,2751 +1,1332 @@
1
- require 'pp'
2
- require 'stringio'
3
- require 'racc/parser'
4
1
  $: << File.expand_path("~/Work/p4/zss/src/ParseTree/dev/lib") # for me, not you.
5
2
  require 'sexp'
3
+ require 'ruby_parser_extras'
6
4
 
7
- ############################################################
8
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
9
-
10
- class Module
11
- def kill *methods
12
- methods.each do |method|
13
- define_method method do |*args|
14
- c = caller
15
- raise "#{method} is dead - called from #{c[0]}"
16
- end
17
- end
18
- end
19
- end
20
-
21
- # END HACK
22
- ############################################################
23
-
24
- class RubyParser < Racc::Parser
25
- VERSION = '1.0.0'
26
-
27
- attr_accessor :lexer, :in_def, :in_single, :file
28
- attr_reader :env, :warnings
5
+ class RubyLexer
6
+ attr_accessor :command_start
7
+ attr_accessor :cmdarg
8
+ attr_accessor :cond
9
+ attr_accessor :nest
29
10
 
30
- def initialize
31
- super
32
- self.lexer = RubyLexer.new
33
- self.in_def = false
34
- self.in_single = 0
35
- @env = Environment.new
36
- end
11
+ ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-.|(C-|c)\?|(C-|c).|[^0-7xMCc])/
37
12
 
38
- alias :old_yyerror :yyerror
39
- def yyerror msg=nil
40
- warn msg if msg
41
- old_yyerror
42
- end
13
+ # Additional context surrounding tokens that both the lexer and
14
+ # grammar use.
15
+ attr_reader :lex_state
43
16
 
44
- def parse(str, file = "(string)")
45
- raise "bad val: #{str.inspect}" unless String === str
17
+ attr_accessor :lex_strterm
46
18
 
47
- self.file = file
48
- self.lexer.src = StringIO.new(str)
19
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
49
20
 
50
- @yydebug = ENV.has_key? 'DEBUG'
21
+ # Stream of data that yylex examines.
22
+ attr_reader :src
51
23
 
52
- do_parse
53
- end
24
+ # Last token read via yylex.
25
+ attr_accessor :token
54
26
 
55
- def do_parse
56
- _racc_do_parse_rb(_racc_setup, false)
57
- end
27
+ attr_accessor :string_buffer
58
28
 
59
- def yyparse(recv, mid)
60
- _racc_yyparse_rb(recv, mid, _racc_setup, true)
61
- end
29
+ # Value of last token which had a value associated with it.
30
+ attr_accessor :yacc_value
62
31
 
63
- def on_error( error_token_id, error_value, value_stack )
64
- p :error => [ error_token_id, error_value, value_stack ]
65
- raise "boom"
66
- end if ENV["DEBUG"]
32
+ # What handles warnings
33
+ attr_accessor :warnings
67
34
 
68
- def next_token
69
- if self.lexer.advance then
70
- [self.lexer.token, self.lexer.yacc_value]
71
- else
72
- return [false, '$end']
73
- end
74
- end
35
+ EOF = :eof_haha!
75
36
 
76
- def assignable(lhs, value = nil)
77
- id = lhs.to_sym
78
- id = id.to_sym if Token === id
79
-
80
- raise SyntaxError, "Can't change the value of #{id}" if
81
- id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
82
-
83
- result = case id.to_s
84
- when /^@@/ then
85
- asgn = in_def || in_single > 0
86
- s((asgn ? :cvasgn : :cvdecl), id)
87
- when /^@/ then
88
- s(:iasgn, id)
89
- when /^\$/ then
90
- s(:gasgn, id)
91
- when /^[A-Z]/ then
92
- s(:cdecl, id)
93
- else
37
+ # ruby constants for strings (should this be moved somewhere else?)
38
+ STR_FUNC_BORING = 0x00
39
+ STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
40
+ STR_FUNC_EXPAND = 0x02
41
+ STR_FUNC_REGEXP = 0x04
42
+ STR_FUNC_AWORDS = 0x08
43
+ STR_FUNC_SYMBOL = 0x10
44
+ STR_FUNC_INDENT = 0x20 # <<-HEREDOC
45
+
46
+ STR_SQUOTE = STR_FUNC_BORING
47
+ STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
48
+ STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
49
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
50
+ STR_SSYM = STR_FUNC_SYMBOL
51
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
94
52
 
95
- case self.env[id]
96
- when :lvar then
97
- s(:lasgn, id)
98
- when :dvar, nil then
99
- if self.env.current[id] == :dvar then
100
- s(:dasgn_curr, id)
101
- elsif self.env[id] == :dvar then
102
- self.env.use(id)
103
- s(:dasgn, id)
104
- elsif ! self.env.dynamic? then
105
- s(:lasgn, id)
106
- else
107
- s(:dasgn_curr, id)
108
- end
109
- # if env.dynamic? then
110
- # if env.dasgn_curr? id then
111
- # s(:dasgn_curr, id)
112
- # else
113
- # s(:dasgn, id)
114
- # end
115
- # else
116
- # s(:lasgn, id)
117
- # end
118
- else
119
- raise "wtf?"
120
- end
121
- end
53
+ # How the parser advances to the next token.
54
+ #
55
+ # @return true if not at end of file (EOF).
122
56
 
123
- self.env[id] = (self.env.dynamic? ? :dvar : :lvar) unless self.env[id]
57
+ def advance
58
+ r = yylex
59
+ self.token = r
124
60
 
125
- result << value if value
61
+ raise "yylex returned nil" unless r
126
62
 
127
- return result
63
+ return RubyLexer::EOF != r
128
64
  end
129
65
 
130
- def warnings= warnings
131
- @warnings = warnings
132
-
133
- self.lexer.warnings = warnings
66
+ def arg_ambiguous
67
+ self.warning("Ambiguous first argument. make sure.")
134
68
  end
135
69
 
136
- def arg_add(node1, node2)
137
- return s(:array, node2) unless node1
138
- return node1 << node2 if node1[0] == :array
139
- return s(:argspush, node1, node2)
70
+ def comments
71
+ c = @comments.join
72
+ @comments.clear
73
+ c
140
74
  end
141
75
 
142
- def node_assign(lhs, rhs)
143
- return nil unless lhs
144
-
145
- rhs = value_expr rhs
146
-
147
- case lhs[0]
148
- when :gasgn, :iasgn, :lasgn, :dasgn, :dasgn_curr,
149
- :masgn, :cdecl, :cvdecl, :cvasgn then
150
- lhs << rhs
151
- when :attrasgn, :call then
152
- args = lhs.array(true) || lhs.argscat(true) || lhs.splat(true) # FIX: fragile
153
- # args = case lhs[1][1]
154
- # when :array, :argscat, :splat then
155
- # lhs.delete_at 1
156
- # else
157
- # nil # TODO: check - no clue what it should be, or even if
158
- # end
159
-
160
- lhs << arg_add(args, rhs)
161
- end
162
-
163
- lhs
76
+ def expr_beg_push val
77
+ cond.push false
78
+ cmdarg.push false
79
+ self.lex_state = :expr_beg
80
+ self.yacc_value = val
164
81
  end
165
82
 
166
- def gettable(id)
167
- id = id.to_sym if Token === id # HACK
168
- id = id.last.to_sym if Sexp === id # HACK
169
- id = id.to_sym if String === id # HACK
170
-
171
- return s(:self) if id == :self
172
- return s(:nil) if id == :nil
173
- return s(:true) if id == :true
174
- return s(:false) if id == :false
175
- return s(:str, self.file) if id == :"__FILE__"
176
- return s(:lit, lexer.src.current_line) if id == :"__LINE__"
177
-
178
- result = case id.to_s
179
- when /^@@/ then
180
- s(:cvar, id)
181
- when /^@/ then
182
- s(:ivar, id)
183
- when /^\$/ then
184
- s(:gvar, id)
185
- when /^[A-Z]/ then
186
- s(:const, id)
187
- else
188
- type = env[id]
189
- if type then
190
- s(type, id)
191
- elsif env.dynamic? and :dvar == env[id] then
192
- s(:dvar, id)
193
- else
194
- s(:vcall, id)
195
- end
196
- end
197
-
198
- return result if result
199
-
200
- raise "identifier #{id.inspect} is not valid"
83
+ def fix_arg_lex_state
84
+ self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
85
+ :expr_arg
86
+ else
87
+ :expr_beg
88
+ end
201
89
  end
202
90
 
203
- def block_append(head, tail, strip_tail_block=false)
204
- return head unless tail
205
- return tail unless head
91
+ def heredoc here # 63 lines
92
+ _, eos, func, last_line = here
206
93
 
207
- case head[0]
208
- when :lit, :str then
209
- return tail
210
- end
94
+ indent = (func & STR_FUNC_INDENT) != 0
95
+ expand = (func & STR_FUNC_EXPAND) != 0
96
+ eos_re = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
97
+ err_msg = "can't match #{eos_re.inspect} anywhere in "
211
98
 
212
- head = remove_begin(head)
213
- head = s(:block, head) unless head[0] == :block
99
+ rb_compile_error err_msg if
100
+ src.eos?
214
101
 
215
- if strip_tail_block and Sexp === tail and tail[0] == :block then
216
- head.push(*tail.values)
217
- else
218
- head << tail
102
+ if src.beginning_of_line? && src.scan(eos_re) then
103
+ src.unread_many last_line # TODO: figure out how to remove this
104
+ self.yacc_value = eos
105
+ return :tSTRING_END
219
106
  end
220
- end
221
107
 
222
- def new_yield(node)
223
- if node then
224
- raise SyntaxError, "Block argument should not be given." if
225
- node.node_type == :block_pass
108
+ self.string_buffer = []
226
109
 
227
- node = node.last if node.node_type == :array and node.size == 2
228
- end
110
+ if expand then
111
+ case
112
+ when src.scan(/#[$@]/) then
113
+ src.pos -= 1 # FIX omg stupid
114
+ self.yacc_value = src.matched
115
+ return :tSTRING_DVAR
116
+ when src.scan(/#[{]/) then
117
+ self.yacc_value = src.matched
118
+ return :tSTRING_DBEG
119
+ when src.scan(/#/) then
120
+ string_buffer << '#'
121
+ end
229
122
 
230
- return s(:yield, node)
231
- end
123
+ until src.scan(eos_re) do
124
+ c = tokadd_string func, "\n", nil
232
125
 
233
- def logop(type, left, right)
234
- left = value_expr left
126
+ rb_compile_error err_msg if
127
+ c == RubyLexer::EOF
235
128
 
236
- if left and left[0] == type and not left.paren then
237
- node, second = left, nil
129
+ if c != "\n" then
130
+ self.yacc_value = string_buffer.join.delete("\r")
131
+ return :tSTRING_CONTENT
132
+ else
133
+ string_buffer << src.scan(/\n/)
134
+ end
238
135
 
239
- while (second = node[2]) && second[0] == type and not second.paren do
240
- node = second
136
+ rb_compile_error err_msg if
137
+ src.eos?
241
138
  end
242
139
 
243
- node[2] = s(type, second, right)
244
-
245
- return left
140
+ # tack on a NL after the heredoc token - FIX NL should not be needed
141
+ src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
142
+ else
143
+ until src.check(eos_re) do
144
+ string_buffer << src.scan(/.*(\n|\z)/)
145
+ rb_compile_error err_msg if
146
+ src.eos?
147
+ end
246
148
  end
247
149
 
248
- return s(type, left, right)
150
+ self.lex_strterm = [:heredoc, eos, func, last_line]
151
+ self.yacc_value = string_buffer.join.delete("\r")
152
+
153
+ return :tSTRING_CONTENT
249
154
  end
250
155
 
251
- def new_call recv, meth, args = nil # REFACTOR - merge with fcall
252
- if args && args[0] == :block_pass then
253
- new_args = args.array(true) || args.argscat(true) || args.splat(true)
254
- call = s(:call, recv, meth)
255
- call << new_args if new_args
256
- args << call
156
+ def heredoc_identifier # 51 lines
157
+ term, func = nil, STR_FUNC_BORING
158
+ self.string_buffer = []
257
159
 
258
- return args
160
+ case
161
+ when src.scan(/(-?)(['"`])(.*?)\2/) then
162
+ term = src[2]
163
+ unless src[1].empty? then
164
+ func |= STR_FUNC_INDENT
165
+ end
166
+ func |= case term
167
+ when "\'" then
168
+ STR_SQUOTE
169
+ when '"' then
170
+ STR_DQUOTE
171
+ else
172
+ STR_XQUOTE
173
+ end
174
+ string_buffer << src[3]
175
+ when src.scan(/-?(['"`])(?!\1*\Z)/) then
176
+ rb_compile_error "unterminated here document identifier"
177
+ when src.scan(/(-?)(\w+)/) then
178
+ term = '"'
179
+ func |= STR_DQUOTE
180
+ unless src[1].empty? then
181
+ func |= STR_FUNC_INDENT
182
+ end
183
+ string_buffer << src[2]
184
+ else
185
+ return nil
259
186
  end
260
- result = s(:call, recv, meth)
261
- result << args if args
262
- result
263
- end
264
187
 
265
- def new_fcall meth, args
266
- if args and args[0] == :block_pass then
267
- new_args = args.array(true) || args.argscat(true) || args.splat(true)
268
- call = s(:fcall, meth)
269
- call << new_args if new_args
270
- args << call
271
- return args
188
+ if src.check(/.*\n/) then
189
+ # TODO: think about storing off the char range instead
190
+ line = src.string[src.pos, src.matched_size]
191
+ src.string[src.pos, src.matched_size] = ''
192
+ else
193
+ line = nil
272
194
  end
273
195
 
274
- r = s(:fcall, meth)
275
- r << args if args and args != s(:array)
276
- r
277
- end
196
+ self.lex_strterm = [:heredoc, string_buffer.join, func, line]
278
197
 
279
- def arg_blk_pass node1, node2
280
- if node2 then
281
- node2.insert 1, node1
282
- return node2
198
+ if term == '`' then
199
+ self.yacc_value = "`"
200
+ return :tXSTRING_BEG
283
201
  else
284
- node1
202
+ self.yacc_value = "\""
203
+ return :tSTRING_BEG
285
204
  end
286
205
  end
287
206
 
288
- def get_match_node lhs, rhs
289
- if lhs then
290
- case lhs[0]
291
- when :dregx, :dregx_once then
292
- return s(:match2, lhs, rhs)
293
- when :lit then
294
- return s(:match2, lhs, rhs) if Regexp === lhs.last
295
- end
296
- end
207
+ def initialize
208
+ self.cond = StackState.new(:cond)
209
+ self.cmdarg = StackState.new(:cmdarg)
210
+ self.nest = 0
211
+ @comments = []
297
212
 
298
- if rhs then
299
- case rhs[0]
300
- when :dregx, :dregx_once then
301
- return s(:match3, rhs, lhs)
302
- when :lit then
303
- return s(:match3, rhs, lhs) if Regexp === rhs.last
304
- end
305
- end
213
+ reset
214
+ end
306
215
 
307
- return s(:call, lhs, :"=~", s(:array, rhs))
216
+ def int_with_base base
217
+ rb_compile_error "Invalid numeric format" if src.matched =~ /__/
218
+ self.yacc_value = src.matched.to_i(base)
219
+ return :tINTEGER
308
220
  end
309
221
 
310
- def cond node
311
- return nil if node.nil?
312
- node = value_expr node
313
-
314
- case node.first
315
- when :dregex then
316
- return s(:match2, node, s(:gvar, "$_".to_sym))
317
- when :regex then
318
- return s(:match, node)
319
- when :lit then
320
- if Regexp === node.last then
321
- return s(:match, node)
322
- else
323
- return node
222
+ def lex_state= o
223
+ raise "wtf?" unless Symbol === o
224
+ @lex_state = o
225
+ end
226
+
227
+ attr_writer :lineno
228
+ def lineno
229
+ @lineno ||= src.lineno
230
+ end
231
+
232
+ ##
233
+ # Parse a number from the input stream.
234
+ #
235
+ # @param c The first character of the number.
236
+ # @return An int constant which represents a token.
237
+
238
+ def parse_number
239
+ self.lex_state = :expr_end
240
+
241
+ case
242
+ when src.scan(/[+-]?0[xbd]\b/) then
243
+ rb_compile_error "Invalid numeric format"
244
+ when src.scan(/[+-]?0x[a-f0-9_]+/i) then
245
+ int_with_base(16)
246
+ when src.scan(/[+-]?0b[01_]+/) then
247
+ int_with_base(2)
248
+ when src.scan(/[+-]?0d[0-9_]+/) then
249
+ int_with_base(10)
250
+ when src.scan(/[+-]?0o?[0-7_]*[89]/) then
251
+ rb_compile_error "Illegal octal digit."
252
+ when src.scan(/[+-]?0o?[0-7_]+|0o/) then
253
+ int_with_base(8)
254
+ when src.scan(/[+-]?[\d_]+_(e|\.)/) then
255
+ rb_compile_error "Trailing '_' in number."
256
+ when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
257
+ number = src.matched
258
+ if number =~ /__/ then
259
+ rb_compile_error "Invalid numeric format"
324
260
  end
325
- when :and then
326
- return s(:and, cond(node[1]), cond(node[2]))
327
- when :or then
328
- return s(:or, cond(node[1]), cond(node[2]))
329
- when :dot2 then
330
- label = "flip#{node.hash}"
331
- env[label] = self.env.dynamic? ? :dvar : :lvar
332
- return s(:flip2, node[1], node[2])
333
- when :dot3 then
334
- label = "flip#{node.hash}"
335
- env[label] = self.env.dynamic? ? :dvar : :lvar
336
- return s(:flip3, node[1], node[2])
261
+ self.yacc_value = number.to_f
262
+ :tFLOAT
263
+ when src.scan(/[+-]?0\b/) then
264
+ int_with_base(10)
265
+ when src.scan(/[+-]?[\d_]+\b/) then
266
+ int_with_base(10)
337
267
  else
338
- return node
268
+ rb_compile_error "Bad number format"
339
269
  end
340
270
  end
341
271
 
342
- def append_to_block head, tail # FIX: wtf is this?!? switch to block_append
343
- return head if tail.nil?
344
- return tail if head.nil?
272
+ def parse_quote # 58 lines
273
+ beg, nnd, short_hand, c = nil, nil, false, nil
345
274
 
346
- head = s(:block, head) unless head.first == :block
347
- head << tail
348
- end
275
+ if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
276
+ rb_compile_error "unknown type of %string" if src.matched_size == 2
277
+ c, beg, short_hand = src.matched, src.getch, false
278
+ else # Short-hand (e.g. %{, %., %!, etc)
279
+ c, beg, short_hand = 'Q', src.getch, true
280
+ end
349
281
 
350
- def new_super args
351
- if args && args.first == :block_pass then
352
- t, body, bp = args
353
- result = s(t, bp, s(:super, body))
354
- else
355
- result = s(:super)
356
- result << args if args and args != s(:array)
282
+ if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
283
+ rb_compile_error "unterminated quoted string meets end of file"
357
284
  end
358
- result
359
- end
360
285
 
361
- def aryset receiver, index
362
- s(:attrasgn, receiver, :"[]=", index)
363
- end
286
+ # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
287
+ nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
288
+ nnd, beg = beg, "\0" if nnd.nil?
289
+
290
+ token_type, self.yacc_value = nil, "%#{c}#{beg}"
291
+ token_type, string_type = case c
292
+ when 'Q' then
293
+ ch = short_hand ? nnd : c + beg
294
+ self.yacc_value = "%#{ch}"
295
+ [:tSTRING_BEG, STR_DQUOTE]
296
+ when 'q' then
297
+ [:tSTRING_BEG, STR_SQUOTE]
298
+ when 'W' then
299
+ src.scan(/\s*/)
300
+ [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
301
+ when 'w' then
302
+ src.scan(/\s*/)
303
+ [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
304
+ when 'x' then
305
+ [:tXSTRING_BEG, STR_XQUOTE]
306
+ when 'r' then
307
+ [:tREGEXP_BEG, STR_REGEXP]
308
+ when 's' then
309
+ self.lex_state = :expr_fname
310
+ [:tSYMBEG, STR_SSYM]
311
+ end
312
+
313
+ rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
314
+ token_type.nil?
315
+
316
+ self.lex_strterm = [:strterm, string_type, nnd, beg]
364
317
 
365
- def arg_concat node1, node2
366
- return node2.nil? ? node1 : s(:argscat, node1, node2)
318
+ return token_type
367
319
  end
368
320
 
369
- def list_append list, item # TODO: nuke me *sigh*
370
- return s(:array, item) unless list
371
- list << item
372
- end
321
+ def parse_string(quote) # 65 lines
322
+ _, string_type, term, open = quote
373
323
 
374
- def literal_concat head, tail
375
- return tail unless head
376
- return head unless tail
324
+ space = false # FIX: remove these
325
+ func = string_type
326
+ paren = open
327
+ term_re = Regexp.escape term
377
328
 
378
- htype, ttype = head[0], tail[0]
329
+ awords = (func & STR_FUNC_AWORDS) != 0
330
+ regexp = (func & STR_FUNC_REGEXP) != 0
331
+ expand = (func & STR_FUNC_EXPAND) != 0
379
332
 
380
- head = s(:dstr, '', head) if htype == :evstr
333
+ unless func then # FIX: impossible, prolly needs == 0
334
+ self.lineno = nil
335
+ return :tSTRING_END
336
+ end
381
337
 
382
- case ttype
383
- when :str then
384
- if htype == :str
385
- head[-1] << tail[-1]
386
- elsif htype == :dstr and head.size == 2 then
387
- head[-1] << tail[-1]
388
- else
389
- head << tail
390
- end
391
- when :dstr then
392
- if htype == :str then
393
- tail[1] = head[-1] + tail[1]
394
- head = tail
395
- else
396
- tail[0] = :array
397
- tail[1] = s(:str, tail[1])
398
- tail.delete_at 1 if tail[1] == s(:str, '')
338
+ space = true if awords and src.scan(/\s+/)
399
339
 
400
- head.push(*tail[1..-1])
401
- end
402
- when :evstr then
403
- head[0] = :dstr if htype == :str
404
- if head.size == 2 and tail[1][0] == :str then
405
- head[-1] << tail[1][-1]
406
- head[0] = :str if head.size == 2 # HACK ?
340
+ if self.nest == 0 && src.scan(/#{term_re}/) then
341
+ if awords then
342
+ quote[1] = nil
343
+ return :tSPACE
344
+ elsif regexp then
345
+ self.yacc_value = self.regx_options
346
+ self.lineno = nil
347
+ return :tREGEXP_END
407
348
  else
408
- head.push(tail)
349
+ self.yacc_value = term
350
+ self.lineno = nil
351
+ return :tSTRING_END
409
352
  end
410
353
  end
411
354
 
412
- return head
413
- end
355
+ if space then
356
+ return :tSPACE
357
+ end
414
358
 
415
- def remove_begin node
416
- node = node[-1] if node and node[0] == :begin and node.size == 2
417
- node
418
- end
359
+ self.string_buffer = []
419
360
 
420
- def ret_args node
421
- if node then
422
- if node[0] == :block_pass then
423
- raise SyntaxError, "block argument should not be given"
361
+ if expand
362
+ case
363
+ when src.scan(/#(?=[$@])/) then
364
+ return :tSTRING_DVAR
365
+ when src.scan(/#[{]/) then
366
+ return :tSTRING_DBEG
367
+ when src.scan(/#/) then
368
+ string_buffer << '#'
424
369
  end
425
-
426
- node = node.last if node[0] == :array && node.size == 2
427
- node = s(:svalue, node) if node[0] == :splat and not node.paren # HACK matz wraps ONE of the FOUR splats in a newline to distinguish. I use paren for now. ugh
428
370
  end
429
371
 
430
- node
431
- end
372
+ if tokadd_string(func, term, paren) == RubyLexer::EOF then
373
+ rb_compile_error "unterminated string meets end of file"
374
+ end
432
375
 
433
- def value_expr node # HACK
434
- node = remove_begin node
435
- node[2] = value_expr(node[2]) if node and node[0] == :if
436
- node
437
- end
376
+ self.yacc_value = string_buffer.join
438
377
 
439
- def void_stmts node
440
- return nil unless node
441
- return node unless node[0] == :block
442
378
 
443
- node[1..-2] = node[1..-2].map { |n| remove_begin(n) }
444
- node
379
+ return :tSTRING_CONTENT
445
380
  end
446
381
 
447
- ############################################################
448
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
382
+ def rb_compile_error msg
383
+ msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
384
+ raise SyntaxError, msg
385
+ end
386
+
387
+ def read_escape # 51 lines
388
+ case
389
+ when src.scan(/\\/) then # Backslash
390
+ '\\'
391
+ when src.scan(/n/) then # newline
392
+ "\n"
393
+ when src.scan(/t/) then # horizontal tab
394
+ "\t"
395
+ when src.scan(/r/) then # carriage-return
396
+ "\r"
397
+ when src.scan(/f/) then # form-feed
398
+ "\f"
399
+ when src.scan(/v/) then # vertical tab
400
+ "\13"
401
+ when src.scan(/a/) then # alarm(bell)
402
+ "\007"
403
+ when src.scan(/e/) then # escape
404
+ "\033"
405
+ when src.scan(/b/) then # backspace
406
+ "\010"
407
+ when src.scan(/s/) then # space
408
+ " "
409
+ when src.scan(/[0-7]{1,3}/) then # octal constant
410
+ src.matched.to_i(8).chr
411
+ when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
412
+ src[1].to_i(16).chr
413
+ when src.scan(/M-\\/) then
414
+ c = self.read_escape
415
+ c[0] = (c[0].ord | 0x80).chr
416
+ c
417
+ when src.scan(/M-(.)/) then
418
+ c = src[1]
419
+ c[0] = (c[0].ord | 0x80).chr
420
+ c
421
+ when src.scan(/C-\\|c\\/) then
422
+ c = self.read_escape
423
+ c[0] = (c[0].ord & 0x9f).chr
424
+ c
425
+ when src.scan(/C-\?|c\?/) then
426
+ 0177.chr
427
+ when src.scan(/(C-|c)(.)/) then
428
+ c = src[2]
429
+ c[0] = (c[0].ord & 0x9f).chr
430
+ c
431
+ when src.scan(/[McCx0-9]/) || src.eos? then
432
+ rb_compile_error("Invalid escape character syntax")
433
+ else
434
+ src.getch
435
+ end
436
+ end
449
437
 
450
- def dyna_init body, known_vars = []
451
- var = nil
452
- vars = self.env.dynamic.keys - known_vars
438
+ def regx_options # 15 lines
439
+ good, bad = [], []
453
440
 
454
- vars.each do |id|
455
- if self.env.used? id then
456
- var = s(:dasgn_curr, id, var).compact
457
- end
441
+ if src.scan(/[a-z]+/) then
442
+ good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
458
443
  end
459
444
 
460
- self.block_append(var, body, body && body[0] == :block)
461
- end
445
+ unless bad.empty? then
446
+ rb_compile_error("unknown regexp option%s - %s" %
447
+ [(bad.size > 1 ? "s" : ""), bad.join.inspect])
448
+ end
462
449
 
463
- def warning s
464
- # do nothing for now
450
+ return good.join
465
451
  end
466
452
 
467
- kill :is_in_def, :is_in_single, :push_local_scope, :pop_local_scope, :support
453
+ def reset
454
+ self.command_start = true
455
+ self.lex_strterm = nil
456
+ self.token = nil
457
+ self.yacc_value = nil
468
458
 
469
- # END HACK
470
- ############################################################$
459
+ @src = nil
460
+ @lex_state = nil
461
+ end
471
462
 
472
- end
463
+ def src= src
464
+ raise "bad src: #{src.inspect}" unless String === src
465
+ @src = RPStringScanner.new(src)
466
+ end
467
+
468
+ def tokadd_escape term # 20 lines
469
+ case
470
+ when src.scan(/\\\n/) then
471
+ # just ignore
472
+ when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
473
+ self.string_buffer << src.matched
474
+ when src.scan(/\\([MC]-|c)(?=\\)/) then
475
+ self.string_buffer << src.matched
476
+ self.tokadd_escape term
477
+ when src.scan(/\\([MC]-|c)(.)/) then
478
+ self.string_buffer << src.matched
479
+ when src.scan(/\\[McCx]/) then
480
+ rb_compile_error "Invalid escape character syntax"
481
+ when src.scan(/\\(.)/m) then
482
+ self.string_buffer << src.matched
483
+ else
484
+ rb_compile_error "Invalid escape character syntax"
485
+ end
486
+ end
473
487
 
474
- class RubyLexer
475
- attr_accessor :command_start
476
- attr_accessor :cmdarg
477
- attr_accessor :cond
478
- attr_accessor :nest
488
+ def tokadd_string(func, term, paren) # 105 lines
489
+ awords = (func & STR_FUNC_AWORDS) != 0
490
+ escape = (func & STR_FUNC_ESCAPE) != 0
491
+ expand = (func & STR_FUNC_EXPAND) != 0
492
+ regexp = (func & STR_FUNC_REGEXP) != 0
493
+ symbol = (func & STR_FUNC_SYMBOL) != 0
479
494
 
480
- # Additional context surrounding tokens that both the lexer and
481
- # grammar use.
482
- attr_reader :lex_state
495
+ paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
496
+ term_re = Regexp.new(Regexp.escape(term))
483
497
 
484
- def lex_state= o
485
- raise "wtf?" unless Symbol === o
486
- @lex_state = o
487
- end
498
+ until src.eos? do
499
+ c = nil
500
+ handled = true
501
+ case
502
+ when self.nest == 0 && src.scan(term_re) then
503
+ src.pos -= 1
504
+ break
505
+ when paren_re && src.scan(paren_re) then
506
+ self.nest += 1
507
+ when src.scan(term_re) then
508
+ self.nest -= 1
509
+ when awords && src.scan(/\s/) then
510
+ src.pos -= 1
511
+ break
512
+ when expand && src.scan(/#(?=[\$\@\{])/) then
513
+ src.pos -= 1
514
+ break
515
+ when expand && src.scan(/#(?!\n)/) then
516
+ # do nothing
517
+ when src.check(/\\/) then
518
+ case
519
+ when awords && src.scan(/\\\n/) then
520
+ string_buffer << "\n"
521
+ next
522
+ when awords && src.scan(/\\\s/) then
523
+ c = ' '
524
+ when expand && src.scan(/\\\n/) then
525
+ next
526
+ when regexp && src.check(/\\/) then
527
+ self.tokadd_escape term
528
+ next
529
+ when expand && src.scan(/\\/) then
530
+ c = self.read_escape
531
+ when src.scan(/\\\n/) then
532
+ # do nothing
533
+ when src.scan(/\\\\/) then
534
+ string_buffer << '\\' if escape
535
+ c = '\\'
536
+ when src.scan(/\\/) then
537
+ unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
538
+ string_buffer << "\\"
539
+ end
540
+ else
541
+ handled = false
542
+ end
543
+ else
544
+ handled = false
545
+ end # case
488
546
 
489
- attr_accessor :end_seen # TODO: figure out if I really need this
547
+ unless handled then
490
548
 
491
- attr_accessor :lex_strterm
549
+ t = Regexp.escape term
550
+ x = Regexp.escape(paren) if paren && paren != "\000"
551
+ re = if awords then
552
+ /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
553
+ else
554
+ /[^#{t}#{x}\#\0\\]+|./
555
+ end
492
556
 
493
- # Used for tiny smidgen of grammar in lexer
494
- attr_accessor :parser_support # TODO: remove
557
+ src.scan re
558
+ c = src.matched
495
559
 
496
- # Stream of data that yylex examines.
497
- attr_accessor :src
560
+ rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
561
+ end # unless handled
498
562
 
499
- # Last token read via yylex.
500
- attr_accessor :token
563
+ c ||= src.matched
564
+ string_buffer << c
565
+ end # until
501
566
 
502
- # Tempory buffer to build up a potential token. Consumer takes
503
- # responsibility to reset this before use.
504
- attr_accessor :token_buffer
567
+ c ||= src.matched
568
+ c = RubyLexer::EOF if src.eos?
505
569
 
506
- # Value of last token which had a value associated with it.
507
- attr_accessor :yacc_value
508
570
 
509
- # What handles warnings
510
- attr_accessor :warnings
571
+ return c
572
+ end
511
573
 
512
- # TODO: remove all of these
513
- alias :source= :src=
514
- alias :str_term :lex_strterm
515
- alias :str_term= :lex_strterm=
516
- alias :state :lex_state
517
- alias :state= :lex_state=
518
- alias :value :yacc_value
519
- alias :value= :yacc_value=
520
- alias :getCmdArgumentState :cmdarg
574
+ def unescape s
575
+
576
+ r = {
577
+ "a" => "\007",
578
+ "b" => "\010",
579
+ "e" => "\033",
580
+ "f" => "\f",
581
+ "n" => "\n",
582
+ "r" => "\r",
583
+ "s" => " ",
584
+ "t" => "\t",
585
+ "v" => "\13",
586
+ "\\" => '\\',
587
+ "\n" => "",
588
+ "C-\?" => 0177.chr,
589
+ "c\?" => 0177.chr,
590
+ }[s]
591
+
592
+ return r if r
593
+
594
+ case s
595
+ when /^[0-7]{1,3}/ then
596
+ $&.to_i(8).chr
597
+ when /^x([0-9a-fA-F]{1,2})/ then
598
+ $1.to_i(16).chr
599
+ when /^M-(.)/ then
600
+ ($1[0].ord | 0x80).chr
601
+ when /^(C-|c)(.)/ then
602
+ ($2[0].ord & 0x9f).chr
603
+ when /^[McCx0-9]/ then
604
+ rb_compile_error("Invalid escape character syntax")
605
+ else
606
+ s
607
+ end
608
+ end
521
609
 
522
- # Give a name to a value. Enebo: This should be used more.
523
- # HACK OMG HORRIBLE KILL ME NOW. Enebo, no. this shouldn't be used more
524
- EOF = nil # was 0... ugh
610
+ def warning s
611
+ # do nothing for now
612
+ end
525
613
 
526
- # ruby constants for strings (should this be moved somewhere else?)
527
- STR_FUNC_ESCAPE=0x01
528
- STR_FUNC_EXPAND=0x02
529
- STR_FUNC_REGEXP=0x04
530
- STR_FUNC_QWORDS=0x08
531
- STR_FUNC_SYMBOL=0x10
532
- STR_FUNC_INDENT=0x20 # <<-HEREDOC
533
-
534
- STR_SQUOTE = 0
535
- STR_DQUOTE = STR_FUNC_EXPAND
536
- STR_XQUOTE = STR_FUNC_EXPAND
537
- STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
538
- STR_SSYM = STR_FUNC_SYMBOL
539
- STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
614
+ ##
615
+ # Returns the next token. Also sets yy_val if needed.
616
+ #
617
+ # @return Description of the Returned Value
540
618
 
541
- def initialize
542
- self.parser_support = nil
543
- self.token_buffer = []
544
- self.cond = StackState.new(:cond)
545
- self.cmdarg = StackState.new(:cmdarg)
546
- self.nest = 0
547
- self.end_seen = false
619
+ def yylex # 826 lines
548
620
 
549
- reset
550
- end
621
+ c = ''
622
+ space_seen = false
623
+ command_state = false
624
+ src = self.src
551
625
 
552
- def reset
553
626
  self.token = nil
554
627
  self.yacc_value = nil
555
- self.src = nil
556
- @lex_state = nil
557
- self.lex_strterm = nil
558
- self.command_start = true
559
- end
560
628
 
561
- # How the parser advances to the next token.
562
- #
563
- # @return true if not at end of file (EOF).
629
+ return yylex_string if lex_strterm
564
630
 
565
- def advance
566
- r = yylex
567
- self.token = r
568
- return r != RubyLexer::EOF
569
- end
631
+ command_state = self.command_start
632
+ self.command_start = false
570
633
 
571
- def parse_string(quote)
572
- _, string_type, term, open = quote
634
+ last_state = lex_state
573
635
 
574
- space = false # FIX: remove these
575
- func = string_type
576
- paren = open
636
+ loop do # START OF CASE
637
+ if src.scan(/\ |\t|\r|\f|\13/) then # white spaces, 13 = '\v
638
+ space_seen = true
639
+ next
640
+ elsif src.check(/[^a-zA-Z]/) then
641
+ if src.scan(/\n|#/) then
642
+ self.lineno = nil
643
+ c = src.matched
644
+ if c == '#' then
645
+ src.unread c # ok
646
+
647
+ while src.scan(/\s*#.*(\n+|\z)/) do
648
+ @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
649
+ end
577
650
 
578
- return :tSTRING_END unless func
651
+ if src.eos? then
652
+ return RubyLexer::EOF
653
+ end
654
+ else
655
+ end
579
656
 
580
- c = src.read
657
+ # Replace a string of newlines with a single one
658
+ src.scan(/\n+/)
581
659
 
582
- if (func & STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
583
- begin
584
- c = src.read
585
- break if c == RubyLexer::EOF # HACK UGH
586
- end while String === c and c =~ /\s/
587
- space = true
588
- end
660
+ if [:expr_beg, :expr_fname,
661
+ :expr_dot, :expr_class].include? lex_state then
662
+ next
663
+ end
589
664
 
590
- if c == term && self.nest == 0 then
591
- if func & STR_FUNC_QWORDS != 0 then
592
- quote[1] = nil
593
- return ' '
594
- end
595
- unless func & STR_FUNC_REGEXP != 0 then
596
- self.yacc_value = t(term)
597
- return :tSTRING_END
598
- end
599
- self.yacc_value = self.regx_options
600
- return :tREGEXP_END
601
- end
602
-
603
- if space then
604
- src.unread c
605
- return ' '
606
- end
607
-
608
- self.token_buffer = []
609
-
610
- if (func & STR_FUNC_EXPAND) != 0 && c == '#' then
611
- case c = src.read
612
- when '$', '@' then
613
- src.unread c
614
- return :tSTRING_DVAR
615
- when '{' then
616
- return :tSTRING_DBEG
617
- end
618
- token_buffer << '#'
619
- end
620
-
621
- src.unread c
622
-
623
- if tokadd_string(func, term, paren, token_buffer) == RubyLexer::EOF then
624
- # HACK ruby_sourceline = nd_line(quote)
625
- raise "unterminated string meets end of file"
626
- return :tSTRING_END
627
- end
628
-
629
- self.yacc_value = s(:str, token_buffer.join)
630
- return :tSTRING_CONTENT
631
- end
632
-
633
- def regx_options
634
- options = []
635
- bad = []
636
-
637
- while c = src.read and c =~ /[a-z]/ do
638
- case c
639
- when /^[ixmonesu]$/ then
640
- options << c
641
- else
642
- bad << c
643
- end
644
- end
645
-
646
- src.unread c
647
-
648
- rb_compile_error("unknown regexp option%s - %s" %
649
- [(bad.size > 1 ? "s" : ""), bad.join.inspect]) unless bad.empty?
650
-
651
- return options.join
652
- end
653
-
654
- def tokadd_escape term
655
- case c = src.read
656
- when "\n" then
657
- return false # just ignore
658
- when /0-7/ then # octal constant
659
- tokadd "\\"
660
- tokadd c
661
-
662
- 2.times do |i|
663
- c = src.read
664
- # HACK goto eof if (c == -1)
665
- if c < "0" || "7" < c then
666
- pushback c
667
- break
668
- end
669
- tokadd c
670
- end
671
-
672
- return false
673
- when "x" then # hex constant
674
- tokadd "\\"
675
- tokadd c
676
-
677
- 2.times do
678
- c = src.read
679
- unless c =~ /[0-9a-f]/i then # TODO error case? empty?
680
- src.unread c
681
- break
682
- end
683
- tokadd c
684
- end
685
-
686
- return false
687
- when "M" then
688
- if (c = src.read()) != "-" then
689
- yyerror "Invalid escape character syntax"
690
- pushback c
691
- return false
692
- end
693
- tokadd "\\"
694
- tokadd "M"
695
- tokadd "-"
696
- raise "not yet"
697
- # goto escaped;
698
- when "C" then
699
- if (c = src.read) != "-" then
700
- yyerror "Invalid escape character syntax"
701
- pushback c
702
- return false
703
- end
704
- tokadd "\\"
705
- tokadd "C"
706
- tokadd "-"
707
- raise "not yet"
708
- # HACK goto escaped;
709
- when "c" then
710
- tokadd "\\"
711
- tokadd "c"
712
- # HACK escaped:
713
- if (c = src.read) == "\\" then
714
- return tokadd_escape(term)
715
- elsif c == -1 then
716
- raise "no"
717
- # HACK goto eof
718
- end
719
- tokadd c
720
- return false
721
- # HACK eof
722
- when RubyLexer::EOF then
723
- yyerror "Invalid escape character syntax"
724
- return true
725
- else
726
- if (c != "\\" || c != term)
727
- tokadd "\\"
728
- end
729
- tokadd c
730
- end
731
- return false
732
- end
733
-
734
- def read_escape
735
- case c = src.read
736
- when "\\" then # Backslash
737
- return c
738
- when "n" then # newline
739
- return "\n"
740
- when "t" then # horizontal tab
741
- return "\t"
742
- when "r" then # carriage-return
743
- return "\r"
744
- when "f" then # form-feed
745
- return "\f"
746
- when "v" then # vertical tab
747
- return "\13"
748
- when "a" then # alarm(bell)
749
- return "\007"
750
- when 'e' then # escape
751
- return "\033"
752
- when /[0-7]/ then # octal constant
753
- src.unread c # TODO this seems dumb
754
-
755
- n = 0
756
-
757
- 3.times do
758
- c = src.read
759
- unless c =~ /[0-7]/ then
760
- src.unread c
761
- break
762
- end
763
- n <<= 3
764
- n |= c[0] - ?0
765
- end
766
-
767
- return n.chr
768
- when "x" then # hex constant
769
- n = 0
770
-
771
- 2.times do
772
- c = src.read.downcase
773
- unless c =~ /[0-9a-f]/i then
774
- src.unread c
775
- break
776
- end
777
- n <<= 4
778
- n |= case c[0] # TODO: I'm sure there is a better way... but I'm tired
779
- when ?a..?f then
780
- c[0] - ?a + 10
781
- when ?A..?F then
782
- c[0] - ?A + 10
783
- when ?0..?9 then
784
- c[0] - ?0
785
- else
786
- raise "wtf?: #{c.inspect}"
787
- end
788
- end
789
-
790
- return n.chr
791
- when "b" then # backspace
792
- return "\010"
793
- when "s" then # space
794
- return " "
795
- when "M" then
796
- c = src.read
797
- if c != "-" then
798
- yyerror("Invalid escape character syntax")
799
- src.unread c
800
- return "\0"
801
- end
802
-
803
- c = src.read
804
- case c
805
- when "\\" then
806
- c = self.read_escape
807
- c[0] |= 0x80
808
- return c
809
- when RubyLexer::EOF then
810
- yyerror("Invalid escape character syntax");
811
- return '\0';
812
- else
813
- c[0] |= 0x80
814
- return c
815
- end
816
- when "C", "c" then
817
- if (c = src.read) != "-" then
818
- yyerror("Invalid escape character syntax")
819
- pushback(c)
820
- return "\0"
821
- end if c == "C"
822
-
823
- case c = src.read
824
- when "\\" then
825
- c = read_escape
826
- when "?" then
827
- return 0177
828
- when RubyLexer::EOF then
829
- yyerror("Invalid escape character syntax");
830
- return "\0";
831
- end
832
- c[0] &= 0x9f
833
- return c
834
- when RubyLexer::EOF then
835
- yyerror("Invalid escape character syntax")
836
- return "\0"
837
- else
838
- return c
839
- end
840
- end
841
-
842
- def tokadd_string(func, term, paren, buffer)
843
- until (c = src.read) == RubyLexer::EOF do
844
- if c == paren then
845
- self.nest += 1
846
- elsif c == term then
847
- if self.nest == 0 then
848
- src.unread c
849
- break
850
- end
851
- self.nest -= 1
852
- elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek("\n") then
853
- c2 = src.read
854
665
 
855
- if c2 == '$' || c2 == '@' || c2 == '{' then
856
- src.unread c2
857
- src.unread c
858
- break
859
- end
860
- src.unread(c2)
861
- elsif c == "\\" then
862
- c = src.read
863
- case c
864
- when "\n" then
865
- break if ((func & RubyLexer::STR_FUNC_QWORDS) != 0) # TODO: check break
866
- next if ((func & RubyLexer::STR_FUNC_EXPAND) != 0)
867
-
868
- buffer << "\\"
869
- when "\\" then
870
- buffer << c if (func & RubyLexer::STR_FUNC_ESCAPE) != 0
871
- else
872
- if (func & RubyLexer::STR_FUNC_REGEXP) != 0 then
873
- src.unread c
874
- tokadd_escape term
875
- next
876
- elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 then
877
- src.unread c
878
- if (func & RubyLexer::STR_FUNC_ESCAPE) != 0 then
879
- buffer << "\\"
880
- end
881
- c = read_escape
882
- elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
883
- # ignore backslashed spaces in %w
884
- elsif c != term && !(paren && c == paren) then
885
- buffer << "\\"
886
- end
887
- end
888
- # else if (ismbchar(c)) {
889
- # int i, len = mbclen(c)-1;
890
- # for (i = 0; i < len; i++) {
891
- # tokadd(c);
892
- # c = nextc();
893
- # }
894
- # }
895
- elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
896
- src.unread c
897
- break
898
- end
899
-
900
- if c == "\0" && (func & RubyLexer::STR_FUNC_SYMBOL) != 0 then
901
- raise SyntaxError, "symbol cannot contain '\\0'"
902
- end
903
-
904
- buffer << c # unless c == "\r"
905
- end # while
906
-
907
- return c
908
- end
909
-
910
- def heredoc here
911
- _, eos, func, last_line = here
912
-
913
- eosn = eos + "\n"
914
- err_msg = "can't find string #{eos.inspect} anywhere before EOF"
915
-
916
- indent = (func & RubyLexer::STR_FUNC_INDENT) != 0
917
- str = []
918
-
919
- raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
920
-
921
- if src.begin_of_line? && src.match_string(eosn, indent) then
922
- src.unread_many last_line
923
- self.yacc_value = t(eos)
924
- return :tSTRING_END
925
- end
926
-
927
- if (func & RubyLexer::STR_FUNC_EXPAND) == 0 then
928
- begin
929
- str << src.read_line
930
- raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
931
- end until src.match_string(eosn, indent)
932
- else
933
- c = src.read
934
- buffer = []
935
-
936
- if c == "#" then
937
- case c = src.read
938
- when "$", "@" then
939
- src.unread c
940
- self.yacc_value = t("#" + c)
941
- return :tSTRING_DVAR
942
- when "{" then
943
- self.yacc_value = t("#" + c)
944
- return :tSTRING_DBEG
945
- end
946
- buffer << "#"
947
- end
948
-
949
- src.unread c
950
-
951
- begin
952
- c = tokadd_string func, "\n", nil, buffer
953
-
954
- raise SyntaxError, err_msg if c == RubyLexer::EOF
955
-
956
- if c != "\n" then
957
- self.yacc_value = s(:str, buffer.join)
958
- return :tSTRING_CONTENT
959
- end
960
-
961
- buffer << src.read
962
-
963
- raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
964
- end until src.match_string(eosn, indent)
965
-
966
- str = buffer
967
- end
968
-
969
- src.unread_many eosn
970
-
971
- self.lex_strterm = s(:heredoc, eos, func, last_line)
972
- self.yacc_value = s(:str, str.join)
973
-
974
- return :tSTRING_CONTENT
975
- end
976
-
977
- def parse_quote(c)
978
- beg, nnd = nil, nil
979
- short_hand = false
980
-
981
- # Short-hand (e.g. %{,%.,%!,... versus %Q{).
982
- unless c =~ /[a-z0-9]/i then
983
- beg, c = c, 'Q'
984
- short_hand = true
985
- else # Long-hand (e.g. %Q{}).
986
- short_hand = false
987
- beg = src.read
988
- if beg =~ /[a-z0-9]/i then
989
- raise SyntaxError, "unknown type of %string"
990
- end
991
- end
992
-
993
- if c == RubyLexer::EOF or beg == RubyLexer::EOF then
994
- raise SyntaxError, "unterminated quoted string meets nnd of file"
995
- end
996
-
997
- # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
998
- nnd = case beg
999
- when '(' then
1000
- ')'
1001
- when '[' then
1002
- ']'
1003
- when '{' then
1004
- '}'
1005
- when '<' then
1006
- '>'
1007
- else
1008
- nnd, beg = beg, "\0"
1009
- nnd
1010
- end
1011
-
1012
- string_type, token_type = STR_DQUOTE, :tSTRING_BEG
1013
- self.yacc_value = t("%#{c}#{beg}")
1014
-
1015
- case (c)
1016
- when 'Q' then
1017
- self.yacc_value = t("%#{short_hand ? nnd : c + beg}")
1018
- when 'q' then
1019
- string_type, token_type = STR_SQUOTE, :tSTRING_BEG
1020
- when 'W' then
1021
- string_type, token_type = STR_DQUOTE | STR_FUNC_QWORDS, :tWORDS_BEG
1022
- begin c = src.read end while c =~ /\s/
1023
- src.unread(c)
1024
- when 'w' then
1025
- string_type, token_type = STR_SQUOTE | STR_FUNC_QWORDS, :tQWORDS_BEG
1026
- begin c = src.read end while c =~ /\s/
1027
- src.unread(c)
1028
- when 'x' then
1029
- string_type, token_type = STR_XQUOTE, :tXSTRING_BEG
1030
- when 'r' then
1031
- string_type, token_type = STR_REGEXP, :tREGEXP_BEG
1032
- when 's' then
1033
- string_type, token_type = STR_SSYM, :tSYMBEG
1034
- self.lex_state = :expr_fname
1035
- else
1036
- raise SyntaxError, "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'."
1037
- end
1038
-
1039
- self.lex_strterm = s(:strterm, string_type, nnd, beg)
1040
-
1041
- return token_type
1042
- end
1043
-
1044
- def heredoc_identifier
1045
- c = src.read
1046
- term = 42 # HACK
1047
- func = 0
1048
-
1049
- if c == '-' then
1050
- c = src.read
1051
- func = STR_FUNC_INDENT
1052
- end
1053
-
1054
- if c == "\'" || c == '"' || c == '`' then
1055
- if c == "\'" then
1056
- func |= STR_SQUOTE
1057
- elsif c == '"'
1058
- func |= STR_DQUOTE
1059
- else
1060
- func |= STR_XQUOTE
1061
- end
1062
-
1063
- token_buffer.clear
1064
- term = c
1065
-
1066
- while (c = src.read) != RubyLexer::EOF && c != term
1067
- token_buffer << c
1068
- end
1069
-
1070
- if c == RubyLexer::EOF then
1071
- raise SyntaxError, "unterminated here document identifier"
1072
- end
1073
- else
1074
- unless c =~ /\w/ then
1075
- src.unread c
1076
- src.unread '-' if (func & STR_FUNC_INDENT) != 0
1077
- return 0 # TODO: RubyLexer::EOF?
1078
- end
1079
- token_buffer.clear
1080
- term = '"'
1081
- func |= STR_DQUOTE
1082
- begin
1083
- token_buffer << c
1084
- end while (c = src.read) != RubyLexer::EOF && c =~ /\w/
1085
- src.unread c
1086
- end
1087
-
1088
- line = src.read_line
1089
- tok = token_buffer.join
1090
- self.lex_strterm = s(:heredoc, tok, func, line)
1091
-
1092
- if term == '`' then
1093
- self.yacc_value = t("`")
1094
- return :tXSTRING_BEG
1095
- end
1096
-
1097
- self.yacc_value = t("\"")
1098
- return :tSTRING_BEG
1099
- end
1100
-
1101
- def arg_ambiguous
1102
- self.warning("Ambiguous first argument. make sure.")
1103
- end
1104
-
1105
- ##
1106
- # Read a comment up to end of line. When found each comment will
1107
- # get stored away into the parser result so that any interested
1108
- # party can use them as they seem fit. One idea is that IDE authors
1109
- # can do distance based heuristics to associate these comments to
1110
- # the AST node they think they belong to.
1111
- #
1112
- # @param c last character read from lexer source
1113
- # @return newline or eof value
1114
-
1115
- def read_comment c
1116
- token_buffer.clear
1117
- token_buffer << c
1118
-
1119
- while (c = src.read) != "\n" do
1120
- break if c == RubyLexer::EOF
1121
- token_buffer << c
1122
- end
1123
- src.unread c
1124
-
1125
- # Store away each comment to parser result so IDEs can do whatever
1126
- # they want with them.
1127
- # HACK parser_support.result.add_comment(Node.comment(token_buffer.join))
1128
-
1129
- return c
1130
- end
1131
-
1132
- ##
1133
- # Returns the next token. Also sets yy_val is needed.
1134
- #
1135
- # @return Description of the Returned Value
1136
- # TODO: remove ALL sexps coming from here and move up to grammar
1137
- # TODO: only literal values should come up from the lexer.
1138
-
1139
- def yylex
1140
- c = ''
1141
- space_seen = false
1142
- command_state = false
1143
-
1144
- if lex_strterm then
1145
- token = nil
1146
-
1147
- if lex_strterm[0] == :heredoc then
1148
- token = self.heredoc(lex_strterm)
1149
- if token == :tSTRING_END then
1150
- self.lex_strterm = nil
1151
- self.lex_state = :expr_end
1152
- end
1153
- else
1154
- token = self.parse_string(lex_strterm)
1155
-
1156
- if token == :tSTRING_END || token == :tREGEXP_END then
1157
- self.lex_strterm = nil
666
+ self.command_start = true
667
+ self.lex_state = :expr_beg
668
+ return :tNL
669
+ elsif src.scan(/[\]\)\}]/) then
670
+ cond.lexpop
671
+ cmdarg.lexpop
1158
672
  self.lex_state = :expr_end
1159
- end
1160
- end
1161
-
1162
- return token
1163
- end
1164
-
1165
- command_state = self.command_start
1166
- self.command_start = false
1167
-
1168
- last_state = lex_state
1169
-
1170
- loop do
1171
- c = src.read
1172
- case c
1173
- when /\004|\032|\000/, RubyLexer::EOF then # ^D, ^Z, EOF
1174
- return RubyLexer::EOF
1175
- when /\ |\t|\f|\r|\13/ then # white spaces, 13 = '\v
1176
- space_seen = true
1177
- next
1178
- when /#|\n/ then
1179
- return 0 if c == '#' and read_comment(c) == 0 # FIX 0?
1180
- # Replace a string of newlines with a single one
1181
- while (c = src.read) == "\n"
1182
- # do nothing
1183
- end
1184
-
1185
- src.unread c
1186
-
1187
- if (lex_state == :expr_beg ||
1188
- lex_state == :expr_fname ||
1189
- lex_state == :expr_dot ||
1190
- lex_state == :expr_class) then
1191
- next
1192
- end
1193
-
1194
- self.command_start = true
1195
- self.lex_state = :expr_beg
1196
- return "\n"
1197
- when '*' then
1198
- c = src.read
1199
- if c == '*' then
1200
- c = src.read
1201
- if c == '=' then
673
+ self.yacc_value = src.matched
674
+ result = {
675
+ ")" => :tRPAREN,
676
+ "]" => :tRBRACK,
677
+ "}" => :tRCURLY
678
+ }[src.matched]
679
+ return result
680
+ elsif src.check(/\./) then
681
+ if src.scan(/\.\.\./) then
1202
682
  self.lex_state = :expr_beg
1203
- self.yacc_value = t("**")
1204
- return :tOP_ASGN
1205
- end
1206
- src.unread c
1207
- self.yacc_value = t("**")
1208
- c = :tPOW
1209
- else
1210
- if c == '=' then
683
+ self.yacc_value = "..."
684
+ return :tDOT3
685
+ elsif src.scan(/\.\./) then
1211
686
  self.lex_state = :expr_beg
1212
- self.yacc_value = t("*")
1213
- return :tOP_ASGN
1214
- end
1215
- src.unread c
1216
- if lex_state.is_argument && space_seen && c !~ /\s/ then
1217
- warning("`*' interpreted as argument prefix")
1218
- c = :tSTAR
1219
- elsif lex_state == :expr_beg || lex_state == :expr_mid then
1220
- c = :tSTAR
1221
- else
1222
- c = :tSTAR2
687
+ self.yacc_value = ".."
688
+ return :tDOT2
689
+ elsif src.scan(/\.\d/) then
690
+ rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
691
+ elsif src.scan(/\./) then
692
+ self.lex_state = :expr_dot
693
+ self.yacc_value = "."
694
+ return :tDOT
1223
695
  end
1224
- self.yacc_value = t("*")
1225
- end
1226
-
1227
- if lex_state == :expr_fname || lex_state == :expr_dot then
1228
- self.lex_state = :expr_arg
1229
- else
696
+ elsif src.scan(/\,/) then
1230
697
  self.lex_state = :expr_beg
1231
- end
1232
-
1233
- return c
1234
- when '!' then
1235
- self.lex_state = :expr_beg
1236
- if (c = src.read) == '=' then
1237
- self.yacc_value = t("!=")
1238
- return :tNEQ
1239
- end
1240
- if c == '~' then
1241
- self.yacc_value = t("!~")
1242
- return :tNMATCH
1243
- end
1244
- src.unread(c)
1245
- self.yacc_value = t("!")
1246
- return :tBANG
1247
- when '=' then
1248
- # documentation nodes - FIX: cruby much cleaner w/ lookahead
1249
- if src.was_begin_of_line and src.match_string "begin" then
1250
- self.token_buffer.clear
1251
- self.token_buffer << "begin"
1252
- c = src.read
1253
-
1254
- if c =~ /\s/ then
1255
- # In case last next was the newline.
1256
- src.unread(c)
1257
-
1258
- loop do
1259
- c = src.read
1260
- token_buffer << c
1261
-
1262
- # If a line is followed by a blank line put it back.
1263
- while c == "\n"
1264
- c = src.read
1265
- token_buffer << c
1266
- end
1267
-
1268
- if c == RubyLexer::EOF then
1269
- raise SyntaxError, "embedded document meets end of file"
1270
- end
1271
-
1272
- next unless c == '='
1273
-
1274
- if src.was_begin_of_line && src.match_string("end") then
1275
- token_buffer << "end"
1276
- token_buffer << src.read_line
1277
- src.unread "\n"
1278
- break
1279
- end
698
+ self.yacc_value = ","
699
+ return :tCOMMA
700
+ elsif src.scan(/\(/) then
701
+ result = :tLPAREN2
702
+ self.command_start = true
703
+ if lex_state == :expr_beg || lex_state == :expr_mid then
704
+ result = :tLPAREN
705
+ elsif space_seen then
706
+ if lex_state == :expr_cmdarg then
707
+ result = :tLPAREN_ARG
708
+ elsif lex_state == :expr_arg then
709
+ warning("don't put space before argument parentheses")
710
+ result = :tLPAREN2
1280
711
  end
1281
-
1282
- # parser_support.result.add_comment(Node.comment(token_buffer.join))
1283
- next
1284
712
  end
1285
- src.unread(c)
1286
- end
1287
713
 
714
+ self.expr_beg_push "("
1288
715
 
1289
- if lex_state == :expr_fname || lex_state == :expr_dot then
1290
- self.lex_state = :expr_arg
1291
- else
1292
- self.lex_state = :expr_beg
1293
- end
1294
-
1295
- c = src.read
1296
- if c == '=' then
1297
- c = src.read
1298
- if c == '=' then
1299
- self.yacc_value = t("===")
716
+ return result
717
+ elsif src.check(/\=/) then
718
+ if src.scan(/\=\=\=/) then
719
+ self.fix_arg_lex_state
720
+ self.yacc_value = "==="
1300
721
  return :tEQQ
1301
- end
1302
- src.unread(c)
1303
- self.yacc_value = t("==")
1304
- return :tEQ
1305
- end
1306
- if c == '~' then
1307
- self.yacc_value = t("=~")
1308
- return :tMATCH
1309
- elsif c == '>' then
1310
- self.yacc_value = t("=>")
1311
- return :tASSOC
1312
- end
1313
- src.unread(c)
1314
- self.yacc_value = t("=")
1315
- return '='
1316
- when '<' then
1317
- c = src.read
1318
- if (c == '<' &&
1319
- lex_state != :expr_end &&
1320
- lex_state != :expr_dot &&
1321
- lex_state != :expr_endarg &&
1322
- lex_state != :expr_class &&
1323
- (!lex_state.is_argument || space_seen)) then
1324
- tok = self.heredoc_identifier
1325
- return tok unless tok == 0
1326
- end
1327
- if lex_state == :expr_fname || lex_state == :expr_dot then
1328
- self.lex_state = :expr_arg
1329
- else
1330
- self.lex_state = :expr_beg
1331
- end
1332
- if c == '=' then
1333
- if (c = src.read) == '>' then
1334
- self.yacc_value = t("<=>")
1335
- return :tCMP
1336
- end
1337
- src.unread c
1338
- self.yacc_value = t("<=")
1339
- return :tLEQ
1340
- end
1341
- if c == '<' then
1342
- if (c = src.read) == '=' then
1343
- self.lex_state = :expr_beg
1344
- self.yacc_value = t("\<\<")
1345
- return :tOP_ASGN
1346
- end
1347
- src.unread(c)
1348
- self.yacc_value = t("<<")
1349
- return :tLSHFT
1350
- end
1351
- self.yacc_value = t("<")
1352
- src.unread(c)
1353
- return :tLT
1354
- when '>' then
1355
- if lex_state == :expr_fname || lex_state == :expr_dot then
1356
- self.lex_state = :expr_arg
1357
- else
1358
- self.lex_state = :expr_beg
1359
- end
722
+ elsif src.scan(/\=\=/) then
723
+ self.fix_arg_lex_state
724
+ self.yacc_value = "=="
725
+ return :tEQ
726
+ elsif src.scan(/\=~/) then
727
+ self.fix_arg_lex_state
728
+ self.yacc_value = "=~"
729
+ return :tMATCH
730
+ elsif src.scan(/\=>/) then
731
+ self.fix_arg_lex_state
732
+ self.yacc_value = "=>"
733
+ return :tASSOC
734
+ elsif src.scan(/\=/) then
735
+ if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
736
+ @comments << '=' << src.matched
737
+
738
+ unless src.scan(/.*?\n=end\s*(\n|\z)/m) then
739
+ @comments.clear
740
+ rb_compile_error("embedded document meets end of file")
741
+ end
1360
742
 
1361
- if (c = src.read) == '=' then
1362
- self.yacc_value = t(">=")
1363
- return :tGEQ
1364
- end
1365
- if c == '>' then
1366
- if (c = src.read) == '=' then
1367
- self.lex_state = :expr_beg
1368
- self.yacc_value = t(">>")
1369
- return :tOP_ASGN
1370
- end
1371
- src.unread c
1372
- self.yacc_value = t(">>")
1373
- return :tRSHFT
1374
- end
1375
- src.unread c
1376
- self.yacc_value = t(">")
1377
- return :tGT
1378
- when '"' then
1379
- self.lex_strterm = s(:strterm, STR_DQUOTE, '"', "\0") # TODO: question this
1380
- self.yacc_value = t("\"")
1381
- return :tSTRING_BEG
1382
- when '`' then
1383
- self.yacc_value = t("`")
1384
- if lex_state == :expr_fname then
1385
- self.lex_state = :expr_end
1386
- return :tBACK_REF2
1387
- end
1388
- if lex_state == :expr_dot then
1389
- if command_state then
1390
- self.lex_state = :expr_cmdarg
1391
- else
1392
- self.lex_state = :expr_arg
1393
- end
1394
- return :tBACK_REF2
1395
- end
1396
- self.lex_strterm = s(:strterm, STR_XQUOTE, '`', "\0")
1397
- return :tXSTRING_BEG
1398
- when "\'" then
1399
- self.lex_strterm = s(:strterm, STR_SQUOTE, "\'", "\0")
1400
- self.yacc_value = t("'")
1401
- return :tSTRING_BEG
1402
- when '?' then
1403
- if lex_state == :expr_end || lex_state == :expr_endarg then
1404
- self.lex_state = :expr_beg
1405
- self.yacc_value = t("?")
1406
- return '?'
1407
- end
743
+ @comments << src.matched
1408
744
 
1409
- c = src.read
1410
-
1411
- raise SyntaxError, "incomplete character syntax" if c == RubyLexer::EOF
1412
-
1413
- if c =~ /\s/ then
1414
- if !lex_state.is_argument then
1415
- c2 = 0
1416
- c2 = case c
1417
- when ' ' then
1418
- 's'
1419
- when "\n" then
1420
- 'n'
1421
- when "\t" then
1422
- 't'
1423
- when "\v" then
1424
- 'v'
1425
- when "\r" then
1426
- 'r'
1427
- when "\f" then
1428
- 'f'
1429
- end
1430
-
1431
- if c2 != 0 then
1432
- warning("invalid character syntax; use ?\\" + c2)
745
+ next
746
+ else
747
+ self.fix_arg_lex_state
748
+ self.yacc_value = '='
749
+ return :tEQL
1433
750
  end
1434
751
  end
1435
-
1436
- # ternary
1437
- src.unread c
1438
- self.lex_state = :expr_beg
1439
- self.yacc_value = t("?")
1440
- return '?'
1441
- # elsif ismbchar(c) then # ternary, also
1442
- # rb_warn("multibyte character literal not supported yet; use ?\\" + c)
1443
- # support.unread c
1444
- # self.lex_state = :expr_beg
1445
- # return '?'
1446
- elsif c =~ /\w/ && ! src.peek("\n") && self.is_next_identchar then
1447
- # ternary, also
1448
- src.unread c
1449
- self.lex_state = :expr_beg
1450
- self.yacc_value = t("?")
1451
- return '?'
1452
- elsif c == "\\" then
1453
- c = self.read_escape
1454
- end
1455
- c[0] &= 0xff
1456
- self.lex_state = :expr_end
1457
- self.yacc_value = c[0]
1458
- return :tINTEGER
1459
- when '&' then
1460
- if (c = src.read) == '&' then
1461
- self.lex_state = :expr_beg
1462
- if (c = src.read) == '=' then
1463
- self.yacc_value = t("&&")
1464
- self.lex_state = :expr_beg
1465
- return :tOP_ASGN
1466
- end
1467
- src.unread c
1468
- self.yacc_value = t("&&")
1469
- return :tANDOP
1470
- elsif c == '=' then
1471
- self.yacc_value = t("&")
1472
- self.lex_state = :expr_beg
1473
- return :tOP_ASGN
1474
- end
1475
-
1476
- src.unread c
1477
-
1478
- if lex_state.is_argument && space_seen && c !~ /\s/ then
1479
- warning("`&' interpreted as argument prefix")
1480
- c = :tAMPER
1481
- elsif lex_state == :expr_beg || lex_state == :expr_mid then
1482
- c = :tAMPER
1483
- else
1484
- c = :tAMPER2
1485
- end
1486
-
1487
- if lex_state == :expr_fname || lex_state == :expr_dot then
1488
- self.lex_state = :expr_arg
1489
- else
1490
- self.lex_state = :expr_beg
1491
- end
1492
- self.yacc_value = t("&")
1493
- return c
1494
- when '|' then
1495
- if (c = src.read) == '|' then
1496
- self.lex_state = :expr_beg
1497
- if (c = src.read) == '=' then
1498
- self.lex_state = :expr_beg
1499
- self.yacc_value = t("||")
1500
- return :tOP_ASGN
1501
- end
1502
- src.unread c
1503
- self.yacc_value = t("||")
1504
- return :tOROP
1505
- end
1506
- if c == '=' then
1507
- self.lex_state = :expr_beg
1508
- self.yacc_value = t("|")
1509
- return :tOP_ASGN
1510
- end
1511
- if lex_state == :expr_fname || lex_state == :expr_dot then
1512
- self.lex_state = :expr_arg
1513
- else
1514
- self.lex_state = :expr_beg
1515
- end
1516
- src.unread c
1517
- self.yacc_value = t("|")
1518
- return :tPIPE
1519
- when '+' then
1520
- c = src.read
1521
- if lex_state == :expr_fname || lex_state == :expr_dot then
1522
- self.lex_state = :expr_arg
1523
- if c == '@' then
1524
- self.yacc_value = t("+@")
1525
- return :tUPLUS
1526
- end
1527
- src.unread c
1528
- self.yacc_value = t("+")
1529
- return :tPLUS
1530
- end
1531
-
1532
- if c == '=' then
1533
- self.lex_state = :expr_beg
1534
- self.yacc_value = t("+")
1535
- return :tOP_ASGN
1536
- end
1537
-
1538
- if (lex_state == :expr_beg || lex_state == :expr_mid ||
1539
- (lex_state.is_argument && space_seen && c !~ /\s/)) then
1540
- arg_ambiguous if lex_state.is_argument
1541
- self.lex_state = :expr_beg
1542
- src.unread c
1543
- if c =~ /\d/ then
1544
- c = '+'
1545
- return parse_number(c)
1546
- end
1547
- self.yacc_value = t("+")
1548
- return :tUPLUS
1549
- end
1550
- self.lex_state = :expr_beg
1551
- src.unread c
1552
- self.yacc_value = t("+")
1553
- return :tPLUS
1554
- when '-' then
1555
- c = src.read
1556
- if lex_state == :expr_fname || lex_state == :expr_dot then
1557
- self.lex_state = :expr_arg
1558
- if c == '@' then
1559
- self.yacc_value = t("-@")
1560
- return :tUMINUS
1561
- end
1562
- src.unread c
1563
- self.yacc_value = t("-")
1564
- return :tMINUS
1565
- end
1566
- if c == '=' then
1567
- self.lex_state = :expr_beg
1568
- self.yacc_value = t("-")
1569
- return :tOP_ASGN
1570
- end
1571
- if (lex_state == :expr_beg || lex_state == :expr_mid ||
1572
- (lex_state.is_argument && space_seen && c !~ /\s/)) then
1573
- arg_ambiguous if lex_state.is_argument
1574
- self.lex_state = :expr_beg
1575
- src.unread c
1576
- self.yacc_value = t("-")
1577
- if c =~ /\d/ then
1578
- return :tUMINUS_NUM
1579
- end
1580
- return :tUMINUS
1581
- end
1582
- self.lex_state = :expr_beg
1583
- src.unread c
1584
- self.yacc_value = t("-")
1585
- return :tMINUS
1586
- when '.' then
1587
- self.lex_state = :expr_beg
1588
- if (c = src.read) == '.' then
1589
- if (c = src.read) == '.' then
1590
- self.yacc_value = t("...")
1591
- return :tDOT3
1592
- end
1593
- src.unread c
1594
- self.yacc_value = t("..")
1595
- return :tDOT2
1596
- end
1597
- src.unread c
1598
- if c =~ /\d/ then
1599
- raise SyntaxError, "no .<digit> floating literal anymore put 0 before dot"
1600
- end
1601
- self.lex_state = :expr_dot
1602
- self.yacc_value = t(".")
1603
- return :tDOT
1604
- when /[0-9]/ then
1605
- return parse_number(c)
1606
- when ')' then # REFACTOR: omg this is lame... next 3 are all the same
1607
- cond.lexpop
1608
- cmdarg.lexpop
1609
- self.lex_state = :expr_end
1610
- self.yacc_value = t(")")
1611
- return :tRPAREN
1612
- when ']' then
1613
- cond.lexpop
1614
- cmdarg.lexpop
1615
- self.lex_state = :expr_end
1616
- self.yacc_value = t("]")
1617
- return :tRBRACK
1618
- when '}' then
1619
- cond.lexpop
1620
- cmdarg.lexpop
1621
- self.lex_state = :expr_end
1622
- self.yacc_value = t("end")
1623
- return :tRCURLY
1624
- when ':' then
1625
- c = src.read
1626
- if c == ':' then
752
+ elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
753
+ self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
754
+ self.lex_state = :expr_end
755
+ return :tSTRING
756
+ elsif src.scan(/\"/) then # FALLBACK
757
+ self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
758
+ self.yacc_value = "\""
759
+ return :tSTRING_BEG
760
+ elsif src.scan(/\@\@?\w*/) then
761
+ self.token = src.matched
762
+
763
+ rb_compile_error "`#{token}` is not allowed as a variable name" if
764
+ token =~ /\@\d/
765
+
766
+ return process_token(command_state)
767
+ elsif src.scan(/\:\:/) then
1627
768
  if (lex_state == :expr_beg ||
1628
769
  lex_state == :expr_mid ||
1629
770
  lex_state == :expr_class ||
1630
771
  (lex_state.is_argument && space_seen)) then
1631
772
  self.lex_state = :expr_beg
1632
- self.yacc_value = t("::")
773
+ self.yacc_value = "::"
1633
774
  return :tCOLON3
1634
775
  end
1635
776
 
1636
- self.lex_state = :expr_dot
1637
- self.yacc_value = t(":")
1638
- return :tCOLON2
1639
- end
1640
-
1641
- if lex_state == :expr_end || lex_state == :expr_endarg || c =~ /\s/ then
1642
- src.unread c
1643
- self.lex_state = :expr_beg
1644
- self.yacc_value = t(":")
1645
- return ':'
1646
- end
1647
-
1648
- case c
1649
- when "\'" then
1650
- self.lex_strterm = s(:strterm, STR_SSYM, c, "\0")
1651
- when '"' then
1652
- self.lex_strterm = s(:strterm, STR_DSYM, c, "\0")
1653
- else
1654
- src.unread c
1655
- end
1656
-
1657
- self.lex_state = :expr_fname
1658
- self.yacc_value = t(":")
1659
- return :tSYMBEG
1660
- when '/' then
1661
- if lex_state == :expr_beg || lex_state == :expr_mid then
1662
- self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1663
- self.yacc_value = t("/")
1664
- return :tREGEXP_BEG
1665
- end
1666
-
1667
- if (c = src.read) == '=' then
1668
- self.yacc_value = t("/")
1669
- self.lex_state = :expr_beg
1670
- return :tOP_ASGN
1671
- end
1672
-
1673
- src.unread c
1674
-
1675
- if lex_state.is_argument && space_seen then
1676
- unless c =~ /\s/ then
1677
- arg_ambiguous
1678
- self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1679
- self.yacc_value = t("/")
1680
- return :tREGEXP_BEG
1681
- end
1682
- end
1683
-
1684
- self.lex_state = if (lex_state == :expr_fname ||
1685
- lex_state == :expr_dot) then
1686
- :expr_arg
1687
- else
1688
- :expr_beg
1689
- end
1690
-
1691
- self.yacc_value = t("/")
1692
- return :tDIVIDE
1693
- when '^' then
1694
- if (c = src.read) == '=' then
1695
- self.lex_state = :expr_beg
1696
- self.yacc_value = t("^")
1697
- return :tOP_ASGN
1698
- end
1699
- if lex_state == :expr_fname || self.lex_state == :expr_dot then
1700
- self.lex_state = :expr_arg
1701
- else
1702
- self.lex_state = :expr_beg
1703
- end
1704
- src.unread c
1705
- self.yacc_value = t("^")
1706
- return :tCARET
1707
- when ';' then
1708
- self.command_start = true
1709
- self.lex_state = :expr_beg
1710
- self.yacc_value = t(";")
1711
- return c
1712
- when ',' then
1713
- self.lex_state = :expr_beg
1714
- self.yacc_value = t(",")
1715
- return c
1716
- when '~' then
1717
- if lex_state == :expr_fname || lex_state == :expr_dot then
1718
- if (c = src.read) != '@' then
1719
- src.unread c
1720
- end
1721
- end
1722
- if lex_state == :expr_fname || lex_state == :expr_dot then
1723
- self.lex_state = :expr_arg
1724
- else
1725
- self.lex_state = :expr_beg
1726
- end
1727
- self.yacc_value = t("~")
1728
- return :tTILDE
1729
- when '(' then
1730
- c = :tLPAREN2
1731
- self.command_start = true
1732
- if lex_state == :expr_beg || lex_state == :expr_mid then
1733
- c = :tLPAREN
1734
- elsif space_seen then
1735
- if lex_state == :expr_cmdarg then
1736
- c = :tLPAREN_ARG
1737
- elsif lex_state == :expr_arg then
1738
- warning("don't put space before argument parentheses")
1739
- c = :tLPAREN2
1740
- end
1741
- end
1742
- cond.push false
1743
- cmdarg.push false
1744
- self.lex_state = :expr_beg
1745
- self.yacc_value = t("(")
1746
- return c
1747
- when '[' then
1748
- if lex_state == :expr_fname || lex_state == :expr_dot then
1749
- self.lex_state = :expr_arg
1750
- if (c = src.read) == ']' then
1751
- if src.peek('=') then
1752
- c = src.read
1753
- self.yacc_value = t("[]=")
1754
- return :tASET
1755
- end
1756
- self.yacc_value = t("[]")
1757
- return :tAREF
1758
- end
1759
- src.unread c
1760
- self.yacc_value = t("[")
1761
- return '['
1762
- elsif lex_state == :expr_beg || lex_state == :expr_mid then
1763
- c = :tLBRACK
1764
- elsif lex_state.is_argument && space_seen then
1765
- c = :tLBRACK
1766
- end
1767
- self.lex_state = :expr_beg
1768
- cond.push false
1769
- cmdarg.push false
1770
- self.yacc_value = t("[")
1771
- return c
1772
- when '{' then
1773
- c = :tLCURLY
1774
-
1775
- if lex_state.is_argument || lex_state == :expr_end then
1776
- c = :tLCURLY # block (primary)
1777
- elsif lex_state == :expr_endarg then
1778
- c = :tLBRACE_ARG # block (expr)
1779
- else
1780
- c = :tLBRACE # hash
1781
- end
1782
- cond.push false
1783
- cmdarg.push false
1784
- self.lex_state = :expr_beg
1785
- self.yacc_value = t("{")
1786
- return c
1787
- when "\\" then
1788
- c = src.read
1789
- if c == "\n" then
1790
- space_seen = true
1791
- next # skip \\n
1792
- end
1793
- src.unread c
1794
- self.yacc_value = t("\\")
1795
- return "\\"
1796
- when '%' then
1797
- if lex_state == :expr_beg || lex_state == :expr_mid then
1798
- return parse_quote(src.read)
1799
- end
1800
-
1801
- c = src.read
1802
- if c == '=' then
1803
- self.lex_state = :expr_beg
1804
- self.yacc_value = t("%")
1805
- return :tOP_ASGN
1806
- end
1807
-
1808
- return parse_quote(c) if lex_state.is_argument && space_seen && c !~ /\s/
1809
-
1810
- self.lex_state = case lex_state
1811
- when :expr_fname, :expr_dot then
1812
- :expr_arg
1813
- else
1814
- :expr_beg
1815
- end
1816
-
1817
- src.unread c
1818
- self.yacc_value = t("%")
1819
-
1820
- return :tPERCENT
1821
- when '$' then
1822
- last_state = lex_state
1823
- self.lex_state = :expr_end
1824
- token_buffer.clear
1825
- c = src.read
1826
- case c
1827
- when '_' then # $_: last read line string
1828
- c = src.read
1829
-
1830
- token_buffer << '$'
1831
- token_buffer << '_'
1832
-
1833
- unless c =~ /\w/ then
1834
- src.unread c
1835
- self.yacc_value = t(token_buffer.join)
1836
- return :tGVAR
1837
- end
1838
- when /[~*$?!@\/\\;,.=:<>\"]/ then
1839
- token_buffer << '$'
1840
- token_buffer << c
1841
- self.yacc_value = t(token_buffer.join)
1842
- return :tGVAR
1843
- when '-' then
1844
- token_buffer << '$'
1845
- token_buffer << c
1846
- c = src.read
1847
- if c =~ /\w/ then
1848
- token_buffer << c
1849
- else
1850
- src.unread c
1851
- end
1852
- self.yacc_value = t(token_buffer.join)
1853
- # xxx shouldn't check if valid option variable
1854
- return :tGVAR
1855
- when /[\&\`\'\+]/ then
1856
- # Explicit reference to these vars as symbols...
1857
- if last_state == :expr_fname then
1858
- token_buffer << '$'
1859
- token_buffer << c
1860
- self.yacc_value = t(token_buffer.join)
1861
- return :tGVAR
1862
- end
1863
-
1864
- self.yacc_value = s(:back_ref, c.to_sym)
1865
- return :tBACK_REF
1866
- when /[1-9]/ then
1867
- token_buffer << '$'
1868
- begin
1869
- token_buffer << c
1870
- c = src.read
1871
- end while c =~ /\d/
1872
- src.unread c
1873
- if last_state == :expr_fname then
1874
- self.yacc_value = t(token_buffer.join)
1875
- return :tGVAR
1876
- else
1877
- self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
1878
- return :tNTH_REF
1879
- end
1880
- when '0' then
1881
- token_buffer << '$'
1882
- else
1883
- unless c =~ /\w/ then
1884
- src.unread c
1885
- self.yacc_value = t("$")
1886
- return '$'
1887
- end
1888
- token_buffer << '$'
1889
- end
1890
- when '@' then
1891
- c = src.read
1892
- token_buffer.clear
1893
- token_buffer << '@'
1894
- if c == '@' then
1895
- token_buffer << '@'
1896
- c = src.read
1897
- end
1898
- if c =~ /\d/ then
1899
- if token_buffer.length == 1 then
1900
- raise SyntaxError, "`@" + c + "' is not allowed as an instance variable name"
1901
- else
1902
- raise SyntaxError, "`@@" + c + "' is not allowed as a class variable name"
1903
- end
1904
- end
1905
- unless c =~ /\w/ then
1906
- src.unread c
1907
- self.yacc_value = t("@")
1908
- return '@'
1909
- end
1910
- when '_' then
1911
- if src.was_begin_of_line && src.match_string("_END__\n", false) then
1912
- self.end_seen = true
1913
- return RubyLexer::EOF
1914
- end
1915
- token_buffer.clear
1916
- else
1917
- unless c =~ /\w/ then
1918
- raise SyntaxError, "Invalid char '#{c.inspect}' in expression"
1919
- end
1920
- token_buffer.clear
1921
- end
1922
-
1923
- begin
1924
- token_buffer << c
1925
- # if ismbchar(c) then
1926
- # len = mbclen(c) - 1
1927
- # (0..len).each do
1928
- # c = src.read;
1929
- # token_buffer << c
1930
- # end
1931
- # end
1932
- c = src.read
1933
- end while c =~ /\w/
1934
-
1935
- if c =~ /\!|\?/ && token_buffer[0] =~ /\w/ && src.peek != '=' then
1936
- token_buffer << c
1937
- else
1938
- src.unread c
1939
- end
1940
-
1941
- result = nil
1942
- last_state = lex_state
1943
-
1944
- case token_buffer[0]
1945
- when '$' then
1946
- self.lex_state = :expr_end
1947
- result = :tGVAR
1948
- when '@' then
1949
- self.lex_state = :expr_end
1950
- if token_buffer[1] == '@' then
1951
- result = :tCVAR
1952
- else
1953
- result = :tIVAR
1954
- end
1955
- else
1956
- if token_buffer[-1] =~ /[!?]/ then
1957
- result = :tFID
1958
- else
1959
- if lex_state == :expr_fname then
1960
- if (c = src.read) == '=' then
1961
- c2 = src.read
1962
-
1963
- if c2 != '~' && c2 != '>' && (c2 != '=' || (c2 == "\n" && src.peek('>'))) then
1964
- result = :tIDENTIFIER
1965
- token_buffer << c
1966
- src.unread c2
1967
- else
1968
- src.unread c2
1969
- src.unread c
1970
- end
1971
- else
1972
- src.unread c
1973
- end
1974
- end
1975
- if result.nil? && token_buffer[0] =~ /[A-Z]/ then
1976
- result = :tCONSTANT
1977
- else
1978
- result = :tIDENTIFIER
1979
- end
1980
- end
1981
-
1982
- unless lex_state == :expr_dot then
1983
- # See if it is a reserved word.
1984
- keyword = Keyword.keyword(token_buffer.join, token_buffer.length)
1985
-
1986
- unless keyword.nil? then
1987
- state = lex_state
1988
- self.lex_state = keyword.state
1989
-
1990
- if state == :expr_fname then
1991
- self.yacc_value = t(keyword.name)
1992
- else
1993
- self.yacc_value = t(token_buffer.join)
1994
- end
1995
-
1996
- if keyword.id0 == :kDO then
1997
- self.command_start = true
1998
- return :kDO_COND if cond.is_in_state
1999
- return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
2000
- return :kDO_BLOCK if state == :expr_endarg
2001
- return :kDO
2002
- end
2003
-
2004
- return keyword.id0 if state == :expr_beg
2005
-
2006
- self.lex_state = :expr_beg unless keyword.id0 == keyword.id1
2007
-
2008
- return keyword.id1
2009
- end
2010
- end
2011
-
2012
- if (lex_state == :expr_beg ||
2013
- lex_state == :expr_mid ||
2014
- lex_state == :expr_dot ||
2015
- lex_state == :expr_arg ||
2016
- lex_state == :expr_cmdarg) then
2017
- if command_state then
2018
- self.lex_state = :expr_cmdarg
2019
- else
2020
- self.lex_state = :expr_arg
2021
- end
2022
- else
2023
- self.lex_state = :expr_end
2024
- end
2025
- end
2026
-
2027
-
2028
- temp_val = token_buffer.join
2029
-
2030
- # Lame: parsing logic made it into lexer in ruby...So we
2031
- # are emulating
2032
- # FIXME: I believe this is much simpler now...
2033
- # HACK
2034
- # scope = parser_support.current_scope
2035
- # if (IdUtil.var_type(temp_val) == IdUtil.LOCAL_VAR &&
2036
- # last_state != :expr_dot &&
2037
- # (BlockStaticScope === scope && (scope.is_defined(temp_val) >= 0)) ||
2038
- # (scope.local_scope.is_defined(temp_val) >= 0)) then
2039
- # self.lex_state = :expr_end
2040
- # end
2041
-
2042
- self.yacc_value = t(temp_val)
2043
-
2044
- return result
2045
- end
2046
- end
2047
-
2048
- ##
2049
- # Parse a number from the input stream.
2050
- #
2051
- # @param c The first character of the number.
2052
- # @return A int constant wich represents a token.
2053
-
2054
- def parse_number c
2055
- self.lex_state = :expr_end
2056
-
2057
- token_buffer.clear
2058
-
2059
- if c == '-' then
2060
- token_buffer << c
2061
- c = src.read
2062
- elsif c == '+' then
2063
- # We don't append '+' since Java number parser gets confused FIX
2064
- c = src.read
2065
- end
2066
-
2067
- nondigit = "\0"
777
+ self.lex_state = :expr_dot
778
+ self.yacc_value = "::"
779
+ return :tCOLON2
780
+ elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
781
+ self.yacc_value = src[1]
782
+ self.lex_state = :expr_end
783
+ return :tSYMBOL
784
+ elsif src.scan(/\:/) then
785
+ # ?: / then / when
786
+ if (lex_state == :expr_end || lex_state == :expr_endarg||
787
+ src.check(/\s/)) then
788
+ self.lex_state = :expr_beg
789
+ self.yacc_value = ":"
790
+ return :tCOLON
791
+ end
2068
792
 
2069
- if c == '0' then
2070
- start_len = token_buffer.length
2071
- c = src.read
793
+ case
794
+ when src.scan(/\'/) then
795
+ self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
796
+ when src.scan(/\"/) then
797
+ self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
798
+ end
2072
799
 
2073
- case c
2074
- when /x/i then # hexadecimal
2075
- c = src.read
800
+ self.lex_state = :expr_fname
801
+ self.yacc_value = ":"
802
+ return :tSYMBEG
803
+ elsif src.check(/[0-9]/) then
804
+ return parse_number
805
+ elsif src.scan(/\[/) then
806
+ result = src.matched
2076
807
 
2077
- if c =~ /[a-f0-9]/i then
2078
- loop do
2079
- if c == '_' then
2080
- break unless nondigit == "\0"
2081
- nondigit = c
2082
- elsif c =~ /[a-f0-9]/i then
2083
- nondigit = "\0"
2084
- token_buffer << c
808
+ if lex_state == :expr_fname || lex_state == :expr_dot then
809
+ self.lex_state = :expr_arg
810
+ case
811
+ when src.scan(/\]\=/) then
812
+ self.yacc_value = "[]="
813
+ return :tASET
814
+ when src.scan(/\]/) then
815
+ self.yacc_value = "[]"
816
+ return :tAREF
2085
817
  else
2086
- break
818
+ rb_compile_error "unexpected '['"
2087
819
  end
2088
- c = src.read
820
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
821
+ result = :tLBRACK
822
+ elsif lex_state.is_argument && space_seen then
823
+ result = :tLBRACK
2089
824
  end
2090
- end
2091
825
 
2092
- src.unread c
826
+ self.expr_beg_push "["
2093
827
 
2094
- if token_buffer.length == start_len then
2095
- raise SyntaxError, "Hexadecimal number without hex-digits."
2096
- elsif nondigit != "\0" then
2097
- raise SyntaxError, "Trailing '_' in number."
2098
- end
2099
- self.yacc_value = token_buffer.join.to_i(16)
2100
- return :tINTEGER
2101
- when /b/i # binary
2102
- c = src.read
2103
- if c == '0' or c == '1' then
2104
- loop do
2105
- if c == '_' then
2106
- break if nondigit != "\0"
2107
- nondigit = c
2108
- elsif c == '0' or c == '1' then
2109
- nondigit = "\0"
2110
- token_buffer << c
828
+ return result
829
+ elsif src.scan(/\'(\\.|[^\'])*\'/) then
830
+ self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
831
+ self.lex_state = :expr_end
832
+ return :tSTRING
833
+ elsif src.check(/\|/) then
834
+ if src.scan(/\|\|\=/) then
835
+ self.lex_state = :expr_beg
836
+ self.yacc_value = "||"
837
+ return :tOP_ASGN
838
+ elsif src.scan(/\|\|/) then
839
+ self.lex_state = :expr_beg
840
+ self.yacc_value = "||"
841
+ return :tOROP
842
+ elsif src.scan(/\|\=/) then
843
+ self.lex_state = :expr_beg
844
+ self.yacc_value = "|"
845
+ return :tOP_ASGN
846
+ elsif src.scan(/\|/) then
847
+ self.fix_arg_lex_state
848
+ self.yacc_value = "|"
849
+ return :tPIPE
850
+ end
851
+ elsif src.scan(/\{/) then
852
+ result = if lex_state.is_argument || lex_state == :expr_end then
853
+ :tLCURLY # block (primary)
854
+ elsif lex_state == :expr_endarg then
855
+ :tLBRACE_ARG # block (expr)
856
+ else
857
+ :tLBRACE # hash
858
+ end
859
+
860
+ self.expr_beg_push "{"
861
+
862
+ return result
863
+ elsif src.scan(/[+-]/) then
864
+ sign = src.matched
865
+ utype, type = if sign == "+" then
866
+ [:tUPLUS, :tPLUS]
867
+ else
868
+ [:tUMINUS, :tMINUS]
869
+ end
870
+
871
+ if lex_state == :expr_fname || lex_state == :expr_dot then
872
+ self.lex_state = :expr_arg
873
+ if src.scan(/@/) then
874
+ self.yacc_value = "#{sign}@"
875
+ return utype
2111
876
  else
2112
- break
877
+ self.yacc_value = sign
878
+ return type
2113
879
  end
2114
- c = src.read
2115
880
  end
2116
- end
2117
881
 
2118
- src.unread c
882
+ if src.scan(/\=/) then
883
+ self.lex_state = :expr_beg
884
+ self.yacc_value = sign
885
+ return :tOP_ASGN
886
+ end
2119
887
 
2120
- if token_buffer.length == start_len then
2121
- raise SyntaxError, "Binary number without digits."
2122
- elsif nondigit != "\0" then
2123
- raise SyntaxError, "Trailing '_' in number."
2124
- end
2125
- self.yacc_value = token_buffer.join.to_i(2)
2126
- return :tINTEGER
2127
- when /d/i then # decimal
2128
- c = src.read
2129
- if c =~ /\d/ then
2130
- loop do
2131
- if c == '_' then
2132
- break if nondigit != "\0"
2133
- nondigit = c
2134
- elsif c =~ /\d/ then
2135
- nondigit = "\0"
2136
- token_buffer << c
2137
- else
2138
- break
888
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
889
+ (lex_state.is_argument && space_seen && !src.check(/\s/))) then
890
+ if lex_state.is_argument then
891
+ arg_ambiguous
2139
892
  end
2140
- c = src.read
2141
- end
2142
- end
2143
893
 
2144
- src.unread c
894
+ self.lex_state = :expr_beg
895
+ self.yacc_value = sign
2145
896
 
2146
- if token_buffer.length == start_len then
2147
- raise SyntaxError, "Binary number without digits."
2148
- elsif nondigit != "\0" then
2149
- raise SyntaxError, "Trailing '_' in number."
2150
- end
897
+ if src.check(/\d/) then
898
+ if utype == :tUPLUS then
899
+ return self.parse_number
900
+ else
901
+ return :tUMINUS_NUM
902
+ end
903
+ end
2151
904
 
2152
- self.yacc_value = token_buffer.join.to_i(10)
2153
- return :tINTEGER
2154
- when /o/i, /[0-7_]/ then # octal
2155
- c = src.read if c =~ /o/i # prefixed octal - kill me
2156
- loop do
2157
- if c == '_' then
2158
- break if (nondigit != "\0")
2159
- nondigit = c
2160
- elsif c >= '0' && c <= '7' then
2161
- nondigit = "\0"
2162
- token_buffer << c
2163
- else
2164
- break
905
+ return utype
2165
906
  end
2166
- c = src.read
2167
- end
2168
- if token_buffer.length > start_len then
2169
- src.unread c
2170
907
 
2171
- if nondigit != "\0" then
2172
- raise SyntaxError, "Trailing '_' in number."
908
+ self.lex_state = :expr_beg
909
+ self.yacc_value = sign
910
+ return type
911
+ elsif src.check(/\*/) then
912
+ if src.scan(/\*\*=/) then
913
+ self.lex_state = :expr_beg
914
+ self.yacc_value = "**"
915
+ return :tOP_ASGN
916
+ elsif src.scan(/\*\*/) then
917
+ self.yacc_value = "**"
918
+ self.fix_arg_lex_state
919
+ return :tPOW
920
+ elsif src.scan(/\*\=/) then
921
+ self.lex_state = :expr_beg
922
+ self.yacc_value = "*"
923
+ return :tOP_ASGN
924
+ elsif src.scan(/\*/) then
925
+ result = if lex_state.is_argument && space_seen && src.check(/\S/) then
926
+ warning("`*' interpreted as argument prefix")
927
+ :tSTAR
928
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
929
+ :tSTAR
930
+ else
931
+ :tSTAR2
932
+ end
933
+ self.yacc_value = "*"
934
+ self.fix_arg_lex_state
935
+
936
+ return result
2173
937
  end
2174
-
2175
- self.yacc_value = token_buffer.join.to_i(8)
2176
- return :tINTEGER
2177
- end
2178
- when /[89]/ then
2179
- raise SyntaxError, "Illegal octal digit."
2180
- when /[\.eE]/ then
2181
- token_buffer << '0'
2182
- else
2183
- src.unread c
2184
- self.yacc_value = 0
2185
- return :tINTEGER
2186
- end
2187
- end
2188
-
2189
- seen_point = false
2190
- seen_e = false
2191
-
2192
- loop do
2193
- case c
2194
- when /\d/ then
2195
- nondigit = "\0"
2196
- token_buffer << c
2197
- when '.' then
2198
- if nondigit != "\0" then
2199
- src.unread c
2200
- raise SyntaxError, "Trailing '_' in number."
2201
- elsif seen_point or seen_e then
2202
- src.unread c
2203
- return number_token(token_buffer.join, true, nondigit)
2204
- else
2205
- c2 = src.read
2206
- unless c2 =~ /\d/ then
2207
- src.unread c2
2208
- src.unread '.'
2209
- if c == '_' then
2210
- # Enebo: c can never be antrhign but '.'
2211
- # Why did I put this here?
2212
- else
2213
- self.yacc_value = token_buffer.join.to_i(10)
2214
- return :tINTEGER
938
+ elsif src.check(/\!/) then
939
+ if src.scan(/\!\=/) then
940
+ self.lex_state = :expr_beg
941
+ self.yacc_value = "!="
942
+ return :tNEQ
943
+ elsif src.scan(/\!~/) then
944
+ self.lex_state = :expr_beg
945
+ self.yacc_value = "!~"
946
+ return :tNMATCH
947
+ elsif src.scan(/\!/) then
948
+ self.lex_state = :expr_beg
949
+ self.yacc_value = "!"
950
+ return :tBANG
951
+ end
952
+ elsif src.check(/\</) then
953
+ if src.scan(/\<\=\>/) then
954
+ self.fix_arg_lex_state
955
+ self.yacc_value = "<=>"
956
+ return :tCMP
957
+ elsif src.scan(/\<\=/) then
958
+ self.fix_arg_lex_state
959
+ self.yacc_value = "<="
960
+ return :tLEQ
961
+ elsif src.scan(/\<\<\=/) then
962
+ self.fix_arg_lex_state
963
+ self.lex_state = :expr_beg
964
+ self.yacc_value = "\<\<"
965
+ return :tOP_ASGN
966
+ elsif src.scan(/\<\</) then
967
+ if (! [:expr_end, :expr_dot,
968
+ :expr_endarg, :expr_class].include?(lex_state) &&
969
+ (!lex_state.is_argument || space_seen)) then
970
+ tok = self.heredoc_identifier
971
+ if tok then
972
+ return tok
973
+ end
2215
974
  end
2216
- else
2217
- token_buffer << '.'
2218
- token_buffer << c2
2219
- seen_point = true
2220
- nondigit = "\0"
975
+
976
+ self.fix_arg_lex_state
977
+ self.yacc_value = "\<\<"
978
+ return :tLSHFT
979
+ elsif src.scan(/\</) then
980
+ self.fix_arg_lex_state
981
+ self.yacc_value = "<"
982
+ return :tLT
2221
983
  end
2222
- end
2223
- when /e/i then
2224
- if nondigit != "\0" then
2225
- raise SyntaxError, "Trailing '_' in number."
2226
- elsif seen_e then
2227
- src.unread c
2228
- return number_token(token_buffer.join, true, nondigit)
2229
- else
2230
- token_buffer << c
2231
- seen_e = true
2232
- nondigit = c
2233
- c = src.read
2234
- if c == '-' or c == '+' then
2235
- token_buffer << c
2236
- nondigit = c
2237
- else
2238
- src.unread c
984
+ elsif src.check(/\>/) then
985
+ if src.scan(/\>\=/) then
986
+ self.fix_arg_lex_state
987
+ self.yacc_value = ">="
988
+ return :tGEQ
989
+ elsif src.scan(/\>\>=/) then
990
+ self.fix_arg_lex_state
991
+ self.lex_state = :expr_beg
992
+ self.yacc_value = ">>"
993
+ return :tOP_ASGN
994
+ elsif src.scan(/\>\>/) then
995
+ self.fix_arg_lex_state
996
+ self.yacc_value = ">>"
997
+ return :tRSHFT
998
+ elsif src.scan(/\>/) then
999
+ self.fix_arg_lex_state
1000
+ self.yacc_value = ">"
1001
+ return :tGT
1002
+ end
1003
+ elsif src.scan(/\`/) then
1004
+ self.yacc_value = "`"
1005
+ case lex_state
1006
+ when :expr_fname then
1007
+ self.lex_state = :expr_end
1008
+ return :tBACK_REF2
1009
+ when :expr_dot then
1010
+ self.lex_state = if command_state then
1011
+ :expr_cmdarg
1012
+ else
1013
+ :expr_arg
1014
+ end
1015
+ return :tBACK_REF2
1016
+ end
1017
+ self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
1018
+ return :tXSTRING_BEG
1019
+ elsif src.scan(/\?/) then
1020
+ if lex_state == :expr_end || lex_state == :expr_endarg then
1021
+ self.lex_state = :expr_beg
1022
+ self.yacc_value = "?"
1023
+ return :tEH
2239
1024
  end
2240
- end
2241
- when '_' then # '_' in number just ignored
2242
- if nondigit != "\0" then
2243
- raise SyntaxError, "Trailing '_' in number."
2244
- end
2245
- nondigit = c
2246
- else
2247
- src.unread c
2248
- r = number_token(token_buffer.join, seen_e || seen_point, nondigit)
2249
- return r
2250
- end
2251
- c = src.read
2252
- end
2253
- end
2254
-
2255
- # TODO: remove me
2256
- def number_token(number, is_float, nondigit)
2257
- if nondigit != "\0" then
2258
- raise SyntaxError, "Trailing '_' in number."
2259
- end
2260
-
2261
- if is_float then
2262
- self.yacc_value = number.to_f
2263
- return :tFLOAT
2264
- end
2265
-
2266
- self.yacc_value = number.to_i
2267
- return :tINTEGER
2268
- end
2269
-
2270
- ############################################################
2271
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
2272
-
2273
- def tokadd s # HACK
2274
- self.token_buffer << s
2275
- end
2276
-
2277
- def warning s
2278
- # do nothing for now
2279
- end
2280
-
2281
- def rb_compile_error msg
2282
- raise msg
2283
- end
2284
-
2285
- def is_next_identchar # TODO: ?
2286
- c = src.read
2287
- src.unread c
2288
1025
 
2289
- return c != RubyLexer::EOF && c =~ /\w/
2290
- end
1026
+ if src.eos? then
1027
+ rb_compile_error "incomplete character syntax"
1028
+ end
2291
1029
 
2292
- def is_next_no_case(s) # FIX: replace this whole thing with something clean
2293
- buf = []
2294
- old_pos = src.pos
1030
+ if src.check(/\s|\v/) then
1031
+ unless lex_state.is_argument then
1032
+ c2 = { " " => 's',
1033
+ "\n" => 'n',
1034
+ "\t" => 't',
1035
+ "\v" => 'v',
1036
+ "\r" => 'r',
1037
+ "\f" => 'f' }[src.matched]
1038
+
1039
+ if c2 then
1040
+ warning("invalid character syntax; use ?\\" + c2)
1041
+ end
1042
+ end
2295
1043
 
2296
- s.each_byte do |b|
2297
- c = b.chr
2298
- r = src.read
2299
- buf << r
1044
+ # ternary
1045
+ self.lex_state = :expr_beg
1046
+ self.yacc_value = "?"
1047
+ return :tEH
1048
+ elsif src.check(/\w(?=\w)/) then # ternary, also
1049
+ self.lex_state = :expr_beg
1050
+ self.yacc_value = "?"
1051
+ return :tEH
1052
+ end
2300
1053
 
2301
- if c.downcase != r.downcase then
2302
- src.pos = old_pos
2303
- return nil
2304
- end
2305
- end
1054
+ c = if src.scan(/\\/) then
1055
+ self.read_escape
1056
+ else
1057
+ src.getch
1058
+ end
1059
+ self.lex_state = :expr_end
1060
+ self.yacc_value = c[0].ord & 0xff
1061
+ return :tINTEGER
1062
+ elsif src.check(/\&/) then
1063
+ if src.scan(/\&\&\=/) then
1064
+ self.yacc_value = "&&"
1065
+ self.lex_state = :expr_beg
1066
+ return :tOP_ASGN
1067
+ elsif src.scan(/\&\&/) then
1068
+ self.lex_state = :expr_beg
1069
+ self.yacc_value = "&&"
1070
+ return :tANDOP
1071
+ elsif src.scan(/\&\=/) then
1072
+ self.yacc_value = "&"
1073
+ self.lex_state = :expr_beg
1074
+ return :tOP_ASGN
1075
+ elsif src.scan(/&/) then
1076
+ result = if lex_state.is_argument && space_seen &&
1077
+ !src.check(/\s/) then
1078
+ warning("`&' interpreted as argument prefix")
1079
+ :tAMPER
1080
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
1081
+ :tAMPER
1082
+ else
1083
+ :tAMPER2
1084
+ end
1085
+
1086
+ self.fix_arg_lex_state
1087
+ self.yacc_value = "&"
1088
+ return result
1089
+ end
1090
+ elsif src.scan(/\//) then
1091
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1092
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1093
+ self.yacc_value = "/"
1094
+ return :tREGEXP_BEG
1095
+ end
2306
1096
 
2307
- return buf.join
2308
- end
1097
+ if src.scan(/\=/) then
1098
+ self.yacc_value = "/"
1099
+ self.lex_state = :expr_beg
1100
+ return :tOP_ASGN
1101
+ end
2309
1102
 
2310
- kill :is_hex_char, :is_oct_char, :is_identifier_char, :nextc, :pushback
1103
+ if lex_state.is_argument && space_seen then
1104
+ unless src.scan(/\s/) then
1105
+ arg_ambiguous
1106
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1107
+ self.yacc_value = "/"
1108
+ return :tREGEXP_BEG
1109
+ end
1110
+ end
2311
1111
 
2312
- # END HACK
2313
- ############################################################$
1112
+ self.fix_arg_lex_state
1113
+ self.yacc_value = "/"
2314
1114
 
2315
- end
1115
+ return :tDIVIDE
1116
+ elsif src.scan(/\^=/) then
1117
+ self.lex_state = :expr_beg
1118
+ self.yacc_value = "^"
1119
+ return :tOP_ASGN
1120
+ elsif src.scan(/\^/) then
1121
+ self.fix_arg_lex_state
1122
+ self.yacc_value = "^"
1123
+ return :tCARET
1124
+ elsif src.scan(/\;/) then
1125
+ self.command_start = true
1126
+ self.lex_state = :expr_beg
1127
+ self.yacc_value = ";"
1128
+ return :tSEMI
1129
+ elsif src.scan(/\~/) then
1130
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1131
+ src.scan(/@/)
1132
+ end
2316
1133
 
2317
- class Keyword
2318
- class KWtable
2319
- attr_accessor :name, :id, :state
2320
- def initialize(name, id=[], state=nil)
2321
- @name = name
2322
- @id = id
2323
- @state = state
2324
- end
1134
+ self.fix_arg_lex_state
1135
+ self.yacc_value = "~"
2325
1136
 
2326
- def id0
2327
- self.id.first
2328
- end
1137
+ return :tTILDE
1138
+ elsif src.scan(/\\/) then
1139
+ if src.scan(/\n/) then
1140
+ self.lineno = nil
1141
+ space_seen = true
1142
+ next
1143
+ end
1144
+ rb_compile_error "bare backslash only allowed before newline"
1145
+ elsif src.scan(/\%/) then
1146
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1147
+ return parse_quote
1148
+ end
2329
1149
 
2330
- def id1
2331
- self.id.last
2332
- end
2333
- end
1150
+ if src.scan(/\=/) then
1151
+ self.lex_state = :expr_beg
1152
+ self.yacc_value = "%"
1153
+ return :tOP_ASGN
1154
+ end
2334
1155
 
2335
- TOTAL_KEYWORDS = 40
2336
- MIN_WORD_LENGTH = 2
2337
- MAX_WORD_LENGTH = 8
2338
- MIN_HASH_VALUE = 6
2339
- MAX_HASH_VALUE = 55
2340
- # maximum key range = 50, duplicates = 0
2341
-
2342
- def self.hash_keyword(str, len)
2343
- hval = len
2344
-
2345
- asso_values = [
2346
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2347
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2348
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2349
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2350
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2351
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2352
- 56, 56, 56, 11, 56, 56, 36, 56, 1, 37,
2353
- 31, 1, 56, 56, 56, 56, 29, 56, 1, 56,
2354
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2355
- 56, 56, 56, 56, 56, 1, 56, 32, 1, 2,
2356
- 1, 1, 4, 23, 56, 17, 56, 20, 9, 2,
2357
- 9, 26, 14, 56, 5, 1, 1, 16, 56, 21,
2358
- 20, 9, 56, 56, 56, 56, 56, 56, 56, 56,
2359
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2360
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2361
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2362
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2363
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2364
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2365
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2366
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2367
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2368
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2369
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2370
- 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
2371
- 56, 56, 56, 56, 56, 56
2372
- ]
2373
-
2374
- case hval
2375
- when 2, 1 then
2376
- hval += asso_values[str[0]]
2377
- else
2378
- hval += asso_values[str[2]]
2379
- hval += asso_values[str[0]]
2380
- end
1156
+ if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1157
+ return parse_quote
1158
+ end
2381
1159
 
2382
- hval += asso_values[str[len - 1]]
2383
- return hval
2384
- end
1160
+ self.fix_arg_lex_state
1161
+ self.yacc_value = "%"
1162
+
1163
+ return :tPERCENT
1164
+ elsif src.check(/\$/) then
1165
+ if src.scan(/(\$_)(\w+)/) then
1166
+ self.lex_state = :expr_end
1167
+ self.token = src.matched
1168
+ return process_token(command_state)
1169
+ elsif src.scan(/\$_/) then
1170
+ self.lex_state = :expr_end
1171
+ self.token = src.matched
1172
+ self.yacc_value = src.matched
1173
+ return :tGVAR
1174
+ elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1175
+ self.lex_state = :expr_end
1176
+ self.yacc_value = src.matched
1177
+ return :tGVAR
1178
+ elsif src.scan(/\$([\&\`\'\+])/) then
1179
+ self.lex_state = :expr_end
1180
+ # Explicit reference to these vars as symbols...
1181
+ if last_state == :expr_fname then
1182
+ self.yacc_value = src.matched
1183
+ return :tGVAR
1184
+ else
1185
+ self.yacc_value = src[1].to_sym
1186
+ return :tBACK_REF
1187
+ end
1188
+ elsif src.scan(/\$([1-9]\d*)/) then
1189
+ self.lex_state = :expr_end
1190
+ if last_state == :expr_fname then
1191
+ self.yacc_value = src.matched
1192
+ return :tGVAR
1193
+ else
1194
+ self.yacc_value = src[1].to_i
1195
+ return :tNTH_REF
1196
+ end
1197
+ elsif src.scan(/\$0/) then
1198
+ self.lex_state = :expr_end
1199
+ self.token = src.matched
1200
+ return process_token(command_state)
1201
+ elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1202
+ self.lex_state = :expr_end
1203
+ self.yacc_value = "$"
1204
+ return "$"
1205
+ elsif src.scan(/\$\w+/)
1206
+ self.lex_state = :expr_end
1207
+ self.token = src.matched
1208
+ return process_token(command_state)
1209
+ end
1210
+ elsif src.check(/\_/) then
1211
+ if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1212
+ self.lineno = nil
1213
+ return RubyLexer::EOF
1214
+ elsif src.scan(/\_\w*/) then
1215
+ self.token = src.matched
1216
+ return process_token(command_state)
1217
+ end
1218
+ end
1219
+ end # END OF CASE
2385
1220
 
2386
- ##
2387
- # :expr_beg = ignore newline, +/- is a sign.
2388
- # :expr_end = newline significant, +/- is a operator.
2389
- # :expr_arg = newline significant, +/- is a operator.
2390
- # :expr_cmdarg = newline significant, +/- is a operator.
2391
- # :expr_endarg = newline significant, +/- is a operator.
2392
- # :expr_mid = newline significant, +/- is a operator.
2393
- # :expr_fname = ignore newline, no reserved words.
2394
- # :expr_dot = right after . or ::, no reserved words.
2395
- # :expr_class = immediate after class, no here document.
2396
-
2397
- def self.keyword(str, len = str.size)
2398
- wordlist = [
2399
- [""], [""], [""], [""], [""], [""],
2400
- ["end", [:kEND, :kEND ], :expr_end ],
2401
- ["else", [:kELSE, :kELSE ], :expr_beg ],
2402
- ["case", [:kCASE, :kCASE ], :expr_beg ],
2403
- ["ensure", [:kENSURE, :kENSURE ], :expr_beg ],
2404
- ["module", [:kMODULE, :kMODULE ], :expr_beg ],
2405
- ["elsif", [:kELSIF, :kELSIF ], :expr_beg ],
2406
- ["def", [:kDEF, :kDEF ], :expr_fname ],
2407
- ["rescue", [:kRESCUE, :kRESCUE_MOD ], :expr_mid ],
2408
- ["not", [:kNOT, :kNOT ], :expr_beg ],
2409
- ["then", [:kTHEN, :kTHEN ], :expr_beg ],
2410
- ["yield", [:kYIELD, :kYIELD ], :expr_arg ],
2411
- ["for", [:kFOR, :kFOR ], :expr_beg ],
2412
- ["self", [:kSELF, :kSELF ], :expr_end ],
2413
- ["false", [:kFALSE, :kFALSE ], :expr_end ],
2414
- ["retry", [:kRETRY, :kRETRY ], :expr_end ],
2415
- ["return", [:kRETURN, :kRETURN ], :expr_mid ],
2416
- ["true", [:kTRUE, :kTRUE ], :expr_end ],
2417
- ["if", [:kIF, :kIF_MOD ], :expr_beg ],
2418
- ["defined?", [:kDEFINED, :kDEFINED ], :expr_arg ],
2419
- ["super", [:kSUPER, :kSUPER ], :expr_arg ],
2420
- ["undef", [:kUNDEF, :kUNDEF ], :expr_fname ],
2421
- ["break", [:kBREAK, :kBREAK ], :expr_mid ],
2422
- ["in", [:kIN, :kIN ], :expr_beg ],
2423
- ["do", [:kDO, :kDO ], :expr_beg ],
2424
- ["nil", [:kNIL, :kNIL ], :expr_end ],
2425
- ["until", [:kUNTIL, :kUNTIL_MOD ], :expr_beg ],
2426
- ["unless", [:kUNLESS, :kUNLESS_MOD ], :expr_beg ],
2427
- ["or", [:kOR, :kOR ], :expr_beg ],
2428
- ["next", [:kNEXT, :kNEXT ], :expr_mid ],
2429
- ["when", [:kWHEN, :kWHEN ], :expr_beg ],
2430
- ["redo", [:kREDO, :kREDO ], :expr_end ],
2431
- ["and", [:kAND, :kAND ], :expr_beg ],
2432
- ["begin", [:kBEGIN, :kBEGIN ], :expr_beg ],
2433
- ["__LINE__", [:k__LINE__, :k__LINE__ ], :expr_end ],
2434
- ["class", [:kCLASS, :kCLASS ], :expr_class ],
2435
- ["__FILE__", [:k__FILE__, :k__FILE__ ], :expr_end ],
2436
- ["END", [:klEND, :klEND ], :expr_end ],
2437
- ["BEGIN", [:klBEGIN, :klBEGIN ], :expr_end ],
2438
- ["while", [:kWHILE, :kWHILE_MOD ], :expr_beg ],
2439
- [""], [""], [""], [""], [""], [""], [""], [""], [""],
2440
- [""],
2441
- ["alias", [:kALIAS, :kALIAS ], :expr_fname ],
2442
- ].map { |args| KWtable.new(*args) }
2443
-
2444
- if len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH then
2445
- key = hash_keyword(str, len)
2446
- if key <= MAX_HASH_VALUE && key >= 0 then
2447
- s = wordlist[key].name
2448
- return wordlist[key] if str == s
1221
+ if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1222
+ return RubyLexer::EOF
1223
+ else # alpha check
1224
+ if src.scan(/\W/) then
1225
+ rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1226
+ end
2449
1227
  end
2450
- end
2451
-
2452
- return nil
2453
- end
2454
- end
2455
-
2456
- class Environment
2457
- attr_reader :env, :dyn
2458
- attr_accessor :init
2459
1228
 
2460
- def initialize dyn = false
2461
- @dyn = []
2462
- @env = []
2463
- @use = []
2464
- @init = false
2465
- self.extend
2466
- end
1229
+ self.token = src.matched if self.src.scan(/\w+/)
2467
1230
 
2468
- def use id
2469
- @env.each_with_index do |env, i|
2470
- if env[id] then
2471
- @use[i][id] = true
2472
- end
1231
+ return process_token(command_state)
2473
1232
  end
2474
1233
  end
2475
1234
 
2476
- def used? id
2477
- idx = @dyn.index false # REFACTOR
2478
- u = @use[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
2479
- u[id]
2480
- end
2481
-
2482
- def [] k
2483
- self.all[k]
2484
- end
2485
-
2486
- def []= k, v
2487
- raise "no" if v == true
2488
- self.current[k] = v
2489
- end
2490
-
2491
- def has_key? k
2492
- self.all.has_key? k
2493
- end
2494
-
2495
- def all
2496
- idx = @dyn.index false
2497
- @env[0..idx].reverse.inject { |env, scope| env.merge scope }
2498
- end
2499
-
2500
- def dynamic
2501
- idx = @dyn.index false
2502
- @env[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
2503
- end
2504
-
2505
- def current
2506
- @env.first
2507
- end
2508
-
2509
- def dynamic?
2510
- @dyn[0] != false
2511
- end
2512
-
2513
- def dasgn_curr? name # TODO: I think this is wrong - nuke
2514
- (! has_key?(name) && dynamic?) || current.has_key?(name)
2515
- end
2516
-
2517
- def extend dyn = false
2518
- @dyn.unshift dyn
2519
- @env.unshift({})
2520
- @use.unshift({})
2521
- end
2522
-
2523
- def unextend
2524
- @dyn.shift
2525
- @env.shift
2526
- @use.shift
2527
- raise "You went too far unextending env" if @env.empty?
2528
- end
2529
- end
2530
-
2531
- class StackState
2532
- attr_reader :stack
2533
-
2534
- def inspect
2535
- "StackState(#{@name}, #{@stack.inspect})"
2536
- end
2537
-
2538
- def initialize(name)
2539
- @name = name
2540
- @stack = [false]
2541
- end
2542
-
2543
- def pop
2544
- # raise "#{@name} empty" if @stack.size <= 1
2545
- r = @stack.pop
2546
- @stack.push false if @stack.size == 0
2547
- r
2548
- end
2549
-
2550
- def lexpop
2551
- raise if @stack.size == 0
2552
- a = @stack.pop
2553
- b = @stack.pop
2554
- @stack.push(a || b)
2555
- end
2556
-
2557
- def push val
2558
- raise if val != true and val != false
2559
- @stack.push val
2560
- end
2561
-
2562
- def is_in_state
2563
- @stack.last
2564
- end
2565
- end
2566
-
2567
- def t str
2568
- Token.new str
2569
- end
2570
-
2571
- class Token # TODO: nuke this and use sexps
2572
- attr_accessor :args
2573
- def initialize(token)
2574
- @args = Array(token)
2575
- end
2576
-
2577
- def value # TODO: eventually phase this out (or make it official)
2578
- self.args.first
2579
- end
2580
-
2581
- def first # HACK
2582
- self.args.first
2583
- end
2584
-
2585
- def inspect
2586
- "t(#{args.join.inspect})"
2587
- end
2588
-
2589
- def to_sym
2590
- self.value.to_sym
2591
- end
2592
-
2593
- def == o
2594
- Token === o and self.args == o.args
2595
- end
2596
- end
1235
+ def process_token(command_state)
2597
1236
 
2598
- ############################################################
2599
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
1237
+ token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
2600
1238
 
2601
- class Symbol
2602
- def is_argument # TODO: phase this out
2603
- return self == :expr_arg || self == :expr_cmdarg
2604
- end
2605
- end
1239
+ result = nil
1240
+ last_state = lex_state
2606
1241
 
2607
- class StringIO # HACK: everything in here is a hack
2608
- attr_accessor :begin_of_line, :was_begin_of_line
2609
- alias :begin_of_line? :begin_of_line
2610
- alias :read_all :read
2611
-
2612
- alias :old_initialize :initialize
2613
-
2614
- def initialize(*args)
2615
- self.begin_of_line = true
2616
- self.was_begin_of_line = false
2617
- old_initialize(*args)
2618
- @original_string = self.string.dup
2619
- end
2620
1242
 
2621
- def rest
2622
- self.string[self.pos..-1]
2623
- end
1243
+ case token
1244
+ when /^\$/ then
1245
+ self.lex_state, result = :expr_end, :tGVAR
1246
+ when /^@@/ then
1247
+ self.lex_state, result = :expr_end, :tCVAR
1248
+ when /^@/ then
1249
+ self.lex_state, result = :expr_end, :tIVAR
1250
+ else
1251
+ if token =~ /[!?]$/ then
1252
+ result = :tFID
1253
+ else
1254
+ if lex_state == :expr_fname then
1255
+ # ident=, not =~ => == or followed by =>
1256
+ # TODO test lexing of a=>b vs a==>b
1257
+ if src.scan(/=(?:(?![~>=])|(?==>))/) then
1258
+ result = :tIDENTIFIER
1259
+ token << src.matched
1260
+ end
1261
+ end
2624
1262
 
2625
- def current_line # HAHA fuck you
2626
- @original_string[0..self.pos][/\A.*__LINE__/m].split(/\n/).size
2627
- end
1263
+ result ||= if token =~ /^[A-Z]/ then
1264
+ :tCONSTANT
1265
+ else
1266
+ :tIDENTIFIER
1267
+ end
1268
+ end
2628
1269
 
2629
- def read
2630
- c = self.getc
1270
+ unless lex_state == :expr_dot then
1271
+ # See if it is a reserved word.
1272
+ keyword = Keyword.keyword token
1273
+
1274
+ if keyword then
1275
+ state = lex_state
1276
+ self.lex_state = keyword.state
1277
+ self.yacc_value = token
1278
+
1279
+ if keyword.id0 == :kDO then
1280
+ self.command_start = true
1281
+ return :kDO_COND if cond.is_in_state
1282
+ return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1283
+ return :kDO_BLOCK if state == :expr_endarg
1284
+ return :kDO
1285
+ end
2631
1286
 
2632
- if c == ?\r then
2633
- d = self.getc
2634
- self.ungetc d if d and d != ?\n
2635
- c = ?\n
2636
- end
2637
-
2638
- self.was_begin_of_line = self.begin_of_line
2639
- self.begin_of_line = c == ?\n
2640
- if c and c != 0 then
2641
- c.chr
2642
- else
2643
- ::RubyLexer::EOF
2644
- end
2645
- end
1287
+ return keyword.id0 if state == :expr_beg
2646
1288
 
2647
- def match_string term, indent=false # TODO: add case insensitivity, or just remove
2648
- buffer = []
1289
+ self.lex_state = :expr_beg if keyword.id0 != keyword.id1
2649
1290
 
2650
- if indent
2651
- while c = self.read do
2652
- if c !~ /\s/ or c == "\n" or c == "\r" then
2653
- self.unread c
2654
- break
1291
+ return keyword.id1
2655
1292
  end
2656
- buffer << c
2657
1293
  end
2658
- end
2659
1294
 
2660
- term.each_byte do |c2|
2661
- c = self.read
2662
- c = self.read if c and c == "\r"
2663
- buffer << c
2664
- if c and c2 != c[0] then
2665
- self.unread_many buffer.join # HACK omg
2666
- return false
1295
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
1296
+ lex_state == :expr_dot || lex_state == :expr_arg ||
1297
+ lex_state == :expr_cmdarg) then
1298
+ if command_state then
1299
+ self.lex_state = :expr_cmdarg
1300
+ else
1301
+ self.lex_state = :expr_arg
1302
+ end
1303
+ else
1304
+ self.lex_state = :expr_end
2667
1305
  end
2668
1306
  end
2669
1307
 
2670
- return true
2671
- end
2672
-
2673
- def read_line
2674
- self.begin_of_line = true
2675
- self.was_begin_of_line = false
2676
- gets.sub(/\r\n?$/, "\n") # HACK
2677
- end
2678
-
2679
- def peek expected = nil # FIX: barf
2680
- c = self.getc
2681
- return RubyLexer::EOF if c.nil?
2682
- self.ungetc c if c
2683
- c = c.chr if c
2684
- if expected then
2685
- c == expected
2686
- else
2687
- c
2688
- end
2689
- end
2690
-
2691
- def unread(c)
2692
- return if c.nil? # UGH
2693
-
2694
- # HACK: only depth is 2... who cares? really I want to remove all of this
2695
- self.begin_of_line = self.was_begin_of_line || true
2696
- self.was_begin_of_line = nil
2697
-
2698
- c = c[0] if String === c
2699
- self.ungetc c
2700
- end
1308
+ self.yacc_value = token
2701
1309
 
2702
- def unread_many str
2703
- str.split(//).reverse.each do |c|
2704
- unread c
2705
- end
2706
- end
2707
- end
2708
1310
 
2709
- class Sexp
2710
- attr_writer :paren
1311
+ self.lex_state = :expr_end if
1312
+ last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
2711
1313
 
2712
- def paren
2713
- @paren ||= false
1314
+ return result
2714
1315
  end
2715
1316
 
2716
- def value
2717
- raise "multi item sexp" if size > 2
2718
- last
2719
- end
1317
+ def yylex_string # 23 lines
1318
+ token = if lex_strterm[0] == :heredoc then
1319
+ self.heredoc lex_strterm
1320
+ else
1321
+ self.parse_string lex_strterm
1322
+ end
2720
1323
 
2721
- def values
2722
- self[1..-1]
2723
- end
1324
+ if token == :tSTRING_END || token == :tREGEXP_END then
1325
+ self.lineno = nil
1326
+ self.lex_strterm = nil
1327
+ self.lex_state = :expr_end
1328
+ end
2724
1329
 
2725
- def node_type
2726
- first
1330
+ return token
2727
1331
  end
2728
-
2729
- kill :add, :add_all
2730
- end
2731
-
2732
- def bitch
2733
- c = caller
2734
- m = c[0].split.last
2735
- warn "bitch: you shouldn't be doing #{m}: from #{c[1]}"
2736
1332
  end
2737
-
2738
- # class NilClass
2739
- # def method_missing msg, *args
2740
- # c = caller
2741
- # warn "called #{msg} on nil (args = #{args.inspect}): from #{c[0]}"
2742
- # nil
2743
- # end
2744
- # end
2745
-
2746
- # def d s
2747
- # warn s.inspect
2748
- # end
2749
-
2750
- # END HACK
2751
- ############################################################