ruby_parser 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ruby_parser might be problematic. Click here for more details.
- data/.autotest +26 -3
- data/History.txt +108 -0
- data/Manifest.txt +3 -0
- data/README.txt +1 -1
- data/Rakefile +126 -28
- data/bin/ruby_parse +89 -0
- data/lib/ruby_lexer.rb +1117 -2536
- data/lib/ruby_parser.rb +5407 -5849
- data/lib/ruby_parser.y +1763 -1621
- data/lib/ruby_parser_extras.rb +1051 -0
- data/test/test_ruby_lexer.rb +1607 -267
- data/test/test_ruby_parser.rb +317 -175
- data/test/test_ruby_parser_extras.rb +177 -0
- metadata +27 -10
data/lib/ruby_lexer.rb
CHANGED
@@ -1,2751 +1,1332 @@
|
|
1
|
-
require 'pp'
|
2
|
-
require 'stringio'
|
3
|
-
require 'racc/parser'
|
4
1
|
$: << File.expand_path("~/Work/p4/zss/src/ParseTree/dev/lib") # for me, not you.
|
5
2
|
require 'sexp'
|
3
|
+
require 'ruby_parser_extras'
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
methods.each do |method|
|
13
|
-
define_method method do |*args|
|
14
|
-
c = caller
|
15
|
-
raise "#{method} is dead - called from #{c[0]}"
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
# END HACK
|
22
|
-
############################################################
|
23
|
-
|
24
|
-
class RubyParser < Racc::Parser
|
25
|
-
VERSION = '1.0.0'
|
26
|
-
|
27
|
-
attr_accessor :lexer, :in_def, :in_single, :file
|
28
|
-
attr_reader :env, :warnings
|
5
|
+
class RubyLexer
|
6
|
+
attr_accessor :command_start
|
7
|
+
attr_accessor :cmdarg
|
8
|
+
attr_accessor :cond
|
9
|
+
attr_accessor :nest
|
29
10
|
|
30
|
-
|
31
|
-
super
|
32
|
-
self.lexer = RubyLexer.new
|
33
|
-
self.in_def = false
|
34
|
-
self.in_single = 0
|
35
|
-
@env = Environment.new
|
36
|
-
end
|
11
|
+
ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-.|(C-|c)\?|(C-|c).|[^0-7xMCc])/
|
37
12
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
old_yyerror
|
42
|
-
end
|
13
|
+
# Additional context surrounding tokens that both the lexer and
|
14
|
+
# grammar use.
|
15
|
+
attr_reader :lex_state
|
43
16
|
|
44
|
-
|
45
|
-
raise "bad val: #{str.inspect}" unless String === str
|
17
|
+
attr_accessor :lex_strterm
|
46
18
|
|
47
|
-
|
48
|
-
self.lexer.src = StringIO.new(str)
|
19
|
+
attr_accessor :parser # HACK for very end of lexer... *sigh*
|
49
20
|
|
50
|
-
|
21
|
+
# Stream of data that yylex examines.
|
22
|
+
attr_reader :src
|
51
23
|
|
52
|
-
|
53
|
-
|
24
|
+
# Last token read via yylex.
|
25
|
+
attr_accessor :token
|
54
26
|
|
55
|
-
|
56
|
-
_racc_do_parse_rb(_racc_setup, false)
|
57
|
-
end
|
27
|
+
attr_accessor :string_buffer
|
58
28
|
|
59
|
-
|
60
|
-
|
61
|
-
end
|
29
|
+
# Value of last token which had a value associated with it.
|
30
|
+
attr_accessor :yacc_value
|
62
31
|
|
63
|
-
|
64
|
-
|
65
|
-
raise "boom"
|
66
|
-
end if ENV["DEBUG"]
|
32
|
+
# What handles warnings
|
33
|
+
attr_accessor :warnings
|
67
34
|
|
68
|
-
|
69
|
-
if self.lexer.advance then
|
70
|
-
[self.lexer.token, self.lexer.yacc_value]
|
71
|
-
else
|
72
|
-
return [false, '$end']
|
73
|
-
end
|
74
|
-
end
|
35
|
+
EOF = :eof_haha!
|
75
36
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
when /^[A-Z]/ then
|
92
|
-
s(:cdecl, id)
|
93
|
-
else
|
37
|
+
# ruby constants for strings (should this be moved somewhere else?)
|
38
|
+
STR_FUNC_BORING = 0x00
|
39
|
+
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
|
40
|
+
STR_FUNC_EXPAND = 0x02
|
41
|
+
STR_FUNC_REGEXP = 0x04
|
42
|
+
STR_FUNC_AWORDS = 0x08
|
43
|
+
STR_FUNC_SYMBOL = 0x10
|
44
|
+
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
|
45
|
+
|
46
|
+
STR_SQUOTE = STR_FUNC_BORING
|
47
|
+
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
48
|
+
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
49
|
+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
50
|
+
STR_SSYM = STR_FUNC_SYMBOL
|
51
|
+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
94
52
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
when :dvar, nil then
|
99
|
-
if self.env.current[id] == :dvar then
|
100
|
-
s(:dasgn_curr, id)
|
101
|
-
elsif self.env[id] == :dvar then
|
102
|
-
self.env.use(id)
|
103
|
-
s(:dasgn, id)
|
104
|
-
elsif ! self.env.dynamic? then
|
105
|
-
s(:lasgn, id)
|
106
|
-
else
|
107
|
-
s(:dasgn_curr, id)
|
108
|
-
end
|
109
|
-
# if env.dynamic? then
|
110
|
-
# if env.dasgn_curr? id then
|
111
|
-
# s(:dasgn_curr, id)
|
112
|
-
# else
|
113
|
-
# s(:dasgn, id)
|
114
|
-
# end
|
115
|
-
# else
|
116
|
-
# s(:lasgn, id)
|
117
|
-
# end
|
118
|
-
else
|
119
|
-
raise "wtf?"
|
120
|
-
end
|
121
|
-
end
|
53
|
+
# How the parser advances to the next token.
|
54
|
+
#
|
55
|
+
# @return true if not at end of file (EOF).
|
122
56
|
|
123
|
-
|
57
|
+
def advance
|
58
|
+
r = yylex
|
59
|
+
self.token = r
|
124
60
|
|
125
|
-
|
61
|
+
raise "yylex returned nil" unless r
|
126
62
|
|
127
|
-
return
|
63
|
+
return RubyLexer::EOF != r
|
128
64
|
end
|
129
65
|
|
130
|
-
def
|
131
|
-
|
132
|
-
|
133
|
-
self.lexer.warnings = warnings
|
66
|
+
def arg_ambiguous
|
67
|
+
self.warning("Ambiguous first argument. make sure.")
|
134
68
|
end
|
135
69
|
|
136
|
-
def
|
137
|
-
|
138
|
-
|
139
|
-
|
70
|
+
def comments
|
71
|
+
c = @comments.join
|
72
|
+
@comments.clear
|
73
|
+
c
|
140
74
|
end
|
141
75
|
|
142
|
-
def
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
case lhs[0]
|
148
|
-
when :gasgn, :iasgn, :lasgn, :dasgn, :dasgn_curr,
|
149
|
-
:masgn, :cdecl, :cvdecl, :cvasgn then
|
150
|
-
lhs << rhs
|
151
|
-
when :attrasgn, :call then
|
152
|
-
args = lhs.array(true) || lhs.argscat(true) || lhs.splat(true) # FIX: fragile
|
153
|
-
# args = case lhs[1][1]
|
154
|
-
# when :array, :argscat, :splat then
|
155
|
-
# lhs.delete_at 1
|
156
|
-
# else
|
157
|
-
# nil # TODO: check - no clue what it should be, or even if
|
158
|
-
# end
|
159
|
-
|
160
|
-
lhs << arg_add(args, rhs)
|
161
|
-
end
|
162
|
-
|
163
|
-
lhs
|
76
|
+
def expr_beg_push val
|
77
|
+
cond.push false
|
78
|
+
cmdarg.push false
|
79
|
+
self.lex_state = :expr_beg
|
80
|
+
self.yacc_value = val
|
164
81
|
end
|
165
82
|
|
166
|
-
def
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
return s(:nil) if id == :nil
|
173
|
-
return s(:true) if id == :true
|
174
|
-
return s(:false) if id == :false
|
175
|
-
return s(:str, self.file) if id == :"__FILE__"
|
176
|
-
return s(:lit, lexer.src.current_line) if id == :"__LINE__"
|
177
|
-
|
178
|
-
result = case id.to_s
|
179
|
-
when /^@@/ then
|
180
|
-
s(:cvar, id)
|
181
|
-
when /^@/ then
|
182
|
-
s(:ivar, id)
|
183
|
-
when /^\$/ then
|
184
|
-
s(:gvar, id)
|
185
|
-
when /^[A-Z]/ then
|
186
|
-
s(:const, id)
|
187
|
-
else
|
188
|
-
type = env[id]
|
189
|
-
if type then
|
190
|
-
s(type, id)
|
191
|
-
elsif env.dynamic? and :dvar == env[id] then
|
192
|
-
s(:dvar, id)
|
193
|
-
else
|
194
|
-
s(:vcall, id)
|
195
|
-
end
|
196
|
-
end
|
197
|
-
|
198
|
-
return result if result
|
199
|
-
|
200
|
-
raise "identifier #{id.inspect} is not valid"
|
83
|
+
def fix_arg_lex_state
|
84
|
+
self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
|
85
|
+
:expr_arg
|
86
|
+
else
|
87
|
+
:expr_beg
|
88
|
+
end
|
201
89
|
end
|
202
90
|
|
203
|
-
def
|
204
|
-
|
205
|
-
return tail unless head
|
91
|
+
def heredoc here # 63 lines
|
92
|
+
_, eos, func, last_line = here
|
206
93
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
94
|
+
indent = (func & STR_FUNC_INDENT) != 0
|
95
|
+
expand = (func & STR_FUNC_EXPAND) != 0
|
96
|
+
eos_re = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
|
97
|
+
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
211
98
|
|
212
|
-
|
213
|
-
|
99
|
+
rb_compile_error err_msg if
|
100
|
+
src.eos?
|
214
101
|
|
215
|
-
if
|
216
|
-
|
217
|
-
|
218
|
-
|
102
|
+
if src.beginning_of_line? && src.scan(eos_re) then
|
103
|
+
src.unread_many last_line # TODO: figure out how to remove this
|
104
|
+
self.yacc_value = eos
|
105
|
+
return :tSTRING_END
|
219
106
|
end
|
220
|
-
end
|
221
107
|
|
222
|
-
|
223
|
-
if node then
|
224
|
-
raise SyntaxError, "Block argument should not be given." if
|
225
|
-
node.node_type == :block_pass
|
108
|
+
self.string_buffer = []
|
226
109
|
|
227
|
-
|
228
|
-
|
110
|
+
if expand then
|
111
|
+
case
|
112
|
+
when src.scan(/#[$@]/) then
|
113
|
+
src.pos -= 1 # FIX omg stupid
|
114
|
+
self.yacc_value = src.matched
|
115
|
+
return :tSTRING_DVAR
|
116
|
+
when src.scan(/#[{]/) then
|
117
|
+
self.yacc_value = src.matched
|
118
|
+
return :tSTRING_DBEG
|
119
|
+
when src.scan(/#/) then
|
120
|
+
string_buffer << '#'
|
121
|
+
end
|
229
122
|
|
230
|
-
|
231
|
-
|
123
|
+
until src.scan(eos_re) do
|
124
|
+
c = tokadd_string func, "\n", nil
|
232
125
|
|
233
|
-
|
234
|
-
|
126
|
+
rb_compile_error err_msg if
|
127
|
+
c == RubyLexer::EOF
|
235
128
|
|
236
|
-
|
237
|
-
|
129
|
+
if c != "\n" then
|
130
|
+
self.yacc_value = string_buffer.join.delete("\r")
|
131
|
+
return :tSTRING_CONTENT
|
132
|
+
else
|
133
|
+
string_buffer << src.scan(/\n/)
|
134
|
+
end
|
238
135
|
|
239
|
-
|
240
|
-
|
136
|
+
rb_compile_error err_msg if
|
137
|
+
src.eos?
|
241
138
|
end
|
242
139
|
|
243
|
-
|
244
|
-
|
245
|
-
|
140
|
+
# tack on a NL after the heredoc token - FIX NL should not be needed
|
141
|
+
src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
|
142
|
+
else
|
143
|
+
until src.check(eos_re) do
|
144
|
+
string_buffer << src.scan(/.*(\n|\z)/)
|
145
|
+
rb_compile_error err_msg if
|
146
|
+
src.eos?
|
147
|
+
end
|
246
148
|
end
|
247
149
|
|
248
|
-
|
150
|
+
self.lex_strterm = [:heredoc, eos, func, last_line]
|
151
|
+
self.yacc_value = string_buffer.join.delete("\r")
|
152
|
+
|
153
|
+
return :tSTRING_CONTENT
|
249
154
|
end
|
250
155
|
|
251
|
-
def
|
252
|
-
|
253
|
-
|
254
|
-
call = s(:call, recv, meth)
|
255
|
-
call << new_args if new_args
|
256
|
-
args << call
|
156
|
+
def heredoc_identifier # 51 lines
|
157
|
+
term, func = nil, STR_FUNC_BORING
|
158
|
+
self.string_buffer = []
|
257
159
|
|
258
|
-
|
160
|
+
case
|
161
|
+
when src.scan(/(-?)(['"`])(.*?)\2/) then
|
162
|
+
term = src[2]
|
163
|
+
unless src[1].empty? then
|
164
|
+
func |= STR_FUNC_INDENT
|
165
|
+
end
|
166
|
+
func |= case term
|
167
|
+
when "\'" then
|
168
|
+
STR_SQUOTE
|
169
|
+
when '"' then
|
170
|
+
STR_DQUOTE
|
171
|
+
else
|
172
|
+
STR_XQUOTE
|
173
|
+
end
|
174
|
+
string_buffer << src[3]
|
175
|
+
when src.scan(/-?(['"`])(?!\1*\Z)/) then
|
176
|
+
rb_compile_error "unterminated here document identifier"
|
177
|
+
when src.scan(/(-?)(\w+)/) then
|
178
|
+
term = '"'
|
179
|
+
func |= STR_DQUOTE
|
180
|
+
unless src[1].empty? then
|
181
|
+
func |= STR_FUNC_INDENT
|
182
|
+
end
|
183
|
+
string_buffer << src[2]
|
184
|
+
else
|
185
|
+
return nil
|
259
186
|
end
|
260
|
-
result = s(:call, recv, meth)
|
261
|
-
result << args if args
|
262
|
-
result
|
263
|
-
end
|
264
187
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
return args
|
188
|
+
if src.check(/.*\n/) then
|
189
|
+
# TODO: think about storing off the char range instead
|
190
|
+
line = src.string[src.pos, src.matched_size]
|
191
|
+
src.string[src.pos, src.matched_size] = ''
|
192
|
+
else
|
193
|
+
line = nil
|
272
194
|
end
|
273
195
|
|
274
|
-
|
275
|
-
r << args if args and args != s(:array)
|
276
|
-
r
|
277
|
-
end
|
196
|
+
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
278
197
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
return node2
|
198
|
+
if term == '`' then
|
199
|
+
self.yacc_value = "`"
|
200
|
+
return :tXSTRING_BEG
|
283
201
|
else
|
284
|
-
|
202
|
+
self.yacc_value = "\""
|
203
|
+
return :tSTRING_BEG
|
285
204
|
end
|
286
205
|
end
|
287
206
|
|
288
|
-
def
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
when :lit then
|
294
|
-
return s(:match2, lhs, rhs) if Regexp === lhs.last
|
295
|
-
end
|
296
|
-
end
|
207
|
+
def initialize
|
208
|
+
self.cond = StackState.new(:cond)
|
209
|
+
self.cmdarg = StackState.new(:cmdarg)
|
210
|
+
self.nest = 0
|
211
|
+
@comments = []
|
297
212
|
|
298
|
-
|
299
|
-
|
300
|
-
when :dregx, :dregx_once then
|
301
|
-
return s(:match3, rhs, lhs)
|
302
|
-
when :lit then
|
303
|
-
return s(:match3, rhs, lhs) if Regexp === rhs.last
|
304
|
-
end
|
305
|
-
end
|
213
|
+
reset
|
214
|
+
end
|
306
215
|
|
307
|
-
|
216
|
+
def int_with_base base
|
217
|
+
rb_compile_error "Invalid numeric format" if src.matched =~ /__/
|
218
|
+
self.yacc_value = src.matched.to_i(base)
|
219
|
+
return :tINTEGER
|
308
220
|
end
|
309
221
|
|
310
|
-
def
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
222
|
+
def lex_state= o
|
223
|
+
raise "wtf?" unless Symbol === o
|
224
|
+
@lex_state = o
|
225
|
+
end
|
226
|
+
|
227
|
+
attr_writer :lineno
|
228
|
+
def lineno
|
229
|
+
@lineno ||= src.lineno
|
230
|
+
end
|
231
|
+
|
232
|
+
##
|
233
|
+
# Parse a number from the input stream.
|
234
|
+
#
|
235
|
+
# @param c The first character of the number.
|
236
|
+
# @return A int constant wich represents a token.
|
237
|
+
|
238
|
+
def parse_number
|
239
|
+
self.lex_state = :expr_end
|
240
|
+
|
241
|
+
case
|
242
|
+
when src.scan(/[+-]?0[xbd]\b/) then
|
243
|
+
rb_compile_error "Invalid numeric format"
|
244
|
+
when src.scan(/[+-]?0x[a-f0-9_]+/i) then
|
245
|
+
int_with_base(16)
|
246
|
+
when src.scan(/[+-]?0b[01_]+/) then
|
247
|
+
int_with_base(2)
|
248
|
+
when src.scan(/[+-]?0d[0-9_]+/) then
|
249
|
+
int_with_base(10)
|
250
|
+
when src.scan(/[+-]?0o?[0-7_]*[89]/) then
|
251
|
+
rb_compile_error "Illegal octal digit."
|
252
|
+
when src.scan(/[+-]?0o?[0-7_]+|0o/) then
|
253
|
+
int_with_base(8)
|
254
|
+
when src.scan(/[+-]?[\d_]+_(e|\.)/) then
|
255
|
+
rb_compile_error "Trailing '_' in number."
|
256
|
+
when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
|
257
|
+
number = src.matched
|
258
|
+
if number =~ /__/ then
|
259
|
+
rb_compile_error "Invalid numeric format"
|
324
260
|
end
|
325
|
-
|
326
|
-
|
327
|
-
when
|
328
|
-
|
329
|
-
when
|
330
|
-
|
331
|
-
env[label] = self.env.dynamic? ? :dvar : :lvar
|
332
|
-
return s(:flip2, node[1], node[2])
|
333
|
-
when :dot3 then
|
334
|
-
label = "flip#{node.hash}"
|
335
|
-
env[label] = self.env.dynamic? ? :dvar : :lvar
|
336
|
-
return s(:flip3, node[1], node[2])
|
261
|
+
self.yacc_value = number.to_f
|
262
|
+
:tFLOAT
|
263
|
+
when src.scan(/[+-]?0\b/) then
|
264
|
+
int_with_base(10)
|
265
|
+
when src.scan(/[+-]?[\d_]+\b/) then
|
266
|
+
int_with_base(10)
|
337
267
|
else
|
338
|
-
|
268
|
+
rb_compile_error "Bad number format"
|
339
269
|
end
|
340
270
|
end
|
341
271
|
|
342
|
-
def
|
343
|
-
|
344
|
-
return tail if head.nil?
|
272
|
+
def parse_quote # 58 lines
|
273
|
+
beg, nnd, short_hand, c = nil, nil, false, nil
|
345
274
|
|
346
|
-
|
347
|
-
|
348
|
-
|
275
|
+
if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
276
|
+
rb_compile_error "unknown type of %string" if src.matched_size == 2
|
277
|
+
c, beg, short_hand = src.matched, src.getch, false
|
278
|
+
else # Short-hand (e.g. %{, %., %!, etc)
|
279
|
+
c, beg, short_hand = 'Q', src.getch, true
|
280
|
+
end
|
349
281
|
|
350
|
-
|
351
|
-
|
352
|
-
t, body, bp = args
|
353
|
-
result = s(t, bp, s(:super, body))
|
354
|
-
else
|
355
|
-
result = s(:super)
|
356
|
-
result << args if args and args != s(:array)
|
282
|
+
if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
283
|
+
rb_compile_error "unterminated quoted string meets end of file"
|
357
284
|
end
|
358
|
-
result
|
359
|
-
end
|
360
285
|
|
361
|
-
|
362
|
-
|
363
|
-
|
286
|
+
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
287
|
+
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
288
|
+
nnd, beg = beg, "\0" if nnd.nil?
|
289
|
+
|
290
|
+
token_type, self.yacc_value = nil, "%#{c}#{beg}"
|
291
|
+
token_type, string_type = case c
|
292
|
+
when 'Q' then
|
293
|
+
ch = short_hand ? nnd : c + beg
|
294
|
+
self.yacc_value = "%#{ch}"
|
295
|
+
[:tSTRING_BEG, STR_DQUOTE]
|
296
|
+
when 'q' then
|
297
|
+
[:tSTRING_BEG, STR_SQUOTE]
|
298
|
+
when 'W' then
|
299
|
+
src.scan(/\s*/)
|
300
|
+
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
|
301
|
+
when 'w' then
|
302
|
+
src.scan(/\s*/)
|
303
|
+
[:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
|
304
|
+
when 'x' then
|
305
|
+
[:tXSTRING_BEG, STR_XQUOTE]
|
306
|
+
when 'r' then
|
307
|
+
[:tREGEXP_BEG, STR_REGEXP]
|
308
|
+
when 's' then
|
309
|
+
self.lex_state = :expr_fname
|
310
|
+
[:tSYMBEG, STR_SSYM]
|
311
|
+
end
|
312
|
+
|
313
|
+
rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
|
314
|
+
token_type.nil?
|
315
|
+
|
316
|
+
self.lex_strterm = [:strterm, string_type, nnd, beg]
|
364
317
|
|
365
|
-
|
366
|
-
return node2.nil? ? node1 : s(:argscat, node1, node2)
|
318
|
+
return token_type
|
367
319
|
end
|
368
320
|
|
369
|
-
def
|
370
|
-
|
371
|
-
list << item
|
372
|
-
end
|
321
|
+
def parse_string(quote) # 65 lines
|
322
|
+
_, string_type, term, open = quote
|
373
323
|
|
374
|
-
|
375
|
-
|
376
|
-
|
324
|
+
space = false # FIX: remove these
|
325
|
+
func = string_type
|
326
|
+
paren = open
|
327
|
+
term_re = Regexp.escape term
|
377
328
|
|
378
|
-
|
329
|
+
awords = (func & STR_FUNC_AWORDS) != 0
|
330
|
+
regexp = (func & STR_FUNC_REGEXP) != 0
|
331
|
+
expand = (func & STR_FUNC_EXPAND) != 0
|
379
332
|
|
380
|
-
|
333
|
+
unless func then # FIX: impossible, prolly needs == 0
|
334
|
+
self.lineno = nil
|
335
|
+
return :tSTRING_END
|
336
|
+
end
|
381
337
|
|
382
|
-
|
383
|
-
when :str then
|
384
|
-
if htype == :str
|
385
|
-
head[-1] << tail[-1]
|
386
|
-
elsif htype == :dstr and head.size == 2 then
|
387
|
-
head[-1] << tail[-1]
|
388
|
-
else
|
389
|
-
head << tail
|
390
|
-
end
|
391
|
-
when :dstr then
|
392
|
-
if htype == :str then
|
393
|
-
tail[1] = head[-1] + tail[1]
|
394
|
-
head = tail
|
395
|
-
else
|
396
|
-
tail[0] = :array
|
397
|
-
tail[1] = s(:str, tail[1])
|
398
|
-
tail.delete_at 1 if tail[1] == s(:str, '')
|
338
|
+
space = true if awords and src.scan(/\s+/)
|
399
339
|
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
340
|
+
if self.nest == 0 && src.scan(/#{term_re}/) then
|
341
|
+
if awords then
|
342
|
+
quote[1] = nil
|
343
|
+
return :tSPACE
|
344
|
+
elsif regexp then
|
345
|
+
self.yacc_value = self.regx_options
|
346
|
+
self.lineno = nil
|
347
|
+
return :tREGEXP_END
|
407
348
|
else
|
408
|
-
|
349
|
+
self.yacc_value = term
|
350
|
+
self.lineno = nil
|
351
|
+
return :tSTRING_END
|
409
352
|
end
|
410
353
|
end
|
411
354
|
|
412
|
-
|
413
|
-
|
355
|
+
if space then
|
356
|
+
return :tSPACE
|
357
|
+
end
|
414
358
|
|
415
|
-
|
416
|
-
node = node[-1] if node and node[0] == :begin and node.size == 2
|
417
|
-
node
|
418
|
-
end
|
359
|
+
self.string_buffer = []
|
419
360
|
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
361
|
+
if expand
|
362
|
+
case
|
363
|
+
when src.scan(/#(?=[$@])/) then
|
364
|
+
return :tSTRING_DVAR
|
365
|
+
when src.scan(/#[{]/) then
|
366
|
+
return :tSTRING_DBEG
|
367
|
+
when src.scan(/#/) then
|
368
|
+
string_buffer << '#'
|
424
369
|
end
|
425
|
-
|
426
|
-
node = node.last if node[0] == :array && node.size == 2
|
427
|
-
node = s(:svalue, node) if node[0] == :splat and not node.paren # HACK matz wraps ONE of the FOUR splats in a newline to distinguish. I use paren for now. ugh
|
428
370
|
end
|
429
371
|
|
430
|
-
|
431
|
-
|
372
|
+
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
373
|
+
rb_compile_error "unterminated string meets end of file"
|
374
|
+
end
|
432
375
|
|
433
|
-
|
434
|
-
node = remove_begin node
|
435
|
-
node[2] = value_expr(node[2]) if node and node[0] == :if
|
436
|
-
node
|
437
|
-
end
|
376
|
+
self.yacc_value = string_buffer.join
|
438
377
|
|
439
|
-
def void_stmts node
|
440
|
-
return nil unless node
|
441
|
-
return node unless node[0] == :block
|
442
378
|
|
443
|
-
|
444
|
-
node
|
379
|
+
return :tSTRING_CONTENT
|
445
380
|
end
|
446
381
|
|
447
|
-
|
448
|
-
|
382
|
+
def rb_compile_error msg
|
383
|
+
msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
|
384
|
+
raise SyntaxError, msg
|
385
|
+
end
|
386
|
+
|
387
|
+
def read_escape # 51 lines
|
388
|
+
case
|
389
|
+
when src.scan(/\\/) then # Backslash
|
390
|
+
'\\'
|
391
|
+
when src.scan(/n/) then # newline
|
392
|
+
"\n"
|
393
|
+
when src.scan(/t/) then # horizontal tab
|
394
|
+
"\t"
|
395
|
+
when src.scan(/r/) then # carriage-return
|
396
|
+
"\r"
|
397
|
+
when src.scan(/f/) then # form-feed
|
398
|
+
"\f"
|
399
|
+
when src.scan(/v/) then # vertical tab
|
400
|
+
"\13"
|
401
|
+
when src.scan(/a/) then # alarm(bell)
|
402
|
+
"\007"
|
403
|
+
when src.scan(/e/) then # escape
|
404
|
+
"\033"
|
405
|
+
when src.scan(/b/) then # backspace
|
406
|
+
"\010"
|
407
|
+
when src.scan(/s/) then # space
|
408
|
+
" "
|
409
|
+
when src.scan(/[0-7]{1,3}/) then # octal constant
|
410
|
+
src.matched.to_i(8).chr
|
411
|
+
when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
412
|
+
src[1].to_i(16).chr
|
413
|
+
when src.scan(/M-\\/) then
|
414
|
+
c = self.read_escape
|
415
|
+
c[0] = (c[0].ord | 0x80).chr
|
416
|
+
c
|
417
|
+
when src.scan(/M-(.)/) then
|
418
|
+
c = src[1]
|
419
|
+
c[0] = (c[0].ord | 0x80).chr
|
420
|
+
c
|
421
|
+
when src.scan(/C-\\|c\\/) then
|
422
|
+
c = self.read_escape
|
423
|
+
c[0] = (c[0].ord & 0x9f).chr
|
424
|
+
c
|
425
|
+
when src.scan(/C-\?|c\?/) then
|
426
|
+
0177.chr
|
427
|
+
when src.scan(/(C-|c)(.)/) then
|
428
|
+
c = src[2]
|
429
|
+
c[0] = (c[0].ord & 0x9f).chr
|
430
|
+
c
|
431
|
+
when src.scan(/[McCx0-9]/) || src.eos? then
|
432
|
+
rb_compile_error("Invalid escape character syntax")
|
433
|
+
else
|
434
|
+
src.getch
|
435
|
+
end
|
436
|
+
end
|
449
437
|
|
450
|
-
def
|
451
|
-
|
452
|
-
vars = self.env.dynamic.keys - known_vars
|
438
|
+
def regx_options # 15 lines
|
439
|
+
good, bad = [], []
|
453
440
|
|
454
|
-
|
455
|
-
|
456
|
-
var = s(:dasgn_curr, id, var).compact
|
457
|
-
end
|
441
|
+
if src.scan(/[a-z]+/) then
|
442
|
+
good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
|
458
443
|
end
|
459
444
|
|
460
|
-
|
461
|
-
|
445
|
+
unless bad.empty? then
|
446
|
+
rb_compile_error("unknown regexp option%s - %s" %
|
447
|
+
[(bad.size > 1 ? "s" : ""), bad.join.inspect])
|
448
|
+
end
|
462
449
|
|
463
|
-
|
464
|
-
# do nothing for now
|
450
|
+
return good.join
|
465
451
|
end
|
466
452
|
|
467
|
-
|
453
|
+
def reset
|
454
|
+
self.command_start = true
|
455
|
+
self.lex_strterm = nil
|
456
|
+
self.token = nil
|
457
|
+
self.yacc_value = nil
|
468
458
|
|
469
|
-
|
470
|
-
|
459
|
+
@src = nil
|
460
|
+
@lex_state = nil
|
461
|
+
end
|
471
462
|
|
472
|
-
|
463
|
+
def src= src
|
464
|
+
raise "bad src: #{src.inspect}" unless String === src
|
465
|
+
@src = RPStringScanner.new(src)
|
466
|
+
end
|
467
|
+
|
468
|
+
def tokadd_escape term # 20 lines
|
469
|
+
case
|
470
|
+
when src.scan(/\\\n/) then
|
471
|
+
# just ignore
|
472
|
+
when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
|
473
|
+
self.string_buffer << src.matched
|
474
|
+
when src.scan(/\\([MC]-|c)(?=\\)/) then
|
475
|
+
self.string_buffer << src.matched
|
476
|
+
self.tokadd_escape term
|
477
|
+
when src.scan(/\\([MC]-|c)(.)/) then
|
478
|
+
self.string_buffer << src.matched
|
479
|
+
when src.scan(/\\[McCx]/) then
|
480
|
+
rb_compile_error "Invalid escape character syntax"
|
481
|
+
when src.scan(/\\(.)/m) then
|
482
|
+
self.string_buffer << src.matched
|
483
|
+
else
|
484
|
+
rb_compile_error "Invalid escape character syntax"
|
485
|
+
end
|
486
|
+
end
|
473
487
|
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
488
|
+
def tokadd_string(func, term, paren) # 105 lines
|
489
|
+
awords = (func & STR_FUNC_AWORDS) != 0
|
490
|
+
escape = (func & STR_FUNC_ESCAPE) != 0
|
491
|
+
expand = (func & STR_FUNC_EXPAND) != 0
|
492
|
+
regexp = (func & STR_FUNC_REGEXP) != 0
|
493
|
+
symbol = (func & STR_FUNC_SYMBOL) != 0
|
479
494
|
|
480
|
-
|
481
|
-
|
482
|
-
attr_reader :lex_state
|
495
|
+
paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
|
496
|
+
term_re = Regexp.new(Regexp.escape(term))
|
483
497
|
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
498
|
+
until src.eos? do
|
499
|
+
c = nil
|
500
|
+
handled = true
|
501
|
+
case
|
502
|
+
when self.nest == 0 && src.scan(term_re) then
|
503
|
+
src.pos -= 1
|
504
|
+
break
|
505
|
+
when paren_re && src.scan(paren_re) then
|
506
|
+
self.nest += 1
|
507
|
+
when src.scan(term_re) then
|
508
|
+
self.nest -= 1
|
509
|
+
when awords && src.scan(/\s/) then
|
510
|
+
src.pos -= 1
|
511
|
+
break
|
512
|
+
when expand && src.scan(/#(?=[\$\@\{])/) then
|
513
|
+
src.pos -= 1
|
514
|
+
break
|
515
|
+
when expand && src.scan(/#(?!\n)/) then
|
516
|
+
# do nothing
|
517
|
+
when src.check(/\\/) then
|
518
|
+
case
|
519
|
+
when awords && src.scan(/\\\n/) then
|
520
|
+
string_buffer << "\n"
|
521
|
+
next
|
522
|
+
when awords && src.scan(/\\\s/) then
|
523
|
+
c = ' '
|
524
|
+
when expand && src.scan(/\\\n/) then
|
525
|
+
next
|
526
|
+
when regexp && src.check(/\\/) then
|
527
|
+
self.tokadd_escape term
|
528
|
+
next
|
529
|
+
when expand && src.scan(/\\/) then
|
530
|
+
c = self.read_escape
|
531
|
+
when src.scan(/\\\n/) then
|
532
|
+
# do nothing
|
533
|
+
when src.scan(/\\\\/) then
|
534
|
+
string_buffer << '\\' if escape
|
535
|
+
c = '\\'
|
536
|
+
when src.scan(/\\/) then
|
537
|
+
unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
|
538
|
+
string_buffer << "\\"
|
539
|
+
end
|
540
|
+
else
|
541
|
+
handled = false
|
542
|
+
end
|
543
|
+
else
|
544
|
+
handled = false
|
545
|
+
end # case
|
488
546
|
|
489
|
-
|
547
|
+
unless handled then
|
490
548
|
|
491
|
-
|
549
|
+
t = Regexp.escape term
|
550
|
+
x = Regexp.escape(paren) if paren && paren != "\000"
|
551
|
+
re = if awords then
|
552
|
+
/[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
|
553
|
+
else
|
554
|
+
/[^#{t}#{x}\#\0\\]+|./
|
555
|
+
end
|
492
556
|
|
493
|
-
|
494
|
-
|
557
|
+
src.scan re
|
558
|
+
c = src.matched
|
495
559
|
|
496
|
-
|
497
|
-
|
560
|
+
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
|
561
|
+
end # unless handled
|
498
562
|
|
499
|
-
|
500
|
-
|
563
|
+
c ||= src.matched
|
564
|
+
string_buffer << c
|
565
|
+
end # until
|
501
566
|
|
502
|
-
|
503
|
-
|
504
|
-
attr_accessor :token_buffer
|
567
|
+
c ||= src.matched
|
568
|
+
c = RubyLexer::EOF if src.eos?
|
505
569
|
|
506
|
-
# Value of last token which had a value associated with it.
|
507
|
-
attr_accessor :yacc_value
|
508
570
|
|
509
|
-
|
510
|
-
|
571
|
+
return c
|
572
|
+
end
|
511
573
|
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
574
|
+
def unescape s
|
575
|
+
|
576
|
+
r = {
|
577
|
+
"a" => "\007",
|
578
|
+
"b" => "\010",
|
579
|
+
"e" => "\033",
|
580
|
+
"f" => "\f",
|
581
|
+
"n" => "\n",
|
582
|
+
"r" => "\r",
|
583
|
+
"s" => " ",
|
584
|
+
"t" => "\t",
|
585
|
+
"v" => "\13",
|
586
|
+
"\\" => '\\',
|
587
|
+
"\n" => "",
|
588
|
+
"C-\?" => 0177.chr,
|
589
|
+
"c\?" => 0177.chr,
|
590
|
+
}[s]
|
591
|
+
|
592
|
+
return r if r
|
593
|
+
|
594
|
+
case s
|
595
|
+
when /^[0-7]{1,3}/ then
|
596
|
+
$&.to_i(8).chr
|
597
|
+
when /^x([0-9a-fA-F]{1,2})/ then
|
598
|
+
$1.to_i(16).chr
|
599
|
+
when /^M-(.)/ then
|
600
|
+
($1[0].ord | 0x80).chr
|
601
|
+
when /^(C-|c)(.)/ then
|
602
|
+
($2[0].ord & 0x9f).chr
|
603
|
+
when /^[McCx0-9]/ then
|
604
|
+
rb_compile_error("Invalid escape character syntax")
|
605
|
+
else
|
606
|
+
s
|
607
|
+
end
|
608
|
+
end
|
521
609
|
|
522
|
-
|
523
|
-
|
524
|
-
|
610
|
+
def warning s
|
611
|
+
# do nothing for now
|
612
|
+
end
|
525
613
|
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
STR_FUNC_QWORDS=0x08
|
531
|
-
STR_FUNC_SYMBOL=0x10
|
532
|
-
STR_FUNC_INDENT=0x20 # <<-HEREDOC
|
533
|
-
|
534
|
-
STR_SQUOTE = 0
|
535
|
-
STR_DQUOTE = STR_FUNC_EXPAND
|
536
|
-
STR_XQUOTE = STR_FUNC_EXPAND
|
537
|
-
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
538
|
-
STR_SSYM = STR_FUNC_SYMBOL
|
539
|
-
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
614
|
+
##
|
615
|
+
# Returns the next token. Also sets yy_val is needed.
|
616
|
+
#
|
617
|
+
# @return Description of the Returned Value
|
540
618
|
|
541
|
-
def
|
542
|
-
self.parser_support = nil
|
543
|
-
self.token_buffer = []
|
544
|
-
self.cond = StackState.new(:cond)
|
545
|
-
self.cmdarg = StackState.new(:cmdarg)
|
546
|
-
self.nest = 0
|
547
|
-
self.end_seen = false
|
619
|
+
def yylex # 826 lines
|
548
620
|
|
549
|
-
|
550
|
-
|
621
|
+
c = ''
|
622
|
+
space_seen = false
|
623
|
+
command_state = false
|
624
|
+
src = self.src
|
551
625
|
|
552
|
-
def reset
|
553
626
|
self.token = nil
|
554
627
|
self.yacc_value = nil
|
555
|
-
self.src = nil
|
556
|
-
@lex_state = nil
|
557
|
-
self.lex_strterm = nil
|
558
|
-
self.command_start = true
|
559
|
-
end
|
560
628
|
|
561
|
-
|
562
|
-
#
|
563
|
-
# @return true if not at end of file (EOF).
|
629
|
+
return yylex_string if lex_strterm
|
564
630
|
|
565
|
-
|
566
|
-
|
567
|
-
self.token = r
|
568
|
-
return r != RubyLexer::EOF
|
569
|
-
end
|
631
|
+
command_state = self.command_start
|
632
|
+
self.command_start = false
|
570
633
|
|
571
|
-
|
572
|
-
_, string_type, term, open = quote
|
634
|
+
last_state = lex_state
|
573
635
|
|
574
|
-
|
575
|
-
|
576
|
-
|
636
|
+
loop do # START OF CASE
|
637
|
+
if src.scan(/\ |\t|\r|\f|\13/) then # white spaces, 13 = '\v
|
638
|
+
space_seen = true
|
639
|
+
next
|
640
|
+
elsif src.check(/[^a-zA-Z]/) then
|
641
|
+
if src.scan(/\n|#/) then
|
642
|
+
self.lineno = nil
|
643
|
+
c = src.matched
|
644
|
+
if c == '#' then
|
645
|
+
src.unread c # ok
|
646
|
+
|
647
|
+
while src.scan(/\s*#.*(\n+|\z)/) do
|
648
|
+
@comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
|
649
|
+
end
|
577
650
|
|
578
|
-
|
651
|
+
if src.eos? then
|
652
|
+
return RubyLexer::EOF
|
653
|
+
end
|
654
|
+
else
|
655
|
+
end
|
579
656
|
|
580
|
-
|
657
|
+
# Replace a string of newlines with a single one
|
658
|
+
src.scan(/\n+/)
|
581
659
|
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
end while String === c and c =~ /\s/
|
587
|
-
space = true
|
588
|
-
end
|
660
|
+
if [:expr_beg, :expr_fname,
|
661
|
+
:expr_dot, :expr_class].include? lex_state then
|
662
|
+
next
|
663
|
+
end
|
589
664
|
|
590
|
-
if c == term && self.nest == 0 then
|
591
|
-
if func & STR_FUNC_QWORDS != 0 then
|
592
|
-
quote[1] = nil
|
593
|
-
return ' '
|
594
|
-
end
|
595
|
-
unless func & STR_FUNC_REGEXP != 0 then
|
596
|
-
self.yacc_value = t(term)
|
597
|
-
return :tSTRING_END
|
598
|
-
end
|
599
|
-
self.yacc_value = self.regx_options
|
600
|
-
return :tREGEXP_END
|
601
|
-
end
|
602
|
-
|
603
|
-
if space then
|
604
|
-
src.unread c
|
605
|
-
return ' '
|
606
|
-
end
|
607
|
-
|
608
|
-
self.token_buffer = []
|
609
|
-
|
610
|
-
if (func & STR_FUNC_EXPAND) != 0 && c == '#' then
|
611
|
-
case c = src.read
|
612
|
-
when '$', '@' then
|
613
|
-
src.unread c
|
614
|
-
return :tSTRING_DVAR
|
615
|
-
when '{' then
|
616
|
-
return :tSTRING_DBEG
|
617
|
-
end
|
618
|
-
token_buffer << '#'
|
619
|
-
end
|
620
|
-
|
621
|
-
src.unread c
|
622
|
-
|
623
|
-
if tokadd_string(func, term, paren, token_buffer) == RubyLexer::EOF then
|
624
|
-
# HACK ruby_sourceline = nd_line(quote)
|
625
|
-
raise "unterminated string meets end of file"
|
626
|
-
return :tSTRING_END
|
627
|
-
end
|
628
|
-
|
629
|
-
self.yacc_value = s(:str, token_buffer.join)
|
630
|
-
return :tSTRING_CONTENT
|
631
|
-
end
|
632
|
-
|
633
|
-
def regx_options
|
634
|
-
options = []
|
635
|
-
bad = []
|
636
|
-
|
637
|
-
while c = src.read and c =~ /[a-z]/ do
|
638
|
-
case c
|
639
|
-
when /^[ixmonesu]$/ then
|
640
|
-
options << c
|
641
|
-
else
|
642
|
-
bad << c
|
643
|
-
end
|
644
|
-
end
|
645
|
-
|
646
|
-
src.unread c
|
647
|
-
|
648
|
-
rb_compile_error("unknown regexp option%s - %s" %
|
649
|
-
[(bad.size > 1 ? "s" : ""), bad.join.inspect]) unless bad.empty?
|
650
|
-
|
651
|
-
return options.join
|
652
|
-
end
|
653
|
-
|
654
|
-
def tokadd_escape term
|
655
|
-
case c = src.read
|
656
|
-
when "\n" then
|
657
|
-
return false # just ignore
|
658
|
-
when /0-7/ then # octal constant
|
659
|
-
tokadd "\\"
|
660
|
-
tokadd c
|
661
|
-
|
662
|
-
2.times do |i|
|
663
|
-
c = src.read
|
664
|
-
# HACK goto eof if (c == -1)
|
665
|
-
if c < "0" || "7" < c then
|
666
|
-
pushback c
|
667
|
-
break
|
668
|
-
end
|
669
|
-
tokadd c
|
670
|
-
end
|
671
|
-
|
672
|
-
return false
|
673
|
-
when "x" then # hex constant
|
674
|
-
tokadd "\\"
|
675
|
-
tokadd c
|
676
|
-
|
677
|
-
2.times do
|
678
|
-
c = src.read
|
679
|
-
unless c =~ /[0-9a-f]/i then # TODO error case? empty?
|
680
|
-
src.unread c
|
681
|
-
break
|
682
|
-
end
|
683
|
-
tokadd c
|
684
|
-
end
|
685
|
-
|
686
|
-
return false
|
687
|
-
when "M" then
|
688
|
-
if (c = src.read()) != "-" then
|
689
|
-
yyerror "Invalid escape character syntax"
|
690
|
-
pushback c
|
691
|
-
return false
|
692
|
-
end
|
693
|
-
tokadd "\\"
|
694
|
-
tokadd "M"
|
695
|
-
tokadd "-"
|
696
|
-
raise "not yet"
|
697
|
-
# goto escaped;
|
698
|
-
when "C" then
|
699
|
-
if (c = src.read) != "-" then
|
700
|
-
yyerror "Invalid escape character syntax"
|
701
|
-
pushback c
|
702
|
-
return false
|
703
|
-
end
|
704
|
-
tokadd "\\"
|
705
|
-
tokadd "C"
|
706
|
-
tokadd "-"
|
707
|
-
raise "not yet"
|
708
|
-
# HACK goto escaped;
|
709
|
-
when "c" then
|
710
|
-
tokadd "\\"
|
711
|
-
tokadd "c"
|
712
|
-
# HACK escaped:
|
713
|
-
if (c = src.read) == "\\" then
|
714
|
-
return tokadd_escape(term)
|
715
|
-
elsif c == -1 then
|
716
|
-
raise "no"
|
717
|
-
# HACK goto eof
|
718
|
-
end
|
719
|
-
tokadd c
|
720
|
-
return false
|
721
|
-
# HACK eof
|
722
|
-
when RubyLexer::EOF then
|
723
|
-
yyerror "Invalid escape character syntax"
|
724
|
-
return true
|
725
|
-
else
|
726
|
-
if (c != "\\" || c != term)
|
727
|
-
tokadd "\\"
|
728
|
-
end
|
729
|
-
tokadd c
|
730
|
-
end
|
731
|
-
return false
|
732
|
-
end
|
733
|
-
|
734
|
-
def read_escape
|
735
|
-
case c = src.read
|
736
|
-
when "\\" then # Backslash
|
737
|
-
return c
|
738
|
-
when "n" then # newline
|
739
|
-
return "\n"
|
740
|
-
when "t" then # horizontal tab
|
741
|
-
return "\t"
|
742
|
-
when "r" then # carriage-return
|
743
|
-
return "\r"
|
744
|
-
when "f" then # form-feed
|
745
|
-
return "\f"
|
746
|
-
when "v" then # vertical tab
|
747
|
-
return "\13"
|
748
|
-
when "a" then # alarm(bell)
|
749
|
-
return "\007"
|
750
|
-
when 'e' then # escape
|
751
|
-
return "\033"
|
752
|
-
when /[0-7]/ then # octal constant
|
753
|
-
src.unread c # TODO this seems dumb
|
754
|
-
|
755
|
-
n = 0
|
756
|
-
|
757
|
-
3.times do
|
758
|
-
c = src.read
|
759
|
-
unless c =~ /[0-7]/ then
|
760
|
-
src.unread c
|
761
|
-
break
|
762
|
-
end
|
763
|
-
n <<= 3
|
764
|
-
n |= c[0] - ?0
|
765
|
-
end
|
766
|
-
|
767
|
-
return n.chr
|
768
|
-
when "x" then # hex constant
|
769
|
-
n = 0
|
770
|
-
|
771
|
-
2.times do
|
772
|
-
c = src.read.downcase
|
773
|
-
unless c =~ /[0-9a-f]/i then
|
774
|
-
src.unread c
|
775
|
-
break
|
776
|
-
end
|
777
|
-
n <<= 4
|
778
|
-
n |= case c[0] # TODO: I'm sure there is a better way... but I'm tired
|
779
|
-
when ?a..?f then
|
780
|
-
c[0] - ?a + 10
|
781
|
-
when ?A..?F then
|
782
|
-
c[0] - ?A + 10
|
783
|
-
when ?0..?9 then
|
784
|
-
c[0] - ?0
|
785
|
-
else
|
786
|
-
raise "wtf?: #{c.inspect}"
|
787
|
-
end
|
788
|
-
end
|
789
|
-
|
790
|
-
return n.chr
|
791
|
-
when "b" then # backspace
|
792
|
-
return "\010"
|
793
|
-
when "s" then # space
|
794
|
-
return " "
|
795
|
-
when "M" then
|
796
|
-
c = src.read
|
797
|
-
if c != "-" then
|
798
|
-
yyerror("Invalid escape character syntax")
|
799
|
-
src.unread c
|
800
|
-
return "\0"
|
801
|
-
end
|
802
|
-
|
803
|
-
c = src.read
|
804
|
-
case c
|
805
|
-
when "\\" then
|
806
|
-
c = self.read_escape
|
807
|
-
c[0] |= 0x80
|
808
|
-
return c
|
809
|
-
when RubyLexer::EOF then
|
810
|
-
yyerror("Invalid escape character syntax");
|
811
|
-
return '\0';
|
812
|
-
else
|
813
|
-
c[0] |= 0x80
|
814
|
-
return c
|
815
|
-
end
|
816
|
-
when "C", "c" then
|
817
|
-
if (c = src.read) != "-" then
|
818
|
-
yyerror("Invalid escape character syntax")
|
819
|
-
pushback(c)
|
820
|
-
return "\0"
|
821
|
-
end if c == "C"
|
822
|
-
|
823
|
-
case c = src.read
|
824
|
-
when "\\" then
|
825
|
-
c = read_escape
|
826
|
-
when "?" then
|
827
|
-
return 0177
|
828
|
-
when RubyLexer::EOF then
|
829
|
-
yyerror("Invalid escape character syntax");
|
830
|
-
return "\0";
|
831
|
-
end
|
832
|
-
c[0] &= 0x9f
|
833
|
-
return c
|
834
|
-
when RubyLexer::EOF then
|
835
|
-
yyerror("Invalid escape character syntax")
|
836
|
-
return "\0"
|
837
|
-
else
|
838
|
-
return c
|
839
|
-
end
|
840
|
-
end
|
841
|
-
|
842
|
-
def tokadd_string(func, term, paren, buffer)
|
843
|
-
until (c = src.read) == RubyLexer::EOF do
|
844
|
-
if c == paren then
|
845
|
-
self.nest += 1
|
846
|
-
elsif c == term then
|
847
|
-
if self.nest == 0 then
|
848
|
-
src.unread c
|
849
|
-
break
|
850
|
-
end
|
851
|
-
self.nest -= 1
|
852
|
-
elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek("\n") then
|
853
|
-
c2 = src.read
|
854
665
|
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
elsif c == "\\" then
|
862
|
-
c = src.read
|
863
|
-
case c
|
864
|
-
when "\n" then
|
865
|
-
break if ((func & RubyLexer::STR_FUNC_QWORDS) != 0) # TODO: check break
|
866
|
-
next if ((func & RubyLexer::STR_FUNC_EXPAND) != 0)
|
867
|
-
|
868
|
-
buffer << "\\"
|
869
|
-
when "\\" then
|
870
|
-
buffer << c if (func & RubyLexer::STR_FUNC_ESCAPE) != 0
|
871
|
-
else
|
872
|
-
if (func & RubyLexer::STR_FUNC_REGEXP) != 0 then
|
873
|
-
src.unread c
|
874
|
-
tokadd_escape term
|
875
|
-
next
|
876
|
-
elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 then
|
877
|
-
src.unread c
|
878
|
-
if (func & RubyLexer::STR_FUNC_ESCAPE) != 0 then
|
879
|
-
buffer << "\\"
|
880
|
-
end
|
881
|
-
c = read_escape
|
882
|
-
elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
|
883
|
-
# ignore backslashed spaces in %w
|
884
|
-
elsif c != term && !(paren && c == paren) then
|
885
|
-
buffer << "\\"
|
886
|
-
end
|
887
|
-
end
|
888
|
-
# else if (ismbchar(c)) {
|
889
|
-
# int i, len = mbclen(c)-1;
|
890
|
-
# for (i = 0; i < len; i++) {
|
891
|
-
# tokadd(c);
|
892
|
-
# c = nextc();
|
893
|
-
# }
|
894
|
-
# }
|
895
|
-
elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
|
896
|
-
src.unread c
|
897
|
-
break
|
898
|
-
end
|
899
|
-
|
900
|
-
if c == "\0" && (func & RubyLexer::STR_FUNC_SYMBOL) != 0 then
|
901
|
-
raise SyntaxError, "symbol cannot contain '\\0'"
|
902
|
-
end
|
903
|
-
|
904
|
-
buffer << c # unless c == "\r"
|
905
|
-
end # while
|
906
|
-
|
907
|
-
return c
|
908
|
-
end
|
909
|
-
|
910
|
-
def heredoc here
|
911
|
-
_, eos, func, last_line = here
|
912
|
-
|
913
|
-
eosn = eos + "\n"
|
914
|
-
err_msg = "can't find string #{eos.inspect} anywhere before EOF"
|
915
|
-
|
916
|
-
indent = (func & RubyLexer::STR_FUNC_INDENT) != 0
|
917
|
-
str = []
|
918
|
-
|
919
|
-
raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
|
920
|
-
|
921
|
-
if src.begin_of_line? && src.match_string(eosn, indent) then
|
922
|
-
src.unread_many last_line
|
923
|
-
self.yacc_value = t(eos)
|
924
|
-
return :tSTRING_END
|
925
|
-
end
|
926
|
-
|
927
|
-
if (func & RubyLexer::STR_FUNC_EXPAND) == 0 then
|
928
|
-
begin
|
929
|
-
str << src.read_line
|
930
|
-
raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
|
931
|
-
end until src.match_string(eosn, indent)
|
932
|
-
else
|
933
|
-
c = src.read
|
934
|
-
buffer = []
|
935
|
-
|
936
|
-
if c == "#" then
|
937
|
-
case c = src.read
|
938
|
-
when "$", "@" then
|
939
|
-
src.unread c
|
940
|
-
self.yacc_value = t("#" + c)
|
941
|
-
return :tSTRING_DVAR
|
942
|
-
when "{" then
|
943
|
-
self.yacc_value = t("#" + c)
|
944
|
-
return :tSTRING_DBEG
|
945
|
-
end
|
946
|
-
buffer << "#"
|
947
|
-
end
|
948
|
-
|
949
|
-
src.unread c
|
950
|
-
|
951
|
-
begin
|
952
|
-
c = tokadd_string func, "\n", nil, buffer
|
953
|
-
|
954
|
-
raise SyntaxError, err_msg if c == RubyLexer::EOF
|
955
|
-
|
956
|
-
if c != "\n" then
|
957
|
-
self.yacc_value = s(:str, buffer.join)
|
958
|
-
return :tSTRING_CONTENT
|
959
|
-
end
|
960
|
-
|
961
|
-
buffer << src.read
|
962
|
-
|
963
|
-
raise SyntaxError, err_msg if src.peek == RubyLexer::EOF
|
964
|
-
end until src.match_string(eosn, indent)
|
965
|
-
|
966
|
-
str = buffer
|
967
|
-
end
|
968
|
-
|
969
|
-
src.unread_many eosn
|
970
|
-
|
971
|
-
self.lex_strterm = s(:heredoc, eos, func, last_line)
|
972
|
-
self.yacc_value = s(:str, str.join)
|
973
|
-
|
974
|
-
return :tSTRING_CONTENT
|
975
|
-
end
|
976
|
-
|
977
|
-
def parse_quote(c)
|
978
|
-
beg, nnd = nil, nil
|
979
|
-
short_hand = false
|
980
|
-
|
981
|
-
# Short-hand (e.g. %{,%.,%!,... versus %Q{).
|
982
|
-
unless c =~ /[a-z0-9]/i then
|
983
|
-
beg, c = c, 'Q'
|
984
|
-
short_hand = true
|
985
|
-
else # Long-hand (e.g. %Q{}).
|
986
|
-
short_hand = false
|
987
|
-
beg = src.read
|
988
|
-
if beg =~ /[a-z0-9]/i then
|
989
|
-
raise SyntaxError, "unknown type of %string"
|
990
|
-
end
|
991
|
-
end
|
992
|
-
|
993
|
-
if c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
994
|
-
raise SyntaxError, "unterminated quoted string meets nnd of file"
|
995
|
-
end
|
996
|
-
|
997
|
-
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
998
|
-
nnd = case beg
|
999
|
-
when '(' then
|
1000
|
-
')'
|
1001
|
-
when '[' then
|
1002
|
-
']'
|
1003
|
-
when '{' then
|
1004
|
-
'}'
|
1005
|
-
when '<' then
|
1006
|
-
'>'
|
1007
|
-
else
|
1008
|
-
nnd, beg = beg, "\0"
|
1009
|
-
nnd
|
1010
|
-
end
|
1011
|
-
|
1012
|
-
string_type, token_type = STR_DQUOTE, :tSTRING_BEG
|
1013
|
-
self.yacc_value = t("%#{c}#{beg}")
|
1014
|
-
|
1015
|
-
case (c)
|
1016
|
-
when 'Q' then
|
1017
|
-
self.yacc_value = t("%#{short_hand ? nnd : c + beg}")
|
1018
|
-
when 'q' then
|
1019
|
-
string_type, token_type = STR_SQUOTE, :tSTRING_BEG
|
1020
|
-
when 'W' then
|
1021
|
-
string_type, token_type = STR_DQUOTE | STR_FUNC_QWORDS, :tWORDS_BEG
|
1022
|
-
begin c = src.read end while c =~ /\s/
|
1023
|
-
src.unread(c)
|
1024
|
-
when 'w' then
|
1025
|
-
string_type, token_type = STR_SQUOTE | STR_FUNC_QWORDS, :tQWORDS_BEG
|
1026
|
-
begin c = src.read end while c =~ /\s/
|
1027
|
-
src.unread(c)
|
1028
|
-
when 'x' then
|
1029
|
-
string_type, token_type = STR_XQUOTE, :tXSTRING_BEG
|
1030
|
-
when 'r' then
|
1031
|
-
string_type, token_type = STR_REGEXP, :tREGEXP_BEG
|
1032
|
-
when 's' then
|
1033
|
-
string_type, token_type = STR_SSYM, :tSYMBEG
|
1034
|
-
self.lex_state = :expr_fname
|
1035
|
-
else
|
1036
|
-
raise SyntaxError, "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'."
|
1037
|
-
end
|
1038
|
-
|
1039
|
-
self.lex_strterm = s(:strterm, string_type, nnd, beg)
|
1040
|
-
|
1041
|
-
return token_type
|
1042
|
-
end
|
1043
|
-
|
1044
|
-
def heredoc_identifier
|
1045
|
-
c = src.read
|
1046
|
-
term = 42 # HACK
|
1047
|
-
func = 0
|
1048
|
-
|
1049
|
-
if c == '-' then
|
1050
|
-
c = src.read
|
1051
|
-
func = STR_FUNC_INDENT
|
1052
|
-
end
|
1053
|
-
|
1054
|
-
if c == "\'" || c == '"' || c == '`' then
|
1055
|
-
if c == "\'" then
|
1056
|
-
func |= STR_SQUOTE
|
1057
|
-
elsif c == '"'
|
1058
|
-
func |= STR_DQUOTE
|
1059
|
-
else
|
1060
|
-
func |= STR_XQUOTE
|
1061
|
-
end
|
1062
|
-
|
1063
|
-
token_buffer.clear
|
1064
|
-
term = c
|
1065
|
-
|
1066
|
-
while (c = src.read) != RubyLexer::EOF && c != term
|
1067
|
-
token_buffer << c
|
1068
|
-
end
|
1069
|
-
|
1070
|
-
if c == RubyLexer::EOF then
|
1071
|
-
raise SyntaxError, "unterminated here document identifier"
|
1072
|
-
end
|
1073
|
-
else
|
1074
|
-
unless c =~ /\w/ then
|
1075
|
-
src.unread c
|
1076
|
-
src.unread '-' if (func & STR_FUNC_INDENT) != 0
|
1077
|
-
return 0 # TODO: RubyLexer::EOF?
|
1078
|
-
end
|
1079
|
-
token_buffer.clear
|
1080
|
-
term = '"'
|
1081
|
-
func |= STR_DQUOTE
|
1082
|
-
begin
|
1083
|
-
token_buffer << c
|
1084
|
-
end while (c = src.read) != RubyLexer::EOF && c =~ /\w/
|
1085
|
-
src.unread c
|
1086
|
-
end
|
1087
|
-
|
1088
|
-
line = src.read_line
|
1089
|
-
tok = token_buffer.join
|
1090
|
-
self.lex_strterm = s(:heredoc, tok, func, line)
|
1091
|
-
|
1092
|
-
if term == '`' then
|
1093
|
-
self.yacc_value = t("`")
|
1094
|
-
return :tXSTRING_BEG
|
1095
|
-
end
|
1096
|
-
|
1097
|
-
self.yacc_value = t("\"")
|
1098
|
-
return :tSTRING_BEG
|
1099
|
-
end
|
1100
|
-
|
1101
|
-
def arg_ambiguous
|
1102
|
-
self.warning("Ambiguous first argument. make sure.")
|
1103
|
-
end
|
1104
|
-
|
1105
|
-
##
|
1106
|
-
# Read a comment up to end of line. When found each comment will
|
1107
|
-
# get stored away into the parser result so that any interested
|
1108
|
-
# party can use them as they seem fit. One idea is that IDE authors
|
1109
|
-
# can do distance based heuristics to associate these comments to
|
1110
|
-
# the AST node they think they belong to.
|
1111
|
-
#
|
1112
|
-
# @param c last character read from lexer source
|
1113
|
-
# @return newline or eof value
|
1114
|
-
|
1115
|
-
def read_comment c
|
1116
|
-
token_buffer.clear
|
1117
|
-
token_buffer << c
|
1118
|
-
|
1119
|
-
while (c = src.read) != "\n" do
|
1120
|
-
break if c == RubyLexer::EOF
|
1121
|
-
token_buffer << c
|
1122
|
-
end
|
1123
|
-
src.unread c
|
1124
|
-
|
1125
|
-
# Store away each comment to parser result so IDEs can do whatever
|
1126
|
-
# they want with them.
|
1127
|
-
# HACK parser_support.result.add_comment(Node.comment(token_buffer.join))
|
1128
|
-
|
1129
|
-
return c
|
1130
|
-
end
|
1131
|
-
|
1132
|
-
##
|
1133
|
-
# Returns the next token. Also sets yy_val is needed.
|
1134
|
-
#
|
1135
|
-
# @return Description of the Returned Value
|
1136
|
-
# TODO: remove ALL sexps coming from here and move up to grammar
|
1137
|
-
# TODO: only literal values should come up from the lexer.
|
1138
|
-
|
1139
|
-
def yylex
|
1140
|
-
c = ''
|
1141
|
-
space_seen = false
|
1142
|
-
command_state = false
|
1143
|
-
|
1144
|
-
if lex_strterm then
|
1145
|
-
token = nil
|
1146
|
-
|
1147
|
-
if lex_strterm[0] == :heredoc then
|
1148
|
-
token = self.heredoc(lex_strterm)
|
1149
|
-
if token == :tSTRING_END then
|
1150
|
-
self.lex_strterm = nil
|
1151
|
-
self.lex_state = :expr_end
|
1152
|
-
end
|
1153
|
-
else
|
1154
|
-
token = self.parse_string(lex_strterm)
|
1155
|
-
|
1156
|
-
if token == :tSTRING_END || token == :tREGEXP_END then
|
1157
|
-
self.lex_strterm = nil
|
666
|
+
self.command_start = true
|
667
|
+
self.lex_state = :expr_beg
|
668
|
+
return :tNL
|
669
|
+
elsif src.scan(/[\]\)\}]/) then
|
670
|
+
cond.lexpop
|
671
|
+
cmdarg.lexpop
|
1158
672
|
self.lex_state = :expr_end
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
last_state = lex_state
|
1169
|
-
|
1170
|
-
loop do
|
1171
|
-
c = src.read
|
1172
|
-
case c
|
1173
|
-
when /\004|\032|\000/, RubyLexer::EOF then # ^D, ^Z, EOF
|
1174
|
-
return RubyLexer::EOF
|
1175
|
-
when /\ |\t|\f|\r|\13/ then # white spaces, 13 = '\v
|
1176
|
-
space_seen = true
|
1177
|
-
next
|
1178
|
-
when /#|\n/ then
|
1179
|
-
return 0 if c == '#' and read_comment(c) == 0 # FIX 0?
|
1180
|
-
# Replace a string of newlines with a single one
|
1181
|
-
while (c = src.read) == "\n"
|
1182
|
-
# do nothing
|
1183
|
-
end
|
1184
|
-
|
1185
|
-
src.unread c
|
1186
|
-
|
1187
|
-
if (lex_state == :expr_beg ||
|
1188
|
-
lex_state == :expr_fname ||
|
1189
|
-
lex_state == :expr_dot ||
|
1190
|
-
lex_state == :expr_class) then
|
1191
|
-
next
|
1192
|
-
end
|
1193
|
-
|
1194
|
-
self.command_start = true
|
1195
|
-
self.lex_state = :expr_beg
|
1196
|
-
return "\n"
|
1197
|
-
when '*' then
|
1198
|
-
c = src.read
|
1199
|
-
if c == '*' then
|
1200
|
-
c = src.read
|
1201
|
-
if c == '=' then
|
673
|
+
self.yacc_value = src.matched
|
674
|
+
result = {
|
675
|
+
")" => :tRPAREN,
|
676
|
+
"]" => :tRBRACK,
|
677
|
+
"}" => :tRCURLY
|
678
|
+
}[src.matched]
|
679
|
+
return result
|
680
|
+
elsif src.check(/\./) then
|
681
|
+
if src.scan(/\.\.\./) then
|
1202
682
|
self.lex_state = :expr_beg
|
1203
|
-
self.yacc_value =
|
1204
|
-
return :
|
1205
|
-
|
1206
|
-
src.unread c
|
1207
|
-
self.yacc_value = t("**")
|
1208
|
-
c = :tPOW
|
1209
|
-
else
|
1210
|
-
if c == '=' then
|
683
|
+
self.yacc_value = "..."
|
684
|
+
return :tDOT3
|
685
|
+
elsif src.scan(/\.\./) then
|
1211
686
|
self.lex_state = :expr_beg
|
1212
|
-
self.yacc_value =
|
1213
|
-
return :
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
c = :tSTAR
|
1221
|
-
else
|
1222
|
-
c = :tSTAR2
|
687
|
+
self.yacc_value = ".."
|
688
|
+
return :tDOT2
|
689
|
+
elsif src.scan(/\.\d/) then
|
690
|
+
rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
|
691
|
+
elsif src.scan(/\./) then
|
692
|
+
self.lex_state = :expr_dot
|
693
|
+
self.yacc_value = "."
|
694
|
+
return :tDOT
|
1223
695
|
end
|
1224
|
-
|
1225
|
-
end
|
1226
|
-
|
1227
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1228
|
-
self.lex_state = :expr_arg
|
1229
|
-
else
|
696
|
+
elsif src.scan(/\,/) then
|
1230
697
|
self.lex_state = :expr_beg
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
src.unread(c)
|
1245
|
-
self.yacc_value = t("!")
|
1246
|
-
return :tBANG
|
1247
|
-
when '=' then
|
1248
|
-
# documentation nodes - FIX: cruby much cleaner w/ lookahead
|
1249
|
-
if src.was_begin_of_line and src.match_string "begin" then
|
1250
|
-
self.token_buffer.clear
|
1251
|
-
self.token_buffer << "begin"
|
1252
|
-
c = src.read
|
1253
|
-
|
1254
|
-
if c =~ /\s/ then
|
1255
|
-
# In case last next was the newline.
|
1256
|
-
src.unread(c)
|
1257
|
-
|
1258
|
-
loop do
|
1259
|
-
c = src.read
|
1260
|
-
token_buffer << c
|
1261
|
-
|
1262
|
-
# If a line is followed by a blank line put it back.
|
1263
|
-
while c == "\n"
|
1264
|
-
c = src.read
|
1265
|
-
token_buffer << c
|
1266
|
-
end
|
1267
|
-
|
1268
|
-
if c == RubyLexer::EOF then
|
1269
|
-
raise SyntaxError, "embedded document meets end of file"
|
1270
|
-
end
|
1271
|
-
|
1272
|
-
next unless c == '='
|
1273
|
-
|
1274
|
-
if src.was_begin_of_line && src.match_string("end") then
|
1275
|
-
token_buffer << "end"
|
1276
|
-
token_buffer << src.read_line
|
1277
|
-
src.unread "\n"
|
1278
|
-
break
|
1279
|
-
end
|
698
|
+
self.yacc_value = ","
|
699
|
+
return :tCOMMA
|
700
|
+
elsif src.scan(/\(/) then
|
701
|
+
result = :tLPAREN2
|
702
|
+
self.command_start = true
|
703
|
+
if lex_state == :expr_beg || lex_state == :expr_mid then
|
704
|
+
result = :tLPAREN
|
705
|
+
elsif space_seen then
|
706
|
+
if lex_state == :expr_cmdarg then
|
707
|
+
result = :tLPAREN_ARG
|
708
|
+
elsif lex_state == :expr_arg then
|
709
|
+
warning("don't put space before argument parentheses")
|
710
|
+
result = :tLPAREN2
|
1280
711
|
end
|
1281
|
-
|
1282
|
-
# parser_support.result.add_comment(Node.comment(token_buffer.join))
|
1283
|
-
next
|
1284
712
|
end
|
1285
|
-
src.unread(c)
|
1286
|
-
end
|
1287
713
|
|
714
|
+
self.expr_beg_push "("
|
1288
715
|
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
c = src.read
|
1296
|
-
if c == '=' then
|
1297
|
-
c = src.read
|
1298
|
-
if c == '=' then
|
1299
|
-
self.yacc_value = t("===")
|
716
|
+
return result
|
717
|
+
elsif src.check(/\=/) then
|
718
|
+
if src.scan(/\=\=\=/) then
|
719
|
+
self.fix_arg_lex_state
|
720
|
+
self.yacc_value = "==="
|
1300
721
|
return :tEQQ
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
lex_state != :expr_endarg &&
|
1322
|
-
lex_state != :expr_class &&
|
1323
|
-
(!lex_state.is_argument || space_seen)) then
|
1324
|
-
tok = self.heredoc_identifier
|
1325
|
-
return tok unless tok == 0
|
1326
|
-
end
|
1327
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1328
|
-
self.lex_state = :expr_arg
|
1329
|
-
else
|
1330
|
-
self.lex_state = :expr_beg
|
1331
|
-
end
|
1332
|
-
if c == '=' then
|
1333
|
-
if (c = src.read) == '>' then
|
1334
|
-
self.yacc_value = t("<=>")
|
1335
|
-
return :tCMP
|
1336
|
-
end
|
1337
|
-
src.unread c
|
1338
|
-
self.yacc_value = t("<=")
|
1339
|
-
return :tLEQ
|
1340
|
-
end
|
1341
|
-
if c == '<' then
|
1342
|
-
if (c = src.read) == '=' then
|
1343
|
-
self.lex_state = :expr_beg
|
1344
|
-
self.yacc_value = t("\<\<")
|
1345
|
-
return :tOP_ASGN
|
1346
|
-
end
|
1347
|
-
src.unread(c)
|
1348
|
-
self.yacc_value = t("<<")
|
1349
|
-
return :tLSHFT
|
1350
|
-
end
|
1351
|
-
self.yacc_value = t("<")
|
1352
|
-
src.unread(c)
|
1353
|
-
return :tLT
|
1354
|
-
when '>' then
|
1355
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1356
|
-
self.lex_state = :expr_arg
|
1357
|
-
else
|
1358
|
-
self.lex_state = :expr_beg
|
1359
|
-
end
|
722
|
+
elsif src.scan(/\=\=/) then
|
723
|
+
self.fix_arg_lex_state
|
724
|
+
self.yacc_value = "=="
|
725
|
+
return :tEQ
|
726
|
+
elsif src.scan(/\=~/) then
|
727
|
+
self.fix_arg_lex_state
|
728
|
+
self.yacc_value = "=~"
|
729
|
+
return :tMATCH
|
730
|
+
elsif src.scan(/\=>/) then
|
731
|
+
self.fix_arg_lex_state
|
732
|
+
self.yacc_value = "=>"
|
733
|
+
return :tASSOC
|
734
|
+
elsif src.scan(/\=/) then
|
735
|
+
if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
|
736
|
+
@comments << '=' << src.matched
|
737
|
+
|
738
|
+
unless src.scan(/.*?\n=end\s*(\n|\z)/m) then
|
739
|
+
@comments.clear
|
740
|
+
rb_compile_error("embedded document meets end of file")
|
741
|
+
end
|
1360
742
|
|
1361
|
-
|
1362
|
-
self.yacc_value = t(">=")
|
1363
|
-
return :tGEQ
|
1364
|
-
end
|
1365
|
-
if c == '>' then
|
1366
|
-
if (c = src.read) == '=' then
|
1367
|
-
self.lex_state = :expr_beg
|
1368
|
-
self.yacc_value = t(">>")
|
1369
|
-
return :tOP_ASGN
|
1370
|
-
end
|
1371
|
-
src.unread c
|
1372
|
-
self.yacc_value = t(">>")
|
1373
|
-
return :tRSHFT
|
1374
|
-
end
|
1375
|
-
src.unread c
|
1376
|
-
self.yacc_value = t(">")
|
1377
|
-
return :tGT
|
1378
|
-
when '"' then
|
1379
|
-
self.lex_strterm = s(:strterm, STR_DQUOTE, '"', "\0") # TODO: question this
|
1380
|
-
self.yacc_value = t("\"")
|
1381
|
-
return :tSTRING_BEG
|
1382
|
-
when '`' then
|
1383
|
-
self.yacc_value = t("`")
|
1384
|
-
if lex_state == :expr_fname then
|
1385
|
-
self.lex_state = :expr_end
|
1386
|
-
return :tBACK_REF2
|
1387
|
-
end
|
1388
|
-
if lex_state == :expr_dot then
|
1389
|
-
if command_state then
|
1390
|
-
self.lex_state = :expr_cmdarg
|
1391
|
-
else
|
1392
|
-
self.lex_state = :expr_arg
|
1393
|
-
end
|
1394
|
-
return :tBACK_REF2
|
1395
|
-
end
|
1396
|
-
self.lex_strterm = s(:strterm, STR_XQUOTE, '`', "\0")
|
1397
|
-
return :tXSTRING_BEG
|
1398
|
-
when "\'" then
|
1399
|
-
self.lex_strterm = s(:strterm, STR_SQUOTE, "\'", "\0")
|
1400
|
-
self.yacc_value = t("'")
|
1401
|
-
return :tSTRING_BEG
|
1402
|
-
when '?' then
|
1403
|
-
if lex_state == :expr_end || lex_state == :expr_endarg then
|
1404
|
-
self.lex_state = :expr_beg
|
1405
|
-
self.yacc_value = t("?")
|
1406
|
-
return '?'
|
1407
|
-
end
|
743
|
+
@comments << src.matched
|
1408
744
|
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
|
1414
|
-
if !lex_state.is_argument then
|
1415
|
-
c2 = 0
|
1416
|
-
c2 = case c
|
1417
|
-
when ' ' then
|
1418
|
-
's'
|
1419
|
-
when "\n" then
|
1420
|
-
'n'
|
1421
|
-
when "\t" then
|
1422
|
-
't'
|
1423
|
-
when "\v" then
|
1424
|
-
'v'
|
1425
|
-
when "\r" then
|
1426
|
-
'r'
|
1427
|
-
when "\f" then
|
1428
|
-
'f'
|
1429
|
-
end
|
1430
|
-
|
1431
|
-
if c2 != 0 then
|
1432
|
-
warning("invalid character syntax; use ?\\" + c2)
|
745
|
+
next
|
746
|
+
else
|
747
|
+
self.fix_arg_lex_state
|
748
|
+
self.yacc_value = '='
|
749
|
+
return :tEQL
|
1433
750
|
end
|
1434
751
|
end
|
1435
|
-
|
1436
|
-
|
1437
|
-
|
1438
|
-
|
1439
|
-
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1443
|
-
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
|
1450
|
-
|
1451
|
-
return '?'
|
1452
|
-
elsif c == "\\" then
|
1453
|
-
c = self.read_escape
|
1454
|
-
end
|
1455
|
-
c[0] &= 0xff
|
1456
|
-
self.lex_state = :expr_end
|
1457
|
-
self.yacc_value = c[0]
|
1458
|
-
return :tINTEGER
|
1459
|
-
when '&' then
|
1460
|
-
if (c = src.read) == '&' then
|
1461
|
-
self.lex_state = :expr_beg
|
1462
|
-
if (c = src.read) == '=' then
|
1463
|
-
self.yacc_value = t("&&")
|
1464
|
-
self.lex_state = :expr_beg
|
1465
|
-
return :tOP_ASGN
|
1466
|
-
end
|
1467
|
-
src.unread c
|
1468
|
-
self.yacc_value = t("&&")
|
1469
|
-
return :tANDOP
|
1470
|
-
elsif c == '=' then
|
1471
|
-
self.yacc_value = t("&")
|
1472
|
-
self.lex_state = :expr_beg
|
1473
|
-
return :tOP_ASGN
|
1474
|
-
end
|
1475
|
-
|
1476
|
-
src.unread c
|
1477
|
-
|
1478
|
-
if lex_state.is_argument && space_seen && c !~ /\s/ then
|
1479
|
-
warning("`&' interpreted as argument prefix")
|
1480
|
-
c = :tAMPER
|
1481
|
-
elsif lex_state == :expr_beg || lex_state == :expr_mid then
|
1482
|
-
c = :tAMPER
|
1483
|
-
else
|
1484
|
-
c = :tAMPER2
|
1485
|
-
end
|
1486
|
-
|
1487
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1488
|
-
self.lex_state = :expr_arg
|
1489
|
-
else
|
1490
|
-
self.lex_state = :expr_beg
|
1491
|
-
end
|
1492
|
-
self.yacc_value = t("&")
|
1493
|
-
return c
|
1494
|
-
when '|' then
|
1495
|
-
if (c = src.read) == '|' then
|
1496
|
-
self.lex_state = :expr_beg
|
1497
|
-
if (c = src.read) == '=' then
|
1498
|
-
self.lex_state = :expr_beg
|
1499
|
-
self.yacc_value = t("||")
|
1500
|
-
return :tOP_ASGN
|
1501
|
-
end
|
1502
|
-
src.unread c
|
1503
|
-
self.yacc_value = t("||")
|
1504
|
-
return :tOROP
|
1505
|
-
end
|
1506
|
-
if c == '=' then
|
1507
|
-
self.lex_state = :expr_beg
|
1508
|
-
self.yacc_value = t("|")
|
1509
|
-
return :tOP_ASGN
|
1510
|
-
end
|
1511
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1512
|
-
self.lex_state = :expr_arg
|
1513
|
-
else
|
1514
|
-
self.lex_state = :expr_beg
|
1515
|
-
end
|
1516
|
-
src.unread c
|
1517
|
-
self.yacc_value = t("|")
|
1518
|
-
return :tPIPE
|
1519
|
-
when '+' then
|
1520
|
-
c = src.read
|
1521
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1522
|
-
self.lex_state = :expr_arg
|
1523
|
-
if c == '@' then
|
1524
|
-
self.yacc_value = t("+@")
|
1525
|
-
return :tUPLUS
|
1526
|
-
end
|
1527
|
-
src.unread c
|
1528
|
-
self.yacc_value = t("+")
|
1529
|
-
return :tPLUS
|
1530
|
-
end
|
1531
|
-
|
1532
|
-
if c == '=' then
|
1533
|
-
self.lex_state = :expr_beg
|
1534
|
-
self.yacc_value = t("+")
|
1535
|
-
return :tOP_ASGN
|
1536
|
-
end
|
1537
|
-
|
1538
|
-
if (lex_state == :expr_beg || lex_state == :expr_mid ||
|
1539
|
-
(lex_state.is_argument && space_seen && c !~ /\s/)) then
|
1540
|
-
arg_ambiguous if lex_state.is_argument
|
1541
|
-
self.lex_state = :expr_beg
|
1542
|
-
src.unread c
|
1543
|
-
if c =~ /\d/ then
|
1544
|
-
c = '+'
|
1545
|
-
return parse_number(c)
|
1546
|
-
end
|
1547
|
-
self.yacc_value = t("+")
|
1548
|
-
return :tUPLUS
|
1549
|
-
end
|
1550
|
-
self.lex_state = :expr_beg
|
1551
|
-
src.unread c
|
1552
|
-
self.yacc_value = t("+")
|
1553
|
-
return :tPLUS
|
1554
|
-
when '-' then
|
1555
|
-
c = src.read
|
1556
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1557
|
-
self.lex_state = :expr_arg
|
1558
|
-
if c == '@' then
|
1559
|
-
self.yacc_value = t("-@")
|
1560
|
-
return :tUMINUS
|
1561
|
-
end
|
1562
|
-
src.unread c
|
1563
|
-
self.yacc_value = t("-")
|
1564
|
-
return :tMINUS
|
1565
|
-
end
|
1566
|
-
if c == '=' then
|
1567
|
-
self.lex_state = :expr_beg
|
1568
|
-
self.yacc_value = t("-")
|
1569
|
-
return :tOP_ASGN
|
1570
|
-
end
|
1571
|
-
if (lex_state == :expr_beg || lex_state == :expr_mid ||
|
1572
|
-
(lex_state.is_argument && space_seen && c !~ /\s/)) then
|
1573
|
-
arg_ambiguous if lex_state.is_argument
|
1574
|
-
self.lex_state = :expr_beg
|
1575
|
-
src.unread c
|
1576
|
-
self.yacc_value = t("-")
|
1577
|
-
if c =~ /\d/ then
|
1578
|
-
return :tUMINUS_NUM
|
1579
|
-
end
|
1580
|
-
return :tUMINUS
|
1581
|
-
end
|
1582
|
-
self.lex_state = :expr_beg
|
1583
|
-
src.unread c
|
1584
|
-
self.yacc_value = t("-")
|
1585
|
-
return :tMINUS
|
1586
|
-
when '.' then
|
1587
|
-
self.lex_state = :expr_beg
|
1588
|
-
if (c = src.read) == '.' then
|
1589
|
-
if (c = src.read) == '.' then
|
1590
|
-
self.yacc_value = t("...")
|
1591
|
-
return :tDOT3
|
1592
|
-
end
|
1593
|
-
src.unread c
|
1594
|
-
self.yacc_value = t("..")
|
1595
|
-
return :tDOT2
|
1596
|
-
end
|
1597
|
-
src.unread c
|
1598
|
-
if c =~ /\d/ then
|
1599
|
-
raise SyntaxError, "no .<digit> floating literal anymore put 0 before dot"
|
1600
|
-
end
|
1601
|
-
self.lex_state = :expr_dot
|
1602
|
-
self.yacc_value = t(".")
|
1603
|
-
return :tDOT
|
1604
|
-
when /[0-9]/ then
|
1605
|
-
return parse_number(c)
|
1606
|
-
when ')' then # REFACTOR: omg this is lame... next 3 are all the same
|
1607
|
-
cond.lexpop
|
1608
|
-
cmdarg.lexpop
|
1609
|
-
self.lex_state = :expr_end
|
1610
|
-
self.yacc_value = t(")")
|
1611
|
-
return :tRPAREN
|
1612
|
-
when ']' then
|
1613
|
-
cond.lexpop
|
1614
|
-
cmdarg.lexpop
|
1615
|
-
self.lex_state = :expr_end
|
1616
|
-
self.yacc_value = t("]")
|
1617
|
-
return :tRBRACK
|
1618
|
-
when '}' then
|
1619
|
-
cond.lexpop
|
1620
|
-
cmdarg.lexpop
|
1621
|
-
self.lex_state = :expr_end
|
1622
|
-
self.yacc_value = t("end")
|
1623
|
-
return :tRCURLY
|
1624
|
-
when ':' then
|
1625
|
-
c = src.read
|
1626
|
-
if c == ':' then
|
752
|
+
elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
|
753
|
+
self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
|
754
|
+
self.lex_state = :expr_end
|
755
|
+
return :tSTRING
|
756
|
+
elsif src.scan(/\"/) then # FALLBACK
|
757
|
+
self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
|
758
|
+
self.yacc_value = "\""
|
759
|
+
return :tSTRING_BEG
|
760
|
+
elsif src.scan(/\@\@?\w*/) then
|
761
|
+
self.token = src.matched
|
762
|
+
|
763
|
+
rb_compile_error "`#{token}` is not allowed as a variable name" if
|
764
|
+
token =~ /\@\d/
|
765
|
+
|
766
|
+
return process_token(command_state)
|
767
|
+
elsif src.scan(/\:\:/) then
|
1627
768
|
if (lex_state == :expr_beg ||
|
1628
769
|
lex_state == :expr_mid ||
|
1629
770
|
lex_state == :expr_class ||
|
1630
771
|
(lex_state.is_argument && space_seen)) then
|
1631
772
|
self.lex_state = :expr_beg
|
1632
|
-
self.yacc_value =
|
773
|
+
self.yacc_value = "::"
|
1633
774
|
return :tCOLON3
|
1634
775
|
end
|
1635
776
|
|
1636
|
-
self.lex_state = :expr_dot
|
1637
|
-
self.yacc_value =
|
1638
|
-
return :tCOLON2
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1642
|
-
|
1643
|
-
|
1644
|
-
|
1645
|
-
|
1646
|
-
|
1647
|
-
|
1648
|
-
|
1649
|
-
|
1650
|
-
|
1651
|
-
when '"' then
|
1652
|
-
self.lex_strterm = s(:strterm, STR_DSYM, c, "\0")
|
1653
|
-
else
|
1654
|
-
src.unread c
|
1655
|
-
end
|
1656
|
-
|
1657
|
-
self.lex_state = :expr_fname
|
1658
|
-
self.yacc_value = t(":")
|
1659
|
-
return :tSYMBEG
|
1660
|
-
when '/' then
|
1661
|
-
if lex_state == :expr_beg || lex_state == :expr_mid then
|
1662
|
-
self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
|
1663
|
-
self.yacc_value = t("/")
|
1664
|
-
return :tREGEXP_BEG
|
1665
|
-
end
|
1666
|
-
|
1667
|
-
if (c = src.read) == '=' then
|
1668
|
-
self.yacc_value = t("/")
|
1669
|
-
self.lex_state = :expr_beg
|
1670
|
-
return :tOP_ASGN
|
1671
|
-
end
|
1672
|
-
|
1673
|
-
src.unread c
|
1674
|
-
|
1675
|
-
if lex_state.is_argument && space_seen then
|
1676
|
-
unless c =~ /\s/ then
|
1677
|
-
arg_ambiguous
|
1678
|
-
self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
|
1679
|
-
self.yacc_value = t("/")
|
1680
|
-
return :tREGEXP_BEG
|
1681
|
-
end
|
1682
|
-
end
|
1683
|
-
|
1684
|
-
self.lex_state = if (lex_state == :expr_fname ||
|
1685
|
-
lex_state == :expr_dot) then
|
1686
|
-
:expr_arg
|
1687
|
-
else
|
1688
|
-
:expr_beg
|
1689
|
-
end
|
1690
|
-
|
1691
|
-
self.yacc_value = t("/")
|
1692
|
-
return :tDIVIDE
|
1693
|
-
when '^' then
|
1694
|
-
if (c = src.read) == '=' then
|
1695
|
-
self.lex_state = :expr_beg
|
1696
|
-
self.yacc_value = t("^")
|
1697
|
-
return :tOP_ASGN
|
1698
|
-
end
|
1699
|
-
if lex_state == :expr_fname || self.lex_state == :expr_dot then
|
1700
|
-
self.lex_state = :expr_arg
|
1701
|
-
else
|
1702
|
-
self.lex_state = :expr_beg
|
1703
|
-
end
|
1704
|
-
src.unread c
|
1705
|
-
self.yacc_value = t("^")
|
1706
|
-
return :tCARET
|
1707
|
-
when ';' then
|
1708
|
-
self.command_start = true
|
1709
|
-
self.lex_state = :expr_beg
|
1710
|
-
self.yacc_value = t(";")
|
1711
|
-
return c
|
1712
|
-
when ',' then
|
1713
|
-
self.lex_state = :expr_beg
|
1714
|
-
self.yacc_value = t(",")
|
1715
|
-
return c
|
1716
|
-
when '~' then
|
1717
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1718
|
-
if (c = src.read) != '@' then
|
1719
|
-
src.unread c
|
1720
|
-
end
|
1721
|
-
end
|
1722
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1723
|
-
self.lex_state = :expr_arg
|
1724
|
-
else
|
1725
|
-
self.lex_state = :expr_beg
|
1726
|
-
end
|
1727
|
-
self.yacc_value = t("~")
|
1728
|
-
return :tTILDE
|
1729
|
-
when '(' then
|
1730
|
-
c = :tLPAREN2
|
1731
|
-
self.command_start = true
|
1732
|
-
if lex_state == :expr_beg || lex_state == :expr_mid then
|
1733
|
-
c = :tLPAREN
|
1734
|
-
elsif space_seen then
|
1735
|
-
if lex_state == :expr_cmdarg then
|
1736
|
-
c = :tLPAREN_ARG
|
1737
|
-
elsif lex_state == :expr_arg then
|
1738
|
-
warning("don't put space before argument parentheses")
|
1739
|
-
c = :tLPAREN2
|
1740
|
-
end
|
1741
|
-
end
|
1742
|
-
cond.push false
|
1743
|
-
cmdarg.push false
|
1744
|
-
self.lex_state = :expr_beg
|
1745
|
-
self.yacc_value = t("(")
|
1746
|
-
return c
|
1747
|
-
when '[' then
|
1748
|
-
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1749
|
-
self.lex_state = :expr_arg
|
1750
|
-
if (c = src.read) == ']' then
|
1751
|
-
if src.peek('=') then
|
1752
|
-
c = src.read
|
1753
|
-
self.yacc_value = t("[]=")
|
1754
|
-
return :tASET
|
1755
|
-
end
|
1756
|
-
self.yacc_value = t("[]")
|
1757
|
-
return :tAREF
|
1758
|
-
end
|
1759
|
-
src.unread c
|
1760
|
-
self.yacc_value = t("[")
|
1761
|
-
return '['
|
1762
|
-
elsif lex_state == :expr_beg || lex_state == :expr_mid then
|
1763
|
-
c = :tLBRACK
|
1764
|
-
elsif lex_state.is_argument && space_seen then
|
1765
|
-
c = :tLBRACK
|
1766
|
-
end
|
1767
|
-
self.lex_state = :expr_beg
|
1768
|
-
cond.push false
|
1769
|
-
cmdarg.push false
|
1770
|
-
self.yacc_value = t("[")
|
1771
|
-
return c
|
1772
|
-
when '{' then
|
1773
|
-
c = :tLCURLY
|
1774
|
-
|
1775
|
-
if lex_state.is_argument || lex_state == :expr_end then
|
1776
|
-
c = :tLCURLY # block (primary)
|
1777
|
-
elsif lex_state == :expr_endarg then
|
1778
|
-
c = :tLBRACE_ARG # block (expr)
|
1779
|
-
else
|
1780
|
-
c = :tLBRACE # hash
|
1781
|
-
end
|
1782
|
-
cond.push false
|
1783
|
-
cmdarg.push false
|
1784
|
-
self.lex_state = :expr_beg
|
1785
|
-
self.yacc_value = t("{")
|
1786
|
-
return c
|
1787
|
-
when "\\" then
|
1788
|
-
c = src.read
|
1789
|
-
if c == "\n" then
|
1790
|
-
space_seen = true
|
1791
|
-
next # skip \\n
|
1792
|
-
end
|
1793
|
-
src.unread c
|
1794
|
-
self.yacc_value = t("\\")
|
1795
|
-
return "\\"
|
1796
|
-
when '%' then
|
1797
|
-
if lex_state == :expr_beg || lex_state == :expr_mid then
|
1798
|
-
return parse_quote(src.read)
|
1799
|
-
end
|
1800
|
-
|
1801
|
-
c = src.read
|
1802
|
-
if c == '=' then
|
1803
|
-
self.lex_state = :expr_beg
|
1804
|
-
self.yacc_value = t("%")
|
1805
|
-
return :tOP_ASGN
|
1806
|
-
end
|
1807
|
-
|
1808
|
-
return parse_quote(c) if lex_state.is_argument && space_seen && c !~ /\s/
|
1809
|
-
|
1810
|
-
self.lex_state = case lex_state
|
1811
|
-
when :expr_fname, :expr_dot then
|
1812
|
-
:expr_arg
|
1813
|
-
else
|
1814
|
-
:expr_beg
|
1815
|
-
end
|
1816
|
-
|
1817
|
-
src.unread c
|
1818
|
-
self.yacc_value = t("%")
|
1819
|
-
|
1820
|
-
return :tPERCENT
|
1821
|
-
when '$' then
|
1822
|
-
last_state = lex_state
|
1823
|
-
self.lex_state = :expr_end
|
1824
|
-
token_buffer.clear
|
1825
|
-
c = src.read
|
1826
|
-
case c
|
1827
|
-
when '_' then # $_: last read line string
|
1828
|
-
c = src.read
|
1829
|
-
|
1830
|
-
token_buffer << '$'
|
1831
|
-
token_buffer << '_'
|
1832
|
-
|
1833
|
-
unless c =~ /\w/ then
|
1834
|
-
src.unread c
|
1835
|
-
self.yacc_value = t(token_buffer.join)
|
1836
|
-
return :tGVAR
|
1837
|
-
end
|
1838
|
-
when /[~*$?!@\/\\;,.=:<>\"]/ then
|
1839
|
-
token_buffer << '$'
|
1840
|
-
token_buffer << c
|
1841
|
-
self.yacc_value = t(token_buffer.join)
|
1842
|
-
return :tGVAR
|
1843
|
-
when '-' then
|
1844
|
-
token_buffer << '$'
|
1845
|
-
token_buffer << c
|
1846
|
-
c = src.read
|
1847
|
-
if c =~ /\w/ then
|
1848
|
-
token_buffer << c
|
1849
|
-
else
|
1850
|
-
src.unread c
|
1851
|
-
end
|
1852
|
-
self.yacc_value = t(token_buffer.join)
|
1853
|
-
# xxx shouldn't check if valid option variable
|
1854
|
-
return :tGVAR
|
1855
|
-
when /[\&\`\'\+]/ then
|
1856
|
-
# Explicit reference to these vars as symbols...
|
1857
|
-
if last_state == :expr_fname then
|
1858
|
-
token_buffer << '$'
|
1859
|
-
token_buffer << c
|
1860
|
-
self.yacc_value = t(token_buffer.join)
|
1861
|
-
return :tGVAR
|
1862
|
-
end
|
1863
|
-
|
1864
|
-
self.yacc_value = s(:back_ref, c.to_sym)
|
1865
|
-
return :tBACK_REF
|
1866
|
-
when /[1-9]/ then
|
1867
|
-
token_buffer << '$'
|
1868
|
-
begin
|
1869
|
-
token_buffer << c
|
1870
|
-
c = src.read
|
1871
|
-
end while c =~ /\d/
|
1872
|
-
src.unread c
|
1873
|
-
if last_state == :expr_fname then
|
1874
|
-
self.yacc_value = t(token_buffer.join)
|
1875
|
-
return :tGVAR
|
1876
|
-
else
|
1877
|
-
self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
|
1878
|
-
return :tNTH_REF
|
1879
|
-
end
|
1880
|
-
when '0' then
|
1881
|
-
token_buffer << '$'
|
1882
|
-
else
|
1883
|
-
unless c =~ /\w/ then
|
1884
|
-
src.unread c
|
1885
|
-
self.yacc_value = t("$")
|
1886
|
-
return '$'
|
1887
|
-
end
|
1888
|
-
token_buffer << '$'
|
1889
|
-
end
|
1890
|
-
when '@' then
|
1891
|
-
c = src.read
|
1892
|
-
token_buffer.clear
|
1893
|
-
token_buffer << '@'
|
1894
|
-
if c == '@' then
|
1895
|
-
token_buffer << '@'
|
1896
|
-
c = src.read
|
1897
|
-
end
|
1898
|
-
if c =~ /\d/ then
|
1899
|
-
if token_buffer.length == 1 then
|
1900
|
-
raise SyntaxError, "`@" + c + "' is not allowed as an instance variable name"
|
1901
|
-
else
|
1902
|
-
raise SyntaxError, "`@@" + c + "' is not allowed as a class variable name"
|
1903
|
-
end
|
1904
|
-
end
|
1905
|
-
unless c =~ /\w/ then
|
1906
|
-
src.unread c
|
1907
|
-
self.yacc_value = t("@")
|
1908
|
-
return '@'
|
1909
|
-
end
|
1910
|
-
when '_' then
|
1911
|
-
if src.was_begin_of_line && src.match_string("_END__\n", false) then
|
1912
|
-
self.end_seen = true
|
1913
|
-
return RubyLexer::EOF
|
1914
|
-
end
|
1915
|
-
token_buffer.clear
|
1916
|
-
else
|
1917
|
-
unless c =~ /\w/ then
|
1918
|
-
raise SyntaxError, "Invalid char '#{c.inspect}' in expression"
|
1919
|
-
end
|
1920
|
-
token_buffer.clear
|
1921
|
-
end
|
1922
|
-
|
1923
|
-
begin
|
1924
|
-
token_buffer << c
|
1925
|
-
# if ismbchar(c) then
|
1926
|
-
# len = mbclen(c) - 1
|
1927
|
-
# (0..len).each do
|
1928
|
-
# c = src.read;
|
1929
|
-
# token_buffer << c
|
1930
|
-
# end
|
1931
|
-
# end
|
1932
|
-
c = src.read
|
1933
|
-
end while c =~ /\w/
|
1934
|
-
|
1935
|
-
if c =~ /\!|\?/ && token_buffer[0] =~ /\w/ && src.peek != '=' then
|
1936
|
-
token_buffer << c
|
1937
|
-
else
|
1938
|
-
src.unread c
|
1939
|
-
end
|
1940
|
-
|
1941
|
-
result = nil
|
1942
|
-
last_state = lex_state
|
1943
|
-
|
1944
|
-
case token_buffer[0]
|
1945
|
-
when '$' then
|
1946
|
-
self.lex_state = :expr_end
|
1947
|
-
result = :tGVAR
|
1948
|
-
when '@' then
|
1949
|
-
self.lex_state = :expr_end
|
1950
|
-
if token_buffer[1] == '@' then
|
1951
|
-
result = :tCVAR
|
1952
|
-
else
|
1953
|
-
result = :tIVAR
|
1954
|
-
end
|
1955
|
-
else
|
1956
|
-
if token_buffer[-1] =~ /[!?]/ then
|
1957
|
-
result = :tFID
|
1958
|
-
else
|
1959
|
-
if lex_state == :expr_fname then
|
1960
|
-
if (c = src.read) == '=' then
|
1961
|
-
c2 = src.read
|
1962
|
-
|
1963
|
-
if c2 != '~' && c2 != '>' && (c2 != '=' || (c2 == "\n" && src.peek('>'))) then
|
1964
|
-
result = :tIDENTIFIER
|
1965
|
-
token_buffer << c
|
1966
|
-
src.unread c2
|
1967
|
-
else
|
1968
|
-
src.unread c2
|
1969
|
-
src.unread c
|
1970
|
-
end
|
1971
|
-
else
|
1972
|
-
src.unread c
|
1973
|
-
end
|
1974
|
-
end
|
1975
|
-
if result.nil? && token_buffer[0] =~ /[A-Z]/ then
|
1976
|
-
result = :tCONSTANT
|
1977
|
-
else
|
1978
|
-
result = :tIDENTIFIER
|
1979
|
-
end
|
1980
|
-
end
|
1981
|
-
|
1982
|
-
unless lex_state == :expr_dot then
|
1983
|
-
# See if it is a reserved word.
|
1984
|
-
keyword = Keyword.keyword(token_buffer.join, token_buffer.length)
|
1985
|
-
|
1986
|
-
unless keyword.nil? then
|
1987
|
-
state = lex_state
|
1988
|
-
self.lex_state = keyword.state
|
1989
|
-
|
1990
|
-
if state == :expr_fname then
|
1991
|
-
self.yacc_value = t(keyword.name)
|
1992
|
-
else
|
1993
|
-
self.yacc_value = t(token_buffer.join)
|
1994
|
-
end
|
1995
|
-
|
1996
|
-
if keyword.id0 == :kDO then
|
1997
|
-
self.command_start = true
|
1998
|
-
return :kDO_COND if cond.is_in_state
|
1999
|
-
return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
|
2000
|
-
return :kDO_BLOCK if state == :expr_endarg
|
2001
|
-
return :kDO
|
2002
|
-
end
|
2003
|
-
|
2004
|
-
return keyword.id0 if state == :expr_beg
|
2005
|
-
|
2006
|
-
self.lex_state = :expr_beg unless keyword.id0 == keyword.id1
|
2007
|
-
|
2008
|
-
return keyword.id1
|
2009
|
-
end
|
2010
|
-
end
|
2011
|
-
|
2012
|
-
if (lex_state == :expr_beg ||
|
2013
|
-
lex_state == :expr_mid ||
|
2014
|
-
lex_state == :expr_dot ||
|
2015
|
-
lex_state == :expr_arg ||
|
2016
|
-
lex_state == :expr_cmdarg) then
|
2017
|
-
if command_state then
|
2018
|
-
self.lex_state = :expr_cmdarg
|
2019
|
-
else
|
2020
|
-
self.lex_state = :expr_arg
|
2021
|
-
end
|
2022
|
-
else
|
2023
|
-
self.lex_state = :expr_end
|
2024
|
-
end
|
2025
|
-
end
|
2026
|
-
|
2027
|
-
|
2028
|
-
temp_val = token_buffer.join
|
2029
|
-
|
2030
|
-
# Lame: parsing logic made it into lexer in ruby...So we
|
2031
|
-
# are emulating
|
2032
|
-
# FIXME: I believe this is much simpler now...
|
2033
|
-
# HACK
|
2034
|
-
# scope = parser_support.current_scope
|
2035
|
-
# if (IdUtil.var_type(temp_val) == IdUtil.LOCAL_VAR &&
|
2036
|
-
# last_state != :expr_dot &&
|
2037
|
-
# (BlockStaticScope === scope && (scope.is_defined(temp_val) >= 0)) ||
|
2038
|
-
# (scope.local_scope.is_defined(temp_val) >= 0)) then
|
2039
|
-
# self.lex_state = :expr_end
|
2040
|
-
# end
|
2041
|
-
|
2042
|
-
self.yacc_value = t(temp_val)
|
2043
|
-
|
2044
|
-
return result
|
2045
|
-
end
|
2046
|
-
end
|
2047
|
-
|
2048
|
-
##
|
2049
|
-
# Parse a number from the input stream.
|
2050
|
-
#
|
2051
|
-
# @param c The first character of the number.
|
2052
|
-
# @return A int constant wich represents a token.
|
2053
|
-
|
2054
|
-
def parse_number c
|
2055
|
-
self.lex_state = :expr_end
|
2056
|
-
|
2057
|
-
token_buffer.clear
|
2058
|
-
|
2059
|
-
if c == '-' then
|
2060
|
-
token_buffer << c
|
2061
|
-
c = src.read
|
2062
|
-
elsif c == '+' then
|
2063
|
-
# We don't append '+' since Java number parser gets confused FIX
|
2064
|
-
c = src.read
|
2065
|
-
end
|
2066
|
-
|
2067
|
-
nondigit = "\0"
|
777
|
+
self.lex_state = :expr_dot
|
778
|
+
self.yacc_value = "::"
|
779
|
+
return :tCOLON2
|
780
|
+
elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
|
781
|
+
self.yacc_value = src[1]
|
782
|
+
self.lex_state = :expr_end
|
783
|
+
return :tSYMBOL
|
784
|
+
elsif src.scan(/\:/) then
|
785
|
+
# ?: / then / when
|
786
|
+
if (lex_state == :expr_end || lex_state == :expr_endarg||
|
787
|
+
src.check(/\s/)) then
|
788
|
+
self.lex_state = :expr_beg
|
789
|
+
self.yacc_value = ":"
|
790
|
+
return :tCOLON
|
791
|
+
end
|
2068
792
|
|
2069
|
-
|
2070
|
-
|
2071
|
-
|
793
|
+
case
|
794
|
+
when src.scan(/\'/) then
|
795
|
+
self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
|
796
|
+
when src.scan(/\"/) then
|
797
|
+
self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
|
798
|
+
end
|
2072
799
|
|
2073
|
-
|
2074
|
-
|
2075
|
-
|
800
|
+
self.lex_state = :expr_fname
|
801
|
+
self.yacc_value = ":"
|
802
|
+
return :tSYMBEG
|
803
|
+
elsif src.check(/[0-9]/) then
|
804
|
+
return parse_number
|
805
|
+
elsif src.scan(/\[/) then
|
806
|
+
result = src.matched
|
2076
807
|
|
2077
|
-
|
2078
|
-
|
2079
|
-
|
2080
|
-
|
2081
|
-
|
2082
|
-
|
2083
|
-
|
2084
|
-
|
808
|
+
if lex_state == :expr_fname || lex_state == :expr_dot then
|
809
|
+
self.lex_state = :expr_arg
|
810
|
+
case
|
811
|
+
when src.scan(/\]\=/) then
|
812
|
+
self.yacc_value = "[]="
|
813
|
+
return :tASET
|
814
|
+
when src.scan(/\]/) then
|
815
|
+
self.yacc_value = "[]"
|
816
|
+
return :tAREF
|
2085
817
|
else
|
2086
|
-
|
818
|
+
rb_compile_error "unexpected '['"
|
2087
819
|
end
|
2088
|
-
|
820
|
+
elsif lex_state == :expr_beg || lex_state == :expr_mid then
|
821
|
+
result = :tLBRACK
|
822
|
+
elsif lex_state.is_argument && space_seen then
|
823
|
+
result = :tLBRACK
|
2089
824
|
end
|
2090
|
-
end
|
2091
825
|
|
2092
|
-
|
826
|
+
self.expr_beg_push "["
|
2093
827
|
|
2094
|
-
|
2095
|
-
|
2096
|
-
|
2097
|
-
|
2098
|
-
|
2099
|
-
|
2100
|
-
|
2101
|
-
|
2102
|
-
|
2103
|
-
|
2104
|
-
|
2105
|
-
|
2106
|
-
|
2107
|
-
|
2108
|
-
|
2109
|
-
|
2110
|
-
|
828
|
+
return result
|
829
|
+
elsif src.scan(/\'(\\.|[^\'])*\'/) then
|
830
|
+
self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
|
831
|
+
self.lex_state = :expr_end
|
832
|
+
return :tSTRING
|
833
|
+
elsif src.check(/\|/) then
|
834
|
+
if src.scan(/\|\|\=/) then
|
835
|
+
self.lex_state = :expr_beg
|
836
|
+
self.yacc_value = "||"
|
837
|
+
return :tOP_ASGN
|
838
|
+
elsif src.scan(/\|\|/) then
|
839
|
+
self.lex_state = :expr_beg
|
840
|
+
self.yacc_value = "||"
|
841
|
+
return :tOROP
|
842
|
+
elsif src.scan(/\|\=/) then
|
843
|
+
self.lex_state = :expr_beg
|
844
|
+
self.yacc_value = "|"
|
845
|
+
return :tOP_ASGN
|
846
|
+
elsif src.scan(/\|/) then
|
847
|
+
self.fix_arg_lex_state
|
848
|
+
self.yacc_value = "|"
|
849
|
+
return :tPIPE
|
850
|
+
end
|
851
|
+
elsif src.scan(/\{/) then
|
852
|
+
result = if lex_state.is_argument || lex_state == :expr_end then
|
853
|
+
:tLCURLY # block (primary)
|
854
|
+
elsif lex_state == :expr_endarg then
|
855
|
+
:tLBRACE_ARG # block (expr)
|
856
|
+
else
|
857
|
+
:tLBRACE # hash
|
858
|
+
end
|
859
|
+
|
860
|
+
self.expr_beg_push "{"
|
861
|
+
|
862
|
+
return result
|
863
|
+
elsif src.scan(/[+-]/) then
|
864
|
+
sign = src.matched
|
865
|
+
utype, type = if sign == "+" then
|
866
|
+
[:tUPLUS, :tPLUS]
|
867
|
+
else
|
868
|
+
[:tUMINUS, :tMINUS]
|
869
|
+
end
|
870
|
+
|
871
|
+
if lex_state == :expr_fname || lex_state == :expr_dot then
|
872
|
+
self.lex_state = :expr_arg
|
873
|
+
if src.scan(/@/) then
|
874
|
+
self.yacc_value = "#{sign}@"
|
875
|
+
return utype
|
2111
876
|
else
|
2112
|
-
|
877
|
+
self.yacc_value = sign
|
878
|
+
return type
|
2113
879
|
end
|
2114
|
-
c = src.read
|
2115
880
|
end
|
2116
|
-
end
|
2117
881
|
|
2118
|
-
|
882
|
+
if src.scan(/\=/) then
|
883
|
+
self.lex_state = :expr_beg
|
884
|
+
self.yacc_value = sign
|
885
|
+
return :tOP_ASGN
|
886
|
+
end
|
2119
887
|
|
2120
|
-
|
2121
|
-
|
2122
|
-
|
2123
|
-
|
2124
|
-
end
|
2125
|
-
self.yacc_value = token_buffer.join.to_i(2)
|
2126
|
-
return :tINTEGER
|
2127
|
-
when /d/i then # decimal
|
2128
|
-
c = src.read
|
2129
|
-
if c =~ /\d/ then
|
2130
|
-
loop do
|
2131
|
-
if c == '_' then
|
2132
|
-
break if nondigit != "\0"
|
2133
|
-
nondigit = c
|
2134
|
-
elsif c =~ /\d/ then
|
2135
|
-
nondigit = "\0"
|
2136
|
-
token_buffer << c
|
2137
|
-
else
|
2138
|
-
break
|
888
|
+
if (lex_state == :expr_beg || lex_state == :expr_mid ||
|
889
|
+
(lex_state.is_argument && space_seen && !src.check(/\s/))) then
|
890
|
+
if lex_state.is_argument then
|
891
|
+
arg_ambiguous
|
2139
892
|
end
|
2140
|
-
c = src.read
|
2141
|
-
end
|
2142
|
-
end
|
2143
893
|
|
2144
|
-
|
894
|
+
self.lex_state = :expr_beg
|
895
|
+
self.yacc_value = sign
|
2145
896
|
|
2146
|
-
|
2147
|
-
|
2148
|
-
|
2149
|
-
|
2150
|
-
|
897
|
+
if src.check(/\d/) then
|
898
|
+
if utype == :tUPLUS then
|
899
|
+
return self.parse_number
|
900
|
+
else
|
901
|
+
return :tUMINUS_NUM
|
902
|
+
end
|
903
|
+
end
|
2151
904
|
|
2152
|
-
|
2153
|
-
return :tINTEGER
|
2154
|
-
when /o/i, /[0-7_]/ then # octal
|
2155
|
-
c = src.read if c =~ /o/i # prefixed octal - kill me
|
2156
|
-
loop do
|
2157
|
-
if c == '_' then
|
2158
|
-
break if (nondigit != "\0")
|
2159
|
-
nondigit = c
|
2160
|
-
elsif c >= '0' && c <= '7' then
|
2161
|
-
nondigit = "\0"
|
2162
|
-
token_buffer << c
|
2163
|
-
else
|
2164
|
-
break
|
905
|
+
return utype
|
2165
906
|
end
|
2166
|
-
c = src.read
|
2167
|
-
end
|
2168
|
-
if token_buffer.length > start_len then
|
2169
|
-
src.unread c
|
2170
907
|
|
2171
|
-
|
2172
|
-
|
908
|
+
self.lex_state = :expr_beg
|
909
|
+
self.yacc_value = sign
|
910
|
+
return type
|
911
|
+
elsif src.check(/\*/) then
|
912
|
+
if src.scan(/\*\*=/) then
|
913
|
+
self.lex_state = :expr_beg
|
914
|
+
self.yacc_value = "**"
|
915
|
+
return :tOP_ASGN
|
916
|
+
elsif src.scan(/\*\*/) then
|
917
|
+
self.yacc_value = "**"
|
918
|
+
self.fix_arg_lex_state
|
919
|
+
return :tPOW
|
920
|
+
elsif src.scan(/\*\=/) then
|
921
|
+
self.lex_state = :expr_beg
|
922
|
+
self.yacc_value = "*"
|
923
|
+
return :tOP_ASGN
|
924
|
+
elsif src.scan(/\*/) then
|
925
|
+
result = if lex_state.is_argument && space_seen && src.check(/\S/) then
|
926
|
+
warning("`*' interpreted as argument prefix")
|
927
|
+
:tSTAR
|
928
|
+
elsif lex_state == :expr_beg || lex_state == :expr_mid then
|
929
|
+
:tSTAR
|
930
|
+
else
|
931
|
+
:tSTAR2
|
932
|
+
end
|
933
|
+
self.yacc_value = "*"
|
934
|
+
self.fix_arg_lex_state
|
935
|
+
|
936
|
+
return result
|
2173
937
|
end
|
2174
|
-
|
2175
|
-
|
2176
|
-
|
2177
|
-
|
2178
|
-
|
2179
|
-
|
2180
|
-
|
2181
|
-
|
2182
|
-
|
2183
|
-
|
2184
|
-
|
2185
|
-
|
2186
|
-
|
2187
|
-
|
2188
|
-
|
2189
|
-
|
2190
|
-
|
2191
|
-
|
2192
|
-
|
2193
|
-
|
2194
|
-
|
2195
|
-
|
2196
|
-
|
2197
|
-
|
2198
|
-
|
2199
|
-
|
2200
|
-
|
2201
|
-
|
2202
|
-
src.
|
2203
|
-
|
2204
|
-
|
2205
|
-
|
2206
|
-
|
2207
|
-
|
2208
|
-
|
2209
|
-
|
2210
|
-
# Enebo: c can never be antrhign but '.'
|
2211
|
-
# Why did I put this here?
|
2212
|
-
else
|
2213
|
-
self.yacc_value = token_buffer.join.to_i(10)
|
2214
|
-
return :tINTEGER
|
938
|
+
elsif src.check(/\!/) then
|
939
|
+
if src.scan(/\!\=/) then
|
940
|
+
self.lex_state = :expr_beg
|
941
|
+
self.yacc_value = "!="
|
942
|
+
return :tNEQ
|
943
|
+
elsif src.scan(/\!~/) then
|
944
|
+
self.lex_state = :expr_beg
|
945
|
+
self.yacc_value = "!~"
|
946
|
+
return :tNMATCH
|
947
|
+
elsif src.scan(/\!/) then
|
948
|
+
self.lex_state = :expr_beg
|
949
|
+
self.yacc_value = "!"
|
950
|
+
return :tBANG
|
951
|
+
end
|
952
|
+
elsif src.check(/\</) then
|
953
|
+
if src.scan(/\<\=\>/) then
|
954
|
+
self.fix_arg_lex_state
|
955
|
+
self.yacc_value = "<=>"
|
956
|
+
return :tCMP
|
957
|
+
elsif src.scan(/\<\=/) then
|
958
|
+
self.fix_arg_lex_state
|
959
|
+
self.yacc_value = "<="
|
960
|
+
return :tLEQ
|
961
|
+
elsif src.scan(/\<\<\=/) then
|
962
|
+
self.fix_arg_lex_state
|
963
|
+
self.lex_state = :expr_beg
|
964
|
+
self.yacc_value = "\<\<"
|
965
|
+
return :tOP_ASGN
|
966
|
+
elsif src.scan(/\<\</) then
|
967
|
+
if (! [:expr_end, :expr_dot,
|
968
|
+
:expr_endarg, :expr_class].include?(lex_state) &&
|
969
|
+
(!lex_state.is_argument || space_seen)) then
|
970
|
+
tok = self.heredoc_identifier
|
971
|
+
if tok then
|
972
|
+
return tok
|
973
|
+
end
|
2215
974
|
end
|
2216
|
-
|
2217
|
-
|
2218
|
-
|
2219
|
-
|
2220
|
-
|
975
|
+
|
976
|
+
self.fix_arg_lex_state
|
977
|
+
self.yacc_value = "\<\<"
|
978
|
+
return :tLSHFT
|
979
|
+
elsif src.scan(/\</) then
|
980
|
+
self.fix_arg_lex_state
|
981
|
+
self.yacc_value = "<"
|
982
|
+
return :tLT
|
2221
983
|
end
|
2222
|
-
|
2223
|
-
|
2224
|
-
|
2225
|
-
|
2226
|
-
|
2227
|
-
src.
|
2228
|
-
|
2229
|
-
|
2230
|
-
|
2231
|
-
|
2232
|
-
|
2233
|
-
|
2234
|
-
|
2235
|
-
|
2236
|
-
|
2237
|
-
|
2238
|
-
|
984
|
+
elsif src.check(/\>/) then
|
985
|
+
if src.scan(/\>\=/) then
|
986
|
+
self.fix_arg_lex_state
|
987
|
+
self.yacc_value = ">="
|
988
|
+
return :tGEQ
|
989
|
+
elsif src.scan(/\>\>=/) then
|
990
|
+
self.fix_arg_lex_state
|
991
|
+
self.lex_state = :expr_beg
|
992
|
+
self.yacc_value = ">>"
|
993
|
+
return :tOP_ASGN
|
994
|
+
elsif src.scan(/\>\>/) then
|
995
|
+
self.fix_arg_lex_state
|
996
|
+
self.yacc_value = ">>"
|
997
|
+
return :tRSHFT
|
998
|
+
elsif src.scan(/\>/) then
|
999
|
+
self.fix_arg_lex_state
|
1000
|
+
self.yacc_value = ">"
|
1001
|
+
return :tGT
|
1002
|
+
end
|
1003
|
+
elsif src.scan(/\`/) then
|
1004
|
+
self.yacc_value = "`"
|
1005
|
+
case lex_state
|
1006
|
+
when :expr_fname then
|
1007
|
+
self.lex_state = :expr_end
|
1008
|
+
return :tBACK_REF2
|
1009
|
+
when :expr_dot then
|
1010
|
+
self.lex_state = if command_state then
|
1011
|
+
:expr_cmdarg
|
1012
|
+
else
|
1013
|
+
:expr_arg
|
1014
|
+
end
|
1015
|
+
return :tBACK_REF2
|
1016
|
+
end
|
1017
|
+
self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
|
1018
|
+
return :tXSTRING_BEG
|
1019
|
+
elsif src.scan(/\?/) then
|
1020
|
+
if lex_state == :expr_end || lex_state == :expr_endarg then
|
1021
|
+
self.lex_state = :expr_beg
|
1022
|
+
self.yacc_value = "?"
|
1023
|
+
return :tEH
|
2239
1024
|
end
|
2240
|
-
end
|
2241
|
-
when '_' then # '_' in number just ignored
|
2242
|
-
if nondigit != "\0" then
|
2243
|
-
raise SyntaxError, "Trailing '_' in number."
|
2244
|
-
end
|
2245
|
-
nondigit = c
|
2246
|
-
else
|
2247
|
-
src.unread c
|
2248
|
-
r = number_token(token_buffer.join, seen_e || seen_point, nondigit)
|
2249
|
-
return r
|
2250
|
-
end
|
2251
|
-
c = src.read
|
2252
|
-
end
|
2253
|
-
end
|
2254
|
-
|
2255
|
-
# TODO: remove me
|
2256
|
-
def number_token(number, is_float, nondigit)
|
2257
|
-
if nondigit != "\0" then
|
2258
|
-
raise SyntaxError, "Trailing '_' in number."
|
2259
|
-
end
|
2260
|
-
|
2261
|
-
if is_float then
|
2262
|
-
self.yacc_value = number.to_f
|
2263
|
-
return :tFLOAT
|
2264
|
-
end
|
2265
|
-
|
2266
|
-
self.yacc_value = number.to_i
|
2267
|
-
return :tINTEGER
|
2268
|
-
end
|
2269
|
-
|
2270
|
-
############################################################
|
2271
|
-
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
2272
|
-
|
2273
|
-
def tokadd s # HACK
|
2274
|
-
self.token_buffer << s
|
2275
|
-
end
|
2276
|
-
|
2277
|
-
def warning s
|
2278
|
-
# do nothing for now
|
2279
|
-
end
|
2280
|
-
|
2281
|
-
def rb_compile_error msg
|
2282
|
-
raise msg
|
2283
|
-
end
|
2284
|
-
|
2285
|
-
def is_next_identchar # TODO: ?
|
2286
|
-
c = src.read
|
2287
|
-
src.unread c
|
2288
1025
|
|
2289
|
-
|
2290
|
-
|
1026
|
+
if src.eos? then
|
1027
|
+
rb_compile_error "incomplete character syntax"
|
1028
|
+
end
|
2291
1029
|
|
2292
|
-
|
2293
|
-
|
2294
|
-
|
1030
|
+
if src.check(/\s|\v/) then
|
1031
|
+
unless lex_state.is_argument then
|
1032
|
+
c2 = { " " => 's',
|
1033
|
+
"\n" => 'n',
|
1034
|
+
"\t" => 't',
|
1035
|
+
"\v" => 'v',
|
1036
|
+
"\r" => 'r',
|
1037
|
+
"\f" => 'f' }[src.matched]
|
1038
|
+
|
1039
|
+
if c2 then
|
1040
|
+
warning("invalid character syntax; use ?\\" + c2)
|
1041
|
+
end
|
1042
|
+
end
|
2295
1043
|
|
2296
|
-
|
2297
|
-
|
2298
|
-
|
2299
|
-
|
1044
|
+
# ternary
|
1045
|
+
self.lex_state = :expr_beg
|
1046
|
+
self.yacc_value = "?"
|
1047
|
+
return :tEH
|
1048
|
+
elsif src.check(/\w(?=\w)/) then # ternary, also
|
1049
|
+
self.lex_state = :expr_beg
|
1050
|
+
self.yacc_value = "?"
|
1051
|
+
return :tEH
|
1052
|
+
end
|
2300
1053
|
|
2301
|
-
|
2302
|
-
|
2303
|
-
|
2304
|
-
|
2305
|
-
|
1054
|
+
c = if src.scan(/\\/) then
|
1055
|
+
self.read_escape
|
1056
|
+
else
|
1057
|
+
src.getch
|
1058
|
+
end
|
1059
|
+
self.lex_state = :expr_end
|
1060
|
+
self.yacc_value = c[0].ord & 0xff
|
1061
|
+
return :tINTEGER
|
1062
|
+
elsif src.check(/\&/) then
|
1063
|
+
if src.scan(/\&\&\=/) then
|
1064
|
+
self.yacc_value = "&&"
|
1065
|
+
self.lex_state = :expr_beg
|
1066
|
+
return :tOP_ASGN
|
1067
|
+
elsif src.scan(/\&\&/) then
|
1068
|
+
self.lex_state = :expr_beg
|
1069
|
+
self.yacc_value = "&&"
|
1070
|
+
return :tANDOP
|
1071
|
+
elsif src.scan(/\&\=/) then
|
1072
|
+
self.yacc_value = "&"
|
1073
|
+
self.lex_state = :expr_beg
|
1074
|
+
return :tOP_ASGN
|
1075
|
+
elsif src.scan(/&/) then
|
1076
|
+
result = if lex_state.is_argument && space_seen &&
|
1077
|
+
!src.check(/\s/) then
|
1078
|
+
warning("`&' interpreted as argument prefix")
|
1079
|
+
:tAMPER
|
1080
|
+
elsif lex_state == :expr_beg || lex_state == :expr_mid then
|
1081
|
+
:tAMPER
|
1082
|
+
else
|
1083
|
+
:tAMPER2
|
1084
|
+
end
|
1085
|
+
|
1086
|
+
self.fix_arg_lex_state
|
1087
|
+
self.yacc_value = "&"
|
1088
|
+
return result
|
1089
|
+
end
|
1090
|
+
elsif src.scan(/\//) then
|
1091
|
+
if lex_state == :expr_beg || lex_state == :expr_mid then
|
1092
|
+
self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
|
1093
|
+
self.yacc_value = "/"
|
1094
|
+
return :tREGEXP_BEG
|
1095
|
+
end
|
2306
1096
|
|
2307
|
-
|
2308
|
-
|
1097
|
+
if src.scan(/\=/) then
|
1098
|
+
self.yacc_value = "/"
|
1099
|
+
self.lex_state = :expr_beg
|
1100
|
+
return :tOP_ASGN
|
1101
|
+
end
|
2309
1102
|
|
2310
|
-
|
1103
|
+
if lex_state.is_argument && space_seen then
|
1104
|
+
unless src.scan(/\s/) then
|
1105
|
+
arg_ambiguous
|
1106
|
+
self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
|
1107
|
+
self.yacc_value = "/"
|
1108
|
+
return :tREGEXP_BEG
|
1109
|
+
end
|
1110
|
+
end
|
2311
1111
|
|
2312
|
-
|
2313
|
-
|
1112
|
+
self.fix_arg_lex_state
|
1113
|
+
self.yacc_value = "/"
|
2314
1114
|
|
2315
|
-
|
1115
|
+
return :tDIVIDE
|
1116
|
+
elsif src.scan(/\^=/) then
|
1117
|
+
self.lex_state = :expr_beg
|
1118
|
+
self.yacc_value = "^"
|
1119
|
+
return :tOP_ASGN
|
1120
|
+
elsif src.scan(/\^/) then
|
1121
|
+
self.fix_arg_lex_state
|
1122
|
+
self.yacc_value = "^"
|
1123
|
+
return :tCARET
|
1124
|
+
elsif src.scan(/\;/) then
|
1125
|
+
self.command_start = true
|
1126
|
+
self.lex_state = :expr_beg
|
1127
|
+
self.yacc_value = ";"
|
1128
|
+
return :tSEMI
|
1129
|
+
elsif src.scan(/\~/) then
|
1130
|
+
if lex_state == :expr_fname || lex_state == :expr_dot then
|
1131
|
+
src.scan(/@/)
|
1132
|
+
end
|
2316
1133
|
|
2317
|
-
|
2318
|
-
|
2319
|
-
attr_accessor :name, :id, :state
|
2320
|
-
def initialize(name, id=[], state=nil)
|
2321
|
-
@name = name
|
2322
|
-
@id = id
|
2323
|
-
@state = state
|
2324
|
-
end
|
1134
|
+
self.fix_arg_lex_state
|
1135
|
+
self.yacc_value = "~"
|
2325
1136
|
|
2326
|
-
|
2327
|
-
|
2328
|
-
|
1137
|
+
return :tTILDE
|
1138
|
+
elsif src.scan(/\\/) then
|
1139
|
+
if src.scan(/\n/) then
|
1140
|
+
self.lineno = nil
|
1141
|
+
space_seen = true
|
1142
|
+
next
|
1143
|
+
end
|
1144
|
+
rb_compile_error "bare backslash only allowed before newline"
|
1145
|
+
elsif src.scan(/\%/) then
|
1146
|
+
if lex_state == :expr_beg || lex_state == :expr_mid then
|
1147
|
+
return parse_quote
|
1148
|
+
end
|
2329
1149
|
|
2330
|
-
|
2331
|
-
|
2332
|
-
|
2333
|
-
|
1150
|
+
if src.scan(/\=/) then
|
1151
|
+
self.lex_state = :expr_beg
|
1152
|
+
self.yacc_value = "%"
|
1153
|
+
return :tOP_ASGN
|
1154
|
+
end
|
2334
1155
|
|
2335
|
-
|
2336
|
-
|
2337
|
-
|
2338
|
-
MIN_HASH_VALUE = 6
|
2339
|
-
MAX_HASH_VALUE = 55
|
2340
|
-
# maximum key range = 50, duplicates = 0
|
2341
|
-
|
2342
|
-
def self.hash_keyword(str, len)
|
2343
|
-
hval = len
|
2344
|
-
|
2345
|
-
asso_values = [
|
2346
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2347
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2348
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2349
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2350
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2351
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2352
|
-
56, 56, 56, 11, 56, 56, 36, 56, 1, 37,
|
2353
|
-
31, 1, 56, 56, 56, 56, 29, 56, 1, 56,
|
2354
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2355
|
-
56, 56, 56, 56, 56, 1, 56, 32, 1, 2,
|
2356
|
-
1, 1, 4, 23, 56, 17, 56, 20, 9, 2,
|
2357
|
-
9, 26, 14, 56, 5, 1, 1, 16, 56, 21,
|
2358
|
-
20, 9, 56, 56, 56, 56, 56, 56, 56, 56,
|
2359
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2360
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2361
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2362
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2363
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2364
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2365
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2366
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2367
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2368
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2369
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2370
|
-
56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
|
2371
|
-
56, 56, 56, 56, 56, 56
|
2372
|
-
]
|
2373
|
-
|
2374
|
-
case hval
|
2375
|
-
when 2, 1 then
|
2376
|
-
hval += asso_values[str[0]]
|
2377
|
-
else
|
2378
|
-
hval += asso_values[str[2]]
|
2379
|
-
hval += asso_values[str[0]]
|
2380
|
-
end
|
1156
|
+
if lex_state.is_argument && space_seen && ! src.check(/\s/) then
|
1157
|
+
return parse_quote
|
1158
|
+
end
|
2381
1159
|
|
2382
|
-
|
2383
|
-
|
2384
|
-
|
1160
|
+
self.fix_arg_lex_state
|
1161
|
+
self.yacc_value = "%"
|
1162
|
+
|
1163
|
+
return :tPERCENT
|
1164
|
+
elsif src.check(/\$/) then
|
1165
|
+
if src.scan(/(\$_)(\w+)/) then
|
1166
|
+
self.lex_state = :expr_end
|
1167
|
+
self.token = src.matched
|
1168
|
+
return process_token(command_state)
|
1169
|
+
elsif src.scan(/\$_/) then
|
1170
|
+
self.lex_state = :expr_end
|
1171
|
+
self.token = src.matched
|
1172
|
+
self.yacc_value = src.matched
|
1173
|
+
return :tGVAR
|
1174
|
+
elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
|
1175
|
+
self.lex_state = :expr_end
|
1176
|
+
self.yacc_value = src.matched
|
1177
|
+
return :tGVAR
|
1178
|
+
elsif src.scan(/\$([\&\`\'\+])/) then
|
1179
|
+
self.lex_state = :expr_end
|
1180
|
+
# Explicit reference to these vars as symbols...
|
1181
|
+
if last_state == :expr_fname then
|
1182
|
+
self.yacc_value = src.matched
|
1183
|
+
return :tGVAR
|
1184
|
+
else
|
1185
|
+
self.yacc_value = src[1].to_sym
|
1186
|
+
return :tBACK_REF
|
1187
|
+
end
|
1188
|
+
elsif src.scan(/\$([1-9]\d*)/) then
|
1189
|
+
self.lex_state = :expr_end
|
1190
|
+
if last_state == :expr_fname then
|
1191
|
+
self.yacc_value = src.matched
|
1192
|
+
return :tGVAR
|
1193
|
+
else
|
1194
|
+
self.yacc_value = src[1].to_i
|
1195
|
+
return :tNTH_REF
|
1196
|
+
end
|
1197
|
+
elsif src.scan(/\$0/) then
|
1198
|
+
self.lex_state = :expr_end
|
1199
|
+
self.token = src.matched
|
1200
|
+
return process_token(command_state)
|
1201
|
+
elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
|
1202
|
+
self.lex_state = :expr_end
|
1203
|
+
self.yacc_value = "$"
|
1204
|
+
return "$"
|
1205
|
+
elsif src.scan(/\$\w+/)
|
1206
|
+
self.lex_state = :expr_end
|
1207
|
+
self.token = src.matched
|
1208
|
+
return process_token(command_state)
|
1209
|
+
end
|
1210
|
+
elsif src.check(/\_/) then
|
1211
|
+
if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
|
1212
|
+
self.lineno = nil
|
1213
|
+
return RubyLexer::EOF
|
1214
|
+
elsif src.scan(/\_\w*/) then
|
1215
|
+
self.token = src.matched
|
1216
|
+
return process_token(command_state)
|
1217
|
+
end
|
1218
|
+
end
|
1219
|
+
end # END OF CASE
|
2385
1220
|
|
2386
|
-
|
2387
|
-
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2391
|
-
|
2392
|
-
# :expr_mid = newline significant, +/- is a operator.
|
2393
|
-
# :expr_fname = ignore newline, no reserved words.
|
2394
|
-
# :expr_dot = right after . or ::, no reserved words.
|
2395
|
-
# :expr_class = immediate after class, no here document.
|
2396
|
-
|
2397
|
-
def self.keyword(str, len = str.size)
|
2398
|
-
wordlist = [
|
2399
|
-
[""], [""], [""], [""], [""], [""],
|
2400
|
-
["end", [:kEND, :kEND ], :expr_end ],
|
2401
|
-
["else", [:kELSE, :kELSE ], :expr_beg ],
|
2402
|
-
["case", [:kCASE, :kCASE ], :expr_beg ],
|
2403
|
-
["ensure", [:kENSURE, :kENSURE ], :expr_beg ],
|
2404
|
-
["module", [:kMODULE, :kMODULE ], :expr_beg ],
|
2405
|
-
["elsif", [:kELSIF, :kELSIF ], :expr_beg ],
|
2406
|
-
["def", [:kDEF, :kDEF ], :expr_fname ],
|
2407
|
-
["rescue", [:kRESCUE, :kRESCUE_MOD ], :expr_mid ],
|
2408
|
-
["not", [:kNOT, :kNOT ], :expr_beg ],
|
2409
|
-
["then", [:kTHEN, :kTHEN ], :expr_beg ],
|
2410
|
-
["yield", [:kYIELD, :kYIELD ], :expr_arg ],
|
2411
|
-
["for", [:kFOR, :kFOR ], :expr_beg ],
|
2412
|
-
["self", [:kSELF, :kSELF ], :expr_end ],
|
2413
|
-
["false", [:kFALSE, :kFALSE ], :expr_end ],
|
2414
|
-
["retry", [:kRETRY, :kRETRY ], :expr_end ],
|
2415
|
-
["return", [:kRETURN, :kRETURN ], :expr_mid ],
|
2416
|
-
["true", [:kTRUE, :kTRUE ], :expr_end ],
|
2417
|
-
["if", [:kIF, :kIF_MOD ], :expr_beg ],
|
2418
|
-
["defined?", [:kDEFINED, :kDEFINED ], :expr_arg ],
|
2419
|
-
["super", [:kSUPER, :kSUPER ], :expr_arg ],
|
2420
|
-
["undef", [:kUNDEF, :kUNDEF ], :expr_fname ],
|
2421
|
-
["break", [:kBREAK, :kBREAK ], :expr_mid ],
|
2422
|
-
["in", [:kIN, :kIN ], :expr_beg ],
|
2423
|
-
["do", [:kDO, :kDO ], :expr_beg ],
|
2424
|
-
["nil", [:kNIL, :kNIL ], :expr_end ],
|
2425
|
-
["until", [:kUNTIL, :kUNTIL_MOD ], :expr_beg ],
|
2426
|
-
["unless", [:kUNLESS, :kUNLESS_MOD ], :expr_beg ],
|
2427
|
-
["or", [:kOR, :kOR ], :expr_beg ],
|
2428
|
-
["next", [:kNEXT, :kNEXT ], :expr_mid ],
|
2429
|
-
["when", [:kWHEN, :kWHEN ], :expr_beg ],
|
2430
|
-
["redo", [:kREDO, :kREDO ], :expr_end ],
|
2431
|
-
["and", [:kAND, :kAND ], :expr_beg ],
|
2432
|
-
["begin", [:kBEGIN, :kBEGIN ], :expr_beg ],
|
2433
|
-
["__LINE__", [:k__LINE__, :k__LINE__ ], :expr_end ],
|
2434
|
-
["class", [:kCLASS, :kCLASS ], :expr_class ],
|
2435
|
-
["__FILE__", [:k__FILE__, :k__FILE__ ], :expr_end ],
|
2436
|
-
["END", [:klEND, :klEND ], :expr_end ],
|
2437
|
-
["BEGIN", [:klBEGIN, :klBEGIN ], :expr_end ],
|
2438
|
-
["while", [:kWHILE, :kWHILE_MOD ], :expr_beg ],
|
2439
|
-
[""], [""], [""], [""], [""], [""], [""], [""], [""],
|
2440
|
-
[""],
|
2441
|
-
["alias", [:kALIAS, :kALIAS ], :expr_fname ],
|
2442
|
-
].map { |args| KWtable.new(*args) }
|
2443
|
-
|
2444
|
-
if len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH then
|
2445
|
-
key = hash_keyword(str, len)
|
2446
|
-
if key <= MAX_HASH_VALUE && key >= 0 then
|
2447
|
-
s = wordlist[key].name
|
2448
|
-
return wordlist[key] if str == s
|
1221
|
+
if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
|
1222
|
+
return RubyLexer::EOF
|
1223
|
+
else # alpha check
|
1224
|
+
if src.scan(/\W/) then
|
1225
|
+
rb_compile_error "Invalid char #{src.matched.inspect} in expression"
|
1226
|
+
end
|
2449
1227
|
end
|
2450
|
-
end
|
2451
|
-
|
2452
|
-
return nil
|
2453
|
-
end
|
2454
|
-
end
|
2455
|
-
|
2456
|
-
class Environment
|
2457
|
-
attr_reader :env, :dyn
|
2458
|
-
attr_accessor :init
|
2459
1228
|
|
2460
|
-
|
2461
|
-
@dyn = []
|
2462
|
-
@env = []
|
2463
|
-
@use = []
|
2464
|
-
@init = false
|
2465
|
-
self.extend
|
2466
|
-
end
|
1229
|
+
self.token = src.matched if self.src.scan(/\w+/)
|
2467
1230
|
|
2468
|
-
|
2469
|
-
@env.each_with_index do |env, i|
|
2470
|
-
if env[id] then
|
2471
|
-
@use[i][id] = true
|
2472
|
-
end
|
1231
|
+
return process_token(command_state)
|
2473
1232
|
end
|
2474
1233
|
end
|
2475
1234
|
|
2476
|
-
def
|
2477
|
-
idx = @dyn.index false # REFACTOR
|
2478
|
-
u = @use[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
|
2479
|
-
u[id]
|
2480
|
-
end
|
2481
|
-
|
2482
|
-
def [] k
|
2483
|
-
self.all[k]
|
2484
|
-
end
|
2485
|
-
|
2486
|
-
def []= k, v
|
2487
|
-
raise "no" if v == true
|
2488
|
-
self.current[k] = v
|
2489
|
-
end
|
2490
|
-
|
2491
|
-
def has_key? k
|
2492
|
-
self.all.has_key? k
|
2493
|
-
end
|
2494
|
-
|
2495
|
-
def all
|
2496
|
-
idx = @dyn.index false
|
2497
|
-
@env[0..idx].reverse.inject { |env, scope| env.merge scope }
|
2498
|
-
end
|
2499
|
-
|
2500
|
-
def dynamic
|
2501
|
-
idx = @dyn.index false
|
2502
|
-
@env[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
|
2503
|
-
end
|
2504
|
-
|
2505
|
-
def current
|
2506
|
-
@env.first
|
2507
|
-
end
|
2508
|
-
|
2509
|
-
def dynamic?
|
2510
|
-
@dyn[0] != false
|
2511
|
-
end
|
2512
|
-
|
2513
|
-
def dasgn_curr? name # TODO: I think this is wrong - nuke
|
2514
|
-
(! has_key?(name) && dynamic?) || current.has_key?(name)
|
2515
|
-
end
|
2516
|
-
|
2517
|
-
def extend dyn = false
|
2518
|
-
@dyn.unshift dyn
|
2519
|
-
@env.unshift({})
|
2520
|
-
@use.unshift({})
|
2521
|
-
end
|
2522
|
-
|
2523
|
-
def unextend
|
2524
|
-
@dyn.shift
|
2525
|
-
@env.shift
|
2526
|
-
@use.shift
|
2527
|
-
raise "You went too far unextending env" if @env.empty?
|
2528
|
-
end
|
2529
|
-
end
|
2530
|
-
|
2531
|
-
class StackState
|
2532
|
-
attr_reader :stack
|
2533
|
-
|
2534
|
-
def inspect
|
2535
|
-
"StackState(#{@name}, #{@stack.inspect})"
|
2536
|
-
end
|
2537
|
-
|
2538
|
-
def initialize(name)
|
2539
|
-
@name = name
|
2540
|
-
@stack = [false]
|
2541
|
-
end
|
2542
|
-
|
2543
|
-
def pop
|
2544
|
-
# raise "#{@name} empty" if @stack.size <= 1
|
2545
|
-
r = @stack.pop
|
2546
|
-
@stack.push false if @stack.size == 0
|
2547
|
-
r
|
2548
|
-
end
|
2549
|
-
|
2550
|
-
def lexpop
|
2551
|
-
raise if @stack.size == 0
|
2552
|
-
a = @stack.pop
|
2553
|
-
b = @stack.pop
|
2554
|
-
@stack.push(a || b)
|
2555
|
-
end
|
2556
|
-
|
2557
|
-
def push val
|
2558
|
-
raise if val != true and val != false
|
2559
|
-
@stack.push val
|
2560
|
-
end
|
2561
|
-
|
2562
|
-
def is_in_state
|
2563
|
-
@stack.last
|
2564
|
-
end
|
2565
|
-
end
|
2566
|
-
|
2567
|
-
def t str
|
2568
|
-
Token.new str
|
2569
|
-
end
|
2570
|
-
|
2571
|
-
class Token # TODO: nuke this and use sexps
|
2572
|
-
attr_accessor :args
|
2573
|
-
def initialize(token)
|
2574
|
-
@args = Array(token)
|
2575
|
-
end
|
2576
|
-
|
2577
|
-
def value # TODO: eventually phase this out (or make it official)
|
2578
|
-
self.args.first
|
2579
|
-
end
|
2580
|
-
|
2581
|
-
def first # HACK
|
2582
|
-
self.args.first
|
2583
|
-
end
|
2584
|
-
|
2585
|
-
def inspect
|
2586
|
-
"t(#{args.join.inspect})"
|
2587
|
-
end
|
2588
|
-
|
2589
|
-
def to_sym
|
2590
|
-
self.value.to_sym
|
2591
|
-
end
|
2592
|
-
|
2593
|
-
def == o
|
2594
|
-
Token === o and self.args == o.args
|
2595
|
-
end
|
2596
|
-
end
|
1235
|
+
def process_token(command_state)
|
2597
1236
|
|
2598
|
-
|
2599
|
-
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
1237
|
+
token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
|
2600
1238
|
|
2601
|
-
|
2602
|
-
|
2603
|
-
return self == :expr_arg || self == :expr_cmdarg
|
2604
|
-
end
|
2605
|
-
end
|
1239
|
+
result = nil
|
1240
|
+
last_state = lex_state
|
2606
1241
|
|
2607
|
-
class StringIO # HACK: everything in here is a hack
|
2608
|
-
attr_accessor :begin_of_line, :was_begin_of_line
|
2609
|
-
alias :begin_of_line? :begin_of_line
|
2610
|
-
alias :read_all :read
|
2611
|
-
|
2612
|
-
alias :old_initialize :initialize
|
2613
|
-
|
2614
|
-
def initialize(*args)
|
2615
|
-
self.begin_of_line = true
|
2616
|
-
self.was_begin_of_line = false
|
2617
|
-
old_initialize(*args)
|
2618
|
-
@original_string = self.string.dup
|
2619
|
-
end
|
2620
1242
|
|
2621
|
-
|
2622
|
-
|
2623
|
-
|
1243
|
+
case token
|
1244
|
+
when /^\$/ then
|
1245
|
+
self.lex_state, result = :expr_end, :tGVAR
|
1246
|
+
when /^@@/ then
|
1247
|
+
self.lex_state, result = :expr_end, :tCVAR
|
1248
|
+
when /^@/ then
|
1249
|
+
self.lex_state, result = :expr_end, :tIVAR
|
1250
|
+
else
|
1251
|
+
if token =~ /[!?]$/ then
|
1252
|
+
result = :tFID
|
1253
|
+
else
|
1254
|
+
if lex_state == :expr_fname then
|
1255
|
+
# ident=, not =~ => == or followed by =>
|
1256
|
+
# TODO test lexing of a=>b vs a==>b
|
1257
|
+
if src.scan(/=(?:(?![~>=])|(?==>))/) then
|
1258
|
+
result = :tIDENTIFIER
|
1259
|
+
token << src.matched
|
1260
|
+
end
|
1261
|
+
end
|
2624
1262
|
|
2625
|
-
|
2626
|
-
|
2627
|
-
|
1263
|
+
result ||= if token =~ /^[A-Z]/ then
|
1264
|
+
:tCONSTANT
|
1265
|
+
else
|
1266
|
+
:tIDENTIFIER
|
1267
|
+
end
|
1268
|
+
end
|
2628
1269
|
|
2629
|
-
|
2630
|
-
|
1270
|
+
unless lex_state == :expr_dot then
|
1271
|
+
# See if it is a reserved word.
|
1272
|
+
keyword = Keyword.keyword token
|
1273
|
+
|
1274
|
+
if keyword then
|
1275
|
+
state = lex_state
|
1276
|
+
self.lex_state = keyword.state
|
1277
|
+
self.yacc_value = token
|
1278
|
+
|
1279
|
+
if keyword.id0 == :kDO then
|
1280
|
+
self.command_start = true
|
1281
|
+
return :kDO_COND if cond.is_in_state
|
1282
|
+
return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
|
1283
|
+
return :kDO_BLOCK if state == :expr_endarg
|
1284
|
+
return :kDO
|
1285
|
+
end
|
2631
1286
|
|
2632
|
-
|
2633
|
-
d = self.getc
|
2634
|
-
self.ungetc d if d and d != ?\n
|
2635
|
-
c = ?\n
|
2636
|
-
end
|
2637
|
-
|
2638
|
-
self.was_begin_of_line = self.begin_of_line
|
2639
|
-
self.begin_of_line = c == ?\n
|
2640
|
-
if c and c != 0 then
|
2641
|
-
c.chr
|
2642
|
-
else
|
2643
|
-
::RubyLexer::EOF
|
2644
|
-
end
|
2645
|
-
end
|
1287
|
+
return keyword.id0 if state == :expr_beg
|
2646
1288
|
|
2647
|
-
|
2648
|
-
buffer = []
|
1289
|
+
self.lex_state = :expr_beg if keyword.id0 != keyword.id1
|
2649
1290
|
|
2650
|
-
|
2651
|
-
while c = self.read do
|
2652
|
-
if c !~ /\s/ or c == "\n" or c == "\r" then
|
2653
|
-
self.unread c
|
2654
|
-
break
|
1291
|
+
return keyword.id1
|
2655
1292
|
end
|
2656
|
-
buffer << c
|
2657
1293
|
end
|
2658
|
-
end
|
2659
1294
|
|
2660
|
-
|
2661
|
-
|
2662
|
-
|
2663
|
-
|
2664
|
-
|
2665
|
-
|
2666
|
-
|
1295
|
+
if (lex_state == :expr_beg || lex_state == :expr_mid ||
|
1296
|
+
lex_state == :expr_dot || lex_state == :expr_arg ||
|
1297
|
+
lex_state == :expr_cmdarg) then
|
1298
|
+
if command_state then
|
1299
|
+
self.lex_state = :expr_cmdarg
|
1300
|
+
else
|
1301
|
+
self.lex_state = :expr_arg
|
1302
|
+
end
|
1303
|
+
else
|
1304
|
+
self.lex_state = :expr_end
|
2667
1305
|
end
|
2668
1306
|
end
|
2669
1307
|
|
2670
|
-
|
2671
|
-
end
|
2672
|
-
|
2673
|
-
def read_line
|
2674
|
-
self.begin_of_line = true
|
2675
|
-
self.was_begin_of_line = false
|
2676
|
-
gets.sub(/\r\n?$/, "\n") # HACK
|
2677
|
-
end
|
2678
|
-
|
2679
|
-
def peek expected = nil # FIX: barf
|
2680
|
-
c = self.getc
|
2681
|
-
return RubyLexer::EOF if c.nil?
|
2682
|
-
self.ungetc c if c
|
2683
|
-
c = c.chr if c
|
2684
|
-
if expected then
|
2685
|
-
c == expected
|
2686
|
-
else
|
2687
|
-
c
|
2688
|
-
end
|
2689
|
-
end
|
2690
|
-
|
2691
|
-
def unread(c)
|
2692
|
-
return if c.nil? # UGH
|
2693
|
-
|
2694
|
-
# HACK: only depth is 2... who cares? really I want to remove all of this
|
2695
|
-
self.begin_of_line = self.was_begin_of_line || true
|
2696
|
-
self.was_begin_of_line = nil
|
2697
|
-
|
2698
|
-
c = c[0] if String === c
|
2699
|
-
self.ungetc c
|
2700
|
-
end
|
1308
|
+
self.yacc_value = token
|
2701
1309
|
|
2702
|
-
def unread_many str
|
2703
|
-
str.split(//).reverse.each do |c|
|
2704
|
-
unread c
|
2705
|
-
end
|
2706
|
-
end
|
2707
|
-
end
|
2708
1310
|
|
2709
|
-
|
2710
|
-
|
1311
|
+
self.lex_state = :expr_end if
|
1312
|
+
last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
|
2711
1313
|
|
2712
|
-
|
2713
|
-
@paren ||= false
|
1314
|
+
return result
|
2714
1315
|
end
|
2715
1316
|
|
2716
|
-
def
|
2717
|
-
|
2718
|
-
|
2719
|
-
|
1317
|
+
def yylex_string # 23 lines
|
1318
|
+
token = if lex_strterm[0] == :heredoc then
|
1319
|
+
self.heredoc lex_strterm
|
1320
|
+
else
|
1321
|
+
self.parse_string lex_strterm
|
1322
|
+
end
|
2720
1323
|
|
2721
|
-
|
2722
|
-
|
2723
|
-
|
1324
|
+
if token == :tSTRING_END || token == :tREGEXP_END then
|
1325
|
+
self.lineno = nil
|
1326
|
+
self.lex_strterm = nil
|
1327
|
+
self.lex_state = :expr_end
|
1328
|
+
end
|
2724
1329
|
|
2725
|
-
|
2726
|
-
first
|
1330
|
+
return token
|
2727
1331
|
end
|
2728
|
-
|
2729
|
-
kill :add, :add_all
|
2730
|
-
end
|
2731
|
-
|
2732
|
-
def bitch
|
2733
|
-
c = caller
|
2734
|
-
m = c[0].split.last
|
2735
|
-
warn "bitch: you shouldn't be doing #{m}: from #{c[1]}"
|
2736
1332
|
end
|
2737
|
-
|
2738
|
-
# class NilClass
|
2739
|
-
# def method_missing msg, *args
|
2740
|
-
# c = caller
|
2741
|
-
# warn "called #{msg} on nil (args = #{args.inspect}): from #{c[0]}"
|
2742
|
-
# nil
|
2743
|
-
# end
|
2744
|
-
# end
|
2745
|
-
|
2746
|
-
# def d s
|
2747
|
-
# warn s.inspect
|
2748
|
-
# end
|
2749
|
-
|
2750
|
-
# END HACK
|
2751
|
-
############################################################
|