ruby_parser 3.2.2 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.autotest +0 -2
- data/History.txt +38 -0
- data/Rakefile +2 -6
- data/lib/ruby18_parser.rb +2003 -2010
- data/lib/ruby18_parser.y +6 -8
- data/lib/ruby19_parser.rb +2016 -2004
- data/lib/ruby19_parser.y +6 -8
- data/lib/ruby20_parser.rb +2189 -2154
- data/lib/ruby20_parser.y +21 -13
- data/lib/ruby_lexer.rb +645 -812
- data/lib/ruby_parser_extras.rb +17 -46
- data/test/test_ruby_lexer.rb +1280 -1108
- data/test/test_ruby_parser.rb +101 -1
- data/test/test_ruby_parser_extras.rb +1 -63
- metadata +112 -133
- metadata.gz.sig +0 -0
data/lib/ruby20_parser.y
CHANGED
@@ -263,7 +263,7 @@ rule
|
|
263
263
|
}
|
264
264
|
opt_block_param
|
265
265
|
{
|
266
|
-
result = self.env.dynamic.keys
|
266
|
+
result = nil # self.env.dynamic.keys
|
267
267
|
}
|
268
268
|
compstmt tRCURLY
|
269
269
|
{
|
@@ -785,7 +785,6 @@ rule
|
|
785
785
|
}
|
786
786
|
| arg tEH arg opt_nl tCOLON arg
|
787
787
|
{
|
788
|
-
lexer.tern.pop
|
789
788
|
result = s(:if, val[0], val[2], val[5])
|
790
789
|
}
|
791
790
|
| primary
|
@@ -1136,7 +1135,7 @@ rule
|
|
1136
1135
|
}
|
1137
1136
|
| kDEF fname
|
1138
1137
|
{
|
1139
|
-
result =
|
1138
|
+
result = self.in_def
|
1140
1139
|
|
1141
1140
|
self.comments.push self.lexer.comments
|
1142
1141
|
self.in_def = true
|
@@ -1144,10 +1143,9 @@ rule
|
|
1144
1143
|
}
|
1145
1144
|
f_arglist bodystmt kEND
|
1146
1145
|
{
|
1147
|
-
|
1146
|
+
in_def = val[2]
|
1148
1147
|
|
1149
1148
|
result = new_defn val
|
1150
|
-
result[2].line line
|
1151
1149
|
|
1152
1150
|
self.env.unextend
|
1153
1151
|
self.in_def = in_def
|
@@ -1429,7 +1427,9 @@ opt_block_args_tail: tCOMMA block_args_tail
|
|
1429
1427
|
| f_bad_arg
|
1430
1428
|
|
1431
1429
|
lambda: {
|
1432
|
-
|
1430
|
+
self.env.extend :dynamic
|
1431
|
+
result = self.lexer.lineno
|
1432
|
+
|
1433
1433
|
result = lexer.lpar_beg
|
1434
1434
|
lexer.paren_nest += 1
|
1435
1435
|
lexer.lpar_beg = lexer.paren_nest
|
@@ -1443,6 +1443,7 @@ opt_block_args_tail: tCOMMA block_args_tail
|
|
1443
1443
|
|
1444
1444
|
call = new_call nil, :lambda
|
1445
1445
|
result = new_iter call, args, body
|
1446
|
+
self.env.unextend
|
1446
1447
|
}
|
1447
1448
|
|
1448
1449
|
f_larglist: tLPAREN2 f_args opt_bv_decl rparen
|
@@ -1470,7 +1471,7 @@ opt_block_args_tail: tCOMMA block_args_tail
|
|
1470
1471
|
}
|
1471
1472
|
opt_block_param
|
1472
1473
|
{
|
1473
|
-
result = self.env.dynamic.keys
|
1474
|
+
result = nil # self.env.dynamic.keys
|
1474
1475
|
}
|
1475
1476
|
compstmt kEND
|
1476
1477
|
{
|
@@ -1556,7 +1557,7 @@ opt_block_args_tail: tCOMMA block_args_tail
|
|
1556
1557
|
}
|
1557
1558
|
opt_block_param
|
1558
1559
|
{
|
1559
|
-
result = self.env.dynamic.keys
|
1560
|
+
result = nil # self.env.dynamic.keys
|
1560
1561
|
}
|
1561
1562
|
compstmt tRCURLY
|
1562
1563
|
{
|
@@ -1574,7 +1575,7 @@ opt_block_args_tail: tCOMMA block_args_tail
|
|
1574
1575
|
}
|
1575
1576
|
opt_block_param
|
1576
1577
|
{
|
1577
|
-
result = self.env.dynamic.keys
|
1578
|
+
result = nil # self.env.dynamic.keys
|
1578
1579
|
}
|
1579
1580
|
compstmt kEND
|
1580
1581
|
{
|
@@ -1972,13 +1973,14 @@ keyword_variable: kNIL { result = s(:nil) }
|
|
1972
1973
|
f_arglist: tLPAREN2 f_args rparen
|
1973
1974
|
{
|
1974
1975
|
result = val[1]
|
1975
|
-
lexer.lex_state = :expr_beg
|
1976
|
+
self.lexer.lex_state = :expr_beg
|
1976
1977
|
self.lexer.command_start = true
|
1977
1978
|
}
|
1978
1979
|
| f_args term
|
1979
1980
|
{
|
1980
|
-
self.lexer.lex_state = :expr_beg
|
1981
1981
|
result = val[0]
|
1982
|
+
self.lexer.lex_state = :expr_beg
|
1983
|
+
self.lexer.command_start = true
|
1982
1984
|
}
|
1983
1985
|
|
1984
1986
|
args_tail: f_kwarg tCOMMA f_kwrest opt_f_block_arg
|
@@ -2127,14 +2129,20 @@ keyword_variable: kNIL { result = s(:nil) }
|
|
2127
2129
|
{
|
2128
2130
|
# TODO: call_args
|
2129
2131
|
label, _ = val[0] # TODO: fix lineno?
|
2130
|
-
|
2132
|
+
identifier = label.to_sym
|
2133
|
+
self.env[identifier] = :lvar
|
2134
|
+
|
2135
|
+
result = s(:array, s(:kwarg, identifier, val[1]))
|
2131
2136
|
}
|
2132
2137
|
|
2133
2138
|
f_block_kw: tLABEL primary_value
|
2134
2139
|
{
|
2135
2140
|
# TODO: call_args
|
2136
2141
|
label, _ = val[0] # TODO: fix lineno?
|
2137
|
-
|
2142
|
+
identifier = label.to_sym
|
2143
|
+
self.env[identifier] = :lvar
|
2144
|
+
|
2145
|
+
result = s(:array, s(:kwarg, identifier, val[1]))
|
2138
2146
|
}
|
2139
2147
|
|
2140
2148
|
f_block_kwarg: f_block_kw
|
data/lib/ruby_lexer.rb
CHANGED
@@ -5,61 +5,21 @@ class RubyLexer
|
|
5
5
|
# :stopdoc:
|
6
6
|
RUBY19 = "".respond_to? :encoding
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
IDENT_RE = /^#{IDENT_CHAR_RE}+/o
|
15
|
-
|
16
|
-
attr_accessor :command_start
|
17
|
-
attr_accessor :cmdarg
|
18
|
-
attr_accessor :cond
|
19
|
-
attr_accessor :tern # TODO: rename ternary damnit... wtf
|
20
|
-
attr_accessor :string_nest
|
21
|
-
|
22
|
-
ESC_RE = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc]))/u
|
23
|
-
# :startdoc:
|
24
|
-
|
25
|
-
##
|
26
|
-
# What version of ruby to parse. 18 and 19 are the only valid values
|
27
|
-
# currently supported.
|
28
|
-
|
29
|
-
attr_accessor :version
|
30
|
-
|
31
|
-
# Additional context surrounding tokens that both the lexer and
|
32
|
-
# grammar use.
|
33
|
-
attr_reader :lex_state
|
34
|
-
|
35
|
-
attr_accessor :lex_strterm
|
36
|
-
|
37
|
-
attr_accessor :parser # HACK for very end of lexer... *sigh*
|
38
|
-
|
39
|
-
# Stream of data that yylex examines.
|
40
|
-
attr_reader :src
|
41
|
-
|
42
|
-
# Last token read via yylex.
|
43
|
-
attr_accessor :token
|
44
|
-
|
45
|
-
attr_accessor :string_buffer
|
46
|
-
|
47
|
-
# Value of last token which had a value associated with it.
|
48
|
-
attr_accessor :yacc_value
|
49
|
-
|
50
|
-
# What handles warnings
|
51
|
-
attr_accessor :warnings
|
8
|
+
IDENT_CHAR = if RUBY19 then
|
9
|
+
/[\w\u0080-\u{10ffff}]/u
|
10
|
+
else
|
11
|
+
/[\w\x80-\xFF]/n
|
12
|
+
end
|
52
13
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
14
|
+
IDENT = /^#{IDENT_CHAR}+/o
|
15
|
+
ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/u
|
16
|
+
SIMPLE_STRING = /(#{ESC}|#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
|
17
|
+
SIMPLE_SSTRING = /(\\.|[^\'])*/
|
57
18
|
|
58
19
|
EOF = :eof_haha!
|
59
20
|
|
60
21
|
# ruby constants for strings (should this be moved somewhere else?)
|
61
22
|
|
62
|
-
# :stopdoc:
|
63
23
|
STR_FUNC_BORING = 0x00
|
64
24
|
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
|
65
25
|
STR_FUNC_EXPAND = 0x02
|
@@ -75,6 +35,22 @@ class RubyLexer
|
|
75
35
|
STR_SSYM = STR_FUNC_SYMBOL
|
76
36
|
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
77
37
|
|
38
|
+
ESCAPES = {
|
39
|
+
"a" => "\007",
|
40
|
+
"b" => "\010",
|
41
|
+
"e" => "\033",
|
42
|
+
"f" => "\f",
|
43
|
+
"n" => "\n",
|
44
|
+
"r" => "\r",
|
45
|
+
"s" => " ",
|
46
|
+
"t" => "\t",
|
47
|
+
"v" => "\13",
|
48
|
+
"\\" => '\\',
|
49
|
+
"\n" => "",
|
50
|
+
"C-\?" => 127.chr,
|
51
|
+
"c\?" => 127.chr,
|
52
|
+
}
|
53
|
+
|
78
54
|
TOKENS = {
|
79
55
|
"!" => :tBANG,
|
80
56
|
"!=" => :tNEQ,
|
@@ -90,8 +66,58 @@ class RubyLexer
|
|
90
66
|
"=~" => :tMATCH,
|
91
67
|
"->" => :tLAMBDA,
|
92
68
|
}
|
69
|
+
|
70
|
+
@@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
|
71
|
+
@@regexp_cache[nil] = nil
|
72
|
+
|
93
73
|
# :startdoc:
|
94
74
|
|
75
|
+
attr_accessor :brace_nest
|
76
|
+
attr_accessor :cmdarg
|
77
|
+
attr_accessor :command_start
|
78
|
+
attr_accessor :cond
|
79
|
+
|
80
|
+
##
|
81
|
+
# Additional context surrounding tokens that both the lexer and
|
82
|
+
# grammar use.
|
83
|
+
|
84
|
+
attr_accessor :lex_state
|
85
|
+
|
86
|
+
attr_accessor :lex_strterm
|
87
|
+
attr_accessor :lpar_beg
|
88
|
+
attr_accessor :paren_nest
|
89
|
+
attr_accessor :parser # HACK for very end of lexer... *sigh*
|
90
|
+
attr_accessor :space_seen
|
91
|
+
attr_accessor :string_buffer
|
92
|
+
attr_accessor :string_nest
|
93
|
+
|
94
|
+
# Stream of data that yylex examines.
|
95
|
+
attr_reader :src
|
96
|
+
alias :ss :src
|
97
|
+
|
98
|
+
# Last token read via yylex.
|
99
|
+
attr_accessor :token
|
100
|
+
|
101
|
+
##
|
102
|
+
# What version of ruby to parse. 18 and 19 are the only valid values
|
103
|
+
# currently supported.
|
104
|
+
|
105
|
+
attr_accessor :version
|
106
|
+
|
107
|
+
# Value of last token which had a value associated with it.
|
108
|
+
attr_accessor :yacc_value
|
109
|
+
|
110
|
+
attr_writer :lineno # reader is lazy initializer
|
111
|
+
|
112
|
+
attr_writer :comments
|
113
|
+
|
114
|
+
def initialize v = 18
|
115
|
+
self.version = v
|
116
|
+
|
117
|
+
reset
|
118
|
+
end
|
119
|
+
|
120
|
+
##
|
95
121
|
# How the parser advances to the next token.
|
96
122
|
#
|
97
123
|
# @return true if not at end of file (EOF).
|
@@ -100,7 +126,7 @@ class RubyLexer
|
|
100
126
|
r = yylex
|
101
127
|
self.token = r
|
102
128
|
|
103
|
-
raise "yylex returned nil" unless r
|
129
|
+
raise "yylex returned nil, near #{ss.rest[0,10].inspect}" unless r
|
104
130
|
|
105
131
|
return RubyLexer::EOF != r
|
106
132
|
end
|
@@ -109,28 +135,35 @@ class RubyLexer
|
|
109
135
|
self.warning("Ambiguous first argument. make sure.")
|
110
136
|
end
|
111
137
|
|
112
|
-
def
|
138
|
+
def arg_state
|
139
|
+
in_arg_state? ? :expr_arg : :expr_beg
|
140
|
+
end
|
141
|
+
|
142
|
+
def beginning_of_line?
|
143
|
+
ss.bol?
|
144
|
+
end
|
145
|
+
|
146
|
+
def check re
|
147
|
+
ss.check re
|
148
|
+
end
|
149
|
+
|
150
|
+
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
113
151
|
c = @comments.join
|
114
152
|
@comments.clear
|
115
153
|
c
|
116
154
|
end
|
117
155
|
|
118
|
-
def
|
119
|
-
|
120
|
-
cmdarg.push false
|
121
|
-
self.lex_state = :expr_beg
|
122
|
-
self.yacc_value = val
|
156
|
+
def end_of_stream?
|
157
|
+
ss.eos?
|
123
158
|
end
|
124
159
|
|
125
|
-
def
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
:expr_beg
|
130
|
-
end
|
160
|
+
def expr_result token, text
|
161
|
+
cond.push false
|
162
|
+
cmdarg.push false
|
163
|
+
result :expr_beg, token, text
|
131
164
|
end
|
132
165
|
|
133
|
-
def heredoc here #
|
166
|
+
def heredoc here # TODO: rewrite / remove
|
134
167
|
_, eos, func, last_line = here
|
135
168
|
|
136
169
|
indent = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
|
@@ -138,11 +171,10 @@ class RubyLexer
|
|
138
171
|
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
139
172
|
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
140
173
|
|
141
|
-
rb_compile_error err_msg if
|
142
|
-
src.eos?
|
174
|
+
rb_compile_error err_msg if end_of_stream?
|
143
175
|
|
144
|
-
if
|
145
|
-
|
176
|
+
if beginning_of_line? && scan(eos_re) then
|
177
|
+
ss.unread_many last_line # TODO: figure out how to remove this
|
146
178
|
self.yacc_value = eos
|
147
179
|
return :tSTRING_END
|
148
180
|
end
|
@@ -151,14 +183,14 @@ class RubyLexer
|
|
151
183
|
|
152
184
|
if expand then
|
153
185
|
case
|
154
|
-
when
|
155
|
-
|
156
|
-
self.yacc_value =
|
186
|
+
when scan(/#[$@]/) then
|
187
|
+
ss.pos -= 1 # FIX omg stupid
|
188
|
+
self.yacc_value = matched
|
157
189
|
return :tSTRING_DVAR
|
158
|
-
when
|
159
|
-
self.yacc_value =
|
190
|
+
when scan(/#[{]/) then
|
191
|
+
self.yacc_value = matched
|
160
192
|
return :tSTRING_DBEG
|
161
|
-
when
|
193
|
+
when scan(/#/) then
|
162
194
|
string_buffer << '#'
|
163
195
|
end
|
164
196
|
|
@@ -172,34 +204,32 @@ class RubyLexer
|
|
172
204
|
self.yacc_value = string_buffer.join.delete("\r")
|
173
205
|
return :tSTRING_CONTENT
|
174
206
|
else
|
175
|
-
string_buffer <<
|
207
|
+
string_buffer << scan(/\n/)
|
176
208
|
end
|
177
209
|
|
178
|
-
rb_compile_error err_msg if
|
179
|
-
|
180
|
-
end until src.check(eos_re)
|
210
|
+
rb_compile_error err_msg if end_of_stream?
|
211
|
+
end until check(eos_re)
|
181
212
|
else
|
182
|
-
until
|
183
|
-
string_buffer <<
|
184
|
-
rb_compile_error err_msg if
|
185
|
-
src.eos?
|
213
|
+
until check(eos_re) do
|
214
|
+
string_buffer << scan(/.*(\n|\z)/)
|
215
|
+
rb_compile_error err_msg if end_of_stream?
|
186
216
|
end
|
187
217
|
end
|
188
218
|
|
189
219
|
self.lex_strterm = [:heredoc, eos, func, last_line]
|
190
|
-
self.yacc_value = string_buffer.join.delete("\r")
|
191
220
|
|
221
|
+
self.yacc_value = string_buffer.join.delete("\r")
|
192
222
|
return :tSTRING_CONTENT
|
193
223
|
end
|
194
224
|
|
195
|
-
def heredoc_identifier #
|
225
|
+
def heredoc_identifier # TODO: remove / rewrite
|
196
226
|
term, func = nil, STR_FUNC_BORING
|
197
227
|
self.string_buffer = []
|
198
228
|
|
199
229
|
case
|
200
|
-
when
|
201
|
-
term =
|
202
|
-
func |= STR_FUNC_INDENT unless
|
230
|
+
when scan(/(-?)([\'\"\`])(.*?)\2/) then
|
231
|
+
term = ss[2]
|
232
|
+
func |= STR_FUNC_INDENT unless ss[1].empty?
|
203
233
|
func |= case term
|
204
234
|
when "\'" then
|
205
235
|
STR_SQUOTE
|
@@ -208,24 +238,24 @@ class RubyLexer
|
|
208
238
|
else
|
209
239
|
STR_XQUOTE
|
210
240
|
end
|
211
|
-
string_buffer <<
|
212
|
-
when
|
241
|
+
string_buffer << ss[3]
|
242
|
+
when scan(/-?([\'\"\`])(?!\1*\Z)/) then
|
213
243
|
rb_compile_error "unterminated here document identifier"
|
214
|
-
when
|
244
|
+
when scan(/(-?)(#{IDENT_CHAR}+)/) then
|
215
245
|
term = '"'
|
216
246
|
func |= STR_DQUOTE
|
217
|
-
unless
|
247
|
+
unless ss[1].empty? then
|
218
248
|
func |= STR_FUNC_INDENT
|
219
249
|
end
|
220
|
-
string_buffer <<
|
250
|
+
string_buffer << ss[2]
|
221
251
|
else
|
222
252
|
return nil
|
223
253
|
end
|
224
254
|
|
225
|
-
if
|
255
|
+
if scan(/.*\n/) then
|
226
256
|
# TODO: think about storing off the char range instead
|
227
|
-
line =
|
228
|
-
|
257
|
+
line = matched
|
258
|
+
ss.extra_lines_added += 1 # FIX: ugh
|
229
259
|
else
|
230
260
|
line = nil
|
231
261
|
end
|
@@ -241,41 +271,47 @@ class RubyLexer
|
|
241
271
|
end
|
242
272
|
end
|
243
273
|
|
274
|
+
def in_arg_state? # TODO: rename is_after_operator?
|
275
|
+
in_lex_state? :expr_fname, :expr_dot
|
276
|
+
end
|
277
|
+
|
244
278
|
def in_lex_state?(*states)
|
245
279
|
states.include? lex_state
|
246
280
|
end
|
247
281
|
|
248
|
-
def
|
249
|
-
|
250
|
-
self.cond = RubyParserStuff::StackState.new(:cond)
|
251
|
-
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
|
252
|
-
self.tern = RubyParserStuff::StackState.new(:tern)
|
253
|
-
self.string_nest = 0
|
254
|
-
self.paren_nest = 0
|
255
|
-
self.brace_nest = 0
|
256
|
-
self.lpar_beg = nil
|
282
|
+
def int_with_base base
|
283
|
+
rb_compile_error "Invalid numeric format" if matched =~ /__/
|
257
284
|
|
258
|
-
|
285
|
+
self.yacc_value = matched.to_i(base)
|
286
|
+
return :tINTEGER
|
287
|
+
end
|
259
288
|
|
260
|
-
|
289
|
+
def is_arg?
|
290
|
+
in_lex_state? :expr_arg, :expr_cmdarg
|
261
291
|
end
|
262
292
|
|
263
|
-
def
|
264
|
-
|
293
|
+
def is_beg?
|
294
|
+
in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class
|
295
|
+
end
|
265
296
|
|
266
|
-
|
267
|
-
|
297
|
+
def is_end?
|
298
|
+
in_lex_state? :expr_end, :expr_endarg, :expr_endfn
|
268
299
|
end
|
269
300
|
|
270
|
-
def
|
271
|
-
|
272
|
-
|
273
|
-
|
301
|
+
def is_label_possible? command_state
|
302
|
+
(in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
|
303
|
+
end
|
304
|
+
|
305
|
+
def is_space_arg? c = "x"
|
306
|
+
is_arg? and space_seen and c !~ /\s/
|
274
307
|
end
|
275
308
|
|
276
|
-
attr_writer :lineno
|
277
309
|
def lineno
|
278
|
-
@lineno ||=
|
310
|
+
@lineno ||= ss.lineno
|
311
|
+
end
|
312
|
+
|
313
|
+
def matched
|
314
|
+
ss.matched
|
279
315
|
end
|
280
316
|
|
281
317
|
##
|
@@ -288,45 +324,45 @@ class RubyLexer
|
|
288
324
|
self.lex_state = :expr_end
|
289
325
|
|
290
326
|
case
|
291
|
-
when
|
327
|
+
when scan(/[+-]?0[xXbBdD]\b/) then
|
292
328
|
rb_compile_error "Invalid numeric format"
|
293
|
-
when
|
329
|
+
when scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
|
294
330
|
int_with_base(10)
|
295
|
-
when
|
331
|
+
when scan(/[+-]?0x[a-f0-9_]+/i) then
|
296
332
|
int_with_base(16)
|
297
|
-
when
|
333
|
+
when scan(/[+-]?0[Bb][01_]+/) then
|
298
334
|
int_with_base(2)
|
299
|
-
when
|
335
|
+
when scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
|
300
336
|
rb_compile_error "Illegal octal digit."
|
301
|
-
when
|
337
|
+
when scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
|
302
338
|
int_with_base(8)
|
303
|
-
when
|
339
|
+
when scan(/[+-]?[\d_]+_(e|\.)/) then
|
304
340
|
rb_compile_error "Trailing '_' in number."
|
305
|
-
when
|
306
|
-
number =
|
341
|
+
when scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
|
342
|
+
number = matched
|
307
343
|
if number =~ /__/ then
|
308
344
|
rb_compile_error "Invalid numeric format"
|
309
345
|
end
|
310
346
|
self.yacc_value = number.to_f
|
311
347
|
:tFLOAT
|
312
|
-
when
|
348
|
+
when scan(/[+-]?[0-9_]+(?![e])/) then
|
313
349
|
int_with_base(10)
|
314
350
|
else
|
315
351
|
rb_compile_error "Bad number format"
|
316
352
|
end
|
317
353
|
end
|
318
354
|
|
319
|
-
def parse_quote #
|
355
|
+
def parse_quote # TODO: remove / rewrite
|
320
356
|
beg, nnd, short_hand, c = nil, nil, false, nil
|
321
357
|
|
322
|
-
if
|
323
|
-
rb_compile_error "unknown type of %string" if
|
324
|
-
c, beg, short_hand =
|
358
|
+
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
359
|
+
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
360
|
+
c, beg, short_hand = matched, ss.getch, false
|
325
361
|
else # Short-hand (e.g. %{, %., %!, etc)
|
326
|
-
c, beg, short_hand = 'Q',
|
362
|
+
c, beg, short_hand = 'Q', ss.getch, true
|
327
363
|
end
|
328
364
|
|
329
|
-
if
|
365
|
+
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
330
366
|
rb_compile_error "unterminated quoted string meets end of file"
|
331
367
|
end
|
332
368
|
|
@@ -334,19 +370,19 @@ class RubyLexer
|
|
334
370
|
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
335
371
|
nnd, beg = beg, "\0" if nnd.nil?
|
336
372
|
|
337
|
-
token_type,
|
373
|
+
token_type, text = nil, "%#{c}#{beg}"
|
338
374
|
token_type, string_type = case c
|
339
375
|
when 'Q' then
|
340
376
|
ch = short_hand ? nnd : c + beg
|
341
|
-
|
377
|
+
text = "%#{ch}"
|
342
378
|
[:tSTRING_BEG, STR_DQUOTE]
|
343
379
|
when 'q' then
|
344
380
|
[:tSTRING_BEG, STR_SQUOTE]
|
345
381
|
when 'W' then
|
346
|
-
|
382
|
+
scan(/\s*/)
|
347
383
|
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
348
384
|
when 'w' then
|
349
|
-
|
385
|
+
scan(/\s*/)
|
350
386
|
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
351
387
|
when 'x' then
|
352
388
|
[:tXSTRING_BEG, STR_XQUOTE]
|
@@ -356,20 +392,25 @@ class RubyLexer
|
|
356
392
|
self.lex_state = :expr_fname
|
357
393
|
[:tSYMBEG, STR_SSYM]
|
358
394
|
when 'I' then
|
395
|
+
src.scan(/\s*/)
|
359
396
|
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
360
397
|
when 'i' then
|
398
|
+
src.scan(/\s*/)
|
361
399
|
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
362
400
|
end
|
363
401
|
|
364
|
-
rb_compile_error "Bad %string type. Expected [
|
402
|
+
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
365
403
|
token_type.nil?
|
366
404
|
|
367
|
-
|
405
|
+
raise "huh" unless string_type
|
406
|
+
|
407
|
+
string string_type, nnd, beg
|
368
408
|
|
409
|
+
self.yacc_value = text
|
369
410
|
return token_type
|
370
411
|
end
|
371
412
|
|
372
|
-
def parse_string
|
413
|
+
def parse_string quote # TODO: rewrite / remove
|
373
414
|
_, string_type, term, open = quote
|
374
415
|
|
375
416
|
space = false # FIX: remove these
|
@@ -381,41 +422,39 @@ class RubyLexer
|
|
381
422
|
regexp = (func & STR_FUNC_REGEXP) != 0
|
382
423
|
expand = (func & STR_FUNC_EXPAND) != 0
|
383
424
|
|
384
|
-
unless func then #
|
425
|
+
unless func then # nil'ed from qwords below. *sigh*
|
385
426
|
self.lineno = nil
|
386
427
|
return :tSTRING_END
|
387
428
|
end
|
388
429
|
|
389
|
-
space = true if qwords and
|
430
|
+
space = true if qwords and scan(/\s+/)
|
390
431
|
|
391
|
-
if self.string_nest == 0 &&
|
432
|
+
if self.string_nest == 0 && scan(/#{term_re}/) then
|
392
433
|
if qwords then
|
393
|
-
quote[1] = nil
|
434
|
+
quote[1] = nil
|
394
435
|
return :tSPACE
|
395
436
|
elsif regexp then
|
396
|
-
self.yacc_value = self.regx_options
|
397
437
|
self.lineno = nil
|
438
|
+
self.yacc_value = self.regx_options
|
398
439
|
return :tREGEXP_END
|
399
440
|
else
|
400
|
-
self.yacc_value = term
|
401
441
|
self.lineno = nil
|
442
|
+
self.yacc_value = term
|
402
443
|
return :tSTRING_END
|
403
444
|
end
|
404
445
|
end
|
405
446
|
|
406
|
-
if space
|
407
|
-
return :tSPACE
|
408
|
-
end
|
447
|
+
return :tSPACE if space
|
409
448
|
|
410
449
|
self.string_buffer = []
|
411
450
|
|
412
451
|
if expand
|
413
452
|
case
|
414
|
-
when
|
453
|
+
when scan(/#(?=[$@])/) then
|
415
454
|
return :tSTRING_DVAR
|
416
|
-
when
|
455
|
+
when scan(/#[{]/) then
|
417
456
|
return :tSTRING_DBEG
|
418
|
-
when
|
457
|
+
when scan(/#/) then
|
419
458
|
string_buffer << '#'
|
420
459
|
end
|
421
460
|
end
|
@@ -429,71 +468,159 @@ class RubyLexer
|
|
429
468
|
return :tSTRING_CONTENT
|
430
469
|
end
|
431
470
|
|
471
|
+
def process_token command_state, last_state
|
472
|
+
token = self.token
|
473
|
+
token << matched if scan(/[\!\?](?!=)/)
|
474
|
+
|
475
|
+
tok_id =
|
476
|
+
case
|
477
|
+
when token =~ /[!?]$/ then
|
478
|
+
:tFID
|
479
|
+
when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
|
480
|
+
# ident=, not =~ => == or followed by =>
|
481
|
+
# TODO test lexing of a=>b vs a==>b
|
482
|
+
token << matched
|
483
|
+
:tIDENTIFIER
|
484
|
+
when token =~ /^[A-Z]/ then
|
485
|
+
:tCONSTANT
|
486
|
+
else
|
487
|
+
:tIDENTIFIER
|
488
|
+
end
|
489
|
+
|
490
|
+
if !ruby18 and is_label_possible?(command_state) and scan(/:(?!:)/) then
|
491
|
+
return result(:expr_beg, :tLABEL, [token, ss.lineno]) # HACK: array? TODO: self.lineno
|
492
|
+
end
|
493
|
+
|
494
|
+
unless in_lex_state? :expr_dot then
|
495
|
+
# See if it is a reserved word.
|
496
|
+
keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
|
497
|
+
RubyParserStuff::Keyword.keyword18 token
|
498
|
+
else
|
499
|
+
RubyParserStuff::Keyword.keyword19 token
|
500
|
+
end
|
501
|
+
|
502
|
+
return process_token_keyword keyword if keyword
|
503
|
+
end # unless in_lex_state? :expr_dot
|
504
|
+
|
505
|
+
# TODO:
|
506
|
+
# if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
|
507
|
+
|
508
|
+
state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
|
509
|
+
command_state ? :expr_cmdarg : :expr_arg
|
510
|
+
elsif not ruby18 and in_lex_state? :expr_fname then
|
511
|
+
:expr_endfn
|
512
|
+
else
|
513
|
+
:expr_end
|
514
|
+
end
|
515
|
+
|
516
|
+
if not [:expr_dot, :expr_fname].include? last_state and
|
517
|
+
self.parser.env[token.to_sym] == :lvar then
|
518
|
+
state = :expr_end
|
519
|
+
end
|
520
|
+
|
521
|
+
return result(state, tok_id, token)
|
522
|
+
end
|
523
|
+
|
524
|
+
def process_token_keyword keyword
|
525
|
+
state = keyword.state
|
526
|
+
value = [token, ss.lineno] # TODO: use self.lineno ?
|
527
|
+
|
528
|
+
self.command_start = true if state == :expr_beg and lex_state != :expr_fname
|
529
|
+
|
530
|
+
case
|
531
|
+
when lex_state == :expr_fname then
|
532
|
+
result(state, keyword.id0, keyword.name)
|
533
|
+
when keyword.id0 == :kDO then
|
534
|
+
case
|
535
|
+
when lpar_beg && lpar_beg == paren_nest then
|
536
|
+
self.lpar_beg = nil
|
537
|
+
self.paren_nest -= 1
|
538
|
+
result(state, :kDO_LAMBDA, value)
|
539
|
+
when cond.is_in_state then
|
540
|
+
result(state, :kDO_COND, value)
|
541
|
+
when cmdarg.is_in_state && lex_state != :expr_cmdarg then
|
542
|
+
result(state, :kDO_BLOCK, value)
|
543
|
+
when in_lex_state?(:expr_beg, :expr_endarg) then
|
544
|
+
result(state, :kDO_BLOCK, value)
|
545
|
+
else
|
546
|
+
result(state, :kDO, value)
|
547
|
+
end
|
548
|
+
when in_lex_state?(:expr_beg, :expr_value) then
|
549
|
+
result(state, keyword.id0, value)
|
550
|
+
when keyword.id0 != keyword.id1 then
|
551
|
+
result(:expr_beg, keyword.id1, value)
|
552
|
+
else
|
553
|
+
result(state, keyword.id1, value)
|
554
|
+
end
|
555
|
+
end
|
556
|
+
|
432
557
|
def rb_compile_error msg
|
433
|
-
msg += ". near line #{self.lineno}: #{
|
558
|
+
msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
|
434
559
|
raise RubyParser::SyntaxError, msg
|
435
560
|
end
|
436
561
|
|
437
|
-
def read_escape #
|
562
|
+
def read_escape # TODO: remove / rewrite
|
438
563
|
case
|
439
|
-
when
|
564
|
+
when scan(/\\/) then # Backslash
|
440
565
|
'\\'
|
441
|
-
when
|
566
|
+
when scan(/n/) then # newline
|
442
567
|
"\n"
|
443
|
-
when
|
568
|
+
when scan(/t/) then # horizontal tab
|
444
569
|
"\t"
|
445
|
-
when
|
570
|
+
when scan(/r/) then # carriage-return
|
446
571
|
"\r"
|
447
|
-
when
|
572
|
+
when scan(/f/) then # form-feed
|
448
573
|
"\f"
|
449
|
-
when
|
574
|
+
when scan(/v/) then # vertical tab
|
450
575
|
"\13"
|
451
|
-
when
|
576
|
+
when scan(/a/) then # alarm(bell)
|
452
577
|
"\007"
|
453
|
-
when
|
578
|
+
when scan(/e/) then # escape
|
454
579
|
"\033"
|
455
|
-
when
|
580
|
+
when scan(/b/) then # backspace
|
456
581
|
"\010"
|
457
|
-
when
|
582
|
+
when scan(/s/) then # space
|
458
583
|
" "
|
459
|
-
when
|
460
|
-
(
|
461
|
-
when
|
462
|
-
|
463
|
-
when
|
464
|
-
|
584
|
+
when scan(/[0-7]{1,3}/) then # octal constant
|
585
|
+
(matched.to_i(8) & 0xFF).chr
|
586
|
+
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
587
|
+
ss[1].to_i(16).chr
|
588
|
+
when check(/M-\\[\\MCc]/) then
|
589
|
+
scan(/M-\\/) # eat it
|
465
590
|
c = self.read_escape
|
466
591
|
c[0] = (c[0].ord | 0x80).chr
|
467
592
|
c
|
468
|
-
when
|
469
|
-
c =
|
593
|
+
when scan(/M-(.)/) then
|
594
|
+
c = ss[1]
|
470
595
|
c[0] = (c[0].ord | 0x80).chr
|
471
596
|
c
|
472
|
-
when
|
473
|
-
|
597
|
+
when check(/(C-|c)\\[\\MCc]/) then
|
598
|
+
scan(/(C-|c)\\/) # eat it
|
474
599
|
c = self.read_escape
|
475
600
|
c[0] = (c[0].ord & 0x9f).chr
|
476
601
|
c
|
477
|
-
when
|
602
|
+
when scan(/C-\?|c\?/) then
|
478
603
|
127.chr
|
479
|
-
when
|
480
|
-
c =
|
604
|
+
when scan(/(C-|c)(.)/) then
|
605
|
+
c = ss[2]
|
481
606
|
c[0] = (c[0].ord & 0x9f).chr
|
482
607
|
c
|
483
|
-
when
|
484
|
-
|
485
|
-
when
|
608
|
+
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
609
|
+
matched
|
610
|
+
when scan(/u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/) then
|
611
|
+
[ss[1].delete("{}").to_i(16)].pack("U")
|
612
|
+
when scan(/[McCx0-9]/) || end_of_stream? then
|
486
613
|
rb_compile_error("Invalid escape character syntax")
|
487
614
|
else
|
488
|
-
|
615
|
+
ss.getch
|
489
616
|
end
|
490
617
|
end
|
491
618
|
|
492
|
-
def regx_options #
|
619
|
+
def regx_options # TODO: rewrite / remove
|
493
620
|
good, bad = [], []
|
494
621
|
|
495
|
-
if
|
496
|
-
good, bad =
|
622
|
+
if scan(/[a-z]+/) then
|
623
|
+
good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
|
497
624
|
end
|
498
625
|
|
499
626
|
unless bad.empty? then
|
@@ -505,13 +632,30 @@ class RubyLexer
|
|
505
632
|
end
|
506
633
|
|
507
634
|
def reset
|
635
|
+
self.brace_nest = 0
|
508
636
|
self.command_start = true
|
637
|
+
self.comments = []
|
638
|
+
self.lex_state = nil
|
509
639
|
self.lex_strterm = nil
|
640
|
+
self.lineno = 1
|
641
|
+
self.lpar_beg = nil
|
642
|
+
self.paren_nest = 0
|
643
|
+
self.space_seen = false
|
644
|
+
self.string_nest = 0
|
510
645
|
self.token = nil
|
511
646
|
self.yacc_value = nil
|
512
647
|
|
513
|
-
|
514
|
-
|
648
|
+
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
|
649
|
+
self.cond = RubyParserStuff::StackState.new(:cond)
|
650
|
+
|
651
|
+
@src = nil
|
652
|
+
end
|
653
|
+
|
654
|
+
def result lex_state, token, text # :nodoc:
|
655
|
+
lex_state = self.arg_state if lex_state == :arg_state
|
656
|
+
self.lex_state = lex_state if lex_state
|
657
|
+
self.yacc_value = text
|
658
|
+
token
|
515
659
|
end
|
516
660
|
|
517
661
|
def ruby18
|
@@ -522,35 +666,52 @@ class RubyLexer
|
|
522
666
|
Ruby19Parser === parser
|
523
667
|
end
|
524
668
|
|
669
|
+
def scan re
|
670
|
+
ss.scan re
|
671
|
+
end
|
672
|
+
|
673
|
+
def space_vs_beginning space_type, beg_type, fallback
|
674
|
+
if is_space_arg? check(/./m) then
|
675
|
+
warning "`**' interpreted as argument prefix"
|
676
|
+
space_type
|
677
|
+
elsif is_beg? then
|
678
|
+
beg_type
|
679
|
+
else
|
680
|
+
# TODO: warn_balanced("**", "argument prefix");
|
681
|
+
fallback
|
682
|
+
end
|
683
|
+
end
|
684
|
+
|
685
|
+
def string type, beg = matched, nnd = "\0"
|
686
|
+
self.lex_strterm = [:strterm, type, beg, nnd]
|
687
|
+
end
|
688
|
+
|
525
689
|
def src= src
|
526
690
|
raise "bad src: #{src.inspect}" unless String === src
|
527
691
|
@src = RPStringScanner.new(src)
|
528
692
|
end
|
529
693
|
|
530
|
-
def tokadd_escape term #
|
694
|
+
def tokadd_escape term # TODO: rewrite / remove
|
531
695
|
case
|
532
|
-
when
|
696
|
+
when scan(/\\\n/) then
|
533
697
|
# just ignore
|
534
|
-
when
|
535
|
-
self.string_buffer <<
|
536
|
-
when
|
537
|
-
self.string_buffer <<
|
698
|
+
when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
|
699
|
+
self.string_buffer << matched
|
700
|
+
when scan(/\\([MC]-|c)(?=\\)/) then
|
701
|
+
self.string_buffer << matched
|
538
702
|
self.tokadd_escape term
|
539
|
-
when
|
540
|
-
self.string_buffer <<
|
541
|
-
when
|
703
|
+
when scan(/\\([MC]-|c)(.)/) then
|
704
|
+
self.string_buffer << matched
|
705
|
+
when scan(/\\[McCx]/) then
|
542
706
|
rb_compile_error "Invalid escape character syntax"
|
543
|
-
when
|
544
|
-
self.string_buffer <<
|
707
|
+
when scan(/\\(.)/m) then
|
708
|
+
self.string_buffer << matched
|
545
709
|
else
|
546
710
|
rb_compile_error "Invalid escape character syntax"
|
547
711
|
end
|
548
712
|
end
|
549
713
|
|
550
|
-
|
551
|
-
@@regexp_cache[nil] = nil
|
552
|
-
|
553
|
-
def tokadd_string(func, term, paren) # 105 lines
|
714
|
+
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
554
715
|
qwords = (func & STR_FUNC_QWORDS) != 0
|
555
716
|
escape = (func & STR_FUNC_ESCAPE) != 0
|
556
717
|
expand = (func & STR_FUNC_EXPAND) != 0
|
@@ -560,49 +721,49 @@ class RubyLexer
|
|
560
721
|
paren_re = @@regexp_cache[paren]
|
561
722
|
term_re = @@regexp_cache[term]
|
562
723
|
|
563
|
-
until
|
724
|
+
until end_of_stream? do
|
564
725
|
c = nil
|
565
726
|
handled = true
|
566
727
|
|
567
728
|
case
|
568
|
-
when paren_re &&
|
729
|
+
when paren_re && scan(paren_re) then
|
569
730
|
self.string_nest += 1
|
570
|
-
when
|
731
|
+
when scan(term_re) then
|
571
732
|
if self.string_nest == 0 then
|
572
|
-
|
733
|
+
ss.pos -= 1
|
573
734
|
break
|
574
735
|
else
|
575
736
|
self.string_nest -= 1
|
576
737
|
end
|
577
|
-
when expand &&
|
578
|
-
|
738
|
+
when expand && scan(/#(?=[\$\@\{])/) then
|
739
|
+
ss.pos -= 1
|
579
740
|
break
|
580
|
-
when qwords &&
|
581
|
-
|
741
|
+
when qwords && scan(/\s/) then
|
742
|
+
ss.pos -= 1
|
582
743
|
break
|
583
|
-
when expand &&
|
744
|
+
when expand && scan(/#(?!\n)/) then
|
584
745
|
# do nothing
|
585
|
-
when
|
746
|
+
when check(/\\/) then
|
586
747
|
case
|
587
|
-
when qwords &&
|
748
|
+
when qwords && scan(/\\\n/) then
|
588
749
|
string_buffer << "\n"
|
589
750
|
next
|
590
|
-
when qwords &&
|
751
|
+
when qwords && scan(/\\\s/) then
|
591
752
|
c = ' '
|
592
|
-
when expand &&
|
753
|
+
when expand && scan(/\\\n/) then
|
593
754
|
next
|
594
|
-
when regexp &&
|
755
|
+
when regexp && check(/\\/) then
|
595
756
|
self.tokadd_escape term
|
596
757
|
next
|
597
|
-
when expand &&
|
758
|
+
when expand && scan(/\\/) then
|
598
759
|
c = self.read_escape
|
599
|
-
when
|
760
|
+
when scan(/\\\n/) then
|
600
761
|
# do nothing
|
601
|
-
when
|
762
|
+
when scan(/\\\\/) then
|
602
763
|
string_buffer << '\\' if escape
|
603
764
|
c = '\\'
|
604
|
-
when
|
605
|
-
unless
|
765
|
+
when scan(/\\/) then
|
766
|
+
unless scan(term_re) || paren.nil? || scan(paren_re) then
|
606
767
|
string_buffer << "\\"
|
607
768
|
end
|
608
769
|
else
|
@@ -625,38 +786,22 @@ class RubyLexer
|
|
625
786
|
/[^#{t}#{x}\#\0\\]+|./
|
626
787
|
end
|
627
788
|
|
628
|
-
|
629
|
-
c =
|
789
|
+
scan re
|
790
|
+
c = matched
|
630
791
|
|
631
792
|
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
|
632
793
|
end # unless handled
|
633
794
|
|
634
|
-
c ||=
|
795
|
+
c ||= matched
|
635
796
|
string_buffer << c
|
636
797
|
end # until
|
637
798
|
|
638
|
-
c ||=
|
639
|
-
c = RubyLexer::EOF if
|
799
|
+
c ||= matched
|
800
|
+
c = RubyLexer::EOF if end_of_stream?
|
640
801
|
|
641
802
|
return c
|
642
803
|
end
|
643
804
|
|
644
|
-
ESCAPES = {
|
645
|
-
"a" => "\007",
|
646
|
-
"b" => "\010",
|
647
|
-
"e" => "\033",
|
648
|
-
"f" => "\f",
|
649
|
-
"n" => "\n",
|
650
|
-
"r" => "\r",
|
651
|
-
"s" => " ",
|
652
|
-
"t" => "\t",
|
653
|
-
"v" => "\13",
|
654
|
-
"\\" => '\\',
|
655
|
-
"\n" => "",
|
656
|
-
"C-\?" => 127.chr,
|
657
|
-
"c\?" => 127.chr,
|
658
|
-
}
|
659
|
-
|
660
805
|
def unescape s
|
661
806
|
r = ESCAPES[s]
|
662
807
|
|
@@ -675,6 +820,8 @@ class RubyLexer
|
|
675
820
|
s
|
676
821
|
when /^[McCx0-9]/ then
|
677
822
|
rb_compile_error("Invalid escape character syntax")
|
823
|
+
when /u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/ then
|
824
|
+
[$1.delete("{}").to_i(16)].pack("U")
|
678
825
|
else
|
679
826
|
s
|
680
827
|
end
|
@@ -691,11 +838,11 @@ class RubyLexer
|
|
691
838
|
#
|
692
839
|
# @return Description of the Returned Value
|
693
840
|
|
694
|
-
def yylex #
|
841
|
+
def yylex # 461 lines
|
695
842
|
c = ''
|
696
843
|
self.space_seen = false
|
697
844
|
command_state = false
|
698
|
-
|
845
|
+
ss = self.src
|
699
846
|
|
700
847
|
self.token = nil
|
701
848
|
self.yacc_value = nil
|
@@ -708,41 +855,42 @@ class RubyLexer
|
|
708
855
|
last_state = lex_state
|
709
856
|
|
710
857
|
loop do # START OF CASE
|
711
|
-
if
|
858
|
+
if scan(/[\ \t\r\f\v]/) then # \s - \n + \v
|
712
859
|
self.space_seen = true
|
713
860
|
next
|
714
|
-
elsif
|
715
|
-
if
|
861
|
+
elsif check(/[^a-zA-Z]/) then
|
862
|
+
if scan(/\n|\#/) then
|
716
863
|
self.lineno = nil
|
717
|
-
c =
|
864
|
+
c = matched
|
718
865
|
if c == '#' then
|
719
|
-
|
866
|
+
ss.pos -= 1
|
720
867
|
|
721
|
-
while
|
722
|
-
|
868
|
+
while scan(/\s*#.*(\n+|\z)/) do
|
869
|
+
# TODO: self.lineno += matched.lines.to_a.size
|
870
|
+
@comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
|
723
871
|
end
|
724
872
|
|
725
|
-
return RubyLexer::EOF if
|
873
|
+
return RubyLexer::EOF if end_of_stream?
|
726
874
|
end
|
727
875
|
|
728
876
|
# Replace a string of newlines with a single one
|
729
|
-
|
877
|
+
scan(/\n+/)
|
730
878
|
|
731
879
|
next if in_lex_state?(:expr_beg, :expr_value, :expr_class,
|
732
880
|
:expr_fname, :expr_dot)
|
733
881
|
|
734
|
-
if
|
735
|
-
self.space_seen = true unless
|
882
|
+
if scan(/([\ \t\r\f\v]*)\./) then
|
883
|
+
self.space_seen = true unless ss[1].empty?
|
736
884
|
|
737
|
-
|
738
|
-
next unless
|
885
|
+
ss.pos -= 1
|
886
|
+
next unless check(/\.\./)
|
739
887
|
end
|
740
888
|
|
741
889
|
self.command_start = true
|
742
|
-
|
743
|
-
return :tNL
|
744
|
-
elsif
|
745
|
-
if
|
890
|
+
|
891
|
+
return result(:expr_beg, :tNL, nil)
|
892
|
+
elsif scan(/[\]\)\}]/) then
|
893
|
+
if matched == "}" then
|
746
894
|
self.brace_nest -= 1
|
747
895
|
else
|
748
896
|
self.paren_nest -= 1
|
@@ -750,54 +898,34 @@ class RubyLexer
|
|
750
898
|
|
751
899
|
cond.lexpop
|
752
900
|
cmdarg.lexpop
|
753
|
-
tern.lexpop
|
754
|
-
|
755
|
-
self.lex_state = if src.matched == ")" then
|
756
|
-
:expr_endfn
|
757
|
-
else
|
758
|
-
:expr_endarg
|
759
|
-
end
|
760
901
|
|
761
|
-
|
762
|
-
|
902
|
+
text = matched
|
903
|
+
state = text == ")" ? :expr_endfn : :expr_endarg
|
904
|
+
token = {
|
763
905
|
")" => :tRPAREN,
|
764
906
|
"]" => :tRBRACK,
|
765
907
|
"}" => :tRCURLY
|
766
|
-
}[
|
767
|
-
return result
|
768
|
-
elsif src.scan(/\!/) then
|
769
|
-
if in_lex_state?(:expr_fname, :expr_dot) then
|
770
|
-
self.lex_state = :expr_arg
|
771
|
-
|
772
|
-
if src.scan(/@/) then
|
773
|
-
self.yacc_value = "!@"
|
774
|
-
return :tUBANG
|
775
|
-
end
|
776
|
-
else
|
777
|
-
self.lex_state = :expr_beg
|
778
|
-
end
|
908
|
+
}[text]
|
779
909
|
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
910
|
+
return result(state, token, text)
|
911
|
+
elsif scan(/\!/) then
|
912
|
+
if in_arg_state? then
|
913
|
+
return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
|
784
914
|
end
|
785
915
|
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
return TOKENS[
|
791
|
-
elsif
|
792
|
-
if
|
916
|
+
text = scan(/[=~]/) ? "!#{matched}" : "!"
|
917
|
+
|
918
|
+
return result(arg_state, TOKENS[text], text)
|
919
|
+
elsif scan(/\.\.\.?|,|![=~]?/) then
|
920
|
+
return result(:expr_beg, TOKENS[matched], matched)
|
921
|
+
elsif check(/\./) then
|
922
|
+
if scan(/\.\d/) then
|
793
923
|
rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
|
794
|
-
elsif
|
795
|
-
|
796
|
-
self.yacc_value = "."
|
797
|
-
return :tDOT
|
924
|
+
elsif scan(/\./) then
|
925
|
+
return result(:expr_dot, :tDOT, ".")
|
798
926
|
end
|
799
|
-
elsif
|
800
|
-
|
927
|
+
elsif scan(/\(/) then
|
928
|
+
token = if ruby18 then
|
801
929
|
yylex_paren18
|
802
930
|
else
|
803
931
|
yylex_paren19
|
@@ -805,259 +933,188 @@ class RubyLexer
|
|
805
933
|
|
806
934
|
self.paren_nest += 1
|
807
935
|
|
808
|
-
|
936
|
+
return expr_result(token, "(")
|
937
|
+
elsif check(/\=/) then
|
938
|
+
if scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
|
939
|
+
tok = matched
|
940
|
+
return result(:arg_state, TOKENS[tok], tok)
|
941
|
+
elsif beginning_of_line? and scan(/\=begin(?=\s)/) then
|
942
|
+
@comments << matched
|
809
943
|
|
810
|
-
|
811
|
-
elsif src.check(/\=/) then
|
812
|
-
if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
|
813
|
-
self.fix_arg_lex_state
|
814
|
-
tok = self.yacc_value = src.matched
|
815
|
-
return TOKENS[tok]
|
816
|
-
elsif src.scan(/\=begin(?=\s)/) then
|
817
|
-
@comments << src.matched
|
818
|
-
|
819
|
-
unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
|
944
|
+
unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
|
820
945
|
@comments.clear
|
821
946
|
rb_compile_error("embedded document meets end of file")
|
822
947
|
end
|
823
948
|
|
824
|
-
@comments <<
|
949
|
+
@comments << matched
|
825
950
|
|
826
951
|
next
|
952
|
+
elsif scan(/\=(?=begin\b)/) then # h[k]=begin ... end
|
953
|
+
tok = matched
|
954
|
+
return result(:arg_state, TOKENS[tok], tok)
|
827
955
|
else
|
828
956
|
raise "you shouldn't be able to get here"
|
829
957
|
end
|
830
|
-
elsif
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
self.token
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
elsif src.scan(/\:\:/) then
|
958
|
+
elsif scan(/\"(#{SIMPLE_STRING})\"/o) then
|
959
|
+
string = matched[1..-2].gsub(ESC) { unescape $1 }
|
960
|
+
return result(:expr_end, :tSTRING, string)
|
961
|
+
elsif scan(/\"/) then # FALLBACK
|
962
|
+
string STR_DQUOTE, '"' # TODO: question this
|
963
|
+
return result(nil, :tSTRING_BEG, '"')
|
964
|
+
elsif scan(/\@\@?#{IDENT_CHAR}+/o) then
|
965
|
+
self.token = matched
|
966
|
+
|
967
|
+
rb_compile_error "`#{self.token}` is not allowed as a variable name" if
|
968
|
+
self.token =~ /\@\d/
|
969
|
+
|
970
|
+
tok_id = matched =~ /^@@/ ? :tCVAR : :tIVAR
|
971
|
+
return result(:expr_end, tok_id, self.token)
|
972
|
+
elsif scan(/\:\:/) then
|
846
973
|
if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
|
847
|
-
|
848
|
-
self.yacc_value = "::"
|
849
|
-
return :tCOLON3
|
974
|
+
return result(:expr_beg, :tCOLON3, "::")
|
850
975
|
end
|
851
976
|
|
852
|
-
|
853
|
-
|
854
|
-
return :tCOLON2
|
855
|
-
elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
|
977
|
+
return result(:expr_dot, :tCOLON2, "::")
|
978
|
+
elsif ! is_end? && scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
|
856
979
|
# scanning shortcut to symbols
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
980
|
+
return result(:expr_end, :tSYMBOL, ss[1])
|
981
|
+
elsif ! is_end? && (scan(/\:\"(#{SIMPLE_STRING})\"/) ||
|
982
|
+
scan(/\:\'(#{SIMPLE_SSTRING})\'/)) then
|
983
|
+
symbol = ss[1].gsub(ESC) { unescape $1 }
|
984
|
+
|
985
|
+
rb_compile_error "symbol cannot contain '\\0'" if
|
986
|
+
ruby18 && symbol =~ /\0/
|
987
|
+
|
988
|
+
return result(:expr_end, :tSYMBOL, symbol)
|
989
|
+
elsif scan(/\:/) then
|
861
990
|
# ?: / then / when
|
862
|
-
if is_end? ||
|
863
|
-
self.lex_state = :expr_beg
|
991
|
+
if is_end? || check(/\s/) then
|
864
992
|
# TODO warn_balanced(":", "symbol literal");
|
865
|
-
|
866
|
-
return :tCOLON
|
993
|
+
return result(:expr_beg, :tCOLON, ":")
|
867
994
|
end
|
868
995
|
|
869
996
|
case
|
870
|
-
when
|
871
|
-
|
872
|
-
when
|
873
|
-
|
997
|
+
when scan(/\'/) then
|
998
|
+
string STR_SSYM, matched
|
999
|
+
when scan(/\"/) then
|
1000
|
+
string STR_DSYM, matched
|
874
1001
|
end
|
875
1002
|
|
876
|
-
|
877
|
-
|
878
|
-
return :tSYMBEG
|
879
|
-
elsif src.check(/[0-9]/) then
|
1003
|
+
return result(:expr_fname, :tSYMBEG, ":")
|
1004
|
+
elsif check(/[0-9]/) then
|
880
1005
|
return parse_number
|
881
|
-
elsif
|
1006
|
+
elsif scan(/\[/) then
|
882
1007
|
self.paren_nest += 1
|
883
1008
|
|
884
|
-
|
1009
|
+
token = nil
|
885
1010
|
|
886
1011
|
if in_lex_state? :expr_fname, :expr_dot then
|
887
|
-
self.lex_state = :expr_arg
|
888
1012
|
case
|
889
|
-
when
|
1013
|
+
when scan(/\]\=/) then
|
890
1014
|
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
891
|
-
|
892
|
-
|
893
|
-
when src.scan(/\]/) then
|
1015
|
+
return result(:expr_arg, :tASET, "[]=")
|
1016
|
+
when scan(/\]/) then
|
894
1017
|
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
895
|
-
|
896
|
-
return :tAREF
|
1018
|
+
return result(:expr_arg, :tAREF, "[]")
|
897
1019
|
else
|
898
1020
|
rb_compile_error "unexpected '['"
|
899
1021
|
end
|
900
1022
|
elsif is_beg? then
|
901
|
-
|
902
|
-
result = :tLBRACK
|
1023
|
+
token = :tLBRACK
|
903
1024
|
elsif is_arg? && space_seen then
|
904
|
-
|
905
|
-
result = :tLBRACK
|
1025
|
+
token = :tLBRACK
|
906
1026
|
else
|
907
|
-
|
1027
|
+
token = :tLBRACK2
|
908
1028
|
end
|
909
1029
|
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
self.lex_state = :expr_beg
|
924
|
-
self.yacc_value = "||"
|
925
|
-
return :tOROP
|
926
|
-
elsif src.scan(/\|\=/) then
|
927
|
-
self.lex_state = :expr_beg
|
928
|
-
self.yacc_value = "|"
|
929
|
-
return :tOP_ASGN
|
930
|
-
elsif src.scan(/\|/) then
|
931
|
-
self.fix_arg_lex_state
|
932
|
-
self.yacc_value = "|"
|
933
|
-
return :tPIPE
|
1030
|
+
return expr_result(token, "[")
|
1031
|
+
elsif scan(/\'#{SIMPLE_SSTRING}\'/) then
|
1032
|
+
text = matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
|
1033
|
+
return result(:expr_end, :tSTRING, text)
|
1034
|
+
elsif check(/\|/) then
|
1035
|
+
if scan(/\|\|\=/) then
|
1036
|
+
return result(:expr_beg, :tOP_ASGN, "||")
|
1037
|
+
elsif scan(/\|\|/) then
|
1038
|
+
return result(:expr_beg, :tOROP, "||")
|
1039
|
+
elsif scan(/\|\=/) then
|
1040
|
+
return result(:expr_beg, :tOP_ASGN, "|")
|
1041
|
+
elsif scan(/\|/) then
|
1042
|
+
return result(:arg_state, :tPIPE, "|")
|
934
1043
|
end
|
935
|
-
elsif
|
1044
|
+
elsif scan(/\{/) then
|
936
1045
|
self.brace_nest += 1
|
937
1046
|
if lpar_beg && lpar_beg == paren_nest then
|
938
1047
|
self.lpar_beg = nil
|
939
1048
|
self.paren_nest -= 1
|
940
1049
|
|
941
|
-
|
942
|
-
|
943
|
-
return :tLAMBEG
|
1050
|
+
return expr_result(:tLAMBEG, "{")
|
944
1051
|
end
|
945
1052
|
|
946
|
-
|
1053
|
+
token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
|
947
1054
|
:tLCURLY # block (primary)
|
948
1055
|
elsif in_lex_state?(:expr_endarg) then
|
949
1056
|
:tLBRACE_ARG # block (expr)
|
950
1057
|
else
|
951
|
-
self.tern.push false
|
952
1058
|
:tLBRACE # hash
|
953
1059
|
end
|
954
1060
|
|
955
|
-
self.
|
956
|
-
self.command_start = true unless result == :tLBRACE
|
1061
|
+
self.command_start = true unless token == :tLBRACE
|
957
1062
|
|
958
|
-
return
|
959
|
-
elsif
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
sign = src.matched
|
1063
|
+
return expr_result(token, "{")
|
1064
|
+
elsif scan(/->/) then
|
1065
|
+
return result(:expr_endfn, :tLAMBDA, nil)
|
1066
|
+
elsif scan(/[+-]/) then
|
1067
|
+
sign = matched
|
964
1068
|
utype, type = if sign == "+" then
|
965
1069
|
[:tUPLUS, :tPLUS]
|
966
1070
|
else
|
967
1071
|
[:tUMINUS, :tMINUS]
|
968
1072
|
end
|
969
1073
|
|
970
|
-
if
|
971
|
-
|
972
|
-
|
973
|
-
self.yacc_value = "#{sign}@"
|
974
|
-
return utype
|
1074
|
+
if in_arg_state? then
|
1075
|
+
if scan(/@/) then
|
1076
|
+
return result(:expr_arg, utype, "#{sign}@")
|
975
1077
|
else
|
976
|
-
|
977
|
-
return type
|
1078
|
+
return result(:expr_arg, type, sign)
|
978
1079
|
end
|
979
1080
|
end
|
980
1081
|
|
981
|
-
if
|
982
|
-
self.lex_state = :expr_beg
|
983
|
-
self.yacc_value = sign
|
984
|
-
return :tOP_ASGN
|
985
|
-
end
|
1082
|
+
return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
|
986
1083
|
|
987
|
-
if (is_beg? || (is_arg? && space_seen && !
|
988
|
-
if is_arg?
|
989
|
-
arg_ambiguous
|
990
|
-
end
|
1084
|
+
if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
|
1085
|
+
arg_ambiguous if is_arg?
|
991
1086
|
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
if src.check(/\d/) then
|
996
|
-
if utype == :tUPLUS then
|
997
|
-
return self.parse_number
|
998
|
-
else
|
999
|
-
return :tUMINUS_NUM
|
1000
|
-
end
|
1087
|
+
if check(/\d/) then
|
1088
|
+
return self.parse_number if utype == :tUPLUS
|
1089
|
+
return result(:expr_beg, :tUMINUS_NUM, sign)
|
1001
1090
|
end
|
1002
1091
|
|
1003
|
-
return utype
|
1092
|
+
return result(:expr_beg, utype, sign)
|
1004
1093
|
end
|
1005
1094
|
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
return :
|
1014
|
-
elsif
|
1015
|
-
result
|
1016
|
-
|
1017
|
-
|
1018
|
-
elsif is_beg? then
|
1019
|
-
:tDSTAR
|
1020
|
-
else
|
1021
|
-
# TODO: warn_balanced("**", "argument prefix");
|
1022
|
-
:tPOW
|
1023
|
-
end
|
1024
|
-
self.yacc_value = "**"
|
1025
|
-
self.fix_arg_lex_state
|
1026
|
-
return result
|
1027
|
-
elsif src.scan(/\*\=/) then
|
1028
|
-
self.lex_state = :expr_beg
|
1029
|
-
self.yacc_value = "*"
|
1030
|
-
return :tOP_ASGN
|
1031
|
-
elsif src.scan(/\*/) then
|
1032
|
-
result = if is_space_arg? src.check(/./m) then
|
1033
|
-
warning("`*' interpreted as argument prefix")
|
1034
|
-
:tSTAR
|
1035
|
-
elsif is_beg? then
|
1036
|
-
:tSTAR
|
1037
|
-
else
|
1038
|
-
# TODO: warn_balanced("*", "argument prefix");
|
1039
|
-
:tSTAR2 # TODO: rename
|
1040
|
-
end
|
1095
|
+
return result(:expr_beg, type, sign)
|
1096
|
+
elsif check(/\*/) then
|
1097
|
+
if scan(/\*\*=/) then
|
1098
|
+
return result(:expr_beg, :tOP_ASGN, "**")
|
1099
|
+
elsif scan(/\*\*/) then
|
1100
|
+
token = space_vs_beginning :tDSTAR, :tDSTAR, :tPOW
|
1101
|
+
|
1102
|
+
return result(:arg_state, token, "**")
|
1103
|
+
elsif scan(/\*\=/) then
|
1104
|
+
return result(:expr_beg, :tOP_ASGN, "*")
|
1105
|
+
elsif scan(/\*/) then
|
1106
|
+
token = space_vs_beginning :tSTAR, :tSTAR, :tSTAR2
|
1041
1107
|
|
1042
|
-
|
1043
|
-
self.fix_arg_lex_state
|
1044
|
-
return result
|
1108
|
+
return result(:arg_state, token, "*")
|
1045
1109
|
end
|
1046
|
-
elsif
|
1047
|
-
if
|
1048
|
-
|
1049
|
-
|
1050
|
-
return :
|
1051
|
-
elsif
|
1052
|
-
|
1053
|
-
|
1054
|
-
return :tLEQ
|
1055
|
-
elsif src.scan(/\<\<\=/) then
|
1056
|
-
self.fix_arg_lex_state
|
1057
|
-
self.lex_state = :expr_beg
|
1058
|
-
self.yacc_value = "\<\<"
|
1059
|
-
return :tOP_ASGN
|
1060
|
-
elsif src.scan(/\<\</) then
|
1110
|
+
elsif check(/\</) then
|
1111
|
+
if scan(/\<\=\>/) then
|
1112
|
+
return result(:arg_state, :tCMP, "<=>")
|
1113
|
+
elsif scan(/\<\=/) then
|
1114
|
+
return result(:arg_state, :tLEQ, "<=")
|
1115
|
+
elsif scan(/\<\<\=/) then
|
1116
|
+
return result(:arg_state, :tOP_ASGN, "<<")
|
1117
|
+
elsif scan(/\<\</) then
|
1061
1118
|
if (!in_lex_state?(:expr_dot, :expr_class) &&
|
1062
1119
|
!is_end? &&
|
1063
1120
|
(!is_arg? || space_seen)) then
|
@@ -1065,70 +1122,49 @@ class RubyLexer
|
|
1065
1122
|
return tok if tok
|
1066
1123
|
end
|
1067
1124
|
|
1068
|
-
|
1069
|
-
|
1070
|
-
return :
|
1071
|
-
elsif src.scan(/\</) then
|
1072
|
-
self.fix_arg_lex_state
|
1073
|
-
self.yacc_value = "<"
|
1074
|
-
return :tLT
|
1125
|
+
return result(:arg_state, :tLSHFT, "\<\<")
|
1126
|
+
elsif scan(/\</) then
|
1127
|
+
return result(:arg_state, :tLT, "<")
|
1075
1128
|
end
|
1076
|
-
elsif
|
1077
|
-
if
|
1078
|
-
|
1079
|
-
|
1080
|
-
return :
|
1081
|
-
elsif
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
return :tOP_ASGN
|
1086
|
-
elsif src.scan(/\>\>/) then
|
1087
|
-
self.fix_arg_lex_state
|
1088
|
-
self.yacc_value = ">>"
|
1089
|
-
return :tRSHFT
|
1090
|
-
elsif src.scan(/\>/) then
|
1091
|
-
self.fix_arg_lex_state
|
1092
|
-
self.yacc_value = ">"
|
1093
|
-
return :tGT
|
1129
|
+
elsif check(/\>/) then
|
1130
|
+
if scan(/\>\=/) then
|
1131
|
+
return result(:arg_state, :tGEQ, ">=")
|
1132
|
+
elsif scan(/\>\>=/) then
|
1133
|
+
return result(:arg_state, :tOP_ASGN, ">>")
|
1134
|
+
elsif scan(/\>\>/) then
|
1135
|
+
return result(:arg_state, :tRSHFT, ">>")
|
1136
|
+
elsif scan(/\>/) then
|
1137
|
+
return result(:arg_state, :tGT, ">")
|
1094
1138
|
end
|
1095
|
-
elsif
|
1096
|
-
self.yacc_value = "`"
|
1139
|
+
elsif scan(/\`/) then
|
1097
1140
|
case lex_state
|
1098
1141
|
when :expr_fname then
|
1099
|
-
|
1100
|
-
return :tBACK_REF2
|
1142
|
+
return result(:expr_end, :tBACK_REF2, "`")
|
1101
1143
|
when :expr_dot then
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
return :tBACK_REF2
|
1144
|
+
state = command_state ? :expr_cmdarg : :expr_arg
|
1145
|
+
return result(state, :tBACK_REF2, "`")
|
1146
|
+
else
|
1147
|
+
string STR_XQUOTE, '`'
|
1148
|
+
return result(nil, :tXSTRING_BEG, "`")
|
1108
1149
|
end
|
1109
|
-
|
1110
|
-
return :tXSTRING_BEG
|
1111
|
-
elsif src.scan(/\?/) then
|
1112
|
-
|
1150
|
+
elsif scan(/\?/) then
|
1113
1151
|
if is_end? then
|
1114
|
-
|
1115
|
-
|
1116
|
-
self.yacc_value = "?"
|
1117
|
-
return :tEH
|
1152
|
+
state = ruby18 ? :expr_beg : :expr_value # HACK?
|
1153
|
+
return result(state, :tEH, "?")
|
1118
1154
|
end
|
1119
1155
|
|
1120
|
-
if
|
1156
|
+
if end_of_stream? then
|
1121
1157
|
rb_compile_error "incomplete character syntax"
|
1122
1158
|
end
|
1123
1159
|
|
1124
|
-
if
|
1160
|
+
if check(/\s|\v/) then
|
1125
1161
|
unless is_arg? then
|
1126
1162
|
c2 = { " " => 's',
|
1127
1163
|
"\n" => 'n',
|
1128
1164
|
"\t" => 't',
|
1129
1165
|
"\v" => 'v',
|
1130
1166
|
"\r" => 'r',
|
1131
|
-
"\f" => 'f' }[
|
1167
|
+
"\f" => 'f' }[matched]
|
1132
1168
|
|
1133
1169
|
if c2 then
|
1134
1170
|
warning("invalid character syntax; use ?\\" + c2)
|
@@ -1136,47 +1172,32 @@ class RubyLexer
|
|
1136
1172
|
end
|
1137
1173
|
|
1138
1174
|
# ternary
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1142
|
-
return :tEH
|
1143
|
-
elsif src.check(/\w(?=\w)/) then # ternary, also
|
1144
|
-
self.lex_state = :expr_beg
|
1145
|
-
self.tern.push true
|
1146
|
-
self.yacc_value = "?"
|
1147
|
-
return :tEH
|
1175
|
+
state = ruby18 ? :expr_beg : :expr_value # HACK?
|
1176
|
+
return result(state, :tEH, "?")
|
1177
|
+
elsif check(/\w(?=\w)/) then # ternary, also
|
1178
|
+
return result(:expr_beg, :tEH, "?")
|
1148
1179
|
end
|
1149
1180
|
|
1150
|
-
c = if
|
1181
|
+
c = if scan(/\\/) then
|
1151
1182
|
self.read_escape
|
1152
1183
|
else
|
1153
|
-
|
1184
|
+
ss.getch
|
1154
1185
|
end
|
1155
|
-
self.lex_state = :expr_end
|
1156
1186
|
|
1157
1187
|
if version == 18 then
|
1158
|
-
|
1159
|
-
return :tINTEGER
|
1188
|
+
return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
|
1160
1189
|
else
|
1161
|
-
|
1162
|
-
return :tSTRING
|
1190
|
+
return result(:expr_end, :tSTRING, c)
|
1163
1191
|
end
|
1164
|
-
elsif
|
1165
|
-
if
|
1166
|
-
|
1167
|
-
|
1168
|
-
return :
|
1169
|
-
elsif
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
elsif src.scan(/\&\=/) then
|
1174
|
-
self.yacc_value = "&"
|
1175
|
-
self.lex_state = :expr_beg
|
1176
|
-
return :tOP_ASGN
|
1177
|
-
elsif src.scan(/&/) then
|
1178
|
-
result = if is_arg? && space_seen &&
|
1179
|
-
!src.check(/\s/) then
|
1192
|
+
elsif check(/\&/) then
|
1193
|
+
if scan(/\&\&\=/) then
|
1194
|
+
return result(:expr_beg, :tOP_ASGN, "&&")
|
1195
|
+
elsif scan(/\&\&/) then
|
1196
|
+
return result(:expr_beg, :tANDOP, "&&")
|
1197
|
+
elsif scan(/\&\=/) then
|
1198
|
+
return result(:expr_beg, :tOP_ASGN, "&")
|
1199
|
+
elsif scan(/&/) then
|
1200
|
+
token = if is_arg? && space_seen && !check(/\s/) then
|
1180
1201
|
warning("`&' interpreted as argument prefix")
|
1181
1202
|
:tAMPER
|
1182
1203
|
elsif in_lex_state? :expr_beg, :expr_mid then
|
@@ -1185,170 +1206,121 @@ class RubyLexer
|
|
1185
1206
|
:tAMPER2
|
1186
1207
|
end
|
1187
1208
|
|
1188
|
-
|
1189
|
-
self.yacc_value = "&"
|
1190
|
-
return result
|
1209
|
+
return result(:arg_state, token, "&")
|
1191
1210
|
end
|
1192
|
-
elsif
|
1211
|
+
elsif scan(/\//) then
|
1193
1212
|
if is_beg? then
|
1194
|
-
|
1195
|
-
|
1196
|
-
return :tREGEXP_BEG
|
1213
|
+
string STR_REGEXP, '/'
|
1214
|
+
return result(nil, :tREGEXP_BEG, "/")
|
1197
1215
|
end
|
1198
1216
|
|
1199
|
-
if
|
1200
|
-
|
1201
|
-
self.lex_state = :expr_beg
|
1202
|
-
return :tOP_ASGN
|
1217
|
+
if scan(/\=/) then
|
1218
|
+
return result(:expr_beg, :tOP_ASGN, "/")
|
1203
1219
|
end
|
1204
1220
|
|
1205
1221
|
if is_arg? && space_seen then
|
1206
|
-
unless
|
1222
|
+
unless scan(/\s/) then
|
1207
1223
|
arg_ambiguous
|
1208
|
-
|
1209
|
-
|
1210
|
-
return :tREGEXP_BEG
|
1224
|
+
string STR_REGEXP, '/'
|
1225
|
+
return result(nil, :tREGEXP_BEG, "/")
|
1211
1226
|
end
|
1212
1227
|
end
|
1213
1228
|
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
self.yacc_value = "^"
|
1221
|
-
return :tOP_ASGN
|
1222
|
-
elsif src.scan(/\^/) then
|
1223
|
-
self.fix_arg_lex_state
|
1224
|
-
self.yacc_value = "^"
|
1225
|
-
return :tCARET
|
1226
|
-
elsif src.scan(/\;/) then
|
1229
|
+
return result(:arg_state, :tDIVIDE, "/")
|
1230
|
+
elsif scan(/\^=/) then
|
1231
|
+
return result(:expr_beg, :tOP_ASGN, "^")
|
1232
|
+
elsif scan(/\^/) then
|
1233
|
+
return result(:arg_state, :tCARET, "^")
|
1234
|
+
elsif scan(/\;/) then
|
1227
1235
|
self.command_start = true
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
end
|
1235
|
-
|
1236
|
-
self.fix_arg_lex_state
|
1237
|
-
self.yacc_value = "~"
|
1238
|
-
|
1239
|
-
return :tTILDE
|
1240
|
-
elsif src.scan(/\\/) then
|
1241
|
-
if src.scan(/\r?\n/) then
|
1236
|
+
return result(:expr_beg, :tSEMI, ";")
|
1237
|
+
elsif scan(/\~/) then
|
1238
|
+
scan(/@/) if in_lex_state? :expr_fname, :expr_dot
|
1239
|
+
return result(:arg_state, :tTILDE, "~")
|
1240
|
+
elsif scan(/\\/) then
|
1241
|
+
if scan(/\r?\n/) then
|
1242
1242
|
self.lineno = nil
|
1243
1243
|
self.space_seen = true
|
1244
1244
|
next
|
1245
1245
|
end
|
1246
1246
|
rb_compile_error "bare backslash only allowed before newline"
|
1247
|
-
elsif
|
1248
|
-
if is_beg?
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
if
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
elsif src.check(/\$/) then
|
1265
|
-
if src.scan(/(\$_)(\w+)/) then
|
1266
|
-
self.lex_state = :expr_end
|
1267
|
-
self.token = src.matched
|
1268
|
-
return process_token(command_state)
|
1269
|
-
elsif src.scan(/\$_/) then
|
1270
|
-
self.lex_state = :expr_end
|
1271
|
-
self.token = src.matched
|
1272
|
-
self.yacc_value = src.matched
|
1273
|
-
return :tGVAR
|
1274
|
-
elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
|
1275
|
-
self.lex_state = :expr_end
|
1276
|
-
self.yacc_value = src.matched
|
1277
|
-
return :tGVAR
|
1278
|
-
elsif src.scan(/\$([\&\`\'\+])/) then
|
1279
|
-
self.lex_state = :expr_end
|
1247
|
+
elsif scan(/\%/) then
|
1248
|
+
return parse_quote if is_beg?
|
1249
|
+
|
1250
|
+
return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
|
1251
|
+
|
1252
|
+
return parse_quote if is_arg? && space_seen && ! check(/\s/)
|
1253
|
+
|
1254
|
+
return result(:arg_state, :tPERCENT, "%")
|
1255
|
+
elsif check(/\$/) then
|
1256
|
+
if scan(/(\$_)(\w+)/) then
|
1257
|
+
self.token = matched
|
1258
|
+
return result(:expr_end, :tGVAR, matched)
|
1259
|
+
elsif scan(/\$_/) then
|
1260
|
+
return result(:expr_end, :tGVAR, matched)
|
1261
|
+
elsif scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
|
1262
|
+
return result(:expr_end, :tGVAR, matched)
|
1263
|
+
elsif scan(/\$([\&\`\'\+])/) then
|
1280
1264
|
# Explicit reference to these vars as symbols...
|
1281
|
-
if
|
1282
|
-
|
1283
|
-
return :tGVAR
|
1265
|
+
if lex_state == :expr_fname then
|
1266
|
+
return result(:expr_end, :tGVAR, matched)
|
1284
1267
|
else
|
1285
|
-
|
1286
|
-
return :tBACK_REF
|
1268
|
+
return result(:expr_end, :tBACK_REF, ss[1].to_sym)
|
1287
1269
|
end
|
1288
|
-
elsif
|
1289
|
-
|
1290
|
-
|
1291
|
-
self.yacc_value = src.matched
|
1292
|
-
return :tGVAR
|
1270
|
+
elsif scan(/\$([1-9]\d*)/) then
|
1271
|
+
if lex_state == :expr_fname then
|
1272
|
+
return result(:expr_end, :tGVAR, matched)
|
1293
1273
|
else
|
1294
|
-
|
1295
|
-
return :tNTH_REF
|
1274
|
+
return result(:expr_end, :tNTH_REF, ss[1].to_i)
|
1296
1275
|
end
|
1297
|
-
elsif
|
1298
|
-
|
1299
|
-
|
1300
|
-
return
|
1301
|
-
elsif
|
1302
|
-
|
1303
|
-
self.yacc_value = "$"
|
1304
|
-
return "$"
|
1305
|
-
elsif src.scan(/\$\w+/)
|
1306
|
-
self.lex_state = :expr_end
|
1307
|
-
self.token = src.matched
|
1308
|
-
return process_token(command_state)
|
1276
|
+
elsif scan(/\$0/) then
|
1277
|
+
return result(:expr_end, :tGVAR, matched)
|
1278
|
+
elsif scan(/\$\W|\$\z/) then # TODO: remove?
|
1279
|
+
return result(:expr_end, "$", "$") # FIX: "$"??
|
1280
|
+
elsif scan(/\$\w+/)
|
1281
|
+
return result(:expr_end, :tGVAR, matched)
|
1309
1282
|
end
|
1310
|
-
elsif
|
1311
|
-
if
|
1283
|
+
elsif check(/\_/) then
|
1284
|
+
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
1312
1285
|
self.lineno = nil
|
1313
1286
|
return RubyLexer::EOF
|
1314
|
-
elsif
|
1315
|
-
self.token =
|
1316
|
-
return process_token
|
1287
|
+
elsif scan(/\_\w*/) then
|
1288
|
+
self.token = matched
|
1289
|
+
return process_token command_state, last_state
|
1317
1290
|
end
|
1318
1291
|
end
|
1319
1292
|
end # END OF CASE
|
1320
1293
|
|
1321
|
-
if
|
1294
|
+
if scan(/\004|\032|\000/) || end_of_stream? then # ^D, ^Z, EOF
|
1322
1295
|
return RubyLexer::EOF
|
1323
1296
|
else # alpha check
|
1324
|
-
rb_compile_error "Invalid char #{
|
1325
|
-
|
1297
|
+
rb_compile_error "Invalid char #{ss.rest[0].chr} in expression" unless
|
1298
|
+
check IDENT
|
1326
1299
|
end
|
1327
1300
|
|
1328
|
-
self.token =
|
1301
|
+
self.token = matched if self.scan IDENT
|
1329
1302
|
|
1330
|
-
return process_token
|
1303
|
+
return process_token command_state, last_state
|
1331
1304
|
end
|
1332
1305
|
end
|
1333
1306
|
|
1334
1307
|
def yylex_paren18
|
1335
1308
|
self.command_start = true
|
1336
|
-
|
1309
|
+
token = :tLPAREN2
|
1337
1310
|
|
1338
1311
|
if in_lex_state? :expr_beg, :expr_mid then
|
1339
|
-
|
1312
|
+
token = :tLPAREN
|
1340
1313
|
elsif space_seen then
|
1341
1314
|
if in_lex_state? :expr_cmdarg then
|
1342
|
-
|
1315
|
+
token = :tLPAREN_ARG
|
1343
1316
|
elsif in_lex_state? :expr_arg then
|
1344
|
-
self.tern.push false
|
1345
1317
|
warning "don't put space before argument parentheses"
|
1346
1318
|
end
|
1347
1319
|
else
|
1348
|
-
|
1320
|
+
# not a ternary -- do nothing?
|
1349
1321
|
end
|
1350
1322
|
|
1351
|
-
|
1323
|
+
token
|
1352
1324
|
end
|
1353
1325
|
|
1354
1326
|
def yylex_paren19
|
@@ -1361,146 +1333,7 @@ class RubyLexer
|
|
1361
1333
|
end
|
1362
1334
|
end
|
1363
1335
|
|
1364
|
-
def
|
1365
|
-
in_lex_state? :expr_arg, :expr_cmdarg
|
1366
|
-
end
|
1367
|
-
|
1368
|
-
def is_end?
|
1369
|
-
in_lex_state? :expr_end, :expr_endarg, :expr_endfn
|
1370
|
-
end
|
1371
|
-
|
1372
|
-
def is_beg?
|
1373
|
-
in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class
|
1374
|
-
end
|
1375
|
-
|
1376
|
-
# TODO #define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT)
|
1377
|
-
|
1378
|
-
def is_space_arg? c = "x"
|
1379
|
-
is_arg? and space_seen and c !~ /\s/
|
1380
|
-
end
|
1381
|
-
|
1382
|
-
def is_label_possible? command_state
|
1383
|
-
(in_lex_state?(:expr_beg) && !command_state) || is_arg?
|
1384
|
-
end
|
1385
|
-
|
1386
|
-
def process_token(command_state)
|
1387
|
-
token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)
|
1388
|
-
|
1389
|
-
result = nil
|
1390
|
-
last_state = lex_state
|
1391
|
-
|
1392
|
-
case token
|
1393
|
-
when /^\$/ then
|
1394
|
-
self.lex_state, result = :expr_end, :tGVAR
|
1395
|
-
when /^@@/ then
|
1396
|
-
self.lex_state, result = :expr_end, :tCVAR
|
1397
|
-
when /^@/ then
|
1398
|
-
self.lex_state, result = :expr_end, :tIVAR
|
1399
|
-
else
|
1400
|
-
if token =~ /[!?]$/ then
|
1401
|
-
result = :tFID
|
1402
|
-
else
|
1403
|
-
if in_lex_state? :expr_fname then
|
1404
|
-
# ident=, not =~ => == or followed by =>
|
1405
|
-
# TODO test lexing of a=>b vs a==>b
|
1406
|
-
if src.scan(/=(?:(?![~>=])|(?==>))/) then
|
1407
|
-
result = :tIDENTIFIER
|
1408
|
-
token << src.matched
|
1409
|
-
end
|
1410
|
-
end
|
1411
|
-
|
1412
|
-
result ||= if token =~ /^[A-Z]/ then
|
1413
|
-
:tCONSTANT
|
1414
|
-
else
|
1415
|
-
:tIDENTIFIER
|
1416
|
-
end
|
1417
|
-
end
|
1418
|
-
|
1419
|
-
unless ruby18
|
1420
|
-
if is_label_possible? command_state then
|
1421
|
-
colon = src.scan(/:/)
|
1422
|
-
|
1423
|
-
if colon && src.peek(1) != ":" then
|
1424
|
-
self.lex_state = :expr_beg
|
1425
|
-
self.yacc_value = [token, src.lineno]
|
1426
|
-
return :tLABEL
|
1427
|
-
end
|
1428
|
-
|
1429
|
-
src.unscan if colon
|
1430
|
-
end
|
1431
|
-
end
|
1432
|
-
|
1433
|
-
unless in_lex_state? :expr_dot then
|
1434
|
-
# See if it is a reserved word.
|
1435
|
-
keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
|
1436
|
-
RubyParserStuff::Keyword.keyword18 token
|
1437
|
-
else
|
1438
|
-
RubyParserStuff::Keyword.keyword19 token
|
1439
|
-
end
|
1440
|
-
|
1441
|
-
if keyword then
|
1442
|
-
state = lex_state
|
1443
|
-
self.lex_state = keyword.state
|
1444
|
-
self.yacc_value = [token, src.lineno]
|
1445
|
-
|
1446
|
-
if state == :expr_fname then
|
1447
|
-
self.yacc_value = keyword.name
|
1448
|
-
return keyword.id0
|
1449
|
-
end
|
1450
|
-
|
1451
|
-
self.command_start = true if lex_state == :expr_beg
|
1452
|
-
|
1453
|
-
if keyword.id0 == :kDO then
|
1454
|
-
if lpar_beg && lpar_beg == paren_nest then
|
1455
|
-
self.lpar_beg = nil
|
1456
|
-
self.paren_nest -= 1
|
1457
|
-
|
1458
|
-
return :kDO_LAMBDA
|
1459
|
-
end
|
1460
|
-
|
1461
|
-
return :kDO_COND if cond.is_in_state
|
1462
|
-
return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
|
1463
|
-
return :kDO_BLOCK if [:expr_beg, :expr_endarg].include? state
|
1464
|
-
return :kDO
|
1465
|
-
end
|
1466
|
-
|
1467
|
-
return keyword.id0 if [:expr_beg, :expr_value].include? state
|
1468
|
-
|
1469
|
-
self.lex_state = :expr_beg if keyword.id0 != keyword.id1
|
1470
|
-
|
1471
|
-
return keyword.id1
|
1472
|
-
end
|
1473
|
-
end
|
1474
|
-
|
1475
|
-
# TODO:
|
1476
|
-
# if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
|
1477
|
-
|
1478
|
-
self.lex_state =
|
1479
|
-
if is_beg? || is_arg? || in_lex_state?(:expr_dot) then
|
1480
|
-
if command_state then
|
1481
|
-
:expr_cmdarg
|
1482
|
-
else
|
1483
|
-
:expr_arg
|
1484
|
-
end
|
1485
|
-
elsif !ruby18 && in_lex_state?(:expr_fname) then
|
1486
|
-
:expr_endfn
|
1487
|
-
else
|
1488
|
-
:expr_end
|
1489
|
-
end
|
1490
|
-
|
1491
|
-
end
|
1492
|
-
|
1493
|
-
self.yacc_value = token
|
1494
|
-
|
1495
|
-
if (![:expr_dot, :expr_fname].include?(last_state) &&
|
1496
|
-
self.parser.env[token.to_sym] == :lvar) then
|
1497
|
-
self.lex_state = :expr_end
|
1498
|
-
end
|
1499
|
-
|
1500
|
-
return result
|
1501
|
-
end
|
1502
|
-
|
1503
|
-
def yylex_string # 23 lines
|
1336
|
+
def yylex_string # TODO: rewrite / remove
|
1504
1337
|
token = if lex_strterm[0] == :heredoc then
|
1505
1338
|
self.heredoc lex_strterm
|
1506
1339
|
else
|