ruby_parser 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +2 -2
- data/.autotest +21 -14
- data/History.txt +50 -0
- data/Manifest.txt +3 -1
- data/Rakefile +20 -14
- data/bin/ruby_parse_extract_error +8 -2
- data/lib/.document +1 -0
- data/lib/ruby18_parser.rb +12 -7
- data/lib/ruby18_parser.y +12 -7
- data/lib/ruby19_parser.rb +5 -3
- data/lib/ruby19_parser.y +5 -3
- data/lib/ruby20_parser.rb +17 -5
- data/lib/ruby20_parser.y +17 -5
- data/lib/ruby_lexer.rb +502 -664
- data/lib/ruby_lexer.rex +189 -0
- data/lib/ruby_lexer.rex.rb +263 -0
- data/lib/ruby_parser_extras.rb +45 -58
- data/test/test_ruby_lexer.rb +22 -13
- data/test/test_ruby_parser.rb +85 -25
- metadata +21 -5
- metadata.gz.sig +0 -0
- data/lib/gauntlet_rubyparser.rb +0 -117
data/lib/ruby20_parser.rb
CHANGED
@@ -4913,13 +4913,15 @@ def _reduce_311(val, _values, result)
|
|
4913
4913
|
end
|
4914
4914
|
|
4915
4915
|
def _reduce_312(val, _values, result)
|
4916
|
-
|
4916
|
+
(_, line), expr, _, body, _ = val
|
4917
|
+
result = new_case expr, body, line
|
4917
4918
|
|
4918
4919
|
result
|
4919
4920
|
end
|
4920
4921
|
|
4921
4922
|
def _reduce_313(val, _values, result)
|
4922
|
-
|
4923
|
+
(_, line), _, body, _ = val
|
4924
|
+
result = new_case nil, body, line
|
4923
4925
|
|
4924
4926
|
result
|
4925
4927
|
end
|
@@ -5514,13 +5516,23 @@ def _reduce_414(val, _values, result)
|
|
5514
5516
|
end
|
5515
5517
|
|
5516
5518
|
def _reduce_415(val, _values, result)
|
5517
|
-
|
5519
|
+
iter1, _, name, args, iter2 = val
|
5520
|
+
|
5521
|
+
call = new_call iter1, name.to_sym, args
|
5522
|
+
iter2.insert 1, call
|
5523
|
+
|
5524
|
+
result = iter2
|
5518
5525
|
|
5519
5526
|
result
|
5520
5527
|
end
|
5521
5528
|
|
5522
5529
|
def _reduce_416(val, _values, result)
|
5523
|
-
|
5530
|
+
iter1, _, name, args, iter2 = val
|
5531
|
+
|
5532
|
+
call = new_call iter1, name.to_sym, args
|
5533
|
+
iter2.insert 1, call
|
5534
|
+
|
5535
|
+
result = iter2
|
5524
5536
|
|
5525
5537
|
result
|
5526
5538
|
end
|
@@ -6088,7 +6100,7 @@ def _reduce_510(val, _values, result)
|
|
6088
6100
|
end
|
6089
6101
|
|
6090
6102
|
def _reduce_511(val, _values, result)
|
6091
|
-
result = s(:lit, lexer.
|
6103
|
+
result = s(:lit, lexer.lineno)
|
6092
6104
|
result
|
6093
6105
|
end
|
6094
6106
|
|
data/lib/ruby20_parser.y
CHANGED
@@ -1058,11 +1058,13 @@ rule
|
|
1058
1058
|
}
|
1059
1059
|
| kCASE expr_value opt_terms case_body kEND
|
1060
1060
|
{
|
1061
|
-
|
1061
|
+
(_, line), expr, _, body, _ = val
|
1062
|
+
result = new_case expr, body, line
|
1062
1063
|
}
|
1063
1064
|
| kCASE opt_terms case_body kEND
|
1064
1065
|
{
|
1065
|
-
|
1066
|
+
(_, line), _, body, _ = val
|
1067
|
+
result = new_case nil, body, line
|
1066
1068
|
}
|
1067
1069
|
| kFOR for_var kIN
|
1068
1070
|
{
|
@@ -1501,11 +1503,21 @@ opt_block_args_tail: tCOMMA block_args_tail
|
|
1501
1503
|
}
|
1502
1504
|
| block_call dot_or_colon operation2 opt_paren_args brace_block
|
1503
1505
|
{
|
1504
|
-
|
1506
|
+
iter1, _, name, args, iter2 = val
|
1507
|
+
|
1508
|
+
call = new_call iter1, name.to_sym, args
|
1509
|
+
iter2.insert 1, call
|
1510
|
+
|
1511
|
+
result = iter2
|
1505
1512
|
}
|
1506
1513
|
| block_call dot_or_colon operation2 command_args do_block
|
1507
1514
|
{
|
1508
|
-
|
1515
|
+
iter1, _, name, args, iter2 = val
|
1516
|
+
|
1517
|
+
call = new_call iter1, name.to_sym, args
|
1518
|
+
iter2.insert 1, call
|
1519
|
+
|
1520
|
+
result = iter2
|
1509
1521
|
}
|
1510
1522
|
|
1511
1523
|
method_call: fcall
|
@@ -1916,7 +1928,7 @@ keyword_variable: kNIL { result = s(:nil) }
|
|
1916
1928
|
| kTRUE { result = s(:true) }
|
1917
1929
|
| kFALSE { result = s(:false) }
|
1918
1930
|
| k__FILE__ { result = s(:str, self.file) }
|
1919
|
-
| k__LINE__ { result = s(:lit, lexer.
|
1931
|
+
| k__LINE__ { result = s(:lit, lexer.lineno) }
|
1920
1932
|
| k__ENCODING__
|
1921
1933
|
{
|
1922
1934
|
result =
|
data/lib/ruby_lexer.rb
CHANGED
@@ -11,11 +11,6 @@ class RubyLexer
|
|
11
11
|
/[\w\x80-\xFF]/n
|
12
12
|
end
|
13
13
|
|
14
|
-
IDENT = /^#{IDENT_CHAR}+/o
|
15
|
-
ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/u
|
16
|
-
SIMPLE_STRING = /(#{ESC}|#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
|
17
|
-
SIMPLE_SSTRING = /(\\.|[^\'])*/
|
18
|
-
|
19
14
|
EOF = :eof_haha!
|
20
15
|
|
21
16
|
# ruby constants for strings (should this be moved somewhere else?)
|
@@ -75,6 +70,8 @@ class RubyLexer
|
|
75
70
|
attr_accessor :brace_nest
|
76
71
|
attr_accessor :cmdarg
|
77
72
|
attr_accessor :command_start
|
73
|
+
attr_accessor :command_state
|
74
|
+
attr_accessor :last_state
|
78
75
|
attr_accessor :cond
|
79
76
|
|
80
77
|
##
|
@@ -91,11 +88,7 @@ class RubyLexer
|
|
91
88
|
attr_accessor :string_buffer
|
92
89
|
attr_accessor :string_nest
|
93
90
|
|
94
|
-
#
|
95
|
-
attr_reader :src
|
96
|
-
alias :ss :src
|
97
|
-
|
98
|
-
# Last token read via yylex.
|
91
|
+
# Last token read via next_token.
|
99
92
|
attr_accessor :token
|
100
93
|
|
101
94
|
##
|
@@ -104,11 +97,6 @@ class RubyLexer
|
|
104
97
|
|
105
98
|
attr_accessor :version
|
106
99
|
|
107
|
-
# Value of last token which had a value associated with it.
|
108
|
-
attr_accessor :yacc_value
|
109
|
-
|
110
|
-
attr_writer :lineno # reader is lazy initalizer
|
111
|
-
|
112
100
|
attr_writer :comments
|
113
101
|
|
114
102
|
def initialize v = 18
|
@@ -117,20 +105,6 @@ class RubyLexer
|
|
117
105
|
reset
|
118
106
|
end
|
119
107
|
|
120
|
-
##
|
121
|
-
# How the parser advances to the next token.
|
122
|
-
#
|
123
|
-
# @return true if not at end of file (EOF).
|
124
|
-
|
125
|
-
def advance
|
126
|
-
r = yylex
|
127
|
-
self.token = r
|
128
|
-
|
129
|
-
raise "yylex returned nil, near #{ss.rest[0,10].inspect}" unless r
|
130
|
-
|
131
|
-
return RubyLexer::EOF != r
|
132
|
-
end
|
133
|
-
|
134
108
|
def arg_ambiguous
|
135
109
|
self.warning("Ambiguous first argument. make sure.")
|
136
110
|
end
|
@@ -142,6 +116,7 @@ class RubyLexer
|
|
142
116
|
def beginning_of_line?
|
143
117
|
ss.bol?
|
144
118
|
end
|
119
|
+
alias :bol? :beginning_of_line? # to make .rex file more readable
|
145
120
|
|
146
121
|
def check re
|
147
122
|
ss.check re
|
@@ -174,9 +149,9 @@ class RubyLexer
|
|
174
149
|
rb_compile_error err_msg if end_of_stream?
|
175
150
|
|
176
151
|
if beginning_of_line? && scan(eos_re) then
|
152
|
+
self.lineno += 1
|
177
153
|
ss.unread_many last_line # TODO: figure out how to remove this
|
178
|
-
|
179
|
-
return :tSTRING_END
|
154
|
+
return :tSTRING_END, eos
|
180
155
|
end
|
181
156
|
|
182
157
|
self.string_buffer = []
|
@@ -185,11 +160,9 @@ class RubyLexer
|
|
185
160
|
case
|
186
161
|
when scan(/#[$@]/) then
|
187
162
|
ss.pos -= 1 # FIX omg stupid
|
188
|
-
|
189
|
-
return :tSTRING_DVAR
|
163
|
+
return :tSTRING_DVAR, matched
|
190
164
|
when scan(/#[{]/) then
|
191
|
-
|
192
|
-
return :tSTRING_DBEG
|
165
|
+
return :tSTRING_DBEG, matched
|
193
166
|
when scan(/#/) then
|
194
167
|
string_buffer << '#'
|
195
168
|
end
|
@@ -201,9 +174,9 @@ class RubyLexer
|
|
201
174
|
c == RubyLexer::EOF
|
202
175
|
|
203
176
|
if c != "\n" then
|
204
|
-
|
205
|
-
return :tSTRING_CONTENT
|
177
|
+
return :tSTRING_CONTENT, string_buffer.join.delete("\r")
|
206
178
|
else
|
179
|
+
self.lineno += 1
|
207
180
|
string_buffer << scan(/\n/)
|
208
181
|
end
|
209
182
|
|
@@ -218,8 +191,7 @@ class RubyLexer
|
|
218
191
|
|
219
192
|
self.lex_strterm = [:heredoc, eos, func, last_line]
|
220
193
|
|
221
|
-
|
222
|
-
return :tSTRING_CONTENT
|
194
|
+
return :tSTRING_CONTENT, string_buffer.join.delete("\r")
|
223
195
|
end
|
224
196
|
|
225
197
|
def heredoc_identifier # TODO: remove / rewrite
|
@@ -255,7 +227,6 @@ class RubyLexer
|
|
255
227
|
if scan(/.*\n/) then
|
256
228
|
# TODO: think about storing off the char range instead
|
257
229
|
line = matched
|
258
|
-
ss.extra_lines_added += 1 # FIX: ugh
|
259
230
|
else
|
260
231
|
line = nil
|
261
232
|
end
|
@@ -263,14 +234,16 @@ class RubyLexer
|
|
263
234
|
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
264
235
|
|
265
236
|
if term == '`' then
|
266
|
-
|
267
|
-
return :tXSTRING_BEG
|
237
|
+
result nil, :tXSTRING_BEG, "`"
|
268
238
|
else
|
269
|
-
|
270
|
-
return :tSTRING_BEG
|
239
|
+
result nil, :tSTRING_BEG, "\""
|
271
240
|
end
|
272
241
|
end
|
273
242
|
|
243
|
+
def in_fname?
|
244
|
+
in_lex_state? :expr_fname
|
245
|
+
end
|
246
|
+
|
274
247
|
def in_arg_state? # TODO: rename is_after_operator?
|
275
248
|
in_lex_state? :expr_fname, :expr_dot
|
276
249
|
end
|
@@ -281,9 +254,7 @@ class RubyLexer
|
|
281
254
|
|
282
255
|
def int_with_base base
|
283
256
|
rb_compile_error "Invalid numeric format" if matched =~ /__/
|
284
|
-
|
285
|
-
self.yacc_value = matched.to_i(base)
|
286
|
-
return :tINTEGER
|
257
|
+
return result(:expr_end, :tINTEGER, matched.to_i(base))
|
287
258
|
end
|
288
259
|
|
289
260
|
def is_arg?
|
@@ -298,7 +269,7 @@ class RubyLexer
|
|
298
269
|
in_lex_state? :expr_end, :expr_endarg, :expr_endfn
|
299
270
|
end
|
300
271
|
|
301
|
-
def is_label_possible?
|
272
|
+
def is_label_possible?
|
302
273
|
(in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
|
303
274
|
end
|
304
275
|
|
@@ -306,170 +277,396 @@ class RubyLexer
|
|
306
277
|
is_arg? and space_seen and c !~ /\s/
|
307
278
|
end
|
308
279
|
|
309
|
-
def lineno
|
310
|
-
@lineno ||= ss.lineno
|
311
|
-
end
|
312
|
-
|
313
280
|
def matched
|
314
281
|
ss.matched
|
315
282
|
end
|
316
283
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
# @param c The first character of the number.
|
321
|
-
# @return A int constant wich represents a token.
|
284
|
+
def not_end?
|
285
|
+
not is_end?
|
286
|
+
end
|
322
287
|
|
323
|
-
def
|
324
|
-
|
288
|
+
def process_amper text
|
289
|
+
token = if is_arg? && space_seen && !check(/\s/) then
|
290
|
+
warning("`&' interpreted as argument prefix")
|
291
|
+
:tAMPER
|
292
|
+
elsif in_lex_state? :expr_beg, :expr_mid then
|
293
|
+
:tAMPER
|
294
|
+
else
|
295
|
+
:tAMPER2
|
296
|
+
end
|
297
|
+
|
298
|
+
return result(:arg_state, token, "&")
|
299
|
+
end
|
300
|
+
|
301
|
+
def process_backref text
|
302
|
+
token = ss[1].to_sym
|
303
|
+
# TODO: can't do lineno hack w/ symbol
|
304
|
+
result :expr_end, :tBACK_REF, token
|
305
|
+
end
|
306
|
+
|
307
|
+
def process_backtick text
|
308
|
+
case lex_state
|
309
|
+
when :expr_fname then
|
310
|
+
result :expr_end, :tBACK_REF2, "`"
|
311
|
+
when :expr_dot then
|
312
|
+
result((command_state ? :expr_cmdarg : :expr_arg), :tBACK_REF2, "`")
|
313
|
+
else
|
314
|
+
string STR_XQUOTE
|
315
|
+
result nil, :tXSTRING_BEG, "`"
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
def process_bang text
|
320
|
+
if in_arg_state? then
|
321
|
+
return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
|
322
|
+
end
|
323
|
+
|
324
|
+
text = scan(/[=~]/) ? "!#{matched}" : "!"
|
325
|
+
|
326
|
+
return result(arg_state, TOKENS[text], text)
|
327
|
+
end
|
328
|
+
|
329
|
+
def process_begin text
|
330
|
+
@comments << matched
|
331
|
+
|
332
|
+
unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
|
333
|
+
@comments.clear
|
334
|
+
rb_compile_error("embedded document meets end of file")
|
335
|
+
end
|
336
|
+
|
337
|
+
@comments << matched
|
338
|
+
|
339
|
+
nil # TODO
|
340
|
+
end
|
341
|
+
|
342
|
+
def process_bracing text
|
343
|
+
cond.lexpop
|
344
|
+
cmdarg.lexpop
|
345
|
+
|
346
|
+
case matched
|
347
|
+
when "}" then
|
348
|
+
self.brace_nest -= 1
|
349
|
+
self.lex_state = :expr_endarg
|
350
|
+
return :tRCURLY, matched
|
351
|
+
when "]" then
|
352
|
+
self.paren_nest -= 1
|
353
|
+
self.lex_state = :expr_endarg
|
354
|
+
return :tRBRACK, matched
|
355
|
+
when ")" then
|
356
|
+
self.paren_nest -= 1
|
357
|
+
self.lex_state = :expr_endfn
|
358
|
+
return :tRPAREN, matched
|
359
|
+
else
|
360
|
+
raise "Unknown bracing: #{matched.inspect}"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
def process_colon1 text
|
365
|
+
# ?: / then / when
|
366
|
+
if is_end? || check(/\s/) then
|
367
|
+
return result :expr_beg, :tCOLON, text
|
368
|
+
end
|
325
369
|
|
326
370
|
case
|
327
|
-
when scan(/
|
328
|
-
|
329
|
-
when scan(/
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
when scan(/[+-]?[\d_]+_(e|\.)/) then
|
340
|
-
rb_compile_error "Trailing '_' in number."
|
341
|
-
when scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
|
342
|
-
number = matched
|
343
|
-
if number =~ /__/ then
|
344
|
-
rb_compile_error "Invalid numeric format"
|
345
|
-
end
|
346
|
-
self.yacc_value = number.to_f
|
347
|
-
:tFLOAT
|
348
|
-
when scan(/[+-]?[0-9_]+(?![e])/) then
|
349
|
-
int_with_base(10)
|
371
|
+
when scan(/\'/) then
|
372
|
+
string STR_SSYM
|
373
|
+
when scan(/\"/) then
|
374
|
+
string STR_DSYM
|
375
|
+
end
|
376
|
+
|
377
|
+
result :expr_fname, :tSYMBEG, text
|
378
|
+
end
|
379
|
+
|
380
|
+
def process_colon2 text
|
381
|
+
if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
|
382
|
+
result :expr_beg, :tCOLON3, text
|
350
383
|
else
|
351
|
-
|
384
|
+
result :expr_dot, :tCOLON2, text
|
352
385
|
end
|
353
386
|
end
|
354
387
|
|
355
|
-
def
|
356
|
-
|
388
|
+
def process_curly_brace text
|
389
|
+
self.brace_nest += 1
|
390
|
+
if lpar_beg && lpar_beg == paren_nest then
|
391
|
+
self.lpar_beg = nil
|
392
|
+
self.paren_nest -= 1
|
357
393
|
|
358
|
-
|
359
|
-
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
360
|
-
c, beg, short_hand = matched, ss.getch, false
|
361
|
-
else # Short-hand (e.g. %{, %., %!, etc)
|
362
|
-
c, beg, short_hand = 'Q', ss.getch, true
|
394
|
+
return expr_result(:tLAMBEG, "{")
|
363
395
|
end
|
364
396
|
|
365
|
-
if
|
366
|
-
|
397
|
+
token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
|
398
|
+
:tLCURLY # block (primary)
|
399
|
+
elsif in_lex_state?(:expr_endarg) then
|
400
|
+
:tLBRACE_ARG # block (expr)
|
401
|
+
else
|
402
|
+
:tLBRACE # hash
|
403
|
+
end
|
404
|
+
|
405
|
+
self.command_start = true unless token == :tLBRACE
|
406
|
+
|
407
|
+
return expr_result(token, "{")
|
408
|
+
end
|
409
|
+
|
410
|
+
def process_float text
|
411
|
+
rb_compile_error "Invalid numeric format" if text =~ /__/
|
412
|
+
return result(:expr_end, :tFLOAT, text.to_f)
|
413
|
+
end
|
414
|
+
|
415
|
+
def process_gvar text
|
416
|
+
text.lineno = self.lineno
|
417
|
+
result(:expr_end, :tGVAR, text)
|
418
|
+
end
|
419
|
+
|
420
|
+
def process_gvar_oddity text
|
421
|
+
result :expr_end, "$", "$" # TODO: wtf is this?
|
422
|
+
end
|
423
|
+
|
424
|
+
def process_ivar text
|
425
|
+
tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
|
426
|
+
text.lineno = self.lineno
|
427
|
+
return result(:expr_end, tok_id, text)
|
428
|
+
end
|
429
|
+
|
430
|
+
def process_lchevron text
|
431
|
+
if (!in_lex_state?(:expr_dot, :expr_class) &&
|
432
|
+
!is_end? &&
|
433
|
+
(!is_arg? || space_seen)) then
|
434
|
+
tok = self.heredoc_identifier
|
435
|
+
return tok if tok
|
367
436
|
end
|
368
437
|
|
369
|
-
|
370
|
-
|
371
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
438
|
+
return result(:arg_state, :tLSHFT, "\<\<")
|
439
|
+
end
|
372
440
|
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
ch = short_hand ? nnd : c + beg
|
377
|
-
text = "%#{ch}"
|
378
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
379
|
-
when 'q' then
|
380
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
381
|
-
when 'W' then
|
382
|
-
scan(/\s*/)
|
383
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
384
|
-
when 'w' then
|
385
|
-
scan(/\s*/)
|
386
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
387
|
-
when 'x' then
|
388
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
389
|
-
when 'r' then
|
390
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
391
|
-
when 's' then
|
392
|
-
self.lex_state = :expr_fname
|
393
|
-
[:tSYMBEG, STR_SSYM]
|
394
|
-
when 'I' then
|
395
|
-
src.scan(/\s*/)
|
396
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
397
|
-
when 'i' then
|
398
|
-
src.scan(/\s*/)
|
399
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
400
|
-
end
|
441
|
+
def process_newline_or_comment text
|
442
|
+
c = matched
|
443
|
+
hit = false
|
401
444
|
|
402
|
-
|
403
|
-
|
445
|
+
if c == '#' then
|
446
|
+
ss.pos -= 1
|
404
447
|
|
405
|
-
|
448
|
+
while scan(/\s*\#.*(\n+|\z)/) do
|
449
|
+
hit = true
|
450
|
+
self.lineno += matched.lines.to_a.size
|
451
|
+
@comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
|
452
|
+
end
|
406
453
|
|
407
|
-
|
454
|
+
return nil if end_of_stream?
|
455
|
+
end
|
456
|
+
|
457
|
+
self.lineno += 1 unless hit
|
458
|
+
|
459
|
+
# Replace a string of newlines with a single one
|
460
|
+
self.lineno += matched.lines.to_a.size if scan(/\n+/)
|
461
|
+
|
462
|
+
return if in_lex_state?(:expr_beg, :expr_value, :expr_class,
|
463
|
+
:expr_fname, :expr_dot)
|
464
|
+
|
465
|
+
if scan(/([\ \t\r\f\v]*)\./) then
|
466
|
+
self.space_seen = true unless ss[1].empty?
|
467
|
+
|
468
|
+
ss.pos -= 1
|
469
|
+
return unless check(/\.\./)
|
470
|
+
end
|
471
|
+
|
472
|
+
self.command_start = true
|
408
473
|
|
409
|
-
|
410
|
-
return token_type
|
474
|
+
return result(:expr_beg, :tNL, nil)
|
411
475
|
end
|
412
476
|
|
413
|
-
def
|
414
|
-
|
477
|
+
def process_nthref text
|
478
|
+
# TODO: can't do lineno hack w/ number
|
479
|
+
result :expr_end, :tNTH_REF, ss[1].to_i
|
480
|
+
end
|
415
481
|
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
482
|
+
def process_paren text
|
483
|
+
token = if ruby18 then
|
484
|
+
process_paren18
|
485
|
+
else
|
486
|
+
process_paren19
|
487
|
+
end
|
420
488
|
|
421
|
-
|
422
|
-
regexp = (func & STR_FUNC_REGEXP) != 0
|
423
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
489
|
+
self.paren_nest += 1
|
424
490
|
|
425
|
-
|
426
|
-
|
427
|
-
|
491
|
+
return expr_result(token, "(")
|
492
|
+
end
|
493
|
+
|
494
|
+
def process_paren18
|
495
|
+
self.command_start = true
|
496
|
+
token = :tLPAREN2
|
497
|
+
|
498
|
+
if in_lex_state? :expr_beg, :expr_mid then
|
499
|
+
token = :tLPAREN
|
500
|
+
elsif space_seen then
|
501
|
+
if in_lex_state? :expr_cmdarg then
|
502
|
+
token = :tLPAREN_ARG
|
503
|
+
elsif in_lex_state? :expr_arg then
|
504
|
+
warning "don't put space before argument parentheses"
|
505
|
+
end
|
506
|
+
else
|
507
|
+
# not a ternary -- do nothing?
|
428
508
|
end
|
429
509
|
|
430
|
-
|
510
|
+
token
|
511
|
+
end
|
431
512
|
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
513
|
+
def process_paren19
|
514
|
+
if is_beg? then
|
515
|
+
:tLPAREN
|
516
|
+
elsif is_space_arg? then
|
517
|
+
:tLPAREN_ARG
|
518
|
+
else
|
519
|
+
:tLPAREN2 # plain '(' in parse.y
|
520
|
+
end
|
521
|
+
end
|
522
|
+
|
523
|
+
def process_percent text
|
524
|
+
return parse_quote if is_beg?
|
525
|
+
|
526
|
+
return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
|
527
|
+
|
528
|
+
return parse_quote if is_arg? && space_seen && ! check(/\s/)
|
529
|
+
|
530
|
+
return result(:arg_state, :tPERCENT, "%")
|
531
|
+
end
|
532
|
+
|
533
|
+
def process_plus_minus text
|
534
|
+
sign = matched
|
535
|
+
utype, type = if sign == "+" then
|
536
|
+
[:tUPLUS, :tPLUS]
|
537
|
+
else
|
538
|
+
[:tUMINUS, :tMINUS]
|
539
|
+
end
|
540
|
+
|
541
|
+
if in_arg_state? then
|
542
|
+
if scan(/@/) then
|
543
|
+
return result(:expr_arg, utype, "#{sign}@")
|
440
544
|
else
|
441
|
-
|
442
|
-
self.yacc_value = term
|
443
|
-
return :tSTRING_END
|
545
|
+
return result(:expr_arg, type, sign)
|
444
546
|
end
|
445
547
|
end
|
446
548
|
|
447
|
-
return :
|
549
|
+
return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
|
448
550
|
|
449
|
-
|
551
|
+
if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
|
552
|
+
arg_ambiguous if is_arg?
|
450
553
|
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
return :tSTRING_DVAR
|
455
|
-
when scan(/#[{]/) then
|
456
|
-
return :tSTRING_DBEG
|
457
|
-
when scan(/#/) then
|
458
|
-
string_buffer << '#'
|
554
|
+
if check(/\d/) then
|
555
|
+
return nil if utype == :tUPLUS
|
556
|
+
return result(:expr_beg, :tUMINUS_NUM, sign)
|
459
557
|
end
|
558
|
+
|
559
|
+
return result(:expr_beg, utype, sign)
|
460
560
|
end
|
461
561
|
|
462
|
-
|
463
|
-
|
562
|
+
return result(:expr_beg, type, sign)
|
563
|
+
end
|
564
|
+
|
565
|
+
def process_questionmark text
|
566
|
+
if is_end? then
|
567
|
+
state = ruby18 ? :expr_beg : :expr_value # HACK?
|
568
|
+
return result(state, :tEH, "?")
|
569
|
+
end
|
570
|
+
|
571
|
+
if end_of_stream? then
|
572
|
+
rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
|
464
573
|
end
|
465
574
|
|
466
|
-
|
575
|
+
if check(/\s|\v/) then
|
576
|
+
unless is_arg? then
|
577
|
+
c2 = { " " => 's',
|
578
|
+
"\n" => 'n',
|
579
|
+
"\t" => 't',
|
580
|
+
"\v" => 'v',
|
581
|
+
"\r" => 'r',
|
582
|
+
"\f" => 'f' }[matched]
|
583
|
+
|
584
|
+
if c2 then
|
585
|
+
warning("invalid character syntax; use ?\\" + c2)
|
586
|
+
end
|
587
|
+
end
|
467
588
|
|
468
|
-
|
589
|
+
# ternary
|
590
|
+
state = ruby18 ? :expr_beg : :expr_value # HACK?
|
591
|
+
return result(state, :tEH, "?")
|
592
|
+
elsif check(/\w(?=\w)/) then # ternary, also
|
593
|
+
return result(:expr_beg, :tEH, "?")
|
594
|
+
end
|
595
|
+
|
596
|
+
c = if scan(/\\/) then
|
597
|
+
self.read_escape
|
598
|
+
else
|
599
|
+
ss.getch
|
600
|
+
end
|
601
|
+
|
602
|
+
if version == 18 then
|
603
|
+
return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
|
604
|
+
else
|
605
|
+
return result(:expr_end, :tSTRING, c)
|
606
|
+
end
|
469
607
|
end
|
470
608
|
|
471
|
-
def
|
472
|
-
|
609
|
+
def process_slash text
|
610
|
+
if is_beg? then
|
611
|
+
string STR_REGEXP
|
612
|
+
|
613
|
+
return result(nil, :tREGEXP_BEG, "/")
|
614
|
+
end
|
615
|
+
|
616
|
+
if scan(/\=/) then
|
617
|
+
return result(:expr_beg, :tOP_ASGN, "/")
|
618
|
+
end
|
619
|
+
|
620
|
+
if is_arg? && space_seen then
|
621
|
+
unless scan(/\s/) then
|
622
|
+
arg_ambiguous
|
623
|
+
string STR_REGEXP, "/"
|
624
|
+
return result(nil, :tREGEXP_BEG, "/")
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
return result(:arg_state, :tDIVIDE, "/")
|
629
|
+
end
|
630
|
+
|
631
|
+
def process_square_bracket text
|
632
|
+
self.paren_nest += 1
|
633
|
+
|
634
|
+
token = nil
|
635
|
+
|
636
|
+
if in_arg_state? then
|
637
|
+
case
|
638
|
+
when scan(/\]\=/) then
|
639
|
+
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
640
|
+
return result(:expr_arg, :tASET, "[]=")
|
641
|
+
when scan(/\]/) then
|
642
|
+
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
643
|
+
return result(:expr_arg, :tAREF, "[]")
|
644
|
+
else
|
645
|
+
rb_compile_error "unexpected '['"
|
646
|
+
end
|
647
|
+
elsif is_beg? then
|
648
|
+
token = :tLBRACK
|
649
|
+
elsif is_arg? && space_seen then
|
650
|
+
token = :tLBRACK
|
651
|
+
else
|
652
|
+
token = :tLBRACK2
|
653
|
+
end
|
654
|
+
|
655
|
+
return expr_result(token, "[")
|
656
|
+
end
|
657
|
+
|
658
|
+
def process_symbol text
|
659
|
+
symbol = match[1].gsub(ESC) { unescape $1 }
|
660
|
+
|
661
|
+
rb_compile_error "symbol cannot contain '\\0'" if
|
662
|
+
ruby18 && symbol =~ /\0/
|
663
|
+
|
664
|
+
return result(:expr_end, :tSYMBOL, symbol)
|
665
|
+
end
|
666
|
+
|
667
|
+
def process_token text
|
668
|
+
# TODO: make this always return [token, lineno]
|
669
|
+
token = self.token = text
|
473
670
|
token << matched if scan(/[\!\?](?!=)/)
|
474
671
|
|
475
672
|
tok_id =
|
@@ -487,8 +684,8 @@ class RubyLexer
|
|
487
684
|
:tIDENTIFIER
|
488
685
|
end
|
489
686
|
|
490
|
-
if !ruby18 and is_label_possible?
|
491
|
-
return result(:expr_beg, :tLABEL, [token,
|
687
|
+
if !ruby18 and is_label_possible? and scan(/:(?!:)/) then
|
688
|
+
return result(:expr_beg, :tLABEL, [token, self.lineno])
|
492
689
|
end
|
493
690
|
|
494
691
|
unless in_lex_state? :expr_dot then
|
@@ -518,12 +715,15 @@ class RubyLexer
|
|
518
715
|
state = :expr_end
|
519
716
|
end
|
520
717
|
|
718
|
+
token.lineno = self.lineno # yes, on a string. I know... I know...
|
719
|
+
|
521
720
|
return result(state, tok_id, token)
|
522
721
|
end
|
523
722
|
|
524
723
|
def process_token_keyword keyword
|
525
724
|
state = keyword.state
|
526
|
-
|
725
|
+
|
726
|
+
value = [token, self.lineno]
|
527
727
|
|
528
728
|
self.command_start = true if state == :expr_beg and lex_state != :expr_fname
|
529
729
|
|
@@ -554,6 +754,16 @@ class RubyLexer
|
|
554
754
|
end
|
555
755
|
end
|
556
756
|
|
757
|
+
def process_underscore text
|
758
|
+
ss.unscan # put back "_"
|
759
|
+
|
760
|
+
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
761
|
+
return [RubyLexer::EOF, RubyLexer::EOF]
|
762
|
+
elsif scan(/\_\w*/) then
|
763
|
+
return process_token matched
|
764
|
+
end
|
765
|
+
end
|
766
|
+
|
557
767
|
def rb_compile_error msg
|
558
768
|
msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
|
559
769
|
raise RubyParser::SyntaxError, msg
|
@@ -607,7 +817,7 @@ class RubyLexer
|
|
607
817
|
c
|
608
818
|
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
609
819
|
matched
|
610
|
-
when scan(/u([0-9a-fA-F]
|
820
|
+
when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
|
611
821
|
[ss[1].delete("{}").to_i(16)].pack("U")
|
612
822
|
when scan(/[McCx0-9]/) || end_of_stream? then
|
613
823
|
rb_compile_error("Invalid escape character syntax")
|
@@ -643,19 +853,15 @@ class RubyLexer
|
|
643
853
|
self.space_seen = false
|
644
854
|
self.string_nest = 0
|
645
855
|
self.token = nil
|
646
|
-
self.yacc_value = nil
|
647
856
|
|
648
857
|
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
|
649
858
|
self.cond = RubyParserStuff::StackState.new(:cond)
|
650
|
-
|
651
|
-
@src = nil
|
652
859
|
end
|
653
860
|
|
654
861
|
def result lex_state, token, text # :nodoc:
|
655
862
|
lex_state = self.arg_state if lex_state == :arg_state
|
656
863
|
self.lex_state = lex_state if lex_state
|
657
|
-
|
658
|
-
token
|
864
|
+
[token, text]
|
659
865
|
end
|
660
866
|
|
661
867
|
def ruby18
|
@@ -670,6 +876,10 @@ class RubyLexer
|
|
670
876
|
ss.scan re
|
671
877
|
end
|
672
878
|
|
879
|
+
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
880
|
+
RPStringScanner
|
881
|
+
end
|
882
|
+
|
673
883
|
def space_vs_beginning space_type, beg_type, fallback
|
674
884
|
if is_space_arg? check(/./m) then
|
675
885
|
warning "`**' interpreted as argument prefix"
|
@@ -686,10 +896,11 @@ class RubyLexer
|
|
686
896
|
self.lex_strterm = [:strterm, type, beg, nnd]
|
687
897
|
end
|
688
898
|
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
899
|
+
# TODO: consider
|
900
|
+
# def src= src
|
901
|
+
# raise "bad src: #{src.inspect}" unless String === src
|
902
|
+
# @src = RPStringScanner.new(src)
|
903
|
+
# end
|
693
904
|
|
694
905
|
def tokadd_escape term # TODO: rewrite / remove
|
695
906
|
case
|
@@ -820,7 +1031,7 @@ class RubyLexer
|
|
820
1031
|
s
|
821
1032
|
when /^[McCx0-9]/ then
|
822
1033
|
rb_compile_error("Invalid escape character syntax")
|
823
|
-
when /u([0-9a-fA-F]
|
1034
|
+
when /u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
|
824
1035
|
[$1.delete("{}").to_i(16)].pack("U")
|
825
1036
|
else
|
826
1037
|
s
|
@@ -833,519 +1044,146 @@ class RubyLexer
|
|
833
1044
|
# do nothing for now
|
834
1045
|
end
|
835
1046
|
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
# @return Description of the Returned Value
|
840
|
-
|
841
|
-
def yylex # 461 lines
|
842
|
-
c = ''
|
843
|
-
self.space_seen = false
|
844
|
-
command_state = false
|
845
|
-
ss = self.src
|
846
|
-
|
847
|
-
self.token = nil
|
848
|
-
self.yacc_value = nil
|
849
|
-
|
850
|
-
return yylex_string if lex_strterm
|
851
|
-
|
852
|
-
command_state = self.command_start
|
853
|
-
self.command_start = false
|
854
|
-
|
855
|
-
last_state = lex_state
|
856
|
-
|
857
|
-
loop do # START OF CASE
|
858
|
-
if scan(/[\ \t\r\f\v]/) then # \s - \n + \v
|
859
|
-
self.space_seen = true
|
860
|
-
next
|
861
|
-
elsif check(/[^a-zA-Z]/) then
|
862
|
-
if scan(/\n|\#/) then
|
863
|
-
self.lineno = nil
|
864
|
-
c = matched
|
865
|
-
if c == '#' then
|
866
|
-
ss.pos -= 1
|
867
|
-
|
868
|
-
while scan(/\s*#.*(\n+|\z)/) do
|
869
|
-
# TODO: self.lineno += matched.lines.to_a.size
|
870
|
-
@comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
|
871
|
-
end
|
872
|
-
|
873
|
-
return RubyLexer::EOF if end_of_stream?
|
874
|
-
end
|
875
|
-
|
876
|
-
# Replace a string of newlines with a single one
|
877
|
-
scan(/\n+/)
|
878
|
-
|
879
|
-
next if in_lex_state?(:expr_beg, :expr_value, :expr_class,
|
880
|
-
:expr_fname, :expr_dot)
|
881
|
-
|
882
|
-
if scan(/([\ \t\r\f\v]*)\./) then
|
883
|
-
self.space_seen = true unless ss[1].empty?
|
884
|
-
|
885
|
-
ss.pos -= 1
|
886
|
-
next unless check(/\.\./)
|
887
|
-
end
|
888
|
-
|
889
|
-
self.command_start = true
|
890
|
-
|
891
|
-
return result(:expr_beg, :tNL, nil)
|
892
|
-
elsif scan(/[\]\)\}]/) then
|
893
|
-
if matched == "}" then
|
894
|
-
self.brace_nest -= 1
|
895
|
-
else
|
896
|
-
self.paren_nest -= 1
|
897
|
-
end
|
898
|
-
|
899
|
-
cond.lexpop
|
900
|
-
cmdarg.lexpop
|
901
|
-
|
902
|
-
text = matched
|
903
|
-
state = text == ")" ? :expr_endfn : :expr_endarg
|
904
|
-
token = {
|
905
|
-
")" => :tRPAREN,
|
906
|
-
"]" => :tRBRACK,
|
907
|
-
"}" => :tRCURLY
|
908
|
-
}[text]
|
909
|
-
|
910
|
-
return result(state, token, text)
|
911
|
-
elsif scan(/\!/) then
|
912
|
-
if in_arg_state? then
|
913
|
-
return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
|
914
|
-
end
|
915
|
-
|
916
|
-
text = scan(/[=~]/) ? "!#{matched}" : "!"
|
917
|
-
|
918
|
-
return result(arg_state, TOKENS[text], text)
|
919
|
-
elsif scan(/\.\.\.?|,|![=~]?/) then
|
920
|
-
return result(:expr_beg, TOKENS[matched], matched)
|
921
|
-
elsif check(/\./) then
|
922
|
-
if scan(/\.\d/) then
|
923
|
-
rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
|
924
|
-
elsif scan(/\./) then
|
925
|
-
return result(:expr_dot, :tDOT, ".")
|
926
|
-
end
|
927
|
-
elsif scan(/\(/) then
|
928
|
-
token = if ruby18 then
|
929
|
-
yylex_paren18
|
930
|
-
else
|
931
|
-
yylex_paren19
|
932
|
-
end
|
933
|
-
|
934
|
-
self.paren_nest += 1
|
935
|
-
|
936
|
-
return expr_result(token, "(")
|
937
|
-
elsif check(/\=/) then
|
938
|
-
if scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
|
939
|
-
tok = matched
|
940
|
-
return result(:arg_state, TOKENS[tok], tok)
|
941
|
-
elsif beginning_of_line? and scan(/\=begin(?=\s)/) then
|
942
|
-
@comments << matched
|
943
|
-
|
944
|
-
unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
|
945
|
-
@comments.clear
|
946
|
-
rb_compile_error("embedded document meets end of file")
|
947
|
-
end
|
948
|
-
|
949
|
-
@comments << matched
|
950
|
-
|
951
|
-
next
|
952
|
-
elsif scan(/\=(?=begin\b)/) then # h[k]=begin ... end
|
953
|
-
tok = matched
|
954
|
-
return result(:arg_state, TOKENS[tok], tok)
|
955
|
-
else
|
956
|
-
raise "you shouldn't be able to get here"
|
957
|
-
end
|
958
|
-
elsif scan(/\"(#{SIMPLE_STRING})\"/o) then
|
959
|
-
string = matched[1..-2].gsub(ESC) { unescape $1 }
|
960
|
-
return result(:expr_end, :tSTRING, string)
|
961
|
-
elsif scan(/\"/) then # FALLBACK
|
962
|
-
string STR_DQUOTE, '"' # TODO: question this
|
963
|
-
return result(nil, :tSTRING_BEG, '"')
|
964
|
-
elsif scan(/\@\@?#{IDENT_CHAR}+/o) then
|
965
|
-
self.token = matched
|
966
|
-
|
967
|
-
rb_compile_error "`#{self.token}` is not allowed as a variable name" if
|
968
|
-
self.token =~ /\@\d/
|
969
|
-
|
970
|
-
tok_id = matched =~ /^@@/ ? :tCVAR : :tIVAR
|
971
|
-
return result(:expr_end, tok_id, self.token)
|
972
|
-
elsif scan(/\:\:/) then
|
973
|
-
if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
|
974
|
-
return result(:expr_beg, :tCOLON3, "::")
|
975
|
-
end
|
976
|
-
|
977
|
-
return result(:expr_dot, :tCOLON2, "::")
|
978
|
-
elsif ! is_end? && scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
|
979
|
-
# scanning shortcut to symbols
|
980
|
-
return result(:expr_end, :tSYMBOL, ss[1])
|
981
|
-
elsif ! is_end? && (scan(/\:\"(#{SIMPLE_STRING})\"/) ||
|
982
|
-
scan(/\:\'(#{SIMPLE_SSTRING})\'/)) then
|
983
|
-
symbol = ss[1].gsub(ESC) { unescape $1 }
|
984
|
-
|
985
|
-
rb_compile_error "symbol cannot contain '\\0'" if
|
986
|
-
ruby18 && symbol =~ /\0/
|
987
|
-
|
988
|
-
return result(:expr_end, :tSYMBOL, symbol)
|
989
|
-
elsif scan(/\:/) then
|
990
|
-
# ?: / then / when
|
991
|
-
if is_end? || check(/\s/) then
|
992
|
-
# TODO warn_balanced(":", "symbol literal");
|
993
|
-
return result(:expr_beg, :tCOLON, ":")
|
994
|
-
end
|
995
|
-
|
996
|
-
case
|
997
|
-
when scan(/\'/) then
|
998
|
-
string STR_SSYM, matched
|
999
|
-
when scan(/\"/) then
|
1000
|
-
string STR_DSYM, matched
|
1001
|
-
end
|
1002
|
-
|
1003
|
-
return result(:expr_fname, :tSYMBEG, ":")
|
1004
|
-
elsif check(/[0-9]/) then
|
1005
|
-
return parse_number
|
1006
|
-
elsif scan(/\[/) then
|
1007
|
-
self.paren_nest += 1
|
1008
|
-
|
1009
|
-
token = nil
|
1010
|
-
|
1011
|
-
if in_lex_state? :expr_fname, :expr_dot then
|
1012
|
-
case
|
1013
|
-
when scan(/\]\=/) then
|
1014
|
-
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
1015
|
-
return result(:expr_arg, :tASET, "[]=")
|
1016
|
-
when scan(/\]/) then
|
1017
|
-
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
1018
|
-
return result(:expr_arg, :tAREF, "[]")
|
1047
|
+
def process_string # TODO: rewrite / remove
|
1048
|
+
token = if lex_strterm[0] == :heredoc then
|
1049
|
+
self.heredoc lex_strterm
|
1019
1050
|
else
|
1020
|
-
|
1051
|
+
self.parse_string lex_strterm
|
1021
1052
|
end
|
1022
|
-
elsif is_beg? then
|
1023
|
-
token = :tLBRACK
|
1024
|
-
elsif is_arg? && space_seen then
|
1025
|
-
token = :tLBRACK
|
1026
|
-
else
|
1027
|
-
token = :tLBRACK2
|
1028
|
-
end
|
1029
1053
|
|
1030
|
-
|
1031
|
-
elsif scan(/\'#{SIMPLE_SSTRING}\'/) then
|
1032
|
-
text = matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
|
1033
|
-
return result(:expr_end, :tSTRING, text)
|
1034
|
-
elsif check(/\|/) then
|
1035
|
-
if scan(/\|\|\=/) then
|
1036
|
-
return result(:expr_beg, :tOP_ASGN, "||")
|
1037
|
-
elsif scan(/\|\|/) then
|
1038
|
-
return result(:expr_beg, :tOROP, "||")
|
1039
|
-
elsif scan(/\|\=/) then
|
1040
|
-
return result(:expr_beg, :tOP_ASGN, "|")
|
1041
|
-
elsif scan(/\|/) then
|
1042
|
-
return result(:arg_state, :tPIPE, "|")
|
1043
|
-
end
|
1044
|
-
elsif scan(/\{/) then
|
1045
|
-
self.brace_nest += 1
|
1046
|
-
if lpar_beg && lpar_beg == paren_nest then
|
1047
|
-
self.lpar_beg = nil
|
1048
|
-
self.paren_nest -= 1
|
1054
|
+
token_type, _ = token
|
1049
1055
|
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1053
|
-
|
1054
|
-
:tLCURLY # block (primary)
|
1055
|
-
elsif in_lex_state?(:expr_endarg) then
|
1056
|
-
:tLBRACE_ARG # block (expr)
|
1057
|
-
else
|
1058
|
-
:tLBRACE # hash
|
1059
|
-
end
|
1060
|
-
|
1061
|
-
self.command_start = true unless token == :tLBRACE
|
1062
|
-
|
1063
|
-
return expr_result(token, "{")
|
1064
|
-
elsif scan(/->/) then
|
1065
|
-
return result(:expr_endfn, :tLAMBDA, nil)
|
1066
|
-
elsif scan(/[+-]/) then
|
1067
|
-
sign = matched
|
1068
|
-
utype, type = if sign == "+" then
|
1069
|
-
[:tUPLUS, :tPLUS]
|
1070
|
-
else
|
1071
|
-
[:tUMINUS, :tMINUS]
|
1072
|
-
end
|
1073
|
-
|
1074
|
-
if in_arg_state? then
|
1075
|
-
if scan(/@/) then
|
1076
|
-
return result(:expr_arg, utype, "#{sign}@")
|
1077
|
-
else
|
1078
|
-
return result(:expr_arg, type, sign)
|
1079
|
-
end
|
1080
|
-
end
|
1081
|
-
|
1082
|
-
return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
|
1056
|
+
if token_type == :tSTRING_END || token_type == :tREGEXP_END then
|
1057
|
+
self.lex_strterm = nil
|
1058
|
+
self.lex_state = :expr_end
|
1059
|
+
end
|
1083
1060
|
|
1084
|
-
|
1085
|
-
|
1061
|
+
return token
|
1062
|
+
end
|
1086
1063
|
|
1087
|
-
|
1088
|
-
|
1089
|
-
return result(:expr_beg, :tUMINUS_NUM, sign)
|
1090
|
-
end
|
1064
|
+
def parse_quote # TODO: remove / rewrite
|
1065
|
+
beg, nnd, short_hand, c = nil, nil, false, nil
|
1091
1066
|
|
1092
|
-
|
1093
|
-
|
1067
|
+
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
1068
|
+
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
1069
|
+
c, beg, short_hand = matched, ss.getch, false
|
1070
|
+
else # Short-hand (e.g. %{, %., %!, etc)
|
1071
|
+
c, beg, short_hand = 'Q', ss.getch, true
|
1072
|
+
end
|
1094
1073
|
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
return result(:expr_beg, :tOP_ASGN, "**")
|
1099
|
-
elsif scan(/\*\*/) then
|
1100
|
-
token = space_vs_beginning :tDSTAR, :tDSTAR, :tPOW
|
1074
|
+
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
1075
|
+
rb_compile_error "unterminated quoted string meets end of file"
|
1076
|
+
end
|
1101
1077
|
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
elsif scan(/\*/) then
|
1106
|
-
token = space_vs_beginning :tSTAR, :tSTAR, :tSTAR2
|
1078
|
+
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
1079
|
+
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
1080
|
+
nnd, beg = beg, "\0" if nnd.nil?
|
1107
1081
|
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1082
|
+
token_type, text = nil, "%#{c}#{beg}"
|
1083
|
+
token_type, string_type = case c
|
1084
|
+
when 'Q' then
|
1085
|
+
ch = short_hand ? nnd : c + beg
|
1086
|
+
text = "%#{ch}"
|
1087
|
+
[:tSTRING_BEG, STR_DQUOTE]
|
1088
|
+
when 'q' then
|
1089
|
+
[:tSTRING_BEG, STR_SQUOTE]
|
1090
|
+
when 'W' then
|
1091
|
+
scan(/\s*/)
|
1092
|
+
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1093
|
+
when 'w' then
|
1094
|
+
scan(/\s*/)
|
1095
|
+
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1096
|
+
when 'x' then
|
1097
|
+
[:tXSTRING_BEG, STR_XQUOTE]
|
1098
|
+
when 'r' then
|
1099
|
+
[:tREGEXP_BEG, STR_REGEXP]
|
1100
|
+
when 's' then
|
1101
|
+
self.lex_state = :expr_fname
|
1102
|
+
[:tSYMBEG, STR_SSYM]
|
1103
|
+
when 'I' then
|
1104
|
+
scan(/\s*/)
|
1105
|
+
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1106
|
+
when 'i' then
|
1107
|
+
scan(/\s*/)
|
1108
|
+
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1109
|
+
end
|
1124
1110
|
|
1125
|
-
|
1126
|
-
|
1127
|
-
return result(:arg_state, :tLT, "<")
|
1128
|
-
end
|
1129
|
-
elsif check(/\>/) then
|
1130
|
-
if scan(/\>\=/) then
|
1131
|
-
return result(:arg_state, :tGEQ, ">=")
|
1132
|
-
elsif scan(/\>\>=/) then
|
1133
|
-
return result(:arg_state, :tOP_ASGN, ">>")
|
1134
|
-
elsif scan(/\>\>/) then
|
1135
|
-
return result(:arg_state, :tRSHFT, ">>")
|
1136
|
-
elsif scan(/\>/) then
|
1137
|
-
return result(:arg_state, :tGT, ">")
|
1138
|
-
end
|
1139
|
-
elsif scan(/\`/) then
|
1140
|
-
case lex_state
|
1141
|
-
when :expr_fname then
|
1142
|
-
return result(:expr_end, :tBACK_REF2, "`")
|
1143
|
-
when :expr_dot then
|
1144
|
-
state = command_state ? :expr_cmdarg : :expr_arg
|
1145
|
-
return result(state, :tBACK_REF2, "`")
|
1146
|
-
else
|
1147
|
-
string STR_XQUOTE, '`'
|
1148
|
-
return result(nil, :tXSTRING_BEG, "`")
|
1149
|
-
end
|
1150
|
-
elsif scan(/\?/) then
|
1151
|
-
if is_end? then
|
1152
|
-
state = ruby18 ? :expr_beg : :expr_value # HACK?
|
1153
|
-
return result(state, :tEH, "?")
|
1154
|
-
end
|
1111
|
+
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
1112
|
+
token_type.nil?
|
1155
1113
|
|
1156
|
-
|
1157
|
-
rb_compile_error "incomplete character syntax"
|
1158
|
-
end
|
1114
|
+
raise "huh" unless string_type
|
1159
1115
|
|
1160
|
-
|
1161
|
-
unless is_arg? then
|
1162
|
-
c2 = { " " => 's',
|
1163
|
-
"\n" => 'n',
|
1164
|
-
"\t" => 't',
|
1165
|
-
"\v" => 'v',
|
1166
|
-
"\r" => 'r',
|
1167
|
-
"\f" => 'f' }[matched]
|
1168
|
-
|
1169
|
-
if c2 then
|
1170
|
-
warning("invalid character syntax; use ?\\" + c2)
|
1171
|
-
end
|
1172
|
-
end
|
1116
|
+
string string_type, nnd, beg
|
1173
1117
|
|
1174
|
-
|
1175
|
-
|
1176
|
-
return result(state, :tEH, "?")
|
1177
|
-
elsif check(/\w(?=\w)/) then # ternary, also
|
1178
|
-
return result(:expr_beg, :tEH, "?")
|
1179
|
-
end
|
1118
|
+
return token_type, text
|
1119
|
+
end
|
1180
1120
|
|
1181
|
-
|
1182
|
-
|
1183
|
-
else
|
1184
|
-
ss.getch
|
1185
|
-
end
|
1121
|
+
def parse_string quote # TODO: rewrite / remove
|
1122
|
+
_, string_type, term, open = quote
|
1186
1123
|
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
end
|
1192
|
-
elsif check(/\&/) then
|
1193
|
-
if scan(/\&\&\=/) then
|
1194
|
-
return result(:expr_beg, :tOP_ASGN, "&&")
|
1195
|
-
elsif scan(/\&\&/) then
|
1196
|
-
return result(:expr_beg, :tANDOP, "&&")
|
1197
|
-
elsif scan(/\&\=/) then
|
1198
|
-
return result(:expr_beg, :tOP_ASGN, "&")
|
1199
|
-
elsif scan(/&/) then
|
1200
|
-
token = if is_arg? && space_seen && !check(/\s/) then
|
1201
|
-
warning("`&' interpreted as argument prefix")
|
1202
|
-
:tAMPER
|
1203
|
-
elsif in_lex_state? :expr_beg, :expr_mid then
|
1204
|
-
:tAMPER
|
1205
|
-
else
|
1206
|
-
:tAMPER2
|
1207
|
-
end
|
1208
|
-
|
1209
|
-
return result(:arg_state, token, "&")
|
1210
|
-
end
|
1211
|
-
elsif scan(/\//) then
|
1212
|
-
if is_beg? then
|
1213
|
-
string STR_REGEXP, '/'
|
1214
|
-
return result(nil, :tREGEXP_BEG, "/")
|
1215
|
-
end
|
1124
|
+
space = false # FIX: remove these
|
1125
|
+
func = string_type
|
1126
|
+
paren = open
|
1127
|
+
term_re = @@regexp_cache[term]
|
1216
1128
|
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1129
|
+
qwords = (func & STR_FUNC_QWORDS) != 0
|
1130
|
+
regexp = (func & STR_FUNC_REGEXP) != 0
|
1131
|
+
expand = (func & STR_FUNC_EXPAND) != 0
|
1220
1132
|
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
string STR_REGEXP, '/'
|
1225
|
-
return result(nil, :tREGEXP_BEG, "/")
|
1226
|
-
end
|
1227
|
-
end
|
1133
|
+
unless func then # nil'ed from qwords below. *sigh*
|
1134
|
+
return :tSTRING_END, nil
|
1135
|
+
end
|
1228
1136
|
|
1229
|
-
|
1230
|
-
elsif scan(/\^=/) then
|
1231
|
-
return result(:expr_beg, :tOP_ASGN, "^")
|
1232
|
-
elsif scan(/\^/) then
|
1233
|
-
return result(:arg_state, :tCARET, "^")
|
1234
|
-
elsif scan(/\;/) then
|
1235
|
-
self.command_start = true
|
1236
|
-
return result(:expr_beg, :tSEMI, ";")
|
1237
|
-
elsif scan(/\~/) then
|
1238
|
-
scan(/@/) if in_lex_state? :expr_fname, :expr_dot
|
1239
|
-
return result(:arg_state, :tTILDE, "~")
|
1240
|
-
elsif scan(/\\/) then
|
1241
|
-
if scan(/\r?\n/) then
|
1242
|
-
self.lineno = nil
|
1243
|
-
self.space_seen = true
|
1244
|
-
next
|
1245
|
-
end
|
1246
|
-
rb_compile_error "bare backslash only allowed before newline"
|
1247
|
-
elsif scan(/\%/) then
|
1248
|
-
return parse_quote if is_beg?
|
1249
|
-
|
1250
|
-
return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
|
1251
|
-
|
1252
|
-
return parse_quote if is_arg? && space_seen && ! check(/\s/)
|
1253
|
-
|
1254
|
-
return result(:arg_state, :tPERCENT, "%")
|
1255
|
-
elsif check(/\$/) then
|
1256
|
-
if scan(/(\$_)(\w+)/) then
|
1257
|
-
self.token = matched
|
1258
|
-
return result(:expr_end, :tGVAR, matched)
|
1259
|
-
elsif scan(/\$_/) then
|
1260
|
-
return result(:expr_end, :tGVAR, matched)
|
1261
|
-
elsif scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
|
1262
|
-
return result(:expr_end, :tGVAR, matched)
|
1263
|
-
elsif scan(/\$([\&\`\'\+])/) then
|
1264
|
-
# Explicit reference to these vars as symbols...
|
1265
|
-
if lex_state == :expr_fname then
|
1266
|
-
return result(:expr_end, :tGVAR, matched)
|
1267
|
-
else
|
1268
|
-
return result(:expr_end, :tBACK_REF, ss[1].to_sym)
|
1269
|
-
end
|
1270
|
-
elsif scan(/\$([1-9]\d*)/) then
|
1271
|
-
if lex_state == :expr_fname then
|
1272
|
-
return result(:expr_end, :tGVAR, matched)
|
1273
|
-
else
|
1274
|
-
return result(:expr_end, :tNTH_REF, ss[1].to_i)
|
1275
|
-
end
|
1276
|
-
elsif scan(/\$0/) then
|
1277
|
-
return result(:expr_end, :tGVAR, matched)
|
1278
|
-
elsif scan(/\$\W|\$\z/) then # TODO: remove?
|
1279
|
-
return result(:expr_end, "$", "$") # FIX: "$"??
|
1280
|
-
elsif scan(/\$\w+/)
|
1281
|
-
return result(:expr_end, :tGVAR, matched)
|
1282
|
-
end
|
1283
|
-
elsif check(/\_/) then
|
1284
|
-
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
1285
|
-
self.lineno = nil
|
1286
|
-
return RubyLexer::EOF
|
1287
|
-
elsif scan(/\_\w*/) then
|
1288
|
-
self.token = matched
|
1289
|
-
return process_token command_state, last_state
|
1290
|
-
end
|
1291
|
-
end
|
1292
|
-
end # END OF CASE
|
1137
|
+
space = true if qwords and scan(/\s+/)
|
1293
1138
|
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
1297
|
-
|
1298
|
-
|
1139
|
+
if self.string_nest == 0 && scan(/#{term_re}/) then
|
1140
|
+
if qwords then
|
1141
|
+
quote[1] = nil
|
1142
|
+
return :tSPACE, nil
|
1143
|
+
elsif regexp then
|
1144
|
+
return :tREGEXP_END, self.regx_options
|
1145
|
+
else
|
1146
|
+
return :tSTRING_END, term
|
1299
1147
|
end
|
1300
|
-
|
1301
|
-
self.token = matched if self.scan IDENT
|
1302
|
-
|
1303
|
-
return process_token command_state, last_state
|
1304
1148
|
end
|
1305
|
-
end
|
1306
1149
|
|
1307
|
-
|
1308
|
-
self.command_start = true
|
1309
|
-
token = :tLPAREN2
|
1150
|
+
return :tSPACE, nil if space
|
1310
1151
|
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1152
|
+
self.string_buffer = []
|
1153
|
+
|
1154
|
+
if expand
|
1155
|
+
case
|
1156
|
+
when scan(/#(?=[$@])/) then
|
1157
|
+
return :tSTRING_DVAR, nil
|
1158
|
+
when scan(/#[{]/) then
|
1159
|
+
return :tSTRING_DBEG, nil
|
1160
|
+
when scan(/#/) then
|
1161
|
+
string_buffer << '#'
|
1318
1162
|
end
|
1319
|
-
else
|
1320
|
-
# not a ternary -- do nothing?
|
1321
1163
|
end
|
1322
1164
|
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
def yylex_paren19
|
1327
|
-
if is_beg? then
|
1328
|
-
:tLPAREN
|
1329
|
-
elsif is_space_arg? then
|
1330
|
-
:tLPAREN_ARG
|
1331
|
-
else
|
1332
|
-
:tLPAREN2 # plain '(' in parse.y
|
1165
|
+
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
1166
|
+
rb_compile_error "unterminated string meets end of file"
|
1333
1167
|
end
|
1168
|
+
|
1169
|
+
return :tSTRING_CONTENT, string_buffer.join
|
1334
1170
|
end
|
1171
|
+
end
|
1335
1172
|
|
1336
|
-
|
1337
|
-
token = if lex_strterm[0] == :heredoc then
|
1338
|
-
self.heredoc lex_strterm
|
1339
|
-
else
|
1340
|
-
self.parse_string lex_strterm
|
1341
|
-
end
|
1173
|
+
require "ruby_lexer.rex"
|
1342
1174
|
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1175
|
+
if ENV["DEBUG"] then
|
1176
|
+
class RubyLexer
|
1177
|
+
alias :old_lineno= :lineno=
|
1178
|
+
|
1179
|
+
def d o
|
1180
|
+
$stderr.puts o.inspect
|
1347
1181
|
end
|
1348
1182
|
|
1349
|
-
|
1183
|
+
def lineno= n
|
1184
|
+
self.old_lineno= n
|
1185
|
+
where = caller.first.split(/:/).first(2).join(":")
|
1186
|
+
d :lineno => [n, where, ss && ss.rest[0,40]]
|
1187
|
+
end
|
1350
1188
|
end
|
1351
1189
|
end
|