ruby_parser 3.17.0 → 3.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/History.rdoc +109 -0
- data/Manifest.txt +5 -0
- data/README.rdoc +9 -6
- data/Rakefile +85 -24
- data/bin/ruby_parse_extract_error +1 -1
- data/compare/normalize.rb +6 -1
- data/gauntlet.md +108 -0
- data/lib/rp_extensions.rb +15 -36
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby20_parser.rb +7430 -3528
- data/lib/ruby20_parser.y +328 -257
- data/lib/ruby21_parser.rb +7408 -3572
- data/lib/ruby21_parser.y +323 -254
- data/lib/ruby22_parser.rb +7543 -3601
- data/lib/ruby22_parser.y +327 -256
- data/lib/ruby23_parser.rb +7549 -3612
- data/lib/ruby23_parser.y +327 -256
- data/lib/ruby24_parser.rb +7640 -3624
- data/lib/ruby24_parser.y +327 -256
- data/lib/ruby25_parser.rb +7640 -3623
- data/lib/ruby25_parser.y +327 -256
- data/lib/ruby26_parser.rb +7649 -3632
- data/lib/ruby26_parser.y +326 -255
- data/lib/ruby27_parser.rb +10132 -4545
- data/lib/ruby27_parser.y +871 -262
- data/lib/ruby30_parser.rb +10504 -4655
- data/lib/ruby30_parser.y +1065 -333
- data/lib/ruby31_parser.rb +13622 -0
- data/lib/ruby31_parser.y +3481 -0
- data/lib/ruby3_parser.yy +3536 -0
- data/lib/ruby_lexer.rb +261 -609
- data/lib/ruby_lexer.rex +27 -20
- data/lib/ruby_lexer.rex.rb +59 -23
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.rb +2 -0
- data/lib/ruby_parser.yy +903 -272
- data/lib/ruby_parser_extras.rb +333 -113
- data/test/test_ruby_lexer.rb +181 -129
- data/test/test_ruby_parser.rb +1529 -288
- data/tools/munge.rb +34 -6
- data/tools/ripper.rb +15 -10
- data.tar.gz.sig +0 -0
- metadata +27 -23
- metadata.gz.sig +0 -0
data/lib/ruby_lexer.rb
CHANGED
@@ -33,7 +33,7 @@ class RubyLexer
|
|
33
33
|
TOKENS = {
|
34
34
|
"!" => :tBANG,
|
35
35
|
"!=" => :tNEQ,
|
36
|
-
|
36
|
+
"!@" => :tBANG,
|
37
37
|
"!~" => :tNMATCH,
|
38
38
|
"," => :tCOMMA,
|
39
39
|
".." => :tDOT2,
|
@@ -46,17 +46,38 @@ class RubyLexer
|
|
46
46
|
"->" => :tLAMBDA,
|
47
47
|
}
|
48
48
|
|
49
|
+
PERCENT_END = {
|
50
|
+
"(" => ")",
|
51
|
+
"[" => "]",
|
52
|
+
"{" => "}",
|
53
|
+
"<" => ">",
|
54
|
+
}
|
55
|
+
|
56
|
+
SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
|
57
|
+
|
49
58
|
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
50
59
|
@@regexp_cache[nil] = nil
|
51
60
|
|
61
|
+
def regexp_cache
|
62
|
+
@@regexp_cache
|
63
|
+
end
|
64
|
+
|
52
65
|
if $DEBUG then
|
53
66
|
attr_reader :lex_state
|
54
67
|
|
55
68
|
def lex_state= o
|
56
69
|
return if @lex_state == o
|
57
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
58
70
|
|
59
|
-
|
71
|
+
from = ""
|
72
|
+
if ENV["VERBOSE"]
|
73
|
+
path = caller[0]
|
74
|
+
path = caller[1] if path =~ /result/
|
75
|
+
path, line, *_ = path.split(/:/)
|
76
|
+
path.delete_prefix! File.dirname File.dirname __FILE__
|
77
|
+
from = " at .%s:%s" % [path, line]
|
78
|
+
end
|
79
|
+
|
80
|
+
warn "lex_state: %p -> %p%s" % [lex_state, o, from]
|
60
81
|
|
61
82
|
@lex_state = o
|
62
83
|
end
|
@@ -66,14 +87,16 @@ class RubyLexer
|
|
66
87
|
|
67
88
|
attr_accessor :lex_state unless $DEBUG
|
68
89
|
|
69
|
-
attr_accessor :lineno # we're bypassing oedipus' lineno handling.
|
70
90
|
attr_accessor :brace_nest
|
71
91
|
attr_accessor :cmdarg
|
72
92
|
attr_accessor :command_start
|
73
93
|
attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
|
74
94
|
attr_accessor :last_state
|
75
95
|
attr_accessor :cond
|
76
|
-
attr_accessor :
|
96
|
+
attr_accessor :old_ss
|
97
|
+
attr_accessor :old_lineno
|
98
|
+
|
99
|
+
# these are generated via ruby_lexer.rex: ss, lineno
|
77
100
|
|
78
101
|
##
|
79
102
|
# Additional context surrounding tokens that both the lexer and
|
@@ -98,6 +121,7 @@ class RubyLexer
|
|
98
121
|
|
99
122
|
self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
|
100
123
|
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
124
|
+
self.ss = RPStringScanner.new ""
|
101
125
|
|
102
126
|
reset
|
103
127
|
end
|
@@ -110,14 +134,8 @@ class RubyLexer
|
|
110
134
|
is_after_operator? ? EXPR_ARG : EXPR_BEG
|
111
135
|
end
|
112
136
|
|
113
|
-
def
|
114
|
-
|
115
|
-
end
|
116
|
-
|
117
|
-
alias bol? beginning_of_line? # to make .rex file more readable
|
118
|
-
|
119
|
-
def check re
|
120
|
-
ss.check re
|
137
|
+
def ignore_body_comments
|
138
|
+
@comments.clear
|
121
139
|
end
|
122
140
|
|
123
141
|
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
@@ -126,18 +144,8 @@ class RubyLexer
|
|
126
144
|
c
|
127
145
|
end
|
128
146
|
|
129
|
-
def
|
130
|
-
|
131
|
-
self.extra_lineno += r.count("\n") if r
|
132
|
-
r
|
133
|
-
end
|
134
|
-
|
135
|
-
def end_of_stream?
|
136
|
-
ss.eos?
|
137
|
-
end
|
138
|
-
|
139
|
-
def expr_beg?
|
140
|
-
lex_state =~ EXPR_BEG
|
147
|
+
def debug n
|
148
|
+
raise "debug #{n}"
|
141
149
|
end
|
142
150
|
|
143
151
|
def expr_dot?
|
@@ -154,128 +162,6 @@ class RubyLexer
|
|
154
162
|
result EXPR_BEG, token, text
|
155
163
|
end
|
156
164
|
|
157
|
-
def fixup_lineno extra = 0
|
158
|
-
self.lineno += self.extra_lineno + extra
|
159
|
-
self.extra_lineno = 0
|
160
|
-
end
|
161
|
-
|
162
|
-
def heredoc here # TODO: rewrite / remove
|
163
|
-
_, eos, func, last_line = here
|
164
|
-
|
165
|
-
indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
|
166
|
-
expand = func =~ STR_FUNC_EXPAND
|
167
|
-
eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
|
168
|
-
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
169
|
-
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
170
|
-
|
171
|
-
rb_compile_error err_msg if end_of_stream?
|
172
|
-
|
173
|
-
if beginning_of_line? && scan(eos_re) then
|
174
|
-
self.lineno += 1
|
175
|
-
ss.unread_many last_line # TODO: figure out how to remove this
|
176
|
-
return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
|
177
|
-
end
|
178
|
-
|
179
|
-
self.string_buffer = []
|
180
|
-
|
181
|
-
if expand then
|
182
|
-
case
|
183
|
-
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
184
|
-
# TODO: !ISASCII
|
185
|
-
# ?! see parser_peek_variable_name
|
186
|
-
return :tSTRING_DVAR, matched
|
187
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
188
|
-
# TODO: !ISASCII
|
189
|
-
return :tSTRING_DVAR, matched
|
190
|
-
when scan(/#[{]/) then
|
191
|
-
self.command_start = true
|
192
|
-
return :tSTRING_DBEG, matched
|
193
|
-
when scan(/#/) then
|
194
|
-
string_buffer << "#"
|
195
|
-
end
|
196
|
-
|
197
|
-
begin
|
198
|
-
c = tokadd_string func, eol, nil
|
199
|
-
|
200
|
-
rb_compile_error err_msg if
|
201
|
-
c == RubyLexer::EOF
|
202
|
-
|
203
|
-
if c != eol then
|
204
|
-
return :tSTRING_CONTENT, string_buffer.join
|
205
|
-
else
|
206
|
-
string_buffer << scan(/\n/)
|
207
|
-
end
|
208
|
-
|
209
|
-
rb_compile_error err_msg if end_of_stream?
|
210
|
-
end until check(eos_re)
|
211
|
-
else
|
212
|
-
until check(eos_re) do
|
213
|
-
string_buffer << scan(/.*(\n|\z)/)
|
214
|
-
rb_compile_error err_msg if end_of_stream?
|
215
|
-
end
|
216
|
-
end
|
217
|
-
|
218
|
-
self.lex_strterm = [:heredoc, eos, func, last_line]
|
219
|
-
|
220
|
-
string_content = begin
|
221
|
-
s = string_buffer.join
|
222
|
-
s.b.force_encoding Encoding::UTF_8
|
223
|
-
end
|
224
|
-
|
225
|
-
return :tSTRING_CONTENT, string_content
|
226
|
-
end
|
227
|
-
|
228
|
-
def heredoc_identifier # TODO: remove / rewrite
|
229
|
-
term, func = nil, STR_FUNC_BORING
|
230
|
-
self.string_buffer = []
|
231
|
-
|
232
|
-
heredoc_indent_mods = "-"
|
233
|
-
heredoc_indent_mods += '\~' if ruby23plus?
|
234
|
-
|
235
|
-
case
|
236
|
-
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
237
|
-
term = ss[2]
|
238
|
-
func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
|
239
|
-
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
240
|
-
func |= case term
|
241
|
-
when "\'" then
|
242
|
-
STR_SQUOTE
|
243
|
-
when '"' then
|
244
|
-
STR_DQUOTE
|
245
|
-
else
|
246
|
-
STR_XQUOTE
|
247
|
-
end
|
248
|
-
string_buffer << ss[3]
|
249
|
-
when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
|
250
|
-
rb_compile_error "unterminated here document identifier"
|
251
|
-
when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
|
252
|
-
term = '"'
|
253
|
-
func |= STR_DQUOTE
|
254
|
-
unless ss[1].empty? then
|
255
|
-
func |= STR_FUNC_INDENT
|
256
|
-
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
257
|
-
end
|
258
|
-
string_buffer << ss[2]
|
259
|
-
else
|
260
|
-
return nil
|
261
|
-
end
|
262
|
-
|
263
|
-
if scan(/.*\n/) then
|
264
|
-
# TODO: think about storing off the char range instead
|
265
|
-
line = matched
|
266
|
-
else
|
267
|
-
line = nil
|
268
|
-
end
|
269
|
-
|
270
|
-
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
271
|
-
|
272
|
-
if term == "`" then
|
273
|
-
result nil, :tXSTRING_BEG, "`"
|
274
|
-
else
|
275
|
-
result nil, :tSTRING_BEG, "\""
|
276
|
-
end
|
277
|
-
end
|
278
|
-
|
279
165
|
def in_fname? # REFACTOR
|
280
166
|
lex_state =~ EXPR_FNAME
|
281
167
|
end
|
@@ -286,13 +172,13 @@ class RubyLexer
|
|
286
172
|
text = matched
|
287
173
|
case
|
288
174
|
when text.end_with?("ri")
|
289
|
-
|
175
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
|
290
176
|
when text.end_with?("r")
|
291
|
-
|
177
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
|
292
178
|
when text.end_with?("i")
|
293
|
-
|
179
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
|
294
180
|
else
|
295
|
-
|
181
|
+
result EXPR_NUM, :tINTEGER, text.to_i(base)
|
296
182
|
end
|
297
183
|
end
|
298
184
|
|
@@ -338,132 +224,10 @@ class RubyLexer
|
|
338
224
|
self.parser.env[id.to_sym] == :lvar
|
339
225
|
end
|
340
226
|
|
341
|
-
def matched
|
342
|
-
ss.matched
|
343
|
-
end
|
344
|
-
|
345
227
|
def not_end?
|
346
228
|
not is_end?
|
347
229
|
end
|
348
230
|
|
349
|
-
def parse_quote # TODO: remove / rewrite
|
350
|
-
beg, nnd, short_hand, c = nil, nil, false, nil
|
351
|
-
|
352
|
-
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
353
|
-
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
354
|
-
c, beg, short_hand = matched, getch, false
|
355
|
-
else # Short-hand (e.g. %{, %., %!, etc)
|
356
|
-
c, beg, short_hand = "Q", getch, true
|
357
|
-
end
|
358
|
-
|
359
|
-
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
360
|
-
rb_compile_error "unterminated quoted string meets end of file"
|
361
|
-
end
|
362
|
-
|
363
|
-
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
364
|
-
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
365
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
366
|
-
|
367
|
-
token_type, text = nil, "%#{c}#{beg}"
|
368
|
-
token_type, string_type = case c
|
369
|
-
when "Q" then
|
370
|
-
ch = short_hand ? nnd : c + beg
|
371
|
-
text = "%#{ch}"
|
372
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
373
|
-
when "q" then
|
374
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
375
|
-
when "W" then
|
376
|
-
eat_whitespace
|
377
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
378
|
-
when "w" then
|
379
|
-
eat_whitespace
|
380
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
381
|
-
when "x" then
|
382
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
383
|
-
when "r" then
|
384
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
385
|
-
when "s" then
|
386
|
-
self.lex_state = EXPR_FNAME
|
387
|
-
[:tSYMBEG, STR_SSYM]
|
388
|
-
when "I" then
|
389
|
-
eat_whitespace
|
390
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
391
|
-
when "i" then
|
392
|
-
eat_whitespace
|
393
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
394
|
-
end
|
395
|
-
|
396
|
-
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
397
|
-
token_type.nil?
|
398
|
-
|
399
|
-
raise "huh" unless string_type
|
400
|
-
|
401
|
-
string string_type, nnd, beg
|
402
|
-
|
403
|
-
return token_type, text
|
404
|
-
end
|
405
|
-
|
406
|
-
def parse_string quote # TODO: rewrite / remove
|
407
|
-
_, string_type, term, open = quote
|
408
|
-
|
409
|
-
space = false # FIX: remove these
|
410
|
-
func = string_type
|
411
|
-
paren = open
|
412
|
-
term_re = @@regexp_cache[term]
|
413
|
-
|
414
|
-
qwords = func =~ STR_FUNC_QWORDS
|
415
|
-
regexp = func =~ STR_FUNC_REGEXP
|
416
|
-
expand = func =~ STR_FUNC_EXPAND
|
417
|
-
|
418
|
-
unless func then # nil'ed from qwords below. *sigh*
|
419
|
-
return :tSTRING_END, nil
|
420
|
-
end
|
421
|
-
|
422
|
-
space = true if qwords and eat_whitespace
|
423
|
-
|
424
|
-
if self.string_nest == 0 && scan(/#{term_re}/) then
|
425
|
-
if qwords then
|
426
|
-
quote[1] = nil
|
427
|
-
return :tSPACE, nil
|
428
|
-
elsif regexp then
|
429
|
-
return :tREGEXP_END, self.regx_options
|
430
|
-
else
|
431
|
-
return :tSTRING_END, term
|
432
|
-
end
|
433
|
-
end
|
434
|
-
|
435
|
-
return :tSPACE, nil if space
|
436
|
-
|
437
|
-
self.string_buffer = []
|
438
|
-
|
439
|
-
if expand
|
440
|
-
case
|
441
|
-
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
442
|
-
# TODO: !ISASCII
|
443
|
-
# ?! see parser_peek_variable_name
|
444
|
-
return :tSTRING_DVAR, nil
|
445
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
446
|
-
# TODO: !ISASCII
|
447
|
-
return :tSTRING_DVAR, nil
|
448
|
-
when scan(/#[{]/) then
|
449
|
-
self.command_start = true
|
450
|
-
return :tSTRING_DBEG, nil
|
451
|
-
when scan(/#/) then
|
452
|
-
string_buffer << "#"
|
453
|
-
end
|
454
|
-
end
|
455
|
-
|
456
|
-
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
457
|
-
if func =~ STR_FUNC_REGEXP then
|
458
|
-
rb_compile_error "unterminated regexp meets end of file"
|
459
|
-
else
|
460
|
-
rb_compile_error "unterminated string meets end of file"
|
461
|
-
end
|
462
|
-
end
|
463
|
-
|
464
|
-
return :tSTRING_CONTENT, string_buffer.join
|
465
|
-
end
|
466
|
-
|
467
231
|
def possibly_escape_string text, check
|
468
232
|
content = match[1]
|
469
233
|
|
@@ -484,11 +248,11 @@ class RubyLexer
|
|
484
248
|
:tAMPER2
|
485
249
|
end
|
486
250
|
|
487
|
-
|
251
|
+
result :arg_state, token, "&"
|
488
252
|
end
|
489
253
|
|
490
254
|
def process_backref text
|
491
|
-
token =
|
255
|
+
token = match[1].to_sym
|
492
256
|
# TODO: can't do lineno hack w/ symbol
|
493
257
|
result EXPR_END, :tBACK_REF, token
|
494
258
|
end
|
@@ -502,7 +266,7 @@ class RubyLexer
|
|
502
266
|
end
|
503
267
|
|
504
268
|
@comments << matched
|
505
|
-
self.lineno += matched.count("\n")
|
269
|
+
self.lineno += matched.count("\n") # HACK?
|
506
270
|
|
507
271
|
nil # TODO
|
508
272
|
end
|
@@ -573,9 +337,9 @@ class RubyLexer
|
|
573
337
|
|
574
338
|
case
|
575
339
|
when scan(/\'/) then
|
576
|
-
string STR_SSYM
|
340
|
+
string STR_SSYM, matched
|
577
341
|
when scan(/\"/) then
|
578
|
-
string STR_DSYM
|
342
|
+
string STR_DSYM, matched
|
579
343
|
end
|
580
344
|
|
581
345
|
result EXPR_FNAME, :tSYMBEG, text
|
@@ -590,7 +354,7 @@ class RubyLexer
|
|
590
354
|
end
|
591
355
|
|
592
356
|
def process_dots text
|
593
|
-
tokens = ruby27plus? &&
|
357
|
+
tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
|
594
358
|
|
595
359
|
result EXPR_BEG, tokens[text], text
|
596
360
|
end
|
@@ -600,36 +364,37 @@ class RubyLexer
|
|
600
364
|
|
601
365
|
case
|
602
366
|
when text.end_with?("ri")
|
603
|
-
|
367
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
604
368
|
when text.end_with?("i")
|
605
|
-
|
369
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
606
370
|
when text.end_with?("r")
|
607
|
-
|
371
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
608
372
|
else
|
609
|
-
|
373
|
+
result EXPR_NUM, :tFLOAT, text.to_f
|
610
374
|
end
|
611
375
|
end
|
612
376
|
|
613
377
|
def process_gvar text
|
614
|
-
|
378
|
+
if parser.class.version > 20 && text == "$-" then
|
379
|
+
rb_compile_error "unexpected $undefined"
|
380
|
+
end
|
381
|
+
|
615
382
|
result EXPR_END, :tGVAR, text
|
616
383
|
end
|
617
384
|
|
618
385
|
def process_gvar_oddity text
|
619
|
-
return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
|
620
386
|
rb_compile_error "#{text.inspect} is not allowed as a global variable name"
|
621
387
|
end
|
622
388
|
|
623
389
|
def process_ivar text
|
624
390
|
tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
|
625
|
-
text.lineno = self.lineno
|
626
391
|
result EXPR_END, tok_id, text
|
627
392
|
end
|
628
393
|
|
629
394
|
def process_label text
|
630
395
|
symbol = possibly_escape_string text, /^\"/
|
631
396
|
|
632
|
-
result EXPR_LAB, :tLABEL,
|
397
|
+
result EXPR_LAB, :tLABEL, symbol
|
633
398
|
end
|
634
399
|
|
635
400
|
def process_label_or_string text
|
@@ -637,11 +402,15 @@ class RubyLexer
|
|
637
402
|
@was_label = nil
|
638
403
|
return process_label text
|
639
404
|
elsif text =~ /:\Z/ then
|
640
|
-
|
405
|
+
self.pos -= 1 # put back ":"
|
641
406
|
text = text[0..-2]
|
642
407
|
end
|
643
408
|
|
644
|
-
|
409
|
+
orig_line = lineno
|
410
|
+
str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
|
411
|
+
self.lineno += str.count("\n")
|
412
|
+
|
413
|
+
result EXPR_END, :tSTRING, str, orig_line
|
645
414
|
end
|
646
415
|
|
647
416
|
def process_lchevron text
|
@@ -659,34 +428,25 @@ class RubyLexer
|
|
659
428
|
self.lex_state = EXPR_BEG
|
660
429
|
end
|
661
430
|
|
662
|
-
|
431
|
+
result lex_state, :tLSHFT, "\<\<"
|
663
432
|
end
|
664
433
|
|
665
|
-
def process_newline_or_comment text
|
434
|
+
def process_newline_or_comment text # ../compare/parse30.y:9126 ish
|
666
435
|
c = matched
|
667
|
-
hit = false
|
668
436
|
|
669
437
|
if c == "#" then
|
670
|
-
|
438
|
+
self.pos -= 1
|
671
439
|
|
672
|
-
# TODO: handle magic comments
|
673
440
|
while scan(/\s*\#.*(\n+|\z)/) do
|
674
|
-
|
675
|
-
self.lineno += matched.lines.to_a.size
|
441
|
+
self.lineno += matched.count "\n"
|
676
442
|
@comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
677
443
|
end
|
678
444
|
|
679
445
|
return nil if end_of_stream?
|
680
446
|
end
|
681
447
|
|
682
|
-
self.lineno += 1 unless hit
|
683
|
-
|
684
|
-
# Replace a string of newlines with a single one
|
685
|
-
self.lineno += matched.lines.to_a.size if scan(/\n+/)
|
686
|
-
|
687
448
|
c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
|
688
449
|
lex_state !~ EXPR_LABELED)
|
689
|
-
# TODO: figure out what token_seen is for
|
690
450
|
if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
|
691
451
|
# ignore if !fallthrough?
|
692
452
|
if !c && parser.in_kwarg then
|
@@ -694,25 +454,29 @@ class RubyLexer
|
|
694
454
|
self.command_start = true
|
695
455
|
return result EXPR_BEG, :tNL, nil
|
696
456
|
else
|
697
|
-
|
457
|
+
maybe_pop_stack
|
458
|
+
return # goto retry
|
698
459
|
end
|
699
460
|
end
|
700
461
|
|
701
|
-
if scan(/
|
702
|
-
self.space_seen = true
|
462
|
+
if scan(/[\ \t\r\f\v]+/) then
|
463
|
+
self.space_seen = true
|
464
|
+
end
|
703
465
|
|
704
|
-
|
705
|
-
return
|
466
|
+
if check(/#/) then
|
467
|
+
return # goto retry
|
468
|
+
elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
|
469
|
+
return # goto retry
|
706
470
|
end
|
707
471
|
|
708
472
|
self.command_start = true
|
709
473
|
|
710
|
-
|
474
|
+
result EXPR_BEG, :tNL, nil
|
711
475
|
end
|
712
476
|
|
713
477
|
def process_nthref text
|
714
478
|
# TODO: can't do lineno hack w/ number
|
715
|
-
result EXPR_END, :tNTH_REF,
|
479
|
+
result EXPR_END, :tNTH_REF, match[1].to_i
|
716
480
|
end
|
717
481
|
|
718
482
|
def process_paren text
|
@@ -740,13 +504,16 @@ class RubyLexer
|
|
740
504
|
end
|
741
505
|
|
742
506
|
def process_percent text
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
507
|
+
case
|
508
|
+
when is_beg? then
|
509
|
+
process_percent_quote
|
510
|
+
when scan(/\=/)
|
511
|
+
result EXPR_BEG, :tOP_ASGN, "%"
|
512
|
+
when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
|
513
|
+
process_percent_quote
|
514
|
+
else
|
515
|
+
result :arg_state, :tPERCENT, "%"
|
516
|
+
end
|
750
517
|
end
|
751
518
|
|
752
519
|
def process_plus_minus text
|
@@ -820,20 +587,21 @@ class RubyLexer
|
|
820
587
|
end
|
821
588
|
|
822
589
|
def process_simple_string text
|
823
|
-
|
824
|
-
|
825
|
-
}
|
590
|
+
orig_line = lineno
|
591
|
+
self.lineno += text.count("\n")
|
826
592
|
|
827
|
-
|
593
|
+
str = text[1..-2]
|
594
|
+
.gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
|
595
|
+
str = str.b unless str.valid_encoding?
|
828
596
|
|
829
|
-
result EXPR_END, :tSTRING,
|
597
|
+
result EXPR_END, :tSTRING, str, orig_line
|
830
598
|
end
|
831
599
|
|
832
600
|
def process_slash text
|
833
601
|
if is_beg? then
|
834
|
-
string STR_REGEXP
|
602
|
+
string STR_REGEXP, matched
|
835
603
|
|
836
|
-
return result
|
604
|
+
return result nil, :tREGEXP_BEG, "/"
|
837
605
|
end
|
838
606
|
|
839
607
|
if scan(/\=/) then
|
@@ -848,7 +616,7 @@ class RubyLexer
|
|
848
616
|
end
|
849
617
|
end
|
850
618
|
|
851
|
-
|
619
|
+
result :arg_state, :tDIVIDE, "/"
|
852
620
|
end
|
853
621
|
|
854
622
|
def process_square_bracket text
|
@@ -880,34 +648,6 @@ class RubyLexer
|
|
880
648
|
result EXPR_PAR, token, text
|
881
649
|
end
|
882
650
|
|
883
|
-
def process_string # TODO: rewrite / remove
|
884
|
-
# matches top of parser_yylex in compare/parse23.y:8113
|
885
|
-
token = if lex_strterm[0] == :heredoc then
|
886
|
-
self.heredoc lex_strterm
|
887
|
-
else
|
888
|
-
self.parse_string lex_strterm
|
889
|
-
end
|
890
|
-
|
891
|
-
token_type, c = token
|
892
|
-
|
893
|
-
# matches parser_string_term from 2.3, but way off from 2.5
|
894
|
-
if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
|
895
|
-
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
896
|
-
!cond.is_in_state) || is_arg?) &&
|
897
|
-
is_label_suffix? then
|
898
|
-
scan(/:/)
|
899
|
-
token_type = token[0] = :tLABEL_END
|
900
|
-
end
|
901
|
-
end
|
902
|
-
|
903
|
-
if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
|
904
|
-
self.lex_strterm = nil
|
905
|
-
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
|
906
|
-
end
|
907
|
-
|
908
|
-
return token
|
909
|
-
end
|
910
|
-
|
911
651
|
def process_symbol text
|
912
652
|
symbol = possibly_escape_string text, /^:\"/ # stupid emacs
|
913
653
|
|
@@ -916,7 +656,6 @@ class RubyLexer
|
|
916
656
|
|
917
657
|
def process_token text
|
918
658
|
# matching: parse_ident in compare/parse23.y:7989
|
919
|
-
# TODO: make this always return [token, lineno]
|
920
659
|
# FIX: remove: self.last_state = lex_state
|
921
660
|
|
922
661
|
token = self.token = text
|
@@ -939,8 +678,7 @@ class RubyLexer
|
|
939
678
|
|
940
679
|
if is_label_possible? and is_label_suffix? then
|
941
680
|
scan(/:/)
|
942
|
-
|
943
|
-
return result EXPR_LAB, :tLABEL, [token, self.lineno]
|
681
|
+
return result EXPR_LAB, :tLABEL, token
|
944
682
|
end
|
945
683
|
|
946
684
|
# TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
|
@@ -951,14 +689,15 @@ class RubyLexer
|
|
951
689
|
return process_token_keyword keyword if keyword
|
952
690
|
end
|
953
691
|
|
954
|
-
# matching: compare/
|
955
|
-
state = if
|
692
|
+
# matching: compare/parse30.y:9039
|
693
|
+
state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
|
956
694
|
cmd_state ? EXPR_CMDARG : EXPR_ARG
|
957
695
|
elsif lex_state =~ EXPR_FNAME then
|
958
696
|
EXPR_ENDFN
|
959
697
|
else
|
960
698
|
EXPR_END
|
961
699
|
end
|
700
|
+
self.lex_state = state
|
962
701
|
|
963
702
|
tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
|
964
703
|
|
@@ -968,20 +707,16 @@ class RubyLexer
|
|
968
707
|
state = EXPR_END|EXPR_LABEL
|
969
708
|
end
|
970
709
|
|
971
|
-
|
972
|
-
|
973
|
-
return result(state, tok_id, token)
|
710
|
+
result state, tok_id, token
|
974
711
|
end
|
975
712
|
|
976
713
|
def process_token_keyword keyword
|
977
714
|
# matching MIDDLE of parse_ident in compare/parse23.y:8046
|
978
715
|
state = lex_state
|
979
|
-
self.lex_state = keyword.state
|
980
|
-
|
981
|
-
value = [token, self.lineno]
|
982
716
|
|
983
|
-
return result(
|
717
|
+
return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
|
984
718
|
|
719
|
+
self.lex_state = keyword.state
|
985
720
|
self.command_start = true if lex_state =~ EXPR_BEG
|
986
721
|
|
987
722
|
case
|
@@ -990,27 +725,28 @@ class RubyLexer
|
|
990
725
|
when lambda_beginning? then
|
991
726
|
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
992
727
|
self.paren_nest -= 1 # TODO: question this?
|
993
|
-
result lex_state, :kDO_LAMBDA,
|
728
|
+
result lex_state, :kDO_LAMBDA, token
|
994
729
|
when cond.is_in_state then
|
995
|
-
result lex_state, :kDO_COND,
|
730
|
+
result lex_state, :kDO_COND, token
|
996
731
|
when cmdarg.is_in_state && state != EXPR_CMDARG then
|
997
|
-
result lex_state, :kDO_BLOCK,
|
732
|
+
result lex_state, :kDO_BLOCK, token
|
998
733
|
else
|
999
|
-
result lex_state, :kDO,
|
734
|
+
result lex_state, :kDO, token
|
1000
735
|
end
|
1001
736
|
when state =~ EXPR_PAD then
|
1002
|
-
result lex_state, keyword.id0,
|
737
|
+
result lex_state, keyword.id0, token
|
1003
738
|
when keyword.id0 != keyword.id1 then
|
1004
|
-
result EXPR_PAR, keyword.id1,
|
739
|
+
result EXPR_PAR, keyword.id1, token
|
1005
740
|
else
|
1006
|
-
result lex_state, keyword.id1,
|
741
|
+
result lex_state, keyword.id1, token
|
1007
742
|
end
|
1008
743
|
end
|
1009
744
|
|
1010
745
|
def process_underscore text
|
1011
|
-
|
746
|
+
self.unscan # put back "_"
|
1012
747
|
|
1013
748
|
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
749
|
+
ss.terminate
|
1014
750
|
[RubyLexer::EOF, RubyLexer::EOF]
|
1015
751
|
elsif scan(/#{IDENT_CHAR}+/) then
|
1016
752
|
process_token matched
|
@@ -1018,121 +754,35 @@ class RubyLexer
|
|
1018
754
|
end
|
1019
755
|
|
1020
756
|
def rb_compile_error msg
|
1021
|
-
msg += ". near line #{self.lineno}: #{
|
757
|
+
msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
|
1022
758
|
raise RubyParser::SyntaxError, msg
|
1023
759
|
end
|
1024
760
|
|
1025
|
-
def read_escape # TODO: remove / rewrite
|
1026
|
-
case
|
1027
|
-
when scan(/\\/) then # Backslash
|
1028
|
-
'\\'
|
1029
|
-
when scan(/n/) then # newline
|
1030
|
-
self.extra_lineno -= 1
|
1031
|
-
"\n"
|
1032
|
-
when scan(/t/) then # horizontal tab
|
1033
|
-
"\t"
|
1034
|
-
when scan(/r/) then # carriage-return
|
1035
|
-
"\r"
|
1036
|
-
when scan(/f/) then # form-feed
|
1037
|
-
"\f"
|
1038
|
-
when scan(/v/) then # vertical tab
|
1039
|
-
"\13"
|
1040
|
-
when scan(/a/) then # alarm(bell)
|
1041
|
-
"\007"
|
1042
|
-
when scan(/e/) then # escape
|
1043
|
-
"\033"
|
1044
|
-
when scan(/b/) then # backspace
|
1045
|
-
"\010"
|
1046
|
-
when scan(/s/) then # space
|
1047
|
-
" "
|
1048
|
-
when scan(/[0-7]{1,3}/) then # octal constant
|
1049
|
-
(matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
|
1050
|
-
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
1051
|
-
# TODO: force encode everything to UTF-8?
|
1052
|
-
ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
|
1053
|
-
when check(/M-\\./) then
|
1054
|
-
scan(/M-\\/) # eat it
|
1055
|
-
c = self.read_escape
|
1056
|
-
c[0] = (c[0].ord | 0x80).chr
|
1057
|
-
c
|
1058
|
-
when scan(/M-(.)/) then
|
1059
|
-
c = ss[1]
|
1060
|
-
c[0] = (c[0].ord | 0x80).chr
|
1061
|
-
c
|
1062
|
-
when check(/(C-|c)\\[\\MCc]/) then
|
1063
|
-
scan(/(C-|c)\\/) # eat it
|
1064
|
-
c = self.read_escape
|
1065
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1066
|
-
c
|
1067
|
-
when check(/(C-|c)\\(?!u|\\)/) then
|
1068
|
-
scan(/(C-|c)\\/) # eat it
|
1069
|
-
c = read_escape
|
1070
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1071
|
-
c
|
1072
|
-
when scan(/C-\?|c\?/) then
|
1073
|
-
127.chr
|
1074
|
-
when scan(/(C-|c)(.)/) then
|
1075
|
-
c = ss[2]
|
1076
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1077
|
-
c
|
1078
|
-
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
1079
|
-
matched
|
1080
|
-
when scan(/u(\h{4})/) then
|
1081
|
-
[ss[1].to_i(16)].pack("U")
|
1082
|
-
when scan(/u(\h{1,3})/) then
|
1083
|
-
rb_compile_error "Invalid escape character syntax"
|
1084
|
-
when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
|
1085
|
-
ss[1].split.map { |s| s.to_i(16) }.pack("U*")
|
1086
|
-
when scan(/[McCx0-9]/) || end_of_stream? then
|
1087
|
-
rb_compile_error("Invalid escape character syntax")
|
1088
|
-
else
|
1089
|
-
getch
|
1090
|
-
end.dup
|
1091
|
-
end
|
1092
|
-
|
1093
|
-
def getch
|
1094
|
-
c = ss.getch
|
1095
|
-
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1096
|
-
c
|
1097
|
-
end
|
1098
|
-
|
1099
|
-
def regx_options # TODO: rewrite / remove
|
1100
|
-
good, bad = [], []
|
1101
|
-
|
1102
|
-
if scan(/[a-z]+/) then
|
1103
|
-
good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
|
1104
|
-
end
|
1105
|
-
|
1106
|
-
unless bad.empty? then
|
1107
|
-
rb_compile_error("unknown regexp option%s - %s" %
|
1108
|
-
[(bad.size > 1 ? "s" : ""), bad.join.inspect])
|
1109
|
-
end
|
1110
|
-
|
1111
|
-
return good.join
|
1112
|
-
end
|
1113
|
-
|
1114
761
|
def reset
|
762
|
+
self.lineno = 1
|
1115
763
|
self.brace_nest = 0
|
1116
764
|
self.command_start = true
|
1117
765
|
self.comments = []
|
1118
766
|
self.lex_state = EXPR_NONE
|
1119
767
|
self.lex_strterm = nil
|
1120
|
-
self.lineno = 1
|
1121
768
|
self.lpar_beg = nil
|
1122
769
|
self.paren_nest = 0
|
1123
770
|
self.space_seen = false
|
1124
771
|
self.string_nest = 0
|
1125
772
|
self.token = nil
|
1126
|
-
self.
|
773
|
+
self.string_buffer = []
|
774
|
+
self.old_ss = nil
|
775
|
+
self.old_lineno = nil
|
1127
776
|
|
1128
777
|
self.cond.reset
|
1129
778
|
self.cmdarg.reset
|
1130
779
|
end
|
1131
780
|
|
1132
|
-
def result new_state, token, text # :nodoc:
|
781
|
+
def result new_state, token, text, line = self.lineno # :nodoc:
|
1133
782
|
new_state = self.arg_state if new_state == :arg_state
|
1134
783
|
self.lex_state = new_state if new_state
|
1135
|
-
|
784
|
+
|
785
|
+
[token, [text, line]]
|
1136
786
|
end
|
1137
787
|
|
1138
788
|
def ruby22_label?
|
@@ -1155,14 +805,6 @@ class RubyLexer
|
|
1155
805
|
parser.class.version >= 27
|
1156
806
|
end
|
1157
807
|
|
1158
|
-
def scan re
|
1159
|
-
ss.scan re
|
1160
|
-
end
|
1161
|
-
|
1162
|
-
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1163
|
-
RPStringScanner
|
1164
|
-
end
|
1165
|
-
|
1166
808
|
def space_vs_beginning space_type, beg_type, fallback
|
1167
809
|
if is_space_arg? check(/./m) then
|
1168
810
|
warning "`**' interpreted as argument prefix"
|
@@ -1175,139 +817,9 @@ class RubyLexer
|
|
1175
817
|
end
|
1176
818
|
end
|
1177
819
|
|
1178
|
-
def string type, beg = matched, nnd = "\0"
|
1179
|
-
self.lex_strterm = [:strterm, type, beg, nnd]
|
1180
|
-
end
|
1181
|
-
|
1182
|
-
def tokadd_escape term # TODO: rewrite / remove
|
1183
|
-
case
|
1184
|
-
when scan(/\\\n/) then
|
1185
|
-
# just ignore
|
1186
|
-
when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
|
1187
|
-
self.string_buffer << matched
|
1188
|
-
when scan(/\\([MC]-|c)(?=\\)/) then
|
1189
|
-
self.string_buffer << matched
|
1190
|
-
self.tokadd_escape term
|
1191
|
-
when scan(/\\([MC]-|c)(.)/) then
|
1192
|
-
self.string_buffer << matched
|
1193
|
-
when scan(/\\[McCx]/) then
|
1194
|
-
rb_compile_error "Invalid escape character syntax"
|
1195
|
-
when scan(/\\(.)/m) then
|
1196
|
-
chr = ss[1]
|
1197
|
-
prev = self.string_buffer.last
|
1198
|
-
if term == chr && prev && prev.end_with?("(?") then
|
1199
|
-
self.string_buffer << chr
|
1200
|
-
elsif term == chr || chr.ascii_only? then
|
1201
|
-
self.string_buffer << matched # dunno why we keep them for ascii
|
1202
|
-
else
|
1203
|
-
self.string_buffer << chr # HACK? this is such a rat's nest
|
1204
|
-
end
|
1205
|
-
else
|
1206
|
-
rb_compile_error "Invalid escape character syntax"
|
1207
|
-
end
|
1208
|
-
end
|
1209
|
-
|
1210
|
-
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1211
|
-
qwords = func =~ STR_FUNC_QWORDS
|
1212
|
-
escape = func =~ STR_FUNC_ESCAPE
|
1213
|
-
expand = func =~ STR_FUNC_EXPAND
|
1214
|
-
regexp = func =~ STR_FUNC_REGEXP
|
1215
|
-
symbol = func =~ STR_FUNC_SYMBOL
|
1216
|
-
|
1217
|
-
paren_re = @@regexp_cache[paren]
|
1218
|
-
term_re = if term == "\n"
|
1219
|
-
/#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
|
1220
|
-
else
|
1221
|
-
@@regexp_cache[term]
|
1222
|
-
end
|
1223
|
-
|
1224
|
-
until end_of_stream? do
|
1225
|
-
c = nil
|
1226
|
-
handled = true
|
1227
|
-
|
1228
|
-
case
|
1229
|
-
when scan(term_re) then
|
1230
|
-
if self.string_nest == 0 then
|
1231
|
-
ss.pos -= 1
|
1232
|
-
break
|
1233
|
-
else
|
1234
|
-
self.string_nest -= 1
|
1235
|
-
end
|
1236
|
-
when paren_re && scan(paren_re) then
|
1237
|
-
self.string_nest += 1
|
1238
|
-
when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
|
1239
|
-
ss.pos -= 1
|
1240
|
-
break
|
1241
|
-
when qwords && scan(/\s/) then
|
1242
|
-
ss.pos -= 1
|
1243
|
-
break
|
1244
|
-
when expand && scan(/#(?!\n)/) then
|
1245
|
-
# do nothing
|
1246
|
-
when check(/\\/) then
|
1247
|
-
case
|
1248
|
-
when qwords && scan(/\\\n/) then
|
1249
|
-
string_buffer << "\n"
|
1250
|
-
next
|
1251
|
-
when qwords && scan(/\\\s/) then
|
1252
|
-
c = " "
|
1253
|
-
when expand && scan(/\\\n/) then
|
1254
|
-
next
|
1255
|
-
when regexp && check(/\\/) then
|
1256
|
-
self.tokadd_escape term
|
1257
|
-
next
|
1258
|
-
when expand && scan(/\\/) then
|
1259
|
-
c = self.read_escape
|
1260
|
-
when scan(/\\\n/) then
|
1261
|
-
# do nothing
|
1262
|
-
when scan(/\\\\/) then
|
1263
|
-
string_buffer << '\\' if escape
|
1264
|
-
c = '\\'
|
1265
|
-
when scan(/\\/) then
|
1266
|
-
unless scan(term_re) || paren.nil? || scan(paren_re) then
|
1267
|
-
string_buffer << "\\"
|
1268
|
-
end
|
1269
|
-
else
|
1270
|
-
handled = false
|
1271
|
-
end # inner /\\/ case
|
1272
|
-
else
|
1273
|
-
handled = false
|
1274
|
-
end # top case
|
1275
|
-
|
1276
|
-
unless handled then
|
1277
|
-
t = if term == "\n"
|
1278
|
-
Regexp.escape "\r\n"
|
1279
|
-
else
|
1280
|
-
Regexp.escape term
|
1281
|
-
end
|
1282
|
-
x = Regexp.escape paren if paren && paren != "\000"
|
1283
|
-
re = if qwords then
|
1284
|
-
/[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
|
1285
|
-
else
|
1286
|
-
/[^#{t}#{x}\#\\]+|./
|
1287
|
-
end
|
1288
|
-
|
1289
|
-
scan re
|
1290
|
-
c = matched
|
1291
|
-
|
1292
|
-
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
|
1293
|
-
end # unless handled
|
1294
|
-
|
1295
|
-
c ||= matched
|
1296
|
-
string_buffer << c
|
1297
|
-
end # until
|
1298
|
-
|
1299
|
-
c ||= matched
|
1300
|
-
c = RubyLexer::EOF if end_of_stream?
|
1301
|
-
|
1302
|
-
return c
|
1303
|
-
end
|
1304
|
-
|
1305
820
|
def unescape s
|
1306
821
|
r = ESCAPES[s]
|
1307
822
|
|
1308
|
-
self.extra_lineno += 1 if s == "\n" # eg backslash newline strings
|
1309
|
-
self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
|
1310
|
-
|
1311
823
|
return r if r
|
1312
824
|
|
1313
825
|
x = case s
|
@@ -1328,7 +840,7 @@ class RubyLexer
|
|
1328
840
|
when /u(\h{1,3})/ then
|
1329
841
|
rb_compile_error("Invalid escape character syntax")
|
1330
842
|
when /u\{(\h+(?:\s+\h+)*)\}/ then
|
1331
|
-
$1.split.map { |
|
843
|
+
$1.split.map { |cp| cp.to_i(16) }.pack("U*")
|
1332
844
|
else
|
1333
845
|
s
|
1334
846
|
end
|
@@ -1441,7 +953,7 @@ class RubyLexer
|
|
1441
953
|
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
1442
954
|
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
1443
955
|
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
1444
|
-
|
956
|
+
STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
|
1445
957
|
|
1446
958
|
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
1447
959
|
|
@@ -1453,6 +965,7 @@ class RubyLexer
|
|
1453
965
|
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
1454
966
|
STR_SSYM = STR_FUNC_SYMBOL
|
1455
967
|
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
968
|
+
STR_LABEL = STR_FUNC_LABEL
|
1456
969
|
|
1457
970
|
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
1458
971
|
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
@@ -1463,7 +976,7 @@ class RubyLexer
|
|
1463
976
|
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
1464
977
|
STR_FUNC_LIST => "STR_FUNC_LIST",
|
1465
978
|
STR_FUNC_TERM => "STR_FUNC_TERM",
|
1466
|
-
|
979
|
+
STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
|
1467
980
|
STR_SQUOTE => "STR_SQUOTE")
|
1468
981
|
end
|
1469
982
|
|
@@ -1473,7 +986,145 @@ class RubyLexer
|
|
1473
986
|
include State::Values
|
1474
987
|
end
|
1475
988
|
|
1476
|
-
|
989
|
+
class RubyLexer
|
990
|
+
module SSWrapper
|
991
|
+
def string= s
|
992
|
+
ss.string= s
|
993
|
+
end
|
994
|
+
|
995
|
+
def beginning_of_line?
|
996
|
+
ss.bol?
|
997
|
+
end
|
998
|
+
|
999
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
1000
|
+
|
1001
|
+
def check re
|
1002
|
+
maybe_pop_stack
|
1003
|
+
|
1004
|
+
ss.check re
|
1005
|
+
end
|
1006
|
+
|
1007
|
+
def end_of_stream?
|
1008
|
+
ss.eos?
|
1009
|
+
end
|
1010
|
+
|
1011
|
+
alias eos? end_of_stream?
|
1012
|
+
|
1013
|
+
def getch
|
1014
|
+
c = ss.getch
|
1015
|
+
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1016
|
+
c
|
1017
|
+
end
|
1018
|
+
|
1019
|
+
def match
|
1020
|
+
ss
|
1021
|
+
end
|
1022
|
+
|
1023
|
+
def matched
|
1024
|
+
ss.matched
|
1025
|
+
end
|
1026
|
+
|
1027
|
+
def in_heredoc?
|
1028
|
+
!!self.old_ss
|
1029
|
+
end
|
1030
|
+
|
1031
|
+
def maybe_pop_stack
|
1032
|
+
if ss.eos? && in_heredoc? then
|
1033
|
+
self.ss_pop
|
1034
|
+
self.lineno_pop
|
1035
|
+
end
|
1036
|
+
end
|
1037
|
+
|
1038
|
+
def pos
|
1039
|
+
ss.pos
|
1040
|
+
end
|
1041
|
+
|
1042
|
+
def pos= n
|
1043
|
+
ss.pos = n
|
1044
|
+
end
|
1045
|
+
|
1046
|
+
def rest
|
1047
|
+
ss.rest
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
def scan re
|
1051
|
+
maybe_pop_stack
|
1052
|
+
|
1053
|
+
ss.scan re
|
1054
|
+
end
|
1055
|
+
|
1056
|
+
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1057
|
+
RPStringScanner
|
1058
|
+
end
|
1059
|
+
|
1060
|
+
def ss_string
|
1061
|
+
ss.string
|
1062
|
+
end
|
1063
|
+
|
1064
|
+
def ss_string= s
|
1065
|
+
raise "Probably not"
|
1066
|
+
ss.string = s
|
1067
|
+
end
|
1068
|
+
|
1069
|
+
def unscan
|
1070
|
+
ss.unscan
|
1071
|
+
end
|
1072
|
+
end
|
1073
|
+
|
1074
|
+
include SSWrapper
|
1075
|
+
end
|
1076
|
+
|
1077
|
+
class RubyLexer
|
1078
|
+
module SSStackish
|
1079
|
+
def lineno_push new_lineno
|
1080
|
+
self.old_lineno = self.lineno
|
1081
|
+
self.lineno = new_lineno
|
1082
|
+
end
|
1083
|
+
|
1084
|
+
def lineno_pop
|
1085
|
+
self.lineno = self.old_lineno
|
1086
|
+
self.old_lineno = nil
|
1087
|
+
end
|
1088
|
+
|
1089
|
+
def ss= o
|
1090
|
+
raise "Clearing ss while in heredoc!?!" if in_heredoc?
|
1091
|
+
@old_ss = nil
|
1092
|
+
super
|
1093
|
+
end
|
1094
|
+
|
1095
|
+
def ss_push new_ss
|
1096
|
+
@old_ss = self.ss
|
1097
|
+
@ss = new_ss
|
1098
|
+
end
|
1099
|
+
|
1100
|
+
def ss_pop
|
1101
|
+
@ss = self.old_ss
|
1102
|
+
@old_ss = nil
|
1103
|
+
end
|
1104
|
+
end
|
1105
|
+
|
1106
|
+
prepend SSStackish
|
1107
|
+
end
|
1108
|
+
|
1109
|
+
if ENV["RP_STRTERM_DEBUG"] then
|
1110
|
+
class RubyLexer
|
1111
|
+
def d o
|
1112
|
+
$stderr.puts o.inspect
|
1113
|
+
end
|
1114
|
+
|
1115
|
+
alias old_lex_strterm= lex_strterm=
|
1116
|
+
|
1117
|
+
def lex_strterm= o
|
1118
|
+
self.old_lex_strterm= o
|
1119
|
+
where = caller.first.split(/:/).first(2).join(":")
|
1120
|
+
$stderr.puts
|
1121
|
+
d :lex_strterm => [o, where]
|
1122
|
+
end
|
1123
|
+
end
|
1124
|
+
end
|
1125
|
+
|
1126
|
+
require_relative "./ruby_lexer.rex.rb"
|
1127
|
+
require_relative "./ruby_lexer_strings.rb"
|
1477
1128
|
|
1478
1129
|
if ENV["RP_LINENO_DEBUG"] then
|
1479
1130
|
class RubyLexer
|
@@ -1486,7 +1137,8 @@ if ENV["RP_LINENO_DEBUG"] then
|
|
1486
1137
|
def lineno= n
|
1487
1138
|
self.old_lineno= n
|
1488
1139
|
where = caller.first.split(/:/).first(2).join(":")
|
1489
|
-
|
1140
|
+
$stderr.puts
|
1141
|
+
d :lineno => [n, where]
|
1490
1142
|
end
|
1491
1143
|
end
|
1492
1144
|
end
|