ruby_parser 3.15.0 → 3.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/History.rdoc +101 -0
- data/Manifest.txt +5 -0
- data/README.rdoc +1 -0
- data/Rakefile +128 -30
- data/bin/ruby_parse_extract_error +1 -1
- data/compare/normalize.rb +8 -3
- data/debugging.md +133 -0
- data/gauntlet.md +106 -0
- data/lib/rp_extensions.rb +15 -36
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby20_parser.rb +3559 -3499
- data/lib/ruby20_parser.y +333 -248
- data/lib/ruby21_parser.rb +3650 -3614
- data/lib/ruby21_parser.y +328 -245
- data/lib/ruby22_parser.rb +3690 -3628
- data/lib/ruby22_parser.y +332 -247
- data/lib/ruby23_parser.rb +3629 -3573
- data/lib/ruby23_parser.y +332 -247
- data/lib/ruby24_parser.rb +3712 -3654
- data/lib/ruby24_parser.y +332 -247
- data/lib/ruby25_parser.rb +3712 -3654
- data/lib/ruby25_parser.y +332 -247
- data/lib/ruby26_parser.rb +3715 -3658
- data/lib/ruby26_parser.y +332 -246
- data/lib/ruby27_parser.rb +5009 -3722
- data/lib/ruby27_parser.y +928 -245
- data/lib/ruby30_parser.rb +8741 -0
- data/lib/ruby30_parser.y +3463 -0
- data/lib/ruby3_parser.yy +3467 -0
- data/lib/ruby_lexer.rb +273 -602
- data/lib/ruby_lexer.rex +28 -21
- data/lib/ruby_lexer.rex.rb +60 -24
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.rb +2 -0
- data/lib/ruby_parser.yy +969 -252
- data/lib/ruby_parser_extras.rb +297 -116
- data/test/test_ruby_lexer.rb +213 -129
- data/test/test_ruby_parser.rb +1288 -110
- data/tools/munge.rb +36 -8
- data/tools/ripper.rb +15 -10
- data.tar.gz.sig +0 -0
- metadata +48 -35
- metadata.gz.sig +1 -4
data/lib/ruby_lexer.rb
CHANGED
@@ -25,10 +25,15 @@ class RubyLexer
|
|
25
25
|
|
26
26
|
HAS_ENC = "".respond_to? :encoding
|
27
27
|
|
28
|
+
BTOKENS = {
|
29
|
+
".." => :tBDOT2,
|
30
|
+
"..." => :tBDOT3,
|
31
|
+
}
|
32
|
+
|
28
33
|
TOKENS = {
|
29
34
|
"!" => :tBANG,
|
30
35
|
"!=" => :tNEQ,
|
31
|
-
|
36
|
+
"!@" => :tBANG,
|
32
37
|
"!~" => :tNMATCH,
|
33
38
|
"," => :tCOMMA,
|
34
39
|
".." => :tDOT2,
|
@@ -41,17 +46,38 @@ class RubyLexer
|
|
41
46
|
"->" => :tLAMBDA,
|
42
47
|
}
|
43
48
|
|
49
|
+
PERCENT_END = {
|
50
|
+
"(" => ")",
|
51
|
+
"[" => "]",
|
52
|
+
"{" => "}",
|
53
|
+
"<" => ">",
|
54
|
+
}
|
55
|
+
|
56
|
+
SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
|
57
|
+
|
44
58
|
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
45
59
|
@@regexp_cache[nil] = nil
|
46
60
|
|
61
|
+
def regexp_cache
|
62
|
+
@@regexp_cache
|
63
|
+
end
|
64
|
+
|
47
65
|
if $DEBUG then
|
48
66
|
attr_reader :lex_state
|
49
67
|
|
50
68
|
def lex_state= o
|
51
69
|
return if @lex_state == o
|
52
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
53
70
|
|
54
|
-
|
71
|
+
from = ""
|
72
|
+
if ENV["VERBOSE"]
|
73
|
+
path = caller[0]
|
74
|
+
path = caller[1] if path =~ /result/
|
75
|
+
path, line, *_ = path.split(/:/)
|
76
|
+
path.delete_prefix! File.dirname File.dirname __FILE__
|
77
|
+
from = " at .%s:%s" % [path, line]
|
78
|
+
end
|
79
|
+
|
80
|
+
warn "lex_state: %p -> %p%s" % [lex_state, o, from]
|
55
81
|
|
56
82
|
@lex_state = o
|
57
83
|
end
|
@@ -61,14 +87,16 @@ class RubyLexer
|
|
61
87
|
|
62
88
|
attr_accessor :lex_state unless $DEBUG
|
63
89
|
|
64
|
-
attr_accessor :lineno # we're bypassing oedipus' lineno handling.
|
65
90
|
attr_accessor :brace_nest
|
66
91
|
attr_accessor :cmdarg
|
67
92
|
attr_accessor :command_start
|
68
93
|
attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
|
69
94
|
attr_accessor :last_state
|
70
95
|
attr_accessor :cond
|
71
|
-
attr_accessor :
|
96
|
+
attr_accessor :old_ss
|
97
|
+
attr_accessor :old_lineno
|
98
|
+
|
99
|
+
# these are generated via ruby_lexer.rex: ss, lineno
|
72
100
|
|
73
101
|
##
|
74
102
|
# Additional context surrounding tokens that both the lexer and
|
@@ -93,6 +121,7 @@ class RubyLexer
|
|
93
121
|
|
94
122
|
self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
|
95
123
|
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
124
|
+
self.ss = RPStringScanner.new ""
|
96
125
|
|
97
126
|
reset
|
98
127
|
end
|
@@ -105,14 +134,8 @@ class RubyLexer
|
|
105
134
|
is_after_operator? ? EXPR_ARG : EXPR_BEG
|
106
135
|
end
|
107
136
|
|
108
|
-
def
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
alias bol? beginning_of_line? # to make .rex file more readable
|
113
|
-
|
114
|
-
def check re
|
115
|
-
ss.check re
|
137
|
+
def ignore_body_comments
|
138
|
+
@comments.clear
|
116
139
|
end
|
117
140
|
|
118
141
|
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
@@ -121,14 +144,8 @@ class RubyLexer
|
|
121
144
|
c
|
122
145
|
end
|
123
146
|
|
124
|
-
def
|
125
|
-
|
126
|
-
self.extra_lineno += r.count("\n") if r
|
127
|
-
r
|
128
|
-
end
|
129
|
-
|
130
|
-
def end_of_stream?
|
131
|
-
ss.eos?
|
147
|
+
def debug n
|
148
|
+
raise "debug #{n}"
|
132
149
|
end
|
133
150
|
|
134
151
|
def expr_dot?
|
@@ -145,128 +162,6 @@ class RubyLexer
|
|
145
162
|
result EXPR_BEG, token, text
|
146
163
|
end
|
147
164
|
|
148
|
-
def fixup_lineno extra = 0
|
149
|
-
self.lineno += self.extra_lineno + extra
|
150
|
-
self.extra_lineno = 0
|
151
|
-
end
|
152
|
-
|
153
|
-
def heredoc here # TODO: rewrite / remove
|
154
|
-
_, eos, func, last_line = here
|
155
|
-
|
156
|
-
indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
|
157
|
-
expand = func =~ STR_FUNC_EXPAND
|
158
|
-
eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
|
159
|
-
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
160
|
-
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
161
|
-
|
162
|
-
rb_compile_error err_msg if end_of_stream?
|
163
|
-
|
164
|
-
if beginning_of_line? && scan(eos_re) then
|
165
|
-
self.lineno += 1
|
166
|
-
ss.unread_many last_line # TODO: figure out how to remove this
|
167
|
-
return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
|
168
|
-
end
|
169
|
-
|
170
|
-
self.string_buffer = []
|
171
|
-
|
172
|
-
if expand then
|
173
|
-
case
|
174
|
-
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
175
|
-
# TODO: !ISASCII
|
176
|
-
# ?! see parser_peek_variable_name
|
177
|
-
return :tSTRING_DVAR, matched
|
178
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
179
|
-
# TODO: !ISASCII
|
180
|
-
return :tSTRING_DVAR, matched
|
181
|
-
when scan(/#[{]/) then
|
182
|
-
self.command_start = true
|
183
|
-
return :tSTRING_DBEG, matched
|
184
|
-
when scan(/#/) then
|
185
|
-
string_buffer << "#"
|
186
|
-
end
|
187
|
-
|
188
|
-
begin
|
189
|
-
c = tokadd_string func, eol, nil
|
190
|
-
|
191
|
-
rb_compile_error err_msg if
|
192
|
-
c == RubyLexer::EOF
|
193
|
-
|
194
|
-
if c != eol then
|
195
|
-
return :tSTRING_CONTENT, string_buffer.join
|
196
|
-
else
|
197
|
-
string_buffer << scan(/\n/)
|
198
|
-
end
|
199
|
-
|
200
|
-
rb_compile_error err_msg if end_of_stream?
|
201
|
-
end until check(eos_re)
|
202
|
-
else
|
203
|
-
until check(eos_re) do
|
204
|
-
string_buffer << scan(/.*(\n|\z)/)
|
205
|
-
rb_compile_error err_msg if end_of_stream?
|
206
|
-
end
|
207
|
-
end
|
208
|
-
|
209
|
-
self.lex_strterm = [:heredoc, eos, func, last_line]
|
210
|
-
|
211
|
-
string_content = begin
|
212
|
-
s = string_buffer.join
|
213
|
-
s.b.force_encoding Encoding::UTF_8
|
214
|
-
end
|
215
|
-
|
216
|
-
return :tSTRING_CONTENT, string_content
|
217
|
-
end
|
218
|
-
|
219
|
-
def heredoc_identifier # TODO: remove / rewrite
|
220
|
-
term, func = nil, STR_FUNC_BORING
|
221
|
-
self.string_buffer = []
|
222
|
-
|
223
|
-
heredoc_indent_mods = "-"
|
224
|
-
heredoc_indent_mods += '\~' if ruby23plus?
|
225
|
-
|
226
|
-
case
|
227
|
-
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
228
|
-
term = ss[2]
|
229
|
-
func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
|
230
|
-
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
231
|
-
func |= case term
|
232
|
-
when "\'" then
|
233
|
-
STR_SQUOTE
|
234
|
-
when '"' then
|
235
|
-
STR_DQUOTE
|
236
|
-
else
|
237
|
-
STR_XQUOTE
|
238
|
-
end
|
239
|
-
string_buffer << ss[3]
|
240
|
-
when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
|
241
|
-
rb_compile_error "unterminated here document identifier"
|
242
|
-
when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
|
243
|
-
term = '"'
|
244
|
-
func |= STR_DQUOTE
|
245
|
-
unless ss[1].empty? then
|
246
|
-
func |= STR_FUNC_INDENT
|
247
|
-
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
248
|
-
end
|
249
|
-
string_buffer << ss[2]
|
250
|
-
else
|
251
|
-
return nil
|
252
|
-
end
|
253
|
-
|
254
|
-
if scan(/.*\n/) then
|
255
|
-
# TODO: think about storing off the char range instead
|
256
|
-
line = matched
|
257
|
-
else
|
258
|
-
line = nil
|
259
|
-
end
|
260
|
-
|
261
|
-
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
262
|
-
|
263
|
-
if term == "`" then
|
264
|
-
result nil, :tXSTRING_BEG, "`"
|
265
|
-
else
|
266
|
-
result nil, :tSTRING_BEG, "\""
|
267
|
-
end
|
268
|
-
end
|
269
|
-
|
270
165
|
def in_fname? # REFACTOR
|
271
166
|
lex_state =~ EXPR_FNAME
|
272
167
|
end
|
@@ -277,13 +172,13 @@ class RubyLexer
|
|
277
172
|
text = matched
|
278
173
|
case
|
279
174
|
when text.end_with?("ri")
|
280
|
-
|
175
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
|
281
176
|
when text.end_with?("r")
|
282
|
-
|
177
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
|
283
178
|
when text.end_with?("i")
|
284
|
-
|
179
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
|
285
180
|
else
|
286
|
-
|
181
|
+
result EXPR_NUM, :tINTEGER, text.to_i(base)
|
287
182
|
end
|
288
183
|
end
|
289
184
|
|
@@ -329,132 +224,10 @@ class RubyLexer
|
|
329
224
|
self.parser.env[id.to_sym] == :lvar
|
330
225
|
end
|
331
226
|
|
332
|
-
def matched
|
333
|
-
ss.matched
|
334
|
-
end
|
335
|
-
|
336
227
|
def not_end?
|
337
228
|
not is_end?
|
338
229
|
end
|
339
230
|
|
340
|
-
def parse_quote # TODO: remove / rewrite
|
341
|
-
beg, nnd, short_hand, c = nil, nil, false, nil
|
342
|
-
|
343
|
-
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
344
|
-
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
345
|
-
c, beg, short_hand = matched, getch, false
|
346
|
-
else # Short-hand (e.g. %{, %., %!, etc)
|
347
|
-
c, beg, short_hand = "Q", getch, true
|
348
|
-
end
|
349
|
-
|
350
|
-
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
351
|
-
rb_compile_error "unterminated quoted string meets end of file"
|
352
|
-
end
|
353
|
-
|
354
|
-
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
355
|
-
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
356
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
357
|
-
|
358
|
-
token_type, text = nil, "%#{c}#{beg}"
|
359
|
-
token_type, string_type = case c
|
360
|
-
when "Q" then
|
361
|
-
ch = short_hand ? nnd : c + beg
|
362
|
-
text = "%#{ch}"
|
363
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
364
|
-
when "q" then
|
365
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
366
|
-
when "W" then
|
367
|
-
eat_whitespace
|
368
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
369
|
-
when "w" then
|
370
|
-
eat_whitespace
|
371
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
372
|
-
when "x" then
|
373
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
374
|
-
when "r" then
|
375
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
376
|
-
when "s" then
|
377
|
-
self.lex_state = EXPR_FNAME
|
378
|
-
[:tSYMBEG, STR_SSYM]
|
379
|
-
when "I" then
|
380
|
-
eat_whitespace
|
381
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
382
|
-
when "i" then
|
383
|
-
eat_whitespace
|
384
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
385
|
-
end
|
386
|
-
|
387
|
-
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
388
|
-
token_type.nil?
|
389
|
-
|
390
|
-
raise "huh" unless string_type
|
391
|
-
|
392
|
-
string string_type, nnd, beg
|
393
|
-
|
394
|
-
return token_type, text
|
395
|
-
end
|
396
|
-
|
397
|
-
def parse_string quote # TODO: rewrite / remove
|
398
|
-
_, string_type, term, open = quote
|
399
|
-
|
400
|
-
space = false # FIX: remove these
|
401
|
-
func = string_type
|
402
|
-
paren = open
|
403
|
-
term_re = @@regexp_cache[term]
|
404
|
-
|
405
|
-
qwords = func =~ STR_FUNC_QWORDS
|
406
|
-
regexp = func =~ STR_FUNC_REGEXP
|
407
|
-
expand = func =~ STR_FUNC_EXPAND
|
408
|
-
|
409
|
-
unless func then # nil'ed from qwords below. *sigh*
|
410
|
-
return :tSTRING_END, nil
|
411
|
-
end
|
412
|
-
|
413
|
-
space = true if qwords and eat_whitespace
|
414
|
-
|
415
|
-
if self.string_nest == 0 && scan(/#{term_re}/) then
|
416
|
-
if qwords then
|
417
|
-
quote[1] = nil
|
418
|
-
return :tSPACE, nil
|
419
|
-
elsif regexp then
|
420
|
-
return :tREGEXP_END, self.regx_options
|
421
|
-
else
|
422
|
-
return :tSTRING_END, term
|
423
|
-
end
|
424
|
-
end
|
425
|
-
|
426
|
-
return :tSPACE, nil if space
|
427
|
-
|
428
|
-
self.string_buffer = []
|
429
|
-
|
430
|
-
if expand
|
431
|
-
case
|
432
|
-
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
433
|
-
# TODO: !ISASCII
|
434
|
-
# ?! see parser_peek_variable_name
|
435
|
-
return :tSTRING_DVAR, nil
|
436
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
437
|
-
# TODO: !ISASCII
|
438
|
-
return :tSTRING_DVAR, nil
|
439
|
-
when scan(/#[{]/) then
|
440
|
-
self.command_start = true
|
441
|
-
return :tSTRING_DBEG, nil
|
442
|
-
when scan(/#/) then
|
443
|
-
string_buffer << "#"
|
444
|
-
end
|
445
|
-
end
|
446
|
-
|
447
|
-
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
448
|
-
if func =~ STR_FUNC_REGEXP then
|
449
|
-
rb_compile_error "unterminated regexp meets end of file"
|
450
|
-
else
|
451
|
-
rb_compile_error "unterminated string meets end of file"
|
452
|
-
end
|
453
|
-
end
|
454
|
-
|
455
|
-
return :tSTRING_CONTENT, string_buffer.join
|
456
|
-
end
|
457
|
-
|
458
231
|
def possibly_escape_string text, check
|
459
232
|
content = match[1]
|
460
233
|
|
@@ -475,11 +248,11 @@ class RubyLexer
|
|
475
248
|
:tAMPER2
|
476
249
|
end
|
477
250
|
|
478
|
-
|
251
|
+
result :arg_state, token, "&"
|
479
252
|
end
|
480
253
|
|
481
254
|
def process_backref text
|
482
|
-
token =
|
255
|
+
token = match[1].to_sym
|
483
256
|
# TODO: can't do lineno hack w/ symbol
|
484
257
|
result EXPR_END, :tBACK_REF, token
|
485
258
|
end
|
@@ -493,7 +266,7 @@ class RubyLexer
|
|
493
266
|
end
|
494
267
|
|
495
268
|
@comments << matched
|
496
|
-
self.lineno += matched.count("\n")
|
269
|
+
self.lineno += matched.count("\n") # HACK?
|
497
270
|
|
498
271
|
nil # TODO
|
499
272
|
end
|
@@ -564,9 +337,9 @@ class RubyLexer
|
|
564
337
|
|
565
338
|
case
|
566
339
|
when scan(/\'/) then
|
567
|
-
string STR_SSYM
|
340
|
+
string STR_SSYM, matched
|
568
341
|
when scan(/\"/) then
|
569
|
-
string STR_DSYM
|
342
|
+
string STR_DSYM, matched
|
570
343
|
end
|
571
344
|
|
572
345
|
result EXPR_FNAME, :tSYMBEG, text
|
@@ -580,41 +353,48 @@ class RubyLexer
|
|
580
353
|
end
|
581
354
|
end
|
582
355
|
|
356
|
+
def process_dots text
|
357
|
+
tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
|
358
|
+
|
359
|
+
result EXPR_BEG, tokens[text], text
|
360
|
+
end
|
361
|
+
|
583
362
|
def process_float text
|
584
363
|
rb_compile_error "Invalid numeric format" if text =~ /__/
|
585
364
|
|
586
365
|
case
|
587
366
|
when text.end_with?("ri")
|
588
|
-
|
367
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
589
368
|
when text.end_with?("i")
|
590
|
-
|
369
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
591
370
|
when text.end_with?("r")
|
592
|
-
|
371
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
593
372
|
else
|
594
|
-
|
373
|
+
result EXPR_NUM, :tFLOAT, text.to_f
|
595
374
|
end
|
596
375
|
end
|
597
376
|
|
598
377
|
def process_gvar text
|
599
|
-
|
378
|
+
if parser.class.version > 20 && text == "$-" then
|
379
|
+
rb_compile_error "unexpected $undefined"
|
380
|
+
end
|
381
|
+
|
600
382
|
result EXPR_END, :tGVAR, text
|
601
383
|
end
|
602
384
|
|
603
385
|
def process_gvar_oddity text
|
604
|
-
return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
|
605
386
|
rb_compile_error "#{text.inspect} is not allowed as a global variable name"
|
606
387
|
end
|
607
388
|
|
608
389
|
def process_ivar text
|
609
390
|
tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
|
610
|
-
text.lineno = self.lineno
|
611
391
|
result EXPR_END, tok_id, text
|
612
392
|
end
|
613
393
|
|
614
394
|
def process_label text
|
615
395
|
symbol = possibly_escape_string text, /^\"/
|
616
396
|
|
617
|
-
result EXPR_LAB, :tLABEL,
|
397
|
+
result EXPR_LAB, :tLABEL, symbol
|
618
398
|
end
|
619
399
|
|
620
400
|
def process_label_or_string text
|
@@ -622,11 +402,15 @@ class RubyLexer
|
|
622
402
|
@was_label = nil
|
623
403
|
return process_label text
|
624
404
|
elsif text =~ /:\Z/ then
|
625
|
-
|
405
|
+
self.pos -= 1 # put back ":"
|
626
406
|
text = text[0..-2]
|
627
407
|
end
|
628
408
|
|
629
|
-
|
409
|
+
orig_line = lineno
|
410
|
+
str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
|
411
|
+
self.lineno += str.count("\n")
|
412
|
+
|
413
|
+
result EXPR_END, :tSTRING, str, orig_line
|
630
414
|
end
|
631
415
|
|
632
416
|
def process_lchevron text
|
@@ -644,34 +428,25 @@ class RubyLexer
|
|
644
428
|
self.lex_state = EXPR_BEG
|
645
429
|
end
|
646
430
|
|
647
|
-
|
431
|
+
result lex_state, :tLSHFT, "\<\<"
|
648
432
|
end
|
649
433
|
|
650
|
-
def process_newline_or_comment text
|
434
|
+
def process_newline_or_comment text # ../compare/parse30.y:9126 ish
|
651
435
|
c = matched
|
652
|
-
hit = false
|
653
436
|
|
654
437
|
if c == "#" then
|
655
|
-
|
438
|
+
self.pos -= 1
|
656
439
|
|
657
|
-
# TODO: handle magic comments
|
658
440
|
while scan(/\s*\#.*(\n+|\z)/) do
|
659
|
-
|
660
|
-
self.lineno += matched.lines.to_a.size
|
441
|
+
self.lineno += matched.count "\n"
|
661
442
|
@comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
662
443
|
end
|
663
444
|
|
664
445
|
return nil if end_of_stream?
|
665
446
|
end
|
666
447
|
|
667
|
-
self.lineno += 1 unless hit
|
668
|
-
|
669
|
-
# Replace a string of newlines with a single one
|
670
|
-
self.lineno += matched.lines.to_a.size if scan(/\n+/)
|
671
|
-
|
672
448
|
c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
|
673
449
|
lex_state !~ EXPR_LABELED)
|
674
|
-
# TODO: figure out what token_seen is for
|
675
450
|
if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
|
676
451
|
# ignore if !fallthrough?
|
677
452
|
if !c && parser.in_kwarg then
|
@@ -679,25 +454,29 @@ class RubyLexer
|
|
679
454
|
self.command_start = true
|
680
455
|
return result EXPR_BEG, :tNL, nil
|
681
456
|
else
|
682
|
-
|
457
|
+
maybe_pop_stack
|
458
|
+
return # goto retry
|
683
459
|
end
|
684
460
|
end
|
685
461
|
|
686
|
-
if scan(/
|
687
|
-
self.space_seen = true
|
462
|
+
if scan(/[\ \t\r\f\v]+/) then
|
463
|
+
self.space_seen = true
|
464
|
+
end
|
688
465
|
|
689
|
-
|
690
|
-
return
|
466
|
+
if check(/#/) then
|
467
|
+
return # goto retry
|
468
|
+
elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
|
469
|
+
return # goto retry
|
691
470
|
end
|
692
471
|
|
693
472
|
self.command_start = true
|
694
473
|
|
695
|
-
|
474
|
+
result EXPR_BEG, :tNL, nil
|
696
475
|
end
|
697
476
|
|
698
477
|
def process_nthref text
|
699
478
|
# TODO: can't do lineno hack w/ number
|
700
|
-
result EXPR_END, :tNTH_REF,
|
479
|
+
result EXPR_END, :tNTH_REF, match[1].to_i
|
701
480
|
end
|
702
481
|
|
703
482
|
def process_paren text
|
@@ -725,13 +504,16 @@ class RubyLexer
|
|
725
504
|
end
|
726
505
|
|
727
506
|
def process_percent text
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
507
|
+
case
|
508
|
+
when is_beg? then
|
509
|
+
process_percent_quote
|
510
|
+
when scan(/\=/)
|
511
|
+
result EXPR_BEG, :tOP_ASGN, "%"
|
512
|
+
when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
|
513
|
+
process_percent_quote
|
514
|
+
else
|
515
|
+
result :arg_state, :tPERCENT, "%"
|
516
|
+
end
|
735
517
|
end
|
736
518
|
|
737
519
|
def process_plus_minus text
|
@@ -805,20 +587,21 @@ class RubyLexer
|
|
805
587
|
end
|
806
588
|
|
807
589
|
def process_simple_string text
|
808
|
-
|
809
|
-
|
810
|
-
}
|
590
|
+
orig_line = lineno
|
591
|
+
self.lineno += text.count("\n")
|
811
592
|
|
812
|
-
|
593
|
+
str = text[1..-2]
|
594
|
+
.gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
|
595
|
+
str = str.b unless str.valid_encoding?
|
813
596
|
|
814
|
-
result EXPR_END, :tSTRING,
|
597
|
+
result EXPR_END, :tSTRING, str, orig_line
|
815
598
|
end
|
816
599
|
|
817
600
|
def process_slash text
|
818
601
|
if is_beg? then
|
819
|
-
string STR_REGEXP
|
602
|
+
string STR_REGEXP, matched
|
820
603
|
|
821
|
-
return result
|
604
|
+
return result nil, :tREGEXP_BEG, "/"
|
822
605
|
end
|
823
606
|
|
824
607
|
if scan(/\=/) then
|
@@ -833,7 +616,7 @@ class RubyLexer
|
|
833
616
|
end
|
834
617
|
end
|
835
618
|
|
836
|
-
|
619
|
+
result :arg_state, :tDIVIDE, "/"
|
837
620
|
end
|
838
621
|
|
839
622
|
def process_square_bracket text
|
@@ -865,34 +648,6 @@ class RubyLexer
|
|
865
648
|
result EXPR_PAR, token, text
|
866
649
|
end
|
867
650
|
|
868
|
-
def process_string # TODO: rewrite / remove
|
869
|
-
# matches top of parser_yylex in compare/parse23.y:8113
|
870
|
-
token = if lex_strterm[0] == :heredoc then
|
871
|
-
self.heredoc lex_strterm
|
872
|
-
else
|
873
|
-
self.parse_string lex_strterm
|
874
|
-
end
|
875
|
-
|
876
|
-
token_type, c = token
|
877
|
-
|
878
|
-
# matches parser_string_term from 2.3, but way off from 2.5
|
879
|
-
if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
|
880
|
-
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
881
|
-
!cond.is_in_state) || is_arg?) &&
|
882
|
-
is_label_suffix? then
|
883
|
-
scan(/:/)
|
884
|
-
token_type = token[0] = :tLABEL_END
|
885
|
-
end
|
886
|
-
end
|
887
|
-
|
888
|
-
if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
|
889
|
-
self.lex_strterm = nil
|
890
|
-
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
|
891
|
-
end
|
892
|
-
|
893
|
-
return token
|
894
|
-
end
|
895
|
-
|
896
651
|
def process_symbol text
|
897
652
|
symbol = possibly_escape_string text, /^:\"/ # stupid emacs
|
898
653
|
|
@@ -901,7 +656,6 @@ class RubyLexer
|
|
901
656
|
|
902
657
|
def process_token text
|
903
658
|
# matching: parse_ident in compare/parse23.y:7989
|
904
|
-
# TODO: make this always return [token, lineno]
|
905
659
|
# FIX: remove: self.last_state = lex_state
|
906
660
|
|
907
661
|
token = self.token = text
|
@@ -924,8 +678,7 @@ class RubyLexer
|
|
924
678
|
|
925
679
|
if is_label_possible? and is_label_suffix? then
|
926
680
|
scan(/:/)
|
927
|
-
|
928
|
-
return result EXPR_LAB, :tLABEL, [token, self.lineno]
|
681
|
+
return result EXPR_LAB, :tLABEL, token
|
929
682
|
end
|
930
683
|
|
931
684
|
# TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
|
@@ -936,14 +689,15 @@ class RubyLexer
|
|
936
689
|
return process_token_keyword keyword if keyword
|
937
690
|
end
|
938
691
|
|
939
|
-
# matching: compare/
|
940
|
-
state = if
|
692
|
+
# matching: compare/parse30.y:9039
|
693
|
+
state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
|
941
694
|
cmd_state ? EXPR_CMDARG : EXPR_ARG
|
942
695
|
elsif lex_state =~ EXPR_FNAME then
|
943
696
|
EXPR_ENDFN
|
944
697
|
else
|
945
698
|
EXPR_END
|
946
699
|
end
|
700
|
+
self.lex_state = state
|
947
701
|
|
948
702
|
tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
|
949
703
|
|
@@ -953,20 +707,16 @@ class RubyLexer
|
|
953
707
|
state = EXPR_END|EXPR_LABEL
|
954
708
|
end
|
955
709
|
|
956
|
-
|
957
|
-
|
958
|
-
return result(state, tok_id, token)
|
710
|
+
result state, tok_id, token
|
959
711
|
end
|
960
712
|
|
961
713
|
def process_token_keyword keyword
|
962
714
|
# matching MIDDLE of parse_ident in compare/parse23.y:8046
|
963
715
|
state = lex_state
|
964
|
-
self.lex_state = keyword.state
|
965
|
-
|
966
|
-
value = [token, self.lineno]
|
967
716
|
|
968
|
-
return result(
|
717
|
+
return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
|
969
718
|
|
719
|
+
self.lex_state = keyword.state
|
970
720
|
self.command_start = true if lex_state =~ EXPR_BEG
|
971
721
|
|
972
722
|
case
|
@@ -975,27 +725,28 @@ class RubyLexer
|
|
975
725
|
when lambda_beginning? then
|
976
726
|
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
977
727
|
self.paren_nest -= 1 # TODO: question this?
|
978
|
-
result lex_state, :kDO_LAMBDA,
|
728
|
+
result lex_state, :kDO_LAMBDA, token
|
979
729
|
when cond.is_in_state then
|
980
|
-
result lex_state, :kDO_COND,
|
730
|
+
result lex_state, :kDO_COND, token
|
981
731
|
when cmdarg.is_in_state && state != EXPR_CMDARG then
|
982
|
-
result lex_state, :kDO_BLOCK,
|
732
|
+
result lex_state, :kDO_BLOCK, token
|
983
733
|
else
|
984
|
-
result lex_state, :kDO,
|
734
|
+
result lex_state, :kDO, token
|
985
735
|
end
|
986
736
|
when state =~ EXPR_PAD then
|
987
|
-
result lex_state, keyword.id0,
|
737
|
+
result lex_state, keyword.id0, token
|
988
738
|
when keyword.id0 != keyword.id1 then
|
989
|
-
result EXPR_PAR, keyword.id1,
|
739
|
+
result EXPR_PAR, keyword.id1, token
|
990
740
|
else
|
991
|
-
result lex_state, keyword.id1,
|
741
|
+
result lex_state, keyword.id1, token
|
992
742
|
end
|
993
743
|
end
|
994
744
|
|
995
745
|
def process_underscore text
|
996
|
-
|
746
|
+
self.unscan # put back "_"
|
997
747
|
|
998
748
|
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
749
|
+
ss.terminate
|
999
750
|
[RubyLexer::EOF, RubyLexer::EOF]
|
1000
751
|
elsif scan(/#{IDENT_CHAR}+/) then
|
1001
752
|
process_token matched
|
@@ -1003,121 +754,35 @@ class RubyLexer
|
|
1003
754
|
end
|
1004
755
|
|
1005
756
|
def rb_compile_error msg
|
1006
|
-
msg += ". near line #{self.lineno}: #{
|
757
|
+
msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
|
1007
758
|
raise RubyParser::SyntaxError, msg
|
1008
759
|
end
|
1009
760
|
|
1010
|
-
def read_escape # TODO: remove / rewrite
|
1011
|
-
case
|
1012
|
-
when scan(/\\/) then # Backslash
|
1013
|
-
'\\'
|
1014
|
-
when scan(/n/) then # newline
|
1015
|
-
self.extra_lineno -= 1
|
1016
|
-
"\n"
|
1017
|
-
when scan(/t/) then # horizontal tab
|
1018
|
-
"\t"
|
1019
|
-
when scan(/r/) then # carriage-return
|
1020
|
-
"\r"
|
1021
|
-
when scan(/f/) then # form-feed
|
1022
|
-
"\f"
|
1023
|
-
when scan(/v/) then # vertical tab
|
1024
|
-
"\13"
|
1025
|
-
when scan(/a/) then # alarm(bell)
|
1026
|
-
"\007"
|
1027
|
-
when scan(/e/) then # escape
|
1028
|
-
"\033"
|
1029
|
-
when scan(/b/) then # backspace
|
1030
|
-
"\010"
|
1031
|
-
when scan(/s/) then # space
|
1032
|
-
" "
|
1033
|
-
when scan(/[0-7]{1,3}/) then # octal constant
|
1034
|
-
(matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
|
1035
|
-
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
1036
|
-
# TODO: force encode everything to UTF-8?
|
1037
|
-
ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
|
1038
|
-
when check(/M-\\./) then
|
1039
|
-
scan(/M-\\/) # eat it
|
1040
|
-
c = self.read_escape
|
1041
|
-
c[0] = (c[0].ord | 0x80).chr
|
1042
|
-
c
|
1043
|
-
when scan(/M-(.)/) then
|
1044
|
-
c = ss[1]
|
1045
|
-
c[0] = (c[0].ord | 0x80).chr
|
1046
|
-
c
|
1047
|
-
when check(/(C-|c)\\[\\MCc]/) then
|
1048
|
-
scan(/(C-|c)\\/) # eat it
|
1049
|
-
c = self.read_escape
|
1050
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1051
|
-
c
|
1052
|
-
when check(/(C-|c)\\(?!u|\\)/) then
|
1053
|
-
scan(/(C-|c)\\/) # eat it
|
1054
|
-
c = read_escape
|
1055
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1056
|
-
c
|
1057
|
-
when scan(/C-\?|c\?/) then
|
1058
|
-
127.chr
|
1059
|
-
when scan(/(C-|c)(.)/) then
|
1060
|
-
c = ss[2]
|
1061
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1062
|
-
c
|
1063
|
-
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
1064
|
-
matched
|
1065
|
-
when scan(/u(\h{4})/) then
|
1066
|
-
[ss[1].to_i(16)].pack("U")
|
1067
|
-
when scan(/u(\h{1,3})/) then
|
1068
|
-
rb_compile_error "Invalid escape character syntax"
|
1069
|
-
when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
|
1070
|
-
ss[1].split.map { |s| s.to_i(16) }.pack("U*")
|
1071
|
-
when scan(/[McCx0-9]/) || end_of_stream? then
|
1072
|
-
rb_compile_error("Invalid escape character syntax")
|
1073
|
-
else
|
1074
|
-
getch
|
1075
|
-
end.dup
|
1076
|
-
end
|
1077
|
-
|
1078
|
-
def getch
|
1079
|
-
c = ss.getch
|
1080
|
-
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1081
|
-
c
|
1082
|
-
end
|
1083
|
-
|
1084
|
-
def regx_options # TODO: rewrite / remove
|
1085
|
-
good, bad = [], []
|
1086
|
-
|
1087
|
-
if scan(/[a-z]+/) then
|
1088
|
-
good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
|
1089
|
-
end
|
1090
|
-
|
1091
|
-
unless bad.empty? then
|
1092
|
-
rb_compile_error("unknown regexp option%s - %s" %
|
1093
|
-
[(bad.size > 1 ? "s" : ""), bad.join.inspect])
|
1094
|
-
end
|
1095
|
-
|
1096
|
-
return good.join
|
1097
|
-
end
|
1098
|
-
|
1099
761
|
def reset
|
762
|
+
self.lineno = 1
|
1100
763
|
self.brace_nest = 0
|
1101
764
|
self.command_start = true
|
1102
765
|
self.comments = []
|
1103
766
|
self.lex_state = EXPR_NONE
|
1104
767
|
self.lex_strterm = nil
|
1105
|
-
self.lineno = 1
|
1106
768
|
self.lpar_beg = nil
|
1107
769
|
self.paren_nest = 0
|
1108
770
|
self.space_seen = false
|
1109
771
|
self.string_nest = 0
|
1110
772
|
self.token = nil
|
1111
|
-
self.
|
773
|
+
self.string_buffer = []
|
774
|
+
self.old_ss = nil
|
775
|
+
self.old_lineno = nil
|
1112
776
|
|
1113
777
|
self.cond.reset
|
1114
778
|
self.cmdarg.reset
|
1115
779
|
end
|
1116
780
|
|
1117
|
-
def result new_state, token, text # :nodoc:
|
781
|
+
def result new_state, token, text, line = self.lineno # :nodoc:
|
1118
782
|
new_state = self.arg_state if new_state == :arg_state
|
1119
783
|
self.lex_state = new_state if new_state
|
1120
|
-
|
784
|
+
|
785
|
+
[token, [text, line]]
|
1121
786
|
end
|
1122
787
|
|
1123
788
|
def ruby22_label?
|
@@ -1136,12 +801,8 @@ class RubyLexer
|
|
1136
801
|
parser.class.version <= 24
|
1137
802
|
end
|
1138
803
|
|
1139
|
-
def
|
1140
|
-
|
1141
|
-
end
|
1142
|
-
|
1143
|
-
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1144
|
-
RPStringScanner
|
804
|
+
def ruby27plus?
|
805
|
+
parser.class.version >= 27
|
1145
806
|
end
|
1146
807
|
|
1147
808
|
def space_vs_beginning space_type, beg_type, fallback
|
@@ -1156,139 +817,9 @@ class RubyLexer
|
|
1156
817
|
end
|
1157
818
|
end
|
1158
819
|
|
1159
|
-
def string type, beg = matched, nnd = "\0"
|
1160
|
-
self.lex_strterm = [:strterm, type, beg, nnd]
|
1161
|
-
end
|
1162
|
-
|
1163
|
-
def tokadd_escape term # TODO: rewrite / remove
|
1164
|
-
case
|
1165
|
-
when scan(/\\\n/) then
|
1166
|
-
# just ignore
|
1167
|
-
when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
|
1168
|
-
self.string_buffer << matched
|
1169
|
-
when scan(/\\([MC]-|c)(?=\\)/) then
|
1170
|
-
self.string_buffer << matched
|
1171
|
-
self.tokadd_escape term
|
1172
|
-
when scan(/\\([MC]-|c)(.)/) then
|
1173
|
-
self.string_buffer << matched
|
1174
|
-
when scan(/\\[McCx]/) then
|
1175
|
-
rb_compile_error "Invalid escape character syntax"
|
1176
|
-
when scan(/\\(.)/m) then
|
1177
|
-
chr = ss[1]
|
1178
|
-
prev = self.string_buffer.last
|
1179
|
-
if term == chr && prev && prev.end_with?("(?") then
|
1180
|
-
self.string_buffer << chr
|
1181
|
-
elsif term == chr || chr.ascii_only? then
|
1182
|
-
self.string_buffer << matched # dunno why we keep them for ascii
|
1183
|
-
else
|
1184
|
-
self.string_buffer << chr # HACK? this is such a rat's nest
|
1185
|
-
end
|
1186
|
-
else
|
1187
|
-
rb_compile_error "Invalid escape character syntax"
|
1188
|
-
end
|
1189
|
-
end
|
1190
|
-
|
1191
|
-
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1192
|
-
qwords = func =~ STR_FUNC_QWORDS
|
1193
|
-
escape = func =~ STR_FUNC_ESCAPE
|
1194
|
-
expand = func =~ STR_FUNC_EXPAND
|
1195
|
-
regexp = func =~ STR_FUNC_REGEXP
|
1196
|
-
symbol = func =~ STR_FUNC_SYMBOL
|
1197
|
-
|
1198
|
-
paren_re = @@regexp_cache[paren]
|
1199
|
-
term_re = if term == "\n"
|
1200
|
-
/#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
|
1201
|
-
else
|
1202
|
-
@@regexp_cache[term]
|
1203
|
-
end
|
1204
|
-
|
1205
|
-
until end_of_stream? do
|
1206
|
-
c = nil
|
1207
|
-
handled = true
|
1208
|
-
|
1209
|
-
case
|
1210
|
-
when scan(term_re) then
|
1211
|
-
if self.string_nest == 0 then
|
1212
|
-
ss.pos -= 1
|
1213
|
-
break
|
1214
|
-
else
|
1215
|
-
self.string_nest -= 1
|
1216
|
-
end
|
1217
|
-
when paren_re && scan(paren_re) then
|
1218
|
-
self.string_nest += 1
|
1219
|
-
when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
|
1220
|
-
ss.pos -= 1
|
1221
|
-
break
|
1222
|
-
when qwords && scan(/\s/) then
|
1223
|
-
ss.pos -= 1
|
1224
|
-
break
|
1225
|
-
when expand && scan(/#(?!\n)/) then
|
1226
|
-
# do nothing
|
1227
|
-
when check(/\\/) then
|
1228
|
-
case
|
1229
|
-
when qwords && scan(/\\\n/) then
|
1230
|
-
string_buffer << "\n"
|
1231
|
-
next
|
1232
|
-
when qwords && scan(/\\\s/) then
|
1233
|
-
c = " "
|
1234
|
-
when expand && scan(/\\\n/) then
|
1235
|
-
next
|
1236
|
-
when regexp && check(/\\/) then
|
1237
|
-
self.tokadd_escape term
|
1238
|
-
next
|
1239
|
-
when expand && scan(/\\/) then
|
1240
|
-
c = self.read_escape
|
1241
|
-
when scan(/\\\n/) then
|
1242
|
-
# do nothing
|
1243
|
-
when scan(/\\\\/) then
|
1244
|
-
string_buffer << '\\' if escape
|
1245
|
-
c = '\\'
|
1246
|
-
when scan(/\\/) then
|
1247
|
-
unless scan(term_re) || paren.nil? || scan(paren_re) then
|
1248
|
-
string_buffer << "\\"
|
1249
|
-
end
|
1250
|
-
else
|
1251
|
-
handled = false
|
1252
|
-
end # inner /\\/ case
|
1253
|
-
else
|
1254
|
-
handled = false
|
1255
|
-
end # top case
|
1256
|
-
|
1257
|
-
unless handled then
|
1258
|
-
t = if term == "\n"
|
1259
|
-
Regexp.escape "\r\n"
|
1260
|
-
else
|
1261
|
-
Regexp.escape term
|
1262
|
-
end
|
1263
|
-
x = Regexp.escape paren if paren && paren != "\000"
|
1264
|
-
re = if qwords then
|
1265
|
-
/[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
|
1266
|
-
else
|
1267
|
-
/[^#{t}#{x}\#\\]+|./
|
1268
|
-
end
|
1269
|
-
|
1270
|
-
scan re
|
1271
|
-
c = matched
|
1272
|
-
|
1273
|
-
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
|
1274
|
-
end # unless handled
|
1275
|
-
|
1276
|
-
c ||= matched
|
1277
|
-
string_buffer << c
|
1278
|
-
end # until
|
1279
|
-
|
1280
|
-
c ||= matched
|
1281
|
-
c = RubyLexer::EOF if end_of_stream?
|
1282
|
-
|
1283
|
-
return c
|
1284
|
-
end
|
1285
|
-
|
1286
820
|
def unescape s
|
1287
821
|
r = ESCAPES[s]
|
1288
822
|
|
1289
|
-
self.extra_lineno += 1 if s == "\n" # eg backslash newline strings
|
1290
|
-
self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
|
1291
|
-
|
1292
823
|
return r if r
|
1293
824
|
|
1294
825
|
x = case s
|
@@ -1309,7 +840,7 @@ class RubyLexer
|
|
1309
840
|
when /u(\h{1,3})/ then
|
1310
841
|
rb_compile_error("Invalid escape character syntax")
|
1311
842
|
when /u\{(\h+(?:\s+\h+)*)\}/ then
|
1312
|
-
$1.split.map { |
|
843
|
+
$1.split.map { |cp| cp.to_i(16) }.pack("U*")
|
1313
844
|
else
|
1314
845
|
s
|
1315
846
|
end
|
@@ -1422,7 +953,7 @@ class RubyLexer
|
|
1422
953
|
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
1423
954
|
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
1424
955
|
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
1425
|
-
|
956
|
+
STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
|
1426
957
|
|
1427
958
|
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
1428
959
|
|
@@ -1434,6 +965,7 @@ class RubyLexer
|
|
1434
965
|
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
1435
966
|
STR_SSYM = STR_FUNC_SYMBOL
|
1436
967
|
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
968
|
+
STR_LABEL = STR_FUNC_LABEL
|
1437
969
|
|
1438
970
|
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
1439
971
|
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
@@ -1444,7 +976,7 @@ class RubyLexer
|
|
1444
976
|
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
1445
977
|
STR_FUNC_LIST => "STR_FUNC_LIST",
|
1446
978
|
STR_FUNC_TERM => "STR_FUNC_TERM",
|
1447
|
-
|
979
|
+
STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
|
1448
980
|
STR_SQUOTE => "STR_SQUOTE")
|
1449
981
|
end
|
1450
982
|
|
@@ -1454,7 +986,145 @@ class RubyLexer
|
|
1454
986
|
include State::Values
|
1455
987
|
end
|
1456
988
|
|
1457
|
-
|
989
|
+
class RubyLexer
|
990
|
+
module SSWrapper
|
991
|
+
def string= s
|
992
|
+
ss.string= s
|
993
|
+
end
|
994
|
+
|
995
|
+
def beginning_of_line?
|
996
|
+
ss.bol?
|
997
|
+
end
|
998
|
+
|
999
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
1000
|
+
|
1001
|
+
def check re
|
1002
|
+
maybe_pop_stack
|
1003
|
+
|
1004
|
+
ss.check re
|
1005
|
+
end
|
1006
|
+
|
1007
|
+
def end_of_stream?
|
1008
|
+
ss.eos?
|
1009
|
+
end
|
1010
|
+
|
1011
|
+
alias eos? end_of_stream?
|
1012
|
+
|
1013
|
+
def getch
|
1014
|
+
c = ss.getch
|
1015
|
+
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1016
|
+
c
|
1017
|
+
end
|
1018
|
+
|
1019
|
+
def match
|
1020
|
+
ss
|
1021
|
+
end
|
1022
|
+
|
1023
|
+
def matched
|
1024
|
+
ss.matched
|
1025
|
+
end
|
1026
|
+
|
1027
|
+
def in_heredoc?
|
1028
|
+
!!self.old_ss
|
1029
|
+
end
|
1030
|
+
|
1031
|
+
def maybe_pop_stack
|
1032
|
+
if ss.eos? && in_heredoc? then
|
1033
|
+
self.ss_pop
|
1034
|
+
self.lineno_pop
|
1035
|
+
end
|
1036
|
+
end
|
1037
|
+
|
1038
|
+
def pos
|
1039
|
+
ss.pos
|
1040
|
+
end
|
1041
|
+
|
1042
|
+
def pos= n
|
1043
|
+
ss.pos = n
|
1044
|
+
end
|
1045
|
+
|
1046
|
+
def rest
|
1047
|
+
ss.rest
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
def scan re
|
1051
|
+
maybe_pop_stack
|
1052
|
+
|
1053
|
+
ss.scan re
|
1054
|
+
end
|
1055
|
+
|
1056
|
+
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1057
|
+
RPStringScanner
|
1058
|
+
end
|
1059
|
+
|
1060
|
+
def ss_string
|
1061
|
+
ss.string
|
1062
|
+
end
|
1063
|
+
|
1064
|
+
def ss_string= s
|
1065
|
+
raise "Probably not"
|
1066
|
+
ss.string = s
|
1067
|
+
end
|
1068
|
+
|
1069
|
+
def unscan
|
1070
|
+
ss.unscan
|
1071
|
+
end
|
1072
|
+
end
|
1073
|
+
|
1074
|
+
include SSWrapper
|
1075
|
+
end
|
1076
|
+
|
1077
|
+
class RubyLexer
|
1078
|
+
module SSStackish
|
1079
|
+
def lineno_push new_lineno
|
1080
|
+
self.old_lineno = self.lineno
|
1081
|
+
self.lineno = new_lineno
|
1082
|
+
end
|
1083
|
+
|
1084
|
+
def lineno_pop
|
1085
|
+
self.lineno = self.old_lineno
|
1086
|
+
self.old_lineno = nil
|
1087
|
+
end
|
1088
|
+
|
1089
|
+
def ss= o
|
1090
|
+
raise "Clearing ss while in heredoc!?!" if in_heredoc?
|
1091
|
+
@old_ss = nil
|
1092
|
+
super
|
1093
|
+
end
|
1094
|
+
|
1095
|
+
def ss_push new_ss
|
1096
|
+
@old_ss = self.ss
|
1097
|
+
@ss = new_ss
|
1098
|
+
end
|
1099
|
+
|
1100
|
+
def ss_pop
|
1101
|
+
@ss = self.old_ss
|
1102
|
+
@old_ss = nil
|
1103
|
+
end
|
1104
|
+
end
|
1105
|
+
|
1106
|
+
prepend SSStackish
|
1107
|
+
end
|
1108
|
+
|
1109
|
+
if ENV["RP_STRTERM_DEBUG"] then
|
1110
|
+
class RubyLexer
|
1111
|
+
def d o
|
1112
|
+
$stderr.puts o.inspect
|
1113
|
+
end
|
1114
|
+
|
1115
|
+
alias old_lex_strterm= lex_strterm=
|
1116
|
+
|
1117
|
+
def lex_strterm= o
|
1118
|
+
self.old_lex_strterm= o
|
1119
|
+
where = caller.first.split(/:/).first(2).join(":")
|
1120
|
+
$stderr.puts
|
1121
|
+
d :lex_strterm => [o, where]
|
1122
|
+
end
|
1123
|
+
end
|
1124
|
+
end
|
1125
|
+
|
1126
|
+
require_relative "./ruby_lexer.rex.rb"
|
1127
|
+
require_relative "./ruby_lexer_strings.rb"
|
1458
1128
|
|
1459
1129
|
if ENV["RP_LINENO_DEBUG"] then
|
1460
1130
|
class RubyLexer
|
@@ -1467,7 +1137,8 @@ if ENV["RP_LINENO_DEBUG"] then
|
|
1467
1137
|
def lineno= n
|
1468
1138
|
self.old_lineno= n
|
1469
1139
|
where = caller.first.split(/:/).first(2).join(":")
|
1470
|
-
|
1140
|
+
$stderr.puts
|
1141
|
+
d :lineno => [n, where]
|
1471
1142
|
end
|
1472
1143
|
end
|
1473
1144
|
end
|