ruby_parser 3.15.0 → 3.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/History.rdoc +151 -0
- data/Manifest.txt +7 -0
- data/README.rdoc +9 -6
- data/Rakefile +141 -31
- data/bin/ruby_parse_extract_error +1 -1
- data/compare/normalize.rb +8 -3
- data/debugging.md +133 -0
- data/gauntlet.md +107 -0
- data/lib/rp_extensions.rb +15 -36
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby20_parser.rb +7544 -3633
- data/lib/ruby20_parser.y +335 -257
- data/lib/ruby21_parser.rb +7518 -3678
- data/lib/ruby21_parser.y +330 -254
- data/lib/ruby22_parser.rb +7652 -3689
- data/lib/ruby22_parser.y +334 -256
- data/lib/ruby23_parser.rb +7659 -3702
- data/lib/ruby23_parser.y +334 -256
- data/lib/ruby24_parser.rb +7748 -3721
- data/lib/ruby24_parser.y +334 -256
- data/lib/ruby25_parser.rb +7748 -3721
- data/lib/ruby25_parser.y +334 -256
- data/lib/ruby26_parser.rb +7755 -3726
- data/lib/ruby26_parser.y +334 -255
- data/lib/ruby27_parser.rb +10290 -4518
- data/lib/ruby27_parser.y +933 -254
- data/lib/ruby30_parser.rb +13258 -0
- data/lib/ruby30_parser.y +3459 -0
- data/lib/ruby31_parser.rb +13638 -0
- data/lib/ruby31_parser.y +3493 -0
- data/lib/ruby3_parser.yy +3548 -0
- data/lib/ruby_lexer.rb +277 -599
- data/lib/ruby_lexer.rex +28 -21
- data/lib/ruby_lexer.rex.rb +60 -24
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.rb +4 -0
- data/lib/ruby_parser.yy +974 -261
- data/lib/ruby_parser_extras.rb +355 -114
- data/test/test_ruby_lexer.rb +226 -129
- data/test/test_ruby_parser.rb +1653 -267
- data/tools/munge.rb +36 -8
- data/tools/ripper.rb +15 -10
- data.tar.gz.sig +0 -0
- metadata +55 -37
- metadata.gz.sig +0 -0
data/lib/ruby_lexer.rb
CHANGED
@@ -25,10 +25,15 @@ class RubyLexer
|
|
25
25
|
|
26
26
|
HAS_ENC = "".respond_to? :encoding
|
27
27
|
|
28
|
+
BTOKENS = {
|
29
|
+
".." => :tBDOT2,
|
30
|
+
"..." => :tBDOT3,
|
31
|
+
}
|
32
|
+
|
28
33
|
TOKENS = {
|
29
34
|
"!" => :tBANG,
|
30
35
|
"!=" => :tNEQ,
|
31
|
-
|
36
|
+
"!@" => :tBANG,
|
32
37
|
"!~" => :tNMATCH,
|
33
38
|
"," => :tCOMMA,
|
34
39
|
".." => :tDOT2,
|
@@ -41,17 +46,38 @@ class RubyLexer
|
|
41
46
|
"->" => :tLAMBDA,
|
42
47
|
}
|
43
48
|
|
49
|
+
PERCENT_END = {
|
50
|
+
"(" => ")",
|
51
|
+
"[" => "]",
|
52
|
+
"{" => "}",
|
53
|
+
"<" => ">",
|
54
|
+
}
|
55
|
+
|
56
|
+
SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
|
57
|
+
|
44
58
|
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
45
59
|
@@regexp_cache[nil] = nil
|
46
60
|
|
61
|
+
def regexp_cache
|
62
|
+
@@regexp_cache
|
63
|
+
end
|
64
|
+
|
47
65
|
if $DEBUG then
|
48
66
|
attr_reader :lex_state
|
49
67
|
|
50
68
|
def lex_state= o
|
51
69
|
return if @lex_state == o
|
52
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
53
70
|
|
54
|
-
|
71
|
+
from = ""
|
72
|
+
if ENV["VERBOSE"]
|
73
|
+
path = caller[0]
|
74
|
+
path = caller[1] if path =~ /result/
|
75
|
+
path, line, *_ = path.split(/:/)
|
76
|
+
path.delete_prefix! File.dirname File.dirname __FILE__
|
77
|
+
from = " at .%s:%s" % [path, line]
|
78
|
+
end
|
79
|
+
|
80
|
+
warn "lex_state: %p -> %p%s" % [lex_state, o, from]
|
55
81
|
|
56
82
|
@lex_state = o
|
57
83
|
end
|
@@ -61,14 +87,16 @@ class RubyLexer
|
|
61
87
|
|
62
88
|
attr_accessor :lex_state unless $DEBUG
|
63
89
|
|
64
|
-
attr_accessor :lineno # we're bypassing oedipus' lineno handling.
|
65
90
|
attr_accessor :brace_nest
|
66
91
|
attr_accessor :cmdarg
|
67
92
|
attr_accessor :command_start
|
68
93
|
attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
|
69
94
|
attr_accessor :last_state
|
70
95
|
attr_accessor :cond
|
71
|
-
attr_accessor :
|
96
|
+
attr_accessor :old_ss
|
97
|
+
attr_accessor :old_lineno
|
98
|
+
|
99
|
+
# these are generated via ruby_lexer.rex: ss, lineno
|
72
100
|
|
73
101
|
##
|
74
102
|
# Additional context surrounding tokens that both the lexer and
|
@@ -93,6 +121,7 @@ class RubyLexer
|
|
93
121
|
|
94
122
|
self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
|
95
123
|
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
124
|
+
self.ss = RPStringScanner.new ""
|
96
125
|
|
97
126
|
reset
|
98
127
|
end
|
@@ -105,14 +134,8 @@ class RubyLexer
|
|
105
134
|
is_after_operator? ? EXPR_ARG : EXPR_BEG
|
106
135
|
end
|
107
136
|
|
108
|
-
def
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
alias bol? beginning_of_line? # to make .rex file more readable
|
113
|
-
|
114
|
-
def check re
|
115
|
-
ss.check re
|
137
|
+
def ignore_body_comments
|
138
|
+
@comments.clear
|
116
139
|
end
|
117
140
|
|
118
141
|
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
@@ -121,14 +144,8 @@ class RubyLexer
|
|
121
144
|
c
|
122
145
|
end
|
123
146
|
|
124
|
-
def
|
125
|
-
|
126
|
-
self.extra_lineno += r.count("\n") if r
|
127
|
-
r
|
128
|
-
end
|
129
|
-
|
130
|
-
def end_of_stream?
|
131
|
-
ss.eos?
|
147
|
+
def debug n
|
148
|
+
raise "debug #{n}"
|
132
149
|
end
|
133
150
|
|
134
151
|
def expr_dot?
|
@@ -145,128 +162,6 @@ class RubyLexer
|
|
145
162
|
result EXPR_BEG, token, text
|
146
163
|
end
|
147
164
|
|
148
|
-
def fixup_lineno extra = 0
|
149
|
-
self.lineno += self.extra_lineno + extra
|
150
|
-
self.extra_lineno = 0
|
151
|
-
end
|
152
|
-
|
153
|
-
def heredoc here # TODO: rewrite / remove
|
154
|
-
_, eos, func, last_line = here
|
155
|
-
|
156
|
-
indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
|
157
|
-
expand = func =~ STR_FUNC_EXPAND
|
158
|
-
eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
|
159
|
-
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
160
|
-
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
161
|
-
|
162
|
-
rb_compile_error err_msg if end_of_stream?
|
163
|
-
|
164
|
-
if beginning_of_line? && scan(eos_re) then
|
165
|
-
self.lineno += 1
|
166
|
-
ss.unread_many last_line # TODO: figure out how to remove this
|
167
|
-
return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
|
168
|
-
end
|
169
|
-
|
170
|
-
self.string_buffer = []
|
171
|
-
|
172
|
-
if expand then
|
173
|
-
case
|
174
|
-
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
175
|
-
# TODO: !ISASCII
|
176
|
-
# ?! see parser_peek_variable_name
|
177
|
-
return :tSTRING_DVAR, matched
|
178
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
179
|
-
# TODO: !ISASCII
|
180
|
-
return :tSTRING_DVAR, matched
|
181
|
-
when scan(/#[{]/) then
|
182
|
-
self.command_start = true
|
183
|
-
return :tSTRING_DBEG, matched
|
184
|
-
when scan(/#/) then
|
185
|
-
string_buffer << "#"
|
186
|
-
end
|
187
|
-
|
188
|
-
begin
|
189
|
-
c = tokadd_string func, eol, nil
|
190
|
-
|
191
|
-
rb_compile_error err_msg if
|
192
|
-
c == RubyLexer::EOF
|
193
|
-
|
194
|
-
if c != eol then
|
195
|
-
return :tSTRING_CONTENT, string_buffer.join
|
196
|
-
else
|
197
|
-
string_buffer << scan(/\n/)
|
198
|
-
end
|
199
|
-
|
200
|
-
rb_compile_error err_msg if end_of_stream?
|
201
|
-
end until check(eos_re)
|
202
|
-
else
|
203
|
-
until check(eos_re) do
|
204
|
-
string_buffer << scan(/.*(\n|\z)/)
|
205
|
-
rb_compile_error err_msg if end_of_stream?
|
206
|
-
end
|
207
|
-
end
|
208
|
-
|
209
|
-
self.lex_strterm = [:heredoc, eos, func, last_line]
|
210
|
-
|
211
|
-
string_content = begin
|
212
|
-
s = string_buffer.join
|
213
|
-
s.b.force_encoding Encoding::UTF_8
|
214
|
-
end
|
215
|
-
|
216
|
-
return :tSTRING_CONTENT, string_content
|
217
|
-
end
|
218
|
-
|
219
|
-
def heredoc_identifier # TODO: remove / rewrite
|
220
|
-
term, func = nil, STR_FUNC_BORING
|
221
|
-
self.string_buffer = []
|
222
|
-
|
223
|
-
heredoc_indent_mods = "-"
|
224
|
-
heredoc_indent_mods += '\~' if ruby23plus?
|
225
|
-
|
226
|
-
case
|
227
|
-
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
228
|
-
term = ss[2]
|
229
|
-
func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
|
230
|
-
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
231
|
-
func |= case term
|
232
|
-
when "\'" then
|
233
|
-
STR_SQUOTE
|
234
|
-
when '"' then
|
235
|
-
STR_DQUOTE
|
236
|
-
else
|
237
|
-
STR_XQUOTE
|
238
|
-
end
|
239
|
-
string_buffer << ss[3]
|
240
|
-
when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
|
241
|
-
rb_compile_error "unterminated here document identifier"
|
242
|
-
when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
|
243
|
-
term = '"'
|
244
|
-
func |= STR_DQUOTE
|
245
|
-
unless ss[1].empty? then
|
246
|
-
func |= STR_FUNC_INDENT
|
247
|
-
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
248
|
-
end
|
249
|
-
string_buffer << ss[2]
|
250
|
-
else
|
251
|
-
return nil
|
252
|
-
end
|
253
|
-
|
254
|
-
if scan(/.*\n/) then
|
255
|
-
# TODO: think about storing off the char range instead
|
256
|
-
line = matched
|
257
|
-
else
|
258
|
-
line = nil
|
259
|
-
end
|
260
|
-
|
261
|
-
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
262
|
-
|
263
|
-
if term == "`" then
|
264
|
-
result nil, :tXSTRING_BEG, "`"
|
265
|
-
else
|
266
|
-
result nil, :tSTRING_BEG, "\""
|
267
|
-
end
|
268
|
-
end
|
269
|
-
|
270
165
|
def in_fname? # REFACTOR
|
271
166
|
lex_state =~ EXPR_FNAME
|
272
167
|
end
|
@@ -277,13 +172,13 @@ class RubyLexer
|
|
277
172
|
text = matched
|
278
173
|
case
|
279
174
|
when text.end_with?("ri")
|
280
|
-
|
175
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
|
281
176
|
when text.end_with?("r")
|
282
|
-
|
177
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
|
283
178
|
when text.end_with?("i")
|
284
|
-
|
179
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
|
285
180
|
else
|
286
|
-
|
181
|
+
result EXPR_NUM, :tINTEGER, text.to_i(base)
|
287
182
|
end
|
288
183
|
end
|
289
184
|
|
@@ -329,137 +224,15 @@ class RubyLexer
|
|
329
224
|
self.parser.env[id.to_sym] == :lvar
|
330
225
|
end
|
331
226
|
|
332
|
-
def matched
|
333
|
-
ss.matched
|
334
|
-
end
|
335
|
-
|
336
227
|
def not_end?
|
337
228
|
not is_end?
|
338
229
|
end
|
339
230
|
|
340
|
-
def parse_quote # TODO: remove / rewrite
|
341
|
-
beg, nnd, short_hand, c = nil, nil, false, nil
|
342
|
-
|
343
|
-
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
344
|
-
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
345
|
-
c, beg, short_hand = matched, getch, false
|
346
|
-
else # Short-hand (e.g. %{, %., %!, etc)
|
347
|
-
c, beg, short_hand = "Q", getch, true
|
348
|
-
end
|
349
|
-
|
350
|
-
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
351
|
-
rb_compile_error "unterminated quoted string meets end of file"
|
352
|
-
end
|
353
|
-
|
354
|
-
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
355
|
-
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
356
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
357
|
-
|
358
|
-
token_type, text = nil, "%#{c}#{beg}"
|
359
|
-
token_type, string_type = case c
|
360
|
-
when "Q" then
|
361
|
-
ch = short_hand ? nnd : c + beg
|
362
|
-
text = "%#{ch}"
|
363
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
364
|
-
when "q" then
|
365
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
366
|
-
when "W" then
|
367
|
-
eat_whitespace
|
368
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
369
|
-
when "w" then
|
370
|
-
eat_whitespace
|
371
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
372
|
-
when "x" then
|
373
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
374
|
-
when "r" then
|
375
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
376
|
-
when "s" then
|
377
|
-
self.lex_state = EXPR_FNAME
|
378
|
-
[:tSYMBEG, STR_SSYM]
|
379
|
-
when "I" then
|
380
|
-
eat_whitespace
|
381
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
382
|
-
when "i" then
|
383
|
-
eat_whitespace
|
384
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
385
|
-
end
|
386
|
-
|
387
|
-
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
388
|
-
token_type.nil?
|
389
|
-
|
390
|
-
raise "huh" unless string_type
|
391
|
-
|
392
|
-
string string_type, nnd, beg
|
393
|
-
|
394
|
-
return token_type, text
|
395
|
-
end
|
396
|
-
|
397
|
-
def parse_string quote # TODO: rewrite / remove
|
398
|
-
_, string_type, term, open = quote
|
399
|
-
|
400
|
-
space = false # FIX: remove these
|
401
|
-
func = string_type
|
402
|
-
paren = open
|
403
|
-
term_re = @@regexp_cache[term]
|
404
|
-
|
405
|
-
qwords = func =~ STR_FUNC_QWORDS
|
406
|
-
regexp = func =~ STR_FUNC_REGEXP
|
407
|
-
expand = func =~ STR_FUNC_EXPAND
|
408
|
-
|
409
|
-
unless func then # nil'ed from qwords below. *sigh*
|
410
|
-
return :tSTRING_END, nil
|
411
|
-
end
|
412
|
-
|
413
|
-
space = true if qwords and eat_whitespace
|
414
|
-
|
415
|
-
if self.string_nest == 0 && scan(/#{term_re}/) then
|
416
|
-
if qwords then
|
417
|
-
quote[1] = nil
|
418
|
-
return :tSPACE, nil
|
419
|
-
elsif regexp then
|
420
|
-
return :tREGEXP_END, self.regx_options
|
421
|
-
else
|
422
|
-
return :tSTRING_END, term
|
423
|
-
end
|
424
|
-
end
|
425
|
-
|
426
|
-
return :tSPACE, nil if space
|
427
|
-
|
428
|
-
self.string_buffer = []
|
429
|
-
|
430
|
-
if expand
|
431
|
-
case
|
432
|
-
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
433
|
-
# TODO: !ISASCII
|
434
|
-
# ?! see parser_peek_variable_name
|
435
|
-
return :tSTRING_DVAR, nil
|
436
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
437
|
-
# TODO: !ISASCII
|
438
|
-
return :tSTRING_DVAR, nil
|
439
|
-
when scan(/#[{]/) then
|
440
|
-
self.command_start = true
|
441
|
-
return :tSTRING_DBEG, nil
|
442
|
-
when scan(/#/) then
|
443
|
-
string_buffer << "#"
|
444
|
-
end
|
445
|
-
end
|
446
|
-
|
447
|
-
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
448
|
-
if func =~ STR_FUNC_REGEXP then
|
449
|
-
rb_compile_error "unterminated regexp meets end of file"
|
450
|
-
else
|
451
|
-
rb_compile_error "unterminated string meets end of file"
|
452
|
-
end
|
453
|
-
end
|
454
|
-
|
455
|
-
return :tSTRING_CONTENT, string_buffer.join
|
456
|
-
end
|
457
|
-
|
458
231
|
def possibly_escape_string text, check
|
459
232
|
content = match[1]
|
460
233
|
|
461
234
|
if text =~ check then
|
462
|
-
content
|
235
|
+
unescape_string content
|
463
236
|
else
|
464
237
|
content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
|
465
238
|
end
|
@@ -475,11 +248,11 @@ class RubyLexer
|
|
475
248
|
:tAMPER2
|
476
249
|
end
|
477
250
|
|
478
|
-
|
251
|
+
result :arg_state, token, "&"
|
479
252
|
end
|
480
253
|
|
481
254
|
def process_backref text
|
482
|
-
token =
|
255
|
+
token = match[1].to_sym
|
483
256
|
# TODO: can't do lineno hack w/ symbol
|
484
257
|
result EXPR_END, :tBACK_REF, token
|
485
258
|
end
|
@@ -493,7 +266,7 @@ class RubyLexer
|
|
493
266
|
end
|
494
267
|
|
495
268
|
@comments << matched
|
496
|
-
self.lineno += matched.count("\n")
|
269
|
+
self.lineno += matched.count("\n") # HACK?
|
497
270
|
|
498
271
|
nil # TODO
|
499
272
|
end
|
@@ -564,9 +337,9 @@ class RubyLexer
|
|
564
337
|
|
565
338
|
case
|
566
339
|
when scan(/\'/) then
|
567
|
-
string STR_SSYM
|
340
|
+
string STR_SSYM, matched
|
568
341
|
when scan(/\"/) then
|
569
|
-
string STR_DSYM
|
342
|
+
string STR_DSYM, matched
|
570
343
|
end
|
571
344
|
|
572
345
|
result EXPR_FNAME, :tSYMBEG, text
|
@@ -580,41 +353,48 @@ class RubyLexer
|
|
580
353
|
end
|
581
354
|
end
|
582
355
|
|
356
|
+
def process_dots text
|
357
|
+
tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
|
358
|
+
|
359
|
+
result EXPR_BEG, tokens[text], text
|
360
|
+
end
|
361
|
+
|
583
362
|
def process_float text
|
584
363
|
rb_compile_error "Invalid numeric format" if text =~ /__/
|
585
364
|
|
586
365
|
case
|
587
366
|
when text.end_with?("ri")
|
588
|
-
|
367
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
589
368
|
when text.end_with?("i")
|
590
|
-
|
369
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
591
370
|
when text.end_with?("r")
|
592
|
-
|
371
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
593
372
|
else
|
594
|
-
|
373
|
+
result EXPR_NUM, :tFLOAT, text.to_f
|
595
374
|
end
|
596
375
|
end
|
597
376
|
|
598
377
|
def process_gvar text
|
599
|
-
|
378
|
+
if parser.class.version > 20 && text == "$-" then
|
379
|
+
rb_compile_error "unexpected $undefined"
|
380
|
+
end
|
381
|
+
|
600
382
|
result EXPR_END, :tGVAR, text
|
601
383
|
end
|
602
384
|
|
603
385
|
def process_gvar_oddity text
|
604
|
-
return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
|
605
386
|
rb_compile_error "#{text.inspect} is not allowed as a global variable name"
|
606
387
|
end
|
607
388
|
|
608
389
|
def process_ivar text
|
609
390
|
tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
|
610
|
-
text.lineno = self.lineno
|
611
391
|
result EXPR_END, tok_id, text
|
612
392
|
end
|
613
393
|
|
614
394
|
def process_label text
|
615
395
|
symbol = possibly_escape_string text, /^\"/
|
616
396
|
|
617
|
-
result EXPR_LAB, :tLABEL,
|
397
|
+
result EXPR_LAB, :tLABEL, symbol
|
618
398
|
end
|
619
399
|
|
620
400
|
def process_label_or_string text
|
@@ -622,11 +402,15 @@ class RubyLexer
|
|
622
402
|
@was_label = nil
|
623
403
|
return process_label text
|
624
404
|
elsif text =~ /:\Z/ then
|
625
|
-
|
405
|
+
self.pos -= 1 # put back ":"
|
626
406
|
text = text[0..-2]
|
627
407
|
end
|
628
408
|
|
629
|
-
|
409
|
+
orig_line = lineno
|
410
|
+
str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
|
411
|
+
self.lineno += str.count("\n")
|
412
|
+
|
413
|
+
result EXPR_END, :tSTRING, str, orig_line
|
630
414
|
end
|
631
415
|
|
632
416
|
def process_lchevron text
|
@@ -644,34 +428,25 @@ class RubyLexer
|
|
644
428
|
self.lex_state = EXPR_BEG
|
645
429
|
end
|
646
430
|
|
647
|
-
|
431
|
+
result lex_state, :tLSHFT, "\<\<"
|
648
432
|
end
|
649
433
|
|
650
|
-
def process_newline_or_comment text
|
434
|
+
def process_newline_or_comment text # ../compare/parse30.y:9126 ish
|
651
435
|
c = matched
|
652
|
-
hit = false
|
653
436
|
|
654
437
|
if c == "#" then
|
655
|
-
|
438
|
+
self.pos -= 1
|
656
439
|
|
657
|
-
# TODO: handle magic comments
|
658
440
|
while scan(/\s*\#.*(\n+|\z)/) do
|
659
|
-
|
660
|
-
self.lineno += matched.lines.to_a.size
|
441
|
+
self.lineno += matched.count "\n"
|
661
442
|
@comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
662
443
|
end
|
663
444
|
|
664
445
|
return nil if end_of_stream?
|
665
446
|
end
|
666
447
|
|
667
|
-
self.lineno += 1 unless hit
|
668
|
-
|
669
|
-
# Replace a string of newlines with a single one
|
670
|
-
self.lineno += matched.lines.to_a.size if scan(/\n+/)
|
671
|
-
|
672
448
|
c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
|
673
449
|
lex_state !~ EXPR_LABELED)
|
674
|
-
# TODO: figure out what token_seen is for
|
675
450
|
if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
|
676
451
|
# ignore if !fallthrough?
|
677
452
|
if !c && parser.in_kwarg then
|
@@ -679,25 +454,29 @@ class RubyLexer
|
|
679
454
|
self.command_start = true
|
680
455
|
return result EXPR_BEG, :tNL, nil
|
681
456
|
else
|
682
|
-
|
457
|
+
maybe_pop_stack
|
458
|
+
return # goto retry
|
683
459
|
end
|
684
460
|
end
|
685
461
|
|
686
|
-
if scan(/
|
687
|
-
self.space_seen = true
|
462
|
+
if scan(/[\ \t\r\f\v]+/) then
|
463
|
+
self.space_seen = true
|
464
|
+
end
|
688
465
|
|
689
|
-
|
690
|
-
return
|
466
|
+
if check(/#/) then
|
467
|
+
return # goto retry
|
468
|
+
elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
|
469
|
+
return # goto retry
|
691
470
|
end
|
692
471
|
|
693
472
|
self.command_start = true
|
694
473
|
|
695
|
-
|
474
|
+
result EXPR_BEG, :tNL, nil
|
696
475
|
end
|
697
476
|
|
698
477
|
def process_nthref text
|
699
478
|
# TODO: can't do lineno hack w/ number
|
700
|
-
result EXPR_END, :tNTH_REF,
|
479
|
+
result EXPR_END, :tNTH_REF, match[1].to_i
|
701
480
|
end
|
702
481
|
|
703
482
|
def process_paren text
|
@@ -725,13 +504,16 @@ class RubyLexer
|
|
725
504
|
end
|
726
505
|
|
727
506
|
def process_percent text
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
507
|
+
case
|
508
|
+
when is_beg? then
|
509
|
+
process_percent_quote
|
510
|
+
when scan(/\=/)
|
511
|
+
result EXPR_BEG, :tOP_ASGN, "%"
|
512
|
+
when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
|
513
|
+
process_percent_quote
|
514
|
+
else
|
515
|
+
result :arg_state, :tPERCENT, "%"
|
516
|
+
end
|
735
517
|
end
|
736
518
|
|
737
519
|
def process_plus_minus text
|
@@ -805,20 +587,19 @@ class RubyLexer
|
|
805
587
|
end
|
806
588
|
|
807
589
|
def process_simple_string text
|
808
|
-
|
809
|
-
|
810
|
-
}
|
590
|
+
orig_line = lineno
|
591
|
+
self.lineno += text.count("\n")
|
811
592
|
|
812
|
-
|
593
|
+
str = unescape_string text[1..-2]
|
813
594
|
|
814
|
-
result EXPR_END, :tSTRING,
|
595
|
+
result EXPR_END, :tSTRING, str, orig_line
|
815
596
|
end
|
816
597
|
|
817
598
|
def process_slash text
|
818
599
|
if is_beg? then
|
819
|
-
string STR_REGEXP
|
600
|
+
string STR_REGEXP, matched
|
820
601
|
|
821
|
-
return result
|
602
|
+
return result nil, :tREGEXP_BEG, "/"
|
822
603
|
end
|
823
604
|
|
824
605
|
if scan(/\=/) then
|
@@ -833,7 +614,7 @@ class RubyLexer
|
|
833
614
|
end
|
834
615
|
end
|
835
616
|
|
836
|
-
|
617
|
+
result :arg_state, :tDIVIDE, "/"
|
837
618
|
end
|
838
619
|
|
839
620
|
def process_square_bracket text
|
@@ -865,34 +646,6 @@ class RubyLexer
|
|
865
646
|
result EXPR_PAR, token, text
|
866
647
|
end
|
867
648
|
|
868
|
-
def process_string # TODO: rewrite / remove
|
869
|
-
# matches top of parser_yylex in compare/parse23.y:8113
|
870
|
-
token = if lex_strterm[0] == :heredoc then
|
871
|
-
self.heredoc lex_strterm
|
872
|
-
else
|
873
|
-
self.parse_string lex_strterm
|
874
|
-
end
|
875
|
-
|
876
|
-
token_type, c = token
|
877
|
-
|
878
|
-
# matches parser_string_term from 2.3, but way off from 2.5
|
879
|
-
if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
|
880
|
-
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
881
|
-
!cond.is_in_state) || is_arg?) &&
|
882
|
-
is_label_suffix? then
|
883
|
-
scan(/:/)
|
884
|
-
token_type = token[0] = :tLABEL_END
|
885
|
-
end
|
886
|
-
end
|
887
|
-
|
888
|
-
if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
|
889
|
-
self.lex_strterm = nil
|
890
|
-
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
|
891
|
-
end
|
892
|
-
|
893
|
-
return token
|
894
|
-
end
|
895
|
-
|
896
649
|
def process_symbol text
|
897
650
|
symbol = possibly_escape_string text, /^:\"/ # stupid emacs
|
898
651
|
|
@@ -901,7 +654,6 @@ class RubyLexer
|
|
901
654
|
|
902
655
|
def process_token text
|
903
656
|
# matching: parse_ident in compare/parse23.y:7989
|
904
|
-
# TODO: make this always return [token, lineno]
|
905
657
|
# FIX: remove: self.last_state = lex_state
|
906
658
|
|
907
659
|
token = self.token = text
|
@@ -924,8 +676,7 @@ class RubyLexer
|
|
924
676
|
|
925
677
|
if is_label_possible? and is_label_suffix? then
|
926
678
|
scan(/:/)
|
927
|
-
|
928
|
-
return result EXPR_LAB, :tLABEL, [token, self.lineno]
|
679
|
+
return result EXPR_LAB, :tLABEL, token
|
929
680
|
end
|
930
681
|
|
931
682
|
# TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
|
@@ -936,14 +687,15 @@ class RubyLexer
|
|
936
687
|
return process_token_keyword keyword if keyword
|
937
688
|
end
|
938
689
|
|
939
|
-
# matching: compare/
|
940
|
-
state = if
|
690
|
+
# matching: compare/parse30.y:9039
|
691
|
+
state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
|
941
692
|
cmd_state ? EXPR_CMDARG : EXPR_ARG
|
942
693
|
elsif lex_state =~ EXPR_FNAME then
|
943
694
|
EXPR_ENDFN
|
944
695
|
else
|
945
696
|
EXPR_END
|
946
697
|
end
|
698
|
+
self.lex_state = state
|
947
699
|
|
948
700
|
tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
|
949
701
|
|
@@ -953,20 +705,16 @@ class RubyLexer
|
|
953
705
|
state = EXPR_END|EXPR_LABEL
|
954
706
|
end
|
955
707
|
|
956
|
-
|
957
|
-
|
958
|
-
return result(state, tok_id, token)
|
708
|
+
result state, tok_id, token
|
959
709
|
end
|
960
710
|
|
961
711
|
def process_token_keyword keyword
|
962
712
|
# matching MIDDLE of parse_ident in compare/parse23.y:8046
|
963
713
|
state = lex_state
|
964
|
-
self.lex_state = keyword.state
|
965
|
-
|
966
|
-
value = [token, self.lineno]
|
967
714
|
|
968
|
-
return result(
|
715
|
+
return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
|
969
716
|
|
717
|
+
self.lex_state = keyword.state
|
970
718
|
self.command_start = true if lex_state =~ EXPR_BEG
|
971
719
|
|
972
720
|
case
|
@@ -975,27 +723,28 @@ class RubyLexer
|
|
975
723
|
when lambda_beginning? then
|
976
724
|
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
977
725
|
self.paren_nest -= 1 # TODO: question this?
|
978
|
-
result lex_state, :kDO_LAMBDA,
|
726
|
+
result lex_state, :kDO_LAMBDA, token
|
979
727
|
when cond.is_in_state then
|
980
|
-
result lex_state, :kDO_COND,
|
728
|
+
result lex_state, :kDO_COND, token
|
981
729
|
when cmdarg.is_in_state && state != EXPR_CMDARG then
|
982
|
-
result lex_state, :kDO_BLOCK,
|
730
|
+
result lex_state, :kDO_BLOCK, token
|
983
731
|
else
|
984
|
-
result lex_state, :kDO,
|
732
|
+
result lex_state, :kDO, token
|
985
733
|
end
|
986
734
|
when state =~ EXPR_PAD then
|
987
|
-
result lex_state, keyword.id0,
|
735
|
+
result lex_state, keyword.id0, token
|
988
736
|
when keyword.id0 != keyword.id1 then
|
989
|
-
result EXPR_PAR, keyword.id1,
|
737
|
+
result EXPR_PAR, keyword.id1, token
|
990
738
|
else
|
991
|
-
result lex_state, keyword.id1,
|
739
|
+
result lex_state, keyword.id1, token
|
992
740
|
end
|
993
741
|
end
|
994
742
|
|
995
743
|
def process_underscore text
|
996
|
-
|
744
|
+
self.unscan # put back "_"
|
997
745
|
|
998
746
|
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
747
|
+
ss.terminate
|
999
748
|
[RubyLexer::EOF, RubyLexer::EOF]
|
1000
749
|
elsif scan(/#{IDENT_CHAR}+/) then
|
1001
750
|
process_token matched
|
@@ -1003,121 +752,35 @@ class RubyLexer
|
|
1003
752
|
end
|
1004
753
|
|
1005
754
|
def rb_compile_error msg
|
1006
|
-
msg += ". near line #{self.lineno}: #{
|
755
|
+
msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
|
1007
756
|
raise RubyParser::SyntaxError, msg
|
1008
757
|
end
|
1009
758
|
|
1010
|
-
def read_escape # TODO: remove / rewrite
|
1011
|
-
case
|
1012
|
-
when scan(/\\/) then # Backslash
|
1013
|
-
'\\'
|
1014
|
-
when scan(/n/) then # newline
|
1015
|
-
self.extra_lineno -= 1
|
1016
|
-
"\n"
|
1017
|
-
when scan(/t/) then # horizontal tab
|
1018
|
-
"\t"
|
1019
|
-
when scan(/r/) then # carriage-return
|
1020
|
-
"\r"
|
1021
|
-
when scan(/f/) then # form-feed
|
1022
|
-
"\f"
|
1023
|
-
when scan(/v/) then # vertical tab
|
1024
|
-
"\13"
|
1025
|
-
when scan(/a/) then # alarm(bell)
|
1026
|
-
"\007"
|
1027
|
-
when scan(/e/) then # escape
|
1028
|
-
"\033"
|
1029
|
-
when scan(/b/) then # backspace
|
1030
|
-
"\010"
|
1031
|
-
when scan(/s/) then # space
|
1032
|
-
" "
|
1033
|
-
when scan(/[0-7]{1,3}/) then # octal constant
|
1034
|
-
(matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
|
1035
|
-
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
1036
|
-
# TODO: force encode everything to UTF-8?
|
1037
|
-
ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
|
1038
|
-
when check(/M-\\./) then
|
1039
|
-
scan(/M-\\/) # eat it
|
1040
|
-
c = self.read_escape
|
1041
|
-
c[0] = (c[0].ord | 0x80).chr
|
1042
|
-
c
|
1043
|
-
when scan(/M-(.)/) then
|
1044
|
-
c = ss[1]
|
1045
|
-
c[0] = (c[0].ord | 0x80).chr
|
1046
|
-
c
|
1047
|
-
when check(/(C-|c)\\[\\MCc]/) then
|
1048
|
-
scan(/(C-|c)\\/) # eat it
|
1049
|
-
c = self.read_escape
|
1050
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1051
|
-
c
|
1052
|
-
when check(/(C-|c)\\(?!u|\\)/) then
|
1053
|
-
scan(/(C-|c)\\/) # eat it
|
1054
|
-
c = read_escape
|
1055
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1056
|
-
c
|
1057
|
-
when scan(/C-\?|c\?/) then
|
1058
|
-
127.chr
|
1059
|
-
when scan(/(C-|c)(.)/) then
|
1060
|
-
c = ss[2]
|
1061
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1062
|
-
c
|
1063
|
-
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
1064
|
-
matched
|
1065
|
-
when scan(/u(\h{4})/) then
|
1066
|
-
[ss[1].to_i(16)].pack("U")
|
1067
|
-
when scan(/u(\h{1,3})/) then
|
1068
|
-
rb_compile_error "Invalid escape character syntax"
|
1069
|
-
when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
|
1070
|
-
ss[1].split.map { |s| s.to_i(16) }.pack("U*")
|
1071
|
-
when scan(/[McCx0-9]/) || end_of_stream? then
|
1072
|
-
rb_compile_error("Invalid escape character syntax")
|
1073
|
-
else
|
1074
|
-
getch
|
1075
|
-
end.dup
|
1076
|
-
end
|
1077
|
-
|
1078
|
-
def getch
|
1079
|
-
c = ss.getch
|
1080
|
-
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1081
|
-
c
|
1082
|
-
end
|
1083
|
-
|
1084
|
-
def regx_options # TODO: rewrite / remove
|
1085
|
-
good, bad = [], []
|
1086
|
-
|
1087
|
-
if scan(/[a-z]+/) then
|
1088
|
-
good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
|
1089
|
-
end
|
1090
|
-
|
1091
|
-
unless bad.empty? then
|
1092
|
-
rb_compile_error("unknown regexp option%s - %s" %
|
1093
|
-
[(bad.size > 1 ? "s" : ""), bad.join.inspect])
|
1094
|
-
end
|
1095
|
-
|
1096
|
-
return good.join
|
1097
|
-
end
|
1098
|
-
|
1099
759
|
def reset
|
760
|
+
self.lineno = 1
|
1100
761
|
self.brace_nest = 0
|
1101
762
|
self.command_start = true
|
1102
763
|
self.comments = []
|
1103
764
|
self.lex_state = EXPR_NONE
|
1104
765
|
self.lex_strterm = nil
|
1105
|
-
self.lineno = 1
|
1106
766
|
self.lpar_beg = nil
|
1107
767
|
self.paren_nest = 0
|
1108
768
|
self.space_seen = false
|
1109
769
|
self.string_nest = 0
|
1110
770
|
self.token = nil
|
1111
|
-
self.
|
771
|
+
self.string_buffer = []
|
772
|
+
self.old_ss = nil
|
773
|
+
self.old_lineno = nil
|
1112
774
|
|
1113
775
|
self.cond.reset
|
1114
776
|
self.cmdarg.reset
|
1115
777
|
end
|
1116
778
|
|
1117
|
-
def result new_state, token, text # :nodoc:
|
779
|
+
def result new_state, token, text, line = self.lineno # :nodoc:
|
1118
780
|
new_state = self.arg_state if new_state == :arg_state
|
1119
781
|
self.lex_state = new_state if new_state
|
1120
|
-
|
782
|
+
|
783
|
+
[token, [text, line]]
|
1121
784
|
end
|
1122
785
|
|
1123
786
|
def ruby22_label?
|
@@ -1136,12 +799,8 @@ class RubyLexer
|
|
1136
799
|
parser.class.version <= 24
|
1137
800
|
end
|
1138
801
|
|
1139
|
-
def
|
1140
|
-
|
1141
|
-
end
|
1142
|
-
|
1143
|
-
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1144
|
-
RPStringScanner
|
802
|
+
def ruby27plus?
|
803
|
+
parser.class.version >= 27
|
1145
804
|
end
|
1146
805
|
|
1147
806
|
def space_vs_beginning space_type, beg_type, fallback
|
@@ -1156,139 +815,18 @@ class RubyLexer
|
|
1156
815
|
end
|
1157
816
|
end
|
1158
817
|
|
1159
|
-
def
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
def tokadd_escape term # TODO: rewrite / remove
|
1164
|
-
case
|
1165
|
-
when scan(/\\\n/) then
|
1166
|
-
# just ignore
|
1167
|
-
when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
|
1168
|
-
self.string_buffer << matched
|
1169
|
-
when scan(/\\([MC]-|c)(?=\\)/) then
|
1170
|
-
self.string_buffer << matched
|
1171
|
-
self.tokadd_escape term
|
1172
|
-
when scan(/\\([MC]-|c)(.)/) then
|
1173
|
-
self.string_buffer << matched
|
1174
|
-
when scan(/\\[McCx]/) then
|
1175
|
-
rb_compile_error "Invalid escape character syntax"
|
1176
|
-
when scan(/\\(.)/m) then
|
1177
|
-
chr = ss[1]
|
1178
|
-
prev = self.string_buffer.last
|
1179
|
-
if term == chr && prev && prev.end_with?("(?") then
|
1180
|
-
self.string_buffer << chr
|
1181
|
-
elsif term == chr || chr.ascii_only? then
|
1182
|
-
self.string_buffer << matched # dunno why we keep them for ascii
|
1183
|
-
else
|
1184
|
-
self.string_buffer << chr # HACK? this is such a rat's nest
|
1185
|
-
end
|
818
|
+
def unescape_string str
|
819
|
+
str = str.gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
|
820
|
+
if str.valid_encoding?
|
821
|
+
str
|
1186
822
|
else
|
1187
|
-
|
823
|
+
str.b
|
1188
824
|
end
|
1189
825
|
end
|
1190
826
|
|
1191
|
-
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1192
|
-
qwords = func =~ STR_FUNC_QWORDS
|
1193
|
-
escape = func =~ STR_FUNC_ESCAPE
|
1194
|
-
expand = func =~ STR_FUNC_EXPAND
|
1195
|
-
regexp = func =~ STR_FUNC_REGEXP
|
1196
|
-
symbol = func =~ STR_FUNC_SYMBOL
|
1197
|
-
|
1198
|
-
paren_re = @@regexp_cache[paren]
|
1199
|
-
term_re = if term == "\n"
|
1200
|
-
/#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
|
1201
|
-
else
|
1202
|
-
@@regexp_cache[term]
|
1203
|
-
end
|
1204
|
-
|
1205
|
-
until end_of_stream? do
|
1206
|
-
c = nil
|
1207
|
-
handled = true
|
1208
|
-
|
1209
|
-
case
|
1210
|
-
when scan(term_re) then
|
1211
|
-
if self.string_nest == 0 then
|
1212
|
-
ss.pos -= 1
|
1213
|
-
break
|
1214
|
-
else
|
1215
|
-
self.string_nest -= 1
|
1216
|
-
end
|
1217
|
-
when paren_re && scan(paren_re) then
|
1218
|
-
self.string_nest += 1
|
1219
|
-
when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
|
1220
|
-
ss.pos -= 1
|
1221
|
-
break
|
1222
|
-
when qwords && scan(/\s/) then
|
1223
|
-
ss.pos -= 1
|
1224
|
-
break
|
1225
|
-
when expand && scan(/#(?!\n)/) then
|
1226
|
-
# do nothing
|
1227
|
-
when check(/\\/) then
|
1228
|
-
case
|
1229
|
-
when qwords && scan(/\\\n/) then
|
1230
|
-
string_buffer << "\n"
|
1231
|
-
next
|
1232
|
-
when qwords && scan(/\\\s/) then
|
1233
|
-
c = " "
|
1234
|
-
when expand && scan(/\\\n/) then
|
1235
|
-
next
|
1236
|
-
when regexp && check(/\\/) then
|
1237
|
-
self.tokadd_escape term
|
1238
|
-
next
|
1239
|
-
when expand && scan(/\\/) then
|
1240
|
-
c = self.read_escape
|
1241
|
-
when scan(/\\\n/) then
|
1242
|
-
# do nothing
|
1243
|
-
when scan(/\\\\/) then
|
1244
|
-
string_buffer << '\\' if escape
|
1245
|
-
c = '\\'
|
1246
|
-
when scan(/\\/) then
|
1247
|
-
unless scan(term_re) || paren.nil? || scan(paren_re) then
|
1248
|
-
string_buffer << "\\"
|
1249
|
-
end
|
1250
|
-
else
|
1251
|
-
handled = false
|
1252
|
-
end # inner /\\/ case
|
1253
|
-
else
|
1254
|
-
handled = false
|
1255
|
-
end # top case
|
1256
|
-
|
1257
|
-
unless handled then
|
1258
|
-
t = if term == "\n"
|
1259
|
-
Regexp.escape "\r\n"
|
1260
|
-
else
|
1261
|
-
Regexp.escape term
|
1262
|
-
end
|
1263
|
-
x = Regexp.escape paren if paren && paren != "\000"
|
1264
|
-
re = if qwords then
|
1265
|
-
/[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
|
1266
|
-
else
|
1267
|
-
/[^#{t}#{x}\#\\]+|./
|
1268
|
-
end
|
1269
|
-
|
1270
|
-
scan re
|
1271
|
-
c = matched
|
1272
|
-
|
1273
|
-
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
|
1274
|
-
end # unless handled
|
1275
|
-
|
1276
|
-
c ||= matched
|
1277
|
-
string_buffer << c
|
1278
|
-
end # until
|
1279
|
-
|
1280
|
-
c ||= matched
|
1281
|
-
c = RubyLexer::EOF if end_of_stream?
|
1282
|
-
|
1283
|
-
return c
|
1284
|
-
end
|
1285
|
-
|
1286
827
|
def unescape s
|
1287
828
|
r = ESCAPES[s]
|
1288
829
|
|
1289
|
-
self.extra_lineno += 1 if s == "\n" # eg backslash newline strings
|
1290
|
-
self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
|
1291
|
-
|
1292
830
|
return r if r
|
1293
831
|
|
1294
832
|
x = case s
|
@@ -1309,7 +847,7 @@ class RubyLexer
|
|
1309
847
|
when /u(\h{1,3})/ then
|
1310
848
|
rb_compile_error("Invalid escape character syntax")
|
1311
849
|
when /u\{(\h+(?:\s+\h+)*)\}/ then
|
1312
|
-
$1.split.map { |
|
850
|
+
$1.split.map { |cp| cp.to_i(16) }.pack("U*")
|
1313
851
|
else
|
1314
852
|
s
|
1315
853
|
end
|
@@ -1422,7 +960,7 @@ class RubyLexer
|
|
1422
960
|
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
1423
961
|
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
1424
962
|
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
1425
|
-
|
963
|
+
STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
|
1426
964
|
|
1427
965
|
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
1428
966
|
|
@@ -1434,6 +972,7 @@ class RubyLexer
|
|
1434
972
|
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
1435
973
|
STR_SSYM = STR_FUNC_SYMBOL
|
1436
974
|
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
975
|
+
STR_LABEL = STR_FUNC_LABEL
|
1437
976
|
|
1438
977
|
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
1439
978
|
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
@@ -1444,7 +983,7 @@ class RubyLexer
|
|
1444
983
|
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
1445
984
|
STR_FUNC_LIST => "STR_FUNC_LIST",
|
1446
985
|
STR_FUNC_TERM => "STR_FUNC_TERM",
|
1447
|
-
|
986
|
+
STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
|
1448
987
|
STR_SQUOTE => "STR_SQUOTE")
|
1449
988
|
end
|
1450
989
|
|
@@ -1454,7 +993,145 @@ class RubyLexer
|
|
1454
993
|
include State::Values
|
1455
994
|
end
|
1456
995
|
|
1457
|
-
|
996
|
+
class RubyLexer
|
997
|
+
module SSWrapper
|
998
|
+
def string= s
|
999
|
+
ss.string= s
|
1000
|
+
end
|
1001
|
+
|
1002
|
+
def beginning_of_line?
|
1003
|
+
ss.bol?
|
1004
|
+
end
|
1005
|
+
|
1006
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
1007
|
+
|
1008
|
+
def check re
|
1009
|
+
maybe_pop_stack
|
1010
|
+
|
1011
|
+
ss.check re
|
1012
|
+
end
|
1013
|
+
|
1014
|
+
def end_of_stream?
|
1015
|
+
ss.eos?
|
1016
|
+
end
|
1017
|
+
|
1018
|
+
alias eos? end_of_stream?
|
1019
|
+
|
1020
|
+
def getch
|
1021
|
+
c = ss.getch
|
1022
|
+
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1023
|
+
c
|
1024
|
+
end
|
1025
|
+
|
1026
|
+
def match
|
1027
|
+
ss
|
1028
|
+
end
|
1029
|
+
|
1030
|
+
def matched
|
1031
|
+
ss.matched
|
1032
|
+
end
|
1033
|
+
|
1034
|
+
def in_heredoc?
|
1035
|
+
!!self.old_ss
|
1036
|
+
end
|
1037
|
+
|
1038
|
+
def maybe_pop_stack
|
1039
|
+
if ss.eos? && in_heredoc? then
|
1040
|
+
self.ss_pop
|
1041
|
+
self.lineno_pop
|
1042
|
+
end
|
1043
|
+
end
|
1044
|
+
|
1045
|
+
def pos
|
1046
|
+
ss.pos
|
1047
|
+
end
|
1048
|
+
|
1049
|
+
def pos= n
|
1050
|
+
ss.pos = n
|
1051
|
+
end
|
1052
|
+
|
1053
|
+
def rest
|
1054
|
+
ss.rest
|
1055
|
+
end
|
1056
|
+
|
1057
|
+
def scan re
|
1058
|
+
maybe_pop_stack
|
1059
|
+
|
1060
|
+
ss.scan re
|
1061
|
+
end
|
1062
|
+
|
1063
|
+
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1064
|
+
RPStringScanner
|
1065
|
+
end
|
1066
|
+
|
1067
|
+
def ss_string
|
1068
|
+
ss.string
|
1069
|
+
end
|
1070
|
+
|
1071
|
+
def ss_string= s
|
1072
|
+
raise "Probably not"
|
1073
|
+
ss.string = s
|
1074
|
+
end
|
1075
|
+
|
1076
|
+
def unscan
|
1077
|
+
ss.unscan
|
1078
|
+
end
|
1079
|
+
end
|
1080
|
+
|
1081
|
+
include SSWrapper
|
1082
|
+
end
|
1083
|
+
|
1084
|
+
class RubyLexer
|
1085
|
+
module SSStackish
|
1086
|
+
def lineno_push new_lineno
|
1087
|
+
self.old_lineno = self.lineno
|
1088
|
+
self.lineno = new_lineno
|
1089
|
+
end
|
1090
|
+
|
1091
|
+
def lineno_pop
|
1092
|
+
self.lineno = self.old_lineno
|
1093
|
+
self.old_lineno = nil
|
1094
|
+
end
|
1095
|
+
|
1096
|
+
def ss= o
|
1097
|
+
raise "Clearing ss while in heredoc!?!" if in_heredoc?
|
1098
|
+
@old_ss = nil
|
1099
|
+
super
|
1100
|
+
end
|
1101
|
+
|
1102
|
+
def ss_push new_ss
|
1103
|
+
@old_ss = self.ss
|
1104
|
+
@ss = new_ss
|
1105
|
+
end
|
1106
|
+
|
1107
|
+
def ss_pop
|
1108
|
+
@ss = self.old_ss
|
1109
|
+
@old_ss = nil
|
1110
|
+
end
|
1111
|
+
end
|
1112
|
+
|
1113
|
+
prepend SSStackish
|
1114
|
+
end
|
1115
|
+
|
1116
|
+
if ENV["RP_STRTERM_DEBUG"] then
|
1117
|
+
class RubyLexer
|
1118
|
+
def d o
|
1119
|
+
$stderr.puts o.inspect
|
1120
|
+
end
|
1121
|
+
|
1122
|
+
alias old_lex_strterm= lex_strterm=
|
1123
|
+
|
1124
|
+
def lex_strterm= o
|
1125
|
+
self.old_lex_strterm= o
|
1126
|
+
where = caller.first.split(/:/).first(2).join(":")
|
1127
|
+
$stderr.puts
|
1128
|
+
d :lex_strterm => [o, where]
|
1129
|
+
end
|
1130
|
+
end
|
1131
|
+
end
|
1132
|
+
|
1133
|
+
require_relative "./ruby_lexer.rex.rb"
|
1134
|
+
require_relative "./ruby_lexer_strings.rb"
|
1458
1135
|
|
1459
1136
|
if ENV["RP_LINENO_DEBUG"] then
|
1460
1137
|
class RubyLexer
|
@@ -1467,7 +1144,8 @@ if ENV["RP_LINENO_DEBUG"] then
|
|
1467
1144
|
def lineno= n
|
1468
1145
|
self.old_lineno= n
|
1469
1146
|
where = caller.first.split(/:/).first(2).join(":")
|
1470
|
-
|
1147
|
+
$stderr.puts
|
1148
|
+
d :lineno => [n, where]
|
1471
1149
|
end
|
1472
1150
|
end
|
1473
1151
|
end
|