ruby_parser 3.17.0 → 3.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/History.rdoc +76 -0
- data/Manifest.txt +3 -0
- data/README.rdoc +1 -0
- data/Rakefile +68 -18
- data/bin/ruby_parse_extract_error +1 -1
- data/compare/normalize.rb +6 -1
- data/gauntlet.md +106 -0
- data/lib/rp_extensions.rb +15 -36
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby20_parser.rb +3445 -3394
- data/lib/ruby20_parser.y +326 -248
- data/lib/ruby21_parser.rb +3543 -3511
- data/lib/ruby21_parser.y +321 -245
- data/lib/ruby22_parser.rb +3553 -3512
- data/lib/ruby22_parser.y +325 -247
- data/lib/ruby23_parser.rb +3566 -3530
- data/lib/ruby23_parser.y +325 -247
- data/lib/ruby24_parser.rb +3595 -3548
- data/lib/ruby24_parser.y +325 -247
- data/lib/ruby25_parser.rb +3595 -3547
- data/lib/ruby25_parser.y +325 -247
- data/lib/ruby26_parser.rb +3605 -3560
- data/lib/ruby26_parser.y +324 -246
- data/lib/ruby27_parser.rb +4657 -3539
- data/lib/ruby27_parser.y +878 -253
- data/lib/ruby30_parser.rb +5230 -3882
- data/lib/ruby30_parser.y +1069 -321
- data/lib/ruby3_parser.yy +3467 -0
- data/lib/ruby_lexer.rb +261 -609
- data/lib/ruby_lexer.rex +27 -20
- data/lib/ruby_lexer.rex.rb +59 -23
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.yy +910 -263
- data/lib/ruby_parser_extras.rb +289 -114
- data/test/test_ruby_lexer.rb +181 -129
- data/test/test_ruby_parser.rb +1213 -108
- data/tools/munge.rb +34 -6
- data/tools/ripper.rb +15 -10
- data.tar.gz.sig +0 -0
- metadata +11 -12
- metadata.gz.sig +0 -0
@@ -0,0 +1,638 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class RubyLexer
|
4
|
+
# Consumes a run of whitespace at the current scan position and bumps
# +lineno+ once for every newline consumed.
#
# When the scanner is at end-of-string while inside a heredoc, the method
# recurses once more so that the next scan can pop the heredoc scanner.
#
# Returns the consumed text, or nil when nothing was consumed. Callers
# only rely on the truthiness of the result.
def eat_whitespace
  r = scan(/\s+/)
  self.lineno += r.count("\n") if r

  if eos? && in_heredoc? then # forces heredoc pop
    more = eat_whitespace
    # Either side may be nil when nothing matched; the previous
    # `r += eat_whitespace` raised in that case.
    r = "#{r}#{more}" if more
  end

  r
end
|
12
|
+
|
13
|
+
# Lexes the next token of a heredoc body.
#
# +here+ is the heredoc strterm tuple stored in +lex_strterm+ by
# heredoc_identifier; only its term, func and range slots are read here.
#
# Returns one of:
# * [:tSTRING_END, [term, func, range]] once the terminator line is found
#   at the beginning of a line,
# * [:tSTRING_DVAR, matched] or [:tSTRING_DBEG, matched] when an
#   interpolation starts (only for expanding heredocs),
# * the value of +result+ for a :tSTRING_CONTENT token otherwise.
#
# Calls rb_compile_error when the stream ends before the terminator.
def heredoc here # ../compare/parse30.y:7678
  _, term, func, _indent_max, _lineno, range = here

  start_line = lineno
  eos = term # HACK
  indent = func =~ STR_FUNC_INDENT

  self.string_buffer = []

  last_line = self.ss_string[range] if range
  eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n" # HACK

  expand = func =~ STR_FUNC_EXPAND

  # TODO? p->heredoc_line_indent == -1

  # The terminator may be preceded by blanks only for indenting heredocs.
  indent_re = indent ? "[ \t]*" : nil
  eos_re = /#{indent_re}#{Regexp.escape eos}(?=\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  maybe_pop_stack
  rb_compile_error err_msg if end_of_stream?

  if beginning_of_line? && scan(eos_re) then
    scan(/\r?\n|\z/)
    self.lineno += 1 if matched =~ /\n/

    heredoc_restore

    self.lex_strterm = nil
    self.lex_state = EXPR_END

    return :tSTRING_END, [term, func, range]
  end

  if expand then
    case
    when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
      # TODO: !ISASCII
      # ?! see parser_peek_variable_name
      return :tSTRING_DVAR, matched
    when scan(/#(?=\@\@?[a-zA-Z_])/) then
      # TODO: !ISASCII
      return :tSTRING_DVAR, matched
    when scan(/#[{]/) then
      self.command_start = true
      return :tSTRING_DBEG, matched
    when scan(/#/) then
      string_buffer << "#"
    end

    begin
      # NOTE: this visibly diverges from the C code but uses tokadd_string
      # to stay clean.

      str = tokadd_string func, eol, nil
      rb_compile_error err_msg if str == RubyLexer::EOF

      if str != eol then
        # Stopped before the end of the line: emit what we have so far.
        str = string_buffer.join
        string_buffer.clear
        return result nil, :tSTRING_CONTENT, str, start_line
      else
        string_buffer << scan(/\r?\n/)
        self.lineno += 1 # TODO: try to remove most scan(/\n/) and friends
      end
    end until check eos_re
  else
    until check(eos_re) do
      string_buffer << scan(/.*(\r?\n|\z)/)
      self.lineno += 1
      rb_compile_error err_msg if end_of_stream?
    end
  end

  # The former `s.b.force_encoding Encoding::UTF_8` statement operated on a
  # throw-away copy and had no effect, so the joined buffer is used as is.
  string_content = string_buffer.join
  string_buffer.clear

  result nil, :tSTRING_CONTENT, string_content, start_line
end
|
97
|
+
|
98
|
+
# Scans a heredoc identifier (the part after `<<`) and sets up
# +lex_strterm+ for the heredoc body.
#
# Three forms are tried in order: a quoted identifier with a closing
# quote, an opening quote with no matching close (reported via
# rb_compile_error), and a bare identifier, which is treated as
# double-quoted. An optional leading `-` (and `~` when ruby23plus?)
# modifier turns on STR_FUNC_INDENT; `~` additionally turns on
# STR_FUNC_DEDENT.
#
# The rest of the current line is consumed and remembered as a character
# range so that heredoc_restore can resume lexing it later.
#
# Returns nil when no identifier is present, otherwise the value of
# +result+ for the opening token (:tSTRING_BEG or :tXSTRING_BEG).
def heredoc_identifier # ../compare/parse30.y:7354
  token = :tSTRING_BEG
  func = STR_FUNC_BORING
  term = nil
  indent = nil
  quote = nil
  char_pos = nil
  byte_pos = nil

  heredoc_indent_mods = "-"
  heredoc_indent_mods += '\~' if ruby23plus?

  case
  when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
    mods, quote, term = match[1], match[2], match[3]
    char_pos = ss.charpos
    byte_pos = ss.pos

    func |= STR_FUNC_INDENT unless mods.empty?
    func |= STR_FUNC_DEDENT if mods == "~"
    func |= case quote
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            when "`" then
              token = :tXSTRING_BEG
              STR_XQUOTE
            else
              debug 1
            end
  when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
    mods, term = match[1], match[2]
    quote = '"'
    char_pos = ss.charpos
    byte_pos = ss.pos

    func |= STR_FUNC_INDENT unless mods.empty?
    func |= STR_FUNC_DEDENT if mods == "~"
    func |= STR_DQUOTE
  else
    return
  end

  old_lineno = self.lineno
  rest_of_line = scan(/.*(?:\r?\n|\z)/)
  self.lineno += rest_of_line.count "\n"

  char_pos_end = ss.charpos - 1

  # range stays nil when nothing follows the identifier on this line.
  range = nil
  range = char_pos..char_pos_end unless rest_of_line.empty?

  self.lex_strterm = [:heredoc, term, func, indent, old_lineno, range, byte_pos]

  result nil, token, quote, old_lineno
end
|
157
|
+
|
158
|
+
# Resumes lexing of the line that introduced the heredoc.
#
# Reads the saved line number, character range and byte position from
# +lex_strterm+, builds a new scanner (same class as +ss+) over the source
# text up to the end of that range, positions it at the saved byte
# position, and pushes both the line number and the scanner.
#
# NOTE(review): the range slot is nil when nothing followed the heredoc
# identifier on its line; this method assumes a non-nil range -- confirm
# callers never reach here in that case.
#
# Always returns nil.
def heredoc_restore # ../compare/parse30.y:7438
  _, _term, _func, _indent, lineno, range, bytepos = lex_strterm

  new_ss = ss.class.new self.ss_string[0..range.max]
  new_ss.pos = bytepos

  lineno_push lineno
  ss_push new_ss

  nil
end
|
169
|
+
|
170
|
+
# Starts a new token by emptying +string_buffer+ in place.
def newtok
  string_buffer.clear
end
|
173
|
+
|
174
|
+
# Reads the next character from the scanner.
#
# A newline is not consumed: the read is undone with +unscan+ and nil is
# returned instead. nil is also what +getch+ yields at end of input.
def nextc
  # TODO:
  # if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
  #     if (nextline(p)) return -1;
  # }

  maybe_pop_stack

  c = ss.getch

  if c == "\n" then
    ss.unscan # leave the newline for the caller to handle
    c = nil
  end

  c
end
|
191
|
+
|
192
|
+
# Lexes the next token inside a non-heredoc string-like literal.
#
# +quote+ is the strterm tuple built by #string:
# [:strterm, func, term, paren]. Its func slot is updated in place with
# STR_FUNC_TERM when the terminator of a word list is seen, so that the
# following call emits the closing token.
#
# Returns a closing token (:tSTRING_END / :tREGEXP_END, or whatever
# string_term returns), a :tSPACE token between word-list elements, an
# interpolation token from scan_variable_name, or a :tSTRING_CONTENT
# token built from +string_buffer+.
#
# Calls rb_compile_error when the literal is not terminated before the
# end of the stream.
def parse_string quote # ../compare/parse30.y:7273
  _, func, term, paren = quote

  qwords = func =~ STR_FUNC_QWORDS
  regexp = func =~ STR_FUNC_REGEXP
  expand = func =~ STR_FUNC_EXPAND
  list   = func =~ STR_FUNC_LIST
  termx  = func =~ STR_FUNC_TERM # TODO: document wtf this means

  space = false
  term_re = regexp_cache[term]

  if termx then
    # self.nextc if qwords # delayed term

    self.lex_strterm = nil

    return result EXPR_END, regexp ? :tREGEXP_END : :tSTRING_END, term
  end

  space = true if qwords && eat_whitespace

  if list then
    debug 4
    # quote[1] -= STR_FUNC_LIST
    # space = true
  end

  # TODO: move to quote.nest!
  if string_nest == 0 && scan(term_re) then
    if qwords then
      quote[1] |= STR_FUNC_TERM

      return :tSPACE, matched
    end

    return string_term func
  end

  return result nil, :tSPACE, " " if space

  newtok

  if expand && check(/#/) then
    t = self.scan_variable_name
    return t if t

    # Not an interpolation: keep the "#" as literal content.
    tokadd "#"
  end

  # TODO: add string_nest, enc, base_enc ?
  lineno = self.lineno
  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    if qwords then
      rb_compile_error "unterminated list meets end of file"
    end

    if regexp then
      rb_compile_error "unterminated regexp meets end of file"
    else
      rb_compile_error "unterminated string meets end of file"
    end
  end

  result nil, :tSTRING_CONTENT, string_buffer.join, lineno
end
|
258
|
+
|
259
|
+
# called from process_percent
|
260
|
+
# Lexes the opener of a percent literal: %<type><term>...<term>, or the
# short form %<term>...<term>, which is treated as type "Q".
#
# Reads the type and delimiter characters, looks up the closing delimiter
# in PERCENT_END (falling back to the same character with a "\0" paren
# when there is no entry), installs the string terminator via #string,
# and returns the value of +result+ for the opening token.
#
# Calls rb_compile_error on end of input or on an unknown type character.
def process_percent_quote # ../compare/parse30.y:8645
  c = getch # type %<type><term>...<term>

  long_hand = !!(c =~ /[QqWwIixrs]/)

  if end_of_stream? || c !~ /\p{Alnum}/ then
    term = c # TODO? PERCENT_END[c] || c

    debug 2 if c && c !~ /\p{ASCII}/
    c = "Q"
  else
    term = getch

    debug 3 if term =~ /\p{Alnum}|\P{ASCII}/
  end

  if end_of_stream? || c == RubyLexer::EOF || term == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # "\0" is special to indicate beg=nnd and that no nesting?
  paren = term
  term = PERCENT_END[term]
  term, paren = paren, "\0" if term.nil? # TODO: "\0" -> nil

  text = long_hand ? "%#{c}#{paren}" : "%#{term}"

  current_line = self.lineno

  token_type, string_type =
    case c
    when "Q" then
      [:tSTRING_BEG, STR_DQUOTE]
    when "q" then
      [:tSTRING_BEG, STR_SQUOTE]
    when "W" then
      eat_whitespace
      [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
    when "w" then
      eat_whitespace
      [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
    when "I" then
      eat_whitespace
      [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
    when "i" then
      eat_whitespace
      [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
    when "x" then
      [:tXSTRING_BEG, STR_XQUOTE]
    when "r" then
      [:tREGEXP_BEG, STR_REGEXP]
    when "s" then
      self.lex_state = EXPR_FNAME
      [:tSYMBEG, STR_SSYM]
    else
      rb_compile_error "unknown type of %string. Expected [QqWwIixrs], found '#{c}'."
    end

  string string_type, term, paren

  result nil, token_type, text, current_line
end
|
322
|
+
|
323
|
+
# Dispatches on the current +lex_strterm+: a tuple tagged :heredoc goes to
# #heredoc, anything else goes to #parse_string. Returns whatever the
# chosen method returns.
def process_string_or_heredoc # ../compare/parse30.y:9075
  if lex_strterm[0] == :heredoc then
    self.heredoc lex_strterm
  else
    self.parse_string lex_strterm
  end
end
|
330
|
+
|
331
|
+
# Decodes one escape sequence. The scanner must be positioned just after
# the introducing backslash.
#
# Handles single-letter escapes, octal and hex constants, meta (M-) and
# control (C- / c) forms, and \uXXXX and \u{...} unicode escapes. Any
# other character is returned unchanged.
#
# +flags+ is accepted but not used in this method.
#
# Returns a duplicate of the decoded string. Calls rb_compile_error with
# "Invalid escape character syntax" for malformed sequences.
def read_escape flags = nil # ../compare/parse30.y:6712
  case
  when scan(/\\/) then                  # Backslash
    '\\'
  when scan(/n/) then                   # newline
    "\n"
  when scan(/t/) then                   # horizontal tab
    "\t"
  when scan(/r/) then                   # carriage-return
    "\r"
  when scan(/f/) then                   # form-feed
    "\f"
  when scan(/v/) then                   # vertical tab
    "\13"
  when scan(/a/) then                   # alarm(bell)
    "\007"
  when scan(/e/) then                   # escape
    "\033"
  when scan(/[0-7]{1,3}/) then          # octal constant
    (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    # TODO: force encode everything to UTF-8?
    match[1].to_i(16).chr.force_encoding Encoding::UTF_8
  when scan(/b/) then                   # backspace
    "\010"
  when scan(/s/) then                   # space
    " "
  when check(/M-\\u/) then
    debug 5
  when scan(/M-\\(?=.)/) then
    # meta applied to a nested escape: decode it, then set the high bit
    c = read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when scan(/M-(\p{ASCII})/) then
    # TODO: ISCNTRL(c) -> goto eof
    c = match[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when check(/(C-|c)\\u/) then
    debug 6
  when scan(/(C-|c)\\?\?/) then
    127.chr
  when scan(/(C-|c)\\/) then
    # control applied to a nested escape: decode it, then mask with 0x9f
    c = read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when scan(/(?:C-|c)(.)/) then
    c = match[1]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
    matched
  when scan(/u(\h{4})/) then
    [match[1].to_i(16)].pack("U")
  when scan(/u(\h{1,3})/) then
    debug 7
    rb_compile_error "Invalid escape character syntax"
  when scan(/u\{(\h+(?: +\h+)*)\}/) then
    match[1].split.map { |s| s.to_i(16) }.pack("U*")
  when scan(/[McCx0-9]/) || end_of_stream? then
    rb_compile_error("Invalid escape character syntax")
  else
    getch
  end.dup
end
|
396
|
+
|
397
|
+
# Scans the option letters that follow a regexp's closing delimiter.
#
# Returns the run of alphabetic characters found ("" when there is none).
# Calls rb_compile_error when the run contains any letter outside
# "ixmonesu".
def regx_options # ../compare/parse30.y:6914
  newtok

  options = scan(/\p{Alpha}+/) || ""

  rb_compile_error("unknown regexp options: %s" % [options]) if
    options =~ /[^ixmonesu]/

  options
end
|
407
|
+
|
408
|
+
# Recognises the start of an interpolation at a "#".
#
# Returns [:tSTRING_DVAR, matched] when the "#" is followed by something
# that looks like a global, instance or class variable (only the "#" is
# consumed), or [:tSTRING_DBEG, matched] for "#{" (also setting
# command_start). A lone "#" is consumed and nil is returned; nil is also
# returned, with nothing consumed, when there is no "#".
def scan_variable_name # ../compare/parse30.y:7208
  case
  when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
    # TODO: !ISASCII
    return :tSTRING_DVAR, matched
  when scan(/#(?=\@\@?[a-zA-Z_])/) then
    # TODO: !ISASCII
    return :tSTRING_DVAR, matched
  when scan(/#[{]/) then
    self.command_start = true
    return :tSTRING_DBEG, matched
  when scan(/#/) then
    # do nothing but swallow
  end

  # if scan(/\P{ASCII}|_|\p{Alpha}/) then # TODO: fold into above DVAR cases
  #   # if (!ISASCII(c) || c == '_' || ISALPHA(c))
  #   #     return tSTRING_DVAR;
  # end

  nil
end
|
430
|
+
|
431
|
+
# Installs the string terminator state for a string-like literal.
#
# +type+ is the STR_* function mask, +beg+ and +nnd+ are the delimiter
# values stored in the tuple ("\0" is stored when +nnd+ is nil).
# STR_FUNC_LABEL is added to the mask when is_label_possible? is true.
#
# Sets +lex_strterm+ to [:strterm, type, beg, nnd || "\0"].
def string type, beg, nnd = nil
  # label = (IS_LABEL_POSSIBLE() ? str_label : 0);
  # p->lex.strterm = NEW_STRTERM(str_dquote | label, '"', 0);
  # p->lex.ptok = p->lex.pcur-1;

  type |= STR_FUNC_LABEL if is_label_possible?
  self.lex_strterm = [:strterm, type, beg, nnd || "\0"]
end
|
439
|
+
|
440
|
+
# Emits the token that closes a string-like literal and clears
# +lex_strterm+.
#
# * Regexps: returns result(EXPR_END, :tREGEXP_END, options), where the
#   options come from regx_options.
# * Strings that may be labels and are followed by a label suffix: the
#   suffix character is consumed, lex_state becomes EXPR_BEG|EXPR_LABEL
#   and [:tLABEL_END, buffered text] is returned.
# * Otherwise: lex_state becomes EXPR_END and
#   [:tSTRING_END, [matched, func]] is returned.
def string_term func # ../compare/parse30.y:7254
  self.lex_strterm = nil

  return result(EXPR_END, :tREGEXP_END, self.regx_options) if
    func =~ STR_FUNC_REGEXP

  if func =~ STR_FUNC_LABEL && is_label_suffix? then
    self.getch
    self.lex_state = EXPR_BEG|EXPR_LABEL

    return :tLABEL_END, string_buffer.join
  end

  self.lex_state = EXPR_END

  return :tSTRING_END, [self.matched, func]
end
|
457
|
+
|
458
|
+
# Appends +c+ to +string_buffer+ and returns the buffer.
def tokadd c # ../compare/parse30.y:6548
  string_buffer << c
end
|
461
|
+
|
462
|
+
# Copies one escape sequence, backslash included, into the token buffer
# without decoding it. tokadd_string uses this for regexp literals.
#
# A backslash-newline pair is dropped. Octal/hex escapes and meta/control
# prefixes are copied verbatim, recursing when another escape follows a
# prefix. Any other backslashed character is copied as backslash plus
# that character.
#
# Calls rb_compile_error for an incomplete \M, \c, \C or \x escape, or
# when no escape can be scanned at all.
def tokadd_escape # ../compare/parse30.y:6840
  case
  when scan(/\\\n/) then
    # just ignore
  when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    tokadd matched
  when scan(/\\([MC]-|c)(?=\\)/) then
    tokadd matched
    self.tokadd_escape
  when scan(/\\([MC]-|c)(.)/) then
    tokadd matched

    self.tokadd_escape if check(/\\/) # recurse if continued!
  when scan(/\\[McCx]/) then # all unprocessed branches from above have failed
    rb_compile_error "Invalid escape character syntax"
  when scan(/\\(.)/m) then
    chr = match[1]

    tokadd "\\"
    tokadd chr
  else
    rb_compile_error "Invalid escape character syntax: %p" % [self.rest.lines.first]
  end
end
|
486
|
+
|
487
|
+
# Accumulates string content into the token buffer until something the
# caller has to handle is reached: the terminator at nesting depth zero,
# the start of an interpolation (expanding strings only), whitespace in a
# word list, or the end of the stream.
#
# +func+ is the STR_* function mask, +term+ the closing delimiter ("\n"
# when called from #heredoc) and +paren+ the opening delimiter used for
# nesting ("\0" or nil when nesting does not apply).
#
# Returns the last matched text when there is one, RubyLexer::EOF when
# the stream ended (including right after a backslash), and nil otherwise.
def tokadd_string func, term, paren # ../compare/parse30.y:7020
  qwords = func =~ STR_FUNC_QWORDS
  escape = func =~ STR_FUNC_ESCAPE
  expand = func =~ STR_FUNC_EXPAND
  regexp = func =~ STR_FUNC_REGEXP

  paren_re = regexp_cache[paren] if paren != "\0"
  term_re  = if term == "\n"
               /\r?\n/
             else
               regexp_cache[term]
             end

  # Matches a run of "plain" characters. It depends only on the
  # arguments, so it is built once, the first time it is needed.
  plain_re = nil

  until end_of_stream? do
    case
    when paren_re && scan(paren_re) then
      self.string_nest += 1
    when scan(term_re) then
      if self.string_nest == 0 then
        self.pos -= 1 # TODO: ss.unscan 665 errors #$ HACK: why do we depend on this so hard?
        break # leave eos loop, go parse term in caller (heredoc or parse_string)
      else
        self.lineno += matched.count("\n")
        self.string_nest -= 1
      end

    when expand && check(/#[\$\@\{]/) then
      # do nothing since we used `check`
      break # leave eos loop
    when check(/\\/) then
      case
      when scan(/\\\n/) then
        self.lineno += 1
        case
        when qwords then
          tokadd "\n"
          next
        when expand then
          next if func !~ STR_FUNC_INDENT

          if term == "\n" then
            unscan     # rollback
            scan(/\\/) # and split
            scan(/\n/) # this is `matched`
            break
          end

          tokadd "\\"
          debug 9
        else
          unscan     # rollback
          scan(/\\/) # this is `matched`
        end
      when check(/\\\\/) then
        tokadd '\\' if escape
        nextc # ignore 1st \\
        nextc # for tokadd ss.matched, below
      when scan(/\\u/) then
        unless expand then
          tokadd "\\"
          next
        end

        tokadd_utf8 term, func, regexp

        next
      else
        scan(/\\/) # eat it, we know it's there

        return RubyLexer::EOF if end_of_stream?

        if scan(/\P{ASCII}/) then
          tokadd "\\" unless expand
          tokadd self.matched
          next
        end

        case
        when regexp then
          if term !~ SIMPLE_RE_META && scan(term_re) then
            tokadd matched
            next
          end

          self.pos -= 1 # TODO: ss.unscan 15 errors
          # HACK? decide whether to eat the \\ above
          debug 10 if tokadd_escape && end_of_stream?

          next # C's continue = Ruby's next
        when expand then
          tokadd "\\" if escape
          tokadd read_escape
          next
        when qwords && scan(/\s/) then
          # ignore backslashed spaces in %w
        when !check(term_re) && !(paren_re && check(paren_re)) then
          tokadd "\\"
          next
        else
          getch # slurp it too for matched below
        end
      end # inner case for /\\/

    when scan(/\P{ASCII}/) then
      # not currently checking encoding stuff -- drops to tokadd below
    when qwords && check(/\s/) then
      break # leave eos loop
    else
      plain_re ||= begin
                     t = Regexp.escape(term == "\n" ? "\r\n" : term)
                     x = Regexp.escape paren if paren && paren != "\000"
                     q = "\\s" if qwords
                     /[^#{t}#{x}\#\\#{q}]+/
                   end

      scan(plain_re) || getch
      self.lineno += matched.count "\n" if matched
    end # big case

    tokadd self.matched
  end # until end_of_stream?

  if self.matched then
    self.matched
  elsif end_of_stream? then
    RubyLexer::EOF
  end
end # tokadd_string
|
615
|
+
|
616
|
+
# Handles the remainder of a \u escape; the scanner must be positioned
# just after the "\u".
#
# Accepts either exactly four hex digits or a braced, whitespace-separated
# list of one-to-six-digit hex codepoints. In a regexp literal
# (+regexp_literal+ truthy) the escape is copied into the token buffer in
# source form (with the codepoint list normalised to single spaces);
# otherwise the decoded characters are appended.
#
# +term+ and +func+ are accepted but not used in this method.
#
# Calls rb_compile_error "unterminated Unicode escape" when neither form
# matches.
def tokadd_utf8 term, func, regexp_literal # ../compare/parse30.y:6646
  tokadd "\\u" if regexp_literal

  case
  when scan(/\h{4}/) then
    codepoint = [matched.to_i(16)].pack("U")

    tokadd regexp_literal ? matched : codepoint
  when scan(/\{\s*(\h{1,6}(?:\s+\h{1,6})*)\s*\}/) then
    hexes = match[1].split

    if regexp_literal then
      tokadd "{"
      tokadd hexes.join(" ")
      tokadd "}"
    else
      # "U*" packs every codepoint; a bare "U" kept only the first one,
      # so \u{61 62 63} used to decode to "a".
      tokadd hexes.map { |s| s.to_i 16 }.pack("U*")
    end
  else
    rb_compile_error "unterminated Unicode escape"
  end
end
|
638
|
+
end
|