ruby_parser 3.13.1 → 3.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.autotest +18 -29
- data/History.rdoc +97 -0
- data/Manifest.txt +2 -0
- data/README.rdoc +3 -3
- data/Rakefile +39 -21
- data/bin/ruby_parse +3 -1
- data/bin/ruby_parse_extract_error +8 -3
- data/compare/normalize.rb +43 -3
- data/debugging.md +39 -0
- data/lib/ruby20_parser.rb +3431 -3238
- data/lib/ruby20_parser.y +474 -300
- data/lib/ruby21_parser.rb +3491 -3295
- data/lib/ruby21_parser.y +482 -307
- data/lib/ruby22_parser.rb +3432 -3240
- data/lib/ruby22_parser.y +483 -308
- data/lib/ruby23_parser.rb +3338 -3150
- data/lib/ruby23_parser.y +483 -308
- data/lib/ruby24_parser.rb +3483 -3273
- data/lib/ruby24_parser.y +487 -310
- data/lib/ruby25_parser.rb +3482 -3272
- data/lib/ruby25_parser.y +487 -310
- data/lib/ruby26_parser.rb +3490 -3278
- data/lib/ruby26_parser.y +493 -314
- data/lib/ruby27_parser.rb +7224 -0
- data/lib/ruby27_parser.y +2657 -0
- data/lib/ruby_lexer.rb +483 -459
- data/lib/ruby_lexer.rex +5 -6
- data/lib/ruby_lexer.rex.rb +6 -8
- data/lib/ruby_parser.rb +29 -27
- data/lib/ruby_parser.yy +497 -315
- data/lib/ruby_parser_extras.rb +670 -420
- data/test/test_ruby_lexer.rb +1208 -1121
- data/test/test_ruby_parser.rb +2517 -1955
- data/test/test_ruby_parser_extras.rb +39 -4
- data/tools/munge.rb +10 -5
- data/tools/ripper.rb +13 -2
- metadata +28 -21
- metadata.gz.sig +0 -0
data/lib/ruby_lexer.rb
CHANGED
@@ -4,135 +4,9 @@
|
|
4
4
|
$DEBUG = true if ENV["DEBUG"]
|
5
5
|
|
6
6
|
class RubyLexer
|
7
|
-
|
8
7
|
# :stopdoc:
|
9
|
-
HAS_ENC = "".respond_to? :encoding
|
10
|
-
|
11
|
-
IDENT_CHAR = if HAS_ENC then
|
12
|
-
/[\w\u0080-\u{10ffff}]/u
|
13
|
-
else
|
14
|
-
/[\w\x80-\xFF]/n
|
15
|
-
end
|
16
|
-
|
17
8
|
EOF = :eof_haha!
|
18
9
|
|
19
|
-
# ruby constants for strings (should this be moved somewhere else?)
|
20
|
-
|
21
|
-
STR_FUNC_BORING = 0x00
|
22
|
-
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
|
23
|
-
STR_FUNC_EXPAND = 0x02
|
24
|
-
STR_FUNC_REGEXP = 0x04
|
25
|
-
STR_FUNC_QWORDS = 0x08
|
26
|
-
STR_FUNC_SYMBOL = 0x10
|
27
|
-
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
|
28
|
-
STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
|
29
|
-
|
30
|
-
STR_SQUOTE = STR_FUNC_BORING
|
31
|
-
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
32
|
-
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
33
|
-
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
34
|
-
STR_SSYM = STR_FUNC_SYMBOL
|
35
|
-
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
36
|
-
|
37
|
-
class State
|
38
|
-
attr_accessor :n
|
39
|
-
|
40
|
-
def initialize o
|
41
|
-
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
42
|
-
|
43
|
-
self.n = o
|
44
|
-
end
|
45
|
-
|
46
|
-
def == o
|
47
|
-
o.class == self.class && o.n == self.n
|
48
|
-
end
|
49
|
-
|
50
|
-
def =~ v
|
51
|
-
(self.n & v.n) != 0
|
52
|
-
end
|
53
|
-
|
54
|
-
def | v
|
55
|
-
self.class.new(self.n | v.n)
|
56
|
-
end
|
57
|
-
|
58
|
-
def inspect
|
59
|
-
return "EXPR_NONE" if n.zero?
|
60
|
-
NAMES.map { |v,k| k if self =~ v }.compact.join "|"
|
61
|
-
end
|
62
|
-
|
63
|
-
module Values
|
64
|
-
EXPR_NONE = State.new 0x0
|
65
|
-
EXPR_BEG = State.new 0x1
|
66
|
-
EXPR_END = State.new 0x2
|
67
|
-
EXPR_ENDARG = State.new 0x4
|
68
|
-
EXPR_ENDFN = State.new 0x8
|
69
|
-
EXPR_ARG = State.new 0x10
|
70
|
-
EXPR_CMDARG = State.new 0x20
|
71
|
-
EXPR_MID = State.new 0x40
|
72
|
-
EXPR_FNAME = State.new 0x80
|
73
|
-
EXPR_DOT = State.new 0x100
|
74
|
-
EXPR_CLASS = State.new 0x200
|
75
|
-
EXPR_LABEL = State.new 0x400
|
76
|
-
EXPR_LABELED = State.new 0x800
|
77
|
-
EXPR_FITEM = State.new 0x1000
|
78
|
-
|
79
|
-
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
80
|
-
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
81
|
-
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
82
|
-
|
83
|
-
# extra fake lex_state names to make things a bit cleaner
|
84
|
-
|
85
|
-
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
86
|
-
EXPR_NUM = EXPR_END|EXPR_ENDARG
|
87
|
-
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
88
|
-
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
89
|
-
end
|
90
|
-
|
91
|
-
include Values
|
92
|
-
|
93
|
-
NAMES = {
|
94
|
-
EXPR_NONE => "EXPR_NONE",
|
95
|
-
EXPR_BEG => "EXPR_BEG",
|
96
|
-
EXPR_END => "EXPR_END",
|
97
|
-
EXPR_ENDARG => "EXPR_ENDARG",
|
98
|
-
EXPR_ENDFN => "EXPR_ENDFN",
|
99
|
-
EXPR_ARG => "EXPR_ARG",
|
100
|
-
EXPR_CMDARG => "EXPR_CMDARG",
|
101
|
-
EXPR_MID => "EXPR_MID",
|
102
|
-
EXPR_FNAME => "EXPR_FNAME",
|
103
|
-
EXPR_DOT => "EXPR_DOT",
|
104
|
-
EXPR_CLASS => "EXPR_CLASS",
|
105
|
-
EXPR_LABEL => "EXPR_LABEL",
|
106
|
-
EXPR_LABELED => "EXPR_LABELED",
|
107
|
-
EXPR_FITEM => "EXPR_FITEM",
|
108
|
-
}
|
109
|
-
end
|
110
|
-
|
111
|
-
include State::Values
|
112
|
-
|
113
|
-
if $DEBUG then
|
114
|
-
def lex_state= o
|
115
|
-
return if @lex_state == o
|
116
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
117
|
-
if ENV["V"] then
|
118
|
-
c = caller[0]
|
119
|
-
c = caller[1] if c =~ /\b(expr_)?result\b/
|
120
|
-
c = caller[2] if c =~ /\b(expr_)?result\b/
|
121
|
-
warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
|
122
|
-
else
|
123
|
-
warn "lex_state: %p -> %p" % [lex_state, o]
|
124
|
-
end
|
125
|
-
@lex_state = o
|
126
|
-
end
|
127
|
-
else
|
128
|
-
def lex_state= o
|
129
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
130
|
-
@lex_state = o
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
attr_reader :lex_state
|
135
|
-
|
136
10
|
ESCAPES = {
|
137
11
|
"a" => "\007",
|
138
12
|
"b" => "\010",
|
@@ -149,6 +23,8 @@ class RubyLexer
|
|
149
23
|
"c\?" => 127.chr,
|
150
24
|
}
|
151
25
|
|
26
|
+
HAS_ENC = "".respond_to? :encoding
|
27
|
+
|
152
28
|
TOKENS = {
|
153
29
|
"!" => :tBANG,
|
154
30
|
"!=" => :tNEQ,
|
@@ -165,13 +41,26 @@ class RubyLexer
|
|
165
41
|
"->" => :tLAMBDA,
|
166
42
|
}
|
167
43
|
|
168
|
-
|
169
|
-
|
170
|
-
@@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
|
44
|
+
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
171
45
|
@@regexp_cache[nil] = nil
|
172
46
|
|
47
|
+
if $DEBUG then
|
48
|
+
attr_reader :lex_state
|
49
|
+
|
50
|
+
def lex_state= o
|
51
|
+
return if @lex_state == o
|
52
|
+
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
53
|
+
|
54
|
+
warn "lex_state: %p -> %p" % [lex_state, o]
|
55
|
+
|
56
|
+
@lex_state = o
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
173
60
|
# :startdoc:
|
174
61
|
|
62
|
+
attr_accessor :lex_state unless $DEBUG
|
63
|
+
|
175
64
|
attr_accessor :lineno # we're bypassing oedipus' lineno handling.
|
176
65
|
attr_accessor :brace_nest
|
177
66
|
attr_accessor :cmdarg
|
@@ -209,7 +98,7 @@ class RubyLexer
|
|
209
98
|
end
|
210
99
|
|
211
100
|
def arg_ambiguous
|
212
|
-
self.warning
|
101
|
+
self.warning "Ambiguous first argument. make sure."
|
213
102
|
end
|
214
103
|
|
215
104
|
def arg_state
|
@@ -219,7 +108,12 @@ class RubyLexer
|
|
219
108
|
def beginning_of_line?
|
220
109
|
ss.bol?
|
221
110
|
end
|
222
|
-
|
111
|
+
|
112
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
113
|
+
|
114
|
+
def check re
|
115
|
+
ss.check re
|
116
|
+
end
|
223
117
|
|
224
118
|
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
225
119
|
c = @comments.join
|
@@ -227,6 +121,12 @@ class RubyLexer
|
|
227
121
|
c
|
228
122
|
end
|
229
123
|
|
124
|
+
def eat_whitespace
|
125
|
+
r = scan(/\s+/)
|
126
|
+
self.extra_lineno += r.count("\n") if r
|
127
|
+
r
|
128
|
+
end
|
129
|
+
|
230
130
|
def end_of_stream?
|
231
131
|
ss.eos?
|
232
132
|
end
|
@@ -245,12 +145,17 @@ class RubyLexer
|
|
245
145
|
result EXPR_BEG, token, text
|
246
146
|
end
|
247
147
|
|
148
|
+
def fixup_lineno extra = 0
|
149
|
+
self.lineno += self.extra_lineno + extra
|
150
|
+
self.extra_lineno = 0
|
151
|
+
end
|
152
|
+
|
248
153
|
def heredoc here # TODO: rewrite / remove
|
249
154
|
_, eos, func, last_line = here
|
250
155
|
|
251
|
-
indent =
|
252
|
-
|
253
|
-
|
156
|
+
indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
|
157
|
+
expand = func =~ STR_FUNC_EXPAND
|
158
|
+
eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
|
254
159
|
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
255
160
|
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
256
161
|
|
@@ -259,30 +164,35 @@ class RubyLexer
|
|
259
164
|
if beginning_of_line? && scan(eos_re) then
|
260
165
|
self.lineno += 1
|
261
166
|
ss.unread_many last_line # TODO: figure out how to remove this
|
262
|
-
return :tSTRING_END, eos
|
167
|
+
return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
|
263
168
|
end
|
264
169
|
|
265
170
|
self.string_buffer = []
|
266
171
|
|
267
172
|
if expand then
|
268
173
|
case
|
269
|
-
when scan(/#[
|
270
|
-
|
174
|
+
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
175
|
+
# TODO: !ISASCII
|
176
|
+
# ?! see parser_peek_variable_name
|
177
|
+
return :tSTRING_DVAR, matched
|
178
|
+
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
179
|
+
# TODO: !ISASCII
|
271
180
|
return :tSTRING_DVAR, matched
|
272
181
|
when scan(/#[{]/) then
|
182
|
+
self.command_start = true
|
273
183
|
return :tSTRING_DBEG, matched
|
274
184
|
when scan(/#/) then
|
275
|
-
string_buffer <<
|
185
|
+
string_buffer << "#"
|
276
186
|
end
|
277
187
|
|
278
188
|
begin
|
279
|
-
c = tokadd_string func,
|
189
|
+
c = tokadd_string func, eol, nil
|
280
190
|
|
281
191
|
rb_compile_error err_msg if
|
282
192
|
c == RubyLexer::EOF
|
283
193
|
|
284
|
-
if c !=
|
285
|
-
return :tSTRING_CONTENT, string_buffer.join
|
194
|
+
if c != eol then
|
195
|
+
return :tSTRING_CONTENT, string_buffer.join
|
286
196
|
else
|
287
197
|
string_buffer << scan(/\n/)
|
288
198
|
end
|
@@ -300,67 +210,24 @@ class RubyLexer
|
|
300
210
|
|
301
211
|
string_content = begin
|
302
212
|
s = string_buffer.join
|
303
|
-
s.
|
304
|
-
rescue ArgumentError
|
305
|
-
s.b.delete("\r").force_encoding Encoding::UTF_8
|
213
|
+
s.b.force_encoding Encoding::UTF_8
|
306
214
|
end
|
307
215
|
|
308
|
-
string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
|
309
|
-
|
310
216
|
return :tSTRING_CONTENT, string_content
|
311
217
|
end
|
312
218
|
|
313
|
-
def heredoc_dedent(string_content)
|
314
|
-
width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
|
315
|
-
heredoc_whitespace_indent_size whitespace
|
316
|
-
end.min || 0
|
317
|
-
|
318
|
-
string_content.split("\n", -1).map do |line|
|
319
|
-
dedent_string line, width
|
320
|
-
end.join "\n"
|
321
|
-
end
|
322
|
-
|
323
|
-
def dedent_string(string, width)
|
324
|
-
characters_skipped = 0
|
325
|
-
indentation_skipped = 0
|
326
|
-
|
327
|
-
string.chars.each do |char|
|
328
|
-
break if indentation_skipped >= width
|
329
|
-
if char == ' '
|
330
|
-
characters_skipped += 1
|
331
|
-
indentation_skipped += 1
|
332
|
-
elsif char == "\t"
|
333
|
-
proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
|
334
|
-
break if (proposed > width)
|
335
|
-
characters_skipped += 1
|
336
|
-
indentation_skipped = proposed
|
337
|
-
end
|
338
|
-
end
|
339
|
-
string[characters_skipped..-1]
|
340
|
-
end
|
341
|
-
|
342
|
-
def heredoc_whitespace_indent_size(whitespace)
|
343
|
-
whitespace.chars.inject 0 do |size, char|
|
344
|
-
if char == "\t"
|
345
|
-
size + TAB_WIDTH
|
346
|
-
else
|
347
|
-
size + 1
|
348
|
-
end
|
349
|
-
end
|
350
|
-
end
|
351
|
-
|
352
219
|
def heredoc_identifier # TODO: remove / rewrite
|
353
220
|
term, func = nil, STR_FUNC_BORING
|
354
221
|
self.string_buffer = []
|
355
222
|
|
356
|
-
heredoc_indent_mods =
|
223
|
+
heredoc_indent_mods = "-"
|
357
224
|
heredoc_indent_mods += '\~' if ruby23plus?
|
358
225
|
|
359
226
|
case
|
360
227
|
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
361
228
|
term = ss[2]
|
362
|
-
func |= STR_FUNC_INDENT unless ss[1].empty?
|
363
|
-
func |= STR_FUNC_ICNTNT if ss[1] ==
|
229
|
+
func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
|
230
|
+
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
364
231
|
func |= case term
|
365
232
|
when "\'" then
|
366
233
|
STR_SQUOTE
|
@@ -377,7 +244,7 @@ class RubyLexer
|
|
377
244
|
func |= STR_DQUOTE
|
378
245
|
unless ss[1].empty? then
|
379
246
|
func |= STR_FUNC_INDENT
|
380
|
-
func |= STR_FUNC_ICNTNT if ss[1] ==
|
247
|
+
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
381
248
|
end
|
382
249
|
string_buffer << ss[2]
|
383
250
|
else
|
@@ -393,7 +260,7 @@ class RubyLexer
|
|
393
260
|
|
394
261
|
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
395
262
|
|
396
|
-
if term ==
|
263
|
+
if term == "`" then
|
397
264
|
result nil, :tXSTRING_BEG, "`"
|
398
265
|
else
|
399
266
|
result nil, :tSTRING_BEG, "\""
|
@@ -404,26 +271,26 @@ class RubyLexer
|
|
404
271
|
lex_state =~ EXPR_FNAME
|
405
272
|
end
|
406
273
|
|
407
|
-
def is_after_operator?
|
408
|
-
lex_state =~ EXPR_FNAME|EXPR_DOT
|
409
|
-
end
|
410
|
-
|
411
274
|
def int_with_base base
|
412
275
|
rb_compile_error "Invalid numeric format" if matched =~ /__/
|
413
276
|
|
414
277
|
text = matched
|
415
278
|
case
|
416
|
-
when text.end_with?(
|
279
|
+
when text.end_with?("ri")
|
417
280
|
return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
|
418
|
-
when text.end_with?(
|
281
|
+
when text.end_with?("r")
|
419
282
|
return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
|
420
|
-
when text.end_with?(
|
283
|
+
when text.end_with?("i")
|
421
284
|
return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
|
422
285
|
else
|
423
286
|
return result(EXPR_NUM, :tINTEGER, text.to_i(base))
|
424
287
|
end
|
425
288
|
end
|
426
289
|
|
290
|
+
def is_after_operator?
|
291
|
+
lex_state =~ EXPR_FNAME|EXPR_DOT
|
292
|
+
end
|
293
|
+
|
427
294
|
def is_arg?
|
428
295
|
lex_state =~ EXPR_ARG_ANY
|
429
296
|
end
|
@@ -436,15 +303,6 @@ class RubyLexer
|
|
436
303
|
lex_state =~ EXPR_END_ANY
|
437
304
|
end
|
438
305
|
|
439
|
-
def lvar_defined? id
|
440
|
-
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
441
|
-
self.parser.env[id.to_sym] == :lvar
|
442
|
-
end
|
443
|
-
|
444
|
-
def ruby22_label?
|
445
|
-
ruby22plus? and is_label_possible?
|
446
|
-
end
|
447
|
-
|
448
306
|
def is_label_possible?
|
449
307
|
(lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
|
450
308
|
end
|
@@ -461,6 +319,16 @@ class RubyLexer
|
|
461
319
|
lpar_beg && lpar_beg == paren_nest
|
462
320
|
end
|
463
321
|
|
322
|
+
def is_local_id id
|
323
|
+
# maybe just make this false for now
|
324
|
+
self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
|
325
|
+
end
|
326
|
+
|
327
|
+
def lvar_defined? id
|
328
|
+
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
329
|
+
self.parser.env[id.to_sym] == :lvar
|
330
|
+
end
|
331
|
+
|
464
332
|
def matched
|
465
333
|
ss.matched
|
466
334
|
end
|
@@ -469,6 +337,134 @@ class RubyLexer
|
|
469
337
|
not is_end?
|
470
338
|
end
|
471
339
|
|
340
|
+
def parse_quote # TODO: remove / rewrite
|
341
|
+
beg, nnd, short_hand, c = nil, nil, false, nil
|
342
|
+
|
343
|
+
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
344
|
+
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
345
|
+
c, beg, short_hand = matched, getch, false
|
346
|
+
else # Short-hand (e.g. %{, %., %!, etc)
|
347
|
+
c, beg, short_hand = "Q", getch, true
|
348
|
+
end
|
349
|
+
|
350
|
+
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
351
|
+
rb_compile_error "unterminated quoted string meets end of file"
|
352
|
+
end
|
353
|
+
|
354
|
+
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
355
|
+
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
356
|
+
nnd, beg = beg, "\0" if nnd.nil?
|
357
|
+
|
358
|
+
token_type, text = nil, "%#{c}#{beg}"
|
359
|
+
token_type, string_type = case c
|
360
|
+
when "Q" then
|
361
|
+
ch = short_hand ? nnd : c + beg
|
362
|
+
text = "%#{ch}"
|
363
|
+
[:tSTRING_BEG, STR_DQUOTE]
|
364
|
+
when "q" then
|
365
|
+
[:tSTRING_BEG, STR_SQUOTE]
|
366
|
+
when "W" then
|
367
|
+
eat_whitespace
|
368
|
+
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
369
|
+
when "w" then
|
370
|
+
eat_whitespace
|
371
|
+
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
372
|
+
when "x" then
|
373
|
+
[:tXSTRING_BEG, STR_XQUOTE]
|
374
|
+
when "r" then
|
375
|
+
[:tREGEXP_BEG, STR_REGEXP]
|
376
|
+
when "s" then
|
377
|
+
self.lex_state = EXPR_FNAME
|
378
|
+
[:tSYMBEG, STR_SSYM]
|
379
|
+
when "I" then
|
380
|
+
eat_whitespace
|
381
|
+
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
382
|
+
when "i" then
|
383
|
+
eat_whitespace
|
384
|
+
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
385
|
+
end
|
386
|
+
|
387
|
+
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
388
|
+
token_type.nil?
|
389
|
+
|
390
|
+
raise "huh" unless string_type
|
391
|
+
|
392
|
+
string string_type, nnd, beg
|
393
|
+
|
394
|
+
return token_type, text
|
395
|
+
end
|
396
|
+
|
397
|
+
def parse_string quote # TODO: rewrite / remove
|
398
|
+
_, string_type, term, open = quote
|
399
|
+
|
400
|
+
space = false # FIX: remove these
|
401
|
+
func = string_type
|
402
|
+
paren = open
|
403
|
+
term_re = @@regexp_cache[term]
|
404
|
+
|
405
|
+
qwords = func =~ STR_FUNC_QWORDS
|
406
|
+
regexp = func =~ STR_FUNC_REGEXP
|
407
|
+
expand = func =~ STR_FUNC_EXPAND
|
408
|
+
|
409
|
+
unless func then # nil'ed from qwords below. *sigh*
|
410
|
+
return :tSTRING_END, nil
|
411
|
+
end
|
412
|
+
|
413
|
+
space = true if qwords and eat_whitespace
|
414
|
+
|
415
|
+
if self.string_nest == 0 && scan(/#{term_re}/) then
|
416
|
+
if qwords then
|
417
|
+
quote[1] = nil
|
418
|
+
return :tSPACE, nil
|
419
|
+
elsif regexp then
|
420
|
+
return :tREGEXP_END, self.regx_options
|
421
|
+
else
|
422
|
+
return :tSTRING_END, term
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
return :tSPACE, nil if space
|
427
|
+
|
428
|
+
self.string_buffer = []
|
429
|
+
|
430
|
+
if expand
|
431
|
+
case
|
432
|
+
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
433
|
+
# TODO: !ISASCII
|
434
|
+
# ?! see parser_peek_variable_name
|
435
|
+
return :tSTRING_DVAR, nil
|
436
|
+
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
437
|
+
# TODO: !ISASCII
|
438
|
+
return :tSTRING_DVAR, nil
|
439
|
+
when scan(/#[{]/) then
|
440
|
+
self.command_start = true
|
441
|
+
return :tSTRING_DBEG, nil
|
442
|
+
when scan(/#/) then
|
443
|
+
string_buffer << "#"
|
444
|
+
end
|
445
|
+
end
|
446
|
+
|
447
|
+
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
448
|
+
if func =~ STR_FUNC_REGEXP then
|
449
|
+
rb_compile_error "unterminated regexp meets end of file"
|
450
|
+
else
|
451
|
+
rb_compile_error "unterminated string meets end of file"
|
452
|
+
end
|
453
|
+
end
|
454
|
+
|
455
|
+
return :tSTRING_CONTENT, string_buffer.join
|
456
|
+
end
|
457
|
+
|
458
|
+
def possibly_escape_string text, check
|
459
|
+
content = match[1]
|
460
|
+
|
461
|
+
if text =~ check then
|
462
|
+
content.gsub(ESC) { unescape $1 }
|
463
|
+
else
|
464
|
+
content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
|
465
|
+
end
|
466
|
+
end
|
467
|
+
|
472
468
|
def process_amper text
|
473
469
|
token = if is_arg? && space_seen && !check(/\s/) then
|
474
470
|
warning("`&' interpreted as argument prefix")
|
@@ -503,20 +499,23 @@ class RubyLexer
|
|
503
499
|
end
|
504
500
|
|
505
501
|
def process_brace_close text
|
506
|
-
# matching compare/parse23.y:8561
|
507
|
-
cond.lexpop
|
508
|
-
cmdarg.lexpop
|
509
|
-
|
510
502
|
case matched
|
511
503
|
when "}" then
|
512
504
|
self.brace_nest -= 1
|
513
|
-
self.lex_state = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6
|
514
|
-
|
515
505
|
return :tSTRING_DEND, matched if brace_nest < 0
|
506
|
+
end
|
507
|
+
|
508
|
+
# matching compare/parse26.y:8099
|
509
|
+
cond.pop
|
510
|
+
cmdarg.pop
|
511
|
+
|
512
|
+
case matched
|
513
|
+
when "}" then
|
514
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
516
515
|
return :tRCURLY, matched
|
517
516
|
when "]" then
|
518
517
|
self.paren_nest -= 1
|
519
|
-
self.lex_state = EXPR_ENDARG
|
518
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
520
519
|
return :tRBRACK, matched
|
521
520
|
when ")" then
|
522
521
|
self.paren_nest -= 1
|
@@ -527,30 +526,6 @@ class RubyLexer
|
|
527
526
|
end
|
528
527
|
end
|
529
528
|
|
530
|
-
def process_colon1 text
|
531
|
-
# ?: / then / when
|
532
|
-
if is_end? || check(/\s/) then
|
533
|
-
return result EXPR_BEG, :tCOLON, text
|
534
|
-
end
|
535
|
-
|
536
|
-
case
|
537
|
-
when scan(/\'/) then
|
538
|
-
string STR_SSYM
|
539
|
-
when scan(/\"/) then
|
540
|
-
string STR_DSYM
|
541
|
-
end
|
542
|
-
|
543
|
-
result EXPR_FNAME, :tSYMBEG, text
|
544
|
-
end
|
545
|
-
|
546
|
-
def process_colon2 text
|
547
|
-
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
548
|
-
result EXPR_BEG, :tCOLON3, text
|
549
|
-
else
|
550
|
-
result EXPR_DOT, :tCOLON2, text
|
551
|
-
end
|
552
|
-
end
|
553
|
-
|
554
529
|
def process_brace_open text
|
555
530
|
# matching compare/parse23.y:8694
|
556
531
|
self.brace_nest += 1
|
@@ -566,30 +541,54 @@ class RubyLexer
|
|
566
541
|
when lex_state =~ EXPR_LABELED then
|
567
542
|
:tLBRACE # hash
|
568
543
|
when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
|
569
|
-
:tLCURLY # block (primary)
|
544
|
+
:tLCURLY # block (primary) "{" in parse.y
|
570
545
|
when lex_state =~ EXPR_ENDARG then
|
571
546
|
:tLBRACE_ARG # block (expr)
|
572
547
|
else
|
573
548
|
:tLBRACE # hash
|
574
549
|
end
|
575
550
|
|
576
|
-
state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
|
577
|
-
self.command_start = true if token != :tLBRACE
|
551
|
+
state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
|
552
|
+
self.command_start = true if token != :tLBRACE
|
553
|
+
|
554
|
+
cond.push false
|
555
|
+
cmdarg.push false
|
556
|
+
result state, token, text
|
557
|
+
end
|
558
|
+
|
559
|
+
def process_colon1 text
|
560
|
+
# ?: / then / when
|
561
|
+
if is_end? || check(/\s/) then
|
562
|
+
return result EXPR_BEG, :tCOLON, text
|
563
|
+
end
|
564
|
+
|
565
|
+
case
|
566
|
+
when scan(/\'/) then
|
567
|
+
string STR_SSYM
|
568
|
+
when scan(/\"/) then
|
569
|
+
string STR_DSYM
|
570
|
+
end
|
578
571
|
|
579
|
-
|
580
|
-
|
581
|
-
|
572
|
+
result EXPR_FNAME, :tSYMBEG, text
|
573
|
+
end
|
574
|
+
|
575
|
+
def process_colon2 text
|
576
|
+
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
577
|
+
result EXPR_BEG, :tCOLON3, text
|
578
|
+
else
|
579
|
+
result EXPR_DOT, :tCOLON2, text
|
580
|
+
end
|
582
581
|
end
|
583
582
|
|
584
583
|
def process_float text
|
585
584
|
rb_compile_error "Invalid numeric format" if text =~ /__/
|
586
585
|
|
587
586
|
case
|
588
|
-
when text.end_with?(
|
587
|
+
when text.end_with?("ri")
|
589
588
|
return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
590
|
-
when text.end_with?(
|
589
|
+
when text.end_with?("i")
|
591
590
|
return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
592
|
-
when text.end_with?(
|
591
|
+
when text.end_with?("r")
|
593
592
|
return result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
594
593
|
else
|
595
594
|
return result EXPR_NUM, :tFLOAT, text.to_f
|
@@ -612,6 +611,24 @@ class RubyLexer
|
|
612
611
|
result EXPR_END, tok_id, text
|
613
612
|
end
|
614
613
|
|
614
|
+
def process_label text
|
615
|
+
symbol = possibly_escape_string text, /^\"/
|
616
|
+
|
617
|
+
result EXPR_LAB, :tLABEL, [symbol, self.lineno]
|
618
|
+
end
|
619
|
+
|
620
|
+
def process_label_or_string text
|
621
|
+
if @was_label && text =~ /:\Z/ then
|
622
|
+
@was_label = nil
|
623
|
+
return process_label text
|
624
|
+
elsif text =~ /:\Z/ then
|
625
|
+
ss.pos -= 1 # put back ":"
|
626
|
+
text = text[0..-2]
|
627
|
+
end
|
628
|
+
|
629
|
+
result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
|
630
|
+
end
|
631
|
+
|
615
632
|
def process_lchevron text
|
616
633
|
if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
|
617
634
|
!is_end? &&
|
@@ -634,14 +651,14 @@ class RubyLexer
|
|
634
651
|
c = matched
|
635
652
|
hit = false
|
636
653
|
|
637
|
-
if c ==
|
654
|
+
if c == "#" then
|
638
655
|
ss.pos -= 1
|
639
656
|
|
640
657
|
# TODO: handle magic comments
|
641
658
|
while scan(/\s*\#.*(\n+|\z)/) do
|
642
659
|
hit = true
|
643
660
|
self.lineno += matched.lines.to_a.size
|
644
|
-
@comments << matched.gsub(/^ +#/,
|
661
|
+
@comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
645
662
|
end
|
646
663
|
|
647
664
|
return nil if end_of_stream?
|
@@ -697,7 +714,7 @@ class RubyLexer
|
|
697
714
|
# "an argument list, not a decomposed argument")
|
698
715
|
:tLPAREN2
|
699
716
|
else
|
700
|
-
:tLPAREN2 # plain
|
717
|
+
:tLPAREN2 # plain "(" in parse.y
|
701
718
|
end
|
702
719
|
|
703
720
|
self.paren_nest += 1
|
@@ -735,7 +752,7 @@ class RubyLexer
|
|
735
752
|
|
736
753
|
return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
|
737
754
|
|
738
|
-
if
|
755
|
+
if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
|
739
756
|
arg_ambiguous if is_arg?
|
740
757
|
|
741
758
|
if check(/\d/) then
|
@@ -760,12 +777,12 @@ class RubyLexer
|
|
760
777
|
|
761
778
|
if check(/\s|\v/) then
|
762
779
|
unless is_arg? then
|
763
|
-
c2 = { " " =>
|
764
|
-
"\n" =>
|
765
|
-
"\t" =>
|
766
|
-
"\v" =>
|
767
|
-
"\r" =>
|
768
|
-
"\f" =>
|
780
|
+
c2 = { " " => "s",
|
781
|
+
"\n" => "n",
|
782
|
+
"\t" => "t",
|
783
|
+
"\v" => "v",
|
784
|
+
"\r" => "r",
|
785
|
+
"\f" => "f" }[matched]
|
769
786
|
|
770
787
|
if c2 then
|
771
788
|
warning("invalid character syntax; use ?\\" + c2)
|
@@ -781,12 +798,22 @@ class RubyLexer
|
|
781
798
|
c = if scan(/\\/) then
|
782
799
|
self.read_escape
|
783
800
|
else
|
784
|
-
|
801
|
+
getch
|
785
802
|
end
|
786
803
|
|
787
804
|
result EXPR_END, :tSTRING, c
|
788
805
|
end
|
789
806
|
|
807
|
+
def process_simple_string text
|
808
|
+
replacement = text[1..-2].gsub(ESC) {
|
809
|
+
unescape($1).b.force_encoding Encoding::UTF_8
|
810
|
+
}
|
811
|
+
|
812
|
+
replacement = replacement.b unless replacement.valid_encoding?
|
813
|
+
|
814
|
+
result EXPR_END, :tSTRING, replacement
|
815
|
+
end
|
816
|
+
|
790
817
|
def process_slash text
|
791
818
|
if is_beg? then
|
792
819
|
string STR_REGEXP
|
@@ -838,43 +865,38 @@ class RubyLexer
|
|
838
865
|
result EXPR_PAR, token, text
|
839
866
|
end
|
840
867
|
|
841
|
-
def
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
end
|
849
|
-
end
|
850
|
-
|
851
|
-
def process_symbol text
|
852
|
-
symbol = possibly_escape_string text, /^:"/
|
868
|
+
def process_string # TODO: rewrite / remove
|
869
|
+
# matches top of parser_yylex in compare/parse23.y:8113
|
870
|
+
token = if lex_strterm[0] == :heredoc then
|
871
|
+
self.heredoc lex_strterm
|
872
|
+
else
|
873
|
+
self.parse_string lex_strterm
|
874
|
+
end
|
853
875
|
|
854
|
-
|
855
|
-
end
|
876
|
+
token_type, c = token
|
856
877
|
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
878
|
+
# matches parser_string_term from 2.3, but way off from 2.5
|
879
|
+
if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
|
880
|
+
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
881
|
+
!cond.is_in_state) || is_arg?) &&
|
882
|
+
is_label_suffix? then
|
883
|
+
scan(/:/)
|
884
|
+
token_type = token[0] = :tLABEL_END
|
885
|
+
end
|
886
|
+
end
|
861
887
|
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
return process_label text
|
866
|
-
elsif text =~ /:\Z/ then
|
867
|
-
ss.pos -= 1 # put back ":"
|
868
|
-
text = text[0..-2]
|
888
|
+
if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
|
889
|
+
self.lex_strterm = nil
|
890
|
+
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
|
869
891
|
end
|
870
892
|
|
871
|
-
|
893
|
+
return token
|
872
894
|
end
|
873
895
|
|
874
|
-
def
|
875
|
-
symbol = possibly_escape_string text,
|
896
|
+
def process_symbol text
|
897
|
+
symbol = possibly_escape_string text, /^:\"/ # stupid emacs
|
876
898
|
|
877
|
-
result
|
899
|
+
result EXPR_LIT, :tSYMBOL, symbol
|
878
900
|
end
|
879
901
|
|
880
902
|
def process_token text
|
@@ -902,6 +924,7 @@ class RubyLexer
|
|
902
924
|
|
903
925
|
if is_label_possible? and is_label_suffix? then
|
904
926
|
scan(/:/)
|
927
|
+
# TODO: propagate the lineno to ALL results
|
905
928
|
return result EXPR_LAB, :tLABEL, [token, self.lineno]
|
906
929
|
end
|
907
930
|
|
@@ -922,6 +945,8 @@ class RubyLexer
|
|
922
945
|
EXPR_END
|
923
946
|
end
|
924
947
|
|
948
|
+
tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
|
949
|
+
|
925
950
|
if last_state !~ EXPR_DOT|EXPR_FNAME and
|
926
951
|
(tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
|
927
952
|
lvar_defined?(token) then
|
@@ -945,18 +970,16 @@ class RubyLexer
|
|
945
970
|
self.command_start = true if lex_state =~ EXPR_BEG
|
946
971
|
|
947
972
|
case
|
948
|
-
when keyword.id0 == :kDO then
|
973
|
+
when keyword.id0 == :kDO then # parse26.y line 7591
|
949
974
|
case
|
950
975
|
when lambda_beginning? then
|
951
976
|
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
952
|
-
self.paren_nest -= 1
|
977
|
+
self.paren_nest -= 1 # TODO: question this?
|
953
978
|
result lex_state, :kDO_LAMBDA, value
|
954
979
|
when cond.is_in_state then
|
955
980
|
result lex_state, :kDO_COND, value
|
956
981
|
when cmdarg.is_in_state && state != EXPR_CMDARG then
|
957
982
|
result lex_state, :kDO_BLOCK, value
|
958
|
-
when state =~ EXPR_BEG|EXPR_ENDARG then
|
959
|
-
result lex_state, :kDO_BLOCK, value
|
960
983
|
else
|
961
984
|
result lex_state, :kDO, value
|
962
985
|
end
|
@@ -973,9 +996,9 @@ class RubyLexer
|
|
973
996
|
ss.unscan # put back "_"
|
974
997
|
|
975
998
|
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
976
|
-
|
977
|
-
elsif scan(
|
978
|
-
|
999
|
+
[RubyLexer::EOF, RubyLexer::EOF]
|
1000
|
+
elsif scan(/#{IDENT_CHAR}+/) then
|
1001
|
+
process_token matched
|
979
1002
|
end
|
980
1003
|
end
|
981
1004
|
|
@@ -1012,7 +1035,7 @@ class RubyLexer
|
|
1012
1035
|
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
1013
1036
|
# TODO: force encode everything to UTF-8?
|
1014
1037
|
ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
|
1015
|
-
when check(/M
|
1038
|
+
when check(/M-\\./) then
|
1016
1039
|
scan(/M-\\/) # eat it
|
1017
1040
|
c = self.read_escape
|
1018
1041
|
c[0] = (c[0].ord | 0x80).chr
|
@@ -1026,6 +1049,11 @@ class RubyLexer
|
|
1026
1049
|
c = self.read_escape
|
1027
1050
|
c[0] = (c[0].ord & 0x9f).chr
|
1028
1051
|
c
|
1052
|
+
when check(/(C-|c)\\(?!u|\\)/) then
|
1053
|
+
scan(/(C-|c)\\/) # eat it
|
1054
|
+
c = read_escape
|
1055
|
+
c[0] = (c[0].ord & 0x9f).chr
|
1056
|
+
c
|
1029
1057
|
when scan(/C-\?|c\?/) then
|
1030
1058
|
127.chr
|
1031
1059
|
when scan(/(C-|c)(.)/) then
|
@@ -1034,17 +1062,25 @@ class RubyLexer
|
|
1034
1062
|
c
|
1035
1063
|
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
1036
1064
|
matched
|
1037
|
-
when scan(/u(
|
1038
|
-
[ss[1].
|
1039
|
-
when scan(/u(
|
1065
|
+
when scan(/u(\h{4})/) then
|
1066
|
+
[ss[1].to_i(16)].pack("U")
|
1067
|
+
when scan(/u(\h{1,3})/) then
|
1040
1068
|
rb_compile_error "Invalid escape character syntax"
|
1069
|
+
when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
|
1070
|
+
ss[1].split.map { |s| s.to_i(16) }.pack("U*")
|
1041
1071
|
when scan(/[McCx0-9]/) || end_of_stream? then
|
1042
1072
|
rb_compile_error("Invalid escape character syntax")
|
1043
1073
|
else
|
1044
|
-
|
1074
|
+
getch
|
1045
1075
|
end.dup
|
1046
1076
|
end
|
1047
1077
|
|
1078
|
+
def getch
|
1079
|
+
c = ss.getch
|
1080
|
+
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1081
|
+
c
|
1082
|
+
end
|
1083
|
+
|
1048
1084
|
def regx_options # TODO: rewrite / remove
|
1049
1085
|
good, bad = [], []
|
1050
1086
|
|
@@ -1084,23 +1120,24 @@ class RubyLexer
|
|
1084
1120
|
[token, text]
|
1085
1121
|
end
|
1086
1122
|
|
1087
|
-
def
|
1088
|
-
|
1123
|
+
def ruby22_label?
|
1124
|
+
ruby22plus? and is_label_possible?
|
1089
1125
|
end
|
1090
1126
|
|
1091
|
-
def
|
1092
|
-
|
1127
|
+
def ruby22plus?
|
1128
|
+
parser.class.version >= 22
|
1093
1129
|
end
|
1094
1130
|
|
1095
|
-
def
|
1096
|
-
|
1097
|
-
self.extra_lineno += r.count("\n") if r
|
1098
|
-
r
|
1131
|
+
def ruby23plus?
|
1132
|
+
parser.class.version >= 23
|
1099
1133
|
end
|
1100
1134
|
|
1101
|
-
def
|
1102
|
-
|
1103
|
-
|
1135
|
+
def ruby24minus?
|
1136
|
+
parser.class.version <= 24
|
1137
|
+
end
|
1138
|
+
|
1139
|
+
def scan re
|
1140
|
+
ss.scan re
|
1104
1141
|
end
|
1105
1142
|
|
1106
1143
|
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
@@ -1123,12 +1160,6 @@ class RubyLexer
|
|
1123
1160
|
self.lex_strterm = [:strterm, type, beg, nnd]
|
1124
1161
|
end
|
1125
1162
|
|
1126
|
-
# TODO: consider
|
1127
|
-
# def src= src
|
1128
|
-
# raise "bad src: #{src.inspect}" unless String === src
|
1129
|
-
# @src = RPStringScanner.new(src)
|
1130
|
-
# end
|
1131
|
-
|
1132
1163
|
def tokadd_escape term # TODO: rewrite / remove
|
1133
1164
|
case
|
1134
1165
|
when scan(/\\\n/) then
|
@@ -1158,22 +1189,24 @@ class RubyLexer
|
|
1158
1189
|
end
|
1159
1190
|
|
1160
1191
|
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1161
|
-
qwords =
|
1162
|
-
escape =
|
1163
|
-
expand =
|
1164
|
-
regexp =
|
1165
|
-
symbol =
|
1192
|
+
qwords = func =~ STR_FUNC_QWORDS
|
1193
|
+
escape = func =~ STR_FUNC_ESCAPE
|
1194
|
+
expand = func =~ STR_FUNC_EXPAND
|
1195
|
+
regexp = func =~ STR_FUNC_REGEXP
|
1196
|
+
symbol = func =~ STR_FUNC_SYMBOL
|
1166
1197
|
|
1167
1198
|
paren_re = @@regexp_cache[paren]
|
1168
|
-
term_re =
|
1199
|
+
term_re = if term == "\n"
|
1200
|
+
/#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
|
1201
|
+
else
|
1202
|
+
@@regexp_cache[term]
|
1203
|
+
end
|
1169
1204
|
|
1170
1205
|
until end_of_stream? do
|
1171
1206
|
c = nil
|
1172
1207
|
handled = true
|
1173
1208
|
|
1174
1209
|
case
|
1175
|
-
when paren_re && scan(paren_re) then
|
1176
|
-
self.string_nest += 1
|
1177
1210
|
when scan(term_re) then
|
1178
1211
|
if self.string_nest == 0 then
|
1179
1212
|
ss.pos -= 1
|
@@ -1181,6 +1214,8 @@ class RubyLexer
|
|
1181
1214
|
else
|
1182
1215
|
self.string_nest -= 1
|
1183
1216
|
end
|
1217
|
+
when paren_re && scan(paren_re) then
|
1218
|
+
self.string_nest += 1
|
1184
1219
|
when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
|
1185
1220
|
ss.pos -= 1
|
1186
1221
|
break
|
@@ -1195,7 +1230,7 @@ class RubyLexer
|
|
1195
1230
|
string_buffer << "\n"
|
1196
1231
|
next
|
1197
1232
|
when qwords && scan(/\\\s/) then
|
1198
|
-
c =
|
1233
|
+
c = " "
|
1199
1234
|
when expand && scan(/\\\n/) then
|
1200
1235
|
next
|
1201
1236
|
when regexp && check(/\\/) then
|
@@ -1220,12 +1255,16 @@ class RubyLexer
|
|
1220
1255
|
end # top case
|
1221
1256
|
|
1222
1257
|
unless handled then
|
1223
|
-
t =
|
1224
|
-
|
1258
|
+
t = if term == "\n"
|
1259
|
+
Regexp.escape "\r\n"
|
1260
|
+
else
|
1261
|
+
Regexp.escape term
|
1262
|
+
end
|
1263
|
+
x = Regexp.escape paren if paren && paren != "\000"
|
1225
1264
|
re = if qwords then
|
1226
|
-
/[^#{t}#{x}
|
1265
|
+
/[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
|
1227
1266
|
else
|
1228
|
-
/[^#{t}#{x}
|
1267
|
+
/[^#{t}#{x}\#\\]+|./
|
1229
1268
|
end
|
1230
1269
|
|
1231
1270
|
scan re
|
@@ -1265,10 +1304,12 @@ class RubyLexer
|
|
1265
1304
|
s
|
1266
1305
|
when /^[McCx0-9]/ then
|
1267
1306
|
rb_compile_error("Invalid escape character syntax")
|
1268
|
-
when /u(
|
1307
|
+
when /u(\h{4})/ then
|
1269
1308
|
[$1.delete("{}").to_i(16)].pack("U")
|
1270
|
-
when /u(
|
1309
|
+
when /u(\h{1,3})/ then
|
1271
1310
|
rb_compile_error("Invalid escape character syntax")
|
1311
|
+
when /u\{(\h+(?:\s+\h+)*)\}/ then
|
1312
|
+
$1.split.map { |s| s.to_i(16) }.pack("U*")
|
1272
1313
|
else
|
1273
1314
|
s
|
1274
1315
|
end
|
@@ -1279,171 +1320,154 @@ class RubyLexer
|
|
1279
1320
|
# do nothing for now
|
1280
1321
|
end
|
1281
1322
|
|
1282
|
-
def
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
def ruby23plus?
|
1287
|
-
parser.class.version >= 23
|
1323
|
+
def was_label?
|
1324
|
+
@was_label = ruby22_label?
|
1325
|
+
true
|
1288
1326
|
end
|
1289
1327
|
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
self.heredoc lex_strterm
|
1294
|
-
else
|
1295
|
-
self.parse_string lex_strterm
|
1296
|
-
end
|
1328
|
+
class State
|
1329
|
+
attr_accessor :n
|
1330
|
+
attr_accessor :names
|
1297
1331
|
|
1298
|
-
|
1332
|
+
# TODO: take a shared hash of strings for inspect/to_s
|
1333
|
+
def initialize o, names
|
1334
|
+
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
1299
1335
|
|
1300
|
-
|
1301
|
-
|
1302
|
-
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
1303
|
-
!cond.is_in_state) || is_arg?) &&
|
1304
|
-
is_label_suffix? then
|
1305
|
-
scan(/:/)
|
1306
|
-
token_type = token[0] = :tLABEL_END
|
1307
|
-
end
|
1336
|
+
self.n = o
|
1337
|
+
self.names = names
|
1308
1338
|
end
|
1309
1339
|
|
1310
|
-
|
1311
|
-
self.
|
1312
|
-
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
|
1340
|
+
def == o
|
1341
|
+
self.equal?(o) || (o.class == self.class && o.n == self.n)
|
1313
1342
|
end
|
1314
1343
|
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
def parse_quote # TODO: remove / rewrite
|
1319
|
-
beg, nnd, short_hand, c = nil, nil, false, nil
|
1320
|
-
|
1321
|
-
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
1322
|
-
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
1323
|
-
c, beg, short_hand = matched, ss.getch, false
|
1324
|
-
else # Short-hand (e.g. %{, %., %!, etc)
|
1325
|
-
c, beg, short_hand = 'Q', ss.getch, true
|
1344
|
+
def =~ v
|
1345
|
+
(self.n & v.n) != 0
|
1326
1346
|
end
|
1327
1347
|
|
1328
|
-
|
1329
|
-
|
1348
|
+
def | v
|
1349
|
+
raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
|
1350
|
+
self.names == v.names
|
1351
|
+
self.class.new(self.n | v.n, self.names)
|
1330
1352
|
end
|
1331
1353
|
|
1332
|
-
|
1333
|
-
|
1334
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
1335
|
-
|
1336
|
-
token_type, text = nil, "%#{c}#{beg}"
|
1337
|
-
token_type, string_type = case c
|
1338
|
-
when 'Q' then
|
1339
|
-
ch = short_hand ? nnd : c + beg
|
1340
|
-
text = "%#{ch}"
|
1341
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
1342
|
-
when 'q' then
|
1343
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
1344
|
-
when 'W' then
|
1345
|
-
eat_whitespace
|
1346
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1347
|
-
when 'w' then
|
1348
|
-
eat_whitespace
|
1349
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1350
|
-
when 'x' then
|
1351
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
1352
|
-
when 'r' then
|
1353
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
1354
|
-
when 's' then
|
1355
|
-
self.lex_state = EXPR_FNAME
|
1356
|
-
[:tSYMBEG, STR_SSYM]
|
1357
|
-
when 'I' then
|
1358
|
-
eat_whitespace
|
1359
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1360
|
-
when 'i' then
|
1361
|
-
eat_whitespace
|
1362
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1363
|
-
end
|
1364
|
-
|
1365
|
-
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
1366
|
-
token_type.nil?
|
1367
|
-
|
1368
|
-
raise "huh" unless string_type
|
1369
|
-
|
1370
|
-
string string_type, nnd, beg
|
1371
|
-
|
1372
|
-
return token_type, text
|
1373
|
-
end
|
1374
|
-
|
1375
|
-
def parse_string quote # TODO: rewrite / remove
|
1376
|
-
_, string_type, term, open = quote
|
1377
|
-
|
1378
|
-
space = false # FIX: remove these
|
1379
|
-
func = string_type
|
1380
|
-
paren = open
|
1381
|
-
term_re = @@regexp_cache[term]
|
1382
|
-
|
1383
|
-
qwords = (func & STR_FUNC_QWORDS) != 0
|
1384
|
-
regexp = (func & STR_FUNC_REGEXP) != 0
|
1385
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
1354
|
+
def inspect
|
1355
|
+
return "Value(0)" if n.zero? # HACK?
|
1386
1356
|
|
1387
|
-
|
1388
|
-
|
1357
|
+
names.map { |v, k| k if self =~ v }.
|
1358
|
+
compact.
|
1359
|
+
join("|").
|
1360
|
+
gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
|
1389
1361
|
end
|
1390
1362
|
|
1391
|
-
|
1363
|
+
alias to_s inspect
|
1392
1364
|
|
1393
|
-
|
1394
|
-
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1399
|
-
|
1400
|
-
|
1401
|
-
|
1402
|
-
|
1365
|
+
module Values
|
1366
|
+
expr_names = {}
|
1367
|
+
|
1368
|
+
EXPR_NONE = State.new 0x0, expr_names
|
1369
|
+
EXPR_BEG = State.new 0x1, expr_names
|
1370
|
+
EXPR_END = State.new 0x2, expr_names
|
1371
|
+
EXPR_ENDARG = State.new 0x4, expr_names
|
1372
|
+
EXPR_ENDFN = State.new 0x8, expr_names
|
1373
|
+
EXPR_ARG = State.new 0x10, expr_names
|
1374
|
+
EXPR_CMDARG = State.new 0x20, expr_names
|
1375
|
+
EXPR_MID = State.new 0x40, expr_names
|
1376
|
+
EXPR_FNAME = State.new 0x80, expr_names
|
1377
|
+
EXPR_DOT = State.new 0x100, expr_names
|
1378
|
+
EXPR_CLASS = State.new 0x200, expr_names
|
1379
|
+
EXPR_LABEL = State.new 0x400, expr_names
|
1380
|
+
EXPR_LABELED = State.new 0x800, expr_names
|
1381
|
+
EXPR_FITEM = State.new 0x1000, expr_names
|
1403
1382
|
|
1404
|
-
|
1383
|
+
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
1384
|
+
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
1385
|
+
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
1405
1386
|
|
1406
|
-
|
1387
|
+
# extra fake lex_state names to make things a bit cleaner
|
1407
1388
|
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
# ?! see parser_peek_variable_name
|
1413
|
-
return :tSTRING_DVAR, nil
|
1414
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
1415
|
-
# TODO: !ISASCII
|
1416
|
-
return :tSTRING_DVAR, nil
|
1417
|
-
when scan(/#[{]/) then
|
1418
|
-
self.command_start = true
|
1419
|
-
return :tSTRING_DBEG, nil
|
1420
|
-
when scan(/#/) then
|
1421
|
-
string_buffer << '#'
|
1422
|
-
end
|
1423
|
-
end
|
1389
|
+
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
1390
|
+
EXPR_LIT = EXPR_END|EXPR_ENDARG
|
1391
|
+
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
1392
|
+
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
1424
1393
|
|
1425
|
-
|
1426
|
-
|
1394
|
+
EXPR_NUM = EXPR_LIT
|
1395
|
+
|
1396
|
+
expr_names.merge!(EXPR_NONE => "EXPR_NONE",
|
1397
|
+
EXPR_BEG => "EXPR_BEG",
|
1398
|
+
EXPR_END => "EXPR_END",
|
1399
|
+
EXPR_ENDARG => "EXPR_ENDARG",
|
1400
|
+
EXPR_ENDFN => "EXPR_ENDFN",
|
1401
|
+
EXPR_ARG => "EXPR_ARG",
|
1402
|
+
EXPR_CMDARG => "EXPR_CMDARG",
|
1403
|
+
EXPR_MID => "EXPR_MID",
|
1404
|
+
EXPR_FNAME => "EXPR_FNAME",
|
1405
|
+
EXPR_DOT => "EXPR_DOT",
|
1406
|
+
EXPR_CLASS => "EXPR_CLASS",
|
1407
|
+
EXPR_LABEL => "EXPR_LABEL",
|
1408
|
+
EXPR_LABELED => "EXPR_LABELED",
|
1409
|
+
EXPR_FITEM => "EXPR_FITEM")
|
1410
|
+
|
1411
|
+
# ruby constants for strings
|
1412
|
+
|
1413
|
+
str_func_names = {}
|
1414
|
+
|
1415
|
+
STR_FUNC_BORING = State.new 0x00, str_func_names
|
1416
|
+
STR_FUNC_ESCAPE = State.new 0x01, str_func_names
|
1417
|
+
STR_FUNC_EXPAND = State.new 0x02, str_func_names
|
1418
|
+
STR_FUNC_REGEXP = State.new 0x04, str_func_names
|
1419
|
+
STR_FUNC_QWORDS = State.new 0x08, str_func_names
|
1420
|
+
STR_FUNC_SYMBOL = State.new 0x10, str_func_names
|
1421
|
+
STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
|
1422
|
+
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
1423
|
+
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
1424
|
+
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
1425
|
+
STR_FUNC_ICNTNT = State.new 0x10000, str_func_names # <<~HEREDOC -- TODO: remove?
|
1426
|
+
|
1427
|
+
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
1428
|
+
|
1429
|
+
STR_SQUOTE = STR_FUNC_BORING
|
1430
|
+
STR_DQUOTE = STR_FUNC_EXPAND
|
1431
|
+
STR_XQUOTE = STR_FUNC_EXPAND
|
1432
|
+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
1433
|
+
STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
|
1434
|
+
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
1435
|
+
STR_SSYM = STR_FUNC_SYMBOL
|
1436
|
+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
1437
|
+
|
1438
|
+
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
1439
|
+
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
1440
|
+
STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
|
1441
|
+
STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
|
1442
|
+
STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
|
1443
|
+
STR_FUNC_INDENT => "STR_FUNC_INDENT",
|
1444
|
+
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
1445
|
+
STR_FUNC_LIST => "STR_FUNC_LIST",
|
1446
|
+
STR_FUNC_TERM => "STR_FUNC_TERM",
|
1447
|
+
STR_FUNC_ICNTNT => "STR_FUNC_ICNTNT",
|
1448
|
+
STR_SQUOTE => "STR_SQUOTE")
|
1427
1449
|
end
|
1428
1450
|
|
1429
|
-
|
1451
|
+
include Values
|
1430
1452
|
end
|
1453
|
+
|
1454
|
+
include State::Values
|
1431
1455
|
end
|
1432
1456
|
|
1433
1457
|
require "ruby_lexer.rex"
|
1434
1458
|
|
1435
1459
|
if ENV["RP_LINENO_DEBUG"] then
|
1436
1460
|
class RubyLexer
|
1437
|
-
alias :old_lineno= :lineno=
|
1438
|
-
|
1439
1461
|
def d o
|
1440
1462
|
$stderr.puts o.inspect
|
1441
1463
|
end
|
1442
1464
|
|
1465
|
+
alias old_lineno= lineno=
|
1466
|
+
|
1443
1467
|
def lineno= n
|
1444
1468
|
self.old_lineno= n
|
1445
1469
|
where = caller.first.split(/:/).first(2).join(":")
|
1446
|
-
d :lineno => [n, where, ss && ss.rest[0,40]]
|
1470
|
+
d :lineno => [n, where, ss && ss.rest[0, 40]]
|
1447
1471
|
end
|
1448
1472
|
end
|
1449
1473
|
end
|