ruby_parser 3.13.0 → 3.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.autotest +18 -29
- data/History.rdoc +123 -0
- data/Manifest.txt +2 -0
- data/README.rdoc +3 -3
- data/Rakefile +41 -24
- data/bin/ruby_parse +3 -1
- data/bin/ruby_parse_extract_error +18 -35
- data/compare/normalize.rb +43 -3
- data/debugging.md +39 -0
- data/lib/rp_extensions.rb +1 -1
- data/lib/ruby20_parser.rb +3654 -3466
- data/lib/ruby20_parser.y +504 -327
- data/lib/ruby21_parser.rb +3643 -3455
- data/lib/ruby21_parser.y +512 -334
- data/lib/ruby22_parser.rb +3669 -3492
- data/lib/ruby22_parser.y +513 -335
- data/lib/ruby23_parser.rb +3692 -3499
- data/lib/ruby23_parser.y +513 -335
- data/lib/ruby24_parser.rb +3685 -3463
- data/lib/ruby24_parser.y +517 -331
- data/lib/ruby25_parser.rb +3685 -3462
- data/lib/ruby25_parser.y +517 -331
- data/lib/ruby26_parser.rb +3696 -3471
- data/lib/ruby26_parser.y +523 -335
- data/lib/ruby27_parser.rb +7224 -0
- data/lib/ruby27_parser.y +2657 -0
- data/lib/ruby_lexer.rb +611 -495
- data/lib/ruby_lexer.rex +27 -28
- data/lib/ruby_lexer.rex.rb +71 -31
- data/lib/ruby_parser.rb +31 -27
- data/lib/ruby_parser.yy +529 -336
- data/lib/ruby_parser_extras.rb +720 -449
- data/test/test_ruby_lexer.rb +1560 -1412
- data/test/test_ruby_parser.rb +2611 -1912
- data/test/test_ruby_parser_extras.rb +39 -4
- data/tools/munge.rb +12 -6
- data/tools/ripper.rb +19 -3
- metadata +25 -18
- metadata.gz.sig +4 -1
data/lib/ruby_lexer.rb
CHANGED
@@ -4,40 +4,9 @@
|
|
4
4
|
$DEBUG = true if ENV["DEBUG"]
|
5
5
|
|
6
6
|
class RubyLexer
|
7
|
-
|
8
7
|
# :stopdoc:
|
9
|
-
HAS_ENC = "".respond_to? :encoding
|
10
|
-
|
11
|
-
IDENT_CHAR = if HAS_ENC then
|
12
|
-
/[\w\u0080-\u{10ffff}]/u
|
13
|
-
else
|
14
|
-
/[\w\x80-\xFF]/n
|
15
|
-
end
|
16
|
-
|
17
8
|
EOF = :eof_haha!
|
18
9
|
|
19
|
-
# ruby constants for strings (should this be moved somewhere else?)
|
20
|
-
|
21
|
-
STR_FUNC_BORING = 0x00
|
22
|
-
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
|
23
|
-
STR_FUNC_EXPAND = 0x02
|
24
|
-
STR_FUNC_REGEXP = 0x04
|
25
|
-
STR_FUNC_QWORDS = 0x08
|
26
|
-
STR_FUNC_SYMBOL = 0x10
|
27
|
-
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
|
28
|
-
STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
|
29
|
-
|
30
|
-
STR_SQUOTE = STR_FUNC_BORING
|
31
|
-
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
32
|
-
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
33
|
-
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
34
|
-
STR_SSYM = STR_FUNC_SYMBOL
|
35
|
-
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
36
|
-
|
37
|
-
EXPR_BEG_ANY = [:expr_beg, :expr_mid, :expr_class ]
|
38
|
-
EXPR_ARG_ANY = [:expr_arg, :expr_cmdarg, ]
|
39
|
-
EXPR_END_ANY = [:expr_end, :expr_endarg, :expr_endfn]
|
40
|
-
|
41
10
|
ESCAPES = {
|
42
11
|
"a" => "\007",
|
43
12
|
"b" => "\010",
|
@@ -54,6 +23,8 @@ class RubyLexer
|
|
54
23
|
"c\?" => 127.chr,
|
55
24
|
}
|
56
25
|
|
26
|
+
HAS_ENC = "".respond_to? :encoding
|
27
|
+
|
57
28
|
TOKENS = {
|
58
29
|
"!" => :tBANG,
|
59
30
|
"!=" => :tNEQ,
|
@@ -70,13 +41,26 @@ class RubyLexer
|
|
70
41
|
"->" => :tLAMBDA,
|
71
42
|
}
|
72
43
|
|
73
|
-
|
74
|
-
|
75
|
-
@@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
|
44
|
+
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
76
45
|
@@regexp_cache[nil] = nil
|
77
46
|
|
47
|
+
if $DEBUG then
|
48
|
+
attr_reader :lex_state
|
49
|
+
|
50
|
+
def lex_state= o
|
51
|
+
return if @lex_state == o
|
52
|
+
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
53
|
+
|
54
|
+
warn "lex_state: %p -> %p" % [lex_state, o]
|
55
|
+
|
56
|
+
@lex_state = o
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
78
60
|
# :startdoc:
|
79
61
|
|
62
|
+
attr_accessor :lex_state unless $DEBUG
|
63
|
+
|
80
64
|
attr_accessor :lineno # we're bypassing oedipus' lineno handling.
|
81
65
|
attr_accessor :brace_nest
|
82
66
|
attr_accessor :cmdarg
|
@@ -90,7 +74,6 @@ class RubyLexer
|
|
90
74
|
# Additional context surrounding tokens that both the lexer and
|
91
75
|
# grammar use.
|
92
76
|
|
93
|
-
attr_accessor :lex_state
|
94
77
|
attr_accessor :lex_strterm
|
95
78
|
attr_accessor :lpar_beg
|
96
79
|
attr_accessor :paren_nest
|
@@ -99,24 +82,14 @@ class RubyLexer
|
|
99
82
|
attr_accessor :string_buffer
|
100
83
|
attr_accessor :string_nest
|
101
84
|
|
102
|
-
if $DEBUG then
|
103
|
-
alias lex_state= lex_state=
|
104
|
-
def lex_state=o
|
105
|
-
return if @lex_state == o
|
106
|
-
c = caller.first
|
107
|
-
c = caller[1] if c =~ /\bresult\b/
|
108
|
-
warn "lex_state: %p -> %p from %s" % [@lex_state, o, c.clean_caller]
|
109
|
-
@lex_state = o
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
85
|
# Last token read via next_token.
|
114
86
|
attr_accessor :token
|
115
87
|
|
116
88
|
attr_writer :comments
|
117
89
|
|
118
90
|
def initialize _ = nil
|
119
|
-
@lex_state =
|
91
|
+
@lex_state = nil # remove one warning under $DEBUG
|
92
|
+
self.lex_state = EXPR_NONE
|
120
93
|
|
121
94
|
self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
|
122
95
|
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
@@ -125,17 +98,22 @@ class RubyLexer
|
|
125
98
|
end
|
126
99
|
|
127
100
|
def arg_ambiguous
|
128
|
-
self.warning
|
101
|
+
self.warning "Ambiguous first argument. make sure."
|
129
102
|
end
|
130
103
|
|
131
104
|
def arg_state
|
132
|
-
|
105
|
+
is_after_operator? ? EXPR_ARG : EXPR_BEG
|
133
106
|
end
|
134
107
|
|
135
108
|
def beginning_of_line?
|
136
109
|
ss.bol?
|
137
110
|
end
|
138
|
-
|
111
|
+
|
112
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
113
|
+
|
114
|
+
def check re
|
115
|
+
ss.check re
|
116
|
+
end
|
139
117
|
|
140
118
|
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
141
119
|
c = @comments.join
|
@@ -143,30 +121,41 @@ class RubyLexer
|
|
143
121
|
c
|
144
122
|
end
|
145
123
|
|
124
|
+
def eat_whitespace
|
125
|
+
r = scan(/\s+/)
|
126
|
+
self.extra_lineno += r.count("\n") if r
|
127
|
+
r
|
128
|
+
end
|
129
|
+
|
146
130
|
def end_of_stream?
|
147
131
|
ss.eos?
|
148
132
|
end
|
149
133
|
|
150
134
|
def expr_dot?
|
151
|
-
lex_state
|
135
|
+
lex_state =~ EXPR_DOT
|
152
136
|
end
|
153
137
|
|
154
|
-
def expr_fname?
|
155
|
-
lex_state
|
138
|
+
def expr_fname? # REFACTOR
|
139
|
+
lex_state =~ EXPR_FNAME
|
156
140
|
end
|
157
141
|
|
158
142
|
def expr_result token, text
|
159
143
|
cond.push false
|
160
144
|
cmdarg.push false
|
161
|
-
result
|
145
|
+
result EXPR_BEG, token, text
|
146
|
+
end
|
147
|
+
|
148
|
+
def fixup_lineno extra = 0
|
149
|
+
self.lineno += self.extra_lineno + extra
|
150
|
+
self.extra_lineno = 0
|
162
151
|
end
|
163
152
|
|
164
153
|
def heredoc here # TODO: rewrite / remove
|
165
154
|
_, eos, func, last_line = here
|
166
155
|
|
167
|
-
indent =
|
168
|
-
|
169
|
-
|
156
|
+
indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
|
157
|
+
expand = func =~ STR_FUNC_EXPAND
|
158
|
+
eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
|
170
159
|
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
171
160
|
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
172
161
|
|
@@ -175,30 +164,35 @@ class RubyLexer
|
|
175
164
|
if beginning_of_line? && scan(eos_re) then
|
176
165
|
self.lineno += 1
|
177
166
|
ss.unread_many last_line # TODO: figure out how to remove this
|
178
|
-
return :tSTRING_END, eos
|
167
|
+
return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
|
179
168
|
end
|
180
169
|
|
181
170
|
self.string_buffer = []
|
182
171
|
|
183
172
|
if expand then
|
184
173
|
case
|
185
|
-
when scan(/#[
|
186
|
-
|
174
|
+
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
175
|
+
# TODO: !ISASCII
|
176
|
+
# ?! see parser_peek_variable_name
|
177
|
+
return :tSTRING_DVAR, matched
|
178
|
+
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
179
|
+
# TODO: !ISASCII
|
187
180
|
return :tSTRING_DVAR, matched
|
188
181
|
when scan(/#[{]/) then
|
182
|
+
self.command_start = true
|
189
183
|
return :tSTRING_DBEG, matched
|
190
184
|
when scan(/#/) then
|
191
|
-
string_buffer <<
|
185
|
+
string_buffer << "#"
|
192
186
|
end
|
193
187
|
|
194
188
|
begin
|
195
|
-
c = tokadd_string func,
|
189
|
+
c = tokadd_string func, eol, nil
|
196
190
|
|
197
191
|
rb_compile_error err_msg if
|
198
192
|
c == RubyLexer::EOF
|
199
193
|
|
200
|
-
if c !=
|
201
|
-
return :tSTRING_CONTENT, string_buffer.join
|
194
|
+
if c != eol then
|
195
|
+
return :tSTRING_CONTENT, string_buffer.join
|
202
196
|
else
|
203
197
|
string_buffer << scan(/\n/)
|
204
198
|
end
|
@@ -214,64 +208,26 @@ class RubyLexer
|
|
214
208
|
|
215
209
|
self.lex_strterm = [:heredoc, eos, func, last_line]
|
216
210
|
|
217
|
-
string_content =
|
218
|
-
|
219
|
-
|
211
|
+
string_content = begin
|
212
|
+
s = string_buffer.join
|
213
|
+
s.b.force_encoding Encoding::UTF_8
|
214
|
+
end
|
220
215
|
|
221
216
|
return :tSTRING_CONTENT, string_content
|
222
217
|
end
|
223
218
|
|
224
|
-
def heredoc_dedent(string_content)
|
225
|
-
width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
|
226
|
-
heredoc_whitespace_indent_size whitespace
|
227
|
-
end.min || 0
|
228
|
-
|
229
|
-
string_content.split("\n", -1).map do |line|
|
230
|
-
dedent_string line, width
|
231
|
-
end.join "\n"
|
232
|
-
end
|
233
|
-
|
234
|
-
def dedent_string(string, width)
|
235
|
-
characters_skipped = 0
|
236
|
-
indentation_skipped = 0
|
237
|
-
|
238
|
-
string.chars.each do |char|
|
239
|
-
break if indentation_skipped >= width
|
240
|
-
if char == ' '
|
241
|
-
characters_skipped += 1
|
242
|
-
indentation_skipped += 1
|
243
|
-
elsif char == "\t"
|
244
|
-
proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
|
245
|
-
break if (proposed > width)
|
246
|
-
characters_skipped += 1
|
247
|
-
indentation_skipped = proposed
|
248
|
-
end
|
249
|
-
end
|
250
|
-
string[characters_skipped..-1]
|
251
|
-
end
|
252
|
-
|
253
|
-
def heredoc_whitespace_indent_size(whitespace)
|
254
|
-
whitespace.chars.inject 0 do |size, char|
|
255
|
-
if char == "\t"
|
256
|
-
size + TAB_WIDTH
|
257
|
-
else
|
258
|
-
size + 1
|
259
|
-
end
|
260
|
-
end
|
261
|
-
end
|
262
|
-
|
263
219
|
def heredoc_identifier # TODO: remove / rewrite
|
264
220
|
term, func = nil, STR_FUNC_BORING
|
265
221
|
self.string_buffer = []
|
266
222
|
|
267
|
-
heredoc_indent_mods =
|
223
|
+
heredoc_indent_mods = "-"
|
268
224
|
heredoc_indent_mods += '\~' if ruby23plus?
|
269
225
|
|
270
226
|
case
|
271
227
|
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
272
228
|
term = ss[2]
|
273
|
-
func |= STR_FUNC_INDENT unless ss[1].empty?
|
274
|
-
func |= STR_FUNC_ICNTNT if ss[1] ==
|
229
|
+
func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
|
230
|
+
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
275
231
|
func |= case term
|
276
232
|
when "\'" then
|
277
233
|
STR_SQUOTE
|
@@ -288,7 +244,7 @@ class RubyLexer
|
|
288
244
|
func |= STR_DQUOTE
|
289
245
|
unless ss[1].empty? then
|
290
246
|
func |= STR_FUNC_INDENT
|
291
|
-
func |= STR_FUNC_ICNTNT if ss[1] ==
|
247
|
+
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
292
248
|
end
|
293
249
|
string_buffer << ss[2]
|
294
250
|
else
|
@@ -304,23 +260,15 @@ class RubyLexer
|
|
304
260
|
|
305
261
|
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
306
262
|
|
307
|
-
if term ==
|
263
|
+
if term == "`" then
|
308
264
|
result nil, :tXSTRING_BEG, "`"
|
309
265
|
else
|
310
266
|
result nil, :tSTRING_BEG, "\""
|
311
267
|
end
|
312
268
|
end
|
313
269
|
|
314
|
-
def in_fname?
|
315
|
-
|
316
|
-
end
|
317
|
-
|
318
|
-
def in_arg_state? # TODO: rename is_after_operator?
|
319
|
-
in_lex_state? :expr_fname, :expr_dot
|
320
|
-
end
|
321
|
-
|
322
|
-
def in_lex_state?(*states)
|
323
|
-
states.include? lex_state
|
270
|
+
def in_fname? # REFACTOR
|
271
|
+
lex_state =~ EXPR_FNAME
|
324
272
|
end
|
325
273
|
|
326
274
|
def int_with_base base
|
@@ -328,42 +276,35 @@ class RubyLexer
|
|
328
276
|
|
329
277
|
text = matched
|
330
278
|
case
|
331
|
-
when text.end_with?(
|
332
|
-
return result(
|
333
|
-
when text.end_with?(
|
334
|
-
return result(
|
335
|
-
when text.end_with?(
|
336
|
-
return result(
|
279
|
+
when text.end_with?("ri")
|
280
|
+
return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
|
281
|
+
when text.end_with?("r")
|
282
|
+
return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
|
283
|
+
when text.end_with?("i")
|
284
|
+
return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
|
337
285
|
else
|
338
|
-
return result(
|
286
|
+
return result(EXPR_NUM, :tINTEGER, text.to_i(base))
|
339
287
|
end
|
340
288
|
end
|
341
289
|
|
290
|
+
def is_after_operator?
|
291
|
+
lex_state =~ EXPR_FNAME|EXPR_DOT
|
292
|
+
end
|
293
|
+
|
342
294
|
def is_arg?
|
343
|
-
|
295
|
+
lex_state =~ EXPR_ARG_ANY
|
344
296
|
end
|
345
297
|
|
346
298
|
def is_beg?
|
347
|
-
|
348
|
-
in_lex_state?(*EXPR_BEG_ANY, :expr_value, :expr_labeled)
|
299
|
+
lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
|
349
300
|
end
|
350
301
|
|
351
302
|
def is_end?
|
352
|
-
|
353
|
-
end
|
354
|
-
|
355
|
-
def lvar_defined? id
|
356
|
-
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
357
|
-
self.parser.env[id.to_sym] == :lvar
|
358
|
-
end
|
359
|
-
|
360
|
-
|
361
|
-
def ruby22_label?
|
362
|
-
ruby22plus? and is_label_possible?
|
303
|
+
lex_state =~ EXPR_END_ANY
|
363
304
|
end
|
364
305
|
|
365
306
|
def is_label_possible?
|
366
|
-
(
|
307
|
+
(lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
|
367
308
|
end
|
368
309
|
|
369
310
|
def is_label_suffix?
|
@@ -378,6 +319,16 @@ class RubyLexer
|
|
378
319
|
lpar_beg && lpar_beg == paren_nest
|
379
320
|
end
|
380
321
|
|
322
|
+
def is_local_id id
|
323
|
+
# maybe just make this false for now
|
324
|
+
self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
|
325
|
+
end
|
326
|
+
|
327
|
+
def lvar_defined? id
|
328
|
+
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
329
|
+
self.parser.env[id.to_sym] == :lvar
|
330
|
+
end
|
331
|
+
|
381
332
|
def matched
|
382
333
|
ss.matched
|
383
334
|
end
|
@@ -386,11 +337,139 @@ class RubyLexer
|
|
386
337
|
not is_end?
|
387
338
|
end
|
388
339
|
|
340
|
+
def parse_quote # TODO: remove / rewrite
|
341
|
+
beg, nnd, short_hand, c = nil, nil, false, nil
|
342
|
+
|
343
|
+
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
344
|
+
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
345
|
+
c, beg, short_hand = matched, getch, false
|
346
|
+
else # Short-hand (e.g. %{, %., %!, etc)
|
347
|
+
c, beg, short_hand = "Q", getch, true
|
348
|
+
end
|
349
|
+
|
350
|
+
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
351
|
+
rb_compile_error "unterminated quoted string meets end of file"
|
352
|
+
end
|
353
|
+
|
354
|
+
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
355
|
+
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
356
|
+
nnd, beg = beg, "\0" if nnd.nil?
|
357
|
+
|
358
|
+
token_type, text = nil, "%#{c}#{beg}"
|
359
|
+
token_type, string_type = case c
|
360
|
+
when "Q" then
|
361
|
+
ch = short_hand ? nnd : c + beg
|
362
|
+
text = "%#{ch}"
|
363
|
+
[:tSTRING_BEG, STR_DQUOTE]
|
364
|
+
when "q" then
|
365
|
+
[:tSTRING_BEG, STR_SQUOTE]
|
366
|
+
when "W" then
|
367
|
+
eat_whitespace
|
368
|
+
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
369
|
+
when "w" then
|
370
|
+
eat_whitespace
|
371
|
+
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
372
|
+
when "x" then
|
373
|
+
[:tXSTRING_BEG, STR_XQUOTE]
|
374
|
+
when "r" then
|
375
|
+
[:tREGEXP_BEG, STR_REGEXP]
|
376
|
+
when "s" then
|
377
|
+
self.lex_state = EXPR_FNAME
|
378
|
+
[:tSYMBEG, STR_SSYM]
|
379
|
+
when "I" then
|
380
|
+
eat_whitespace
|
381
|
+
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
382
|
+
when "i" then
|
383
|
+
eat_whitespace
|
384
|
+
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
385
|
+
end
|
386
|
+
|
387
|
+
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
388
|
+
token_type.nil?
|
389
|
+
|
390
|
+
raise "huh" unless string_type
|
391
|
+
|
392
|
+
string string_type, nnd, beg
|
393
|
+
|
394
|
+
return token_type, text
|
395
|
+
end
|
396
|
+
|
397
|
+
def parse_string quote # TODO: rewrite / remove
|
398
|
+
_, string_type, term, open = quote
|
399
|
+
|
400
|
+
space = false # FIX: remove these
|
401
|
+
func = string_type
|
402
|
+
paren = open
|
403
|
+
term_re = @@regexp_cache[term]
|
404
|
+
|
405
|
+
qwords = func =~ STR_FUNC_QWORDS
|
406
|
+
regexp = func =~ STR_FUNC_REGEXP
|
407
|
+
expand = func =~ STR_FUNC_EXPAND
|
408
|
+
|
409
|
+
unless func then # nil'ed from qwords below. *sigh*
|
410
|
+
return :tSTRING_END, nil
|
411
|
+
end
|
412
|
+
|
413
|
+
space = true if qwords and eat_whitespace
|
414
|
+
|
415
|
+
if self.string_nest == 0 && scan(/#{term_re}/) then
|
416
|
+
if qwords then
|
417
|
+
quote[1] = nil
|
418
|
+
return :tSPACE, nil
|
419
|
+
elsif regexp then
|
420
|
+
return :tREGEXP_END, self.regx_options
|
421
|
+
else
|
422
|
+
return :tSTRING_END, term
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
return :tSPACE, nil if space
|
427
|
+
|
428
|
+
self.string_buffer = []
|
429
|
+
|
430
|
+
if expand
|
431
|
+
case
|
432
|
+
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
433
|
+
# TODO: !ISASCII
|
434
|
+
# ?! see parser_peek_variable_name
|
435
|
+
return :tSTRING_DVAR, nil
|
436
|
+
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
437
|
+
# TODO: !ISASCII
|
438
|
+
return :tSTRING_DVAR, nil
|
439
|
+
when scan(/#[{]/) then
|
440
|
+
self.command_start = true
|
441
|
+
return :tSTRING_DBEG, nil
|
442
|
+
when scan(/#/) then
|
443
|
+
string_buffer << "#"
|
444
|
+
end
|
445
|
+
end
|
446
|
+
|
447
|
+
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
448
|
+
if func =~ STR_FUNC_REGEXP then
|
449
|
+
rb_compile_error "unterminated regexp meets end of file"
|
450
|
+
else
|
451
|
+
rb_compile_error "unterminated string meets end of file"
|
452
|
+
end
|
453
|
+
end
|
454
|
+
|
455
|
+
return :tSTRING_CONTENT, string_buffer.join
|
456
|
+
end
|
457
|
+
|
458
|
+
def possibly_escape_string text, check
|
459
|
+
content = match[1]
|
460
|
+
|
461
|
+
if text =~ check then
|
462
|
+
content.gsub(ESC) { unescape $1 }
|
463
|
+
else
|
464
|
+
content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
|
465
|
+
end
|
466
|
+
end
|
467
|
+
|
389
468
|
def process_amper text
|
390
469
|
token = if is_arg? && space_seen && !check(/\s/) then
|
391
470
|
warning("`&' interpreted as argument prefix")
|
392
471
|
:tAMPER
|
393
|
-
elsif
|
472
|
+
elsif lex_state =~ EXPR_BEG|EXPR_MID then
|
394
473
|
:tAMPER
|
395
474
|
else
|
396
475
|
:tAMPER2
|
@@ -402,7 +481,7 @@ class RubyLexer
|
|
402
481
|
def process_backref text
|
403
482
|
token = ss[1].to_sym
|
404
483
|
# TODO: can't do lineno hack w/ symbol
|
405
|
-
result
|
484
|
+
result EXPR_END, :tBACK_REF, token
|
406
485
|
end
|
407
486
|
|
408
487
|
def process_begin text
|
@@ -420,54 +499,33 @@ class RubyLexer
|
|
420
499
|
end
|
421
500
|
|
422
501
|
def process_brace_close text
|
423
|
-
# matching compare/parse23.y:8561
|
424
|
-
cond.lexpop
|
425
|
-
cmdarg.lexpop
|
426
|
-
|
427
502
|
case matched
|
428
503
|
when "}" then
|
429
504
|
self.brace_nest -= 1
|
430
|
-
self.lex_state = :expr_endarg # TODO: :expr_end ? Look at 2.6
|
431
|
-
|
432
505
|
return :tSTRING_DEND, matched if brace_nest < 0
|
506
|
+
end
|
507
|
+
|
508
|
+
# matching compare/parse26.y:8099
|
509
|
+
cond.pop
|
510
|
+
cmdarg.pop
|
511
|
+
|
512
|
+
case matched
|
513
|
+
when "}" then
|
514
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
433
515
|
return :tRCURLY, matched
|
434
516
|
when "]" then
|
435
517
|
self.paren_nest -= 1
|
436
|
-
self.lex_state = :
|
518
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
437
519
|
return :tRBRACK, matched
|
438
520
|
when ")" then
|
439
521
|
self.paren_nest -= 1
|
440
|
-
self.lex_state =
|
522
|
+
self.lex_state = EXPR_ENDFN
|
441
523
|
return :tRPAREN, matched
|
442
524
|
else
|
443
525
|
raise "Unknown bracing: #{matched.inspect}"
|
444
526
|
end
|
445
527
|
end
|
446
528
|
|
447
|
-
def process_colon1 text
|
448
|
-
# ?: / then / when
|
449
|
-
if is_end? || check(/\s/) then
|
450
|
-
return result :expr_beg, :tCOLON, text
|
451
|
-
end
|
452
|
-
|
453
|
-
case
|
454
|
-
when scan(/\'/) then
|
455
|
-
string STR_SSYM
|
456
|
-
when scan(/\"/) then
|
457
|
-
string STR_DSYM
|
458
|
-
end
|
459
|
-
|
460
|
-
result :expr_fname, :tSYMBEG, text
|
461
|
-
end
|
462
|
-
|
463
|
-
def process_colon2 text
|
464
|
-
if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
|
465
|
-
result :expr_beg, :tCOLON3, text
|
466
|
-
else
|
467
|
-
result :expr_dot, :tCOLON2, text
|
468
|
-
end
|
469
|
-
end
|
470
|
-
|
471
529
|
def process_brace_open text
|
472
530
|
# matching compare/parse23.y:8694
|
473
531
|
self.brace_nest += 1
|
@@ -479,67 +537,111 @@ class RubyLexer
|
|
479
537
|
return expr_result(:tLAMBEG, "{")
|
480
538
|
end
|
481
539
|
|
482
|
-
token = case
|
483
|
-
when
|
540
|
+
token = case
|
541
|
+
when lex_state =~ EXPR_LABELED then
|
484
542
|
:tLBRACE # hash
|
485
|
-
when
|
486
|
-
:tLCURLY # block (primary)
|
487
|
-
when
|
543
|
+
when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
|
544
|
+
:tLCURLY # block (primary) "{" in parse.y
|
545
|
+
when lex_state =~ EXPR_ENDARG then
|
488
546
|
:tLBRACE_ARG # block (expr)
|
489
547
|
else
|
490
548
|
:tLBRACE # hash
|
491
549
|
end
|
492
550
|
|
493
|
-
|
551
|
+
state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
|
494
552
|
self.command_start = true if token != :tLBRACE
|
495
553
|
|
496
|
-
|
554
|
+
cond.push false
|
555
|
+
cmdarg.push false
|
556
|
+
result state, token, text
|
557
|
+
end
|
558
|
+
|
559
|
+
def process_colon1 text
|
560
|
+
# ?: / then / when
|
561
|
+
if is_end? || check(/\s/) then
|
562
|
+
return result EXPR_BEG, :tCOLON, text
|
563
|
+
end
|
564
|
+
|
565
|
+
case
|
566
|
+
when scan(/\'/) then
|
567
|
+
string STR_SSYM
|
568
|
+
when scan(/\"/) then
|
569
|
+
string STR_DSYM
|
570
|
+
end
|
571
|
+
|
572
|
+
result EXPR_FNAME, :tSYMBEG, text
|
573
|
+
end
|
574
|
+
|
575
|
+
def process_colon2 text
|
576
|
+
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
577
|
+
result EXPR_BEG, :tCOLON3, text
|
578
|
+
else
|
579
|
+
result EXPR_DOT, :tCOLON2, text
|
580
|
+
end
|
497
581
|
end
|
498
582
|
|
499
583
|
def process_float text
|
500
584
|
rb_compile_error "Invalid numeric format" if text =~ /__/
|
501
585
|
|
502
586
|
case
|
503
|
-
when text.end_with?(
|
504
|
-
return result
|
505
|
-
when text.end_with?(
|
506
|
-
return result
|
507
|
-
when text.end_with?(
|
508
|
-
return result
|
587
|
+
when text.end_with?("ri")
|
588
|
+
return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
589
|
+
when text.end_with?("i")
|
590
|
+
return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
591
|
+
when text.end_with?("r")
|
592
|
+
return result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
509
593
|
else
|
510
|
-
return result
|
594
|
+
return result EXPR_NUM, :tFLOAT, text.to_f
|
511
595
|
end
|
512
596
|
end
|
513
597
|
|
514
598
|
def process_gvar text
|
515
599
|
text.lineno = self.lineno
|
516
|
-
result
|
600
|
+
result EXPR_END, :tGVAR, text
|
517
601
|
end
|
518
602
|
|
519
603
|
def process_gvar_oddity text
|
520
|
-
return result
|
604
|
+
return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
|
521
605
|
rb_compile_error "#{text.inspect} is not allowed as a global variable name"
|
522
606
|
end
|
523
607
|
|
524
608
|
def process_ivar text
|
525
609
|
tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
|
526
610
|
text.lineno = self.lineno
|
527
|
-
|
611
|
+
result EXPR_END, tok_id, text
|
612
|
+
end
|
613
|
+
|
614
|
+
def process_label text
|
615
|
+
symbol = possibly_escape_string text, /^\"/
|
616
|
+
|
617
|
+
result EXPR_LAB, :tLABEL, [symbol, self.lineno]
|
618
|
+
end
|
619
|
+
|
620
|
+
def process_label_or_string text
|
621
|
+
if @was_label && text =~ /:\Z/ then
|
622
|
+
@was_label = nil
|
623
|
+
return process_label text
|
624
|
+
elsif text =~ /:\Z/ then
|
625
|
+
ss.pos -= 1 # put back ":"
|
626
|
+
text = text[0..-2]
|
627
|
+
end
|
628
|
+
|
629
|
+
result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
|
528
630
|
end
|
529
631
|
|
530
632
|
def process_lchevron text
|
531
|
-
if (
|
633
|
+
if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
|
532
634
|
!is_end? &&
|
533
|
-
(!is_arg? ||
|
635
|
+
(!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
|
534
636
|
tok = self.heredoc_identifier
|
535
637
|
return tok if tok
|
536
638
|
end
|
537
639
|
|
538
|
-
if
|
539
|
-
self.lex_state =
|
640
|
+
if is_after_operator? then
|
641
|
+
self.lex_state = EXPR_ARG
|
540
642
|
else
|
541
|
-
self.command_start = true if lex_state
|
542
|
-
self.lex_state =
|
643
|
+
self.command_start = true if lex_state =~ EXPR_CLASS
|
644
|
+
self.lex_state = EXPR_BEG
|
543
645
|
end
|
544
646
|
|
545
647
|
return result(lex_state, :tLSHFT, "\<\<")
|
@@ -549,14 +651,14 @@ class RubyLexer
|
|
549
651
|
c = matched
|
550
652
|
hit = false
|
551
653
|
|
552
|
-
if c ==
|
654
|
+
if c == "#" then
|
553
655
|
ss.pos -= 1
|
554
656
|
|
555
657
|
# TODO: handle magic comments
|
556
658
|
while scan(/\s*\#.*(\n+|\z)/) do
|
557
659
|
hit = true
|
558
660
|
self.lineno += matched.lines.to_a.size
|
559
|
-
@comments << matched.gsub(/^ +#/,
|
661
|
+
@comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
560
662
|
end
|
561
663
|
|
562
664
|
return nil if end_of_stream?
|
@@ -567,17 +669,15 @@ class RubyLexer
|
|
567
669
|
# Replace a string of newlines with a single one
|
568
670
|
self.lineno += matched.lines.to_a.size if scan(/\n+/)
|
569
671
|
|
570
|
-
|
571
|
-
|
572
|
-
:expr_fname, :expr_dot) && !in_lex_state?(:expr_labeled)
|
573
|
-
|
672
|
+
c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
|
673
|
+
lex_state !~ EXPR_LABELED)
|
574
674
|
# TODO: figure out what token_seen is for
|
575
|
-
|
576
|
-
if c || self.lex_state == :expr_labeled then
|
675
|
+
if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
|
577
676
|
# ignore if !fallthrough?
|
578
677
|
if !c && parser.in_kwarg then
|
579
678
|
# normal newline
|
580
|
-
|
679
|
+
self.command_start = true
|
680
|
+
return result EXPR_BEG, :tNL, nil
|
581
681
|
else
|
582
682
|
return # skip
|
583
683
|
end
|
@@ -592,41 +692,46 @@ class RubyLexer
|
|
592
692
|
|
593
693
|
self.command_start = true
|
594
694
|
|
595
|
-
return result(
|
695
|
+
return result(EXPR_BEG, :tNL, nil)
|
596
696
|
end
|
597
697
|
|
598
698
|
def process_nthref text
|
599
699
|
# TODO: can't do lineno hack w/ number
|
600
|
-
result
|
700
|
+
result EXPR_END, :tNTH_REF, ss[1].to_i
|
601
701
|
end
|
602
702
|
|
603
703
|
def process_paren text
|
604
|
-
token =
|
704
|
+
token = if is_beg? then
|
705
|
+
:tLPAREN
|
706
|
+
elsif !space_seen then
|
707
|
+
# foo( ... ) => method call, no ambiguity
|
708
|
+
:tLPAREN2
|
709
|
+
elsif is_space_arg? then
|
710
|
+
:tLPAREN_ARG
|
711
|
+
elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
|
712
|
+
# TODO:
|
713
|
+
# warn("parentheses after method name is interpreted as " \
|
714
|
+
# "an argument list, not a decomposed argument")
|
715
|
+
:tLPAREN2
|
716
|
+
else
|
717
|
+
:tLPAREN2 # plain "(" in parse.y
|
718
|
+
end
|
605
719
|
|
606
720
|
self.paren_nest += 1
|
607
721
|
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
def process_paren19
|
613
|
-
if is_beg? then
|
614
|
-
:tLPAREN
|
615
|
-
elsif is_space_arg? then
|
616
|
-
:tLPAREN_ARG
|
617
|
-
else
|
618
|
-
:tLPAREN2 # plain '(' in parse.y
|
619
|
-
end
|
722
|
+
cond.push false
|
723
|
+
cmdarg.push false
|
724
|
+
result EXPR_PAR, token, text
|
620
725
|
end
|
621
726
|
|
622
727
|
def process_percent text
|
623
728
|
return parse_quote if is_beg?
|
624
729
|
|
625
|
-
return result
|
730
|
+
return result EXPR_BEG, :tOP_ASGN, "%" if scan(/\=/)
|
626
731
|
|
627
|
-
return parse_quote if
|
732
|
+
return parse_quote if is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
|
628
733
|
|
629
|
-
return result
|
734
|
+
return result :arg_state, :tPERCENT, "%"
|
630
735
|
end
|
631
736
|
|
632
737
|
def process_plus_minus text
|
@@ -637,33 +742,33 @@ class RubyLexer
|
|
637
742
|
[:tUMINUS, :tMINUS]
|
638
743
|
end
|
639
744
|
|
640
|
-
if
|
745
|
+
if is_after_operator? then
|
641
746
|
if scan(/@/) then
|
642
|
-
return result(
|
747
|
+
return result(EXPR_ARG, utype, "#{sign}@")
|
643
748
|
else
|
644
|
-
return result(
|
749
|
+
return result(EXPR_ARG, type, sign)
|
645
750
|
end
|
646
751
|
end
|
647
752
|
|
648
|
-
return result(
|
753
|
+
return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
|
649
754
|
|
650
|
-
if
|
755
|
+
if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
|
651
756
|
arg_ambiguous if is_arg?
|
652
757
|
|
653
758
|
if check(/\d/) then
|
654
759
|
return nil if utype == :tUPLUS
|
655
|
-
return result
|
760
|
+
return result EXPR_BEG, :tUMINUS_NUM, sign
|
656
761
|
end
|
657
762
|
|
658
|
-
return result
|
763
|
+
return result EXPR_BEG, utype, sign
|
659
764
|
end
|
660
765
|
|
661
|
-
|
766
|
+
result EXPR_BEG, type, sign
|
662
767
|
end
|
663
768
|
|
664
769
|
def process_questionmark text
|
665
770
|
if is_end? then
|
666
|
-
return result
|
771
|
+
return result EXPR_BEG, :tEH, "?"
|
667
772
|
end
|
668
773
|
|
669
774
|
if end_of_stream? then
|
@@ -672,12 +777,12 @@ class RubyLexer
|
|
672
777
|
|
673
778
|
if check(/\s|\v/) then
|
674
779
|
unless is_arg? then
|
675
|
-
c2 = { " " =>
|
676
|
-
"\n" =>
|
677
|
-
"\t" =>
|
678
|
-
"\v" =>
|
679
|
-
"\r" =>
|
680
|
-
"\f" =>
|
780
|
+
c2 = { " " => "s",
|
781
|
+
"\n" => "n",
|
782
|
+
"\t" => "t",
|
783
|
+
"\v" => "v",
|
784
|
+
"\r" => "r",
|
785
|
+
"\f" => "f" }[matched]
|
681
786
|
|
682
787
|
if c2 then
|
683
788
|
warning("invalid character syntax; use ?\\" + c2)
|
@@ -685,18 +790,28 @@ class RubyLexer
|
|
685
790
|
end
|
686
791
|
|
687
792
|
# ternary
|
688
|
-
return result
|
793
|
+
return result EXPR_BEG, :tEH, "?"
|
689
794
|
elsif check(/\w(?=\w)/) then # ternary, also
|
690
|
-
return result
|
795
|
+
return result EXPR_BEG, :tEH, "?"
|
691
796
|
end
|
692
797
|
|
693
798
|
c = if scan(/\\/) then
|
694
799
|
self.read_escape
|
695
800
|
else
|
696
|
-
|
801
|
+
getch
|
697
802
|
end
|
698
803
|
|
699
|
-
|
804
|
+
result EXPR_END, :tSTRING, c
|
805
|
+
end
|
806
|
+
|
807
|
+
def process_simple_string text
|
808
|
+
replacement = text[1..-2].gsub(ESC) {
|
809
|
+
unescape($1).b.force_encoding Encoding::UTF_8
|
810
|
+
}
|
811
|
+
|
812
|
+
replacement = replacement.b unless replacement.valid_encoding?
|
813
|
+
|
814
|
+
result EXPR_END, :tSTRING, replacement
|
700
815
|
end
|
701
816
|
|
702
817
|
def process_slash text
|
@@ -707,7 +822,7 @@ class RubyLexer
|
|
707
822
|
end
|
708
823
|
|
709
824
|
if scan(/\=/) then
|
710
|
-
return result(
|
825
|
+
return result(EXPR_BEG, :tOP_ASGN, "/")
|
711
826
|
end
|
712
827
|
|
713
828
|
if is_arg? && space_seen then
|
@@ -726,73 +841,68 @@ class RubyLexer
|
|
726
841
|
|
727
842
|
token = nil
|
728
843
|
|
729
|
-
if
|
844
|
+
if is_after_operator? then
|
730
845
|
case
|
731
846
|
when scan(/\]\=/) then
|
732
847
|
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
733
|
-
return result
|
848
|
+
return result EXPR_ARG, :tASET, "[]="
|
734
849
|
when scan(/\]/) then
|
735
850
|
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
736
|
-
return result
|
851
|
+
return result EXPR_ARG, :tAREF, "[]"
|
737
852
|
else
|
738
853
|
rb_compile_error "unexpected '['"
|
739
854
|
end
|
740
855
|
elsif is_beg? then
|
741
856
|
token = :tLBRACK
|
742
|
-
elsif is_arg? && space_seen then
|
857
|
+
elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
|
743
858
|
token = :tLBRACK
|
744
859
|
else
|
745
860
|
token = :tLBRACK2
|
746
861
|
end
|
747
862
|
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
end
|
752
|
-
|
753
|
-
def possibly_escape_string text, check
|
754
|
-
content = match[1]
|
755
|
-
|
756
|
-
if text =~ check then
|
757
|
-
content.gsub(ESC) { unescape $1 }
|
758
|
-
else
|
759
|
-
content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
|
760
|
-
end
|
863
|
+
cond.push false
|
864
|
+
cmdarg.push false
|
865
|
+
result EXPR_PAR, token, text
|
761
866
|
end
|
762
867
|
|
763
|
-
def
|
764
|
-
|
868
|
+
def process_string # TODO: rewrite / remove
|
869
|
+
# matches top of parser_yylex in compare/parse23.y:8113
|
870
|
+
token = if lex_strterm[0] == :heredoc then
|
871
|
+
self.heredoc lex_strterm
|
872
|
+
else
|
873
|
+
self.parse_string lex_strterm
|
874
|
+
end
|
765
875
|
|
766
|
-
|
767
|
-
end
|
876
|
+
token_type, c = token
|
768
877
|
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
878
|
+
# matches parser_string_term from 2.3, but way off from 2.5
|
879
|
+
if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
|
880
|
+
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
881
|
+
!cond.is_in_state) || is_arg?) &&
|
882
|
+
is_label_suffix? then
|
883
|
+
scan(/:/)
|
884
|
+
token_type = token[0] = :tLABEL_END
|
885
|
+
end
|
886
|
+
end
|
773
887
|
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
return process_label text
|
778
|
-
elsif text =~ /:\Z/ then
|
779
|
-
ss.pos -= 1 # put back ":"
|
780
|
-
text = text[0..-2]
|
888
|
+
if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
|
889
|
+
self.lex_strterm = nil
|
890
|
+
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
|
781
891
|
end
|
782
892
|
|
783
|
-
|
893
|
+
return token
|
784
894
|
end
|
785
895
|
|
786
|
-
def
|
787
|
-
symbol = possibly_escape_string text,
|
896
|
+
def process_symbol text
|
897
|
+
symbol = possibly_escape_string text, /^:\"/ # stupid emacs
|
788
898
|
|
789
|
-
result
|
899
|
+
result EXPR_LIT, :tSYMBOL, symbol
|
790
900
|
end
|
791
901
|
|
792
902
|
def process_token text
|
793
903
|
# matching: parse_ident in compare/parse23.y:7989
|
794
904
|
# TODO: make this always return [token, lineno]
|
795
|
-
self.last_state = lex_state
|
905
|
+
# FIX: remove: self.last_state = lex_state
|
796
906
|
|
797
907
|
token = self.token = text
|
798
908
|
token << matched if scan(/[\!\?](?!=)/)
|
@@ -801,7 +911,7 @@ class RubyLexer
|
|
801
911
|
case
|
802
912
|
when token =~ /[!?]$/ then
|
803
913
|
:tFID
|
804
|
-
when
|
914
|
+
when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
|
805
915
|
# ident=, not =~ => == or followed by =>
|
806
916
|
# TODO test lexing of a=>b vs a==>b
|
807
917
|
token << matched
|
@@ -814,31 +924,33 @@ class RubyLexer
|
|
814
924
|
|
815
925
|
if is_label_possible? and is_label_suffix? then
|
816
926
|
scan(/:/)
|
817
|
-
# TODO:
|
818
|
-
return result
|
927
|
+
# TODO: propagate the lineno to ALL results
|
928
|
+
return result EXPR_LAB, :tLABEL, [token, self.lineno]
|
819
929
|
end
|
820
930
|
|
821
|
-
# TODO: mb == ENC_CODERANGE_7BIT &&
|
822
|
-
|
931
|
+
# TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
|
932
|
+
if lex_state !~ EXPR_DOT then
|
823
933
|
# See if it is a reserved word.
|
824
934
|
keyword = RubyParserStuff::Keyword.keyword token
|
825
935
|
|
826
936
|
return process_token_keyword keyword if keyword
|
827
|
-
end
|
937
|
+
end
|
828
938
|
|
829
939
|
# matching: compare/parse23.y:8079
|
830
|
-
state = if is_beg? or is_arg? or
|
831
|
-
cmd_state ?
|
832
|
-
elsif
|
833
|
-
|
940
|
+
state = if is_beg? or is_arg? or lex_state =~ EXPR_DOT then
|
941
|
+
cmd_state ? EXPR_CMDARG : EXPR_ARG
|
942
|
+
elsif lex_state =~ EXPR_FNAME then
|
943
|
+
EXPR_ENDFN
|
834
944
|
else
|
835
|
-
|
945
|
+
EXPR_END
|
836
946
|
end
|
837
947
|
|
838
|
-
|
839
|
-
|
948
|
+
tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
|
949
|
+
|
950
|
+
if last_state !~ EXPR_DOT|EXPR_FNAME and
|
951
|
+
(tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
|
840
952
|
lvar_defined?(token) then
|
841
|
-
state =
|
953
|
+
state = EXPR_END|EXPR_LABEL
|
842
954
|
end
|
843
955
|
|
844
956
|
token.lineno = self.lineno # yes, on a string. I know... I know...
|
@@ -853,32 +965,30 @@ class RubyLexer
|
|
853
965
|
|
854
966
|
value = [token, self.lineno]
|
855
967
|
|
856
|
-
return result(lex_state, keyword.id0, value) if state
|
968
|
+
return result(lex_state, keyword.id0, value) if state =~ EXPR_FNAME
|
857
969
|
|
858
|
-
self.command_start = true if lex_state
|
970
|
+
self.command_start = true if lex_state =~ EXPR_BEG
|
859
971
|
|
860
972
|
case
|
861
|
-
when keyword.id0 == :kDO then
|
973
|
+
when keyword.id0 == :kDO then # parse26.y line 7591
|
862
974
|
case
|
863
975
|
when lambda_beginning? then
|
864
976
|
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
865
|
-
self.paren_nest -= 1
|
866
|
-
result
|
977
|
+
self.paren_nest -= 1 # TODO: question this?
|
978
|
+
result lex_state, :kDO_LAMBDA, value
|
867
979
|
when cond.is_in_state then
|
868
|
-
result
|
869
|
-
when cmdarg.is_in_state && state !=
|
870
|
-
result
|
871
|
-
when [:expr_beg, :expr_endarg].include?(state) then
|
872
|
-
result(lex_state, :kDO_BLOCK, value)
|
980
|
+
result lex_state, :kDO_COND, value
|
981
|
+
when cmdarg.is_in_state && state != EXPR_CMDARG then
|
982
|
+
result lex_state, :kDO_BLOCK, value
|
873
983
|
else
|
874
|
-
result
|
984
|
+
result lex_state, :kDO, value
|
875
985
|
end
|
876
|
-
when
|
877
|
-
result
|
986
|
+
when state =~ EXPR_PAD then
|
987
|
+
result lex_state, keyword.id0, value
|
878
988
|
when keyword.id0 != keyword.id1 then
|
879
|
-
result
|
989
|
+
result EXPR_PAR, keyword.id1, value
|
880
990
|
else
|
881
|
-
result
|
991
|
+
result lex_state, keyword.id1, value
|
882
992
|
end
|
883
993
|
end
|
884
994
|
|
@@ -886,9 +996,9 @@ class RubyLexer
|
|
886
996
|
ss.unscan # put back "_"
|
887
997
|
|
888
998
|
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
889
|
-
|
890
|
-
elsif scan(
|
891
|
-
|
999
|
+
[RubyLexer::EOF, RubyLexer::EOF]
|
1000
|
+
elsif scan(/#{IDENT_CHAR}+/) then
|
1001
|
+
process_token matched
|
892
1002
|
end
|
893
1003
|
end
|
894
1004
|
|
@@ -921,10 +1031,11 @@ class RubyLexer
|
|
921
1031
|
when scan(/s/) then # space
|
922
1032
|
" "
|
923
1033
|
when scan(/[0-7]{1,3}/) then # octal constant
|
924
|
-
(matched.to_i(8) & 0xFF).chr
|
1034
|
+
(matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
|
925
1035
|
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
926
|
-
|
927
|
-
|
1036
|
+
# TODO: force encode everything to UTF-8?
|
1037
|
+
ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
|
1038
|
+
when check(/M-\\./) then
|
928
1039
|
scan(/M-\\/) # eat it
|
929
1040
|
c = self.read_escape
|
930
1041
|
c[0] = (c[0].ord | 0x80).chr
|
@@ -938,6 +1049,11 @@ class RubyLexer
|
|
938
1049
|
c = self.read_escape
|
939
1050
|
c[0] = (c[0].ord & 0x9f).chr
|
940
1051
|
c
|
1052
|
+
when check(/(C-|c)\\(?!u|\\)/) then
|
1053
|
+
scan(/(C-|c)\\/) # eat it
|
1054
|
+
c = read_escape
|
1055
|
+
c[0] = (c[0].ord & 0x9f).chr
|
1056
|
+
c
|
941
1057
|
when scan(/C-\?|c\?/) then
|
942
1058
|
127.chr
|
943
1059
|
when scan(/(C-|c)(.)/) then
|
@@ -946,15 +1062,25 @@ class RubyLexer
|
|
946
1062
|
c
|
947
1063
|
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
948
1064
|
matched
|
949
|
-
when scan(/u(
|
950
|
-
[ss[1].
|
1065
|
+
when scan(/u(\h{4})/) then
|
1066
|
+
[ss[1].to_i(16)].pack("U")
|
1067
|
+
when scan(/u(\h{1,3})/) then
|
1068
|
+
rb_compile_error "Invalid escape character syntax"
|
1069
|
+
when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
|
1070
|
+
ss[1].split.map { |s| s.to_i(16) }.pack("U*")
|
951
1071
|
when scan(/[McCx0-9]/) || end_of_stream? then
|
952
1072
|
rb_compile_error("Invalid escape character syntax")
|
953
1073
|
else
|
954
|
-
|
1074
|
+
getch
|
955
1075
|
end.dup
|
956
1076
|
end
|
957
1077
|
|
1078
|
+
def getch
|
1079
|
+
c = ss.getch
|
1080
|
+
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1081
|
+
c
|
1082
|
+
end
|
1083
|
+
|
958
1084
|
def regx_options # TODO: rewrite / remove
|
959
1085
|
good, bad = [], []
|
960
1086
|
|
@@ -974,7 +1100,7 @@ class RubyLexer
|
|
974
1100
|
self.brace_nest = 0
|
975
1101
|
self.command_start = true
|
976
1102
|
self.comments = []
|
977
|
-
self.lex_state =
|
1103
|
+
self.lex_state = EXPR_NONE
|
978
1104
|
self.lex_strterm = nil
|
979
1105
|
self.lineno = 1
|
980
1106
|
self.lpar_beg = nil
|
@@ -988,29 +1114,30 @@ class RubyLexer
|
|
988
1114
|
self.cmdarg.reset
|
989
1115
|
end
|
990
1116
|
|
991
|
-
def result
|
992
|
-
|
993
|
-
self.lex_state =
|
1117
|
+
def result new_state, token, text # :nodoc:
|
1118
|
+
new_state = self.arg_state if new_state == :arg_state
|
1119
|
+
self.lex_state = new_state if new_state
|
994
1120
|
[token, text]
|
995
1121
|
end
|
996
1122
|
|
997
|
-
def
|
998
|
-
|
1123
|
+
def ruby22_label?
|
1124
|
+
ruby22plus? and is_label_possible?
|
999
1125
|
end
|
1000
1126
|
|
1001
|
-
def
|
1002
|
-
|
1127
|
+
def ruby22plus?
|
1128
|
+
parser.class.version >= 22
|
1003
1129
|
end
|
1004
1130
|
|
1005
|
-
def
|
1006
|
-
|
1007
|
-
self.extra_lineno += r.count("\n") if r
|
1008
|
-
r
|
1131
|
+
def ruby23plus?
|
1132
|
+
parser.class.version >= 23
|
1009
1133
|
end
|
1010
1134
|
|
1011
|
-
def
|
1012
|
-
|
1013
|
-
|
1135
|
+
def ruby24minus?
|
1136
|
+
parser.class.version <= 24
|
1137
|
+
end
|
1138
|
+
|
1139
|
+
def scan re
|
1140
|
+
ss.scan re
|
1014
1141
|
end
|
1015
1142
|
|
1016
1143
|
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
@@ -1033,12 +1160,6 @@ class RubyLexer
|
|
1033
1160
|
self.lex_strterm = [:strterm, type, beg, nnd]
|
1034
1161
|
end
|
1035
1162
|
|
1036
|
-
# TODO: consider
|
1037
|
-
# def src= src
|
1038
|
-
# raise "bad src: #{src.inspect}" unless String === src
|
1039
|
-
# @src = RPStringScanner.new(src)
|
1040
|
-
# end
|
1041
|
-
|
1042
1163
|
def tokadd_escape term # TODO: rewrite / remove
|
1043
1164
|
case
|
1044
1165
|
when scan(/\\\n/) then
|
@@ -1057,8 +1178,10 @@ class RubyLexer
|
|
1057
1178
|
prev = self.string_buffer.last
|
1058
1179
|
if term == chr && prev && prev.end_with?("(?") then
|
1059
1180
|
self.string_buffer << chr
|
1181
|
+
elsif term == chr || chr.ascii_only? then
|
1182
|
+
self.string_buffer << matched # dunno why we keep them for ascii
|
1060
1183
|
else
|
1061
|
-
self.string_buffer <<
|
1184
|
+
self.string_buffer << chr # HACK? this is such a rat's nest
|
1062
1185
|
end
|
1063
1186
|
else
|
1064
1187
|
rb_compile_error "Invalid escape character syntax"
|
@@ -1066,22 +1189,24 @@ class RubyLexer
|
|
1066
1189
|
end
|
1067
1190
|
|
1068
1191
|
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1069
|
-
qwords =
|
1070
|
-
escape =
|
1071
|
-
expand =
|
1072
|
-
regexp =
|
1073
|
-
symbol =
|
1192
|
+
qwords = func =~ STR_FUNC_QWORDS
|
1193
|
+
escape = func =~ STR_FUNC_ESCAPE
|
1194
|
+
expand = func =~ STR_FUNC_EXPAND
|
1195
|
+
regexp = func =~ STR_FUNC_REGEXP
|
1196
|
+
symbol = func =~ STR_FUNC_SYMBOL
|
1074
1197
|
|
1075
1198
|
paren_re = @@regexp_cache[paren]
|
1076
|
-
term_re =
|
1199
|
+
term_re = if term == "\n"
|
1200
|
+
/#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
|
1201
|
+
else
|
1202
|
+
@@regexp_cache[term]
|
1203
|
+
end
|
1077
1204
|
|
1078
1205
|
until end_of_stream? do
|
1079
1206
|
c = nil
|
1080
1207
|
handled = true
|
1081
1208
|
|
1082
1209
|
case
|
1083
|
-
when paren_re && scan(paren_re) then
|
1084
|
-
self.string_nest += 1
|
1085
1210
|
when scan(term_re) then
|
1086
1211
|
if self.string_nest == 0 then
|
1087
1212
|
ss.pos -= 1
|
@@ -1089,7 +1214,9 @@ class RubyLexer
|
|
1089
1214
|
else
|
1090
1215
|
self.string_nest -= 1
|
1091
1216
|
end
|
1092
|
-
when
|
1217
|
+
when paren_re && scan(paren_re) then
|
1218
|
+
self.string_nest += 1
|
1219
|
+
when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
|
1093
1220
|
ss.pos -= 1
|
1094
1221
|
break
|
1095
1222
|
when qwords && scan(/\s/) then
|
@@ -1103,7 +1230,7 @@ class RubyLexer
|
|
1103
1230
|
string_buffer << "\n"
|
1104
1231
|
next
|
1105
1232
|
when qwords && scan(/\\\s/) then
|
1106
|
-
c =
|
1233
|
+
c = " "
|
1107
1234
|
when expand && scan(/\\\n/) then
|
1108
1235
|
next
|
1109
1236
|
when regexp && check(/\\/) then
|
@@ -1128,12 +1255,16 @@ class RubyLexer
|
|
1128
1255
|
end # top case
|
1129
1256
|
|
1130
1257
|
unless handled then
|
1131
|
-
t =
|
1132
|
-
|
1258
|
+
t = if term == "\n"
|
1259
|
+
Regexp.escape "\r\n"
|
1260
|
+
else
|
1261
|
+
Regexp.escape term
|
1262
|
+
end
|
1263
|
+
x = Regexp.escape paren if paren && paren != "\000"
|
1133
1264
|
re = if qwords then
|
1134
|
-
/[^#{t}#{x}
|
1265
|
+
/[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
|
1135
1266
|
else
|
1136
|
-
/[^#{t}#{x}
|
1267
|
+
/[^#{t}#{x}\#\\]+|./
|
1137
1268
|
end
|
1138
1269
|
|
1139
1270
|
scan re
|
@@ -1173,12 +1304,15 @@ class RubyLexer
|
|
1173
1304
|
s
|
1174
1305
|
when /^[McCx0-9]/ then
|
1175
1306
|
rb_compile_error("Invalid escape character syntax")
|
1176
|
-
when /u(
|
1307
|
+
when /u(\h{4})/ then
|
1177
1308
|
[$1.delete("{}").to_i(16)].pack("U")
|
1309
|
+
when /u(\h{1,3})/ then
|
1310
|
+
rb_compile_error("Invalid escape character syntax")
|
1311
|
+
when /u\{(\h+(?:\s+\h+)*)\}/ then
|
1312
|
+
$1.split.map { |s| s.to_i(16) }.pack("U*")
|
1178
1313
|
else
|
1179
1314
|
s
|
1180
1315
|
end
|
1181
|
-
x.force_encoding "UTF-8" if HAS_ENC
|
1182
1316
|
x
|
1183
1317
|
end
|
1184
1318
|
|
@@ -1186,172 +1320,154 @@ class RubyLexer
|
|
1186
1320
|
# do nothing for now
|
1187
1321
|
end
|
1188
1322
|
|
1189
|
-
def
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
def ruby23plus?
|
1194
|
-
parser.class.version >= 23
|
1323
|
+
def was_label?
|
1324
|
+
@was_label = ruby22_label?
|
1325
|
+
true
|
1195
1326
|
end
|
1196
1327
|
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1200
|
-
self.heredoc lex_strterm
|
1201
|
-
else
|
1202
|
-
self.parse_string lex_strterm
|
1203
|
-
end
|
1328
|
+
class State
|
1329
|
+
attr_accessor :n
|
1330
|
+
attr_accessor :names
|
1204
1331
|
|
1205
|
-
|
1332
|
+
# TODO: take a shared hash of strings for inspect/to_s
|
1333
|
+
def initialize o, names
|
1334
|
+
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
1206
1335
|
|
1207
|
-
|
1208
|
-
|
1209
|
-
if (([:expr_beg, :expr_endfn].include?(lex_state) &&
|
1210
|
-
!cond.is_in_state) || is_arg?) &&
|
1211
|
-
is_label_suffix? then
|
1212
|
-
scan(/:/)
|
1213
|
-
token_type = token[0] = :tLABEL_END
|
1214
|
-
end
|
1336
|
+
self.n = o
|
1337
|
+
self.names = names
|
1215
1338
|
end
|
1216
1339
|
|
1217
|
-
|
1218
|
-
self.
|
1219
|
-
# TODO: :expr_beg|:expr_label
|
1220
|
-
self.lex_state = (token_type == :tLABEL_END) ? :expr_label : :expr_end
|
1340
|
+
def == o
|
1341
|
+
self.equal?(o) || (o.class == self.class && o.n == self.n)
|
1221
1342
|
end
|
1222
1343
|
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
def parse_quote # TODO: remove / rewrite
|
1227
|
-
beg, nnd, short_hand, c = nil, nil, false, nil
|
1228
|
-
|
1229
|
-
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
1230
|
-
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
1231
|
-
c, beg, short_hand = matched, ss.getch, false
|
1232
|
-
else # Short-hand (e.g. %{, %., %!, etc)
|
1233
|
-
c, beg, short_hand = 'Q', ss.getch, true
|
1234
|
-
end
|
1235
|
-
|
1236
|
-
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
1237
|
-
rb_compile_error "unterminated quoted string meets end of file"
|
1238
|
-
end
|
1239
|
-
|
1240
|
-
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
1241
|
-
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
1242
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
1243
|
-
|
1244
|
-
token_type, text = nil, "%#{c}#{beg}"
|
1245
|
-
token_type, string_type = case c
|
1246
|
-
when 'Q' then
|
1247
|
-
ch = short_hand ? nnd : c + beg
|
1248
|
-
text = "%#{ch}"
|
1249
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
1250
|
-
when 'q' then
|
1251
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
1252
|
-
when 'W' then
|
1253
|
-
eat_whitespace
|
1254
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1255
|
-
when 'w' then
|
1256
|
-
eat_whitespace
|
1257
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1258
|
-
when 'x' then
|
1259
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
1260
|
-
when 'r' then
|
1261
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
1262
|
-
when 's' then
|
1263
|
-
self.lex_state = :expr_fname
|
1264
|
-
[:tSYMBEG, STR_SSYM]
|
1265
|
-
when 'I' then
|
1266
|
-
eat_whitespace
|
1267
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1268
|
-
when 'i' then
|
1269
|
-
eat_whitespace
|
1270
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1271
|
-
end
|
1272
|
-
|
1273
|
-
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
1274
|
-
token_type.nil?
|
1275
|
-
|
1276
|
-
raise "huh" unless string_type
|
1277
|
-
|
1278
|
-
string string_type, nnd, beg
|
1279
|
-
|
1280
|
-
return token_type, text
|
1281
|
-
end
|
1282
|
-
|
1283
|
-
def parse_string quote # TODO: rewrite / remove
|
1284
|
-
_, string_type, term, open = quote
|
1285
|
-
|
1286
|
-
space = false # FIX: remove these
|
1287
|
-
func = string_type
|
1288
|
-
paren = open
|
1289
|
-
term_re = @@regexp_cache[term]
|
1290
|
-
|
1291
|
-
qwords = (func & STR_FUNC_QWORDS) != 0
|
1292
|
-
regexp = (func & STR_FUNC_REGEXP) != 0
|
1293
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
1294
|
-
|
1295
|
-
unless func then # nil'ed from qwords below. *sigh*
|
1296
|
-
return :tSTRING_END, nil
|
1344
|
+
def =~ v
|
1345
|
+
(self.n & v.n) != 0
|
1297
1346
|
end
|
1298
1347
|
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
quote[1] = nil
|
1304
|
-
return :tSPACE, nil
|
1305
|
-
elsif regexp then
|
1306
|
-
return :tREGEXP_END, self.regx_options
|
1307
|
-
else
|
1308
|
-
return :tSTRING_END, term
|
1309
|
-
end
|
1348
|
+
def | v
|
1349
|
+
raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
|
1350
|
+
self.names == v.names
|
1351
|
+
self.class.new(self.n | v.n, self.names)
|
1310
1352
|
end
|
1311
1353
|
|
1312
|
-
|
1354
|
+
def inspect
|
1355
|
+
return "Value(0)" if n.zero? # HACK?
|
1313
1356
|
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
1319
|
-
# TODO: !ISASCII
|
1320
|
-
# ?! see parser_peek_variable_name
|
1321
|
-
return :tSTRING_DVAR, nil
|
1322
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
1323
|
-
# TODO: !ISASCII
|
1324
|
-
return :tSTRING_DVAR, nil
|
1325
|
-
when scan(/#[{]/) then
|
1326
|
-
self.command_start = true
|
1327
|
-
return :tSTRING_DBEG, nil
|
1328
|
-
when scan(/#/) then
|
1329
|
-
string_buffer << '#'
|
1330
|
-
end
|
1357
|
+
names.map { |v, k| k if self =~ v }.
|
1358
|
+
compact.
|
1359
|
+
join("|").
|
1360
|
+
gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
|
1331
1361
|
end
|
1332
1362
|
|
1333
|
-
|
1334
|
-
|
1363
|
+
alias to_s inspect
|
1364
|
+
|
1365
|
+
module Values
|
1366
|
+
expr_names = {}
|
1367
|
+
|
1368
|
+
EXPR_NONE = State.new 0x0, expr_names
|
1369
|
+
EXPR_BEG = State.new 0x1, expr_names
|
1370
|
+
EXPR_END = State.new 0x2, expr_names
|
1371
|
+
EXPR_ENDARG = State.new 0x4, expr_names
|
1372
|
+
EXPR_ENDFN = State.new 0x8, expr_names
|
1373
|
+
EXPR_ARG = State.new 0x10, expr_names
|
1374
|
+
EXPR_CMDARG = State.new 0x20, expr_names
|
1375
|
+
EXPR_MID = State.new 0x40, expr_names
|
1376
|
+
EXPR_FNAME = State.new 0x80, expr_names
|
1377
|
+
EXPR_DOT = State.new 0x100, expr_names
|
1378
|
+
EXPR_CLASS = State.new 0x200, expr_names
|
1379
|
+
EXPR_LABEL = State.new 0x400, expr_names
|
1380
|
+
EXPR_LABELED = State.new 0x800, expr_names
|
1381
|
+
EXPR_FITEM = State.new 0x1000, expr_names
|
1382
|
+
|
1383
|
+
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
1384
|
+
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
1385
|
+
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
1386
|
+
|
1387
|
+
# extra fake lex_state names to make things a bit cleaner
|
1388
|
+
|
1389
|
+
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
1390
|
+
EXPR_LIT = EXPR_END|EXPR_ENDARG
|
1391
|
+
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
1392
|
+
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
1393
|
+
|
1394
|
+
EXPR_NUM = EXPR_LIT
|
1395
|
+
|
1396
|
+
expr_names.merge!(EXPR_NONE => "EXPR_NONE",
|
1397
|
+
EXPR_BEG => "EXPR_BEG",
|
1398
|
+
EXPR_END => "EXPR_END",
|
1399
|
+
EXPR_ENDARG => "EXPR_ENDARG",
|
1400
|
+
EXPR_ENDFN => "EXPR_ENDFN",
|
1401
|
+
EXPR_ARG => "EXPR_ARG",
|
1402
|
+
EXPR_CMDARG => "EXPR_CMDARG",
|
1403
|
+
EXPR_MID => "EXPR_MID",
|
1404
|
+
EXPR_FNAME => "EXPR_FNAME",
|
1405
|
+
EXPR_DOT => "EXPR_DOT",
|
1406
|
+
EXPR_CLASS => "EXPR_CLASS",
|
1407
|
+
EXPR_LABEL => "EXPR_LABEL",
|
1408
|
+
EXPR_LABELED => "EXPR_LABELED",
|
1409
|
+
EXPR_FITEM => "EXPR_FITEM")
|
1410
|
+
|
1411
|
+
# ruby constants for strings
|
1412
|
+
|
1413
|
+
str_func_names = {}
|
1414
|
+
|
1415
|
+
STR_FUNC_BORING = State.new 0x00, str_func_names
|
1416
|
+
STR_FUNC_ESCAPE = State.new 0x01, str_func_names
|
1417
|
+
STR_FUNC_EXPAND = State.new 0x02, str_func_names
|
1418
|
+
STR_FUNC_REGEXP = State.new 0x04, str_func_names
|
1419
|
+
STR_FUNC_QWORDS = State.new 0x08, str_func_names
|
1420
|
+
STR_FUNC_SYMBOL = State.new 0x10, str_func_names
|
1421
|
+
STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
|
1422
|
+
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
1423
|
+
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
1424
|
+
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
1425
|
+
STR_FUNC_ICNTNT = State.new 0x10000, str_func_names # <<~HEREDOC -- TODO: remove?
|
1426
|
+
|
1427
|
+
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
1428
|
+
|
1429
|
+
STR_SQUOTE = STR_FUNC_BORING
|
1430
|
+
STR_DQUOTE = STR_FUNC_EXPAND
|
1431
|
+
STR_XQUOTE = STR_FUNC_EXPAND
|
1432
|
+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
1433
|
+
STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
|
1434
|
+
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
1435
|
+
STR_SSYM = STR_FUNC_SYMBOL
|
1436
|
+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
1437
|
+
|
1438
|
+
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
1439
|
+
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
1440
|
+
STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
|
1441
|
+
STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
|
1442
|
+
STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
|
1443
|
+
STR_FUNC_INDENT => "STR_FUNC_INDENT",
|
1444
|
+
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
1445
|
+
STR_FUNC_LIST => "STR_FUNC_LIST",
|
1446
|
+
STR_FUNC_TERM => "STR_FUNC_TERM",
|
1447
|
+
STR_FUNC_ICNTNT => "STR_FUNC_ICNTNT",
|
1448
|
+
STR_SQUOTE => "STR_SQUOTE")
|
1335
1449
|
end
|
1336
1450
|
|
1337
|
-
|
1451
|
+
include Values
|
1338
1452
|
end
|
1453
|
+
|
1454
|
+
include State::Values
|
1339
1455
|
end
|
1340
1456
|
|
1341
1457
|
require "ruby_lexer.rex"
|
1342
1458
|
|
1343
1459
|
if ENV["RP_LINENO_DEBUG"] then
|
1344
1460
|
class RubyLexer
|
1345
|
-
alias :old_lineno= :lineno=
|
1346
|
-
|
1347
1461
|
def d o
|
1348
1462
|
$stderr.puts o.inspect
|
1349
1463
|
end
|
1350
1464
|
|
1465
|
+
alias old_lineno= lineno=
|
1466
|
+
|
1351
1467
|
def lineno= n
|
1352
1468
|
self.old_lineno= n
|
1353
1469
|
where = caller.first.split(/:/).first(2).join(":")
|
1354
|
-
d :lineno => [n, where, ss && ss.rest[0,40]]
|
1470
|
+
d :lineno => [n, where, ss && ss.rest[0, 40]]
|
1355
1471
|
end
|
1356
1472
|
end
|
1357
1473
|
end
|