ruby_parser 3.13.1 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.autotest +18 -29
- data/History.rdoc +38 -0
- data/README.rdoc +3 -3
- data/Rakefile +10 -13
- data/bin/ruby_parse +3 -1
- data/lib/ruby20_parser.rb +3042 -2866
- data/lib/ruby20_parser.y +391 -247
- data/lib/ruby21_parser.rb +3088 -2916
- data/lib/ruby21_parser.y +399 -254
- data/lib/ruby22_parser.rb +3118 -2937
- data/lib/ruby22_parser.y +400 -255
- data/lib/ruby23_parser.rb +3119 -2940
- data/lib/ruby23_parser.y +400 -255
- data/lib/ruby24_parser.rb +3089 -2905
- data/lib/ruby24_parser.y +404 -257
- data/lib/ruby25_parser.rb +3089 -2905
- data/lib/ruby25_parser.y +404 -257
- data/lib/ruby26_parser.rb +3095 -2909
- data/lib/ruby26_parser.y +410 -261
- data/lib/ruby_lexer.rb +424 -432
- data/lib/ruby_lexer.rex.rb +1 -1
- data/lib/ruby_parser.rb +27 -27
- data/lib/ruby_parser.yy +412 -262
- data/lib/ruby_parser_extras.rb +627 -406
- data/test/test_ruby_lexer.rb +1148 -1093
- data/test/test_ruby_parser.rb +2259 -1915
- data/test/test_ruby_parser_extras.rb +39 -4
- data/tools/munge.rb +1 -1
- data/tools/ripper.rb +13 -2
- metadata +8 -8
- metadata.gz.sig +0 -0
data/lib/ruby_lexer.rb
CHANGED
@@ -4,135 +4,9 @@
|
|
4
4
|
$DEBUG = true if ENV["DEBUG"]
|
5
5
|
|
6
6
|
class RubyLexer
|
7
|
-
|
8
7
|
# :stopdoc:
|
9
|
-
HAS_ENC = "".respond_to? :encoding
|
10
|
-
|
11
|
-
IDENT_CHAR = if HAS_ENC then
|
12
|
-
/[\w\u0080-\u{10ffff}]/u
|
13
|
-
else
|
14
|
-
/[\w\x80-\xFF]/n
|
15
|
-
end
|
16
|
-
|
17
8
|
EOF = :eof_haha!
|
18
9
|
|
19
|
-
# ruby constants for strings (should this be moved somewhere else?)
|
20
|
-
|
21
|
-
STR_FUNC_BORING = 0x00
|
22
|
-
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
|
23
|
-
STR_FUNC_EXPAND = 0x02
|
24
|
-
STR_FUNC_REGEXP = 0x04
|
25
|
-
STR_FUNC_QWORDS = 0x08
|
26
|
-
STR_FUNC_SYMBOL = 0x10
|
27
|
-
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
|
28
|
-
STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
|
29
|
-
|
30
|
-
STR_SQUOTE = STR_FUNC_BORING
|
31
|
-
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
32
|
-
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
33
|
-
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
34
|
-
STR_SSYM = STR_FUNC_SYMBOL
|
35
|
-
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
36
|
-
|
37
|
-
class State
|
38
|
-
attr_accessor :n
|
39
|
-
|
40
|
-
def initialize o
|
41
|
-
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
42
|
-
|
43
|
-
self.n = o
|
44
|
-
end
|
45
|
-
|
46
|
-
def == o
|
47
|
-
o.class == self.class && o.n == self.n
|
48
|
-
end
|
49
|
-
|
50
|
-
def =~ v
|
51
|
-
(self.n & v.n) != 0
|
52
|
-
end
|
53
|
-
|
54
|
-
def | v
|
55
|
-
self.class.new(self.n | v.n)
|
56
|
-
end
|
57
|
-
|
58
|
-
def inspect
|
59
|
-
return "EXPR_NONE" if n.zero?
|
60
|
-
NAMES.map { |v,k| k if self =~ v }.compact.join "|"
|
61
|
-
end
|
62
|
-
|
63
|
-
module Values
|
64
|
-
EXPR_NONE = State.new 0x0
|
65
|
-
EXPR_BEG = State.new 0x1
|
66
|
-
EXPR_END = State.new 0x2
|
67
|
-
EXPR_ENDARG = State.new 0x4
|
68
|
-
EXPR_ENDFN = State.new 0x8
|
69
|
-
EXPR_ARG = State.new 0x10
|
70
|
-
EXPR_CMDARG = State.new 0x20
|
71
|
-
EXPR_MID = State.new 0x40
|
72
|
-
EXPR_FNAME = State.new 0x80
|
73
|
-
EXPR_DOT = State.new 0x100
|
74
|
-
EXPR_CLASS = State.new 0x200
|
75
|
-
EXPR_LABEL = State.new 0x400
|
76
|
-
EXPR_LABELED = State.new 0x800
|
77
|
-
EXPR_FITEM = State.new 0x1000
|
78
|
-
|
79
|
-
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
80
|
-
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
81
|
-
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
82
|
-
|
83
|
-
# extra fake lex_state names to make things a bit cleaner
|
84
|
-
|
85
|
-
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
86
|
-
EXPR_NUM = EXPR_END|EXPR_ENDARG
|
87
|
-
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
88
|
-
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
89
|
-
end
|
90
|
-
|
91
|
-
include Values
|
92
|
-
|
93
|
-
NAMES = {
|
94
|
-
EXPR_NONE => "EXPR_NONE",
|
95
|
-
EXPR_BEG => "EXPR_BEG",
|
96
|
-
EXPR_END => "EXPR_END",
|
97
|
-
EXPR_ENDARG => "EXPR_ENDARG",
|
98
|
-
EXPR_ENDFN => "EXPR_ENDFN",
|
99
|
-
EXPR_ARG => "EXPR_ARG",
|
100
|
-
EXPR_CMDARG => "EXPR_CMDARG",
|
101
|
-
EXPR_MID => "EXPR_MID",
|
102
|
-
EXPR_FNAME => "EXPR_FNAME",
|
103
|
-
EXPR_DOT => "EXPR_DOT",
|
104
|
-
EXPR_CLASS => "EXPR_CLASS",
|
105
|
-
EXPR_LABEL => "EXPR_LABEL",
|
106
|
-
EXPR_LABELED => "EXPR_LABELED",
|
107
|
-
EXPR_FITEM => "EXPR_FITEM",
|
108
|
-
}
|
109
|
-
end
|
110
|
-
|
111
|
-
include State::Values
|
112
|
-
|
113
|
-
if $DEBUG then
|
114
|
-
def lex_state= o
|
115
|
-
return if @lex_state == o
|
116
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
117
|
-
if ENV["V"] then
|
118
|
-
c = caller[0]
|
119
|
-
c = caller[1] if c =~ /\b(expr_)?result\b/
|
120
|
-
c = caller[2] if c =~ /\b(expr_)?result\b/
|
121
|
-
warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
|
122
|
-
else
|
123
|
-
warn "lex_state: %p -> %p" % [lex_state, o]
|
124
|
-
end
|
125
|
-
@lex_state = o
|
126
|
-
end
|
127
|
-
else
|
128
|
-
def lex_state= o
|
129
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
130
|
-
@lex_state = o
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
attr_reader :lex_state
|
135
|
-
|
136
10
|
ESCAPES = {
|
137
11
|
"a" => "\007",
|
138
12
|
"b" => "\010",
|
@@ -149,6 +23,14 @@ class RubyLexer
|
|
149
23
|
"c\?" => 127.chr,
|
150
24
|
}
|
151
25
|
|
26
|
+
HAS_ENC = "".respond_to? :encoding
|
27
|
+
|
28
|
+
IDENT_CHAR = if HAS_ENC then
|
29
|
+
/[\w\u0080-\u{10ffff}]/u
|
30
|
+
else
|
31
|
+
/[\w\x80-\xFF]/n
|
32
|
+
end
|
33
|
+
|
152
34
|
TOKENS = {
|
153
35
|
"!" => :tBANG,
|
154
36
|
"!=" => :tNEQ,
|
@@ -165,13 +47,26 @@ class RubyLexer
|
|
165
47
|
"->" => :tLAMBDA,
|
166
48
|
}
|
167
49
|
|
168
|
-
|
169
|
-
|
170
|
-
@@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
|
50
|
+
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
171
51
|
@@regexp_cache[nil] = nil
|
172
52
|
|
53
|
+
if $DEBUG then
|
54
|
+
attr_reader :lex_state
|
55
|
+
|
56
|
+
def lex_state= o
|
57
|
+
return if @lex_state == o
|
58
|
+
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
59
|
+
|
60
|
+
warn "lex_state: %p -> %p" % [lex_state, o]
|
61
|
+
|
62
|
+
@lex_state = o
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
173
66
|
# :startdoc:
|
174
67
|
|
68
|
+
attr_accessor :lex_state unless $DEBUG
|
69
|
+
|
175
70
|
attr_accessor :lineno # we're bypassing oedipus' lineno handling.
|
176
71
|
attr_accessor :brace_nest
|
177
72
|
attr_accessor :cmdarg
|
@@ -209,7 +104,7 @@ class RubyLexer
|
|
209
104
|
end
|
210
105
|
|
211
106
|
def arg_ambiguous
|
212
|
-
self.warning
|
107
|
+
self.warning "Ambiguous first argument. make sure."
|
213
108
|
end
|
214
109
|
|
215
110
|
def arg_state
|
@@ -219,7 +114,12 @@ class RubyLexer
|
|
219
114
|
def beginning_of_line?
|
220
115
|
ss.bol?
|
221
116
|
end
|
222
|
-
|
117
|
+
|
118
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
119
|
+
|
120
|
+
def check re
|
121
|
+
ss.check re
|
122
|
+
end
|
223
123
|
|
224
124
|
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
225
125
|
c = @comments.join
|
@@ -227,6 +127,12 @@ class RubyLexer
|
|
227
127
|
c
|
228
128
|
end
|
229
129
|
|
130
|
+
def eat_whitespace
|
131
|
+
r = scan(/\s+/)
|
132
|
+
self.extra_lineno += r.count("\n") if r
|
133
|
+
r
|
134
|
+
end
|
135
|
+
|
230
136
|
def end_of_stream?
|
231
137
|
ss.eos?
|
232
138
|
end
|
@@ -245,13 +151,18 @@ class RubyLexer
|
|
245
151
|
result EXPR_BEG, token, text
|
246
152
|
end
|
247
153
|
|
154
|
+
def fixup_lineno extra = 0
|
155
|
+
self.lineno += self.extra_lineno + extra
|
156
|
+
self.extra_lineno = 0
|
157
|
+
end
|
158
|
+
|
248
159
|
def heredoc here # TODO: rewrite / remove
|
249
160
|
_, eos, func, last_line = here
|
250
161
|
|
251
|
-
indent =
|
252
|
-
|
253
|
-
|
254
|
-
eos_re = /#{indent}#{Regexp.escape eos}(
|
162
|
+
indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
|
163
|
+
expand = func =~ STR_FUNC_EXPAND
|
164
|
+
eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
|
165
|
+
eos_re = /#{indent}#{Regexp.escape eos}(#{eol}|\z)/
|
255
166
|
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
256
167
|
|
257
168
|
rb_compile_error err_msg if end_of_stream?
|
@@ -259,7 +170,7 @@ class RubyLexer
|
|
259
170
|
if beginning_of_line? && scan(eos_re) then
|
260
171
|
self.lineno += 1
|
261
172
|
ss.unread_many last_line # TODO: figure out how to remove this
|
262
|
-
return :tSTRING_END, eos
|
173
|
+
return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
|
263
174
|
end
|
264
175
|
|
265
176
|
self.string_buffer = []
|
@@ -272,17 +183,17 @@ class RubyLexer
|
|
272
183
|
when scan(/#[{]/) then
|
273
184
|
return :tSTRING_DBEG, matched
|
274
185
|
when scan(/#/) then
|
275
|
-
string_buffer <<
|
186
|
+
string_buffer << "#"
|
276
187
|
end
|
277
188
|
|
278
189
|
begin
|
279
|
-
c = tokadd_string func,
|
190
|
+
c = tokadd_string func, eol, nil
|
280
191
|
|
281
192
|
rb_compile_error err_msg if
|
282
193
|
c == RubyLexer::EOF
|
283
194
|
|
284
|
-
if c !=
|
285
|
-
return :tSTRING_CONTENT, string_buffer.join
|
195
|
+
if c != eol then
|
196
|
+
return :tSTRING_CONTENT, string_buffer.join
|
286
197
|
else
|
287
198
|
string_buffer << scan(/\n/)
|
288
199
|
end
|
@@ -300,67 +211,24 @@ class RubyLexer
|
|
300
211
|
|
301
212
|
string_content = begin
|
302
213
|
s = string_buffer.join
|
303
|
-
s.
|
304
|
-
rescue ArgumentError
|
305
|
-
s.b.delete("\r").force_encoding Encoding::UTF_8
|
214
|
+
s.b.force_encoding Encoding::UTF_8
|
306
215
|
end
|
307
216
|
|
308
|
-
string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
|
309
|
-
|
310
217
|
return :tSTRING_CONTENT, string_content
|
311
218
|
end
|
312
219
|
|
313
|
-
def heredoc_dedent(string_content)
|
314
|
-
width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
|
315
|
-
heredoc_whitespace_indent_size whitespace
|
316
|
-
end.min || 0
|
317
|
-
|
318
|
-
string_content.split("\n", -1).map do |line|
|
319
|
-
dedent_string line, width
|
320
|
-
end.join "\n"
|
321
|
-
end
|
322
|
-
|
323
|
-
def dedent_string(string, width)
|
324
|
-
characters_skipped = 0
|
325
|
-
indentation_skipped = 0
|
326
|
-
|
327
|
-
string.chars.each do |char|
|
328
|
-
break if indentation_skipped >= width
|
329
|
-
if char == ' '
|
330
|
-
characters_skipped += 1
|
331
|
-
indentation_skipped += 1
|
332
|
-
elsif char == "\t"
|
333
|
-
proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
|
334
|
-
break if (proposed > width)
|
335
|
-
characters_skipped += 1
|
336
|
-
indentation_skipped = proposed
|
337
|
-
end
|
338
|
-
end
|
339
|
-
string[characters_skipped..-1]
|
340
|
-
end
|
341
|
-
|
342
|
-
def heredoc_whitespace_indent_size(whitespace)
|
343
|
-
whitespace.chars.inject 0 do |size, char|
|
344
|
-
if char == "\t"
|
345
|
-
size + TAB_WIDTH
|
346
|
-
else
|
347
|
-
size + 1
|
348
|
-
end
|
349
|
-
end
|
350
|
-
end
|
351
|
-
|
352
220
|
def heredoc_identifier # TODO: remove / rewrite
|
353
221
|
term, func = nil, STR_FUNC_BORING
|
354
222
|
self.string_buffer = []
|
355
223
|
|
356
|
-
heredoc_indent_mods =
|
224
|
+
heredoc_indent_mods = "-"
|
357
225
|
heredoc_indent_mods += '\~' if ruby23plus?
|
358
226
|
|
359
227
|
case
|
360
228
|
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
361
229
|
term = ss[2]
|
362
|
-
func |= STR_FUNC_INDENT unless ss[1].empty?
|
363
|
-
func |= STR_FUNC_ICNTNT if ss[1] ==
|
230
|
+
func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
|
231
|
+
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
364
232
|
func |= case term
|
365
233
|
when "\'" then
|
366
234
|
STR_SQUOTE
|
@@ -377,7 +245,7 @@ class RubyLexer
|
|
377
245
|
func |= STR_DQUOTE
|
378
246
|
unless ss[1].empty? then
|
379
247
|
func |= STR_FUNC_INDENT
|
380
|
-
func |= STR_FUNC_ICNTNT if ss[1] ==
|
248
|
+
func |= STR_FUNC_ICNTNT if ss[1] == "~"
|
381
249
|
end
|
382
250
|
string_buffer << ss[2]
|
383
251
|
else
|
@@ -393,7 +261,7 @@ class RubyLexer
|
|
393
261
|
|
394
262
|
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
395
263
|
|
396
|
-
if term ==
|
264
|
+
if term == "`" then
|
397
265
|
result nil, :tXSTRING_BEG, "`"
|
398
266
|
else
|
399
267
|
result nil, :tSTRING_BEG, "\""
|
@@ -404,26 +272,26 @@ class RubyLexer
|
|
404
272
|
lex_state =~ EXPR_FNAME
|
405
273
|
end
|
406
274
|
|
407
|
-
def is_after_operator?
|
408
|
-
lex_state =~ EXPR_FNAME|EXPR_DOT
|
409
|
-
end
|
410
|
-
|
411
275
|
def int_with_base base
|
412
276
|
rb_compile_error "Invalid numeric format" if matched =~ /__/
|
413
277
|
|
414
278
|
text = matched
|
415
279
|
case
|
416
|
-
when text.end_with?(
|
280
|
+
when text.end_with?("ri")
|
417
281
|
return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
|
418
|
-
when text.end_with?(
|
282
|
+
when text.end_with?("r")
|
419
283
|
return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
|
420
|
-
when text.end_with?(
|
284
|
+
when text.end_with?("i")
|
421
285
|
return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
|
422
286
|
else
|
423
287
|
return result(EXPR_NUM, :tINTEGER, text.to_i(base))
|
424
288
|
end
|
425
289
|
end
|
426
290
|
|
291
|
+
def is_after_operator?
|
292
|
+
lex_state =~ EXPR_FNAME|EXPR_DOT
|
293
|
+
end
|
294
|
+
|
427
295
|
def is_arg?
|
428
296
|
lex_state =~ EXPR_ARG_ANY
|
429
297
|
end
|
@@ -436,15 +304,6 @@ class RubyLexer
|
|
436
304
|
lex_state =~ EXPR_END_ANY
|
437
305
|
end
|
438
306
|
|
439
|
-
def lvar_defined? id
|
440
|
-
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
441
|
-
self.parser.env[id.to_sym] == :lvar
|
442
|
-
end
|
443
|
-
|
444
|
-
def ruby22_label?
|
445
|
-
ruby22plus? and is_label_possible?
|
446
|
-
end
|
447
|
-
|
448
307
|
def is_label_possible?
|
449
308
|
(lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
|
450
309
|
end
|
@@ -461,6 +320,11 @@ class RubyLexer
|
|
461
320
|
lpar_beg && lpar_beg == paren_nest
|
462
321
|
end
|
463
322
|
|
323
|
+
def lvar_defined? id
|
324
|
+
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
325
|
+
self.parser.env[id.to_sym] == :lvar
|
326
|
+
end
|
327
|
+
|
464
328
|
def matched
|
465
329
|
ss.matched
|
466
330
|
end
|
@@ -469,6 +333,134 @@ class RubyLexer
|
|
469
333
|
not is_end?
|
470
334
|
end
|
471
335
|
|
336
|
+
def parse_quote # TODO: remove / rewrite
|
337
|
+
beg, nnd, short_hand, c = nil, nil, false, nil
|
338
|
+
|
339
|
+
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
340
|
+
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
341
|
+
c, beg, short_hand = matched, ss.getch, false
|
342
|
+
else # Short-hand (e.g. %{, %., %!, etc)
|
343
|
+
c, beg, short_hand = "Q", ss.getch, true
|
344
|
+
end
|
345
|
+
|
346
|
+
if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
|
347
|
+
rb_compile_error "unterminated quoted string meets end of file"
|
348
|
+
end
|
349
|
+
|
350
|
+
# Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
|
351
|
+
nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
|
352
|
+
nnd, beg = beg, "\0" if nnd.nil?
|
353
|
+
|
354
|
+
token_type, text = nil, "%#{c}#{beg}"
|
355
|
+
token_type, string_type = case c
|
356
|
+
when "Q" then
|
357
|
+
ch = short_hand ? nnd : c + beg
|
358
|
+
text = "%#{ch}"
|
359
|
+
[:tSTRING_BEG, STR_DQUOTE]
|
360
|
+
when "q" then
|
361
|
+
[:tSTRING_BEG, STR_SQUOTE]
|
362
|
+
when "W" then
|
363
|
+
eat_whitespace
|
364
|
+
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
365
|
+
when "w" then
|
366
|
+
eat_whitespace
|
367
|
+
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
368
|
+
when "x" then
|
369
|
+
[:tXSTRING_BEG, STR_XQUOTE]
|
370
|
+
when "r" then
|
371
|
+
[:tREGEXP_BEG, STR_REGEXP]
|
372
|
+
when "s" then
|
373
|
+
self.lex_state = EXPR_FNAME
|
374
|
+
[:tSYMBEG, STR_SSYM]
|
375
|
+
when "I" then
|
376
|
+
eat_whitespace
|
377
|
+
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
378
|
+
when "i" then
|
379
|
+
eat_whitespace
|
380
|
+
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
381
|
+
end
|
382
|
+
|
383
|
+
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
384
|
+
token_type.nil?
|
385
|
+
|
386
|
+
raise "huh" unless string_type
|
387
|
+
|
388
|
+
string string_type, nnd, beg
|
389
|
+
|
390
|
+
return token_type, text
|
391
|
+
end
|
392
|
+
|
393
|
+
def parse_string quote # TODO: rewrite / remove
|
394
|
+
_, string_type, term, open = quote
|
395
|
+
|
396
|
+
space = false # FIX: remove these
|
397
|
+
func = string_type
|
398
|
+
paren = open
|
399
|
+
term_re = @@regexp_cache[term]
|
400
|
+
|
401
|
+
qwords = func =~ STR_FUNC_QWORDS
|
402
|
+
regexp = func =~ STR_FUNC_REGEXP
|
403
|
+
expand = func =~ STR_FUNC_EXPAND
|
404
|
+
|
405
|
+
unless func then # nil'ed from qwords below. *sigh*
|
406
|
+
return :tSTRING_END, nil
|
407
|
+
end
|
408
|
+
|
409
|
+
space = true if qwords and eat_whitespace
|
410
|
+
|
411
|
+
if self.string_nest == 0 && scan(/#{term_re}/) then
|
412
|
+
if qwords then
|
413
|
+
quote[1] = nil
|
414
|
+
return :tSPACE, nil
|
415
|
+
elsif regexp then
|
416
|
+
return :tREGEXP_END, self.regx_options
|
417
|
+
else
|
418
|
+
return :tSTRING_END, term
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
return :tSPACE, nil if space
|
423
|
+
|
424
|
+
self.string_buffer = []
|
425
|
+
|
426
|
+
if expand
|
427
|
+
case
|
428
|
+
when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
|
429
|
+
# TODO: !ISASCII
|
430
|
+
# ?! see parser_peek_variable_name
|
431
|
+
return :tSTRING_DVAR, nil
|
432
|
+
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
433
|
+
# TODO: !ISASCII
|
434
|
+
return :tSTRING_DVAR, nil
|
435
|
+
when scan(/#[{]/) then
|
436
|
+
self.command_start = true
|
437
|
+
return :tSTRING_DBEG, nil
|
438
|
+
when scan(/#/) then
|
439
|
+
string_buffer << "#"
|
440
|
+
end
|
441
|
+
end
|
442
|
+
|
443
|
+
if tokadd_string(func, term, paren) == RubyLexer::EOF then
|
444
|
+
if func =~ STR_FUNC_REGEXP then
|
445
|
+
rb_compile_error "unterminated regexp meets end of file"
|
446
|
+
else
|
447
|
+
rb_compile_error "unterminated string meets end of file"
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
return :tSTRING_CONTENT, string_buffer.join
|
452
|
+
end
|
453
|
+
|
454
|
+
def possibly_escape_string text, check
|
455
|
+
content = match[1]
|
456
|
+
|
457
|
+
if text =~ check then
|
458
|
+
content.gsub(ESC) { unescape $1 }
|
459
|
+
else
|
460
|
+
content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
472
464
|
def process_amper text
|
473
465
|
token = if is_arg? && space_seen && !check(/\s/) then
|
474
466
|
warning("`&' interpreted as argument prefix")
|
@@ -510,44 +502,20 @@ class RubyLexer
|
|
510
502
|
case matched
|
511
503
|
when "}" then
|
512
504
|
self.brace_nest -= 1
|
513
|
-
self.lex_state = EXPR_ENDARG
|
505
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
514
506
|
|
515
507
|
return :tSTRING_DEND, matched if brace_nest < 0
|
516
508
|
return :tRCURLY, matched
|
517
509
|
when "]" then
|
518
510
|
self.paren_nest -= 1
|
519
|
-
self.lex_state = EXPR_ENDARG
|
511
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
520
512
|
return :tRBRACK, matched
|
521
513
|
when ")" then
|
522
514
|
self.paren_nest -= 1
|
523
|
-
self.lex_state = EXPR_ENDFN
|
524
|
-
return :tRPAREN, matched
|
525
|
-
else
|
526
|
-
raise "Unknown bracing: #{matched.inspect}"
|
527
|
-
end
|
528
|
-
end
|
529
|
-
|
530
|
-
def process_colon1 text
|
531
|
-
# ?: / then / when
|
532
|
-
if is_end? || check(/\s/) then
|
533
|
-
return result EXPR_BEG, :tCOLON, text
|
534
|
-
end
|
535
|
-
|
536
|
-
case
|
537
|
-
when scan(/\'/) then
|
538
|
-
string STR_SSYM
|
539
|
-
when scan(/\"/) then
|
540
|
-
string STR_DSYM
|
541
|
-
end
|
542
|
-
|
543
|
-
result EXPR_FNAME, :tSYMBEG, text
|
544
|
-
end
|
545
|
-
|
546
|
-
def process_colon2 text
|
547
|
-
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
548
|
-
result EXPR_BEG, :tCOLON3, text
|
515
|
+
self.lex_state = EXPR_ENDFN
|
516
|
+
return :tRPAREN, matched
|
549
517
|
else
|
550
|
-
|
518
|
+
raise "Unknown bracing: #{matched.inspect}"
|
551
519
|
end
|
552
520
|
end
|
553
521
|
|
@@ -566,7 +534,7 @@ class RubyLexer
|
|
566
534
|
when lex_state =~ EXPR_LABELED then
|
567
535
|
:tLBRACE # hash
|
568
536
|
when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
|
569
|
-
:tLCURLY # block (primary)
|
537
|
+
:tLCURLY # block (primary) "{" in parse.y
|
570
538
|
when lex_state =~ EXPR_ENDARG then
|
571
539
|
:tLBRACE_ARG # block (expr)
|
572
540
|
else
|
@@ -581,15 +549,39 @@ class RubyLexer
|
|
581
549
|
result state, token, text
|
582
550
|
end
|
583
551
|
|
552
|
+
def process_colon1 text
|
553
|
+
# ?: / then / when
|
554
|
+
if is_end? || check(/\s/) then
|
555
|
+
return result EXPR_BEG, :tCOLON, text
|
556
|
+
end
|
557
|
+
|
558
|
+
case
|
559
|
+
when scan(/\'/) then
|
560
|
+
string STR_SSYM
|
561
|
+
when scan(/\"/) then
|
562
|
+
string STR_DSYM
|
563
|
+
end
|
564
|
+
|
565
|
+
result EXPR_FNAME, :tSYMBEG, text
|
566
|
+
end
|
567
|
+
|
568
|
+
def process_colon2 text
|
569
|
+
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
570
|
+
result EXPR_BEG, :tCOLON3, text
|
571
|
+
else
|
572
|
+
result EXPR_DOT, :tCOLON2, text
|
573
|
+
end
|
574
|
+
end
|
575
|
+
|
584
576
|
def process_float text
|
585
577
|
rb_compile_error "Invalid numeric format" if text =~ /__/
|
586
578
|
|
587
579
|
case
|
588
|
-
when text.end_with?(
|
580
|
+
when text.end_with?("ri")
|
589
581
|
return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
590
|
-
when text.end_with?(
|
582
|
+
when text.end_with?("i")
|
591
583
|
return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
592
|
-
when text.end_with?(
|
584
|
+
when text.end_with?("r")
|
593
585
|
return result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
594
586
|
else
|
595
587
|
return result EXPR_NUM, :tFLOAT, text.to_f
|
@@ -612,6 +604,24 @@ class RubyLexer
|
|
612
604
|
result EXPR_END, tok_id, text
|
613
605
|
end
|
614
606
|
|
607
|
+
def process_label text
|
608
|
+
symbol = possibly_escape_string text, /^"/
|
609
|
+
|
610
|
+
result EXPR_LAB, :tLABEL, [symbol, self.lineno]
|
611
|
+
end
|
612
|
+
|
613
|
+
def process_label_or_string text
|
614
|
+
if @was_label && text =~ /:\Z/ then
|
615
|
+
@was_label = nil
|
616
|
+
return process_label text
|
617
|
+
elsif text =~ /:\Z/ then
|
618
|
+
ss.pos -= 1 # put back ":"
|
619
|
+
text = text[0..-2]
|
620
|
+
end
|
621
|
+
|
622
|
+
result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
|
623
|
+
end
|
624
|
+
|
615
625
|
def process_lchevron text
|
616
626
|
if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
|
617
627
|
!is_end? &&
|
@@ -634,14 +644,14 @@ class RubyLexer
|
|
634
644
|
c = matched
|
635
645
|
hit = false
|
636
646
|
|
637
|
-
if c ==
|
647
|
+
if c == "#" then
|
638
648
|
ss.pos -= 1
|
639
649
|
|
640
650
|
# TODO: handle magic comments
|
641
651
|
while scan(/\s*\#.*(\n+|\z)/) do
|
642
652
|
hit = true
|
643
653
|
self.lineno += matched.lines.to_a.size
|
644
|
-
@comments << matched.gsub(/^ +#/,
|
654
|
+
@comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
645
655
|
end
|
646
656
|
|
647
657
|
return nil if end_of_stream?
|
@@ -697,7 +707,7 @@ class RubyLexer
|
|
697
707
|
# "an argument list, not a decomposed argument")
|
698
708
|
:tLPAREN2
|
699
709
|
else
|
700
|
-
:tLPAREN2 # plain
|
710
|
+
:tLPAREN2 # plain "(" in parse.y
|
701
711
|
end
|
702
712
|
|
703
713
|
self.paren_nest += 1
|
@@ -735,7 +745,7 @@ class RubyLexer
|
|
735
745
|
|
736
746
|
return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
|
737
747
|
|
738
|
-
if
|
748
|
+
if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
|
739
749
|
arg_ambiguous if is_arg?
|
740
750
|
|
741
751
|
if check(/\d/) then
|
@@ -760,12 +770,12 @@ class RubyLexer
|
|
760
770
|
|
761
771
|
if check(/\s|\v/) then
|
762
772
|
unless is_arg? then
|
763
|
-
c2 = { " " =>
|
764
|
-
"\n" =>
|
765
|
-
"\t" =>
|
766
|
-
"\v" =>
|
767
|
-
"\r" =>
|
768
|
-
"\f" =>
|
773
|
+
c2 = { " " => "s",
|
774
|
+
"\n" => "n",
|
775
|
+
"\t" => "t",
|
776
|
+
"\v" => "v",
|
777
|
+
"\r" => "r",
|
778
|
+
"\f" => "f" }[matched]
|
769
779
|
|
770
780
|
if c2 then
|
771
781
|
warning("invalid character syntax; use ?\\" + c2)
|
@@ -838,43 +848,38 @@ class RubyLexer
|
|
838
848
|
result EXPR_PAR, token, text
|
839
849
|
end
|
840
850
|
|
841
|
-
def
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
end
|
849
|
-
end
|
850
|
-
|
851
|
-
def process_symbol text
|
852
|
-
symbol = possibly_escape_string text, /^:"/
|
851
|
+
def process_string # TODO: rewrite / remove
|
852
|
+
# matches top of parser_yylex in compare/parse23.y:8113
|
853
|
+
token = if lex_strterm[0] == :heredoc then
|
854
|
+
self.heredoc lex_strterm
|
855
|
+
else
|
856
|
+
self.parse_string lex_strterm
|
857
|
+
end
|
853
858
|
|
854
|
-
|
855
|
-
end
|
859
|
+
token_type, c = token
|
856
860
|
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
+
# matches parser_string_term from 2.3, but way off from 2.5
|
862
|
+
if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
|
863
|
+
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
864
|
+
!cond.is_in_state) || is_arg?) &&
|
865
|
+
is_label_suffix? then
|
866
|
+
scan(/:/)
|
867
|
+
token_type = token[0] = :tLABEL_END
|
868
|
+
end
|
869
|
+
end
|
861
870
|
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
return process_label text
|
866
|
-
elsif text =~ /:\Z/ then
|
867
|
-
ss.pos -= 1 # put back ":"
|
868
|
-
text = text[0..-2]
|
871
|
+
if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
|
872
|
+
self.lex_strterm = nil
|
873
|
+
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END|EXPR_ENDARG
|
869
874
|
end
|
870
875
|
|
871
|
-
|
876
|
+
return token
|
872
877
|
end
|
873
878
|
|
874
|
-
def
|
875
|
-
symbol = possibly_escape_string text,
|
879
|
+
def process_symbol text
|
880
|
+
symbol = possibly_escape_string text, /^:"/
|
876
881
|
|
877
|
-
result
|
882
|
+
result EXPR_END|EXPR_ENDARG, :tSYMBOL, symbol
|
878
883
|
end
|
879
884
|
|
880
885
|
def process_token text
|
@@ -902,6 +907,7 @@ class RubyLexer
|
|
902
907
|
|
903
908
|
if is_label_possible? and is_label_suffix? then
|
904
909
|
scan(/:/)
|
910
|
+
# TODO: propagate the lineno to ALL results
|
905
911
|
return result EXPR_LAB, :tLABEL, [token, self.lineno]
|
906
912
|
end
|
907
913
|
|
@@ -1084,23 +1090,24 @@ class RubyLexer
|
|
1084
1090
|
[token, text]
|
1085
1091
|
end
|
1086
1092
|
|
1087
|
-
def
|
1088
|
-
|
1093
|
+
def ruby22_label?
|
1094
|
+
ruby22plus? and is_label_possible?
|
1089
1095
|
end
|
1090
1096
|
|
1091
|
-
def
|
1092
|
-
|
1097
|
+
def ruby22plus?
|
1098
|
+
parser.class.version >= 22
|
1093
1099
|
end
|
1094
1100
|
|
1095
|
-
def
|
1096
|
-
|
1097
|
-
self.extra_lineno += r.count("\n") if r
|
1098
|
-
r
|
1101
|
+
def ruby23plus?
|
1102
|
+
parser.class.version >= 23
|
1099
1103
|
end
|
1100
1104
|
|
1101
|
-
def
|
1102
|
-
|
1103
|
-
|
1105
|
+
def ruby24minus?
|
1106
|
+
parser.class.version <= 24
|
1107
|
+
end
|
1108
|
+
|
1109
|
+
def scan re
|
1110
|
+
ss.scan re
|
1104
1111
|
end
|
1105
1112
|
|
1106
1113
|
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
@@ -1123,12 +1130,6 @@ class RubyLexer
|
|
1123
1130
|
self.lex_strterm = [:strterm, type, beg, nnd]
|
1124
1131
|
end
|
1125
1132
|
|
1126
|
-
# TODO: consider
|
1127
|
-
# def src= src
|
1128
|
-
# raise "bad src: #{src.inspect}" unless String === src
|
1129
|
-
# @src = RPStringScanner.new(src)
|
1130
|
-
# end
|
1131
|
-
|
1132
1133
|
def tokadd_escape term # TODO: rewrite / remove
|
1133
1134
|
case
|
1134
1135
|
when scan(/\\\n/) then
|
@@ -1158,14 +1159,18 @@ class RubyLexer
|
|
1158
1159
|
end
|
1159
1160
|
|
1160
1161
|
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1161
|
-
qwords =
|
1162
|
-
escape =
|
1163
|
-
expand =
|
1164
|
-
regexp =
|
1165
|
-
symbol =
|
1162
|
+
qwords = func =~ STR_FUNC_QWORDS
|
1163
|
+
escape = func =~ STR_FUNC_ESCAPE
|
1164
|
+
expand = func =~ STR_FUNC_EXPAND
|
1165
|
+
regexp = func =~ STR_FUNC_REGEXP
|
1166
|
+
symbol = func =~ STR_FUNC_SYMBOL
|
1166
1167
|
|
1167
1168
|
paren_re = @@regexp_cache[paren]
|
1168
|
-
term_re =
|
1169
|
+
term_re = if term == "\n"
|
1170
|
+
/#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
|
1171
|
+
else
|
1172
|
+
@@regexp_cache[term]
|
1173
|
+
end
|
1169
1174
|
|
1170
1175
|
until end_of_stream? do
|
1171
1176
|
c = nil
|
@@ -1195,7 +1200,7 @@ class RubyLexer
|
|
1195
1200
|
string_buffer << "\n"
|
1196
1201
|
next
|
1197
1202
|
when qwords && scan(/\\\s/) then
|
1198
|
-
c =
|
1203
|
+
c = " "
|
1199
1204
|
when expand && scan(/\\\n/) then
|
1200
1205
|
next
|
1201
1206
|
when regexp && check(/\\/) then
|
@@ -1220,8 +1225,12 @@ class RubyLexer
|
|
1220
1225
|
end # top case
|
1221
1226
|
|
1222
1227
|
unless handled then
|
1223
|
-
t =
|
1224
|
-
|
1228
|
+
t = if term == "\n"
|
1229
|
+
Regexp.escape "\r\n"
|
1230
|
+
else
|
1231
|
+
Regexp.escape term
|
1232
|
+
end
|
1233
|
+
x = Regexp.escape paren if paren && paren != "\000"
|
1225
1234
|
re = if qwords then
|
1226
1235
|
/[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
|
1227
1236
|
else
|
@@ -1279,171 +1288,154 @@ class RubyLexer
|
|
1279
1288
|
# do nothing for now
|
1280
1289
|
end
|
1281
1290
|
|
1282
|
-
def
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
def ruby23plus?
|
1287
|
-
parser.class.version >= 23
|
1291
|
+
def was_label?
|
1292
|
+
@was_label = ruby22_label?
|
1293
|
+
true
|
1288
1294
|
end
|
1289
1295
|
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
self.heredoc lex_strterm
|
1294
|
-
else
|
1295
|
-
self.parse_string lex_strterm
|
1296
|
-
end
|
1296
|
+
class State
|
1297
|
+
attr_accessor :n
|
1298
|
+
attr_accessor :names
|
1297
1299
|
|
1298
|
-
|
1300
|
+
# TODO: take a shared hash of strings for inspect/to_s
|
1301
|
+
def initialize o, names
|
1302
|
+
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
1299
1303
|
|
1300
|
-
|
1301
|
-
|
1302
|
-
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
1303
|
-
!cond.is_in_state) || is_arg?) &&
|
1304
|
-
is_label_suffix? then
|
1305
|
-
scan(/:/)
|
1306
|
-
token_type = token[0] = :tLABEL_END
|
1307
|
-
end
|
1304
|
+
self.n = o
|
1305
|
+
self.names = names
|
1308
1306
|
end
|
1309
1307
|
|
1310
|
-
|
1311
|
-
self.
|
1312
|
-
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
|
1308
|
+
def == o
|
1309
|
+
self.equal?(o) || (o.class == self.class && o.n == self.n)
|
1313
1310
|
end
|
1314
1311
|
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
def parse_quote # TODO: remove / rewrite
|
1319
|
-
beg, nnd, short_hand, c = nil, nil, false, nil
|
1320
|
-
|
1321
|
-
if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
|
1322
|
-
rb_compile_error "unknown type of %string" if ss.matched_size == 2
|
1323
|
-
c, beg, short_hand = matched, ss.getch, false
|
1324
|
-
else # Short-hand (e.g. %{, %., %!, etc)
|
1325
|
-
c, beg, short_hand = 'Q', ss.getch, true
|
1312
|
+
def =~ v
|
1313
|
+
(self.n & v.n) != 0
|
1326
1314
|
end
|
1327
1315
|
|
1328
|
-
|
1329
|
-
|
1316
|
+
def | v
|
1317
|
+
raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
|
1318
|
+
self.names == v.names
|
1319
|
+
self.class.new(self.n | v.n, self.names)
|
1330
1320
|
end
|
1331
1321
|
|
1332
|
-
|
1333
|
-
|
1334
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
1335
|
-
|
1336
|
-
token_type, text = nil, "%#{c}#{beg}"
|
1337
|
-
token_type, string_type = case c
|
1338
|
-
when 'Q' then
|
1339
|
-
ch = short_hand ? nnd : c + beg
|
1340
|
-
text = "%#{ch}"
|
1341
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
1342
|
-
when 'q' then
|
1343
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
1344
|
-
when 'W' then
|
1345
|
-
eat_whitespace
|
1346
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1347
|
-
when 'w' then
|
1348
|
-
eat_whitespace
|
1349
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1350
|
-
when 'x' then
|
1351
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
1352
|
-
when 'r' then
|
1353
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
1354
|
-
when 's' then
|
1355
|
-
self.lex_state = EXPR_FNAME
|
1356
|
-
[:tSYMBEG, STR_SSYM]
|
1357
|
-
when 'I' then
|
1358
|
-
eat_whitespace
|
1359
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1360
|
-
when 'i' then
|
1361
|
-
eat_whitespace
|
1362
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1363
|
-
end
|
1364
|
-
|
1365
|
-
rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
|
1366
|
-
token_type.nil?
|
1367
|
-
|
1368
|
-
raise "huh" unless string_type
|
1369
|
-
|
1370
|
-
string string_type, nnd, beg
|
1371
|
-
|
1372
|
-
return token_type, text
|
1373
|
-
end
|
1374
|
-
|
1375
|
-
def parse_string quote # TODO: rewrite / remove
|
1376
|
-
_, string_type, term, open = quote
|
1377
|
-
|
1378
|
-
space = false # FIX: remove these
|
1379
|
-
func = string_type
|
1380
|
-
paren = open
|
1381
|
-
term_re = @@regexp_cache[term]
|
1382
|
-
|
1383
|
-
qwords = (func & STR_FUNC_QWORDS) != 0
|
1384
|
-
regexp = (func & STR_FUNC_REGEXP) != 0
|
1385
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
1322
|
+
def inspect
|
1323
|
+
return "Value(0)" if n.zero? # HACK?
|
1386
1324
|
|
1387
|
-
|
1388
|
-
|
1325
|
+
names.map { |v, k| k if self =~ v }.
|
1326
|
+
compact.
|
1327
|
+
join("|").
|
1328
|
+
gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
|
1389
1329
|
end
|
1390
1330
|
|
1391
|
-
|
1331
|
+
alias to_s inspect
|
1392
1332
|
|
1393
|
-
|
1394
|
-
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1399
|
-
|
1400
|
-
|
1401
|
-
|
1402
|
-
|
1333
|
+
module Values
|
1334
|
+
expr_names = {}
|
1335
|
+
|
1336
|
+
EXPR_NONE = State.new 0x0, expr_names
|
1337
|
+
EXPR_BEG = State.new 0x1, expr_names
|
1338
|
+
EXPR_END = State.new 0x2, expr_names
|
1339
|
+
EXPR_ENDARG = State.new 0x4, expr_names
|
1340
|
+
EXPR_ENDFN = State.new 0x8, expr_names
|
1341
|
+
EXPR_ARG = State.new 0x10, expr_names
|
1342
|
+
EXPR_CMDARG = State.new 0x20, expr_names
|
1343
|
+
EXPR_MID = State.new 0x40, expr_names
|
1344
|
+
EXPR_FNAME = State.new 0x80, expr_names
|
1345
|
+
EXPR_DOT = State.new 0x100, expr_names
|
1346
|
+
EXPR_CLASS = State.new 0x200, expr_names
|
1347
|
+
EXPR_LABEL = State.new 0x400, expr_names
|
1348
|
+
EXPR_LABELED = State.new 0x800, expr_names
|
1349
|
+
EXPR_FITEM = State.new 0x1000, expr_names
|
1403
1350
|
|
1404
|
-
|
1351
|
+
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
1352
|
+
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
1353
|
+
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
1405
1354
|
|
1406
|
-
|
1355
|
+
# extra fake lex_state names to make things a bit cleaner
|
1407
1356
|
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
# ?! see parser_peek_variable_name
|
1413
|
-
return :tSTRING_DVAR, nil
|
1414
|
-
when scan(/#(?=\@\@?[a-zA-Z_])/) then
|
1415
|
-
# TODO: !ISASCII
|
1416
|
-
return :tSTRING_DVAR, nil
|
1417
|
-
when scan(/#[{]/) then
|
1418
|
-
self.command_start = true
|
1419
|
-
return :tSTRING_DBEG, nil
|
1420
|
-
when scan(/#/) then
|
1421
|
-
string_buffer << '#'
|
1422
|
-
end
|
1423
|
-
end
|
1357
|
+
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
1358
|
+
EXPR_NUM = EXPR_END|EXPR_ENDARG
|
1359
|
+
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
1360
|
+
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
1424
1361
|
|
1425
|
-
|
1426
|
-
|
1362
|
+
EXPR_LIT = EXPR_NUM # TODO: migrate to EXPR_LIT
|
1363
|
+
|
1364
|
+
expr_names.merge!(EXPR_NONE => "EXPR_NONE",
|
1365
|
+
EXPR_BEG => "EXPR_BEG",
|
1366
|
+
EXPR_END => "EXPR_END",
|
1367
|
+
EXPR_ENDARG => "EXPR_ENDARG",
|
1368
|
+
EXPR_ENDFN => "EXPR_ENDFN",
|
1369
|
+
EXPR_ARG => "EXPR_ARG",
|
1370
|
+
EXPR_CMDARG => "EXPR_CMDARG",
|
1371
|
+
EXPR_MID => "EXPR_MID",
|
1372
|
+
EXPR_FNAME => "EXPR_FNAME",
|
1373
|
+
EXPR_DOT => "EXPR_DOT",
|
1374
|
+
EXPR_CLASS => "EXPR_CLASS",
|
1375
|
+
EXPR_LABEL => "EXPR_LABEL",
|
1376
|
+
EXPR_LABELED => "EXPR_LABELED",
|
1377
|
+
EXPR_FITEM => "EXPR_FITEM")
|
1378
|
+
|
1379
|
+
# ruby constants for strings
|
1380
|
+
|
1381
|
+
str_func_names = {}
|
1382
|
+
|
1383
|
+
STR_FUNC_BORING = State.new 0x00, str_func_names
|
1384
|
+
STR_FUNC_ESCAPE = State.new 0x01, str_func_names
|
1385
|
+
STR_FUNC_EXPAND = State.new 0x02, str_func_names
|
1386
|
+
STR_FUNC_REGEXP = State.new 0x04, str_func_names
|
1387
|
+
STR_FUNC_QWORDS = State.new 0x08, str_func_names
|
1388
|
+
STR_FUNC_SYMBOL = State.new 0x10, str_func_names
|
1389
|
+
STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
|
1390
|
+
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
1391
|
+
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
1392
|
+
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
1393
|
+
STR_FUNC_ICNTNT = State.new 0x10000, str_func_names # <<~HEREDOC -- TODO: remove?
|
1394
|
+
|
1395
|
+
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
1396
|
+
|
1397
|
+
STR_SQUOTE = STR_FUNC_BORING
|
1398
|
+
STR_DQUOTE = STR_FUNC_EXPAND
|
1399
|
+
STR_XQUOTE = STR_FUNC_EXPAND
|
1400
|
+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
1401
|
+
STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
|
1402
|
+
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
1403
|
+
STR_SSYM = STR_FUNC_SYMBOL
|
1404
|
+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
1405
|
+
|
1406
|
+
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
1407
|
+
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
1408
|
+
STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
|
1409
|
+
STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
|
1410
|
+
STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
|
1411
|
+
STR_FUNC_INDENT => "STR_FUNC_INDENT",
|
1412
|
+
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
1413
|
+
STR_FUNC_LIST => "STR_FUNC_LIST",
|
1414
|
+
STR_FUNC_TERM => "STR_FUNC_TERM",
|
1415
|
+
STR_FUNC_ICNTNT => "STR_FUNC_ICNTNT",
|
1416
|
+
STR_SQUOTE => "STR_SQUOTE")
|
1427
1417
|
end
|
1428
1418
|
|
1429
|
-
|
1419
|
+
include Values
|
1430
1420
|
end
|
1421
|
+
|
1422
|
+
include State::Values
|
1431
1423
|
end
|
1432
1424
|
|
1433
1425
|
require "ruby_lexer.rex"
|
1434
1426
|
|
1435
1427
|
if ENV["RP_LINENO_DEBUG"] then
|
1436
1428
|
class RubyLexer
|
1437
|
-
alias :old_lineno= :lineno=
|
1438
|
-
|
1439
1429
|
def d o
|
1440
1430
|
$stderr.puts o.inspect
|
1441
1431
|
end
|
1442
1432
|
|
1433
|
+
alias old_lineno= lineno=
|
1434
|
+
|
1443
1435
|
def lineno= n
|
1444
1436
|
self.old_lineno= n
|
1445
1437
|
where = caller.first.split(/:/).first(2).join(":")
|
1446
|
-
d :lineno => [n, where, ss && ss.rest[0,40]]
|
1438
|
+
d :lineno => [n, where, ss && ss.rest[0, 40]]
|
1447
1439
|
end
|
1448
1440
|
end
|
1449
1441
|
end
|