ruby_parser 3.13.1 → 3.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.autotest +18 -29
- data/History.rdoc +312 -0
- data/Manifest.txt +16 -15
- data/README.rdoc +13 -9
- data/Rakefile +237 -106
- data/bin/ruby_parse +3 -1
- data/bin/ruby_parse_extract_error +9 -4
- data/compare/normalize.rb +54 -6
- data/debugging.md +172 -0
- data/gauntlet.md +107 -0
- data/lib/rp_extensions.rb +15 -36
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby_lexer.rb +515 -812
- data/lib/ruby_lexer.rex +33 -27
- data/lib/ruby_lexer.rex.rb +64 -31
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.rb +46 -36
- data/lib/{ruby_parser.yy → ruby_parser2.yy} +1400 -488
- data/lib/ruby_parser20.rb +10953 -0
- data/lib/ruby_parser21.rb +10978 -0
- data/lib/ruby_parser22.rb +11119 -0
- data/lib/ruby_parser23.rb +11160 -0
- data/lib/ruby_parser24.rb +11209 -0
- data/lib/ruby_parser25.rb +11209 -0
- data/lib/ruby_parser26.rb +11231 -0
- data/lib/ruby_parser27.rb +12960 -0
- data/lib/{ruby26_parser.y → ruby_parser3.yy} +1652 -521
- data/lib/ruby_parser30.rb +13292 -0
- data/lib/ruby_parser31.rb +13625 -0
- data/lib/ruby_parser32.rb +13577 -0
- data/lib/ruby_parser33.rb +13577 -0
- data/lib/ruby_parser_extras.rb +988 -474
- data/test/test_ruby_lexer.rb +1339 -1155
- data/test/test_ruby_parser.rb +4255 -2103
- data/test/test_ruby_parser_extras.rb +39 -4
- data/tools/munge.rb +52 -13
- data/tools/ripper.rb +24 -6
- data.tar.gz.sig +0 -0
- metadata +73 -56
- metadata.gz.sig +0 -0
- data/lib/ruby20_parser.rb +0 -6869
- data/lib/ruby20_parser.y +0 -2431
- data/lib/ruby21_parser.rb +0 -6944
- data/lib/ruby21_parser.y +0 -2449
- data/lib/ruby22_parser.rb +0 -6968
- data/lib/ruby22_parser.y +0 -2458
- data/lib/ruby23_parser.rb +0 -6987
- data/lib/ruby23_parser.y +0 -2460
- data/lib/ruby24_parser.rb +0 -6994
- data/lib/ruby24_parser.y +0 -2466
- data/lib/ruby25_parser.rb +0 -6994
- data/lib/ruby25_parser.y +0 -2466
- data/lib/ruby26_parser.rb +0 -7012
data/lib/ruby_lexer.rb
CHANGED
@@ -4,135 +4,9 @@
|
|
4
4
|
$DEBUG = true if ENV["DEBUG"]
|
5
5
|
|
6
6
|
class RubyLexer
|
7
|
-
|
8
7
|
# :stopdoc:
|
9
|
-
HAS_ENC = "".respond_to? :encoding
|
10
|
-
|
11
|
-
IDENT_CHAR = if HAS_ENC then
|
12
|
-
/[\w\u0080-\u{10ffff}]/u
|
13
|
-
else
|
14
|
-
/[\w\x80-\xFF]/n
|
15
|
-
end
|
16
|
-
|
17
8
|
EOF = :eof_haha!
|
18
9
|
|
19
|
-
# ruby constants for strings (should this be moved somewhere else?)
|
20
|
-
|
21
|
-
STR_FUNC_BORING = 0x00
|
22
|
-
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
|
23
|
-
STR_FUNC_EXPAND = 0x02
|
24
|
-
STR_FUNC_REGEXP = 0x04
|
25
|
-
STR_FUNC_QWORDS = 0x08
|
26
|
-
STR_FUNC_SYMBOL = 0x10
|
27
|
-
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
|
28
|
-
STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
|
29
|
-
|
30
|
-
STR_SQUOTE = STR_FUNC_BORING
|
31
|
-
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
32
|
-
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
33
|
-
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
34
|
-
STR_SSYM = STR_FUNC_SYMBOL
|
35
|
-
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
36
|
-
|
37
|
-
class State
|
38
|
-
attr_accessor :n
|
39
|
-
|
40
|
-
def initialize o
|
41
|
-
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
42
|
-
|
43
|
-
self.n = o
|
44
|
-
end
|
45
|
-
|
46
|
-
def == o
|
47
|
-
o.class == self.class && o.n == self.n
|
48
|
-
end
|
49
|
-
|
50
|
-
def =~ v
|
51
|
-
(self.n & v.n) != 0
|
52
|
-
end
|
53
|
-
|
54
|
-
def | v
|
55
|
-
self.class.new(self.n | v.n)
|
56
|
-
end
|
57
|
-
|
58
|
-
def inspect
|
59
|
-
return "EXPR_NONE" if n.zero?
|
60
|
-
NAMES.map { |v,k| k if self =~ v }.compact.join "|"
|
61
|
-
end
|
62
|
-
|
63
|
-
module Values
|
64
|
-
EXPR_NONE = State.new 0x0
|
65
|
-
EXPR_BEG = State.new 0x1
|
66
|
-
EXPR_END = State.new 0x2
|
67
|
-
EXPR_ENDARG = State.new 0x4
|
68
|
-
EXPR_ENDFN = State.new 0x8
|
69
|
-
EXPR_ARG = State.new 0x10
|
70
|
-
EXPR_CMDARG = State.new 0x20
|
71
|
-
EXPR_MID = State.new 0x40
|
72
|
-
EXPR_FNAME = State.new 0x80
|
73
|
-
EXPR_DOT = State.new 0x100
|
74
|
-
EXPR_CLASS = State.new 0x200
|
75
|
-
EXPR_LABEL = State.new 0x400
|
76
|
-
EXPR_LABELED = State.new 0x800
|
77
|
-
EXPR_FITEM = State.new 0x1000
|
78
|
-
|
79
|
-
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
80
|
-
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
81
|
-
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
82
|
-
|
83
|
-
# extra fake lex_state names to make things a bit cleaner
|
84
|
-
|
85
|
-
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
86
|
-
EXPR_NUM = EXPR_END|EXPR_ENDARG
|
87
|
-
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
88
|
-
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
89
|
-
end
|
90
|
-
|
91
|
-
include Values
|
92
|
-
|
93
|
-
NAMES = {
|
94
|
-
EXPR_NONE => "EXPR_NONE",
|
95
|
-
EXPR_BEG => "EXPR_BEG",
|
96
|
-
EXPR_END => "EXPR_END",
|
97
|
-
EXPR_ENDARG => "EXPR_ENDARG",
|
98
|
-
EXPR_ENDFN => "EXPR_ENDFN",
|
99
|
-
EXPR_ARG => "EXPR_ARG",
|
100
|
-
EXPR_CMDARG => "EXPR_CMDARG",
|
101
|
-
EXPR_MID => "EXPR_MID",
|
102
|
-
EXPR_FNAME => "EXPR_FNAME",
|
103
|
-
EXPR_DOT => "EXPR_DOT",
|
104
|
-
EXPR_CLASS => "EXPR_CLASS",
|
105
|
-
EXPR_LABEL => "EXPR_LABEL",
|
106
|
-
EXPR_LABELED => "EXPR_LABELED",
|
107
|
-
EXPR_FITEM => "EXPR_FITEM",
|
108
|
-
}
|
109
|
-
end
|
110
|
-
|
111
|
-
include State::Values
|
112
|
-
|
113
|
-
if $DEBUG then
|
114
|
-
def lex_state= o
|
115
|
-
return if @lex_state == o
|
116
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
117
|
-
if ENV["V"] then
|
118
|
-
c = caller[0]
|
119
|
-
c = caller[1] if c =~ /\b(expr_)?result\b/
|
120
|
-
c = caller[2] if c =~ /\b(expr_)?result\b/
|
121
|
-
warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
|
122
|
-
else
|
123
|
-
warn "lex_state: %p -> %p" % [lex_state, o]
|
124
|
-
end
|
125
|
-
@lex_state = o
|
126
|
-
end
|
127
|
-
else
|
128
|
-
def lex_state= o
|
129
|
-
raise ArgumentError, "bad state: %p" % [o] unless State === o
|
130
|
-
@lex_state = o
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
attr_reader :lex_state
|
135
|
-
|
136
10
|
ESCAPES = {
|
137
11
|
"a" => "\007",
|
138
12
|
"b" => "\010",
|
@@ -149,10 +23,17 @@ class RubyLexer
|
|
149
23
|
"c\?" => 127.chr,
|
150
24
|
}
|
151
25
|
|
26
|
+
HAS_ENC = "".respond_to? :encoding
|
27
|
+
|
28
|
+
BTOKENS = {
|
29
|
+
".." => :tBDOT2,
|
30
|
+
"..." => :tBDOT3,
|
31
|
+
}
|
32
|
+
|
152
33
|
TOKENS = {
|
153
34
|
"!" => :tBANG,
|
154
35
|
"!=" => :tNEQ,
|
155
|
-
|
36
|
+
"!@" => :tBANG,
|
156
37
|
"!~" => :tNMATCH,
|
157
38
|
"," => :tCOMMA,
|
158
39
|
".." => :tDOT2,
|
@@ -165,21 +46,57 @@ class RubyLexer
|
|
165
46
|
"->" => :tLAMBDA,
|
166
47
|
}
|
167
48
|
|
168
|
-
|
49
|
+
PERCENT_END = {
|
50
|
+
"(" => ")",
|
51
|
+
"[" => "]",
|
52
|
+
"{" => "}",
|
53
|
+
"<" => ">",
|
54
|
+
}
|
169
55
|
|
170
|
-
|
56
|
+
SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
|
57
|
+
|
58
|
+
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
171
59
|
@@regexp_cache[nil] = nil
|
172
60
|
|
61
|
+
def regexp_cache
|
62
|
+
@@regexp_cache
|
63
|
+
end
|
64
|
+
|
65
|
+
if $DEBUG then
|
66
|
+
attr_reader :lex_state
|
67
|
+
|
68
|
+
def lex_state= o
|
69
|
+
return if @lex_state == o
|
70
|
+
|
71
|
+
from = ""
|
72
|
+
if ENV["VERBOSE"]
|
73
|
+
path = caller[0]
|
74
|
+
path = caller[1] if path =~ /result/
|
75
|
+
path, line, *_ = path.split(/:/)
|
76
|
+
path.delete_prefix! File.dirname File.dirname __FILE__
|
77
|
+
from = " at .%s:%s" % [path, line]
|
78
|
+
end
|
79
|
+
|
80
|
+
warn "lex_state: %p -> %p%s" % [lex_state, o, from]
|
81
|
+
|
82
|
+
@lex_state = o
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
173
86
|
# :startdoc:
|
174
87
|
|
175
|
-
attr_accessor :
|
88
|
+
attr_accessor :lex_state unless $DEBUG
|
89
|
+
|
176
90
|
attr_accessor :brace_nest
|
177
91
|
attr_accessor :cmdarg
|
178
92
|
attr_accessor :command_start
|
179
93
|
attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
|
180
94
|
attr_accessor :last_state
|
181
95
|
attr_accessor :cond
|
182
|
-
attr_accessor :
|
96
|
+
attr_accessor :old_ss
|
97
|
+
attr_accessor :old_lineno
|
98
|
+
|
99
|
+
# these are generated via ruby_lexer.rex: ss, lineno
|
183
100
|
|
184
101
|
##
|
185
102
|
# Additional context surrounding tokens that both the lexer and
|
@@ -196,39 +113,30 @@ class RubyLexer
|
|
196
113
|
# Last token read via next_token.
|
197
114
|
attr_accessor :token
|
198
115
|
|
199
|
-
|
116
|
+
# Last comment lexed, or nil
|
117
|
+
attr_accessor :comment
|
200
118
|
|
201
119
|
def initialize _ = nil
|
202
120
|
@lex_state = nil # remove one warning under $DEBUG
|
203
|
-
|
121
|
+
@lex_state = EXPR_NONE
|
204
122
|
|
205
123
|
self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
|
206
124
|
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
125
|
+
self.ss = RPStringScanner.new ""
|
207
126
|
|
208
127
|
reset
|
209
128
|
end
|
210
129
|
|
211
130
|
def arg_ambiguous
|
212
|
-
self.warning
|
131
|
+
self.warning "Ambiguous first argument. make sure."
|
213
132
|
end
|
214
133
|
|
215
134
|
def arg_state
|
216
135
|
is_after_operator? ? EXPR_ARG : EXPR_BEG
|
217
136
|
end
|
218
137
|
|
219
|
-
def
|
220
|
-
|
221
|
-
end
|
222
|
-
alias :bol? :beginning_of_line? # to make .rex file more readable
|
223
|
-
|
224
|
-
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
225
|
-
c = @comments.join
|
226
|
-
@comments.clear
|
227
|
-
c
|
228
|
-
end
|
229
|
-
|
230
|
-
def end_of_stream?
|
231
|
-
ss.eos?
|
138
|
+
def debug n
|
139
|
+
raise "debug #{n}"
|
232
140
|
end
|
233
141
|
|
234
142
|
def expr_dot?
|
@@ -245,185 +153,30 @@ class RubyLexer
|
|
245
153
|
result EXPR_BEG, token, text
|
246
154
|
end
|
247
155
|
|
248
|
-
def heredoc here # TODO: rewrite / remove
|
249
|
-
_, eos, func, last_line = here
|
250
|
-
|
251
|
-
indent = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
|
252
|
-
content_indent = (func & STR_FUNC_ICNTNT) != 0
|
253
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
254
|
-
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
255
|
-
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
256
|
-
|
257
|
-
rb_compile_error err_msg if end_of_stream?
|
258
|
-
|
259
|
-
if beginning_of_line? && scan(eos_re) then
|
260
|
-
self.lineno += 1
|
261
|
-
ss.unread_many last_line # TODO: figure out how to remove this
|
262
|
-
return :tSTRING_END, eos
|
263
|
-
end
|
264
|
-
|
265
|
-
self.string_buffer = []
|
266
|
-
|
267
|
-
if expand then
|
268
|
-
case
|
269
|
-
when scan(/#[$@]/) then
|
270
|
-
ss.pos -= 1 # FIX omg stupid
|
271
|
-
return :tSTRING_DVAR, matched
|
272
|
-
when scan(/#[{]/) then
|
273
|
-
return :tSTRING_DBEG, matched
|
274
|
-
when scan(/#/) then
|
275
|
-
string_buffer << '#'
|
276
|
-
end
|
277
|
-
|
278
|
-
begin
|
279
|
-
c = tokadd_string func, "\n", nil
|
280
|
-
|
281
|
-
rb_compile_error err_msg if
|
282
|
-
c == RubyLexer::EOF
|
283
|
-
|
284
|
-
if c != "\n" then
|
285
|
-
return :tSTRING_CONTENT, string_buffer.join.delete("\r")
|
286
|
-
else
|
287
|
-
string_buffer << scan(/\n/)
|
288
|
-
end
|
289
|
-
|
290
|
-
rb_compile_error err_msg if end_of_stream?
|
291
|
-
end until check(eos_re)
|
292
|
-
else
|
293
|
-
until check(eos_re) do
|
294
|
-
string_buffer << scan(/.*(\n|\z)/)
|
295
|
-
rb_compile_error err_msg if end_of_stream?
|
296
|
-
end
|
297
|
-
end
|
298
|
-
|
299
|
-
self.lex_strterm = [:heredoc, eos, func, last_line]
|
300
|
-
|
301
|
-
string_content = begin
|
302
|
-
s = string_buffer.join
|
303
|
-
s.delete "\r"
|
304
|
-
rescue ArgumentError
|
305
|
-
s.b.delete("\r").force_encoding Encoding::UTF_8
|
306
|
-
end
|
307
|
-
|
308
|
-
string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
|
309
|
-
|
310
|
-
return :tSTRING_CONTENT, string_content
|
311
|
-
end
|
312
|
-
|
313
|
-
def heredoc_dedent(string_content)
|
314
|
-
width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
|
315
|
-
heredoc_whitespace_indent_size whitespace
|
316
|
-
end.min || 0
|
317
|
-
|
318
|
-
string_content.split("\n", -1).map do |line|
|
319
|
-
dedent_string line, width
|
320
|
-
end.join "\n"
|
321
|
-
end
|
322
|
-
|
323
|
-
def dedent_string(string, width)
|
324
|
-
characters_skipped = 0
|
325
|
-
indentation_skipped = 0
|
326
|
-
|
327
|
-
string.chars.each do |char|
|
328
|
-
break if indentation_skipped >= width
|
329
|
-
if char == ' '
|
330
|
-
characters_skipped += 1
|
331
|
-
indentation_skipped += 1
|
332
|
-
elsif char == "\t"
|
333
|
-
proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
|
334
|
-
break if (proposed > width)
|
335
|
-
characters_skipped += 1
|
336
|
-
indentation_skipped = proposed
|
337
|
-
end
|
338
|
-
end
|
339
|
-
string[characters_skipped..-1]
|
340
|
-
end
|
341
|
-
|
342
|
-
def heredoc_whitespace_indent_size(whitespace)
|
343
|
-
whitespace.chars.inject 0 do |size, char|
|
344
|
-
if char == "\t"
|
345
|
-
size + TAB_WIDTH
|
346
|
-
else
|
347
|
-
size + 1
|
348
|
-
end
|
349
|
-
end
|
350
|
-
end
|
351
|
-
|
352
|
-
def heredoc_identifier # TODO: remove / rewrite
|
353
|
-
term, func = nil, STR_FUNC_BORING
|
354
|
-
self.string_buffer = []
|
355
|
-
|
356
|
-
heredoc_indent_mods = '-'
|
357
|
-
heredoc_indent_mods += '\~' if ruby23plus?
|
358
|
-
|
359
|
-
case
|
360
|
-
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
361
|
-
term = ss[2]
|
362
|
-
func |= STR_FUNC_INDENT unless ss[1].empty?
|
363
|
-
func |= STR_FUNC_ICNTNT if ss[1] == '~'
|
364
|
-
func |= case term
|
365
|
-
when "\'" then
|
366
|
-
STR_SQUOTE
|
367
|
-
when '"' then
|
368
|
-
STR_DQUOTE
|
369
|
-
else
|
370
|
-
STR_XQUOTE
|
371
|
-
end
|
372
|
-
string_buffer << ss[3]
|
373
|
-
when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
|
374
|
-
rb_compile_error "unterminated here document identifier"
|
375
|
-
when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
|
376
|
-
term = '"'
|
377
|
-
func |= STR_DQUOTE
|
378
|
-
unless ss[1].empty? then
|
379
|
-
func |= STR_FUNC_INDENT
|
380
|
-
func |= STR_FUNC_ICNTNT if ss[1] == '~'
|
381
|
-
end
|
382
|
-
string_buffer << ss[2]
|
383
|
-
else
|
384
|
-
return nil
|
385
|
-
end
|
386
|
-
|
387
|
-
if scan(/.*\n/) then
|
388
|
-
# TODO: think about storing off the char range instead
|
389
|
-
line = matched
|
390
|
-
else
|
391
|
-
line = nil
|
392
|
-
end
|
393
|
-
|
394
|
-
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
395
|
-
|
396
|
-
if term == '`' then
|
397
|
-
result nil, :tXSTRING_BEG, "`"
|
398
|
-
else
|
399
|
-
result nil, :tSTRING_BEG, "\""
|
400
|
-
end
|
401
|
-
end
|
402
|
-
|
403
156
|
def in_fname? # REFACTOR
|
404
157
|
lex_state =~ EXPR_FNAME
|
405
158
|
end
|
406
159
|
|
407
|
-
def is_after_operator?
|
408
|
-
lex_state =~ EXPR_FNAME|EXPR_DOT
|
409
|
-
end
|
410
|
-
|
411
160
|
def int_with_base base
|
412
161
|
rb_compile_error "Invalid numeric format" if matched =~ /__/
|
413
162
|
|
414
163
|
text = matched
|
415
164
|
case
|
416
|
-
when text.end_with?(
|
417
|
-
|
418
|
-
when text.end_with?(
|
419
|
-
|
420
|
-
when text.end_with?(
|
421
|
-
|
165
|
+
when text.end_with?("ri")
|
166
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
|
167
|
+
when text.end_with?("r")
|
168
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
|
169
|
+
when text.end_with?("i")
|
170
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
|
422
171
|
else
|
423
|
-
|
172
|
+
result EXPR_NUM, :tINTEGER, text.to_i(base)
|
424
173
|
end
|
425
174
|
end
|
426
175
|
|
176
|
+
def is_after_operator?
|
177
|
+
lex_state =~ EXPR_FNAME|EXPR_DOT
|
178
|
+
end
|
179
|
+
|
427
180
|
def is_arg?
|
428
181
|
lex_state =~ EXPR_ARG_ANY
|
429
182
|
end
|
@@ -436,15 +189,6 @@ class RubyLexer
|
|
436
189
|
lex_state =~ EXPR_END_ANY
|
437
190
|
end
|
438
191
|
|
439
|
-
def lvar_defined? id
|
440
|
-
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
441
|
-
self.parser.env[id.to_sym] == :lvar
|
442
|
-
end
|
443
|
-
|
444
|
-
def ruby22_label?
|
445
|
-
ruby22plus? and is_label_possible?
|
446
|
-
end
|
447
|
-
|
448
192
|
def is_label_possible?
|
449
193
|
(lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
|
450
194
|
end
|
@@ -461,14 +205,30 @@ class RubyLexer
|
|
461
205
|
lpar_beg && lpar_beg == paren_nest
|
462
206
|
end
|
463
207
|
|
464
|
-
def
|
465
|
-
|
208
|
+
def is_local_id id
|
209
|
+
# maybe just make this false for now
|
210
|
+
self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
|
211
|
+
end
|
212
|
+
|
213
|
+
def lvar_defined? id
|
214
|
+
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
215
|
+
self.parser.env[id.to_sym] == :lvar
|
466
216
|
end
|
467
217
|
|
468
218
|
def not_end?
|
469
219
|
not is_end?
|
470
220
|
end
|
471
221
|
|
222
|
+
def possibly_escape_string text, check
|
223
|
+
content = match[1]
|
224
|
+
|
225
|
+
if text =~ check then
|
226
|
+
unescape_string content
|
227
|
+
else
|
228
|
+
content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
472
232
|
def process_amper text
|
473
233
|
token = if is_arg? && space_seen && !check(/\s/) then
|
474
234
|
warning("`&' interpreted as argument prefix")
|
@@ -479,44 +239,50 @@ class RubyLexer
|
|
479
239
|
:tAMPER2
|
480
240
|
end
|
481
241
|
|
482
|
-
|
242
|
+
result :arg_state, token, "&"
|
483
243
|
end
|
484
244
|
|
485
245
|
def process_backref text
|
486
|
-
token =
|
246
|
+
token = match[1].to_sym
|
487
247
|
# TODO: can't do lineno hack w/ symbol
|
488
248
|
result EXPR_END, :tBACK_REF, token
|
489
249
|
end
|
490
250
|
|
491
251
|
def process_begin text
|
492
|
-
|
252
|
+
self.comment ||= +""
|
253
|
+
self.comment << matched
|
493
254
|
|
494
255
|
unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
|
495
|
-
|
256
|
+
self.comment = nil
|
496
257
|
rb_compile_error("embedded document meets end of file")
|
497
258
|
end
|
498
259
|
|
499
|
-
|
500
|
-
self.lineno += matched.count("\n")
|
260
|
+
self.comment << matched
|
261
|
+
self.lineno += matched.count("\n") # HACK?
|
501
262
|
|
502
263
|
nil # TODO
|
503
264
|
end
|
504
265
|
|
505
|
-
|
506
|
-
# matching compare/parse23.y:8561
|
507
|
-
cond.lexpop
|
508
|
-
cmdarg.lexpop
|
266
|
+
# TODO: make all tXXXX terminals include lexer.lineno ... enforce it somehow?
|
509
267
|
|
268
|
+
def process_brace_close text
|
510
269
|
case matched
|
511
270
|
when "}" then
|
512
271
|
self.brace_nest -= 1
|
513
|
-
self.lex_state = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6
|
514
|
-
|
515
272
|
return :tSTRING_DEND, matched if brace_nest < 0
|
273
|
+
end
|
274
|
+
|
275
|
+
# matching compare/parse26.y:8099
|
276
|
+
cond.pop
|
277
|
+
cmdarg.pop
|
278
|
+
|
279
|
+
case matched
|
280
|
+
when "}" then
|
281
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
516
282
|
return :tRCURLY, matched
|
517
283
|
when "]" then
|
518
284
|
self.paren_nest -= 1
|
519
|
-
self.lex_state = EXPR_ENDARG
|
285
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
520
286
|
return :tRBRACK, matched
|
521
287
|
when ")" then
|
522
288
|
self.paren_nest -= 1
|
@@ -527,30 +293,6 @@ class RubyLexer
|
|
527
293
|
end
|
528
294
|
end
|
529
295
|
|
530
|
-
def process_colon1 text
|
531
|
-
# ?: / then / when
|
532
|
-
if is_end? || check(/\s/) then
|
533
|
-
return result EXPR_BEG, :tCOLON, text
|
534
|
-
end
|
535
|
-
|
536
|
-
case
|
537
|
-
when scan(/\'/) then
|
538
|
-
string STR_SSYM
|
539
|
-
when scan(/\"/) then
|
540
|
-
string STR_DSYM
|
541
|
-
end
|
542
|
-
|
543
|
-
result EXPR_FNAME, :tSYMBEG, text
|
544
|
-
end
|
545
|
-
|
546
|
-
def process_colon2 text
|
547
|
-
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
548
|
-
result EXPR_BEG, :tCOLON3, text
|
549
|
-
else
|
550
|
-
result EXPR_DOT, :tCOLON2, text
|
551
|
-
end
|
552
|
-
end
|
553
|
-
|
554
296
|
def process_brace_open text
|
555
297
|
# matching compare/parse23.y:8694
|
556
298
|
self.brace_nest += 1
|
@@ -566,7 +308,7 @@ class RubyLexer
|
|
566
308
|
when lex_state =~ EXPR_LABELED then
|
567
309
|
:tLBRACE # hash
|
568
310
|
when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
|
569
|
-
:tLCURLY # block (primary)
|
311
|
+
:tLCURLY # block (primary) "{" in parse.y
|
570
312
|
when lex_state =~ EXPR_ENDARG then
|
571
313
|
:tLBRACE_ARG # block (expr)
|
572
314
|
else
|
@@ -581,37 +323,96 @@ class RubyLexer
|
|
581
323
|
result state, token, text
|
582
324
|
end
|
583
325
|
|
326
|
+
def process_colon1 text
|
327
|
+
# ?: / then / when
|
328
|
+
if is_end? || check(/\s/) then
|
329
|
+
return result EXPR_BEG, :tCOLON, text
|
330
|
+
end
|
331
|
+
|
332
|
+
case
|
333
|
+
when scan(/\'/) then
|
334
|
+
string STR_SSYM, matched
|
335
|
+
when scan(/\"/) then
|
336
|
+
string STR_DSYM, matched
|
337
|
+
end
|
338
|
+
|
339
|
+
result EXPR_FNAME, :tSYMBEG, text
|
340
|
+
end
|
341
|
+
|
342
|
+
def process_colon2 text
|
343
|
+
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
344
|
+
result EXPR_BEG, :tCOLON3, text
|
345
|
+
else
|
346
|
+
result EXPR_DOT, :tCOLON2, text
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
def process_dots text # parse32.y:10216
|
351
|
+
is_beg = self.is_beg?
|
352
|
+
self.lex_state = EXPR_BEG
|
353
|
+
|
354
|
+
return result EXPR_ENDARG, :tBDOT3, text if
|
355
|
+
parser.in_argdef && text == "..." # TODO: version check?
|
356
|
+
|
357
|
+
tokens = ruby27plus? && is_beg ? BTOKENS : TOKENS
|
358
|
+
|
359
|
+
result EXPR_BEG, tokens[text], text
|
360
|
+
end
|
361
|
+
|
584
362
|
def process_float text
|
585
363
|
rb_compile_error "Invalid numeric format" if text =~ /__/
|
586
364
|
|
587
365
|
case
|
588
|
-
when text.end_with?(
|
589
|
-
|
590
|
-
when text.end_with?(
|
591
|
-
|
592
|
-
when text.end_with?(
|
593
|
-
|
366
|
+
when text.end_with?("ri")
|
367
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
368
|
+
when text.end_with?("i")
|
369
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
370
|
+
when text.end_with?("r")
|
371
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
594
372
|
else
|
595
|
-
|
373
|
+
result EXPR_NUM, :tFLOAT, text.to_f
|
596
374
|
end
|
597
375
|
end
|
598
376
|
|
599
377
|
def process_gvar text
|
600
|
-
|
378
|
+
if parser.class.version > 20 && text == "$-" then
|
379
|
+
rb_compile_error "unexpected $undefined"
|
380
|
+
end
|
381
|
+
|
601
382
|
result EXPR_END, :tGVAR, text
|
602
383
|
end
|
603
384
|
|
604
385
|
def process_gvar_oddity text
|
605
|
-
return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
|
606
386
|
rb_compile_error "#{text.inspect} is not allowed as a global variable name"
|
607
387
|
end
|
608
388
|
|
609
389
|
def process_ivar text
|
610
390
|
tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
|
611
|
-
text.lineno = self.lineno
|
612
391
|
result EXPR_END, tok_id, text
|
613
392
|
end
|
614
393
|
|
394
|
+
def process_label text
|
395
|
+
symbol = possibly_escape_string text, /^\"/
|
396
|
+
|
397
|
+
result EXPR_LAB, :tLABEL, symbol
|
398
|
+
end
|
399
|
+
|
400
|
+
def process_label_or_string text
|
401
|
+
if @was_label && text =~ /:\Z/ then
|
402
|
+
@was_label = nil
|
403
|
+
return process_label text
|
404
|
+
elsif text =~ /:\Z/ then
|
405
|
+
self.pos -= 1 # put back ":"
|
406
|
+
text = text[0..-2]
|
407
|
+
end
|
408
|
+
|
409
|
+
orig_line = lineno
|
410
|
+
str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
|
411
|
+
self.lineno += str.count("\n")
|
412
|
+
|
413
|
+
result EXPR_END, :tSTRING, str, orig_line
|
414
|
+
end
|
415
|
+
|
615
416
|
def process_lchevron text
|
616
417
|
if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
|
617
418
|
!is_end? &&
|
@@ -627,34 +428,26 @@ class RubyLexer
|
|
627
428
|
self.lex_state = EXPR_BEG
|
628
429
|
end
|
629
430
|
|
630
|
-
|
431
|
+
result lex_state, :tLSHFT, "\<\<"
|
631
432
|
end
|
632
433
|
|
633
|
-
def process_newline_or_comment text
|
434
|
+
def process_newline_or_comment text # ../compare/parse30.y:9126 ish
|
634
435
|
c = matched
|
635
|
-
hit = false
|
636
436
|
|
637
|
-
if c ==
|
638
|
-
|
437
|
+
if c == "#" then
|
438
|
+
self.pos -= 1
|
639
439
|
|
640
|
-
# TODO: handle magic comments
|
641
440
|
while scan(/\s*\#.*(\n+|\z)/) do
|
642
|
-
|
643
|
-
self.
|
644
|
-
|
441
|
+
self.lineno += matched.count "\n"
|
442
|
+
self.comment ||= +""
|
443
|
+
self.comment << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
645
444
|
end
|
646
445
|
|
647
446
|
return nil if end_of_stream?
|
648
447
|
end
|
649
448
|
|
650
|
-
self.lineno += 1 unless hit
|
651
|
-
|
652
|
-
# Replace a string of newlines with a single one
|
653
|
-
self.lineno += matched.lines.to_a.size if scan(/\n+/)
|
654
|
-
|
655
449
|
c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
|
656
450
|
lex_state !~ EXPR_LABELED)
|
657
|
-
# TODO: figure out what token_seen is for
|
658
451
|
if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
|
659
452
|
# ignore if !fallthrough?
|
660
453
|
if !c && parser.in_kwarg then
|
@@ -662,25 +455,29 @@ class RubyLexer
|
|
662
455
|
self.command_start = true
|
663
456
|
return result EXPR_BEG, :tNL, nil
|
664
457
|
else
|
665
|
-
|
458
|
+
maybe_pop_stack
|
459
|
+
return # goto retry
|
666
460
|
end
|
667
461
|
end
|
668
462
|
|
669
|
-
if scan(/
|
670
|
-
self.space_seen = true
|
463
|
+
if scan(/[\ \t\r\f\v]+/) then
|
464
|
+
self.space_seen = true
|
465
|
+
end
|
671
466
|
|
672
|
-
|
673
|
-
return
|
467
|
+
if check(/#/) then
|
468
|
+
return # goto retry
|
469
|
+
elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
|
470
|
+
return # goto retry
|
674
471
|
end
|
675
472
|
|
676
473
|
self.command_start = true
|
677
474
|
|
678
|
-
|
475
|
+
result EXPR_BEG, :tNL, nil
|
679
476
|
end
|
680
477
|
|
681
478
|
def process_nthref text
|
682
479
|
# TODO: can't do lineno hack w/ number
|
683
|
-
result EXPR_END, :tNTH_REF,
|
480
|
+
result EXPR_END, :tNTH_REF, match[1].to_i
|
684
481
|
end
|
685
482
|
|
686
483
|
def process_paren text
|
@@ -697,7 +494,7 @@ class RubyLexer
|
|
697
494
|
# "an argument list, not a decomposed argument")
|
698
495
|
:tLPAREN2
|
699
496
|
else
|
700
|
-
:tLPAREN2 # plain
|
497
|
+
:tLPAREN2 # plain "(" in parse.y
|
701
498
|
end
|
702
499
|
|
703
500
|
self.paren_nest += 1
|
@@ -708,13 +505,16 @@ class RubyLexer
|
|
708
505
|
end
|
709
506
|
|
710
507
|
def process_percent text
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
508
|
+
case
|
509
|
+
when is_beg? then
|
510
|
+
process_percent_quote
|
511
|
+
when scan(/\=/)
|
512
|
+
result EXPR_BEG, :tOP_ASGN, "%"
|
513
|
+
when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
|
514
|
+
process_percent_quote
|
515
|
+
else
|
516
|
+
result :arg_state, :tPERCENT, "%"
|
517
|
+
end
|
718
518
|
end
|
719
519
|
|
720
520
|
def process_plus_minus text
|
@@ -735,7 +535,7 @@ class RubyLexer
|
|
735
535
|
|
736
536
|
return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
|
737
537
|
|
738
|
-
if
|
538
|
+
if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
|
739
539
|
arg_ambiguous if is_arg?
|
740
540
|
|
741
541
|
if check(/\d/) then
|
@@ -760,12 +560,12 @@ class RubyLexer
|
|
760
560
|
|
761
561
|
if check(/\s|\v/) then
|
762
562
|
unless is_arg? then
|
763
|
-
c2 = { " " =>
|
764
|
-
"\n" =>
|
765
|
-
"\t" =>
|
766
|
-
"\v" =>
|
767
|
-
"\r" =>
|
768
|
-
"\f" =>
|
563
|
+
c2 = { " " => "s",
|
564
|
+
"\n" => "n",
|
565
|
+
"\t" => "t",
|
566
|
+
"\v" => "v",
|
567
|
+
"\r" => "r",
|
568
|
+
"\f" => "f" }[matched]
|
769
569
|
|
770
570
|
if c2 then
|
771
571
|
warning("invalid character syntax; use ?\\" + c2)
|
@@ -781,17 +581,26 @@ class RubyLexer
|
|
781
581
|
c = if scan(/\\/) then
|
782
582
|
self.read_escape
|
783
583
|
else
|
784
|
-
|
584
|
+
getch
|
785
585
|
end
|
786
586
|
|
787
587
|
result EXPR_END, :tSTRING, c
|
788
588
|
end
|
789
589
|
|
590
|
+
def process_simple_string text
|
591
|
+
orig_line = lineno
|
592
|
+
self.lineno += text.count("\n")
|
593
|
+
|
594
|
+
str = unescape_string text[1..-2]
|
595
|
+
|
596
|
+
result EXPR_END, :tSTRING, str, orig_line
|
597
|
+
end
|
598
|
+
|
790
599
|
def process_slash text
|
791
600
|
if is_beg? then
|
792
|
-
string STR_REGEXP
|
601
|
+
string STR_REGEXP, matched
|
793
602
|
|
794
|
-
return result
|
603
|
+
return result nil, :tREGEXP_BEG, "/"
|
795
604
|
end
|
796
605
|
|
797
606
|
if scan(/\=/) then
|
@@ -806,7 +615,7 @@ class RubyLexer
|
|
806
615
|
end
|
807
616
|
end
|
808
617
|
|
809
|
-
|
618
|
+
result :arg_state, :tDIVIDE, "/"
|
810
619
|
end
|
811
620
|
|
812
621
|
def process_square_bracket text
|
@@ -838,48 +647,14 @@ class RubyLexer
|
|
838
647
|
result EXPR_PAR, token, text
|
839
648
|
end
|
840
649
|
|
841
|
-
def possibly_escape_string text, check
|
842
|
-
content = match[1]
|
843
|
-
|
844
|
-
if text =~ check then
|
845
|
-
content.gsub(ESC) { unescape $1 }
|
846
|
-
else
|
847
|
-
content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
|
848
|
-
end
|
849
|
-
end
|
850
|
-
|
851
650
|
def process_symbol text
|
852
|
-
symbol = possibly_escape_string text,
|
853
|
-
|
854
|
-
result EXPR_END, :tSYMBOL, symbol
|
855
|
-
end
|
856
|
-
|
857
|
-
def was_label?
|
858
|
-
@was_label = ruby22_label?
|
859
|
-
true
|
860
|
-
end
|
861
|
-
|
862
|
-
def process_label_or_string text
|
863
|
-
if @was_label && text =~ /:\Z/ then
|
864
|
-
@was_label = nil
|
865
|
-
return process_label text
|
866
|
-
elsif text =~ /:\Z/ then
|
867
|
-
ss.pos -= 1 # put back ":"
|
868
|
-
text = text[0..-2]
|
869
|
-
end
|
651
|
+
symbol = possibly_escape_string text, /^:\"/ # stupid emacs
|
870
652
|
|
871
|
-
result
|
872
|
-
end
|
873
|
-
|
874
|
-
def process_label text
|
875
|
-
symbol = possibly_escape_string text, /^"/
|
876
|
-
|
877
|
-
result EXPR_LAB, :tLABEL, [symbol, self.lineno]
|
653
|
+
result EXPR_LIT, :tSYMBOL, symbol
|
878
654
|
end
|
879
655
|
|
880
656
|
def process_token text
|
881
657
|
# matching: parse_ident in compare/parse23.y:7989
|
882
|
-
# TODO: make this always return [token, lineno]
|
883
658
|
# FIX: remove: self.last_state = lex_state
|
884
659
|
|
885
660
|
token = self.token = text
|
@@ -902,7 +677,7 @@ class RubyLexer
|
|
902
677
|
|
903
678
|
if is_label_possible? and is_label_suffix? then
|
904
679
|
scan(/:/)
|
905
|
-
return result EXPR_LAB, :tLABEL,
|
680
|
+
return result EXPR_LAB, :tLABEL, token
|
906
681
|
end
|
907
682
|
|
908
683
|
# TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
|
@@ -913,14 +688,17 @@ class RubyLexer
|
|
913
688
|
return process_token_keyword keyword if keyword
|
914
689
|
end
|
915
690
|
|
916
|
-
# matching: compare/
|
917
|
-
state = if
|
691
|
+
# matching: compare/parse32.y:9031
|
692
|
+
state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
|
918
693
|
cmd_state ? EXPR_CMDARG : EXPR_ARG
|
919
694
|
elsif lex_state =~ EXPR_FNAME then
|
920
695
|
EXPR_ENDFN
|
921
696
|
else
|
922
697
|
EXPR_END
|
923
698
|
end
|
699
|
+
self.lex_state = state
|
700
|
+
|
701
|
+
tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
|
924
702
|
|
925
703
|
if last_state !~ EXPR_DOT|EXPR_FNAME and
|
926
704
|
(tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
|
@@ -928,183 +706,102 @@ class RubyLexer
|
|
928
706
|
state = EXPR_END|EXPR_LABEL
|
929
707
|
end
|
930
708
|
|
931
|
-
|
932
|
-
|
933
|
-
return result(state, tok_id, token)
|
709
|
+
result state, tok_id, token
|
934
710
|
end
|
935
711
|
|
936
712
|
def process_token_keyword keyword
|
937
|
-
# matching MIDDLE of parse_ident in compare/
|
713
|
+
# matching MIDDLE of parse_ident in compare/parse32.y:9695
|
938
714
|
state = lex_state
|
939
|
-
self.lex_state = keyword.state
|
940
715
|
|
941
|
-
|
942
|
-
|
943
|
-
return result(lex_state, keyword.id0, value) if state =~ EXPR_FNAME
|
716
|
+
return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
|
944
717
|
|
718
|
+
self.lex_state = keyword.state
|
945
719
|
self.command_start = true if lex_state =~ EXPR_BEG
|
946
720
|
|
947
721
|
case
|
948
|
-
when keyword.id0 == :kDO then
|
722
|
+
when keyword.id0 == :kDO then # parse32.y line 9712
|
949
723
|
case
|
950
724
|
when lambda_beginning? then
|
951
725
|
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
952
|
-
self.paren_nest -= 1
|
953
|
-
result lex_state, :kDO_LAMBDA,
|
726
|
+
self.paren_nest -= 1 # TODO: question this?
|
727
|
+
result lex_state, :kDO_LAMBDA, token
|
954
728
|
when cond.is_in_state then
|
955
|
-
result lex_state, :kDO_COND,
|
729
|
+
result lex_state, :kDO_COND, token
|
956
730
|
when cmdarg.is_in_state && state != EXPR_CMDARG then
|
957
|
-
result lex_state, :kDO_BLOCK,
|
958
|
-
when state =~ EXPR_BEG|EXPR_ENDARG then
|
959
|
-
result lex_state, :kDO_BLOCK, value
|
731
|
+
result lex_state, :kDO_BLOCK, token
|
960
732
|
else
|
961
|
-
result lex_state, :kDO,
|
733
|
+
result lex_state, :kDO, token
|
962
734
|
end
|
963
735
|
when state =~ EXPR_PAD then
|
964
|
-
result lex_state, keyword.id0,
|
736
|
+
result lex_state, keyword.id0, token
|
965
737
|
when keyword.id0 != keyword.id1 then
|
966
|
-
result EXPR_PAR, keyword.id1,
|
738
|
+
result EXPR_PAR, keyword.id1, token
|
967
739
|
else
|
968
|
-
result lex_state, keyword.id1,
|
740
|
+
result lex_state, keyword.id1, token
|
969
741
|
end
|
970
742
|
end
|
971
743
|
|
972
744
|
def process_underscore text
|
973
|
-
|
745
|
+
self.unscan # put back "_"
|
974
746
|
|
975
747
|
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
976
|
-
|
977
|
-
|
978
|
-
|
748
|
+
ss.terminate
|
749
|
+
[RubyLexer::EOF, RubyLexer::EOF]
|
750
|
+
elsif scan(/#{IDENT_CHAR}+/) then
|
751
|
+
process_token matched
|
979
752
|
end
|
980
753
|
end
|
981
754
|
|
982
755
|
def rb_compile_error msg
|
983
|
-
msg += ". near line #{self.lineno}: #{
|
756
|
+
msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
|
984
757
|
raise RubyParser::SyntaxError, msg
|
985
758
|
end
|
986
759
|
|
987
|
-
def read_escape # TODO: remove / rewrite
|
988
|
-
case
|
989
|
-
when scan(/\\/) then # Backslash
|
990
|
-
'\\'
|
991
|
-
when scan(/n/) then # newline
|
992
|
-
self.extra_lineno -= 1
|
993
|
-
"\n"
|
994
|
-
when scan(/t/) then # horizontal tab
|
995
|
-
"\t"
|
996
|
-
when scan(/r/) then # carriage-return
|
997
|
-
"\r"
|
998
|
-
when scan(/f/) then # form-feed
|
999
|
-
"\f"
|
1000
|
-
when scan(/v/) then # vertical tab
|
1001
|
-
"\13"
|
1002
|
-
when scan(/a/) then # alarm(bell)
|
1003
|
-
"\007"
|
1004
|
-
when scan(/e/) then # escape
|
1005
|
-
"\033"
|
1006
|
-
when scan(/b/) then # backspace
|
1007
|
-
"\010"
|
1008
|
-
when scan(/s/) then # space
|
1009
|
-
" "
|
1010
|
-
when scan(/[0-7]{1,3}/) then # octal constant
|
1011
|
-
(matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
|
1012
|
-
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
1013
|
-
# TODO: force encode everything to UTF-8?
|
1014
|
-
ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
|
1015
|
-
when check(/M-\\[\\MCc]/) then
|
1016
|
-
scan(/M-\\/) # eat it
|
1017
|
-
c = self.read_escape
|
1018
|
-
c[0] = (c[0].ord | 0x80).chr
|
1019
|
-
c
|
1020
|
-
when scan(/M-(.)/) then
|
1021
|
-
c = ss[1]
|
1022
|
-
c[0] = (c[0].ord | 0x80).chr
|
1023
|
-
c
|
1024
|
-
when check(/(C-|c)\\[\\MCc]/) then
|
1025
|
-
scan(/(C-|c)\\/) # eat it
|
1026
|
-
c = self.read_escape
|
1027
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1028
|
-
c
|
1029
|
-
when scan(/C-\?|c\?/) then
|
1030
|
-
127.chr
|
1031
|
-
when scan(/(C-|c)(.)/) then
|
1032
|
-
c = ss[2]
|
1033
|
-
c[0] = (c[0].ord & 0x9f).chr
|
1034
|
-
c
|
1035
|
-
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
1036
|
-
matched
|
1037
|
-
when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
|
1038
|
-
[ss[1].delete("{}").to_i(16)].pack("U")
|
1039
|
-
when scan(/u([0-9a-fA-F]{1,3})/) then
|
1040
|
-
rb_compile_error "Invalid escape character syntax"
|
1041
|
-
when scan(/[McCx0-9]/) || end_of_stream? then
|
1042
|
-
rb_compile_error("Invalid escape character syntax")
|
1043
|
-
else
|
1044
|
-
ss.getch
|
1045
|
-
end.dup
|
1046
|
-
end
|
1047
|
-
|
1048
|
-
def regx_options # TODO: rewrite / remove
|
1049
|
-
good, bad = [], []
|
1050
|
-
|
1051
|
-
if scan(/[a-z]+/) then
|
1052
|
-
good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
|
1053
|
-
end
|
1054
|
-
|
1055
|
-
unless bad.empty? then
|
1056
|
-
rb_compile_error("unknown regexp option%s - %s" %
|
1057
|
-
[(bad.size > 1 ? "s" : ""), bad.join.inspect])
|
1058
|
-
end
|
1059
|
-
|
1060
|
-
return good.join
|
1061
|
-
end
|
1062
|
-
|
1063
760
|
def reset
|
761
|
+
self.lineno = 1
|
1064
762
|
self.brace_nest = 0
|
1065
763
|
self.command_start = true
|
1066
|
-
self.
|
764
|
+
self.comment = nil
|
1067
765
|
self.lex_state = EXPR_NONE
|
1068
766
|
self.lex_strterm = nil
|
1069
|
-
self.lineno = 1
|
1070
767
|
self.lpar_beg = nil
|
1071
768
|
self.paren_nest = 0
|
1072
769
|
self.space_seen = false
|
1073
770
|
self.string_nest = 0
|
1074
771
|
self.token = nil
|
1075
|
-
self.
|
772
|
+
self.string_buffer = []
|
773
|
+
self.old_ss = nil
|
774
|
+
self.old_lineno = nil
|
1076
775
|
|
1077
776
|
self.cond.reset
|
1078
777
|
self.cmdarg.reset
|
1079
778
|
end
|
1080
779
|
|
1081
|
-
def result new_state, token, text # :nodoc:
|
780
|
+
def result new_state, token, text, line = self.lineno # :nodoc:
|
1082
781
|
new_state = self.arg_state if new_state == :arg_state
|
1083
782
|
self.lex_state = new_state if new_state
|
1084
|
-
|
783
|
+
|
784
|
+
[token, [text, line]]
|
1085
785
|
end
|
1086
786
|
|
1087
|
-
def
|
1088
|
-
|
787
|
+
def ruby22_label?
|
788
|
+
ruby22plus? and is_label_possible?
|
1089
789
|
end
|
1090
790
|
|
1091
|
-
def
|
1092
|
-
|
791
|
+
def ruby22plus?
|
792
|
+
parser.class.version >= 22
|
1093
793
|
end
|
1094
794
|
|
1095
|
-
def
|
1096
|
-
|
1097
|
-
self.extra_lineno += r.count("\n") if r
|
1098
|
-
r
|
795
|
+
def ruby23plus?
|
796
|
+
parser.class.version >= 23
|
1099
797
|
end
|
1100
798
|
|
1101
|
-
def
|
1102
|
-
|
1103
|
-
self.extra_lineno = 0
|
799
|
+
def ruby24minus?
|
800
|
+
parser.class.version <= 24
|
1104
801
|
end
|
1105
802
|
|
1106
|
-
def
|
1107
|
-
|
803
|
+
def ruby27plus?
|
804
|
+
parser.class.version >= 27
|
1108
805
|
end
|
1109
806
|
|
1110
807
|
def space_vs_beginning space_type, beg_type, fallback
|
@@ -1119,137 +816,18 @@ class RubyLexer
|
|
1119
816
|
end
|
1120
817
|
end
|
1121
818
|
|
1122
|
-
def
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
# TODO: consider
|
1127
|
-
# def src= src
|
1128
|
-
# raise "bad src: #{src.inspect}" unless String === src
|
1129
|
-
# @src = RPStringScanner.new(src)
|
1130
|
-
# end
|
1131
|
-
|
1132
|
-
def tokadd_escape term # TODO: rewrite / remove
|
1133
|
-
case
|
1134
|
-
when scan(/\\\n/) then
|
1135
|
-
# just ignore
|
1136
|
-
when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
|
1137
|
-
self.string_buffer << matched
|
1138
|
-
when scan(/\\([MC]-|c)(?=\\)/) then
|
1139
|
-
self.string_buffer << matched
|
1140
|
-
self.tokadd_escape term
|
1141
|
-
when scan(/\\([MC]-|c)(.)/) then
|
1142
|
-
self.string_buffer << matched
|
1143
|
-
when scan(/\\[McCx]/) then
|
1144
|
-
rb_compile_error "Invalid escape character syntax"
|
1145
|
-
when scan(/\\(.)/m) then
|
1146
|
-
chr = ss[1]
|
1147
|
-
prev = self.string_buffer.last
|
1148
|
-
if term == chr && prev && prev.end_with?("(?") then
|
1149
|
-
self.string_buffer << chr
|
1150
|
-
elsif term == chr || chr.ascii_only? then
|
1151
|
-
self.string_buffer << matched # dunno why we keep them for ascii
|
1152
|
-
else
|
1153
|
-
self.string_buffer << chr # HACK? this is such a rat's nest
|
1154
|
-
end
|
819
|
+
def unescape_string str
|
820
|
+
str = str.gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
|
821
|
+
if str.valid_encoding?
|
822
|
+
str
|
1155
823
|
else
|
1156
|
-
|
824
|
+
str.b
|
1157
825
|
end
|
1158
826
|
end
|
1159
827
|
|
1160
|
-
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1161
|
-
qwords = (func & STR_FUNC_QWORDS) != 0
|
1162
|
-
escape = (func & STR_FUNC_ESCAPE) != 0
|
1163
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
1164
|
-
regexp = (func & STR_FUNC_REGEXP) != 0
|
1165
|
-
symbol = (func & STR_FUNC_SYMBOL) != 0
|
1166
|
-
|
1167
|
-
paren_re = @@regexp_cache[paren]
|
1168
|
-
term_re = @@regexp_cache[term]
|
1169
|
-
|
1170
|
-
until end_of_stream? do
|
1171
|
-
c = nil
|
1172
|
-
handled = true
|
1173
|
-
|
1174
|
-
case
|
1175
|
-
when paren_re && scan(paren_re) then
|
1176
|
-
self.string_nest += 1
|
1177
|
-
when scan(term_re) then
|
1178
|
-
if self.string_nest == 0 then
|
1179
|
-
ss.pos -= 1
|
1180
|
-
break
|
1181
|
-
else
|
1182
|
-
self.string_nest -= 1
|
1183
|
-
end
|
1184
|
-
when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
|
1185
|
-
ss.pos -= 1
|
1186
|
-
break
|
1187
|
-
when qwords && scan(/\s/) then
|
1188
|
-
ss.pos -= 1
|
1189
|
-
break
|
1190
|
-
when expand && scan(/#(?!\n)/) then
|
1191
|
-
# do nothing
|
1192
|
-
when check(/\\/) then
|
1193
|
-
case
|
1194
|
-
when qwords && scan(/\\\n/) then
|
1195
|
-
string_buffer << "\n"
|
1196
|
-
next
|
1197
|
-
when qwords && scan(/\\\s/) then
|
1198
|
-
c = ' '
|
1199
|
-
when expand && scan(/\\\n/) then
|
1200
|
-
next
|
1201
|
-
when regexp && check(/\\/) then
|
1202
|
-
self.tokadd_escape term
|
1203
|
-
next
|
1204
|
-
when expand && scan(/\\/) then
|
1205
|
-
c = self.read_escape
|
1206
|
-
when scan(/\\\n/) then
|
1207
|
-
# do nothing
|
1208
|
-
when scan(/\\\\/) then
|
1209
|
-
string_buffer << '\\' if escape
|
1210
|
-
c = '\\'
|
1211
|
-
when scan(/\\/) then
|
1212
|
-
unless scan(term_re) || paren.nil? || scan(paren_re) then
|
1213
|
-
string_buffer << "\\"
|
1214
|
-
end
|
1215
|
-
else
|
1216
|
-
handled = false
|
1217
|
-
end # inner /\\/ case
|
1218
|
-
else
|
1219
|
-
handled = false
|
1220
|
-
end # top case
|
1221
|
-
|
1222
|
-
unless handled then
|
1223
|
-
t = Regexp.escape term
|
1224
|
-
x = Regexp.escape(paren) if paren && paren != "\000"
|
1225
|
-
re = if qwords then
|
1226
|
-
/[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
|
1227
|
-
else
|
1228
|
-
/[^#{t}#{x}\#\0\\]+|./
|
1229
|
-
end
|
1230
|
-
|
1231
|
-
scan re
|
1232
|
-
c = matched
|
1233
|
-
|
1234
|
-
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
|
1235
|
-
end # unless handled
|
1236
|
-
|
1237
|
-
c ||= matched
|
1238
|
-
string_buffer << c
|
1239
|
-
end # until
|
1240
|
-
|
1241
|
-
c ||= matched
|
1242
|
-
c = RubyLexer::EOF if end_of_stream?
|
1243
|
-
|
1244
|
-
return c
|
1245
|
-
end
|
1246
|
-
|
1247
828
|
def unescape s
|
1248
829
|
r = ESCAPES[s]
|
1249
830
|
|
1250
|
-
self.extra_lineno += 1 if s == "\n" # eg backslash newline strings
|
1251
|
-
self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
|
1252
|
-
|
1253
831
|
return r if r
|
1254
832
|
|
1255
833
|
x = case s
|
@@ -1265,10 +843,12 @@ class RubyLexer
|
|
1265
843
|
s
|
1266
844
|
when /^[McCx0-9]/ then
|
1267
845
|
rb_compile_error("Invalid escape character syntax")
|
1268
|
-
when /u(
|
846
|
+
when /u(\h{4})/ then
|
1269
847
|
[$1.delete("{}").to_i(16)].pack("U")
|
1270
|
-
when /u(
|
848
|
+
when /u(\h{1,3})/ then
|
1271
849
|
rb_compile_error("Invalid escape character syntax")
|
850
|
+
when /u\{(\h+(?:\s+\h+)*)\}/ then
|
851
|
+
$1.split.map { |cp| cp.to_i(16) }.pack("U*")
|
1272
852
|
else
|
1273
853
|
s
|
1274
854
|
end
|
@@ -1279,171 +859,294 @@ class RubyLexer
|
|
1279
859
|
# do nothing for now
|
1280
860
|
end
|
1281
861
|
|
1282
|
-
def
|
1283
|
-
|
862
|
+
def was_label?
|
863
|
+
@was_label = ruby22_label?
|
864
|
+
true
|
1284
865
|
end
|
1285
866
|
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
867
|
+
class State
|
868
|
+
attr_accessor :n
|
869
|
+
attr_accessor :names
|
1289
870
|
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
self.heredoc lex_strterm
|
1294
|
-
else
|
1295
|
-
self.parse_string lex_strterm
|
1296
|
-
end
|
871
|
+
# TODO: take a shared hash of strings for inspect/to_s
|
872
|
+
def initialize o, names
|
873
|
+
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
1297
874
|
|
1298
|
-
|
875
|
+
self.n = o
|
876
|
+
self.names = names
|
877
|
+
end
|
1299
878
|
|
1300
|
-
|
1301
|
-
|
1302
|
-
if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
|
1303
|
-
!cond.is_in_state) || is_arg?) &&
|
1304
|
-
is_label_suffix? then
|
1305
|
-
scan(/:/)
|
1306
|
-
token_type = token[0] = :tLABEL_END
|
1307
|
-
end
|
879
|
+
def == o
|
880
|
+
self.equal?(o) || (o.class == self.class && o.n == self.n)
|
1308
881
|
end
|
1309
882
|
|
1310
|
-
|
1311
|
-
self.
|
1312
|
-
self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
|
883
|
+
def =~ v
|
884
|
+
(self.n & v.n) != 0
|
1313
885
|
end
|
1314
886
|
|
1315
|
-
|
1316
|
-
|
887
|
+
def | v
|
888
|
+
raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
|
889
|
+
self.names == v.names
|
890
|
+
self.class.new(self.n | v.n, self.names)
|
891
|
+
end
|
1317
892
|
|
1318
|
-
|
1319
|
-
|
893
|
+
def inspect
|
894
|
+
return "EXPR_NONE" if n.zero? # HACK?
|
1320
895
|
|
1321
|
-
|
1322
|
-
|
1323
|
-
|
1324
|
-
|
1325
|
-
c, beg, short_hand = 'Q', ss.getch, true
|
896
|
+
names.map { |v, k| k if self =~ v }.
|
897
|
+
compact.
|
898
|
+
join("|").
|
899
|
+
gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
|
1326
900
|
end
|
1327
901
|
|
1328
|
-
|
1329
|
-
rb_compile_error "unterminated quoted string meets end of file"
|
1330
|
-
end
|
902
|
+
alias to_s inspect
|
1331
903
|
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
904
|
+
module Values
|
905
|
+
expr_names = {}
|
906
|
+
|
907
|
+
EXPR_NONE = State.new 0x0, expr_names
|
908
|
+
EXPR_BEG = State.new 0x1, expr_names
|
909
|
+
EXPR_END = State.new 0x2, expr_names
|
910
|
+
EXPR_ENDARG = State.new 0x4, expr_names
|
911
|
+
EXPR_ENDFN = State.new 0x8, expr_names
|
912
|
+
EXPR_ARG = State.new 0x10, expr_names
|
913
|
+
EXPR_CMDARG = State.new 0x20, expr_names
|
914
|
+
EXPR_MID = State.new 0x40, expr_names
|
915
|
+
EXPR_FNAME = State.new 0x80, expr_names
|
916
|
+
EXPR_DOT = State.new 0x100, expr_names
|
917
|
+
EXPR_CLASS = State.new 0x200, expr_names
|
918
|
+
EXPR_LABEL = State.new 0x400, expr_names
|
919
|
+
EXPR_LABELED = State.new 0x800, expr_names
|
920
|
+
EXPR_FITEM = State.new 0x1000, expr_names
|
1335
921
|
|
1336
|
-
|
1337
|
-
|
1338
|
-
|
1339
|
-
ch = short_hand ? nnd : c + beg
|
1340
|
-
text = "%#{ch}"
|
1341
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
1342
|
-
when 'q' then
|
1343
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
1344
|
-
when 'W' then
|
1345
|
-
eat_whitespace
|
1346
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1347
|
-
when 'w' then
|
1348
|
-
eat_whitespace
|
1349
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1350
|
-
when 'x' then
|
1351
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
1352
|
-
when 'r' then
|
1353
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
1354
|
-
when 's' then
|
1355
|
-
self.lex_state = EXPR_FNAME
|
1356
|
-
[:tSYMBEG, STR_SSYM]
|
1357
|
-
when 'I' then
|
1358
|
-
eat_whitespace
|
1359
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1360
|
-
when 'i' then
|
1361
|
-
eat_whitespace
|
1362
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1363
|
-
end
|
922
|
+
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
923
|
+
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
924
|
+
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
1364
925
|
|
1365
|
-
|
1366
|
-
token_type.nil?
|
926
|
+
# extra fake lex_state names to make things a bit cleaner
|
1367
927
|
|
1368
|
-
|
928
|
+
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
929
|
+
EXPR_LIT = EXPR_END|EXPR_ENDARG
|
930
|
+
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
931
|
+
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
1369
932
|
|
1370
|
-
|
933
|
+
EXPR_NUM = EXPR_LIT
|
934
|
+
|
935
|
+
expr_names.merge!(EXPR_NONE => "EXPR_NONE",
|
936
|
+
EXPR_BEG => "EXPR_BEG",
|
937
|
+
EXPR_END => "EXPR_END",
|
938
|
+
EXPR_ENDARG => "EXPR_ENDARG",
|
939
|
+
EXPR_ENDFN => "EXPR_ENDFN",
|
940
|
+
EXPR_ARG => "EXPR_ARG",
|
941
|
+
EXPR_CMDARG => "EXPR_CMDARG",
|
942
|
+
EXPR_MID => "EXPR_MID",
|
943
|
+
EXPR_FNAME => "EXPR_FNAME",
|
944
|
+
EXPR_DOT => "EXPR_DOT",
|
945
|
+
EXPR_CLASS => "EXPR_CLASS",
|
946
|
+
EXPR_LABEL => "EXPR_LABEL",
|
947
|
+
EXPR_LABELED => "EXPR_LABELED",
|
948
|
+
EXPR_FITEM => "EXPR_FITEM")
|
949
|
+
|
950
|
+
# ruby constants for strings
|
951
|
+
|
952
|
+
str_func_names = {}
|
953
|
+
|
954
|
+
STR_FUNC_BORING = State.new 0x00, str_func_names
|
955
|
+
STR_FUNC_ESCAPE = State.new 0x01, str_func_names
|
956
|
+
STR_FUNC_EXPAND = State.new 0x02, str_func_names
|
957
|
+
STR_FUNC_REGEXP = State.new 0x04, str_func_names
|
958
|
+
STR_FUNC_QWORDS = State.new 0x08, str_func_names
|
959
|
+
STR_FUNC_SYMBOL = State.new 0x10, str_func_names
|
960
|
+
STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
|
961
|
+
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
962
|
+
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
963
|
+
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
964
|
+
STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
|
965
|
+
|
966
|
+
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
967
|
+
|
968
|
+
STR_SQUOTE = STR_FUNC_BORING
|
969
|
+
STR_DQUOTE = STR_FUNC_EXPAND
|
970
|
+
STR_XQUOTE = STR_FUNC_EXPAND
|
971
|
+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
972
|
+
STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
|
973
|
+
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
974
|
+
STR_SSYM = STR_FUNC_SYMBOL
|
975
|
+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
976
|
+
STR_LABEL = STR_FUNC_LABEL
|
977
|
+
|
978
|
+
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
979
|
+
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
980
|
+
STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
|
981
|
+
STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
|
982
|
+
STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
|
983
|
+
STR_FUNC_INDENT => "STR_FUNC_INDENT",
|
984
|
+
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
985
|
+
STR_FUNC_LIST => "STR_FUNC_LIST",
|
986
|
+
STR_FUNC_TERM => "STR_FUNC_TERM",
|
987
|
+
STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
|
988
|
+
STR_SQUOTE => "STR_SQUOTE")
|
989
|
+
end
|
1371
990
|
|
1372
|
-
|
991
|
+
include Values
|
1373
992
|
end
|
1374
993
|
|
1375
|
-
|
1376
|
-
|
994
|
+
include State::Values
|
995
|
+
end
|
996
|
+
|
997
|
+
class RubyLexer
|
998
|
+
module SSWrapper
|
999
|
+
def string= s
|
1000
|
+
ss.string= s
|
1001
|
+
end
|
1002
|
+
|
1003
|
+
def beginning_of_line?
|
1004
|
+
ss.bol?
|
1005
|
+
end
|
1006
|
+
|
1007
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
1377
1008
|
|
1378
|
-
|
1379
|
-
|
1380
|
-
paren = open
|
1381
|
-
term_re = @@regexp_cache[term]
|
1009
|
+
def check re
|
1010
|
+
maybe_pop_stack
|
1382
1011
|
|
1383
|
-
|
1384
|
-
|
1385
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
1012
|
+
ss.check re
|
1013
|
+
end
|
1386
1014
|
|
1387
|
-
|
1388
|
-
|
1015
|
+
def end_of_stream?
|
1016
|
+
ss.eos?
|
1389
1017
|
end
|
1390
1018
|
|
1391
|
-
|
1019
|
+
alias eos? end_of_stream?
|
1392
1020
|
|
1393
|
-
|
1394
|
-
|
1395
|
-
|
1396
|
-
|
1397
|
-
elsif regexp then
|
1398
|
-
return :tREGEXP_END, self.regx_options
|
1399
|
-
else
|
1400
|
-
return :tSTRING_END, term
|
1401
|
-
end
|
1021
|
+
def getch
|
1022
|
+
c = ss.getch
|
1023
|
+
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1024
|
+
c
|
1402
1025
|
end
|
1403
1026
|
|
1404
|
-
|
1027
|
+
def match
|
1028
|
+
ss
|
1029
|
+
end
|
1405
1030
|
|
1406
|
-
|
1031
|
+
def matched
|
1032
|
+
ss.matched
|
1033
|
+
end
|
1407
1034
|
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
return :tSTRING_DVAR, nil
|
1417
|
-
when scan(/#[{]/) then
|
1418
|
-
self.command_start = true
|
1419
|
-
return :tSTRING_DBEG, nil
|
1420
|
-
when scan(/#/) then
|
1421
|
-
string_buffer << '#'
|
1035
|
+
def in_heredoc?
|
1036
|
+
!!self.old_ss
|
1037
|
+
end
|
1038
|
+
|
1039
|
+
def maybe_pop_stack
|
1040
|
+
if ss.eos? && in_heredoc? then
|
1041
|
+
self.ss_pop
|
1042
|
+
self.lineno_pop
|
1422
1043
|
end
|
1423
1044
|
end
|
1424
1045
|
|
1425
|
-
|
1426
|
-
|
1046
|
+
def pos
|
1047
|
+
ss.pos
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
def pos= n
|
1051
|
+
ss.pos = n
|
1052
|
+
end
|
1053
|
+
|
1054
|
+
def rest
|
1055
|
+
ss.rest
|
1056
|
+
end
|
1057
|
+
|
1058
|
+
def scan re
|
1059
|
+
maybe_pop_stack
|
1060
|
+
|
1061
|
+
ss.scan re
|
1062
|
+
end
|
1063
|
+
|
1064
|
+
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1065
|
+
RPStringScanner
|
1066
|
+
end
|
1067
|
+
|
1068
|
+
def ss_string
|
1069
|
+
ss.string
|
1070
|
+
end
|
1071
|
+
|
1072
|
+
def ss_string= s
|
1073
|
+
raise "Probably not"
|
1074
|
+
ss.string = s
|
1075
|
+
end
|
1076
|
+
|
1077
|
+
def unscan
|
1078
|
+
ss.unscan
|
1079
|
+
end
|
1080
|
+
end
|
1081
|
+
|
1082
|
+
include SSWrapper
|
1083
|
+
end
|
1084
|
+
|
1085
|
+
class RubyLexer
|
1086
|
+
module SSStackish
|
1087
|
+
def lineno_push new_lineno
|
1088
|
+
self.old_lineno = self.lineno
|
1089
|
+
self.lineno = new_lineno
|
1090
|
+
end
|
1091
|
+
|
1092
|
+
def lineno_pop
|
1093
|
+
self.lineno = self.old_lineno
|
1094
|
+
self.old_lineno = nil
|
1095
|
+
end
|
1096
|
+
|
1097
|
+
def ss= o
|
1098
|
+
raise "Clearing ss while in heredoc!?!" if in_heredoc?
|
1099
|
+
@old_ss = nil
|
1100
|
+
super
|
1427
1101
|
end
|
1428
1102
|
|
1429
|
-
|
1103
|
+
def ss_push new_ss
|
1104
|
+
@old_ss = self.ss
|
1105
|
+
@ss = new_ss
|
1106
|
+
end
|
1107
|
+
|
1108
|
+
def ss_pop
|
1109
|
+
@ss = self.old_ss
|
1110
|
+
@old_ss = nil
|
1111
|
+
end
|
1430
1112
|
end
|
1113
|
+
|
1114
|
+
prepend SSStackish
|
1431
1115
|
end
|
1432
1116
|
|
1433
|
-
|
1117
|
+
if ENV["RP_STRTERM_DEBUG"] then
|
1118
|
+
class RubyLexer
|
1119
|
+
def d o
|
1120
|
+
$stderr.puts o.inspect
|
1121
|
+
end
|
1122
|
+
|
1123
|
+
alias old_lex_strterm= lex_strterm=
|
1124
|
+
|
1125
|
+
def lex_strterm= o
|
1126
|
+
self.old_lex_strterm= o
|
1127
|
+
where = caller.first.split(/:/).first(2).join(":")
|
1128
|
+
$stderr.puts
|
1129
|
+
d :lex_strterm => [o, where]
|
1130
|
+
end
|
1131
|
+
end
|
1132
|
+
end
|
1133
|
+
|
1134
|
+
require_relative "./ruby_lexer.rex.rb"
|
1135
|
+
require_relative "./ruby_lexer_strings.rb"
|
1434
1136
|
|
1435
1137
|
if ENV["RP_LINENO_DEBUG"] then
|
1436
1138
|
class RubyLexer
|
1437
|
-
alias :old_lineno= :lineno=
|
1438
|
-
|
1439
1139
|
def d o
|
1440
1140
|
$stderr.puts o.inspect
|
1441
1141
|
end
|
1442
1142
|
|
1143
|
+
alias old_lineno= lineno=
|
1144
|
+
|
1443
1145
|
def lineno= n
|
1444
1146
|
self.old_lineno= n
|
1445
1147
|
where = caller.first.split(/:/).first(2).join(":")
|
1446
|
-
|
1148
|
+
$stderr.puts
|
1149
|
+
d :lineno => [n, where]
|
1447
1150
|
end
|
1448
1151
|
end
|
1449
1152
|
end
|