ruby_parser 3.12.0 → 3.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.autotest +18 -29
- data/History.rdoc +283 -0
- data/Manifest.txt +12 -4
- data/README.rdoc +4 -3
- data/Rakefile +189 -51
- data/bin/ruby_parse +3 -1
- data/bin/ruby_parse_extract_error +19 -36
- data/compare/normalize.rb +76 -4
- data/debugging.md +190 -0
- data/gauntlet.md +106 -0
- data/lib/rp_extensions.rb +14 -42
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby20_parser.rb +4659 -4218
- data/lib/ruby20_parser.y +953 -602
- data/lib/ruby21_parser.rb +4723 -4308
- data/lib/ruby21_parser.y +956 -605
- data/lib/ruby22_parser.rb +4762 -4337
- data/lib/ruby22_parser.y +960 -612
- data/lib/ruby23_parser.rb +4761 -4342
- data/lib/ruby23_parser.y +961 -613
- data/lib/ruby24_parser.rb +4791 -4341
- data/lib/ruby24_parser.y +968 -612
- data/lib/ruby25_parser.rb +4791 -4341
- data/lib/ruby25_parser.y +968 -612
- data/lib/ruby26_parser.rb +7287 -0
- data/lib/ruby26_parser.y +2749 -0
- data/lib/ruby27_parser.rb +8517 -0
- data/lib/ruby27_parser.y +3346 -0
- data/lib/ruby30_parser.rb +8751 -0
- data/lib/ruby30_parser.y +3472 -0
- data/lib/ruby3_parser.yy +3476 -0
- data/lib/ruby_lexer.rb +611 -826
- data/lib/ruby_lexer.rex +48 -40
- data/lib/ruby_lexer.rex.rb +122 -46
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.rb +38 -34
- data/lib/ruby_parser.yy +1710 -704
- data/lib/ruby_parser_extras.rb +987 -553
- data/test/test_ruby_lexer.rb +1718 -1539
- data/test/test_ruby_parser.rb +3957 -2164
- data/test/test_ruby_parser_extras.rb +39 -4
- data/tools/munge.rb +250 -0
- data/tools/ripper.rb +44 -0
- data.tar.gz.sig +0 -0
- metadata +68 -47
- metadata.gz.sig +0 -0
- data/lib/ruby18_parser.rb +0 -5793
- data/lib/ruby18_parser.y +0 -1908
- data/lib/ruby19_parser.rb +0 -6185
- data/lib/ruby19_parser.y +0 -2116
data/lib/ruby_lexer.rb
CHANGED
@@ -4,36 +4,9 @@
|
|
4
4
|
$DEBUG = true if ENV["DEBUG"]
|
5
5
|
|
6
6
|
class RubyLexer
|
7
|
-
|
8
7
|
# :stopdoc:
|
9
|
-
HAS_ENC = "".respond_to? :encoding
|
10
|
-
|
11
|
-
IDENT_CHAR = if HAS_ENC then
|
12
|
-
/[\w\u0080-\u{10ffff}]/u
|
13
|
-
else
|
14
|
-
/[\w\x80-\xFF]/n
|
15
|
-
end
|
16
|
-
|
17
8
|
EOF = :eof_haha!
|
18
9
|
|
19
|
-
# ruby constants for strings (should this be moved somewhere else?)
|
20
|
-
|
21
|
-
STR_FUNC_BORING = 0x00
|
22
|
-
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
|
23
|
-
STR_FUNC_EXPAND = 0x02
|
24
|
-
STR_FUNC_REGEXP = 0x04
|
25
|
-
STR_FUNC_QWORDS = 0x08
|
26
|
-
STR_FUNC_SYMBOL = 0x10
|
27
|
-
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
|
28
|
-
STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
|
29
|
-
|
30
|
-
STR_SQUOTE = STR_FUNC_BORING
|
31
|
-
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
32
|
-
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
|
33
|
-
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
34
|
-
STR_SSYM = STR_FUNC_SYMBOL
|
35
|
-
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
36
|
-
|
37
10
|
ESCAPES = {
|
38
11
|
"a" => "\007",
|
39
12
|
"b" => "\010",
|
@@ -50,10 +23,17 @@ class RubyLexer
|
|
50
23
|
"c\?" => 127.chr,
|
51
24
|
}
|
52
25
|
|
26
|
+
HAS_ENC = "".respond_to? :encoding
|
27
|
+
|
28
|
+
BTOKENS = {
|
29
|
+
".." => :tBDOT2,
|
30
|
+
"..." => :tBDOT3,
|
31
|
+
}
|
32
|
+
|
53
33
|
TOKENS = {
|
54
34
|
"!" => :tBANG,
|
55
35
|
"!=" => :tNEQ,
|
56
|
-
|
36
|
+
"!@" => :tBANG,
|
57
37
|
"!~" => :tNMATCH,
|
58
38
|
"," => :tCOMMA,
|
59
39
|
".." => :tDOT2,
|
@@ -66,27 +46,62 @@ class RubyLexer
|
|
66
46
|
"->" => :tLAMBDA,
|
67
47
|
}
|
68
48
|
|
69
|
-
|
49
|
+
PERCENT_END = {
|
50
|
+
"(" => ")",
|
51
|
+
"[" => "]",
|
52
|
+
"{" => "}",
|
53
|
+
"<" => ">",
|
54
|
+
}
|
70
55
|
|
71
|
-
|
56
|
+
SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
|
57
|
+
|
58
|
+
@@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
|
72
59
|
@@regexp_cache[nil] = nil
|
73
60
|
|
61
|
+
def regexp_cache
|
62
|
+
@@regexp_cache
|
63
|
+
end
|
64
|
+
|
65
|
+
if $DEBUG then
|
66
|
+
attr_reader :lex_state
|
67
|
+
|
68
|
+
def lex_state= o
|
69
|
+
return if @lex_state == o
|
70
|
+
|
71
|
+
from = ""
|
72
|
+
if ENV["VERBOSE"]
|
73
|
+
path = caller[0]
|
74
|
+
path = caller[1] if path =~ /result/
|
75
|
+
path, line, *_ = path.split(/:/)
|
76
|
+
path.delete_prefix! File.dirname File.dirname __FILE__
|
77
|
+
from = " at .%s:%s" % [path, line]
|
78
|
+
end
|
79
|
+
|
80
|
+
warn "lex_state: %p -> %p%s" % [lex_state, o, from]
|
81
|
+
|
82
|
+
@lex_state = o
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
74
86
|
# :startdoc:
|
75
87
|
|
76
|
-
attr_accessor :
|
88
|
+
attr_accessor :lex_state unless $DEBUG
|
89
|
+
|
77
90
|
attr_accessor :brace_nest
|
78
91
|
attr_accessor :cmdarg
|
79
92
|
attr_accessor :command_start
|
80
|
-
attr_accessor :
|
93
|
+
attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
|
81
94
|
attr_accessor :last_state
|
82
95
|
attr_accessor :cond
|
83
|
-
attr_accessor :
|
96
|
+
attr_accessor :old_ss
|
97
|
+
attr_accessor :old_lineno
|
98
|
+
|
99
|
+
# these are generated via ruby_lexer.rex: ss, lineno
|
84
100
|
|
85
101
|
##
|
86
102
|
# Additional context surrounding tokens that both the lexer and
|
87
103
|
# grammar use.
|
88
104
|
|
89
|
-
attr_accessor :lex_state
|
90
105
|
attr_accessor :lex_strterm
|
91
106
|
attr_accessor :lpar_beg
|
92
107
|
attr_accessor :paren_nest
|
@@ -95,50 +110,33 @@ class RubyLexer
|
|
95
110
|
attr_accessor :string_buffer
|
96
111
|
attr_accessor :string_nest
|
97
112
|
|
98
|
-
if $DEBUG then
|
99
|
-
alias lex_state= lex_state=
|
100
|
-
def lex_state=o
|
101
|
-
return if @lex_state == o
|
102
|
-
c = caller.first
|
103
|
-
c = caller[1] if c =~ /\bresult\b/
|
104
|
-
warn "lex_state: %p -> %p from %s" % [@lex_state, o, c.clean_caller]
|
105
|
-
@lex_state = o
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
113
|
# Last token read via next_token.
|
110
114
|
attr_accessor :token
|
111
115
|
|
112
|
-
##
|
113
|
-
# What version of ruby to parse. 18 and 19 are the only valid values
|
114
|
-
# currently supported.
|
115
|
-
|
116
|
-
attr_accessor :version
|
117
|
-
|
118
116
|
attr_writer :comments
|
119
117
|
|
120
|
-
def initialize
|
121
|
-
|
122
|
-
|
118
|
+
def initialize _ = nil
|
119
|
+
@lex_state = nil # remove one warning under $DEBUG
|
120
|
+
self.lex_state = EXPR_NONE
|
123
121
|
|
124
|
-
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
125
122
|
self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
|
123
|
+
self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
|
124
|
+
self.ss = RPStringScanner.new ""
|
126
125
|
|
127
126
|
reset
|
128
127
|
end
|
129
128
|
|
130
129
|
def arg_ambiguous
|
131
|
-
self.warning
|
130
|
+
self.warning "Ambiguous first argument. make sure."
|
132
131
|
end
|
133
132
|
|
134
133
|
def arg_state
|
135
|
-
|
134
|
+
is_after_operator? ? EXPR_ARG : EXPR_BEG
|
136
135
|
end
|
137
136
|
|
138
|
-
def
|
139
|
-
|
137
|
+
def ignore_body_comments
|
138
|
+
@comments.clear
|
140
139
|
end
|
141
|
-
alias :bol? :beginning_of_line? # to make .rex file more readable
|
142
140
|
|
143
141
|
def comments # TODO: remove this... maybe comment_string + attr_accessor
|
144
142
|
c = @comments.join
|
@@ -146,184 +144,26 @@ class RubyLexer
|
|
146
144
|
c
|
147
145
|
end
|
148
146
|
|
149
|
-
def
|
150
|
-
|
147
|
+
def debug n
|
148
|
+
raise "debug #{n}"
|
151
149
|
end
|
152
150
|
|
153
151
|
def expr_dot?
|
154
|
-
lex_state
|
152
|
+
lex_state =~ EXPR_DOT
|
155
153
|
end
|
156
154
|
|
157
|
-
def expr_fname?
|
158
|
-
lex_state
|
155
|
+
def expr_fname? # REFACTOR
|
156
|
+
lex_state =~ EXPR_FNAME
|
159
157
|
end
|
160
158
|
|
161
159
|
def expr_result token, text
|
162
160
|
cond.push false
|
163
161
|
cmdarg.push false
|
164
|
-
result
|
165
|
-
end
|
166
|
-
|
167
|
-
def heredoc here # TODO: rewrite / remove
|
168
|
-
_, eos, func, last_line = here
|
169
|
-
|
170
|
-
indent = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
|
171
|
-
content_indent = (func & STR_FUNC_ICNTNT) != 0
|
172
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
173
|
-
eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
|
174
|
-
err_msg = "can't match #{eos_re.inspect} anywhere in "
|
175
|
-
|
176
|
-
rb_compile_error err_msg if end_of_stream?
|
177
|
-
|
178
|
-
if beginning_of_line? && scan(eos_re) then
|
179
|
-
self.lineno += 1
|
180
|
-
ss.unread_many last_line # TODO: figure out how to remove this
|
181
|
-
return :tSTRING_END, eos
|
182
|
-
end
|
183
|
-
|
184
|
-
self.string_buffer = []
|
185
|
-
|
186
|
-
if expand then
|
187
|
-
case
|
188
|
-
when scan(/#[$@]/) then
|
189
|
-
ss.pos -= 1 # FIX omg stupid
|
190
|
-
return :tSTRING_DVAR, matched
|
191
|
-
when scan(/#[{]/) then
|
192
|
-
return :tSTRING_DBEG, matched
|
193
|
-
when scan(/#/) then
|
194
|
-
string_buffer << '#'
|
195
|
-
end
|
196
|
-
|
197
|
-
begin
|
198
|
-
c = tokadd_string func, "\n", nil
|
199
|
-
|
200
|
-
rb_compile_error err_msg if
|
201
|
-
c == RubyLexer::EOF
|
202
|
-
|
203
|
-
if c != "\n" then
|
204
|
-
return :tSTRING_CONTENT, string_buffer.join.delete("\r")
|
205
|
-
else
|
206
|
-
string_buffer << scan(/\n/)
|
207
|
-
end
|
208
|
-
|
209
|
-
rb_compile_error err_msg if end_of_stream?
|
210
|
-
end until check(eos_re)
|
211
|
-
else
|
212
|
-
until check(eos_re) do
|
213
|
-
string_buffer << scan(/.*(\n|\z)/)
|
214
|
-
rb_compile_error err_msg if end_of_stream?
|
215
|
-
end
|
216
|
-
end
|
217
|
-
|
218
|
-
self.lex_strterm = [:heredoc, eos, func, last_line]
|
219
|
-
|
220
|
-
string_content = string_buffer.join.delete("\r")
|
221
|
-
|
222
|
-
string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
|
223
|
-
|
224
|
-
return :tSTRING_CONTENT, string_content
|
162
|
+
result EXPR_BEG, token, text
|
225
163
|
end
|
226
164
|
|
227
|
-
def
|
228
|
-
|
229
|
-
heredoc_whitespace_indent_size whitespace
|
230
|
-
end.min || 0
|
231
|
-
|
232
|
-
string_content.split("\n", -1).map do |line|
|
233
|
-
dedent_string line, width
|
234
|
-
end.join "\n"
|
235
|
-
end
|
236
|
-
|
237
|
-
def dedent_string(string, width)
|
238
|
-
characters_skipped = 0
|
239
|
-
indentation_skipped = 0
|
240
|
-
|
241
|
-
string.chars.each do |char|
|
242
|
-
break if indentation_skipped >= width
|
243
|
-
if char == ' '
|
244
|
-
characters_skipped += 1
|
245
|
-
indentation_skipped += 1
|
246
|
-
elsif char == "\t"
|
247
|
-
proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
|
248
|
-
break if (proposed > width)
|
249
|
-
characters_skipped += 1
|
250
|
-
indentation_skipped = proposed
|
251
|
-
end
|
252
|
-
end
|
253
|
-
string[characters_skipped..-1]
|
254
|
-
end
|
255
|
-
|
256
|
-
def heredoc_whitespace_indent_size(whitespace)
|
257
|
-
whitespace.chars.inject 0 do |size, char|
|
258
|
-
if char == "\t"
|
259
|
-
size + TAB_WIDTH
|
260
|
-
else
|
261
|
-
size + 1
|
262
|
-
end
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
266
|
-
def heredoc_identifier # TODO: remove / rewrite
|
267
|
-
term, func = nil, STR_FUNC_BORING
|
268
|
-
self.string_buffer = []
|
269
|
-
|
270
|
-
heredoc_indent_mods = '-'
|
271
|
-
heredoc_indent_mods += '\~' if ruby23plus?
|
272
|
-
|
273
|
-
case
|
274
|
-
when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
|
275
|
-
term = ss[2]
|
276
|
-
func |= STR_FUNC_INDENT unless ss[1].empty?
|
277
|
-
func |= STR_FUNC_ICNTNT if ss[1] == '~'
|
278
|
-
func |= case term
|
279
|
-
when "\'" then
|
280
|
-
STR_SQUOTE
|
281
|
-
when '"' then
|
282
|
-
STR_DQUOTE
|
283
|
-
else
|
284
|
-
STR_XQUOTE
|
285
|
-
end
|
286
|
-
string_buffer << ss[3]
|
287
|
-
when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
|
288
|
-
rb_compile_error "unterminated here document identifier"
|
289
|
-
when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
|
290
|
-
term = '"'
|
291
|
-
func |= STR_DQUOTE
|
292
|
-
unless ss[1].empty? then
|
293
|
-
func |= STR_FUNC_INDENT
|
294
|
-
func |= STR_FUNC_ICNTNT if ss[1] == '~'
|
295
|
-
end
|
296
|
-
string_buffer << ss[2]
|
297
|
-
else
|
298
|
-
return nil
|
299
|
-
end
|
300
|
-
|
301
|
-
if scan(/.*\n/) then
|
302
|
-
# TODO: think about storing off the char range instead
|
303
|
-
line = matched
|
304
|
-
else
|
305
|
-
line = nil
|
306
|
-
end
|
307
|
-
|
308
|
-
self.lex_strterm = [:heredoc, string_buffer.join, func, line]
|
309
|
-
|
310
|
-
if term == '`' then
|
311
|
-
result nil, :tXSTRING_BEG, "`"
|
312
|
-
else
|
313
|
-
result nil, :tSTRING_BEG, "\""
|
314
|
-
end
|
315
|
-
end
|
316
|
-
|
317
|
-
def in_fname?
|
318
|
-
in_lex_state? :expr_fname
|
319
|
-
end
|
320
|
-
|
321
|
-
def in_arg_state? # TODO: rename is_after_operator?
|
322
|
-
in_lex_state? :expr_fname, :expr_dot
|
323
|
-
end
|
324
|
-
|
325
|
-
def in_lex_state?(*states)
|
326
|
-
states.include? lex_state
|
165
|
+
def in_fname? # REFACTOR
|
166
|
+
lex_state =~ EXPR_FNAME
|
327
167
|
end
|
328
168
|
|
329
169
|
def int_with_base base
|
@@ -331,35 +171,35 @@ class RubyLexer
|
|
331
171
|
|
332
172
|
text = matched
|
333
173
|
case
|
334
|
-
when text.end_with?(
|
335
|
-
|
336
|
-
when text.end_with?(
|
337
|
-
|
338
|
-
when text.end_with?(
|
339
|
-
|
174
|
+
when text.end_with?("ri")
|
175
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
|
176
|
+
when text.end_with?("r")
|
177
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
|
178
|
+
when text.end_with?("i")
|
179
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
|
340
180
|
else
|
341
|
-
|
181
|
+
result EXPR_NUM, :tINTEGER, text.to_i(base)
|
342
182
|
end
|
343
183
|
end
|
344
184
|
|
185
|
+
def is_after_operator?
|
186
|
+
lex_state =~ EXPR_FNAME|EXPR_DOT
|
187
|
+
end
|
188
|
+
|
345
189
|
def is_arg?
|
346
|
-
|
190
|
+
lex_state =~ EXPR_ARG_ANY
|
347
191
|
end
|
348
192
|
|
349
193
|
def is_beg?
|
350
|
-
|
194
|
+
lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
|
351
195
|
end
|
352
196
|
|
353
197
|
def is_end?
|
354
|
-
|
355
|
-
end
|
356
|
-
|
357
|
-
def ruby22_label?
|
358
|
-
ruby22plus? and is_label_possible?
|
198
|
+
lex_state =~ EXPR_END_ANY
|
359
199
|
end
|
360
200
|
|
361
201
|
def is_label_possible?
|
362
|
-
(
|
202
|
+
(lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
|
363
203
|
end
|
364
204
|
|
365
205
|
def is_label_suffix?
|
@@ -370,31 +210,51 @@ class RubyLexer
|
|
370
210
|
is_arg? and space_seen and c !~ /\s/
|
371
211
|
end
|
372
212
|
|
373
|
-
def
|
374
|
-
|
213
|
+
def lambda_beginning?
|
214
|
+
lpar_beg && lpar_beg == paren_nest
|
215
|
+
end
|
216
|
+
|
217
|
+
def is_local_id id
|
218
|
+
# maybe just make this false for now
|
219
|
+
self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
|
220
|
+
end
|
221
|
+
|
222
|
+
def lvar_defined? id
|
223
|
+
# TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
|
224
|
+
self.parser.env[id.to_sym] == :lvar
|
375
225
|
end
|
376
226
|
|
377
227
|
def not_end?
|
378
228
|
not is_end?
|
379
229
|
end
|
380
230
|
|
231
|
+
def possibly_escape_string text, check
|
232
|
+
content = match[1]
|
233
|
+
|
234
|
+
if text =~ check then
|
235
|
+
content.gsub(ESC) { unescape $1 }
|
236
|
+
else
|
237
|
+
content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
381
241
|
def process_amper text
|
382
242
|
token = if is_arg? && space_seen && !check(/\s/) then
|
383
243
|
warning("`&' interpreted as argument prefix")
|
384
244
|
:tAMPER
|
385
|
-
elsif
|
245
|
+
elsif lex_state =~ EXPR_BEG|EXPR_MID then
|
386
246
|
:tAMPER
|
387
247
|
else
|
388
248
|
:tAMPER2
|
389
249
|
end
|
390
250
|
|
391
|
-
|
251
|
+
result :arg_state, token, "&"
|
392
252
|
end
|
393
253
|
|
394
254
|
def process_backref text
|
395
|
-
token =
|
255
|
+
token = match[1].to_sym
|
396
256
|
# TODO: can't do lineno hack w/ symbol
|
397
|
-
result
|
257
|
+
result EXPR_END, :tBACK_REF, token
|
398
258
|
end
|
399
259
|
|
400
260
|
def process_begin text
|
@@ -406,220 +266,256 @@ class RubyLexer
|
|
406
266
|
end
|
407
267
|
|
408
268
|
@comments << matched
|
409
|
-
self.lineno += matched.count("\n")
|
269
|
+
self.lineno += matched.count("\n") # HACK?
|
410
270
|
|
411
271
|
nil # TODO
|
412
272
|
end
|
413
273
|
|
414
|
-
def
|
415
|
-
cond.lexpop
|
416
|
-
cmdarg.lexpop
|
417
|
-
|
274
|
+
def process_brace_close text
|
418
275
|
case matched
|
419
276
|
when "}" then
|
420
277
|
self.brace_nest -= 1
|
421
|
-
|
278
|
+
return :tSTRING_DEND, matched if brace_nest < 0
|
279
|
+
end
|
422
280
|
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
# }
|
281
|
+
# matching compare/parse26.y:8099
|
282
|
+
cond.pop
|
283
|
+
cmdarg.pop
|
427
284
|
|
285
|
+
case matched
|
286
|
+
when "}" then
|
287
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
428
288
|
return :tRCURLY, matched
|
429
289
|
when "]" then
|
430
290
|
self.paren_nest -= 1
|
431
|
-
self.lex_state = :
|
291
|
+
self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
|
432
292
|
return :tRBRACK, matched
|
433
293
|
when ")" then
|
434
294
|
self.paren_nest -= 1
|
435
|
-
self.lex_state =
|
295
|
+
self.lex_state = EXPR_ENDFN
|
436
296
|
return :tRPAREN, matched
|
437
297
|
else
|
438
298
|
raise "Unknown bracing: #{matched.inspect}"
|
439
299
|
end
|
440
300
|
end
|
441
301
|
|
302
|
+
def process_brace_open text
|
303
|
+
# matching compare/parse23.y:8694
|
304
|
+
self.brace_nest += 1
|
305
|
+
|
306
|
+
if lambda_beginning? then
|
307
|
+
self.lpar_beg = nil
|
308
|
+
self.paren_nest -= 1 # close arg list when lambda opens body
|
309
|
+
|
310
|
+
return expr_result(:tLAMBEG, "{")
|
311
|
+
end
|
312
|
+
|
313
|
+
token = case
|
314
|
+
when lex_state =~ EXPR_LABELED then
|
315
|
+
:tLBRACE # hash
|
316
|
+
when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
|
317
|
+
:tLCURLY # block (primary) "{" in parse.y
|
318
|
+
when lex_state =~ EXPR_ENDARG then
|
319
|
+
:tLBRACE_ARG # block (expr)
|
320
|
+
else
|
321
|
+
:tLBRACE # hash
|
322
|
+
end
|
323
|
+
|
324
|
+
state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
|
325
|
+
self.command_start = true if token != :tLBRACE
|
326
|
+
|
327
|
+
cond.push false
|
328
|
+
cmdarg.push false
|
329
|
+
result state, token, text
|
330
|
+
end
|
331
|
+
|
442
332
|
def process_colon1 text
|
443
333
|
# ?: / then / when
|
444
334
|
if is_end? || check(/\s/) then
|
445
|
-
return result
|
335
|
+
return result EXPR_BEG, :tCOLON, text
|
446
336
|
end
|
447
337
|
|
448
338
|
case
|
449
339
|
when scan(/\'/) then
|
450
|
-
string STR_SSYM
|
340
|
+
string STR_SSYM, matched
|
451
341
|
when scan(/\"/) then
|
452
|
-
string STR_DSYM
|
342
|
+
string STR_DSYM, matched
|
453
343
|
end
|
454
344
|
|
455
|
-
result
|
345
|
+
result EXPR_FNAME, :tSYMBEG, text
|
456
346
|
end
|
457
347
|
|
458
348
|
def process_colon2 text
|
459
|
-
if is_beg? ||
|
460
|
-
result
|
349
|
+
if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
|
350
|
+
result EXPR_BEG, :tCOLON3, text
|
461
351
|
else
|
462
|
-
result
|
352
|
+
result EXPR_DOT, :tCOLON2, text
|
463
353
|
end
|
464
354
|
end
|
465
355
|
|
466
|
-
def
|
467
|
-
|
468
|
-
if lpar_beg && lpar_beg == paren_nest then
|
469
|
-
self.lpar_beg = nil
|
470
|
-
self.paren_nest -= 1
|
471
|
-
|
472
|
-
return expr_result(:tLAMBEG, "{")
|
473
|
-
end
|
356
|
+
def process_dots text
|
357
|
+
tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
|
474
358
|
|
475
|
-
|
476
|
-
:tLCURLY # block (primary)
|
477
|
-
elsif in_lex_state?(:expr_endarg) then
|
478
|
-
:tLBRACE_ARG # block (expr)
|
479
|
-
else
|
480
|
-
:tLBRACE # hash
|
481
|
-
end
|
482
|
-
|
483
|
-
self.command_start = true unless token == :tLBRACE
|
484
|
-
|
485
|
-
return expr_result(token, "{")
|
359
|
+
result EXPR_BEG, tokens[text], text
|
486
360
|
end
|
487
361
|
|
488
362
|
def process_float text
|
489
363
|
rb_compile_error "Invalid numeric format" if text =~ /__/
|
490
364
|
|
491
365
|
case
|
492
|
-
when text.end_with?(
|
493
|
-
|
494
|
-
when text.end_with?(
|
495
|
-
|
496
|
-
when text.end_with?(
|
497
|
-
|
366
|
+
when text.end_with?("ri")
|
367
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
|
368
|
+
when text.end_with?("i")
|
369
|
+
result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
|
370
|
+
when text.end_with?("r")
|
371
|
+
result EXPR_NUM, :tRATIONAL, Rational(text.chop)
|
498
372
|
else
|
499
|
-
|
373
|
+
result EXPR_NUM, :tFLOAT, text.to_f
|
500
374
|
end
|
501
375
|
end
|
502
376
|
|
503
377
|
def process_gvar text
|
504
|
-
|
505
|
-
|
378
|
+
if parser.class.version > 20 && text == "$-" then
|
379
|
+
rb_compile_error "unexpected $undefined"
|
380
|
+
end
|
381
|
+
|
382
|
+
result EXPR_END, :tGVAR, text
|
506
383
|
end
|
507
384
|
|
508
385
|
def process_gvar_oddity text
|
509
|
-
return result :expr_end, "$", "$" if text == "$" # TODO: wtf is this?
|
510
386
|
rb_compile_error "#{text.inspect} is not allowed as a global variable name"
|
511
387
|
end
|
512
388
|
|
513
389
|
def process_ivar text
|
514
390
|
tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
|
515
|
-
|
516
|
-
|
391
|
+
result EXPR_END, tok_id, text
|
392
|
+
end
|
393
|
+
|
394
|
+
def process_label text
|
395
|
+
symbol = possibly_escape_string text, /^\"/
|
396
|
+
|
397
|
+
result EXPR_LAB, :tLABEL, symbol
|
398
|
+
end
|
399
|
+
|
400
|
+
def process_label_or_string text
|
401
|
+
if @was_label && text =~ /:\Z/ then
|
402
|
+
@was_label = nil
|
403
|
+
return process_label text
|
404
|
+
elsif text =~ /:\Z/ then
|
405
|
+
self.pos -= 1 # put back ":"
|
406
|
+
text = text[0..-2]
|
407
|
+
end
|
408
|
+
|
409
|
+
orig_line = lineno
|
410
|
+
str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
|
411
|
+
self.lineno += str.count("\n")
|
412
|
+
|
413
|
+
result EXPR_END, :tSTRING, str, orig_line
|
517
414
|
end
|
518
415
|
|
519
416
|
def process_lchevron text
|
520
|
-
if (
|
417
|
+
if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
|
521
418
|
!is_end? &&
|
522
|
-
(!is_arg? || space_seen)) then
|
419
|
+
(!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
|
523
420
|
tok = self.heredoc_identifier
|
524
421
|
return tok if tok
|
525
422
|
end
|
526
423
|
|
527
|
-
|
424
|
+
if is_after_operator? then
|
425
|
+
self.lex_state = EXPR_ARG
|
426
|
+
else
|
427
|
+
self.command_start = true if lex_state =~ EXPR_CLASS
|
428
|
+
self.lex_state = EXPR_BEG
|
429
|
+
end
|
430
|
+
|
431
|
+
result lex_state, :tLSHFT, "\<\<"
|
528
432
|
end
|
529
433
|
|
530
|
-
def process_newline_or_comment text
|
434
|
+
def process_newline_or_comment text # ../compare/parse30.y:9126 ish
|
531
435
|
c = matched
|
532
|
-
hit = false
|
533
436
|
|
534
|
-
if c ==
|
535
|
-
|
437
|
+
if c == "#" then
|
438
|
+
self.pos -= 1
|
536
439
|
|
537
440
|
while scan(/\s*\#.*(\n+|\z)/) do
|
538
|
-
|
539
|
-
|
540
|
-
@comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
|
441
|
+
self.lineno += matched.count "\n"
|
442
|
+
@comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
|
541
443
|
end
|
542
444
|
|
543
445
|
return nil if end_of_stream?
|
544
446
|
end
|
545
447
|
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
448
|
+
c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
|
449
|
+
lex_state !~ EXPR_LABELED)
|
450
|
+
if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
|
451
|
+
# ignore if !fallthrough?
|
452
|
+
if !c && parser.in_kwarg then
|
453
|
+
# normal newline
|
454
|
+
self.command_start = true
|
455
|
+
return result EXPR_BEG, :tNL, nil
|
456
|
+
else
|
457
|
+
maybe_pop_stack
|
458
|
+
return # goto retry
|
459
|
+
end
|
460
|
+
end
|
553
461
|
|
554
|
-
if scan(/
|
555
|
-
self.space_seen = true
|
462
|
+
if scan(/[\ \t\r\f\v]+/) then
|
463
|
+
self.space_seen = true
|
464
|
+
end
|
556
465
|
|
557
|
-
|
558
|
-
return
|
466
|
+
if check(/#/) then
|
467
|
+
return # goto retry
|
468
|
+
elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
|
469
|
+
return # goto retry
|
559
470
|
end
|
560
471
|
|
561
472
|
self.command_start = true
|
562
473
|
|
563
|
-
|
474
|
+
result EXPR_BEG, :tNL, nil
|
564
475
|
end
|
565
476
|
|
566
477
|
def process_nthref text
|
567
478
|
# TODO: can't do lineno hack w/ number
|
568
|
-
result
|
479
|
+
result EXPR_END, :tNTH_REF, match[1].to_i
|
569
480
|
end
|
570
481
|
|
571
482
|
def process_paren text
|
572
|
-
token = if
|
573
|
-
|
483
|
+
token = if is_beg? then
|
484
|
+
:tLPAREN
|
485
|
+
elsif !space_seen then
|
486
|
+
# foo( ... ) => method call, no ambiguity
|
487
|
+
:tLPAREN2
|
488
|
+
elsif is_space_arg? then
|
489
|
+
:tLPAREN_ARG
|
490
|
+
elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
|
491
|
+
# TODO:
|
492
|
+
# warn("parentheses after method name is interpreted as " \
|
493
|
+
# "an argument list, not a decomposed argument")
|
494
|
+
:tLPAREN2
|
574
495
|
else
|
575
|
-
|
496
|
+
:tLPAREN2 # plain "(" in parse.y
|
576
497
|
end
|
577
498
|
|
578
499
|
self.paren_nest += 1
|
579
500
|
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
def process_paren18
|
585
|
-
self.command_start = true
|
586
|
-
token = :tLPAREN2
|
587
|
-
|
588
|
-
if in_lex_state? :expr_beg, :expr_mid then
|
589
|
-
token = :tLPAREN
|
590
|
-
elsif space_seen then
|
591
|
-
if in_lex_state? :expr_cmdarg then
|
592
|
-
token = :tLPAREN_ARG
|
593
|
-
elsif in_lex_state? :expr_arg then
|
594
|
-
warning "don't put space before argument parentheses"
|
595
|
-
end
|
596
|
-
else
|
597
|
-
# not a ternary -- do nothing?
|
598
|
-
end
|
599
|
-
|
600
|
-
token
|
501
|
+
cond.push false
|
502
|
+
cmdarg.push false
|
503
|
+
result EXPR_PAR, token, text
|
601
504
|
end
|
602
505
|
|
603
|
-
def
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
506
|
+
def process_percent text
|
507
|
+
case
|
508
|
+
when is_beg? then
|
509
|
+
process_percent_quote
|
510
|
+
when scan(/\=/)
|
511
|
+
result EXPR_BEG, :tOP_ASGN, "%"
|
512
|
+
when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
|
513
|
+
process_percent_quote
|
608
514
|
else
|
609
|
-
:
|
515
|
+
result :arg_state, :tPERCENT, "%"
|
610
516
|
end
|
611
517
|
end
|
612
518
|
|
613
|
-
def process_percent text
|
614
|
-
return parse_quote if is_beg?
|
615
|
-
|
616
|
-
return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
|
617
|
-
|
618
|
-
return parse_quote if is_arg? && space_seen && ! check(/\s/)
|
619
|
-
|
620
|
-
return result(:arg_state, :tPERCENT, "%")
|
621
|
-
end
|
622
|
-
|
623
519
|
def process_plus_minus text
|
624
520
|
sign = matched
|
625
521
|
utype, type = if sign == "+" then
|
@@ -628,34 +524,33 @@ class RubyLexer
|
|
628
524
|
[:tUMINUS, :tMINUS]
|
629
525
|
end
|
630
526
|
|
631
|
-
if
|
527
|
+
if is_after_operator? then
|
632
528
|
if scan(/@/) then
|
633
|
-
return result(
|
529
|
+
return result(EXPR_ARG, utype, "#{sign}@")
|
634
530
|
else
|
635
|
-
return result(
|
531
|
+
return result(EXPR_ARG, type, sign)
|
636
532
|
end
|
637
533
|
end
|
638
534
|
|
639
|
-
return result(
|
535
|
+
return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
|
640
536
|
|
641
|
-
if
|
537
|
+
if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
|
642
538
|
arg_ambiguous if is_arg?
|
643
539
|
|
644
540
|
if check(/\d/) then
|
645
541
|
return nil if utype == :tUPLUS
|
646
|
-
return result
|
542
|
+
return result EXPR_BEG, :tUMINUS_NUM, sign
|
647
543
|
end
|
648
544
|
|
649
|
-
return result
|
545
|
+
return result EXPR_BEG, utype, sign
|
650
546
|
end
|
651
547
|
|
652
|
-
|
548
|
+
result EXPR_BEG, type, sign
|
653
549
|
end
|
654
550
|
|
655
551
|
def process_questionmark text
|
656
552
|
if is_end? then
|
657
|
-
|
658
|
-
return result(state, :tEH, "?")
|
553
|
+
return result EXPR_BEG, :tEH, "?"
|
659
554
|
end
|
660
555
|
|
661
556
|
if end_of_stream? then
|
@@ -664,12 +559,12 @@ class RubyLexer
|
|
664
559
|
|
665
560
|
if check(/\s|\v/) then
|
666
561
|
unless is_arg? then
|
667
|
-
c2 = { " " =>
|
668
|
-
"\n" =>
|
669
|
-
"\t" =>
|
670
|
-
"\v" =>
|
671
|
-
"\r" =>
|
672
|
-
"\f" =>
|
562
|
+
c2 = { " " => "s",
|
563
|
+
"\n" => "n",
|
564
|
+
"\t" => "t",
|
565
|
+
"\v" => "v",
|
566
|
+
"\r" => "r",
|
567
|
+
"\f" => "f" }[matched]
|
673
568
|
|
674
569
|
if c2 then
|
675
570
|
warning("invalid character syntax; use ?\\" + c2)
|
@@ -677,34 +572,40 @@ class RubyLexer
|
|
677
572
|
end
|
678
573
|
|
679
574
|
# ternary
|
680
|
-
|
681
|
-
return result(state, :tEH, "?")
|
575
|
+
return result EXPR_BEG, :tEH, "?"
|
682
576
|
elsif check(/\w(?=\w)/) then # ternary, also
|
683
|
-
return result
|
577
|
+
return result EXPR_BEG, :tEH, "?"
|
684
578
|
end
|
685
579
|
|
686
580
|
c = if scan(/\\/) then
|
687
581
|
self.read_escape
|
688
582
|
else
|
689
|
-
|
583
|
+
getch
|
690
584
|
end
|
691
585
|
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
586
|
+
result EXPR_END, :tSTRING, c
|
587
|
+
end
|
588
|
+
|
589
|
+
def process_simple_string text
|
590
|
+
orig_line = lineno
|
591
|
+
self.lineno += text.count("\n")
|
592
|
+
|
593
|
+
str = text[1..-2]
|
594
|
+
.gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
|
595
|
+
str = str.b unless str.valid_encoding?
|
596
|
+
|
597
|
+
result EXPR_END, :tSTRING, str, orig_line
|
697
598
|
end
|
698
599
|
|
699
600
|
def process_slash text
|
700
601
|
if is_beg? then
|
701
|
-
string STR_REGEXP
|
602
|
+
string STR_REGEXP, matched
|
702
603
|
|
703
|
-
return result
|
604
|
+
return result nil, :tREGEXP_BEG, "/"
|
704
605
|
end
|
705
606
|
|
706
607
|
if scan(/\=/) then
|
707
|
-
return result(
|
608
|
+
return result(EXPR_BEG, :tOP_ASGN, "/")
|
708
609
|
end
|
709
610
|
|
710
611
|
if is_arg? && space_seen then
|
@@ -715,7 +616,7 @@ class RubyLexer
|
|
715
616
|
end
|
716
617
|
end
|
717
618
|
|
718
|
-
|
619
|
+
result :arg_state, :tDIVIDE, "/"
|
719
620
|
end
|
720
621
|
|
721
622
|
def process_square_bracket text
|
@@ -723,72 +624,40 @@ class RubyLexer
|
|
723
624
|
|
724
625
|
token = nil
|
725
626
|
|
726
|
-
if
|
627
|
+
if is_after_operator? then
|
727
628
|
case
|
728
629
|
when scan(/\]\=/) then
|
729
630
|
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
730
|
-
return result
|
631
|
+
return result EXPR_ARG, :tASET, "[]="
|
731
632
|
when scan(/\]/) then
|
732
633
|
self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
|
733
|
-
return result
|
634
|
+
return result EXPR_ARG, :tAREF, "[]"
|
734
635
|
else
|
735
636
|
rb_compile_error "unexpected '['"
|
736
637
|
end
|
737
638
|
elsif is_beg? then
|
738
639
|
token = :tLBRACK
|
739
|
-
elsif is_arg? && space_seen then
|
640
|
+
elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
|
740
641
|
token = :tLBRACK
|
741
642
|
else
|
742
643
|
token = :tLBRACK2
|
743
644
|
end
|
744
645
|
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
def possibly_escape_string text, check
|
749
|
-
content = match[1]
|
750
|
-
|
751
|
-
if text =~ check then
|
752
|
-
content.gsub(ESC) { unescape $1 }
|
753
|
-
else
|
754
|
-
content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
|
755
|
-
end
|
646
|
+
cond.push false
|
647
|
+
cmdarg.push false
|
648
|
+
result EXPR_PAR, token, text
|
756
649
|
end
|
757
650
|
|
758
651
|
def process_symbol text
|
759
|
-
symbol = possibly_escape_string text,
|
760
|
-
|
761
|
-
rb_compile_error "symbol cannot contain '\\0'" if
|
762
|
-
ruby18 && symbol =~ /\0/
|
763
|
-
|
764
|
-
return result(:expr_end, :tSYMBOL, symbol)
|
765
|
-
end
|
766
|
-
|
767
|
-
def was_label?
|
768
|
-
@was_label = ruby22_label?
|
769
|
-
true
|
770
|
-
end
|
652
|
+
symbol = possibly_escape_string text, /^:\"/ # stupid emacs
|
771
653
|
|
772
|
-
|
773
|
-
if @was_label && text =~ /:\Z/ then
|
774
|
-
@was_label = nil
|
775
|
-
return process_label text
|
776
|
-
elsif text =~ /:\Z/ then
|
777
|
-
ss.pos -= 1 # put back ":"
|
778
|
-
text = text[0..-2]
|
779
|
-
end
|
780
|
-
|
781
|
-
result :expr_end, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
|
782
|
-
end
|
783
|
-
|
784
|
-
def process_label text
|
785
|
-
symbol = possibly_escape_string text, /^"/
|
786
|
-
|
787
|
-
result(:expr_labelarg, :tLABEL, [symbol, self.lineno])
|
654
|
+
result EXPR_LIT, :tSYMBOL, symbol
|
788
655
|
end
|
789
656
|
|
790
657
|
def process_token text
|
791
|
-
#
|
658
|
+
# matching: parse_ident in compare/parse23.y:7989
|
659
|
+
# FIX: remove: self.last_state = lex_state
|
660
|
+
|
792
661
|
token = self.token = text
|
793
662
|
token << matched if scan(/[\!\?](?!=)/)
|
794
663
|
|
@@ -796,7 +665,7 @@ class RubyLexer
|
|
796
665
|
case
|
797
666
|
when token =~ /[!?]$/ then
|
798
667
|
:tFID
|
799
|
-
when
|
668
|
+
when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
|
800
669
|
# ident=, not =~ => == or followed by =>
|
801
670
|
# TODO test lexing of a=>b vs a==>b
|
802
671
|
token << matched
|
@@ -807,216 +676,133 @@ class RubyLexer
|
|
807
676
|
:tIDENTIFIER
|
808
677
|
end
|
809
678
|
|
810
|
-
if
|
679
|
+
if is_label_possible? and is_label_suffix? then
|
811
680
|
scan(/:/)
|
812
|
-
return result
|
681
|
+
return result EXPR_LAB, :tLABEL, token
|
813
682
|
end
|
814
683
|
|
815
|
-
|
684
|
+
# TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
|
685
|
+
if lex_state !~ EXPR_DOT then
|
816
686
|
# See if it is a reserved word.
|
817
|
-
keyword =
|
818
|
-
RubyParserStuff::Keyword.keyword18 token
|
819
|
-
else
|
820
|
-
RubyParserStuff::Keyword.keyword19 token
|
821
|
-
end
|
687
|
+
keyword = RubyParserStuff::Keyword.keyword token
|
822
688
|
|
823
689
|
return process_token_keyword keyword if keyword
|
824
|
-
end
|
825
|
-
|
826
|
-
# TODO:
|
827
|
-
# if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
|
690
|
+
end
|
828
691
|
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
692
|
+
# matching: compare/parse30.y:9039
|
693
|
+
state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
|
694
|
+
cmd_state ? EXPR_CMDARG : EXPR_ARG
|
695
|
+
elsif lex_state =~ EXPR_FNAME then
|
696
|
+
EXPR_ENDFN
|
833
697
|
else
|
834
|
-
|
698
|
+
EXPR_END
|
835
699
|
end
|
700
|
+
self.lex_state = state
|
836
701
|
|
837
|
-
|
838
|
-
self.parser.env[token.to_sym] == :lvar then
|
839
|
-
state = :expr_end
|
840
|
-
end
|
702
|
+
tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
|
841
703
|
|
842
|
-
|
704
|
+
if last_state !~ EXPR_DOT|EXPR_FNAME and
|
705
|
+
(tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
|
706
|
+
lvar_defined?(token) then
|
707
|
+
state = EXPR_END|EXPR_LABEL
|
708
|
+
end
|
843
709
|
|
844
|
-
|
710
|
+
result state, tok_id, token
|
845
711
|
end
|
846
712
|
|
847
713
|
def process_token_keyword keyword
|
848
|
-
|
714
|
+
# matching MIDDLE of parse_ident in compare/parse23.y:8046
|
715
|
+
state = lex_state
|
849
716
|
|
850
|
-
|
717
|
+
return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
|
851
718
|
|
852
|
-
self.
|
719
|
+
self.lex_state = keyword.state
|
720
|
+
self.command_start = true if lex_state =~ EXPR_BEG
|
853
721
|
|
854
722
|
case
|
855
|
-
when
|
856
|
-
result(state, keyword.id0, keyword.name)
|
857
|
-
when keyword.id0 == :kDO then
|
723
|
+
when keyword.id0 == :kDO then # parse26.y line 7591
|
858
724
|
case
|
859
|
-
when
|
860
|
-
self.lpar_beg = nil
|
861
|
-
self.paren_nest -= 1
|
862
|
-
|
725
|
+
when lambda_beginning? then
|
726
|
+
self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
|
727
|
+
self.paren_nest -= 1 # TODO: question this?
|
728
|
+
result lex_state, :kDO_LAMBDA, token
|
863
729
|
when cond.is_in_state then
|
864
|
-
result
|
865
|
-
when cmdarg.is_in_state &&
|
866
|
-
result
|
867
|
-
when in_lex_state?(:expr_beg, :expr_endarg) then
|
868
|
-
result(state, :kDO_BLOCK, value)
|
869
|
-
when lex_state == :expr_end # eg: a -> do end do end
|
870
|
-
result(state, :kDO_BLOCK, value)
|
730
|
+
result lex_state, :kDO_COND, token
|
731
|
+
when cmdarg.is_in_state && state != EXPR_CMDARG then
|
732
|
+
result lex_state, :kDO_BLOCK, token
|
871
733
|
else
|
872
|
-
result
|
734
|
+
result lex_state, :kDO, token
|
873
735
|
end
|
874
|
-
when
|
875
|
-
result
|
736
|
+
when state =~ EXPR_PAD then
|
737
|
+
result lex_state, keyword.id0, token
|
876
738
|
when keyword.id0 != keyword.id1 then
|
877
|
-
result
|
739
|
+
result EXPR_PAR, keyword.id1, token
|
878
740
|
else
|
879
|
-
result
|
741
|
+
result lex_state, keyword.id1, token
|
880
742
|
end
|
881
743
|
end
|
882
744
|
|
883
745
|
def process_underscore text
|
884
|
-
|
746
|
+
self.unscan # put back "_"
|
885
747
|
|
886
748
|
if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
|
887
|
-
|
888
|
-
|
889
|
-
|
749
|
+
ss.terminate
|
750
|
+
[RubyLexer::EOF, RubyLexer::EOF]
|
751
|
+
elsif scan(/#{IDENT_CHAR}+/) then
|
752
|
+
process_token matched
|
890
753
|
end
|
891
754
|
end
|
892
755
|
|
893
756
|
def rb_compile_error msg
|
894
|
-
msg += ". near line #{self.lineno}: #{
|
757
|
+
msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
|
895
758
|
raise RubyParser::SyntaxError, msg
|
896
759
|
end
|
897
760
|
|
898
|
-
def read_escape # TODO: remove / rewrite
|
899
|
-
case
|
900
|
-
when scan(/\\/) then # Backslash
|
901
|
-
'\\'
|
902
|
-
when scan(/n/) then # newline
|
903
|
-
self.extra_lineno -= 1
|
904
|
-
"\n"
|
905
|
-
when scan(/t/) then # horizontal tab
|
906
|
-
"\t"
|
907
|
-
when scan(/r/) then # carriage-return
|
908
|
-
"\r"
|
909
|
-
when scan(/f/) then # form-feed
|
910
|
-
"\f"
|
911
|
-
when scan(/v/) then # vertical tab
|
912
|
-
"\13"
|
913
|
-
when scan(/a/) then # alarm(bell)
|
914
|
-
"\007"
|
915
|
-
when scan(/e/) then # escape
|
916
|
-
"\033"
|
917
|
-
when scan(/b/) then # backspace
|
918
|
-
"\010"
|
919
|
-
when scan(/s/) then # space
|
920
|
-
" "
|
921
|
-
when scan(/[0-7]{1,3}/) then # octal constant
|
922
|
-
(matched.to_i(8) & 0xFF).chr
|
923
|
-
when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
|
924
|
-
ss[1].to_i(16).chr
|
925
|
-
when check(/M-\\[\\MCc]/) then
|
926
|
-
scan(/M-\\/) # eat it
|
927
|
-
c = self.read_escape
|
928
|
-
c[0] = (c[0].ord | 0x80).chr
|
929
|
-
c
|
930
|
-
when scan(/M-(.)/) then
|
931
|
-
c = ss[1]
|
932
|
-
c[0] = (c[0].ord | 0x80).chr
|
933
|
-
c
|
934
|
-
when check(/(C-|c)\\[\\MCc]/) then
|
935
|
-
scan(/(C-|c)\\/) # eat it
|
936
|
-
c = self.read_escape
|
937
|
-
c[0] = (c[0].ord & 0x9f).chr
|
938
|
-
c
|
939
|
-
when scan(/C-\?|c\?/) then
|
940
|
-
127.chr
|
941
|
-
when scan(/(C-|c)(.)/) then
|
942
|
-
c = ss[2]
|
943
|
-
c[0] = (c[0].ord & 0x9f).chr
|
944
|
-
c
|
945
|
-
when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
|
946
|
-
matched
|
947
|
-
when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
|
948
|
-
[ss[1].delete("{}").to_i(16)].pack("U")
|
949
|
-
when scan(/[McCx0-9]/) || end_of_stream? then
|
950
|
-
rb_compile_error("Invalid escape character syntax")
|
951
|
-
else
|
952
|
-
ss.getch
|
953
|
-
end.dup
|
954
|
-
end
|
955
|
-
|
956
|
-
def regx_options # TODO: rewrite / remove
|
957
|
-
good, bad = [], []
|
958
|
-
|
959
|
-
if scan(/[a-z]+/) then
|
960
|
-
good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
|
961
|
-
end
|
962
|
-
|
963
|
-
unless bad.empty? then
|
964
|
-
rb_compile_error("unknown regexp option%s - %s" %
|
965
|
-
[(bad.size > 1 ? "s" : ""), bad.join.inspect])
|
966
|
-
end
|
967
|
-
|
968
|
-
return good.join
|
969
|
-
end
|
970
|
-
|
971
761
|
def reset
|
762
|
+
self.lineno = 1
|
972
763
|
self.brace_nest = 0
|
973
764
|
self.command_start = true
|
974
765
|
self.comments = []
|
975
|
-
self.lex_state =
|
766
|
+
self.lex_state = EXPR_NONE
|
976
767
|
self.lex_strterm = nil
|
977
|
-
self.lineno = 1
|
978
768
|
self.lpar_beg = nil
|
979
769
|
self.paren_nest = 0
|
980
770
|
self.space_seen = false
|
981
771
|
self.string_nest = 0
|
982
772
|
self.token = nil
|
983
|
-
self.
|
773
|
+
self.string_buffer = []
|
774
|
+
self.old_ss = nil
|
775
|
+
self.old_lineno = nil
|
984
776
|
|
985
|
-
self.cmdarg.reset
|
986
777
|
self.cond.reset
|
778
|
+
self.cmdarg.reset
|
987
779
|
end
|
988
780
|
|
989
|
-
def result
|
990
|
-
|
991
|
-
self.lex_state =
|
992
|
-
[token, text]
|
993
|
-
end
|
781
|
+
def result new_state, token, text, line = self.lineno # :nodoc:
|
782
|
+
new_state = self.arg_state if new_state == :arg_state
|
783
|
+
self.lex_state = new_state if new_state
|
994
784
|
|
995
|
-
|
996
|
-
RubyParser::V18 === parser
|
785
|
+
[token, [text, line]]
|
997
786
|
end
|
998
787
|
|
999
|
-
def
|
1000
|
-
|
788
|
+
def ruby22_label?
|
789
|
+
ruby22plus? and is_label_possible?
|
1001
790
|
end
|
1002
791
|
|
1003
|
-
def
|
1004
|
-
|
792
|
+
def ruby22plus?
|
793
|
+
parser.class.version >= 22
|
1005
794
|
end
|
1006
795
|
|
1007
|
-
def
|
1008
|
-
|
1009
|
-
self.extra_lineno += r.count("\n") if r
|
1010
|
-
r
|
796
|
+
def ruby23plus?
|
797
|
+
parser.class.version >= 23
|
1011
798
|
end
|
1012
799
|
|
1013
|
-
def
|
1014
|
-
|
1015
|
-
self.extra_lineno = 0
|
800
|
+
def ruby24minus?
|
801
|
+
parser.class.version <= 24
|
1016
802
|
end
|
1017
803
|
|
1018
|
-
def
|
1019
|
-
|
804
|
+
def ruby27plus?
|
805
|
+
parser.class.version >= 27
|
1020
806
|
end
|
1021
807
|
|
1022
808
|
def space_vs_beginning space_type, beg_type, fallback
|
@@ -1031,139 +817,9 @@ class RubyLexer
|
|
1031
817
|
end
|
1032
818
|
end
|
1033
819
|
|
1034
|
-
def string type, beg = matched, nnd = "\0"
|
1035
|
-
self.lex_strterm = [:strterm, type, beg, nnd]
|
1036
|
-
end
|
1037
|
-
|
1038
|
-
# TODO: consider
|
1039
|
-
# def src= src
|
1040
|
-
# raise "bad src: #{src.inspect}" unless String === src
|
1041
|
-
# @src = RPStringScanner.new(src)
|
1042
|
-
# end
|
1043
|
-
|
1044
|
-
def tokadd_escape term # TODO: rewrite / remove
|
1045
|
-
case
|
1046
|
-
when scan(/\\\n/) then
|
1047
|
-
# just ignore
|
1048
|
-
when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
|
1049
|
-
self.string_buffer << matched
|
1050
|
-
when scan(/\\([MC]-|c)(?=\\)/) then
|
1051
|
-
self.string_buffer << matched
|
1052
|
-
self.tokadd_escape term
|
1053
|
-
when scan(/\\([MC]-|c)(.)/) then
|
1054
|
-
self.string_buffer << matched
|
1055
|
-
when scan(/\\[McCx]/) then
|
1056
|
-
rb_compile_error "Invalid escape character syntax"
|
1057
|
-
when scan(/\\(.)/m) then
|
1058
|
-
chr = ss[1]
|
1059
|
-
prev = self.string_buffer.last
|
1060
|
-
if term == chr && prev && prev.end_with?("(?") then
|
1061
|
-
self.string_buffer << chr
|
1062
|
-
else
|
1063
|
-
self.string_buffer << matched
|
1064
|
-
end
|
1065
|
-
else
|
1066
|
-
rb_compile_error "Invalid escape character syntax"
|
1067
|
-
end
|
1068
|
-
end
|
1069
|
-
|
1070
|
-
def tokadd_string(func, term, paren) # TODO: rewrite / remove
|
1071
|
-
qwords = (func & STR_FUNC_QWORDS) != 0
|
1072
|
-
escape = (func & STR_FUNC_ESCAPE) != 0
|
1073
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
1074
|
-
regexp = (func & STR_FUNC_REGEXP) != 0
|
1075
|
-
symbol = (func & STR_FUNC_SYMBOL) != 0
|
1076
|
-
|
1077
|
-
paren_re = @@regexp_cache[paren]
|
1078
|
-
term_re = @@regexp_cache[term]
|
1079
|
-
|
1080
|
-
until end_of_stream? do
|
1081
|
-
c = nil
|
1082
|
-
handled = true
|
1083
|
-
|
1084
|
-
case
|
1085
|
-
when paren_re && scan(paren_re) then
|
1086
|
-
self.string_nest += 1
|
1087
|
-
when scan(term_re) then
|
1088
|
-
if self.string_nest == 0 then
|
1089
|
-
ss.pos -= 1
|
1090
|
-
break
|
1091
|
-
else
|
1092
|
-
self.string_nest -= 1
|
1093
|
-
end
|
1094
|
-
when expand && scan(/#(?=[\$\@\{])/) then
|
1095
|
-
ss.pos -= 1
|
1096
|
-
break
|
1097
|
-
when qwords && scan(/\s/) then
|
1098
|
-
ss.pos -= 1
|
1099
|
-
break
|
1100
|
-
when expand && scan(/#(?!\n)/) then
|
1101
|
-
# do nothing
|
1102
|
-
when check(/\\/) then
|
1103
|
-
case
|
1104
|
-
when qwords && scan(/\\\n/) then
|
1105
|
-
string_buffer << "\n"
|
1106
|
-
next
|
1107
|
-
when qwords && scan(/\\\s/) then
|
1108
|
-
c = ' '
|
1109
|
-
when expand && scan(/\\\n/) then
|
1110
|
-
next
|
1111
|
-
when regexp && check(/\\/) then
|
1112
|
-
self.tokadd_escape term
|
1113
|
-
next
|
1114
|
-
when expand && scan(/\\/) then
|
1115
|
-
c = self.read_escape
|
1116
|
-
when scan(/\\\n/) then
|
1117
|
-
# do nothing
|
1118
|
-
when scan(/\\\\/) then
|
1119
|
-
string_buffer << '\\' if escape
|
1120
|
-
c = '\\'
|
1121
|
-
when scan(/\\/) then
|
1122
|
-
unless scan(term_re) || paren.nil? || scan(paren_re) then
|
1123
|
-
string_buffer << "\\"
|
1124
|
-
end
|
1125
|
-
else
|
1126
|
-
handled = false
|
1127
|
-
end # inner /\\/ case
|
1128
|
-
else
|
1129
|
-
handled = false
|
1130
|
-
end # top case
|
1131
|
-
|
1132
|
-
unless handled then
|
1133
|
-
t = Regexp.escape term
|
1134
|
-
x = Regexp.escape(paren) if paren && paren != "\000"
|
1135
|
-
re = if qwords then
|
1136
|
-
if HAS_ENC then
|
1137
|
-
/[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
|
1138
|
-
else
|
1139
|
-
/[^#{t}#{x}\#\0\\\s\v]+|./ # argh. 1.8's \s doesn't pick up \v
|
1140
|
-
end
|
1141
|
-
else
|
1142
|
-
/[^#{t}#{x}\#\0\\]+|./
|
1143
|
-
end
|
1144
|
-
|
1145
|
-
scan re
|
1146
|
-
c = matched
|
1147
|
-
|
1148
|
-
rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
|
1149
|
-
end # unless handled
|
1150
|
-
|
1151
|
-
c ||= matched
|
1152
|
-
string_buffer << c
|
1153
|
-
end # until
|
1154
|
-
|
1155
|
-
c ||= matched
|
1156
|
-
c = RubyLexer::EOF if end_of_stream?
|
1157
|
-
|
1158
|
-
return c
|
1159
|
-
end
|
1160
|
-
|
1161
820
|
def unescape s
|
1162
821
|
r = ESCAPES[s]
|
1163
822
|
|
1164
|
-
self.extra_lineno += 1 if s == "\n" # eg backslash newline strings
|
1165
|
-
self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
|
1166
|
-
|
1167
823
|
return r if r
|
1168
824
|
|
1169
825
|
x = case s
|
@@ -1179,12 +835,15 @@ class RubyLexer
|
|
1179
835
|
s
|
1180
836
|
when /^[McCx0-9]/ then
|
1181
837
|
rb_compile_error("Invalid escape character syntax")
|
1182
|
-
when /u(
|
838
|
+
when /u(\h{4})/ then
|
1183
839
|
[$1.delete("{}").to_i(16)].pack("U")
|
840
|
+
when /u(\h{1,3})/ then
|
841
|
+
rb_compile_error("Invalid escape character syntax")
|
842
|
+
when /u\{(\h+(?:\s+\h+)*)\}/ then
|
843
|
+
$1.split.map { |cp| cp.to_i(16) }.pack("U*")
|
1184
844
|
else
|
1185
845
|
s
|
1186
846
|
end
|
1187
|
-
x.force_encoding "UTF-8" if HAS_ENC
|
1188
847
|
x
|
1189
848
|
end
|
1190
849
|
|
@@ -1192,168 +851,294 @@ class RubyLexer
|
|
1192
851
|
# do nothing for now
|
1193
852
|
end
|
1194
853
|
|
1195
|
-
def
|
1196
|
-
|
854
|
+
def was_label?
|
855
|
+
@was_label = ruby22_label?
|
856
|
+
true
|
1197
857
|
end
|
1198
858
|
|
1199
|
-
|
1200
|
-
|
1201
|
-
|
859
|
+
class State
|
860
|
+
attr_accessor :n
|
861
|
+
attr_accessor :names
|
1202
862
|
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
else
|
1207
|
-
self.parse_string lex_strterm
|
1208
|
-
end
|
863
|
+
# TODO: take a shared hash of strings for inspect/to_s
|
864
|
+
def initialize o, names
|
865
|
+
raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
|
1209
866
|
|
1210
|
-
|
867
|
+
self.n = o
|
868
|
+
self.names = names
|
869
|
+
end
|
1211
870
|
|
1212
|
-
|
1213
|
-
|
1214
|
-
!cond.is_in_state) || is_arg?) &&
|
1215
|
-
is_label_suffix? then
|
1216
|
-
scan(/:/)
|
1217
|
-
token_type = token[0] = :tLABEL_END
|
1218
|
-
end
|
871
|
+
def == o
|
872
|
+
self.equal?(o) || (o.class == self.class && o.n == self.n)
|
1219
873
|
end
|
1220
874
|
|
1221
|
-
|
1222
|
-
self.
|
1223
|
-
self.lex_state = (token_type == :tLABEL_END) ? :expr_labelarg : :expr_end
|
875
|
+
def =~ v
|
876
|
+
(self.n & v.n) != 0
|
1224
877
|
end
|
1225
878
|
|
1226
|
-
|
1227
|
-
|
879
|
+
def | v
|
880
|
+
raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
|
881
|
+
self.names == v.names
|
882
|
+
self.class.new(self.n | v.n, self.names)
|
883
|
+
end
|
1228
884
|
|
1229
|
-
|
1230
|
-
|
885
|
+
def inspect
|
886
|
+
return "Value(0)" if n.zero? # HACK?
|
1231
887
|
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
c, beg, short_hand = 'Q', ss.getch, true
|
888
|
+
names.map { |v, k| k if self =~ v }.
|
889
|
+
compact.
|
890
|
+
join("|").
|
891
|
+
gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
|
1237
892
|
end
|
1238
893
|
|
1239
|
-
|
1240
|
-
|
894
|
+
alias to_s inspect
|
895
|
+
|
896
|
+
module Values
|
897
|
+
expr_names = {}
|
898
|
+
|
899
|
+
EXPR_NONE = State.new 0x0, expr_names
|
900
|
+
EXPR_BEG = State.new 0x1, expr_names
|
901
|
+
EXPR_END = State.new 0x2, expr_names
|
902
|
+
EXPR_ENDARG = State.new 0x4, expr_names
|
903
|
+
EXPR_ENDFN = State.new 0x8, expr_names
|
904
|
+
EXPR_ARG = State.new 0x10, expr_names
|
905
|
+
EXPR_CMDARG = State.new 0x20, expr_names
|
906
|
+
EXPR_MID = State.new 0x40, expr_names
|
907
|
+
EXPR_FNAME = State.new 0x80, expr_names
|
908
|
+
EXPR_DOT = State.new 0x100, expr_names
|
909
|
+
EXPR_CLASS = State.new 0x200, expr_names
|
910
|
+
EXPR_LABEL = State.new 0x400, expr_names
|
911
|
+
EXPR_LABELED = State.new 0x800, expr_names
|
912
|
+
EXPR_FITEM = State.new 0x1000, expr_names
|
913
|
+
|
914
|
+
EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
|
915
|
+
EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
|
916
|
+
EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
|
917
|
+
|
918
|
+
# extra fake lex_state names to make things a bit cleaner
|
919
|
+
|
920
|
+
EXPR_LAB = EXPR_ARG|EXPR_LABELED
|
921
|
+
EXPR_LIT = EXPR_END|EXPR_ENDARG
|
922
|
+
EXPR_PAR = EXPR_BEG|EXPR_LABEL
|
923
|
+
EXPR_PAD = EXPR_BEG|EXPR_LABELED
|
924
|
+
|
925
|
+
EXPR_NUM = EXPR_LIT
|
926
|
+
|
927
|
+
expr_names.merge!(EXPR_NONE => "EXPR_NONE",
|
928
|
+
EXPR_BEG => "EXPR_BEG",
|
929
|
+
EXPR_END => "EXPR_END",
|
930
|
+
EXPR_ENDARG => "EXPR_ENDARG",
|
931
|
+
EXPR_ENDFN => "EXPR_ENDFN",
|
932
|
+
EXPR_ARG => "EXPR_ARG",
|
933
|
+
EXPR_CMDARG => "EXPR_CMDARG",
|
934
|
+
EXPR_MID => "EXPR_MID",
|
935
|
+
EXPR_FNAME => "EXPR_FNAME",
|
936
|
+
EXPR_DOT => "EXPR_DOT",
|
937
|
+
EXPR_CLASS => "EXPR_CLASS",
|
938
|
+
EXPR_LABEL => "EXPR_LABEL",
|
939
|
+
EXPR_LABELED => "EXPR_LABELED",
|
940
|
+
EXPR_FITEM => "EXPR_FITEM")
|
941
|
+
|
942
|
+
# ruby constants for strings
|
943
|
+
|
944
|
+
str_func_names = {}
|
945
|
+
|
946
|
+
STR_FUNC_BORING = State.new 0x00, str_func_names
|
947
|
+
STR_FUNC_ESCAPE = State.new 0x01, str_func_names
|
948
|
+
STR_FUNC_EXPAND = State.new 0x02, str_func_names
|
949
|
+
STR_FUNC_REGEXP = State.new 0x04, str_func_names
|
950
|
+
STR_FUNC_QWORDS = State.new 0x08, str_func_names
|
951
|
+
STR_FUNC_SYMBOL = State.new 0x10, str_func_names
|
952
|
+
STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
|
953
|
+
STR_FUNC_LABEL = State.new 0x40, str_func_names
|
954
|
+
STR_FUNC_LIST = State.new 0x4000, str_func_names
|
955
|
+
STR_FUNC_TERM = State.new 0x8000, str_func_names
|
956
|
+
STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
|
957
|
+
|
958
|
+
# TODO: check parser25.y on how they do STR_FUNC_INDENT
|
959
|
+
|
960
|
+
STR_SQUOTE = STR_FUNC_BORING
|
961
|
+
STR_DQUOTE = STR_FUNC_EXPAND
|
962
|
+
STR_XQUOTE = STR_FUNC_EXPAND
|
963
|
+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
964
|
+
STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
|
965
|
+
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
|
966
|
+
STR_SSYM = STR_FUNC_SYMBOL
|
967
|
+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
968
|
+
STR_LABEL = STR_FUNC_LABEL
|
969
|
+
|
970
|
+
str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
|
971
|
+
STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
|
972
|
+
STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
|
973
|
+
STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
|
974
|
+
STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
|
975
|
+
STR_FUNC_INDENT => "STR_FUNC_INDENT",
|
976
|
+
STR_FUNC_LABEL => "STR_FUNC_LABEL",
|
977
|
+
STR_FUNC_LIST => "STR_FUNC_LIST",
|
978
|
+
STR_FUNC_TERM => "STR_FUNC_TERM",
|
979
|
+
STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
|
980
|
+
STR_SQUOTE => "STR_SQUOTE")
|
1241
981
|
end
|
1242
982
|
|
1243
|
-
|
1244
|
-
|
1245
|
-
nnd, beg = beg, "\0" if nnd.nil?
|
983
|
+
include Values
|
984
|
+
end
|
1246
985
|
|
1247
|
-
|
1248
|
-
|
1249
|
-
when 'Q' then
|
1250
|
-
ch = short_hand ? nnd : c + beg
|
1251
|
-
text = "%#{ch}"
|
1252
|
-
[:tSTRING_BEG, STR_DQUOTE]
|
1253
|
-
when 'q' then
|
1254
|
-
[:tSTRING_BEG, STR_SQUOTE]
|
1255
|
-
when 'W' then
|
1256
|
-
eat_whitespace
|
1257
|
-
[:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1258
|
-
when 'w' then
|
1259
|
-
eat_whitespace
|
1260
|
-
[:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1261
|
-
when 'x' then
|
1262
|
-
[:tXSTRING_BEG, STR_XQUOTE]
|
1263
|
-
when 'r' then
|
1264
|
-
[:tREGEXP_BEG, STR_REGEXP]
|
1265
|
-
when 's' then
|
1266
|
-
self.lex_state = :expr_fname
|
1267
|
-
[:tSYMBEG, STR_SSYM]
|
1268
|
-
when 'I' then
|
1269
|
-
eat_whitespace
|
1270
|
-
[:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
|
1271
|
-
when 'i' then
|
1272
|
-
eat_whitespace
|
1273
|
-
[:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
|
1274
|
-
end
|
986
|
+
include State::Values
|
987
|
+
end
|
1275
988
|
|
1276
|
-
|
1277
|
-
|
989
|
+
class RubyLexer
|
990
|
+
module SSWrapper
|
991
|
+
def string= s
|
992
|
+
ss.string= s
|
993
|
+
end
|
1278
994
|
|
1279
|
-
|
995
|
+
def beginning_of_line?
|
996
|
+
ss.bol?
|
997
|
+
end
|
1280
998
|
|
1281
|
-
|
999
|
+
alias bol? beginning_of_line? # to make .rex file more readable
|
1282
1000
|
|
1283
|
-
|
1284
|
-
|
1001
|
+
def check re
|
1002
|
+
maybe_pop_stack
|
1285
1003
|
|
1286
|
-
|
1287
|
-
|
1004
|
+
ss.check re
|
1005
|
+
end
|
1288
1006
|
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
term_re = @@regexp_cache[term]
|
1007
|
+
def end_of_stream?
|
1008
|
+
ss.eos?
|
1009
|
+
end
|
1293
1010
|
|
1294
|
-
|
1295
|
-
regexp = (func & STR_FUNC_REGEXP) != 0
|
1296
|
-
expand = (func & STR_FUNC_EXPAND) != 0
|
1011
|
+
alias eos? end_of_stream?
|
1297
1012
|
|
1298
|
-
|
1299
|
-
|
1013
|
+
def getch
|
1014
|
+
c = ss.getch
|
1015
|
+
c = ss.getch if c == "\r" && ss.peek(1) == "\n"
|
1016
|
+
c
|
1300
1017
|
end
|
1301
1018
|
|
1302
|
-
|
1019
|
+
def match
|
1020
|
+
ss
|
1021
|
+
end
|
1303
1022
|
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1023
|
+
def matched
|
1024
|
+
ss.matched
|
1025
|
+
end
|
1026
|
+
|
1027
|
+
def in_heredoc?
|
1028
|
+
!!self.old_ss
|
1029
|
+
end
|
1030
|
+
|
1031
|
+
def maybe_pop_stack
|
1032
|
+
if ss.eos? && in_heredoc? then
|
1033
|
+
self.ss_pop
|
1034
|
+
self.lineno_pop
|
1312
1035
|
end
|
1313
1036
|
end
|
1314
1037
|
|
1315
|
-
|
1038
|
+
def pos
|
1039
|
+
ss.pos
|
1040
|
+
end
|
1316
1041
|
|
1317
|
-
|
1042
|
+
def pos= n
|
1043
|
+
ss.pos = n
|
1044
|
+
end
|
1318
1045
|
|
1319
|
-
|
1320
|
-
|
1321
|
-
|
1322
|
-
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
return :tSTRING_DVAR, nil
|
1328
|
-
when scan(/#[{]/) then
|
1329
|
-
return :tSTRING_DBEG, nil
|
1330
|
-
when scan(/#/) then
|
1331
|
-
string_buffer << '#'
|
1332
|
-
end
|
1046
|
+
def rest
|
1047
|
+
ss.rest
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
def scan re
|
1051
|
+
maybe_pop_stack
|
1052
|
+
|
1053
|
+
ss.scan re
|
1333
1054
|
end
|
1334
1055
|
|
1335
|
-
|
1336
|
-
|
1056
|
+
def scanner_class # TODO: design this out of oedipus_lex. or something.
|
1057
|
+
RPStringScanner
|
1337
1058
|
end
|
1338
1059
|
|
1339
|
-
|
1060
|
+
def ss_string
|
1061
|
+
ss.string
|
1062
|
+
end
|
1063
|
+
|
1064
|
+
def ss_string= s
|
1065
|
+
raise "Probably not"
|
1066
|
+
ss.string = s
|
1067
|
+
end
|
1068
|
+
|
1069
|
+
def unscan
|
1070
|
+
ss.unscan
|
1071
|
+
end
|
1340
1072
|
end
|
1073
|
+
|
1074
|
+
include SSWrapper
|
1341
1075
|
end
|
1342
1076
|
|
1343
|
-
|
1077
|
+
class RubyLexer
|
1078
|
+
module SSStackish
|
1079
|
+
def lineno_push new_lineno
|
1080
|
+
self.old_lineno = self.lineno
|
1081
|
+
self.lineno = new_lineno
|
1082
|
+
end
|
1344
1083
|
|
1345
|
-
|
1084
|
+
def lineno_pop
|
1085
|
+
self.lineno = self.old_lineno
|
1086
|
+
self.old_lineno = nil
|
1087
|
+
end
|
1088
|
+
|
1089
|
+
def ss= o
|
1090
|
+
raise "Clearing ss while in heredoc!?!" if in_heredoc?
|
1091
|
+
@old_ss = nil
|
1092
|
+
super
|
1093
|
+
end
|
1094
|
+
|
1095
|
+
def ss_push new_ss
|
1096
|
+
@old_ss = self.ss
|
1097
|
+
@ss = new_ss
|
1098
|
+
end
|
1099
|
+
|
1100
|
+
def ss_pop
|
1101
|
+
@ss = self.old_ss
|
1102
|
+
@old_ss = nil
|
1103
|
+
end
|
1104
|
+
end
|
1105
|
+
|
1106
|
+
prepend SSStackish
|
1107
|
+
end
|
1108
|
+
|
1109
|
+
if ENV["RP_STRTERM_DEBUG"] then
|
1346
1110
|
class RubyLexer
|
1347
|
-
|
1111
|
+
def d o
|
1112
|
+
$stderr.puts o.inspect
|
1113
|
+
end
|
1114
|
+
|
1115
|
+
alias old_lex_strterm= lex_strterm=
|
1348
1116
|
|
1117
|
+
def lex_strterm= o
|
1118
|
+
self.old_lex_strterm= o
|
1119
|
+
where = caller.first.split(/:/).first(2).join(":")
|
1120
|
+
$stderr.puts
|
1121
|
+
d :lex_strterm => [o, where]
|
1122
|
+
end
|
1123
|
+
end
|
1124
|
+
end
|
1125
|
+
|
1126
|
+
require_relative "./ruby_lexer.rex.rb"
|
1127
|
+
require_relative "./ruby_lexer_strings.rb"
|
1128
|
+
|
1129
|
+
if ENV["RP_LINENO_DEBUG"] then
|
1130
|
+
class RubyLexer
|
1349
1131
|
def d o
|
1350
1132
|
$stderr.puts o.inspect
|
1351
1133
|
end
|
1352
1134
|
|
1135
|
+
alias old_lineno= lineno=
|
1136
|
+
|
1353
1137
|
def lineno= n
|
1354
1138
|
self.old_lineno= n
|
1355
1139
|
where = caller.first.split(/:/).first(2).join(":")
|
1356
|
-
|
1140
|
+
$stderr.puts
|
1141
|
+
d :lineno => [n, where]
|
1357
1142
|
end
|
1358
1143
|
end
|
1359
1144
|
end
|