ruby_parser 3.0.0 → 3.19.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/.autotest +36 -19
  4. data/History.rdoc +1297 -0
  5. data/Manifest.txt +35 -7
  6. data/{README.txt → README.rdoc} +44 -14
  7. data/Rakefile +308 -110
  8. data/bin/ruby_parse +3 -1
  9. data/bin/ruby_parse_extract_error +36 -16
  10. data/compare/normalize.rb +218 -0
  11. data/debugging.md +190 -0
  12. data/gauntlet.md +107 -0
  13. data/lib/.document +1 -0
  14. data/lib/rp_extensions.rb +53 -0
  15. data/lib/rp_stringscanner.rb +33 -0
  16. data/lib/ruby20_parser.rb +10973 -0
  17. data/lib/ruby20_parser.y +2683 -0
  18. data/lib/ruby21_parser.rb +10980 -0
  19. data/lib/ruby21_parser.y +2700 -0
  20. data/lib/ruby22_parser.rb +11123 -0
  21. data/lib/ruby22_parser.y +2711 -0
  22. data/lib/ruby23_parser.rb +11132 -0
  23. data/lib/ruby23_parser.y +2713 -0
  24. data/lib/ruby24_parser.rb +11231 -0
  25. data/lib/ruby24_parser.y +2721 -0
  26. data/lib/ruby25_parser.rb +11231 -0
  27. data/lib/ruby25_parser.y +2721 -0
  28. data/lib/ruby26_parser.rb +11253 -0
  29. data/lib/ruby26_parser.y +2736 -0
  30. data/lib/ruby27_parser.rb +12980 -0
  31. data/lib/ruby27_parser.y +3324 -0
  32. data/lib/ruby30_parser.rb +13242 -0
  33. data/lib/ruby30_parser.y +3447 -0
  34. data/lib/ruby31_parser.rb +13622 -0
  35. data/lib/ruby31_parser.y +3481 -0
  36. data/lib/ruby3_parser.yy +3536 -0
  37. data/lib/ruby_lexer.rb +933 -1232
  38. data/lib/ruby_lexer.rex +185 -0
  39. data/lib/ruby_lexer.rex.rb +399 -0
  40. data/lib/ruby_lexer_strings.rb +638 -0
  41. data/lib/ruby_parser.rb +97 -3
  42. data/lib/ruby_parser.yy +3465 -0
  43. data/lib/ruby_parser_extras.rb +1216 -687
  44. data/test/test_ruby_lexer.rb +2249 -1092
  45. data/test/test_ruby_parser.rb +5156 -975
  46. data/test/test_ruby_parser_extras.rb +47 -77
  47. data/tools/munge.rb +250 -0
  48. data/tools/ripper.rb +44 -0
  49. data.tar.gz.sig +1 -1
  50. metadata +200 -155
  51. metadata.gz.sig +0 -0
  52. data/.gemtest +0 -0
  53. data/History.txt +0 -482
  54. data/lib/gauntlet_rubyparser.rb +0 -120
  55. data/lib/ruby18_parser.rb +0 -5747
  56. data/lib/ruby18_parser.y +0 -1873
  57. data/lib/ruby19_parser.rb +0 -6110
  58. data/lib/ruby19_parser.y +0 -2078
data/lib/ruby_lexer.rb CHANGED
@@ -1,1443 +1,1144 @@
1
- # encoding: US-ASCII
1
+ # frozen_string_literal: true
2
+ # encoding: UTF-8
3
+
4
+ $DEBUG = true if ENV["DEBUG"]
2
5
 
3
6
  class RubyLexer
7
+ # :stopdoc:
8
+ EOF = :eof_haha!
4
9
 
5
- RUBY19 = "".respond_to? :encoding
10
+ ESCAPES = {
11
+ "a" => "\007",
12
+ "b" => "\010",
13
+ "e" => "\033",
14
+ "f" => "\f",
15
+ "n" => "\n",
16
+ "r" => "\r",
17
+ "s" => " ",
18
+ "t" => "\t",
19
+ "v" => "\13",
20
+ "\\" => '\\',
21
+ "\n" => "",
22
+ "C-\?" => 127.chr,
23
+ "c\?" => 127.chr,
24
+ }
6
25
 
7
- IDENT_CHAR_RE = case RUBY_VERSION
8
- when /^1\.8/ then
9
- /[\w\x80-\xFF]/
10
- when /^(1\.9|2\.0)/ then # HACK - matching 2.0 for now
11
- /[\w\u0080-\uFFFF]/u
12
- else
13
- raise "bork"
14
- end
26
+ HAS_ENC = "".respond_to? :encoding
15
27
 
16
- IDENT_RE = /^#{IDENT_CHAR_RE}+/
28
+ BTOKENS = {
29
+ ".." => :tBDOT2,
30
+ "..." => :tBDOT3,
31
+ }
17
32
 
18
- attr_accessor :command_start
19
- attr_accessor :cmdarg
20
- attr_accessor :cond
21
- attr_accessor :tern # TODO: rename ternary damnit... wtf
22
- attr_accessor :nest
33
+ TOKENS = {
34
+ "!" => :tBANG,
35
+ "!=" => :tNEQ,
36
+ "!@" => :tBANG,
37
+ "!~" => :tNMATCH,
38
+ "," => :tCOMMA,
39
+ ".." => :tDOT2,
40
+ "..." => :tDOT3,
41
+ "=" => :tEQL,
42
+ "==" => :tEQ,
43
+ "===" => :tEQQ,
44
+ "=>" => :tASSOC,
45
+ "=~" => :tMATCH,
46
+ "->" => :tLAMBDA,
47
+ }
23
48
 
24
- ESC_RE = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc]))/
49
+ PERCENT_END = {
50
+ "(" => ")",
51
+ "[" => "]",
52
+ "{" => "}",
53
+ "<" => ">",
54
+ }
25
55
 
26
- ##
27
- # What version of ruby to parse. 18 and 19 are the only valid values
28
- # currently supported.
56
+ SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
29
57
 
30
- attr_accessor :version
58
+ @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
59
+ @@regexp_cache[nil] = nil
31
60
 
32
- # Additional context surrounding tokens that both the lexer and
33
- # grammar use.
34
- attr_reader :lex_state
61
+ def regexp_cache
62
+ @@regexp_cache
63
+ end
35
64
 
36
- attr_accessor :lex_strterm
65
+ if $DEBUG then
66
+ attr_reader :lex_state
37
67
 
38
- attr_accessor :parser # HACK for very end of lexer... *sigh*
68
+ def lex_state= o
69
+ return if @lex_state == o
39
70
 
40
- # Stream of data that yylex examines.
41
- attr_reader :src
71
+ from = ""
72
+ if ENV["VERBOSE"]
73
+ path = caller[0]
74
+ path = caller[1] if path =~ /result/
75
+ path, line, *_ = path.split(/:/)
76
+ path.delete_prefix! File.dirname File.dirname __FILE__
77
+ from = " at .%s:%s" % [path, line]
78
+ end
42
79
 
43
- # Last token read via yylex.
44
- attr_accessor :token
80
+ warn "lex_state: %p -> %p%s" % [lex_state, o, from]
45
81
 
46
- attr_accessor :string_buffer
82
+ @lex_state = o
83
+ end
84
+ end
47
85
 
48
- # Value of last token which had a value associated with it.
49
- attr_accessor :yacc_value
86
+ # :startdoc:
50
87
 
51
- # What handles warnings
52
- attr_accessor :warnings
88
+ attr_accessor :lex_state unless $DEBUG
53
89
 
54
- attr_accessor :space_seen
90
+ attr_accessor :brace_nest
91
+ attr_accessor :cmdarg
92
+ attr_accessor :command_start
93
+ attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
94
+ attr_accessor :last_state
95
+ attr_accessor :cond
96
+ attr_accessor :old_ss
97
+ attr_accessor :old_lineno
55
98
 
56
- EOF = :eof_haha!
99
+ # these are generated via ruby_lexer.rex: ss, lineno
57
100
 
58
- # ruby constants for strings (should this be moved somewhere else?)
59
- STR_FUNC_BORING = 0x00
60
- STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
61
- STR_FUNC_EXPAND = 0x02
62
- STR_FUNC_REGEXP = 0x04
63
- STR_FUNC_QWORDS = 0x08
64
- STR_FUNC_SYMBOL = 0x10
65
- STR_FUNC_INDENT = 0x20 # <<-HEREDOC
66
-
67
- STR_SQUOTE = STR_FUNC_BORING
68
- STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
69
- STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
70
- STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
71
- STR_SSYM = STR_FUNC_SYMBOL
72
- STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
101
+ ##
102
+ # Additional context surrounding tokens that both the lexer and
103
+ # grammar use.
73
104
 
74
- TOKENS = {
75
- "!" => :tBANG,
76
- "!=" => :tNEQ,
77
- "!~" => :tNMATCH,
78
- "," => :tCOMMA,
79
- ".." => :tDOT2,
80
- "..." => :tDOT3,
81
- "=" => :tEQL,
82
- "==" => :tEQ,
83
- "===" => :tEQQ,
84
- "=>" => :tASSOC,
85
- "=~" => :tMATCH,
86
- "->" => :tLAMBDA,
87
- }
105
+ attr_accessor :lex_strterm
106
+ attr_accessor :lpar_beg
107
+ attr_accessor :paren_nest
108
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
109
+ attr_accessor :space_seen
110
+ attr_accessor :string_buffer
111
+ attr_accessor :string_nest
112
+
113
+ # Last token read via next_token.
114
+ attr_accessor :token
88
115
 
89
- # How the parser advances to the next token.
90
- #
91
- # @return true if not at end of file (EOF).
116
+ attr_writer :comments
92
117
 
93
- def advance
94
- r = yylex
95
- self.token = r
118
+ def initialize _ = nil
119
+ @lex_state = nil # remove one warning under $DEBUG
120
+ self.lex_state = EXPR_NONE
96
121
 
97
- raise "yylex returned nil" unless r
122
+ self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
123
+ self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
124
+ self.ss = RPStringScanner.new ""
98
125
 
99
- return RubyLexer::EOF != r
126
+ reset
100
127
  end
101
128
 
102
129
  def arg_ambiguous
103
- self.warning("Ambiguous first argument. make sure.")
130
+ self.warning "Ambiguous first argument. make sure."
131
+ end
132
+
133
+ def arg_state
134
+ is_after_operator? ? EXPR_ARG : EXPR_BEG
135
+ end
136
+
137
+ def ignore_body_comments
138
+ @comments.clear
104
139
  end
105
140
 
106
- def comments
141
+ def comments # TODO: remove this... maybe comment_string + attr_accessor
107
142
  c = @comments.join
108
143
  @comments.clear
109
144
  c
110
145
  end
111
146
 
112
- def expr_beg_push val
147
+ def debug n
148
+ raise "debug #{n}"
149
+ end
150
+
151
+ def expr_dot?
152
+ lex_state =~ EXPR_DOT
153
+ end
154
+
155
+ def expr_fname? # REFACTOR
156
+ lex_state =~ EXPR_FNAME
157
+ end
158
+
159
+ def expr_result token, text
113
160
  cond.push false
114
161
  cmdarg.push false
115
- self.lex_state = :expr_beg
116
- self.yacc_value = val
162
+ result EXPR_BEG, token, text
117
163
  end
118
164
 
119
- def fix_arg_lex_state
120
- self.lex_state = if in_lex_state? :expr_fname, :expr_dot then
121
- :expr_arg
122
- else
123
- :expr_beg
124
- end
165
+ def in_fname? # REFACTOR
166
+ lex_state =~ EXPR_FNAME
125
167
  end
126
168
 
127
- def heredoc here # 63 lines
128
- _, eos, func, last_line = here
169
+ def int_with_base base
170
+ rb_compile_error "Invalid numeric format" if matched =~ /__/
129
171
 
130
- indent = (func & STR_FUNC_INDENT) != 0
131
- expand = (func & STR_FUNC_EXPAND) != 0
132
- eos_re = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
133
- err_msg = "can't match #{eos_re.inspect} anywhere in "
172
+ text = matched
173
+ case
174
+ when text.end_with?("ri")
175
+ result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
176
+ when text.end_with?("r")
177
+ result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
178
+ when text.end_with?("i")
179
+ result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
180
+ else
181
+ result EXPR_NUM, :tINTEGER, text.to_i(base)
182
+ end
183
+ end
134
184
 
135
- rb_compile_error err_msg if
136
- src.eos?
185
+ def is_after_operator?
186
+ lex_state =~ EXPR_FNAME|EXPR_DOT
187
+ end
137
188
 
138
- if src.beginning_of_line? && src.scan(eos_re) then
139
- src.unread_many last_line # TODO: figure out how to remove this
140
- self.yacc_value = eos
141
- return :tSTRING_END
142
- end
189
+ def is_arg?
190
+ lex_state =~ EXPR_ARG_ANY
191
+ end
143
192
 
144
- self.string_buffer = []
193
+ def is_beg?
194
+ lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
195
+ end
145
196
 
146
- if expand then
147
- case
148
- when src.scan(/#[$@]/) then
149
- src.pos -= 1 # FIX omg stupid
150
- self.yacc_value = src.matched
151
- return :tSTRING_DVAR
152
- when src.scan(/#[{]/) then
153
- self.yacc_value = src.matched
154
- return :tSTRING_DBEG
155
- when src.scan(/#/) then
156
- string_buffer << '#'
157
- end
197
+ def is_end?
198
+ lex_state =~ EXPR_END_ANY
199
+ end
158
200
 
159
- begin
160
- c = tokadd_string func, "\n", nil
201
+ def is_label_possible?
202
+ (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
203
+ end
161
204
 
162
- rb_compile_error err_msg if
163
- c == RubyLexer::EOF
205
+ def is_label_suffix?
206
+ check(/:(?!:)/)
207
+ end
164
208
 
165
- if c != "\n" then
166
- self.yacc_value = string_buffer.join.delete("\r")
167
- return :tSTRING_CONTENT
168
- else
169
- string_buffer << src.scan(/\n/)
170
- end
209
+ def is_space_arg? c = "x"
210
+ is_arg? and space_seen and c !~ /\s/
211
+ end
212
+
213
+ def lambda_beginning?
214
+ lpar_beg && lpar_beg == paren_nest
215
+ end
171
216
 
172
- rb_compile_error err_msg if
173
- src.eos?
174
- end until src.check(eos_re)
217
+ def is_local_id id
218
+ # maybe just make this false for now
219
+ self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
220
+ end
221
+
222
+ def lvar_defined? id
223
+ # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
224
+ self.parser.env[id.to_sym] == :lvar
225
+ end
226
+
227
+ def not_end?
228
+ not is_end?
229
+ end
230
+
231
+ def possibly_escape_string text, check
232
+ content = match[1]
233
+
234
+ if text =~ check then
235
+ content.gsub(ESC) { unescape $1 }
175
236
  else
176
- until src.check(eos_re) do
177
- string_buffer << src.scan(/.*(\n|\z)/)
178
- rb_compile_error err_msg if
179
- src.eos?
180
- end
237
+ content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
181
238
  end
239
+ end
182
240
 
183
- self.lex_strterm = [:heredoc, eos, func, last_line]
184
- self.yacc_value = string_buffer.join.delete("\r")
241
+ def process_amper text
242
+ token = if is_arg? && space_seen && !check(/\s/) then
243
+ warning("`&' interpreted as argument prefix")
244
+ :tAMPER
245
+ elsif lex_state =~ EXPR_BEG|EXPR_MID then
246
+ :tAMPER
247
+ else
248
+ :tAMPER2
249
+ end
185
250
 
186
- return :tSTRING_CONTENT
251
+ result :arg_state, token, "&"
187
252
  end
188
253
 
189
- def heredoc_identifier # 51 lines
190
- term, func = nil, STR_FUNC_BORING
191
- self.string_buffer = []
254
+ def process_backref text
255
+ token = match[1].to_sym
256
+ # TODO: can't do lineno hack w/ symbol
257
+ result EXPR_END, :tBACK_REF, token
258
+ end
192
259
 
193
- case
194
- when src.scan(/(-?)(['"`])(.*?)\2/) then
195
- term = src[2]
196
- func |= STR_FUNC_INDENT unless src[1].empty?
197
- func |= case term
198
- when "\'" then
199
- STR_SQUOTE
200
- when '"' then
201
- STR_DQUOTE
202
- else
203
- STR_XQUOTE
204
- end
205
- string_buffer << src[3]
206
- when src.scan(/-?(['"`])(?!\1*\Z)/) then
207
- rb_compile_error "unterminated here document identifier"
208
- when src.scan(/(-?)(\w+)/) then
209
- term = '"'
210
- func |= STR_DQUOTE
211
- unless src[1].empty? then
212
- func |= STR_FUNC_INDENT
213
- end
214
- string_buffer << src[2]
215
- else
216
- return nil
260
+ def process_begin text
261
+ @comments << matched
262
+
263
+ unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
264
+ @comments.clear
265
+ rb_compile_error("embedded document meets end of file")
217
266
  end
218
267
 
219
- if src.scan(/.*\n/) then
220
- # TODO: think about storing off the char range instead
221
- line = src.matched
222
- src.extra_lines_added += 1
268
+ @comments << matched
269
+ self.lineno += matched.count("\n") # HACK?
270
+
271
+ nil # TODO
272
+ end
273
+
274
+ def process_brace_close text
275
+ case matched
276
+ when "}" then
277
+ self.brace_nest -= 1
278
+ return :tSTRING_DEND, matched if brace_nest < 0
279
+ end
280
+
281
+ # matching compare/parse26.y:8099
282
+ cond.pop
283
+ cmdarg.pop
284
+
285
+ case matched
286
+ when "}" then
287
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
288
+ return :tRCURLY, matched
289
+ when "]" then
290
+ self.paren_nest -= 1
291
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
292
+ return :tRBRACK, matched
293
+ when ")" then
294
+ self.paren_nest -= 1
295
+ self.lex_state = EXPR_ENDFN
296
+ return :tRPAREN, matched
223
297
  else
224
- line = nil
298
+ raise "Unknown bracing: #{matched.inspect}"
299
+ end
300
+ end
301
+
302
+ def process_brace_open text
303
+ # matching compare/parse23.y:8694
304
+ self.brace_nest += 1
305
+
306
+ if lambda_beginning? then
307
+ self.lpar_beg = nil
308
+ self.paren_nest -= 1 # close arg list when lambda opens body
309
+
310
+ return expr_result(:tLAMBEG, "{")
311
+ end
312
+
313
+ token = case
314
+ when lex_state =~ EXPR_LABELED then
315
+ :tLBRACE # hash
316
+ when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
317
+ :tLCURLY # block (primary) "{" in parse.y
318
+ when lex_state =~ EXPR_ENDARG then
319
+ :tLBRACE_ARG # block (expr)
320
+ else
321
+ :tLBRACE # hash
322
+ end
323
+
324
+ state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
325
+ self.command_start = true if token != :tLBRACE
326
+
327
+ cond.push false
328
+ cmdarg.push false
329
+ result state, token, text
330
+ end
331
+
332
+ def process_colon1 text
333
+ # ?: / then / when
334
+ if is_end? || check(/\s/) then
335
+ return result EXPR_BEG, :tCOLON, text
225
336
  end
226
337
 
227
- self.lex_strterm = [:heredoc, string_buffer.join, func, line]
338
+ case
339
+ when scan(/\'/) then
340
+ string STR_SSYM, matched
341
+ when scan(/\"/) then
342
+ string STR_DSYM, matched
343
+ end
344
+
345
+ result EXPR_FNAME, :tSYMBEG, text
346
+ end
228
347
 
229
- if term == '`' then
230
- self.yacc_value = "`"
231
- return :tXSTRING_BEG
348
+ def process_colon2 text
349
+ if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
350
+ result EXPR_BEG, :tCOLON3, text
232
351
  else
233
- self.yacc_value = "\""
234
- return :tSTRING_BEG
352
+ result EXPR_DOT, :tCOLON2, text
235
353
  end
236
354
  end
237
355
 
238
- def in_lex_state?(*states)
239
- states.include? lex_state
356
+ def process_dots text
357
+ tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
358
+
359
+ result EXPR_BEG, tokens[text], text
240
360
  end
241
361
 
242
- def initialize v = 18
243
- self.version = v
244
- self.cond = RubyParserStuff::StackState.new(:cond)
245
- self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
246
- self.tern = RubyParserStuff::StackState.new(:tern)
247
- self.nest = 0
248
- @comments = []
362
+ def process_float text
363
+ rb_compile_error "Invalid numeric format" if text =~ /__/
249
364
 
250
- reset
365
+ case
366
+ when text.end_with?("ri")
367
+ result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
368
+ when text.end_with?("i")
369
+ result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
370
+ when text.end_with?("r")
371
+ result EXPR_NUM, :tRATIONAL, Rational(text.chop)
372
+ else
373
+ result EXPR_NUM, :tFLOAT, text.to_f
374
+ end
251
375
  end
252
376
 
253
- def int_with_base base
254
- rb_compile_error "Invalid numeric format" if src.matched =~ /__/
255
- self.yacc_value = src.matched.to_i(base)
256
- return :tINTEGER
377
+ def process_gvar text
378
+ if parser.class.version > 20 && text == "$-" then
379
+ rb_compile_error "unexpected $undefined"
380
+ end
381
+
382
+ result EXPR_END, :tGVAR, text
257
383
  end
258
384
 
259
- def lex_state= o
260
- # warn "wtf lex_state = #{o.inspect} from #{caller.first}"
261
- raise "wtf\?" unless Symbol === o
262
- @lex_state = o
385
+ def process_gvar_oddity text
386
+ rb_compile_error "#{text.inspect} is not allowed as a global variable name"
263
387
  end
264
388
 
265
- attr_writer :lineno
266
- def lineno
267
- @lineno ||= src.lineno
389
+ def process_ivar text
390
+ tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
391
+ result EXPR_END, tok_id, text
268
392
  end
269
393
 
270
- ##
271
- # Parse a number from the input stream.
272
- #
273
- # @param c The first character of the number.
274
- # @return A int constant wich represents a token.
394
+ def process_label text
395
+ symbol = possibly_escape_string text, /^\"/
275
396
 
276
- def parse_number
277
- self.lex_state = :expr_end
397
+ result EXPR_LAB, :tLABEL, symbol
398
+ end
278
399
 
279
- case
280
- when src.scan(/[+-]?0[xXbBdD]\b/) then
281
- rb_compile_error "Invalid numeric format"
282
- when src.scan(/[+-]?0x[a-f0-9_]+/i) then
283
- int_with_base(16)
284
- when src.scan(/[+-]?0[Bb][01_]+/) then
285
- int_with_base(2)
286
- when src.scan(/[+-]?0[Dd][0-9_]+/) then
287
- int_with_base(10)
288
- when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
289
- rb_compile_error "Illegal octal digit."
290
- when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
291
- int_with_base(8)
292
- when src.scan(/[+-]?[\d_]+_(e|\.)/) then
293
- rb_compile_error "Trailing '_' in number."
294
- when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
295
- number = src.matched
296
- if number =~ /__/ then
297
- rb_compile_error "Invalid numeric format"
298
- end
299
- self.yacc_value = number.to_f
300
- :tFLOAT
301
- when src.scan(/[+-]?0\b/) then
302
- int_with_base(10)
303
- when src.scan(/[+-]?[\d_]+\b/) then
304
- int_with_base(10)
400
+ def process_label_or_string text
401
+ if @was_label && text =~ /:\Z/ then
402
+ @was_label = nil
403
+ return process_label text
404
+ elsif text =~ /:\Z/ then
405
+ self.pos -= 1 # put back ":"
406
+ text = text[0..-2]
407
+ end
408
+
409
+ orig_line = lineno
410
+ str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
411
+ self.lineno += str.count("\n")
412
+
413
+ result EXPR_END, :tSTRING, str, orig_line
414
+ end
415
+
416
+ def process_lchevron text
417
+ if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
418
+ !is_end? &&
419
+ (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
420
+ tok = self.heredoc_identifier
421
+ return tok if tok
422
+ end
423
+
424
+ if is_after_operator? then
425
+ self.lex_state = EXPR_ARG
305
426
  else
306
- rb_compile_error "Bad number format"
427
+ self.command_start = true if lex_state =~ EXPR_CLASS
428
+ self.lex_state = EXPR_BEG
307
429
  end
430
+
431
+ result lex_state, :tLSHFT, "\<\<"
308
432
  end
309
433
 
310
- def parse_quote # 58 lines
311
- beg, nnd, short_hand, c = nil, nil, false, nil
434
+ def process_newline_or_comment text # ../compare/parse30.y:9126 ish
435
+ c = matched
436
+
437
+ if c == "#" then
438
+ self.pos -= 1
312
439
 
313
- if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
314
- rb_compile_error "unknown type of %string" if src.matched_size == 2
315
- c, beg, short_hand = src.matched, src.getch, false
316
- else # Short-hand (e.g. %{, %., %!, etc)
317
- c, beg, short_hand = 'Q', src.getch, true
440
+ while scan(/\s*\#.*(\n+|\z)/) do
441
+ self.lineno += matched.count "\n"
442
+ @comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
443
+ end
444
+
445
+ return nil if end_of_stream?
318
446
  end
319
447
 
320
- if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
321
- rb_compile_error "unterminated quoted string meets end of file"
448
+ c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
449
+ lex_state !~ EXPR_LABELED)
450
+ if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
451
+ # ignore if !fallthrough?
452
+ if !c && parser.in_kwarg then
453
+ # normal newline
454
+ self.command_start = true
455
+ return result EXPR_BEG, :tNL, nil
456
+ else
457
+ maybe_pop_stack
458
+ return # goto retry
459
+ end
322
460
  end
323
461
 
324
- # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
325
- nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
326
- nnd, beg = beg, "\0" if nnd.nil?
462
+ if scan(/[\ \t\r\f\v]+/) then
463
+ self.space_seen = true
464
+ end
327
465
 
328
- token_type, self.yacc_value = nil, "%#{c}#{beg}"
329
- token_type, string_type = case c
330
- when 'Q' then
331
- ch = short_hand ? nnd : c + beg
332
- self.yacc_value = "%#{ch}"
333
- [:tSTRING_BEG, STR_DQUOTE]
334
- when 'q' then
335
- [:tSTRING_BEG, STR_SQUOTE]
336
- when 'W' then
337
- src.scan(/\s*/)
338
- [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
339
- when 'w' then
340
- src.scan(/\s*/)
341
- [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
342
- when 'x' then
343
- [:tXSTRING_BEG, STR_XQUOTE]
344
- when 'r' then
345
- [:tREGEXP_BEG, STR_REGEXP]
346
- when 's' then
347
- self.lex_state = :expr_fname
348
- [:tSYMBEG, STR_SSYM]
349
- end
466
+ if check(/#/) then
467
+ return # goto retry
468
+ elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
469
+ return # goto retry
470
+ end
350
471
 
351
- rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
352
- token_type.nil?
472
+ self.command_start = true
353
473
 
354
- self.lex_strterm = [:strterm, string_type, nnd, beg]
474
+ result EXPR_BEG, :tNL, nil
475
+ end
355
476
 
356
- return token_type
477
+ def process_nthref text
478
+ # TODO: can't do lineno hack w/ number
479
+ result EXPR_END, :tNTH_REF, match[1].to_i
357
480
  end
358
481
 
359
- def parse_string(quote) # 65 lines
360
- _, string_type, term, open = quote
482
+ def process_paren text
483
+ token = if is_beg? then
484
+ :tLPAREN
485
+ elsif !space_seen then
486
+ # foo( ... ) => method call, no ambiguity
487
+ :tLPAREN2
488
+ elsif is_space_arg? then
489
+ :tLPAREN_ARG
490
+ elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
491
+ # TODO:
492
+ # warn("parentheses after method name is interpreted as " \
493
+ # "an argument list, not a decomposed argument")
494
+ :tLPAREN2
495
+ else
496
+ :tLPAREN2 # plain "(" in parse.y
497
+ end
361
498
 
362
- space = false # FIX: remove these
363
- func = string_type
364
- paren = open
365
- term_re = Regexp.escape term
499
+ self.paren_nest += 1
366
500
 
367
- qwords = (func & STR_FUNC_QWORDS) != 0
368
- regexp = (func & STR_FUNC_REGEXP) != 0
369
- expand = (func & STR_FUNC_EXPAND) != 0
501
+ cond.push false
502
+ cmdarg.push false
503
+ result EXPR_PAR, token, text
504
+ end
370
505
 
371
- unless func then # FIX: impossible, prolly needs == 0
372
- self.lineno = nil
373
- return :tSTRING_END
506
+ def process_percent text
507
+ case
508
+ when is_beg? then
509
+ process_percent_quote
510
+ when scan(/\=/)
511
+ result EXPR_BEG, :tOP_ASGN, "%"
512
+ when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
513
+ process_percent_quote
514
+ else
515
+ result :arg_state, :tPERCENT, "%"
374
516
  end
517
+ end
375
518
 
376
- space = true if qwords and src.scan(/\s+/)
519
+ def process_plus_minus text
520
+ sign = matched
521
+ utype, type = if sign == "+" then
522
+ [:tUPLUS, :tPLUS]
523
+ else
524
+ [:tUMINUS, :tMINUS]
525
+ end
377
526
 
378
- if self.nest == 0 && src.scan(/#{term_re}/) then
379
- if qwords then
380
- quote[1] = nil
381
- return :tSPACE
382
- elsif regexp then
383
- self.yacc_value = self.regx_options
384
- self.lineno = nil
385
- return :tREGEXP_END
527
+ if is_after_operator? then
528
+ if scan(/@/) then
529
+ return result(EXPR_ARG, utype, "#{sign}@")
386
530
  else
387
- self.yacc_value = term
388
- self.lineno = nil
389
- return :tSTRING_END
531
+ return result(EXPR_ARG, type, sign)
390
532
  end
391
533
  end
392
534
 
393
- if space then
394
- return :tSPACE
395
- end
535
+ return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
396
536
 
397
- self.string_buffer = []
537
+ if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
538
+ arg_ambiguous if is_arg?
398
539
 
399
- if expand
400
- case
401
- when src.scan(/#(?=[$@])/) then
402
- return :tSTRING_DVAR
403
- when src.scan(/#[{]/) then
404
- return :tSTRING_DBEG
405
- when src.scan(/#/) then
406
- string_buffer << '#'
540
+ if check(/\d/) then
541
+ return nil if utype == :tUPLUS
542
+ return result EXPR_BEG, :tUMINUS_NUM, sign
407
543
  end
544
+
545
+ return result EXPR_BEG, utype, sign
408
546
  end
409
547
 
410
- if tokadd_string(func, term, paren) == RubyLexer::EOF then
411
- rb_compile_error "unterminated string meets end of file"
548
+ result EXPR_BEG, type, sign
549
+ end
550
+
551
+ def process_questionmark text
552
+ if is_end? then
553
+ return result EXPR_BEG, :tEH, "?"
412
554
  end
413
555
 
414
- self.yacc_value = string_buffer.join
556
+ if end_of_stream? then
557
+ rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
558
+ end
415
559
 
416
- return :tSTRING_CONTENT
417
- end
560
+ if check(/\s|\v/) then
561
+ unless is_arg? then
562
+ c2 = { " " => "s",
563
+ "\n" => "n",
564
+ "\t" => "t",
565
+ "\v" => "v",
566
+ "\r" => "r",
567
+ "\f" => "f" }[matched]
568
+
569
+ if c2 then
570
+ warning("invalid character syntax; use ?\\" + c2)
571
+ end
572
+ end
418
573
 
419
- def rb_compile_error msg
420
- msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
421
- raise RubyParser::SyntaxError, msg
574
+ # ternary
575
+ return result EXPR_BEG, :tEH, "?"
576
+ elsif check(/\w(?=\w)/) then # ternary, also
577
+ return result EXPR_BEG, :tEH, "?"
578
+ end
579
+
580
+ c = if scan(/\\/) then
581
+ self.read_escape
582
+ else
583
+ getch
584
+ end
585
+
586
+ result EXPR_END, :tSTRING, c
422
587
  end
423
588
 
424
- def read_escape # 51 lines
425
- case
426
- when src.scan(/\\/) then # Backslash
427
- '\\'
428
- when src.scan(/n/) then # newline
429
- "\n"
430
- when src.scan(/t/) then # horizontal tab
431
- "\t"
432
- when src.scan(/r/) then # carriage-return
433
- "\r"
434
- when src.scan(/f/) then # form-feed
435
- "\f"
436
- when src.scan(/v/) then # vertical tab
437
- "\13"
438
- when src.scan(/a/) then # alarm(bell)
439
- "\007"
440
- when src.scan(/e/) then # escape
441
- "\033"
442
- when src.scan(/b/) then # backspace
443
- "\010"
444
- when src.scan(/s/) then # space
445
- " "
446
- when src.scan(/[0-7]{1,3}/) then # octal constant
447
- src.matched.to_i(8).chr
448
- when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
449
- src[1].to_i(16).chr
450
- when src.check(/M-\\[\\MCc]/) then
451
- src.scan(/M-\\/) # eat it
452
- c = self.read_escape
453
- c[0] = (c[0].ord | 0x80).chr
454
- c
455
- when src.scan(/M-(.)/) then
456
- c = src[1]
457
- c[0] = (c[0].ord | 0x80).chr
458
- c
459
- when src.check(/(C-|c)\\[\\MCc]/) then
460
- src.scan(/(C-|c)\\/) # eat it
461
- c = self.read_escape
462
- c[0] = (c[0].ord & 0x9f).chr
463
- c
464
- when src.scan(/C-\?|c\?/) then
465
- 127.chr
466
- when src.scan(/(C-|c)(.)/) then
467
- c = src[2]
468
- c[0] = (c[0].ord & 0x9f).chr
469
- c
470
- when src.scan(/[McCx0-9]/) || src.eos? then
471
- rb_compile_error("Invalid escape character syntax")
472
- else
473
- src.getch
474
- end
589
+ def process_simple_string text
590
+ orig_line = lineno
591
+ self.lineno += text.count("\n")
592
+
593
+ str = text[1..-2]
594
+ .gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
595
+ str = str.b unless str.valid_encoding?
596
+
597
+ result EXPR_END, :tSTRING, str, orig_line
475
598
  end
476
599
 
477
- def regx_options # 15 lines
478
- good, bad = [], []
600
+ def process_slash text
601
+ if is_beg? then
602
+ string STR_REGEXP, matched
479
603
 
480
- if src.scan(/[a-z]+/) then
481
- good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
604
+ return result nil, :tREGEXP_BEG, "/"
482
605
  end
483
606
 
484
- unless bad.empty? then
485
- rb_compile_error("unknown regexp option%s - %s" %
486
- [(bad.size > 1 ? "s" : ""), bad.join.inspect])
607
+ if scan(/\=/) then
608
+ return result(EXPR_BEG, :tOP_ASGN, "/")
487
609
  end
488
610
 
489
- return good.join
611
+ if is_arg? && space_seen then
612
+ unless scan(/\s/) then
613
+ arg_ambiguous
614
+ string STR_REGEXP, "/"
615
+ return result(nil, :tREGEXP_BEG, "/")
616
+ end
617
+ end
618
+
619
+ result :arg_state, :tDIVIDE, "/"
490
620
  end
491
621
 
492
- def reset
493
- self.command_start = true
494
- self.lex_strterm = nil
495
- self.token = nil
496
- self.yacc_value = nil
622
+ def process_square_bracket text
623
+ self.paren_nest += 1
497
624
 
498
- @src = nil
499
- @lex_state = nil
500
- end
625
+ token = nil
501
626
 
502
- def ruby18
503
- Ruby18Parser === parser
627
+ if is_after_operator? then
628
+ case
629
+ when scan(/\]\=/) then
630
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
631
+ return result EXPR_ARG, :tASET, "[]="
632
+ when scan(/\]/) then
633
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
634
+ return result EXPR_ARG, :tAREF, "[]"
635
+ else
636
+ rb_compile_error "unexpected '['"
637
+ end
638
+ elsif is_beg? then
639
+ token = :tLBRACK
640
+ elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
641
+ token = :tLBRACK
642
+ else
643
+ token = :tLBRACK2
644
+ end
645
+
646
+ cond.push false
647
+ cmdarg.push false
648
+ result EXPR_PAR, token, text
504
649
  end
505
650
 
506
- def ruby19
507
- Ruby19Parser === parser
651
+ def process_symbol text
652
+ symbol = possibly_escape_string text, /^:\"/ # stupid emacs
653
+
654
+ result EXPR_LIT, :tSYMBOL, symbol
508
655
  end
509
656
 
510
- def src= src
511
- raise "bad src: #{src.inspect}" unless String === src
512
- @src = RPStringScanner.new(src)
657
+ def process_token text
658
+ # matching: parse_ident in compare/parse23.y:7989
659
+ # FIX: remove: self.last_state = lex_state
660
+
661
+ token = self.token = text
662
+ token << matched if scan(/[\!\?](?!=)/)
663
+
664
+ tok_id =
665
+ case
666
+ when token =~ /[!?]$/ then
667
+ :tFID
668
+ when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
669
+ # ident=, not =~ => == or followed by =>
670
+ # TODO test lexing of a=>b vs a==>b
671
+ token << matched
672
+ :tIDENTIFIER
673
+ when token =~ /^[A-Z]/ then
674
+ :tCONSTANT
675
+ else
676
+ :tIDENTIFIER
677
+ end
678
+
679
+ if is_label_possible? and is_label_suffix? then
680
+ scan(/:/)
681
+ return result EXPR_LAB, :tLABEL, token
682
+ end
683
+
684
+ # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
685
+ if lex_state !~ EXPR_DOT then
686
+ # See if it is a reserved word.
687
+ keyword = RubyParserStuff::Keyword.keyword token
688
+
689
+ return process_token_keyword keyword if keyword
690
+ end
691
+
692
+ # matching: compare/parse30.y:9039
693
+ state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
694
+ cmd_state ? EXPR_CMDARG : EXPR_ARG
695
+ elsif lex_state =~ EXPR_FNAME then
696
+ EXPR_ENDFN
697
+ else
698
+ EXPR_END
699
+ end
700
+ self.lex_state = state
701
+
702
+ tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
703
+
704
+ if last_state !~ EXPR_DOT|EXPR_FNAME and
705
+ (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
706
+ lvar_defined?(token) then
707
+ state = EXPR_END|EXPR_LABEL
708
+ end
709
+
710
+ result state, tok_id, token
513
711
  end
514
712
 
515
- def tokadd_escape term # 20 lines
713
+ def process_token_keyword keyword
714
+ # matching MIDDLE of parse_ident in compare/parse23.y:8046
715
+ state = lex_state
716
+
717
+ return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
718
+
719
+ self.lex_state = keyword.state
720
+ self.command_start = true if lex_state =~ EXPR_BEG
721
+
516
722
  case
517
- when src.scan(/\\\n/) then
518
- # just ignore
519
- when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
520
- self.string_buffer << src.matched
521
- when src.scan(/\\([MC]-|c)(?=\\)/) then
522
- self.string_buffer << src.matched
523
- self.tokadd_escape term
524
- when src.scan(/\\([MC]-|c)(.)/) then
525
- self.string_buffer << src.matched
526
- when src.scan(/\\[McCx]/) then
527
- rb_compile_error "Invalid escape character syntax"
528
- when src.scan(/\\(.)/m) then
529
- self.string_buffer << src.matched
723
+ when keyword.id0 == :kDO then # parse26.y line 7591
724
+ case
725
+ when lambda_beginning? then
726
+ self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
727
+ self.paren_nest -= 1 # TODO: question this?
728
+ result lex_state, :kDO_LAMBDA, token
729
+ when cond.is_in_state then
730
+ result lex_state, :kDO_COND, token
731
+ when cmdarg.is_in_state && state != EXPR_CMDARG then
732
+ result lex_state, :kDO_BLOCK, token
733
+ else
734
+ result lex_state, :kDO, token
735
+ end
736
+ when state =~ EXPR_PAD then
737
+ result lex_state, keyword.id0, token
738
+ when keyword.id0 != keyword.id1 then
739
+ result EXPR_PAR, keyword.id1, token
530
740
  else
531
- rb_compile_error "Invalid escape character syntax"
741
+ result lex_state, keyword.id1, token
532
742
  end
533
743
  end
534
744
 
535
- def tokadd_string(func, term, paren) # 105 lines
536
- qwords = (func & STR_FUNC_QWORDS) != 0
537
- escape = (func & STR_FUNC_ESCAPE) != 0
538
- expand = (func & STR_FUNC_EXPAND) != 0
539
- regexp = (func & STR_FUNC_REGEXP) != 0
540
- symbol = (func & STR_FUNC_SYMBOL) != 0
745
+ def process_underscore text
746
+ self.unscan # put back "_"
541
747
 
542
- paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
543
- term_re = Regexp.new(Regexp.escape(term))
748
+ if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
749
+ ss.terminate
750
+ [RubyLexer::EOF, RubyLexer::EOF]
751
+ elsif scan(/#{IDENT_CHAR}+/) then
752
+ process_token matched
753
+ end
754
+ end
544
755
 
545
- until src.eos? do
546
- c = nil
547
- handled = true
548
- case
549
- when self.nest == 0 && src.scan(term_re) then
550
- src.pos -= 1
551
- break
552
- when paren_re && src.scan(paren_re) then
553
- self.nest += 1
554
- when src.scan(term_re) then
555
- self.nest -= 1
556
- when qwords && src.scan(/\s/) then
557
- src.pos -= 1
558
- break
559
- when expand && src.scan(/#(?=[\$\@\{])/) then
560
- src.pos -= 1
561
- break
562
- when expand && src.scan(/#(?!\n)/) then
563
- # do nothing
564
- when src.check(/\\/) then
565
- case
566
- when qwords && src.scan(/\\\n/) then
567
- string_buffer << "\n"
568
- next
569
- when qwords && src.scan(/\\\s/) then
570
- c = ' '
571
- when expand && src.scan(/\\\n/) then
572
- next
573
- when regexp && src.check(/\\/) then
574
- self.tokadd_escape term
575
- next
576
- when expand && src.scan(/\\/) then
577
- c = self.read_escape
578
- when src.scan(/\\\n/) then
579
- # do nothing
580
- when src.scan(/\\\\/) then
581
- string_buffer << '\\' if escape
582
- c = '\\'
583
- when src.scan(/\\/) then
584
- unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
585
- string_buffer << "\\"
586
- end
587
- else
588
- handled = false
589
- end
590
- else
591
- handled = false
592
- end # case
756
+ def rb_compile_error msg
757
+ msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
758
+ raise RubyParser::SyntaxError, msg
759
+ end
593
760
 
594
- unless handled then
761
+ def reset
762
+ self.lineno = 1
763
+ self.brace_nest = 0
764
+ self.command_start = true
765
+ self.comments = []
766
+ self.lex_state = EXPR_NONE
767
+ self.lex_strterm = nil
768
+ self.lpar_beg = nil
769
+ self.paren_nest = 0
770
+ self.space_seen = false
771
+ self.string_nest = 0
772
+ self.token = nil
773
+ self.string_buffer = []
774
+ self.old_ss = nil
775
+ self.old_lineno = nil
595
776
 
596
- t = Regexp.escape term
597
- x = Regexp.escape(paren) if paren && paren != "\000"
598
- re = if qwords then
599
- /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
600
- else
601
- /[^#{t}#{x}\#\0\\]+|./
602
- end
777
+ self.cond.reset
778
+ self.cmdarg.reset
779
+ end
603
780
 
604
- src.scan re
605
- c = src.matched
781
+ def result new_state, token, text, line = self.lineno # :nodoc:
782
+ new_state = self.arg_state if new_state == :arg_state
783
+ self.lex_state = new_state if new_state
606
784
 
607
- rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
608
- end # unless handled
785
+ [token, [text, line]]
786
+ end
609
787
 
610
- c ||= src.matched
611
- string_buffer << c
612
- end # until
788
+ def ruby22_label?
789
+ ruby22plus? and is_label_possible?
790
+ end
613
791
 
614
- c ||= src.matched
615
- c = RubyLexer::EOF if src.eos?
792
+ def ruby22plus?
793
+ parser.class.version >= 22
794
+ end
616
795
 
796
+ def ruby23plus?
797
+ parser.class.version >= 23
798
+ end
617
799
 
618
- return c
800
+ def ruby24minus?
801
+ parser.class.version <= 24
619
802
  end
620
803
 
621
- ESCAPES = {
622
- "a" => "\007",
623
- "b" => "\010",
624
- "e" => "\033",
625
- "f" => "\f",
626
- "n" => "\n",
627
- "r" => "\r",
628
- "s" => " ",
629
- "t" => "\t",
630
- "v" => "\13",
631
- "\\" => '\\',
632
- "\n" => "",
633
- "C-\?" => 127.chr,
634
- "c\?" => 127.chr,
635
- }
804
+ def ruby27plus?
805
+ parser.class.version >= 27
806
+ end
807
+
808
+ def space_vs_beginning space_type, beg_type, fallback
809
+ if is_space_arg? check(/./m) then
810
+ warning "`**' interpreted as argument prefix"
811
+ space_type
812
+ elsif is_beg? then
813
+ beg_type
814
+ else
815
+ # TODO: warn_balanced("**", "argument prefix");
816
+ fallback
817
+ end
818
+ end
636
819
 
637
820
  def unescape s
638
821
  r = ESCAPES[s]
639
822
 
640
823
  return r if r
641
824
 
642
- case s
643
- when /^[0-7]{1,3}/ then
644
- $&.to_i(8).chr
645
- when /^x([0-9a-fA-F]{1,2})/ then
646
- $1.to_i(16).chr
647
- when /^M-(.)/ then
648
- ($1[0].ord | 0x80).chr
649
- when /^(C-|c)(.)/ then
650
- ($2[0].ord & 0x9f).chr
651
- when /^[McCx0-9]/ then
652
- rb_compile_error("Invalid escape character syntax")
653
- else
654
- s
655
- end
825
+ x = case s
826
+ when /^[0-7]{1,3}/ then
827
+ ($&.to_i(8) & 0xFF).chr
828
+ when /^x([0-9a-fA-F]{1,2})/ then
829
+ $1.to_i(16).chr
830
+ when /^M-(.)/ then
831
+ ($1[0].ord | 0x80).chr
832
+ when /^(C-|c)(.)/ then
833
+ ($2[0].ord & 0x9f).chr
834
+ when /^[89a-f]/i then # bad octal or hex... ignore? that's what MRI does :(
835
+ s
836
+ when /^[McCx0-9]/ then
837
+ rb_compile_error("Invalid escape character syntax")
838
+ when /u(\h{4})/ then
839
+ [$1.delete("{}").to_i(16)].pack("U")
840
+ when /u(\h{1,3})/ then
841
+ rb_compile_error("Invalid escape character syntax")
842
+ when /u\{(\h+(?:\s+\h+)*)\}/ then
843
+ $1.split.map { |cp| cp.to_i(16) }.pack("U*")
844
+ else
845
+ s
846
+ end
847
+ x
656
848
  end
657
849
 
658
850
  def warning s
659
851
  # do nothing for now
660
852
  end
661
853
 
662
- ##
663
- # Returns the next token. Also sets yy_val is needed.
664
- #
665
- # @return Description of the Returned Value
666
-
667
- def yylex # 826 lines
668
- c = ''
669
- self.space_seen = false
670
- command_state = false
671
- src = self.src
672
-
673
- self.token = nil
674
- self.yacc_value = nil
675
-
676
- return yylex_string if lex_strterm
677
-
678
- command_state = self.command_start
679
- self.command_start = false
680
-
681
- last_state = lex_state
682
-
683
- loop do # START OF CASE
684
- if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
685
- self.space_seen = true
686
- next
687
- elsif src.check(/[^a-zA-Z]/) then
688
- if src.scan(/\n|#/) then
689
- self.lineno = nil
690
- c = src.matched
691
- if c == '#' then
692
- src.pos -= 1
693
-
694
- while src.scan(/\s*#.*(\n+|\z)/) do
695
- @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
696
- end
697
-
698
- return RubyLexer::EOF if src.eos?
699
- end
700
-
701
- # Replace a string of newlines with a single one
702
- src.scan(/\n+/)
703
-
704
- next if in_lex_state?(:expr_beg, :expr_fname, :expr_dot, :expr_class,
705
- :expr_value)
706
-
707
- if src.scan(/([\ \t\r\f\v]*)\./) then
708
- self.space_seen = true unless src[1].empty?
709
-
710
- src.pos -= 1
711
- next unless src.check(/\.\./)
712
- end
713
-
714
- self.command_start = true
715
- self.lex_state = :expr_beg
716
- return :tNL
717
- elsif src.scan(/[\]\)\}]/) then
718
- cond.lexpop
719
- cmdarg.lexpop
720
- tern.lexpop
721
- self.lex_state = :expr_end
722
- self.yacc_value = src.matched
723
- result = {
724
- ")" => :tRPAREN,
725
- "]" => :tRBRACK,
726
- "}" => :tRCURLY
727
- }[src.matched]
728
- return result
729
- elsif src.scan(/\.\.\.?|,|![=~]?/) then
730
- self.lex_state = :expr_beg
731
- tok = self.yacc_value = src.matched
732
- return TOKENS[tok]
733
- elsif src.check(/\./) then
734
- if src.scan(/\.\d/) then
735
- rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
736
- elsif src.scan(/\./) then
737
- self.lex_state = :expr_dot
738
- self.yacc_value = "."
739
- return :tDOT
740
- end
741
- elsif src.scan(/\(/) then
742
- result = if ruby18 then
743
- yylex_paren18
744
- else
745
- yylex_paren19
746
- end
747
-
748
- self.expr_beg_push "("
749
-
750
- return result
751
- elsif src.check(/\=/) then
752
- if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
753
- self.fix_arg_lex_state
754
- tok = self.yacc_value = src.matched
755
- return TOKENS[tok]
756
- elsif src.scan(/\=begin(?=\s)/) then
757
- # @comments << '=' << src.matched
758
- @comments << src.matched
759
-
760
- unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
761
- @comments.clear
762
- rb_compile_error("embedded document meets end of file")
763
- end
764
-
765
- @comments << src.matched
766
-
767
- next
768
- else
769
- raise "you shouldn't be able to get here"
770
- end
771
- elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
772
- self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
773
- self.lex_state = :expr_end
774
- return :tSTRING
775
- elsif src.scan(/\"/) then # FALLBACK
776
- self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
777
- self.yacc_value = "\""
778
- return :tSTRING_BEG
779
- elsif src.scan(/\@\@?\w*/) then
780
- self.token = src.matched
781
-
782
- rb_compile_error "`#{token}` is not allowed as a variable name" if
783
- token =~ /\@\d/
784
-
785
- return process_token(command_state)
786
- elsif src.scan(/\:\:/) then
787
- if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
788
- self.lex_state = :expr_beg
789
- self.yacc_value = "::"
790
- return :tCOLON3
791
- end
792
-
793
- self.lex_state = :expr_dot
794
- self.yacc_value = "::"
795
- return :tCOLON2
796
- elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
797
- # scanning shortcut to symbols
798
- self.yacc_value = src[1]
799
- self.lex_state = :expr_end
800
- return :tSYMBOL
801
- elsif src.scan(/\:/) then
802
- # ?: / then / when
803
- if is_end? || src.check(/\s/) then
804
- self.lex_state = :expr_beg
805
- # TODO warn_balanced(":", "symbol literal");
806
- self.yacc_value = ":"
807
- return :tCOLON
808
- end
809
-
810
- case
811
- when src.scan(/\'/) then
812
- self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
813
- when src.scan(/\"/) then
814
- self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
815
- end
816
-
817
- self.lex_state = :expr_fname
818
- self.yacc_value = ":"
819
- return :tSYMBEG
820
- elsif src.check(/[0-9]/) then
821
- return parse_number
822
- elsif src.scan(/\[/) then
823
- result = src.matched
824
-
825
- if in_lex_state? :expr_fname, :expr_dot then
826
- self.lex_state = :expr_arg
827
- case
828
- when src.scan(/\]\=/) then
829
- self.yacc_value = "[]="
830
- return :tASET
831
- when src.scan(/\]/) then
832
- self.yacc_value = "[]"
833
- return :tAREF
834
- else
835
- rb_compile_error "unexpected '['"
836
- end
837
- elsif is_beg? then
838
- self.tern.push false
839
- result = :tLBRACK
840
- elsif is_arg? && space_seen then
841
- self.tern.push false
842
- result = :tLBRACK
843
- else
844
- result = :tLBRACK2
845
- end
846
-
847
- self.expr_beg_push "["
848
-
849
- return result
850
- elsif src.scan(/\'(\\.|[^\'])*\'/) then
851
- self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
852
- self.lex_state = :expr_end
853
- return :tSTRING
854
- elsif src.check(/\|/) then
855
- if src.scan(/\|\|\=/) then
856
- self.lex_state = :expr_beg
857
- self.yacc_value = "||"
858
- return :tOP_ASGN
859
- elsif src.scan(/\|\|/) then
860
- self.lex_state = :expr_beg
861
- self.yacc_value = "||"
862
- return :tOROP
863
- elsif src.scan(/\|\=/) then
864
- self.lex_state = :expr_beg
865
- self.yacc_value = "|"
866
- return :tOP_ASGN
867
- elsif src.scan(/\|/) then
868
- self.fix_arg_lex_state
869
- self.yacc_value = "|"
870
- return :tPIPE
871
- end
872
- elsif src.scan(/\{/) then
873
- if defined?(@hack_expects_lambda) && @hack_expects_lambda
874
- @hack_expects_lambda = false
875
- self.lex_state = :expr_beg
876
- return :tLAMBEG
877
- end
878
-
879
- result = if is_arg? || in_lex_state?(:expr_end) then
880
- :tLCURLY # block (primary)
881
- elsif in_lex_state?(:expr_endarg) then
882
- :tLBRACE_ARG # block (expr)
883
- else
884
- self.tern.push false
885
- :tLBRACE # hash
886
- end
887
-
888
- self.expr_beg_push "{"
889
- self.command_start = true unless result == :tLBRACE
890
-
891
- return result
892
- elsif src.scan(/->/) then
893
- @hack_expects_lambda = true
894
- self.lex_state = :expr_arg
895
- return :tLAMBDA
896
- elsif src.scan(/[+-]/) then
897
- sign = src.matched
898
- utype, type = if sign == "+" then
899
- [:tUPLUS, :tPLUS]
900
- else
901
- [:tUMINUS, :tMINUS]
902
- end
903
-
904
- if in_lex_state? :expr_fname, :expr_dot then
905
- self.lex_state = :expr_arg
906
- if src.scan(/@/) then
907
- self.yacc_value = "#{sign}@"
908
- return utype
909
- else
910
- self.yacc_value = sign
911
- return type
912
- end
913
- end
914
-
915
- if src.scan(/\=/) then
916
- self.lex_state = :expr_beg
917
- self.yacc_value = sign
918
- return :tOP_ASGN
919
- end
920
-
921
- if (is_beg? ||
922
- (is_arg? && space_seen && !src.check(/\s/))) then
923
- if is_arg? then
924
- arg_ambiguous
925
- end
854
+ def was_label?
855
+ @was_label = ruby22_label?
856
+ true
857
+ end
926
858
 
927
- self.lex_state = :expr_beg
928
- self.yacc_value = sign
859
+ class State
860
+ attr_accessor :n
861
+ attr_accessor :names
929
862
 
930
- if src.check(/\d/) then
931
- if utype == :tUPLUS then
932
- return self.parse_number
933
- else
934
- return :tUMINUS_NUM
935
- end
936
- end
863
+ # TODO: take a shared hash of strings for inspect/to_s
864
+ def initialize o, names
865
+ raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
937
866
 
938
- return utype
939
- end
940
-
941
- self.lex_state = :expr_beg
942
- self.yacc_value = sign
943
- return type
944
- elsif src.check(/\*/) then
945
- if src.scan(/\*\*=/) then
946
- self.lex_state = :expr_beg
947
- self.yacc_value = "**"
948
- return :tOP_ASGN
949
- elsif src.scan(/\*\*/) then
950
- self.yacc_value = "**"
951
- self.fix_arg_lex_state
952
- return :tPOW
953
- elsif src.scan(/\*\=/) then
954
- self.lex_state = :expr_beg
955
- self.yacc_value = "*"
956
- return :tOP_ASGN
957
- elsif src.scan(/\*/) then
958
- result = if is_arg? && space_seen && src.check(/\S/) then
959
- warning("`*' interpreted as argument prefix")
960
- :tSTAR
961
- elsif is_beg? then
962
- :tSTAR
963
- else
964
- :tSTAR2
965
- end
966
- self.yacc_value = "*"
967
- self.fix_arg_lex_state
968
-
969
- return result
970
- end
971
- elsif src.check(/\</) then
972
- if src.scan(/\<\=\>/) then
973
- self.fix_arg_lex_state
974
- self.yacc_value = "<=>"
975
- return :tCMP
976
- elsif src.scan(/\<\=/) then
977
- self.fix_arg_lex_state
978
- self.yacc_value = "<="
979
- return :tLEQ
980
- elsif src.scan(/\<\<\=/) then
981
- self.fix_arg_lex_state
982
- self.lex_state = :expr_beg
983
- self.yacc_value = "\<\<"
984
- return :tOP_ASGN
985
- elsif src.scan(/\<\</) then
986
- if (! in_lex_state?(:expr_end, :expr_dot,
987
- :expr_endarg, :expr_class) &&
988
- (!is_arg? || space_seen)) then
989
- tok = self.heredoc_identifier
990
- return tok if tok
991
- end
867
+ self.n = o
868
+ self.names = names
869
+ end
992
870
 
993
- self.fix_arg_lex_state
994
- self.yacc_value = "\<\<"
995
- return :tLSHFT
996
- elsif src.scan(/\</) then
997
- self.fix_arg_lex_state
998
- self.yacc_value = "<"
999
- return :tLT
1000
- end
1001
- elsif src.check(/\>/) then
1002
- if src.scan(/\>\=/) then
1003
- self.fix_arg_lex_state
1004
- self.yacc_value = ">="
1005
- return :tGEQ
1006
- elsif src.scan(/\>\>=/) then
1007
- self.fix_arg_lex_state
1008
- self.lex_state = :expr_beg
1009
- self.yacc_value = ">>"
1010
- return :tOP_ASGN
1011
- elsif src.scan(/\>\>/) then
1012
- self.fix_arg_lex_state
1013
- self.yacc_value = ">>"
1014
- return :tRSHFT
1015
- elsif src.scan(/\>/) then
1016
- self.fix_arg_lex_state
1017
- self.yacc_value = ">"
1018
- return :tGT
1019
- end
1020
- elsif src.scan(/\`/) then
1021
- self.yacc_value = "`"
1022
- case lex_state
1023
- when :expr_fname then
1024
- self.lex_state = :expr_end
1025
- return :tBACK_REF2
1026
- when :expr_dot then
1027
- self.lex_state = if command_state then
1028
- :expr_cmdarg
1029
- else
1030
- :expr_arg
1031
- end
1032
- return :tBACK_REF2
1033
- end
1034
- self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
1035
- return :tXSTRING_BEG
1036
- elsif src.scan(/\?/) then
1037
-
1038
- if is_end? then
1039
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1040
- self.tern.push true
1041
- self.yacc_value = "?"
1042
- return :tEH
1043
- end
1044
-
1045
- if src.eos? then
1046
- rb_compile_error "incomplete character syntax"
1047
- end
1048
-
1049
- if src.check(/\s|\v/) then
1050
- unless is_arg? then
1051
- c2 = { " " => 's',
1052
- "\n" => 'n',
1053
- "\t" => 't',
1054
- "\v" => 'v',
1055
- "\r" => 'r',
1056
- "\f" => 'f' }[src.matched]
1057
-
1058
- if c2 then
1059
- warning("invalid character syntax; use ?\\" + c2)
1060
- end
1061
- end
871
+ def == o
872
+ self.equal?(o) || (o.class == self.class && o.n == self.n)
873
+ end
1062
874
 
1063
- # ternary
1064
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1065
- self.tern.push true
1066
- self.yacc_value = "?"
1067
- return :tEH
1068
- elsif src.check(/\w(?=\w)/) then # ternary, also
1069
- self.lex_state = :expr_beg
1070
- self.tern.push true
1071
- self.yacc_value = "?"
1072
- return :tEH
1073
- end
1074
-
1075
- c = if src.scan(/\\/) then
1076
- self.read_escape
1077
- else
1078
- src.getch
1079
- end
1080
- self.lex_state = :expr_end
1081
-
1082
- if version == 18 then
1083
- self.yacc_value = c[0].ord & 0xff
1084
- return :tINTEGER
1085
- else
1086
- self.yacc_value = c
1087
- return :tSTRING
1088
- end
1089
- elsif src.check(/\&/) then
1090
- if src.scan(/\&\&\=/) then
1091
- self.yacc_value = "&&"
1092
- self.lex_state = :expr_beg
1093
- return :tOP_ASGN
1094
- elsif src.scan(/\&\&/) then
1095
- self.lex_state = :expr_beg
1096
- self.yacc_value = "&&"
1097
- return :tANDOP
1098
- elsif src.scan(/\&\=/) then
1099
- self.yacc_value = "&"
1100
- self.lex_state = :expr_beg
1101
- return :tOP_ASGN
1102
- elsif src.scan(/&/) then
1103
- result = if is_arg? && space_seen &&
1104
- !src.check(/\s/) then
1105
- warning("`&' interpreted as argument prefix")
1106
- :tAMPER
1107
- elsif in_lex_state? :expr_beg, :expr_mid then
1108
- :tAMPER
1109
- else
1110
- :tAMPER2
1111
- end
1112
-
1113
- self.fix_arg_lex_state
1114
- self.yacc_value = "&"
1115
- return result
1116
- end
1117
- elsif src.scan(/\//) then
1118
- if is_beg? then
1119
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1120
- self.yacc_value = "/"
1121
- return :tREGEXP_BEG
1122
- end
1123
-
1124
- if src.scan(/\=/) then
1125
- self.yacc_value = "/"
1126
- self.lex_state = :expr_beg
1127
- return :tOP_ASGN
1128
- end
1129
-
1130
- if is_arg? && space_seen then
1131
- unless src.scan(/\s/) then
1132
- arg_ambiguous
1133
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1134
- self.yacc_value = "/"
1135
- return :tREGEXP_BEG
1136
- end
1137
- end
1138
-
1139
- self.fix_arg_lex_state
1140
- self.yacc_value = "/"
1141
-
1142
- return :tDIVIDE
1143
- elsif src.scan(/\^=/) then
1144
- self.lex_state = :expr_beg
1145
- self.yacc_value = "^"
1146
- return :tOP_ASGN
1147
- elsif src.scan(/\^/) then
1148
- self.fix_arg_lex_state
1149
- self.yacc_value = "^"
1150
- return :tCARET
1151
- elsif src.scan(/\;/) then
1152
- self.command_start = true
1153
- self.lex_state = :expr_beg
1154
- self.yacc_value = ";"
1155
- return :tSEMI
1156
- elsif src.scan(/\~/) then
1157
- if in_lex_state? :expr_fname, :expr_dot then
1158
- src.scan(/@/)
1159
- end
1160
-
1161
- self.fix_arg_lex_state
1162
- self.yacc_value = "~"
1163
-
1164
- return :tTILDE
1165
- elsif src.scan(/\\/) then
1166
- if src.scan(/\r?\n/) then
1167
- self.lineno = nil
1168
- self.space_seen = true
1169
- next
1170
- end
1171
- rb_compile_error "bare backslash only allowed before newline"
1172
- elsif src.scan(/\%/) then
1173
- if is_beg? then
1174
- return parse_quote
1175
- end
1176
-
1177
- if src.scan(/\=/) then
1178
- self.lex_state = :expr_beg
1179
- self.yacc_value = "%"
1180
- return :tOP_ASGN
1181
- end
1182
-
1183
- return parse_quote if is_arg? && space_seen && ! src.check(/\s/)
1184
-
1185
- self.fix_arg_lex_state
1186
- self.yacc_value = "%"
1187
-
1188
- return :tPERCENT
1189
- elsif src.check(/\$/) then
1190
- if src.scan(/(\$_)(\w+)/) then
1191
- self.lex_state = :expr_end
1192
- self.token = src.matched
1193
- return process_token(command_state)
1194
- elsif src.scan(/\$_/) then
1195
- self.lex_state = :expr_end
1196
- self.token = src.matched
1197
- self.yacc_value = src.matched
1198
- return :tGVAR
1199
- elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1200
- self.lex_state = :expr_end
1201
- self.yacc_value = src.matched
1202
- return :tGVAR
1203
- elsif src.scan(/\$([\&\`\'\+])/) then
1204
- self.lex_state = :expr_end
1205
- # Explicit reference to these vars as symbols...
1206
- if last_state == :expr_fname then
1207
- self.yacc_value = src.matched
1208
- return :tGVAR
1209
- else
1210
- self.yacc_value = src[1].to_sym
1211
- return :tBACK_REF
1212
- end
1213
- elsif src.scan(/\$([1-9]\d*)/) then
1214
- self.lex_state = :expr_end
1215
- if last_state == :expr_fname then
1216
- self.yacc_value = src.matched
1217
- return :tGVAR
1218
- else
1219
- self.yacc_value = src[1].to_i
1220
- return :tNTH_REF
1221
- end
1222
- elsif src.scan(/\$0/) then
1223
- self.lex_state = :expr_end
1224
- self.token = src.matched
1225
- return process_token(command_state)
1226
- elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1227
- self.lex_state = :expr_end
1228
- self.yacc_value = "$"
1229
- return "$"
1230
- elsif src.scan(/\$\w+/)
1231
- self.lex_state = :expr_end
1232
- self.token = src.matched
1233
- return process_token(command_state)
1234
- end
1235
- elsif src.check(/\_/) then
1236
- if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1237
- self.lineno = nil
1238
- return RubyLexer::EOF
1239
- elsif src.scan(/\_\w*/) then
1240
- self.token = src.matched
1241
- return process_token(command_state)
1242
- end
1243
- end
1244
- end # END OF CASE
875
+ def =~ v
876
+ (self.n & v.n) != 0
877
+ end
1245
878
 
1246
- if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1247
- return RubyLexer::EOF
1248
- else # alpha check
1249
- unless src.check IDENT_RE then
1250
- rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1251
- end
1252
- end
879
+ def | v
880
+ raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
881
+ self.names == v.names
882
+ self.class.new(self.n | v.n, self.names)
883
+ end
1253
884
 
1254
- self.token = src.matched if self.src.scan IDENT_RE
885
+ def inspect
886
+ return "Value(0)" if n.zero? # HACK?
1255
887
 
1256
- return process_token(command_state)
888
+ names.map { |v, k| k if self =~ v }.
889
+ compact.
890
+ join("|").
891
+ gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
1257
892
  end
1258
- end
1259
893
 
1260
- def yylex_paren18
1261
- self.command_start = true
1262
- result = :tLPAREN2
1263
-
1264
- if in_lex_state? :expr_beg, :expr_mid then
1265
- result = :tLPAREN
1266
- elsif space_seen then
1267
- if in_lex_state? :expr_cmdarg then
1268
- result = :tLPAREN_ARG
1269
- elsif in_lex_state? :expr_arg then
1270
- self.tern.push false
1271
- warning "don't put space before argument parentheses"
1272
- end
1273
- else
1274
- self.tern.push false
894
+ alias to_s inspect
895
+
896
+ module Values
897
+ expr_names = {}
898
+
899
+ EXPR_NONE = State.new 0x0, expr_names
900
+ EXPR_BEG = State.new 0x1, expr_names
901
+ EXPR_END = State.new 0x2, expr_names
902
+ EXPR_ENDARG = State.new 0x4, expr_names
903
+ EXPR_ENDFN = State.new 0x8, expr_names
904
+ EXPR_ARG = State.new 0x10, expr_names
905
+ EXPR_CMDARG = State.new 0x20, expr_names
906
+ EXPR_MID = State.new 0x40, expr_names
907
+ EXPR_FNAME = State.new 0x80, expr_names
908
+ EXPR_DOT = State.new 0x100, expr_names
909
+ EXPR_CLASS = State.new 0x200, expr_names
910
+ EXPR_LABEL = State.new 0x400, expr_names
911
+ EXPR_LABELED = State.new 0x800, expr_names
912
+ EXPR_FITEM = State.new 0x1000, expr_names
913
+
914
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
915
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
916
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
917
+
918
+ # extra fake lex_state names to make things a bit cleaner
919
+
920
+ EXPR_LAB = EXPR_ARG|EXPR_LABELED
921
+ EXPR_LIT = EXPR_END|EXPR_ENDARG
922
+ EXPR_PAR = EXPR_BEG|EXPR_LABEL
923
+ EXPR_PAD = EXPR_BEG|EXPR_LABELED
924
+
925
+ EXPR_NUM = EXPR_LIT
926
+
927
+ expr_names.merge!(EXPR_NONE => "EXPR_NONE",
928
+ EXPR_BEG => "EXPR_BEG",
929
+ EXPR_END => "EXPR_END",
930
+ EXPR_ENDARG => "EXPR_ENDARG",
931
+ EXPR_ENDFN => "EXPR_ENDFN",
932
+ EXPR_ARG => "EXPR_ARG",
933
+ EXPR_CMDARG => "EXPR_CMDARG",
934
+ EXPR_MID => "EXPR_MID",
935
+ EXPR_FNAME => "EXPR_FNAME",
936
+ EXPR_DOT => "EXPR_DOT",
937
+ EXPR_CLASS => "EXPR_CLASS",
938
+ EXPR_LABEL => "EXPR_LABEL",
939
+ EXPR_LABELED => "EXPR_LABELED",
940
+ EXPR_FITEM => "EXPR_FITEM")
941
+
942
+ # ruby constants for strings
943
+
944
+ str_func_names = {}
945
+
946
+ STR_FUNC_BORING = State.new 0x00, str_func_names
947
+ STR_FUNC_ESCAPE = State.new 0x01, str_func_names
948
+ STR_FUNC_EXPAND = State.new 0x02, str_func_names
949
+ STR_FUNC_REGEXP = State.new 0x04, str_func_names
950
+ STR_FUNC_QWORDS = State.new 0x08, str_func_names
951
+ STR_FUNC_SYMBOL = State.new 0x10, str_func_names
952
+ STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
953
+ STR_FUNC_LABEL = State.new 0x40, str_func_names
954
+ STR_FUNC_LIST = State.new 0x4000, str_func_names
955
+ STR_FUNC_TERM = State.new 0x8000, str_func_names
956
+ STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
957
+
958
+ # TODO: check parser25.y on how they do STR_FUNC_INDENT
959
+
960
+ STR_SQUOTE = STR_FUNC_BORING
961
+ STR_DQUOTE = STR_FUNC_EXPAND
962
+ STR_XQUOTE = STR_FUNC_EXPAND
963
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
964
+ STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
965
+ STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
966
+ STR_SSYM = STR_FUNC_SYMBOL
967
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
968
+ STR_LABEL = STR_FUNC_LABEL
969
+
970
+ str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
971
+ STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
972
+ STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
973
+ STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
974
+ STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
975
+ STR_FUNC_INDENT => "STR_FUNC_INDENT",
976
+ STR_FUNC_LABEL => "STR_FUNC_LABEL",
977
+ STR_FUNC_LIST => "STR_FUNC_LIST",
978
+ STR_FUNC_TERM => "STR_FUNC_TERM",
979
+ STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
980
+ STR_SQUOTE => "STR_SQUOTE")
1275
981
  end
1276
982
 
1277
- result
983
+ include Values
1278
984
  end
1279
985
 
1280
- def is_end?
1281
- in_lex_state? :expr_end, :expr_endarg, :expr_endfn
1282
- end
986
+ include State::Values
987
+ end
1283
988
 
1284
- def is_arg?
1285
- in_lex_state? :expr_arg, :expr_cmdarg
1286
- end
989
+ class RubyLexer
990
+ module SSWrapper
991
+ def string= s
992
+ ss.string= s
993
+ end
1287
994
 
1288
- def is_beg?
1289
- in_lex_state? :expr_beg, :expr_mid, :expr_value, :expr_class
1290
- end
995
+ def beginning_of_line?
996
+ ss.bol?
997
+ end
1291
998
 
1292
- def is_space_arg? c = "x"
1293
- is_arg? and space_seen and c !~ /\s/
1294
- end
999
+ alias bol? beginning_of_line? # to make .rex file more readable
1295
1000
 
1296
- def is_label_possible? command_state
1297
- (in_lex_state?(:expr_beg) && !command_state) || is_arg?
1298
- end
1001
+ def check re
1002
+ maybe_pop_stack
1299
1003
 
1300
- def yylex_paren19 # TODO: move or remove
1301
- result =
1302
- if is_beg? then
1303
- :tLPAREN
1304
- elsif is_space_arg? then
1305
- :tLPAREN_ARG
1306
- else
1307
- :tLPAREN2 # plain '(' in parse.y
1308
- end
1004
+ ss.check re
1005
+ end
1309
1006
 
1310
- # paren_nest++; # TODO
1007
+ def end_of_stream?
1008
+ ss.eos?
1009
+ end
1311
1010
 
1312
- result
1313
- end
1011
+ alias eos? end_of_stream?
1314
1012
 
1315
- def process_token(command_state)
1013
+ def getch
1014
+ c = ss.getch
1015
+ c = ss.getch if c == "\r" && ss.peek(1) == "\n"
1016
+ c
1017
+ end
1316
1018
 
1317
- token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)
1019
+ def match
1020
+ ss
1021
+ end
1318
1022
 
1319
- result = nil
1320
- last_state = lex_state
1023
+ def matched
1024
+ ss.matched
1025
+ end
1321
1026
 
1322
- case token
1323
- when /^\$/ then
1324
- self.lex_state, result = :expr_end, :tGVAR
1325
- when /^@@/ then
1326
- self.lex_state, result = :expr_end, :tCVAR
1327
- when /^@/ then
1328
- self.lex_state, result = :expr_end, :tIVAR
1329
- else
1330
- if token =~ /[!?]$/ then
1331
- result = :tFID
1332
- else
1333
- if in_lex_state? :expr_fname then
1334
- # ident=, not =~ => == or followed by =>
1335
- # TODO test lexing of a=>b vs a==>b
1336
- if src.scan(/=(?:(?![~>=])|(?==>))/) then
1337
- result = :tIDENTIFIER
1338
- token << src.matched
1339
- end
1340
- end
1027
+ def in_heredoc?
1028
+ !!self.old_ss
1029
+ end
1341
1030
 
1342
- result ||= if token =~ /^[A-Z]/ then
1343
- :tCONSTANT
1344
- else
1345
- :tIDENTIFIER
1346
- end
1031
+ def maybe_pop_stack
1032
+ if ss.eos? && in_heredoc? then
1033
+ self.ss_pop
1034
+ self.lineno_pop
1347
1035
  end
1036
+ end
1348
1037
 
1349
- unless ruby18
1350
- if is_label_possible? command_state then
1351
- colon = src.scan(/:/)
1038
+ def pos
1039
+ ss.pos
1040
+ end
1352
1041
 
1353
- if colon && src.peek(1) != ":" then
1354
- self.lex_state = :expr_beg
1355
- self.yacc_value = [token, src.lineno]
1356
- return :tLABEL
1357
- end
1042
+ def pos= n
1043
+ ss.pos = n
1044
+ end
1358
1045
 
1359
- src.unscan if colon
1360
- end
1361
- end
1046
+ def rest
1047
+ ss.rest
1048
+ end
1362
1049
 
1363
- unless in_lex_state? :expr_dot then
1364
- # See if it is a reserved word.
1365
- keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
1366
- RubyParserStuff::Keyword.keyword18 token
1367
- else
1368
- RubyParserStuff::Keyword.keyword19 token
1369
- end
1050
+ def scan re
1051
+ maybe_pop_stack
1370
1052
 
1371
- if keyword then
1372
- state = lex_state
1373
- self.lex_state = keyword.state
1374
- self.yacc_value = [token, src.lineno]
1375
-
1376
- if state == :expr_fname then
1377
- self.yacc_value = keyword.name
1378
- return keyword.id0
1379
- end
1380
-
1381
- if keyword.id0 == :kDO then
1382
- self.command_start = true
1383
- return :kDO_COND if cond.is_in_state
1384
- return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1385
- return :kDO_BLOCK if state == :expr_endarg
1386
- if defined?(@hack_expects_lambda) && @hack_expects_lambda
1387
- @hack_expects_lambda = false
1388
- return :kDO_LAMBDA
1389
- end
1390
- return :kDO
1391
- end
1053
+ ss.scan re
1054
+ end
1392
1055
 
1393
- return keyword.id0 if state == :expr_beg or state == :expr_value
1056
+ def scanner_class # TODO: design this out of oedipus_lex. or something.
1057
+ RPStringScanner
1058
+ end
1394
1059
 
1395
- self.lex_state = :expr_beg if keyword.id0 != keyword.id1
1060
+ def ss_string
1061
+ ss.string
1062
+ end
1396
1063
 
1397
- return keyword.id1
1398
- end
1399
- end
1064
+ def ss_string= s
1065
+ raise "Probably not"
1066
+ ss.string = s
1067
+ end
1400
1068
 
1401
- # TODO:
1402
- # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
1403
-
1404
- self.lex_state =
1405
- if is_beg? || in_lex_state?(:expr_dot) || is_arg? then
1406
- if command_state then
1407
- :expr_cmdarg
1408
- else
1409
- :expr_arg
1410
- end
1411
- elsif ruby19 && in_lex_state?(:expr_fname) then
1412
- :expr_endfn
1413
- else
1414
- :expr_end
1415
- end
1069
+ def unscan
1070
+ ss.unscan
1071
+ end
1072
+ end
1073
+
1074
+ include SSWrapper
1075
+ end
1416
1076
 
1077
+ class RubyLexer
1078
+ module SSStackish
1079
+ def lineno_push new_lineno
1080
+ self.old_lineno = self.lineno
1081
+ self.lineno = new_lineno
1417
1082
  end
1418
1083
 
1419
- self.yacc_value = token
1084
+ def lineno_pop
1085
+ self.lineno = self.old_lineno
1086
+ self.old_lineno = nil
1087
+ end
1420
1088
 
1089
+ def ss= o
1090
+ raise "Clearing ss while in heredoc!?!" if in_heredoc?
1091
+ @old_ss = nil
1092
+ super
1093
+ end
1421
1094
 
1422
- self.lex_state = :expr_end if
1423
- last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
1095
+ def ss_push new_ss
1096
+ @old_ss = self.ss
1097
+ @ss = new_ss
1098
+ end
1424
1099
 
1425
- return result
1100
+ def ss_pop
1101
+ @ss = self.old_ss
1102
+ @old_ss = nil
1103
+ end
1426
1104
  end
1427
1105
 
1428
- def yylex_string # 23 lines
1429
- token = if lex_strterm[0] == :heredoc then
1430
- self.heredoc lex_strterm
1431
- else
1432
- self.parse_string lex_strterm
1433
- end
1106
+ prepend SSStackish
1107
+ end
1108
+
1109
+ if ENV["RP_STRTERM_DEBUG"] then
1110
+ class RubyLexer
1111
+ def d o
1112
+ $stderr.puts o.inspect
1113
+ end
1114
+
1115
+ alias old_lex_strterm= lex_strterm=
1116
+
1117
+ def lex_strterm= o
1118
+ self.old_lex_strterm= o
1119
+ where = caller.first.split(/:/).first(2).join(":")
1120
+ $stderr.puts
1121
+ d :lex_strterm => [o, where]
1122
+ end
1123
+ end
1124
+ end
1434
1125
 
1435
- if token == :tSTRING_END || token == :tREGEXP_END then
1436
- self.lineno = nil
1437
- self.lex_strterm = nil
1438
- self.lex_state = :expr_end
1126
+ require_relative "./ruby_lexer.rex.rb"
1127
+ require_relative "./ruby_lexer_strings.rb"
1128
+
1129
+ if ENV["RP_LINENO_DEBUG"] then
1130
+ class RubyLexer
1131
+ def d o
1132
+ $stderr.puts o.inspect
1439
1133
  end
1440
1134
 
1441
- return token
1135
+ alias old_lineno= lineno=
1136
+
1137
+ def lineno= n
1138
+ self.old_lineno= n
1139
+ where = caller.first.split(/:/).first(2).join(":")
1140
+ $stderr.puts
1141
+ d :lineno => [n, where]
1142
+ end
1442
1143
  end
1443
1144
  end