ruby_parser 3.0.0 → 3.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/.autotest +36 -19
  4. data/History.rdoc +1297 -0
  5. data/Manifest.txt +35 -7
  6. data/{README.txt → README.rdoc} +44 -14
  7. data/Rakefile +308 -110
  8. data/bin/ruby_parse +3 -1
  9. data/bin/ruby_parse_extract_error +36 -16
  10. data/compare/normalize.rb +218 -0
  11. data/debugging.md +190 -0
  12. data/gauntlet.md +107 -0
  13. data/lib/.document +1 -0
  14. data/lib/rp_extensions.rb +53 -0
  15. data/lib/rp_stringscanner.rb +33 -0
  16. data/lib/ruby20_parser.rb +10973 -0
  17. data/lib/ruby20_parser.y +2683 -0
  18. data/lib/ruby21_parser.rb +10980 -0
  19. data/lib/ruby21_parser.y +2700 -0
  20. data/lib/ruby22_parser.rb +11123 -0
  21. data/lib/ruby22_parser.y +2711 -0
  22. data/lib/ruby23_parser.rb +11132 -0
  23. data/lib/ruby23_parser.y +2713 -0
  24. data/lib/ruby24_parser.rb +11231 -0
  25. data/lib/ruby24_parser.y +2721 -0
  26. data/lib/ruby25_parser.rb +11231 -0
  27. data/lib/ruby25_parser.y +2721 -0
  28. data/lib/ruby26_parser.rb +11253 -0
  29. data/lib/ruby26_parser.y +2736 -0
  30. data/lib/ruby27_parser.rb +12980 -0
  31. data/lib/ruby27_parser.y +3324 -0
  32. data/lib/ruby30_parser.rb +13242 -0
  33. data/lib/ruby30_parser.y +3447 -0
  34. data/lib/ruby31_parser.rb +13622 -0
  35. data/lib/ruby31_parser.y +3481 -0
  36. data/lib/ruby3_parser.yy +3536 -0
  37. data/lib/ruby_lexer.rb +933 -1232
  38. data/lib/ruby_lexer.rex +185 -0
  39. data/lib/ruby_lexer.rex.rb +399 -0
  40. data/lib/ruby_lexer_strings.rb +638 -0
  41. data/lib/ruby_parser.rb +97 -3
  42. data/lib/ruby_parser.yy +3465 -0
  43. data/lib/ruby_parser_extras.rb +1216 -687
  44. data/test/test_ruby_lexer.rb +2249 -1092
  45. data/test/test_ruby_parser.rb +5156 -975
  46. data/test/test_ruby_parser_extras.rb +47 -77
  47. data/tools/munge.rb +250 -0
  48. data/tools/ripper.rb +44 -0
  49. data.tar.gz.sig +1 -1
  50. metadata +200 -155
  51. metadata.gz.sig +0 -0
  52. data/.gemtest +0 -0
  53. data/History.txt +0 -482
  54. data/lib/gauntlet_rubyparser.rb +0 -120
  55. data/lib/ruby18_parser.rb +0 -5747
  56. data/lib/ruby18_parser.y +0 -1873
  57. data/lib/ruby19_parser.rb +0 -6110
  58. data/lib/ruby19_parser.y +0 -2078
data/lib/ruby_lexer.rb CHANGED
@@ -1,1443 +1,1144 @@
1
- # encoding: US-ASCII
1
+ # frozen_string_literal: true
2
+ # encoding: UTF-8
3
+
4
+ $DEBUG = true if ENV["DEBUG"]
2
5
 
3
6
  class RubyLexer
7
+ # :stopdoc:
8
+ EOF = :eof_haha!
4
9
 
5
- RUBY19 = "".respond_to? :encoding
10
+ ESCAPES = {
11
+ "a" => "\007",
12
+ "b" => "\010",
13
+ "e" => "\033",
14
+ "f" => "\f",
15
+ "n" => "\n",
16
+ "r" => "\r",
17
+ "s" => " ",
18
+ "t" => "\t",
19
+ "v" => "\13",
20
+ "\\" => '\\',
21
+ "\n" => "",
22
+ "C-\?" => 127.chr,
23
+ "c\?" => 127.chr,
24
+ }
6
25
 
7
- IDENT_CHAR_RE = case RUBY_VERSION
8
- when /^1\.8/ then
9
- /[\w\x80-\xFF]/
10
- when /^(1\.9|2\.0)/ then # HACK - matching 2.0 for now
11
- /[\w\u0080-\uFFFF]/u
12
- else
13
- raise "bork"
14
- end
26
+ HAS_ENC = "".respond_to? :encoding
15
27
 
16
- IDENT_RE = /^#{IDENT_CHAR_RE}+/
28
+ BTOKENS = {
29
+ ".." => :tBDOT2,
30
+ "..." => :tBDOT3,
31
+ }
17
32
 
18
- attr_accessor :command_start
19
- attr_accessor :cmdarg
20
- attr_accessor :cond
21
- attr_accessor :tern # TODO: rename ternary damnit... wtf
22
- attr_accessor :nest
33
+ TOKENS = {
34
+ "!" => :tBANG,
35
+ "!=" => :tNEQ,
36
+ "!@" => :tBANG,
37
+ "!~" => :tNMATCH,
38
+ "," => :tCOMMA,
39
+ ".." => :tDOT2,
40
+ "..." => :tDOT3,
41
+ "=" => :tEQL,
42
+ "==" => :tEQ,
43
+ "===" => :tEQQ,
44
+ "=>" => :tASSOC,
45
+ "=~" => :tMATCH,
46
+ "->" => :tLAMBDA,
47
+ }
23
48
 
24
- ESC_RE = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc]))/
49
+ PERCENT_END = {
50
+ "(" => ")",
51
+ "[" => "]",
52
+ "{" => "}",
53
+ "<" => ">",
54
+ }
25
55
 
26
- ##
27
- # What version of ruby to parse. 18 and 19 are the only valid values
28
- # currently supported.
56
+ SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
29
57
 
30
- attr_accessor :version
58
+ @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
59
+ @@regexp_cache[nil] = nil
31
60
 
32
- # Additional context surrounding tokens that both the lexer and
33
- # grammar use.
34
- attr_reader :lex_state
61
+ def regexp_cache
62
+ @@regexp_cache
63
+ end
35
64
 
36
- attr_accessor :lex_strterm
65
+ if $DEBUG then
66
+ attr_reader :lex_state
37
67
 
38
- attr_accessor :parser # HACK for very end of lexer... *sigh*
68
+ def lex_state= o
69
+ return if @lex_state == o
39
70
 
40
- # Stream of data that yylex examines.
41
- attr_reader :src
71
+ from = ""
72
+ if ENV["VERBOSE"]
73
+ path = caller[0]
74
+ path = caller[1] if path =~ /result/
75
+ path, line, *_ = path.split(/:/)
76
+ path.delete_prefix! File.dirname File.dirname __FILE__
77
+ from = " at .%s:%s" % [path, line]
78
+ end
42
79
 
43
- # Last token read via yylex.
44
- attr_accessor :token
80
+ warn "lex_state: %p -> %p%s" % [lex_state, o, from]
45
81
 
46
- attr_accessor :string_buffer
82
+ @lex_state = o
83
+ end
84
+ end
47
85
 
48
- # Value of last token which had a value associated with it.
49
- attr_accessor :yacc_value
86
+ # :startdoc:
50
87
 
51
- # What handles warnings
52
- attr_accessor :warnings
88
+ attr_accessor :lex_state unless $DEBUG
53
89
 
54
- attr_accessor :space_seen
90
+ attr_accessor :brace_nest
91
+ attr_accessor :cmdarg
92
+ attr_accessor :command_start
93
+ attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
94
+ attr_accessor :last_state
95
+ attr_accessor :cond
96
+ attr_accessor :old_ss
97
+ attr_accessor :old_lineno
55
98
 
56
- EOF = :eof_haha!
99
+ # these are generated via ruby_lexer.rex: ss, lineno
57
100
 
58
- # ruby constants for strings (should this be moved somewhere else?)
59
- STR_FUNC_BORING = 0x00
60
- STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
61
- STR_FUNC_EXPAND = 0x02
62
- STR_FUNC_REGEXP = 0x04
63
- STR_FUNC_QWORDS = 0x08
64
- STR_FUNC_SYMBOL = 0x10
65
- STR_FUNC_INDENT = 0x20 # <<-HEREDOC
66
-
67
- STR_SQUOTE = STR_FUNC_BORING
68
- STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
69
- STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
70
- STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
71
- STR_SSYM = STR_FUNC_SYMBOL
72
- STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
101
+ ##
102
+ # Additional context surrounding tokens that both the lexer and
103
+ # grammar use.
73
104
 
74
- TOKENS = {
75
- "!" => :tBANG,
76
- "!=" => :tNEQ,
77
- "!~" => :tNMATCH,
78
- "," => :tCOMMA,
79
- ".." => :tDOT2,
80
- "..." => :tDOT3,
81
- "=" => :tEQL,
82
- "==" => :tEQ,
83
- "===" => :tEQQ,
84
- "=>" => :tASSOC,
85
- "=~" => :tMATCH,
86
- "->" => :tLAMBDA,
87
- }
105
+ attr_accessor :lex_strterm
106
+ attr_accessor :lpar_beg
107
+ attr_accessor :paren_nest
108
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
109
+ attr_accessor :space_seen
110
+ attr_accessor :string_buffer
111
+ attr_accessor :string_nest
112
+
113
+ # Last token read via next_token.
114
+ attr_accessor :token
88
115
 
89
- # How the parser advances to the next token.
90
- #
91
- # @return true if not at end of file (EOF).
116
+ attr_writer :comments
92
117
 
93
- def advance
94
- r = yylex
95
- self.token = r
118
+ def initialize _ = nil
119
+ @lex_state = nil # remove one warning under $DEBUG
120
+ self.lex_state = EXPR_NONE
96
121
 
97
- raise "yylex returned nil" unless r
122
+ self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
123
+ self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
124
+ self.ss = RPStringScanner.new ""
98
125
 
99
- return RubyLexer::EOF != r
126
+ reset
100
127
  end
101
128
 
102
129
  def arg_ambiguous
103
- self.warning("Ambiguous first argument. make sure.")
130
+ self.warning "Ambiguous first argument. make sure."
131
+ end
132
+
133
+ def arg_state
134
+ is_after_operator? ? EXPR_ARG : EXPR_BEG
135
+ end
136
+
137
+ def ignore_body_comments
138
+ @comments.clear
104
139
  end
105
140
 
106
- def comments
141
+ def comments # TODO: remove this... maybe comment_string + attr_accessor
107
142
  c = @comments.join
108
143
  @comments.clear
109
144
  c
110
145
  end
111
146
 
112
- def expr_beg_push val
147
+ def debug n
148
+ raise "debug #{n}"
149
+ end
150
+
151
+ def expr_dot?
152
+ lex_state =~ EXPR_DOT
153
+ end
154
+
155
+ def expr_fname? # REFACTOR
156
+ lex_state =~ EXPR_FNAME
157
+ end
158
+
159
+ def expr_result token, text
113
160
  cond.push false
114
161
  cmdarg.push false
115
- self.lex_state = :expr_beg
116
- self.yacc_value = val
162
+ result EXPR_BEG, token, text
117
163
  end
118
164
 
119
- def fix_arg_lex_state
120
- self.lex_state = if in_lex_state? :expr_fname, :expr_dot then
121
- :expr_arg
122
- else
123
- :expr_beg
124
- end
165
+ def in_fname? # REFACTOR
166
+ lex_state =~ EXPR_FNAME
125
167
  end
126
168
 
127
- def heredoc here # 63 lines
128
- _, eos, func, last_line = here
169
+ def int_with_base base
170
+ rb_compile_error "Invalid numeric format" if matched =~ /__/
129
171
 
130
- indent = (func & STR_FUNC_INDENT) != 0
131
- expand = (func & STR_FUNC_EXPAND) != 0
132
- eos_re = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
133
- err_msg = "can't match #{eos_re.inspect} anywhere in "
172
+ text = matched
173
+ case
174
+ when text.end_with?("ri")
175
+ result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
176
+ when text.end_with?("r")
177
+ result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
178
+ when text.end_with?("i")
179
+ result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
180
+ else
181
+ result EXPR_NUM, :tINTEGER, text.to_i(base)
182
+ end
183
+ end
134
184
 
135
- rb_compile_error err_msg if
136
- src.eos?
185
+ def is_after_operator?
186
+ lex_state =~ EXPR_FNAME|EXPR_DOT
187
+ end
137
188
 
138
- if src.beginning_of_line? && src.scan(eos_re) then
139
- src.unread_many last_line # TODO: figure out how to remove this
140
- self.yacc_value = eos
141
- return :tSTRING_END
142
- end
189
+ def is_arg?
190
+ lex_state =~ EXPR_ARG_ANY
191
+ end
143
192
 
144
- self.string_buffer = []
193
+ def is_beg?
194
+ lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
195
+ end
145
196
 
146
- if expand then
147
- case
148
- when src.scan(/#[$@]/) then
149
- src.pos -= 1 # FIX omg stupid
150
- self.yacc_value = src.matched
151
- return :tSTRING_DVAR
152
- when src.scan(/#[{]/) then
153
- self.yacc_value = src.matched
154
- return :tSTRING_DBEG
155
- when src.scan(/#/) then
156
- string_buffer << '#'
157
- end
197
+ def is_end?
198
+ lex_state =~ EXPR_END_ANY
199
+ end
158
200
 
159
- begin
160
- c = tokadd_string func, "\n", nil
201
+ def is_label_possible?
202
+ (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
203
+ end
161
204
 
162
- rb_compile_error err_msg if
163
- c == RubyLexer::EOF
205
+ def is_label_suffix?
206
+ check(/:(?!:)/)
207
+ end
164
208
 
165
- if c != "\n" then
166
- self.yacc_value = string_buffer.join.delete("\r")
167
- return :tSTRING_CONTENT
168
- else
169
- string_buffer << src.scan(/\n/)
170
- end
209
+ def is_space_arg? c = "x"
210
+ is_arg? and space_seen and c !~ /\s/
211
+ end
212
+
213
+ def lambda_beginning?
214
+ lpar_beg && lpar_beg == paren_nest
215
+ end
171
216
 
172
- rb_compile_error err_msg if
173
- src.eos?
174
- end until src.check(eos_re)
217
+ def is_local_id id
218
+ # maybe just make this false for now
219
+ self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
220
+ end
221
+
222
+ def lvar_defined? id
223
+ # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
224
+ self.parser.env[id.to_sym] == :lvar
225
+ end
226
+
227
+ def not_end?
228
+ not is_end?
229
+ end
230
+
231
+ def possibly_escape_string text, check
232
+ content = match[1]
233
+
234
+ if text =~ check then
235
+ content.gsub(ESC) { unescape $1 }
175
236
  else
176
- until src.check(eos_re) do
177
- string_buffer << src.scan(/.*(\n|\z)/)
178
- rb_compile_error err_msg if
179
- src.eos?
180
- end
237
+ content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
181
238
  end
239
+ end
182
240
 
183
- self.lex_strterm = [:heredoc, eos, func, last_line]
184
- self.yacc_value = string_buffer.join.delete("\r")
241
+ def process_amper text
242
+ token = if is_arg? && space_seen && !check(/\s/) then
243
+ warning("`&' interpreted as argument prefix")
244
+ :tAMPER
245
+ elsif lex_state =~ EXPR_BEG|EXPR_MID then
246
+ :tAMPER
247
+ else
248
+ :tAMPER2
249
+ end
185
250
 
186
- return :tSTRING_CONTENT
251
+ result :arg_state, token, "&"
187
252
  end
188
253
 
189
- def heredoc_identifier # 51 lines
190
- term, func = nil, STR_FUNC_BORING
191
- self.string_buffer = []
254
+ def process_backref text
255
+ token = match[1].to_sym
256
+ # TODO: can't do lineno hack w/ symbol
257
+ result EXPR_END, :tBACK_REF, token
258
+ end
192
259
 
193
- case
194
- when src.scan(/(-?)(['"`])(.*?)\2/) then
195
- term = src[2]
196
- func |= STR_FUNC_INDENT unless src[1].empty?
197
- func |= case term
198
- when "\'" then
199
- STR_SQUOTE
200
- when '"' then
201
- STR_DQUOTE
202
- else
203
- STR_XQUOTE
204
- end
205
- string_buffer << src[3]
206
- when src.scan(/-?(['"`])(?!\1*\Z)/) then
207
- rb_compile_error "unterminated here document identifier"
208
- when src.scan(/(-?)(\w+)/) then
209
- term = '"'
210
- func |= STR_DQUOTE
211
- unless src[1].empty? then
212
- func |= STR_FUNC_INDENT
213
- end
214
- string_buffer << src[2]
215
- else
216
- return nil
260
+ def process_begin text
261
+ @comments << matched
262
+
263
+ unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
264
+ @comments.clear
265
+ rb_compile_error("embedded document meets end of file")
217
266
  end
218
267
 
219
- if src.scan(/.*\n/) then
220
- # TODO: think about storing off the char range instead
221
- line = src.matched
222
- src.extra_lines_added += 1
268
+ @comments << matched
269
+ self.lineno += matched.count("\n") # HACK?
270
+
271
+ nil # TODO
272
+ end
273
+
274
+ def process_brace_close text
275
+ case matched
276
+ when "}" then
277
+ self.brace_nest -= 1
278
+ return :tSTRING_DEND, matched if brace_nest < 0
279
+ end
280
+
281
+ # matching compare/parse26.y:8099
282
+ cond.pop
283
+ cmdarg.pop
284
+
285
+ case matched
286
+ when "}" then
287
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
288
+ return :tRCURLY, matched
289
+ when "]" then
290
+ self.paren_nest -= 1
291
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
292
+ return :tRBRACK, matched
293
+ when ")" then
294
+ self.paren_nest -= 1
295
+ self.lex_state = EXPR_ENDFN
296
+ return :tRPAREN, matched
223
297
  else
224
- line = nil
298
+ raise "Unknown bracing: #{matched.inspect}"
299
+ end
300
+ end
301
+
302
+ def process_brace_open text
303
+ # matching compare/parse23.y:8694
304
+ self.brace_nest += 1
305
+
306
+ if lambda_beginning? then
307
+ self.lpar_beg = nil
308
+ self.paren_nest -= 1 # close arg list when lambda opens body
309
+
310
+ return expr_result(:tLAMBEG, "{")
311
+ end
312
+
313
+ token = case
314
+ when lex_state =~ EXPR_LABELED then
315
+ :tLBRACE # hash
316
+ when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
317
+ :tLCURLY # block (primary) "{" in parse.y
318
+ when lex_state =~ EXPR_ENDARG then
319
+ :tLBRACE_ARG # block (expr)
320
+ else
321
+ :tLBRACE # hash
322
+ end
323
+
324
+ state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
325
+ self.command_start = true if token != :tLBRACE
326
+
327
+ cond.push false
328
+ cmdarg.push false
329
+ result state, token, text
330
+ end
331
+
332
+ def process_colon1 text
333
+ # ?: / then / when
334
+ if is_end? || check(/\s/) then
335
+ return result EXPR_BEG, :tCOLON, text
225
336
  end
226
337
 
227
- self.lex_strterm = [:heredoc, string_buffer.join, func, line]
338
+ case
339
+ when scan(/\'/) then
340
+ string STR_SSYM, matched
341
+ when scan(/\"/) then
342
+ string STR_DSYM, matched
343
+ end
344
+
345
+ result EXPR_FNAME, :tSYMBEG, text
346
+ end
228
347
 
229
- if term == '`' then
230
- self.yacc_value = "`"
231
- return :tXSTRING_BEG
348
+ def process_colon2 text
349
+ if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
350
+ result EXPR_BEG, :tCOLON3, text
232
351
  else
233
- self.yacc_value = "\""
234
- return :tSTRING_BEG
352
+ result EXPR_DOT, :tCOLON2, text
235
353
  end
236
354
  end
237
355
 
238
- def in_lex_state?(*states)
239
- states.include? lex_state
356
+ def process_dots text
357
+ tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
358
+
359
+ result EXPR_BEG, tokens[text], text
240
360
  end
241
361
 
242
- def initialize v = 18
243
- self.version = v
244
- self.cond = RubyParserStuff::StackState.new(:cond)
245
- self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
246
- self.tern = RubyParserStuff::StackState.new(:tern)
247
- self.nest = 0
248
- @comments = []
362
+ def process_float text
363
+ rb_compile_error "Invalid numeric format" if text =~ /__/
249
364
 
250
- reset
365
+ case
366
+ when text.end_with?("ri")
367
+ result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
368
+ when text.end_with?("i")
369
+ result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
370
+ when text.end_with?("r")
371
+ result EXPR_NUM, :tRATIONAL, Rational(text.chop)
372
+ else
373
+ result EXPR_NUM, :tFLOAT, text.to_f
374
+ end
251
375
  end
252
376
 
253
- def int_with_base base
254
- rb_compile_error "Invalid numeric format" if src.matched =~ /__/
255
- self.yacc_value = src.matched.to_i(base)
256
- return :tINTEGER
377
+ def process_gvar text
378
+ if parser.class.version > 20 && text == "$-" then
379
+ rb_compile_error "unexpected $undefined"
380
+ end
381
+
382
+ result EXPR_END, :tGVAR, text
257
383
  end
258
384
 
259
- def lex_state= o
260
- # warn "wtf lex_state = #{o.inspect} from #{caller.first}"
261
- raise "wtf\?" unless Symbol === o
262
- @lex_state = o
385
+ def process_gvar_oddity text
386
+ rb_compile_error "#{text.inspect} is not allowed as a global variable name"
263
387
  end
264
388
 
265
- attr_writer :lineno
266
- def lineno
267
- @lineno ||= src.lineno
389
+ def process_ivar text
390
+ tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
391
+ result EXPR_END, tok_id, text
268
392
  end
269
393
 
270
- ##
271
- # Parse a number from the input stream.
272
- #
273
- # @param c The first character of the number.
274
- # @return A int constant wich represents a token.
394
+ def process_label text
395
+ symbol = possibly_escape_string text, /^\"/
275
396
 
276
- def parse_number
277
- self.lex_state = :expr_end
397
+ result EXPR_LAB, :tLABEL, symbol
398
+ end
278
399
 
279
- case
280
- when src.scan(/[+-]?0[xXbBdD]\b/) then
281
- rb_compile_error "Invalid numeric format"
282
- when src.scan(/[+-]?0x[a-f0-9_]+/i) then
283
- int_with_base(16)
284
- when src.scan(/[+-]?0[Bb][01_]+/) then
285
- int_with_base(2)
286
- when src.scan(/[+-]?0[Dd][0-9_]+/) then
287
- int_with_base(10)
288
- when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
289
- rb_compile_error "Illegal octal digit."
290
- when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
291
- int_with_base(8)
292
- when src.scan(/[+-]?[\d_]+_(e|\.)/) then
293
- rb_compile_error "Trailing '_' in number."
294
- when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
295
- number = src.matched
296
- if number =~ /__/ then
297
- rb_compile_error "Invalid numeric format"
298
- end
299
- self.yacc_value = number.to_f
300
- :tFLOAT
301
- when src.scan(/[+-]?0\b/) then
302
- int_with_base(10)
303
- when src.scan(/[+-]?[\d_]+\b/) then
304
- int_with_base(10)
400
+ def process_label_or_string text
401
+ if @was_label && text =~ /:\Z/ then
402
+ @was_label = nil
403
+ return process_label text
404
+ elsif text =~ /:\Z/ then
405
+ self.pos -= 1 # put back ":"
406
+ text = text[0..-2]
407
+ end
408
+
409
+ orig_line = lineno
410
+ str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
411
+ self.lineno += str.count("\n")
412
+
413
+ result EXPR_END, :tSTRING, str, orig_line
414
+ end
415
+
416
+ def process_lchevron text
417
+ if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
418
+ !is_end? &&
419
+ (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
420
+ tok = self.heredoc_identifier
421
+ return tok if tok
422
+ end
423
+
424
+ if is_after_operator? then
425
+ self.lex_state = EXPR_ARG
305
426
  else
306
- rb_compile_error "Bad number format"
427
+ self.command_start = true if lex_state =~ EXPR_CLASS
428
+ self.lex_state = EXPR_BEG
307
429
  end
430
+
431
+ result lex_state, :tLSHFT, "\<\<"
308
432
  end
309
433
 
310
- def parse_quote # 58 lines
311
- beg, nnd, short_hand, c = nil, nil, false, nil
434
+ def process_newline_or_comment text # ../compare/parse30.y:9126 ish
435
+ c = matched
436
+
437
+ if c == "#" then
438
+ self.pos -= 1
312
439
 
313
- if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
314
- rb_compile_error "unknown type of %string" if src.matched_size == 2
315
- c, beg, short_hand = src.matched, src.getch, false
316
- else # Short-hand (e.g. %{, %., %!, etc)
317
- c, beg, short_hand = 'Q', src.getch, true
440
+ while scan(/\s*\#.*(\n+|\z)/) do
441
+ self.lineno += matched.count "\n"
442
+ @comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
443
+ end
444
+
445
+ return nil if end_of_stream?
318
446
  end
319
447
 
320
- if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
321
- rb_compile_error "unterminated quoted string meets end of file"
448
+ c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
449
+ lex_state !~ EXPR_LABELED)
450
+ if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
451
+ # ignore if !fallthrough?
452
+ if !c && parser.in_kwarg then
453
+ # normal newline
454
+ self.command_start = true
455
+ return result EXPR_BEG, :tNL, nil
456
+ else
457
+ maybe_pop_stack
458
+ return # goto retry
459
+ end
322
460
  end
323
461
 
324
- # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
325
- nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
326
- nnd, beg = beg, "\0" if nnd.nil?
462
+ if scan(/[\ \t\r\f\v]+/) then
463
+ self.space_seen = true
464
+ end
327
465
 
328
- token_type, self.yacc_value = nil, "%#{c}#{beg}"
329
- token_type, string_type = case c
330
- when 'Q' then
331
- ch = short_hand ? nnd : c + beg
332
- self.yacc_value = "%#{ch}"
333
- [:tSTRING_BEG, STR_DQUOTE]
334
- when 'q' then
335
- [:tSTRING_BEG, STR_SQUOTE]
336
- when 'W' then
337
- src.scan(/\s*/)
338
- [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
339
- when 'w' then
340
- src.scan(/\s*/)
341
- [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
342
- when 'x' then
343
- [:tXSTRING_BEG, STR_XQUOTE]
344
- when 'r' then
345
- [:tREGEXP_BEG, STR_REGEXP]
346
- when 's' then
347
- self.lex_state = :expr_fname
348
- [:tSYMBEG, STR_SSYM]
349
- end
466
+ if check(/#/) then
467
+ return # goto retry
468
+ elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
469
+ return # goto retry
470
+ end
350
471
 
351
- rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
352
- token_type.nil?
472
+ self.command_start = true
353
473
 
354
- self.lex_strterm = [:strterm, string_type, nnd, beg]
474
+ result EXPR_BEG, :tNL, nil
475
+ end
355
476
 
356
- return token_type
477
+ def process_nthref text
478
+ # TODO: can't do lineno hack w/ number
479
+ result EXPR_END, :tNTH_REF, match[1].to_i
357
480
  end
358
481
 
359
- def parse_string(quote) # 65 lines
360
- _, string_type, term, open = quote
482
+ def process_paren text
483
+ token = if is_beg? then
484
+ :tLPAREN
485
+ elsif !space_seen then
486
+ # foo( ... ) => method call, no ambiguity
487
+ :tLPAREN2
488
+ elsif is_space_arg? then
489
+ :tLPAREN_ARG
490
+ elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
491
+ # TODO:
492
+ # warn("parentheses after method name is interpreted as " \
493
+ # "an argument list, not a decomposed argument")
494
+ :tLPAREN2
495
+ else
496
+ :tLPAREN2 # plain "(" in parse.y
497
+ end
361
498
 
362
- space = false # FIX: remove these
363
- func = string_type
364
- paren = open
365
- term_re = Regexp.escape term
499
+ self.paren_nest += 1
366
500
 
367
- qwords = (func & STR_FUNC_QWORDS) != 0
368
- regexp = (func & STR_FUNC_REGEXP) != 0
369
- expand = (func & STR_FUNC_EXPAND) != 0
501
+ cond.push false
502
+ cmdarg.push false
503
+ result EXPR_PAR, token, text
504
+ end
370
505
 
371
- unless func then # FIX: impossible, prolly needs == 0
372
- self.lineno = nil
373
- return :tSTRING_END
506
+ def process_percent text
507
+ case
508
+ when is_beg? then
509
+ process_percent_quote
510
+ when scan(/\=/)
511
+ result EXPR_BEG, :tOP_ASGN, "%"
512
+ when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
513
+ process_percent_quote
514
+ else
515
+ result :arg_state, :tPERCENT, "%"
374
516
  end
517
+ end
375
518
 
376
- space = true if qwords and src.scan(/\s+/)
519
+ def process_plus_minus text
520
+ sign = matched
521
+ utype, type = if sign == "+" then
522
+ [:tUPLUS, :tPLUS]
523
+ else
524
+ [:tUMINUS, :tMINUS]
525
+ end
377
526
 
378
- if self.nest == 0 && src.scan(/#{term_re}/) then
379
- if qwords then
380
- quote[1] = nil
381
- return :tSPACE
382
- elsif regexp then
383
- self.yacc_value = self.regx_options
384
- self.lineno = nil
385
- return :tREGEXP_END
527
+ if is_after_operator? then
528
+ if scan(/@/) then
529
+ return result(EXPR_ARG, utype, "#{sign}@")
386
530
  else
387
- self.yacc_value = term
388
- self.lineno = nil
389
- return :tSTRING_END
531
+ return result(EXPR_ARG, type, sign)
390
532
  end
391
533
  end
392
534
 
393
- if space then
394
- return :tSPACE
395
- end
535
+ return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
396
536
 
397
- self.string_buffer = []
537
+ if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
538
+ arg_ambiguous if is_arg?
398
539
 
399
- if expand
400
- case
401
- when src.scan(/#(?=[$@])/) then
402
- return :tSTRING_DVAR
403
- when src.scan(/#[{]/) then
404
- return :tSTRING_DBEG
405
- when src.scan(/#/) then
406
- string_buffer << '#'
540
+ if check(/\d/) then
541
+ return nil if utype == :tUPLUS
542
+ return result EXPR_BEG, :tUMINUS_NUM, sign
407
543
  end
544
+
545
+ return result EXPR_BEG, utype, sign
408
546
  end
409
547
 
410
- if tokadd_string(func, term, paren) == RubyLexer::EOF then
411
- rb_compile_error "unterminated string meets end of file"
548
+ result EXPR_BEG, type, sign
549
+ end
550
+
551
+ def process_questionmark text
552
+ if is_end? then
553
+ return result EXPR_BEG, :tEH, "?"
412
554
  end
413
555
 
414
- self.yacc_value = string_buffer.join
556
+ if end_of_stream? then
557
+ rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
558
+ end
415
559
 
416
- return :tSTRING_CONTENT
417
- end
560
+ if check(/\s|\v/) then
561
+ unless is_arg? then
562
+ c2 = { " " => "s",
563
+ "\n" => "n",
564
+ "\t" => "t",
565
+ "\v" => "v",
566
+ "\r" => "r",
567
+ "\f" => "f" }[matched]
568
+
569
+ if c2 then
570
+ warning("invalid character syntax; use ?\\" + c2)
571
+ end
572
+ end
418
573
 
419
- def rb_compile_error msg
420
- msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
421
- raise RubyParser::SyntaxError, msg
574
+ # ternary
575
+ return result EXPR_BEG, :tEH, "?"
576
+ elsif check(/\w(?=\w)/) then # ternary, also
577
+ return result EXPR_BEG, :tEH, "?"
578
+ end
579
+
580
+ c = if scan(/\\/) then
581
+ self.read_escape
582
+ else
583
+ getch
584
+ end
585
+
586
+ result EXPR_END, :tSTRING, c
422
587
  end
423
588
 
424
- def read_escape # 51 lines
425
- case
426
- when src.scan(/\\/) then # Backslash
427
- '\\'
428
- when src.scan(/n/) then # newline
429
- "\n"
430
- when src.scan(/t/) then # horizontal tab
431
- "\t"
432
- when src.scan(/r/) then # carriage-return
433
- "\r"
434
- when src.scan(/f/) then # form-feed
435
- "\f"
436
- when src.scan(/v/) then # vertical tab
437
- "\13"
438
- when src.scan(/a/) then # alarm(bell)
439
- "\007"
440
- when src.scan(/e/) then # escape
441
- "\033"
442
- when src.scan(/b/) then # backspace
443
- "\010"
444
- when src.scan(/s/) then # space
445
- " "
446
- when src.scan(/[0-7]{1,3}/) then # octal constant
447
- src.matched.to_i(8).chr
448
- when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
449
- src[1].to_i(16).chr
450
- when src.check(/M-\\[\\MCc]/) then
451
- src.scan(/M-\\/) # eat it
452
- c = self.read_escape
453
- c[0] = (c[0].ord | 0x80).chr
454
- c
455
- when src.scan(/M-(.)/) then
456
- c = src[1]
457
- c[0] = (c[0].ord | 0x80).chr
458
- c
459
- when src.check(/(C-|c)\\[\\MCc]/) then
460
- src.scan(/(C-|c)\\/) # eat it
461
- c = self.read_escape
462
- c[0] = (c[0].ord & 0x9f).chr
463
- c
464
- when src.scan(/C-\?|c\?/) then
465
- 127.chr
466
- when src.scan(/(C-|c)(.)/) then
467
- c = src[2]
468
- c[0] = (c[0].ord & 0x9f).chr
469
- c
470
- when src.scan(/[McCx0-9]/) || src.eos? then
471
- rb_compile_error("Invalid escape character syntax")
472
- else
473
- src.getch
474
- end
589
+ def process_simple_string text
590
+ orig_line = lineno
591
+ self.lineno += text.count("\n")
592
+
593
+ str = text[1..-2]
594
+ .gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
595
+ str = str.b unless str.valid_encoding?
596
+
597
+ result EXPR_END, :tSTRING, str, orig_line
475
598
  end
476
599
 
477
- def regx_options # 15 lines
478
- good, bad = [], []
600
+ def process_slash text
601
+ if is_beg? then
602
+ string STR_REGEXP, matched
479
603
 
480
- if src.scan(/[a-z]+/) then
481
- good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
604
+ return result nil, :tREGEXP_BEG, "/"
482
605
  end
483
606
 
484
- unless bad.empty? then
485
- rb_compile_error("unknown regexp option%s - %s" %
486
- [(bad.size > 1 ? "s" : ""), bad.join.inspect])
607
+ if scan(/\=/) then
608
+ return result(EXPR_BEG, :tOP_ASGN, "/")
487
609
  end
488
610
 
489
- return good.join
611
+ if is_arg? && space_seen then
612
+ unless scan(/\s/) then
613
+ arg_ambiguous
614
+ string STR_REGEXP, "/"
615
+ return result(nil, :tREGEXP_BEG, "/")
616
+ end
617
+ end
618
+
619
+ result :arg_state, :tDIVIDE, "/"
490
620
  end
491
621
 
492
- def reset
493
- self.command_start = true
494
- self.lex_strterm = nil
495
- self.token = nil
496
- self.yacc_value = nil
622
+ def process_square_bracket text
623
+ self.paren_nest += 1
497
624
 
498
- @src = nil
499
- @lex_state = nil
500
- end
625
+ token = nil
501
626
 
502
- def ruby18
503
- Ruby18Parser === parser
627
+ if is_after_operator? then
628
+ case
629
+ when scan(/\]\=/) then
630
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
631
+ return result EXPR_ARG, :tASET, "[]="
632
+ when scan(/\]/) then
633
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
634
+ return result EXPR_ARG, :tAREF, "[]"
635
+ else
636
+ rb_compile_error "unexpected '['"
637
+ end
638
+ elsif is_beg? then
639
+ token = :tLBRACK
640
+ elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
641
+ token = :tLBRACK
642
+ else
643
+ token = :tLBRACK2
644
+ end
645
+
646
+ cond.push false
647
+ cmdarg.push false
648
+ result EXPR_PAR, token, text
504
649
  end
505
650
 
506
- def ruby19
507
- Ruby19Parser === parser
651
+ def process_symbol text
652
+ symbol = possibly_escape_string text, /^:\"/ # stupid emacs
653
+
654
+ result EXPR_LIT, :tSYMBOL, symbol
508
655
  end
509
656
 
510
- def src= src
511
- raise "bad src: #{src.inspect}" unless String === src
512
- @src = RPStringScanner.new(src)
657
+ def process_token text
658
+ # matching: parse_ident in compare/parse23.y:7989
659
+ # FIX: remove: self.last_state = lex_state
660
+
661
+ token = self.token = text
662
+ token << matched if scan(/[\!\?](?!=)/)
663
+
664
+ tok_id =
665
+ case
666
+ when token =~ /[!?]$/ then
667
+ :tFID
668
+ when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
669
+ # ident=, not =~ => == or followed by =>
670
+ # TODO test lexing of a=>b vs a==>b
671
+ token << matched
672
+ :tIDENTIFIER
673
+ when token =~ /^[A-Z]/ then
674
+ :tCONSTANT
675
+ else
676
+ :tIDENTIFIER
677
+ end
678
+
679
+ if is_label_possible? and is_label_suffix? then
680
+ scan(/:/)
681
+ return result EXPR_LAB, :tLABEL, token
682
+ end
683
+
684
+ # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
685
+ if lex_state !~ EXPR_DOT then
686
+ # See if it is a reserved word.
687
+ keyword = RubyParserStuff::Keyword.keyword token
688
+
689
+ return process_token_keyword keyword if keyword
690
+ end
691
+
692
+ # matching: compare/parse30.y:9039
693
+ state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
694
+ cmd_state ? EXPR_CMDARG : EXPR_ARG
695
+ elsif lex_state =~ EXPR_FNAME then
696
+ EXPR_ENDFN
697
+ else
698
+ EXPR_END
699
+ end
700
+ self.lex_state = state
701
+
702
+ tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
703
+
704
+ if last_state !~ EXPR_DOT|EXPR_FNAME and
705
+ (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
706
+ lvar_defined?(token) then
707
+ state = EXPR_END|EXPR_LABEL
708
+ end
709
+
710
+ result state, tok_id, token
513
711
  end
514
712
 
515
- def tokadd_escape term # 20 lines
713
+ def process_token_keyword keyword
714
+ # matching MIDDLE of parse_ident in compare/parse23.y:8046
715
+ state = lex_state
716
+
717
+ return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
718
+
719
+ self.lex_state = keyword.state
720
+ self.command_start = true if lex_state =~ EXPR_BEG
721
+
516
722
  case
517
- when src.scan(/\\\n/) then
518
- # just ignore
519
- when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
520
- self.string_buffer << src.matched
521
- when src.scan(/\\([MC]-|c)(?=\\)/) then
522
- self.string_buffer << src.matched
523
- self.tokadd_escape term
524
- when src.scan(/\\([MC]-|c)(.)/) then
525
- self.string_buffer << src.matched
526
- when src.scan(/\\[McCx]/) then
527
- rb_compile_error "Invalid escape character syntax"
528
- when src.scan(/\\(.)/m) then
529
- self.string_buffer << src.matched
723
+ when keyword.id0 == :kDO then # parse26.y line 7591
724
+ case
725
+ when lambda_beginning? then
726
+ self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
727
+ self.paren_nest -= 1 # TODO: question this?
728
+ result lex_state, :kDO_LAMBDA, token
729
+ when cond.is_in_state then
730
+ result lex_state, :kDO_COND, token
731
+ when cmdarg.is_in_state && state != EXPR_CMDARG then
732
+ result lex_state, :kDO_BLOCK, token
733
+ else
734
+ result lex_state, :kDO, token
735
+ end
736
+ when state =~ EXPR_PAD then
737
+ result lex_state, keyword.id0, token
738
+ when keyword.id0 != keyword.id1 then
739
+ result EXPR_PAR, keyword.id1, token
530
740
  else
531
- rb_compile_error "Invalid escape character syntax"
741
+ result lex_state, keyword.id1, token
532
742
  end
533
743
  end
534
744
 
535
- def tokadd_string(func, term, paren) # 105 lines
536
- qwords = (func & STR_FUNC_QWORDS) != 0
537
- escape = (func & STR_FUNC_ESCAPE) != 0
538
- expand = (func & STR_FUNC_EXPAND) != 0
539
- regexp = (func & STR_FUNC_REGEXP) != 0
540
- symbol = (func & STR_FUNC_SYMBOL) != 0
745
+ def process_underscore text
746
+ self.unscan # put back "_"
541
747
 
542
- paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
543
- term_re = Regexp.new(Regexp.escape(term))
748
+ if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
749
+ ss.terminate
750
+ [RubyLexer::EOF, RubyLexer::EOF]
751
+ elsif scan(/#{IDENT_CHAR}+/) then
752
+ process_token matched
753
+ end
754
+ end
544
755
 
545
- until src.eos? do
546
- c = nil
547
- handled = true
548
- case
549
- when self.nest == 0 && src.scan(term_re) then
550
- src.pos -= 1
551
- break
552
- when paren_re && src.scan(paren_re) then
553
- self.nest += 1
554
- when src.scan(term_re) then
555
- self.nest -= 1
556
- when qwords && src.scan(/\s/) then
557
- src.pos -= 1
558
- break
559
- when expand && src.scan(/#(?=[\$\@\{])/) then
560
- src.pos -= 1
561
- break
562
- when expand && src.scan(/#(?!\n)/) then
563
- # do nothing
564
- when src.check(/\\/) then
565
- case
566
- when qwords && src.scan(/\\\n/) then
567
- string_buffer << "\n"
568
- next
569
- when qwords && src.scan(/\\\s/) then
570
- c = ' '
571
- when expand && src.scan(/\\\n/) then
572
- next
573
- when regexp && src.check(/\\/) then
574
- self.tokadd_escape term
575
- next
576
- when expand && src.scan(/\\/) then
577
- c = self.read_escape
578
- when src.scan(/\\\n/) then
579
- # do nothing
580
- when src.scan(/\\\\/) then
581
- string_buffer << '\\' if escape
582
- c = '\\'
583
- when src.scan(/\\/) then
584
- unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
585
- string_buffer << "\\"
586
- end
587
- else
588
- handled = false
589
- end
590
- else
591
- handled = false
592
- end # case
756
+ def rb_compile_error msg
757
+ msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
758
+ raise RubyParser::SyntaxError, msg
759
+ end
593
760
 
594
- unless handled then
761
+ def reset
762
+ self.lineno = 1
763
+ self.brace_nest = 0
764
+ self.command_start = true
765
+ self.comments = []
766
+ self.lex_state = EXPR_NONE
767
+ self.lex_strterm = nil
768
+ self.lpar_beg = nil
769
+ self.paren_nest = 0
770
+ self.space_seen = false
771
+ self.string_nest = 0
772
+ self.token = nil
773
+ self.string_buffer = []
774
+ self.old_ss = nil
775
+ self.old_lineno = nil
595
776
 
596
- t = Regexp.escape term
597
- x = Regexp.escape(paren) if paren && paren != "\000"
598
- re = if qwords then
599
- /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
600
- else
601
- /[^#{t}#{x}\#\0\\]+|./
602
- end
777
+ self.cond.reset
778
+ self.cmdarg.reset
779
+ end
603
780
 
604
- src.scan re
605
- c = src.matched
781
+ def result new_state, token, text, line = self.lineno # :nodoc:
782
+ new_state = self.arg_state if new_state == :arg_state
783
+ self.lex_state = new_state if new_state
606
784
 
607
- rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
608
- end # unless handled
785
+ [token, [text, line]]
786
+ end
609
787
 
610
- c ||= src.matched
611
- string_buffer << c
612
- end # until
788
+ def ruby22_label?
789
+ ruby22plus? and is_label_possible?
790
+ end
613
791
 
614
- c ||= src.matched
615
- c = RubyLexer::EOF if src.eos?
792
+ def ruby22plus?
793
+ parser.class.version >= 22
794
+ end
616
795
 
796
+ def ruby23plus?
797
+ parser.class.version >= 23
798
+ end
617
799
 
618
- return c
800
+ def ruby24minus?
801
+ parser.class.version <= 24
619
802
  end
620
803
 
621
- ESCAPES = {
622
- "a" => "\007",
623
- "b" => "\010",
624
- "e" => "\033",
625
- "f" => "\f",
626
- "n" => "\n",
627
- "r" => "\r",
628
- "s" => " ",
629
- "t" => "\t",
630
- "v" => "\13",
631
- "\\" => '\\',
632
- "\n" => "",
633
- "C-\?" => 127.chr,
634
- "c\?" => 127.chr,
635
- }
804
+ def ruby27plus?
805
+ parser.class.version >= 27
806
+ end
807
+
808
+ def space_vs_beginning space_type, beg_type, fallback
809
+ if is_space_arg? check(/./m) then
810
+ warning "`**' interpreted as argument prefix"
811
+ space_type
812
+ elsif is_beg? then
813
+ beg_type
814
+ else
815
+ # TODO: warn_balanced("**", "argument prefix");
816
+ fallback
817
+ end
818
+ end
636
819
 
637
820
  def unescape s
638
821
  r = ESCAPES[s]
639
822
 
640
823
  return r if r
641
824
 
642
- case s
643
- when /^[0-7]{1,3}/ then
644
- $&.to_i(8).chr
645
- when /^x([0-9a-fA-F]{1,2})/ then
646
- $1.to_i(16).chr
647
- when /^M-(.)/ then
648
- ($1[0].ord | 0x80).chr
649
- when /^(C-|c)(.)/ then
650
- ($2[0].ord & 0x9f).chr
651
- when /^[McCx0-9]/ then
652
- rb_compile_error("Invalid escape character syntax")
653
- else
654
- s
655
- end
825
+ x = case s
826
+ when /^[0-7]{1,3}/ then
827
+ ($&.to_i(8) & 0xFF).chr
828
+ when /^x([0-9a-fA-F]{1,2})/ then
829
+ $1.to_i(16).chr
830
+ when /^M-(.)/ then
831
+ ($1[0].ord | 0x80).chr
832
+ when /^(C-|c)(.)/ then
833
+ ($2[0].ord & 0x9f).chr
834
+ when /^[89a-f]/i then # bad octal or hex... ignore? that's what MRI does :(
835
+ s
836
+ when /^[McCx0-9]/ then
837
+ rb_compile_error("Invalid escape character syntax")
838
+ when /u(\h{4})/ then
839
+ [$1.delete("{}").to_i(16)].pack("U")
840
+ when /u(\h{1,3})/ then
841
+ rb_compile_error("Invalid escape character syntax")
842
+ when /u\{(\h+(?:\s+\h+)*)\}/ then
843
+ $1.split.map { |cp| cp.to_i(16) }.pack("U*")
844
+ else
845
+ s
846
+ end
847
+ x
656
848
  end
657
849
 
658
850
  def warning s
659
851
  # do nothing for now
660
852
  end
661
853
 
662
- ##
663
- # Returns the next token. Also sets yy_val is needed.
664
- #
665
- # @return Description of the Returned Value
666
-
667
- def yylex # 826 lines
668
- c = ''
669
- self.space_seen = false
670
- command_state = false
671
- src = self.src
672
-
673
- self.token = nil
674
- self.yacc_value = nil
675
-
676
- return yylex_string if lex_strterm
677
-
678
- command_state = self.command_start
679
- self.command_start = false
680
-
681
- last_state = lex_state
682
-
683
- loop do # START OF CASE
684
- if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
685
- self.space_seen = true
686
- next
687
- elsif src.check(/[^a-zA-Z]/) then
688
- if src.scan(/\n|#/) then
689
- self.lineno = nil
690
- c = src.matched
691
- if c == '#' then
692
- src.pos -= 1
693
-
694
- while src.scan(/\s*#.*(\n+|\z)/) do
695
- @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
696
- end
697
-
698
- return RubyLexer::EOF if src.eos?
699
- end
700
-
701
- # Replace a string of newlines with a single one
702
- src.scan(/\n+/)
703
-
704
- next if in_lex_state?(:expr_beg, :expr_fname, :expr_dot, :expr_class,
705
- :expr_value)
706
-
707
- if src.scan(/([\ \t\r\f\v]*)\./) then
708
- self.space_seen = true unless src[1].empty?
709
-
710
- src.pos -= 1
711
- next unless src.check(/\.\./)
712
- end
713
-
714
- self.command_start = true
715
- self.lex_state = :expr_beg
716
- return :tNL
717
- elsif src.scan(/[\]\)\}]/) then
718
- cond.lexpop
719
- cmdarg.lexpop
720
- tern.lexpop
721
- self.lex_state = :expr_end
722
- self.yacc_value = src.matched
723
- result = {
724
- ")" => :tRPAREN,
725
- "]" => :tRBRACK,
726
- "}" => :tRCURLY
727
- }[src.matched]
728
- return result
729
- elsif src.scan(/\.\.\.?|,|![=~]?/) then
730
- self.lex_state = :expr_beg
731
- tok = self.yacc_value = src.matched
732
- return TOKENS[tok]
733
- elsif src.check(/\./) then
734
- if src.scan(/\.\d/) then
735
- rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
736
- elsif src.scan(/\./) then
737
- self.lex_state = :expr_dot
738
- self.yacc_value = "."
739
- return :tDOT
740
- end
741
- elsif src.scan(/\(/) then
742
- result = if ruby18 then
743
- yylex_paren18
744
- else
745
- yylex_paren19
746
- end
747
-
748
- self.expr_beg_push "("
749
-
750
- return result
751
- elsif src.check(/\=/) then
752
- if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
753
- self.fix_arg_lex_state
754
- tok = self.yacc_value = src.matched
755
- return TOKENS[tok]
756
- elsif src.scan(/\=begin(?=\s)/) then
757
- # @comments << '=' << src.matched
758
- @comments << src.matched
759
-
760
- unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
761
- @comments.clear
762
- rb_compile_error("embedded document meets end of file")
763
- end
764
-
765
- @comments << src.matched
766
-
767
- next
768
- else
769
- raise "you shouldn't be able to get here"
770
- end
771
- elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
772
- self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
773
- self.lex_state = :expr_end
774
- return :tSTRING
775
- elsif src.scan(/\"/) then # FALLBACK
776
- self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
777
- self.yacc_value = "\""
778
- return :tSTRING_BEG
779
- elsif src.scan(/\@\@?\w*/) then
780
- self.token = src.matched
781
-
782
- rb_compile_error "`#{token}` is not allowed as a variable name" if
783
- token =~ /\@\d/
784
-
785
- return process_token(command_state)
786
- elsif src.scan(/\:\:/) then
787
- if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
788
- self.lex_state = :expr_beg
789
- self.yacc_value = "::"
790
- return :tCOLON3
791
- end
792
-
793
- self.lex_state = :expr_dot
794
- self.yacc_value = "::"
795
- return :tCOLON2
796
- elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
797
- # scanning shortcut to symbols
798
- self.yacc_value = src[1]
799
- self.lex_state = :expr_end
800
- return :tSYMBOL
801
- elsif src.scan(/\:/) then
802
- # ?: / then / when
803
- if is_end? || src.check(/\s/) then
804
- self.lex_state = :expr_beg
805
- # TODO warn_balanced(":", "symbol literal");
806
- self.yacc_value = ":"
807
- return :tCOLON
808
- end
809
-
810
- case
811
- when src.scan(/\'/) then
812
- self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
813
- when src.scan(/\"/) then
814
- self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
815
- end
816
-
817
- self.lex_state = :expr_fname
818
- self.yacc_value = ":"
819
- return :tSYMBEG
820
- elsif src.check(/[0-9]/) then
821
- return parse_number
822
- elsif src.scan(/\[/) then
823
- result = src.matched
824
-
825
- if in_lex_state? :expr_fname, :expr_dot then
826
- self.lex_state = :expr_arg
827
- case
828
- when src.scan(/\]\=/) then
829
- self.yacc_value = "[]="
830
- return :tASET
831
- when src.scan(/\]/) then
832
- self.yacc_value = "[]"
833
- return :tAREF
834
- else
835
- rb_compile_error "unexpected '['"
836
- end
837
- elsif is_beg? then
838
- self.tern.push false
839
- result = :tLBRACK
840
- elsif is_arg? && space_seen then
841
- self.tern.push false
842
- result = :tLBRACK
843
- else
844
- result = :tLBRACK2
845
- end
846
-
847
- self.expr_beg_push "["
848
-
849
- return result
850
- elsif src.scan(/\'(\\.|[^\'])*\'/) then
851
- self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
852
- self.lex_state = :expr_end
853
- return :tSTRING
854
- elsif src.check(/\|/) then
855
- if src.scan(/\|\|\=/) then
856
- self.lex_state = :expr_beg
857
- self.yacc_value = "||"
858
- return :tOP_ASGN
859
- elsif src.scan(/\|\|/) then
860
- self.lex_state = :expr_beg
861
- self.yacc_value = "||"
862
- return :tOROP
863
- elsif src.scan(/\|\=/) then
864
- self.lex_state = :expr_beg
865
- self.yacc_value = "|"
866
- return :tOP_ASGN
867
- elsif src.scan(/\|/) then
868
- self.fix_arg_lex_state
869
- self.yacc_value = "|"
870
- return :tPIPE
871
- end
872
- elsif src.scan(/\{/) then
873
- if defined?(@hack_expects_lambda) && @hack_expects_lambda
874
- @hack_expects_lambda = false
875
- self.lex_state = :expr_beg
876
- return :tLAMBEG
877
- end
878
-
879
- result = if is_arg? || in_lex_state?(:expr_end) then
880
- :tLCURLY # block (primary)
881
- elsif in_lex_state?(:expr_endarg) then
882
- :tLBRACE_ARG # block (expr)
883
- else
884
- self.tern.push false
885
- :tLBRACE # hash
886
- end
887
-
888
- self.expr_beg_push "{"
889
- self.command_start = true unless result == :tLBRACE
890
-
891
- return result
892
- elsif src.scan(/->/) then
893
- @hack_expects_lambda = true
894
- self.lex_state = :expr_arg
895
- return :tLAMBDA
896
- elsif src.scan(/[+-]/) then
897
- sign = src.matched
898
- utype, type = if sign == "+" then
899
- [:tUPLUS, :tPLUS]
900
- else
901
- [:tUMINUS, :tMINUS]
902
- end
903
-
904
- if in_lex_state? :expr_fname, :expr_dot then
905
- self.lex_state = :expr_arg
906
- if src.scan(/@/) then
907
- self.yacc_value = "#{sign}@"
908
- return utype
909
- else
910
- self.yacc_value = sign
911
- return type
912
- end
913
- end
914
-
915
- if src.scan(/\=/) then
916
- self.lex_state = :expr_beg
917
- self.yacc_value = sign
918
- return :tOP_ASGN
919
- end
920
-
921
- if (is_beg? ||
922
- (is_arg? && space_seen && !src.check(/\s/))) then
923
- if is_arg? then
924
- arg_ambiguous
925
- end
854
+ def was_label?
855
+ @was_label = ruby22_label?
856
+ true
857
+ end
926
858
 
927
- self.lex_state = :expr_beg
928
- self.yacc_value = sign
859
+ class State
860
+ attr_accessor :n
861
+ attr_accessor :names
929
862
 
930
- if src.check(/\d/) then
931
- if utype == :tUPLUS then
932
- return self.parse_number
933
- else
934
- return :tUMINUS_NUM
935
- end
936
- end
863
+ # TODO: take a shared hash of strings for inspect/to_s
864
+ def initialize o, names
865
+ raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
937
866
 
938
- return utype
939
- end
940
-
941
- self.lex_state = :expr_beg
942
- self.yacc_value = sign
943
- return type
944
- elsif src.check(/\*/) then
945
- if src.scan(/\*\*=/) then
946
- self.lex_state = :expr_beg
947
- self.yacc_value = "**"
948
- return :tOP_ASGN
949
- elsif src.scan(/\*\*/) then
950
- self.yacc_value = "**"
951
- self.fix_arg_lex_state
952
- return :tPOW
953
- elsif src.scan(/\*\=/) then
954
- self.lex_state = :expr_beg
955
- self.yacc_value = "*"
956
- return :tOP_ASGN
957
- elsif src.scan(/\*/) then
958
- result = if is_arg? && space_seen && src.check(/\S/) then
959
- warning("`*' interpreted as argument prefix")
960
- :tSTAR
961
- elsif is_beg? then
962
- :tSTAR
963
- else
964
- :tSTAR2
965
- end
966
- self.yacc_value = "*"
967
- self.fix_arg_lex_state
968
-
969
- return result
970
- end
971
- elsif src.check(/\</) then
972
- if src.scan(/\<\=\>/) then
973
- self.fix_arg_lex_state
974
- self.yacc_value = "<=>"
975
- return :tCMP
976
- elsif src.scan(/\<\=/) then
977
- self.fix_arg_lex_state
978
- self.yacc_value = "<="
979
- return :tLEQ
980
- elsif src.scan(/\<\<\=/) then
981
- self.fix_arg_lex_state
982
- self.lex_state = :expr_beg
983
- self.yacc_value = "\<\<"
984
- return :tOP_ASGN
985
- elsif src.scan(/\<\</) then
986
- if (! in_lex_state?(:expr_end, :expr_dot,
987
- :expr_endarg, :expr_class) &&
988
- (!is_arg? || space_seen)) then
989
- tok = self.heredoc_identifier
990
- return tok if tok
991
- end
867
+ self.n = o
868
+ self.names = names
869
+ end
992
870
 
993
- self.fix_arg_lex_state
994
- self.yacc_value = "\<\<"
995
- return :tLSHFT
996
- elsif src.scan(/\</) then
997
- self.fix_arg_lex_state
998
- self.yacc_value = "<"
999
- return :tLT
1000
- end
1001
- elsif src.check(/\>/) then
1002
- if src.scan(/\>\=/) then
1003
- self.fix_arg_lex_state
1004
- self.yacc_value = ">="
1005
- return :tGEQ
1006
- elsif src.scan(/\>\>=/) then
1007
- self.fix_arg_lex_state
1008
- self.lex_state = :expr_beg
1009
- self.yacc_value = ">>"
1010
- return :tOP_ASGN
1011
- elsif src.scan(/\>\>/) then
1012
- self.fix_arg_lex_state
1013
- self.yacc_value = ">>"
1014
- return :tRSHFT
1015
- elsif src.scan(/\>/) then
1016
- self.fix_arg_lex_state
1017
- self.yacc_value = ">"
1018
- return :tGT
1019
- end
1020
- elsif src.scan(/\`/) then
1021
- self.yacc_value = "`"
1022
- case lex_state
1023
- when :expr_fname then
1024
- self.lex_state = :expr_end
1025
- return :tBACK_REF2
1026
- when :expr_dot then
1027
- self.lex_state = if command_state then
1028
- :expr_cmdarg
1029
- else
1030
- :expr_arg
1031
- end
1032
- return :tBACK_REF2
1033
- end
1034
- self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
1035
- return :tXSTRING_BEG
1036
- elsif src.scan(/\?/) then
1037
-
1038
- if is_end? then
1039
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1040
- self.tern.push true
1041
- self.yacc_value = "?"
1042
- return :tEH
1043
- end
1044
-
1045
- if src.eos? then
1046
- rb_compile_error "incomplete character syntax"
1047
- end
1048
-
1049
- if src.check(/\s|\v/) then
1050
- unless is_arg? then
1051
- c2 = { " " => 's',
1052
- "\n" => 'n',
1053
- "\t" => 't',
1054
- "\v" => 'v',
1055
- "\r" => 'r',
1056
- "\f" => 'f' }[src.matched]
1057
-
1058
- if c2 then
1059
- warning("invalid character syntax; use ?\\" + c2)
1060
- end
1061
- end
871
+ def == o
872
+ self.equal?(o) || (o.class == self.class && o.n == self.n)
873
+ end
1062
874
 
1063
- # ternary
1064
- self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
1065
- self.tern.push true
1066
- self.yacc_value = "?"
1067
- return :tEH
1068
- elsif src.check(/\w(?=\w)/) then # ternary, also
1069
- self.lex_state = :expr_beg
1070
- self.tern.push true
1071
- self.yacc_value = "?"
1072
- return :tEH
1073
- end
1074
-
1075
- c = if src.scan(/\\/) then
1076
- self.read_escape
1077
- else
1078
- src.getch
1079
- end
1080
- self.lex_state = :expr_end
1081
-
1082
- if version == 18 then
1083
- self.yacc_value = c[0].ord & 0xff
1084
- return :tINTEGER
1085
- else
1086
- self.yacc_value = c
1087
- return :tSTRING
1088
- end
1089
- elsif src.check(/\&/) then
1090
- if src.scan(/\&\&\=/) then
1091
- self.yacc_value = "&&"
1092
- self.lex_state = :expr_beg
1093
- return :tOP_ASGN
1094
- elsif src.scan(/\&\&/) then
1095
- self.lex_state = :expr_beg
1096
- self.yacc_value = "&&"
1097
- return :tANDOP
1098
- elsif src.scan(/\&\=/) then
1099
- self.yacc_value = "&"
1100
- self.lex_state = :expr_beg
1101
- return :tOP_ASGN
1102
- elsif src.scan(/&/) then
1103
- result = if is_arg? && space_seen &&
1104
- !src.check(/\s/) then
1105
- warning("`&' interpreted as argument prefix")
1106
- :tAMPER
1107
- elsif in_lex_state? :expr_beg, :expr_mid then
1108
- :tAMPER
1109
- else
1110
- :tAMPER2
1111
- end
1112
-
1113
- self.fix_arg_lex_state
1114
- self.yacc_value = "&"
1115
- return result
1116
- end
1117
- elsif src.scan(/\//) then
1118
- if is_beg? then
1119
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1120
- self.yacc_value = "/"
1121
- return :tREGEXP_BEG
1122
- end
1123
-
1124
- if src.scan(/\=/) then
1125
- self.yacc_value = "/"
1126
- self.lex_state = :expr_beg
1127
- return :tOP_ASGN
1128
- end
1129
-
1130
- if is_arg? && space_seen then
1131
- unless src.scan(/\s/) then
1132
- arg_ambiguous
1133
- self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1134
- self.yacc_value = "/"
1135
- return :tREGEXP_BEG
1136
- end
1137
- end
1138
-
1139
- self.fix_arg_lex_state
1140
- self.yacc_value = "/"
1141
-
1142
- return :tDIVIDE
1143
- elsif src.scan(/\^=/) then
1144
- self.lex_state = :expr_beg
1145
- self.yacc_value = "^"
1146
- return :tOP_ASGN
1147
- elsif src.scan(/\^/) then
1148
- self.fix_arg_lex_state
1149
- self.yacc_value = "^"
1150
- return :tCARET
1151
- elsif src.scan(/\;/) then
1152
- self.command_start = true
1153
- self.lex_state = :expr_beg
1154
- self.yacc_value = ";"
1155
- return :tSEMI
1156
- elsif src.scan(/\~/) then
1157
- if in_lex_state? :expr_fname, :expr_dot then
1158
- src.scan(/@/)
1159
- end
1160
-
1161
- self.fix_arg_lex_state
1162
- self.yacc_value = "~"
1163
-
1164
- return :tTILDE
1165
- elsif src.scan(/\\/) then
1166
- if src.scan(/\r?\n/) then
1167
- self.lineno = nil
1168
- self.space_seen = true
1169
- next
1170
- end
1171
- rb_compile_error "bare backslash only allowed before newline"
1172
- elsif src.scan(/\%/) then
1173
- if is_beg? then
1174
- return parse_quote
1175
- end
1176
-
1177
- if src.scan(/\=/) then
1178
- self.lex_state = :expr_beg
1179
- self.yacc_value = "%"
1180
- return :tOP_ASGN
1181
- end
1182
-
1183
- return parse_quote if is_arg? && space_seen && ! src.check(/\s/)
1184
-
1185
- self.fix_arg_lex_state
1186
- self.yacc_value = "%"
1187
-
1188
- return :tPERCENT
1189
- elsif src.check(/\$/) then
1190
- if src.scan(/(\$_)(\w+)/) then
1191
- self.lex_state = :expr_end
1192
- self.token = src.matched
1193
- return process_token(command_state)
1194
- elsif src.scan(/\$_/) then
1195
- self.lex_state = :expr_end
1196
- self.token = src.matched
1197
- self.yacc_value = src.matched
1198
- return :tGVAR
1199
- elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1200
- self.lex_state = :expr_end
1201
- self.yacc_value = src.matched
1202
- return :tGVAR
1203
- elsif src.scan(/\$([\&\`\'\+])/) then
1204
- self.lex_state = :expr_end
1205
- # Explicit reference to these vars as symbols...
1206
- if last_state == :expr_fname then
1207
- self.yacc_value = src.matched
1208
- return :tGVAR
1209
- else
1210
- self.yacc_value = src[1].to_sym
1211
- return :tBACK_REF
1212
- end
1213
- elsif src.scan(/\$([1-9]\d*)/) then
1214
- self.lex_state = :expr_end
1215
- if last_state == :expr_fname then
1216
- self.yacc_value = src.matched
1217
- return :tGVAR
1218
- else
1219
- self.yacc_value = src[1].to_i
1220
- return :tNTH_REF
1221
- end
1222
- elsif src.scan(/\$0/) then
1223
- self.lex_state = :expr_end
1224
- self.token = src.matched
1225
- return process_token(command_state)
1226
- elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1227
- self.lex_state = :expr_end
1228
- self.yacc_value = "$"
1229
- return "$"
1230
- elsif src.scan(/\$\w+/)
1231
- self.lex_state = :expr_end
1232
- self.token = src.matched
1233
- return process_token(command_state)
1234
- end
1235
- elsif src.check(/\_/) then
1236
- if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1237
- self.lineno = nil
1238
- return RubyLexer::EOF
1239
- elsif src.scan(/\_\w*/) then
1240
- self.token = src.matched
1241
- return process_token(command_state)
1242
- end
1243
- end
1244
- end # END OF CASE
875
+ def =~ v
876
+ (self.n & v.n) != 0
877
+ end
1245
878
 
1246
- if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1247
- return RubyLexer::EOF
1248
- else # alpha check
1249
- unless src.check IDENT_RE then
1250
- rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1251
- end
1252
- end
879
+ def | v
880
+ raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
881
+ self.names == v.names
882
+ self.class.new(self.n | v.n, self.names)
883
+ end
1253
884
 
1254
- self.token = src.matched if self.src.scan IDENT_RE
885
+ def inspect
886
+ return "Value(0)" if n.zero? # HACK?
1255
887
 
1256
- return process_token(command_state)
888
+ names.map { |v, k| k if self =~ v }.
889
+ compact.
890
+ join("|").
891
+ gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
1257
892
  end
1258
- end
1259
893
 
1260
- def yylex_paren18
1261
- self.command_start = true
1262
- result = :tLPAREN2
1263
-
1264
- if in_lex_state? :expr_beg, :expr_mid then
1265
- result = :tLPAREN
1266
- elsif space_seen then
1267
- if in_lex_state? :expr_cmdarg then
1268
- result = :tLPAREN_ARG
1269
- elsif in_lex_state? :expr_arg then
1270
- self.tern.push false
1271
- warning "don't put space before argument parentheses"
1272
- end
1273
- else
1274
- self.tern.push false
894
+ alias to_s inspect
895
+
896
+ module Values
897
+ expr_names = {}
898
+
899
+ EXPR_NONE = State.new 0x0, expr_names
900
+ EXPR_BEG = State.new 0x1, expr_names
901
+ EXPR_END = State.new 0x2, expr_names
902
+ EXPR_ENDARG = State.new 0x4, expr_names
903
+ EXPR_ENDFN = State.new 0x8, expr_names
904
+ EXPR_ARG = State.new 0x10, expr_names
905
+ EXPR_CMDARG = State.new 0x20, expr_names
906
+ EXPR_MID = State.new 0x40, expr_names
907
+ EXPR_FNAME = State.new 0x80, expr_names
908
+ EXPR_DOT = State.new 0x100, expr_names
909
+ EXPR_CLASS = State.new 0x200, expr_names
910
+ EXPR_LABEL = State.new 0x400, expr_names
911
+ EXPR_LABELED = State.new 0x800, expr_names
912
+ EXPR_FITEM = State.new 0x1000, expr_names
913
+
914
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
915
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
916
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
917
+
918
+ # extra fake lex_state names to make things a bit cleaner
919
+
920
+ EXPR_LAB = EXPR_ARG|EXPR_LABELED
921
+ EXPR_LIT = EXPR_END|EXPR_ENDARG
922
+ EXPR_PAR = EXPR_BEG|EXPR_LABEL
923
+ EXPR_PAD = EXPR_BEG|EXPR_LABELED
924
+
925
+ EXPR_NUM = EXPR_LIT
926
+
927
+ expr_names.merge!(EXPR_NONE => "EXPR_NONE",
928
+ EXPR_BEG => "EXPR_BEG",
929
+ EXPR_END => "EXPR_END",
930
+ EXPR_ENDARG => "EXPR_ENDARG",
931
+ EXPR_ENDFN => "EXPR_ENDFN",
932
+ EXPR_ARG => "EXPR_ARG",
933
+ EXPR_CMDARG => "EXPR_CMDARG",
934
+ EXPR_MID => "EXPR_MID",
935
+ EXPR_FNAME => "EXPR_FNAME",
936
+ EXPR_DOT => "EXPR_DOT",
937
+ EXPR_CLASS => "EXPR_CLASS",
938
+ EXPR_LABEL => "EXPR_LABEL",
939
+ EXPR_LABELED => "EXPR_LABELED",
940
+ EXPR_FITEM => "EXPR_FITEM")
941
+
942
+ # ruby constants for strings
943
+
944
+ str_func_names = {}
945
+
946
+ STR_FUNC_BORING = State.new 0x00, str_func_names
947
+ STR_FUNC_ESCAPE = State.new 0x01, str_func_names
948
+ STR_FUNC_EXPAND = State.new 0x02, str_func_names
949
+ STR_FUNC_REGEXP = State.new 0x04, str_func_names
950
+ STR_FUNC_QWORDS = State.new 0x08, str_func_names
951
+ STR_FUNC_SYMBOL = State.new 0x10, str_func_names
952
+ STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
953
+ STR_FUNC_LABEL = State.new 0x40, str_func_names
954
+ STR_FUNC_LIST = State.new 0x4000, str_func_names
955
+ STR_FUNC_TERM = State.new 0x8000, str_func_names
956
+ STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
957
+
958
+ # TODO: check parser25.y on how they do STR_FUNC_INDENT
959
+
960
+ STR_SQUOTE = STR_FUNC_BORING
961
+ STR_DQUOTE = STR_FUNC_EXPAND
962
+ STR_XQUOTE = STR_FUNC_EXPAND
963
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
964
+ STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
965
+ STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
966
+ STR_SSYM = STR_FUNC_SYMBOL
967
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
968
+ STR_LABEL = STR_FUNC_LABEL
969
+
970
+ str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
971
+ STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
972
+ STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
973
+ STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
974
+ STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
975
+ STR_FUNC_INDENT => "STR_FUNC_INDENT",
976
+ STR_FUNC_LABEL => "STR_FUNC_LABEL",
977
+ STR_FUNC_LIST => "STR_FUNC_LIST",
978
+ STR_FUNC_TERM => "STR_FUNC_TERM",
979
+ STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
980
+ STR_SQUOTE => "STR_SQUOTE")
1275
981
  end
1276
982
 
1277
- result
983
+ include Values
1278
984
  end
1279
985
 
1280
- def is_end?
1281
- in_lex_state? :expr_end, :expr_endarg, :expr_endfn
1282
- end
986
+ include State::Values
987
+ end
1283
988
 
1284
- def is_arg?
1285
- in_lex_state? :expr_arg, :expr_cmdarg
1286
- end
989
+ class RubyLexer
990
+ module SSWrapper
991
+ def string= s
992
+ ss.string= s
993
+ end
1287
994
 
1288
- def is_beg?
1289
- in_lex_state? :expr_beg, :expr_mid, :expr_value, :expr_class
1290
- end
995
+ def beginning_of_line?
996
+ ss.bol?
997
+ end
1291
998
 
1292
- def is_space_arg? c = "x"
1293
- is_arg? and space_seen and c !~ /\s/
1294
- end
999
+ alias bol? beginning_of_line? # to make .rex file more readable
1295
1000
 
1296
- def is_label_possible? command_state
1297
- (in_lex_state?(:expr_beg) && !command_state) || is_arg?
1298
- end
1001
+ def check re
1002
+ maybe_pop_stack
1299
1003
 
1300
- def yylex_paren19 # TODO: move or remove
1301
- result =
1302
- if is_beg? then
1303
- :tLPAREN
1304
- elsif is_space_arg? then
1305
- :tLPAREN_ARG
1306
- else
1307
- :tLPAREN2 # plain '(' in parse.y
1308
- end
1004
+ ss.check re
1005
+ end
1309
1006
 
1310
- # paren_nest++; # TODO
1007
+ def end_of_stream?
1008
+ ss.eos?
1009
+ end
1311
1010
 
1312
- result
1313
- end
1011
+ alias eos? end_of_stream?
1314
1012
 
1315
- def process_token(command_state)
1013
+ def getch
1014
+ c = ss.getch
1015
+ c = ss.getch if c == "\r" && ss.peek(1) == "\n"
1016
+ c
1017
+ end
1316
1018
 
1317
- token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)
1019
+ def match
1020
+ ss
1021
+ end
1318
1022
 
1319
- result = nil
1320
- last_state = lex_state
1023
+ def matched
1024
+ ss.matched
1025
+ end
1321
1026
 
1322
- case token
1323
- when /^\$/ then
1324
- self.lex_state, result = :expr_end, :tGVAR
1325
- when /^@@/ then
1326
- self.lex_state, result = :expr_end, :tCVAR
1327
- when /^@/ then
1328
- self.lex_state, result = :expr_end, :tIVAR
1329
- else
1330
- if token =~ /[!?]$/ then
1331
- result = :tFID
1332
- else
1333
- if in_lex_state? :expr_fname then
1334
- # ident=, not =~ => == or followed by =>
1335
- # TODO test lexing of a=>b vs a==>b
1336
- if src.scan(/=(?:(?![~>=])|(?==>))/) then
1337
- result = :tIDENTIFIER
1338
- token << src.matched
1339
- end
1340
- end
1027
+ def in_heredoc?
1028
+ !!self.old_ss
1029
+ end
1341
1030
 
1342
- result ||= if token =~ /^[A-Z]/ then
1343
- :tCONSTANT
1344
- else
1345
- :tIDENTIFIER
1346
- end
1031
+ def maybe_pop_stack
1032
+ if ss.eos? && in_heredoc? then
1033
+ self.ss_pop
1034
+ self.lineno_pop
1347
1035
  end
1036
+ end
1348
1037
 
1349
- unless ruby18
1350
- if is_label_possible? command_state then
1351
- colon = src.scan(/:/)
1038
+ def pos
1039
+ ss.pos
1040
+ end
1352
1041
 
1353
- if colon && src.peek(1) != ":" then
1354
- self.lex_state = :expr_beg
1355
- self.yacc_value = [token, src.lineno]
1356
- return :tLABEL
1357
- end
1042
+ def pos= n
1043
+ ss.pos = n
1044
+ end
1358
1045
 
1359
- src.unscan if colon
1360
- end
1361
- end
1046
+ def rest
1047
+ ss.rest
1048
+ end
1362
1049
 
1363
- unless in_lex_state? :expr_dot then
1364
- # See if it is a reserved word.
1365
- keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
1366
- RubyParserStuff::Keyword.keyword18 token
1367
- else
1368
- RubyParserStuff::Keyword.keyword19 token
1369
- end
1050
+ def scan re
1051
+ maybe_pop_stack
1370
1052
 
1371
- if keyword then
1372
- state = lex_state
1373
- self.lex_state = keyword.state
1374
- self.yacc_value = [token, src.lineno]
1375
-
1376
- if state == :expr_fname then
1377
- self.yacc_value = keyword.name
1378
- return keyword.id0
1379
- end
1380
-
1381
- if keyword.id0 == :kDO then
1382
- self.command_start = true
1383
- return :kDO_COND if cond.is_in_state
1384
- return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1385
- return :kDO_BLOCK if state == :expr_endarg
1386
- if defined?(@hack_expects_lambda) && @hack_expects_lambda
1387
- @hack_expects_lambda = false
1388
- return :kDO_LAMBDA
1389
- end
1390
- return :kDO
1391
- end
1053
+ ss.scan re
1054
+ end
1392
1055
 
1393
- return keyword.id0 if state == :expr_beg or state == :expr_value
1056
+ def scanner_class # TODO: design this out of oedipus_lex. or something.
1057
+ RPStringScanner
1058
+ end
1394
1059
 
1395
- self.lex_state = :expr_beg if keyword.id0 != keyword.id1
1060
+ def ss_string
1061
+ ss.string
1062
+ end
1396
1063
 
1397
- return keyword.id1
1398
- end
1399
- end
1064
+ def ss_string= s
1065
+ raise "Probably not"
1066
+ ss.string = s
1067
+ end
1400
1068
 
1401
- # TODO:
1402
- # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
1403
-
1404
- self.lex_state =
1405
- if is_beg? || in_lex_state?(:expr_dot) || is_arg? then
1406
- if command_state then
1407
- :expr_cmdarg
1408
- else
1409
- :expr_arg
1410
- end
1411
- elsif ruby19 && in_lex_state?(:expr_fname) then
1412
- :expr_endfn
1413
- else
1414
- :expr_end
1415
- end
1069
+ def unscan
1070
+ ss.unscan
1071
+ end
1072
+ end
1073
+
1074
+ include SSWrapper
1075
+ end
1416
1076
 
1077
+ class RubyLexer
1078
+ module SSStackish
1079
+ def lineno_push new_lineno
1080
+ self.old_lineno = self.lineno
1081
+ self.lineno = new_lineno
1417
1082
  end
1418
1083
 
1419
- self.yacc_value = token
1084
+ def lineno_pop
1085
+ self.lineno = self.old_lineno
1086
+ self.old_lineno = nil
1087
+ end
1420
1088
 
1089
+ def ss= o
1090
+ raise "Clearing ss while in heredoc!?!" if in_heredoc?
1091
+ @old_ss = nil
1092
+ super
1093
+ end
1421
1094
 
1422
- self.lex_state = :expr_end if
1423
- last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
1095
+ def ss_push new_ss
1096
+ @old_ss = self.ss
1097
+ @ss = new_ss
1098
+ end
1424
1099
 
1425
- return result
1100
+ def ss_pop
1101
+ @ss = self.old_ss
1102
+ @old_ss = nil
1103
+ end
1426
1104
  end
1427
1105
 
1428
- def yylex_string # 23 lines
1429
- token = if lex_strterm[0] == :heredoc then
1430
- self.heredoc lex_strterm
1431
- else
1432
- self.parse_string lex_strterm
1433
- end
1106
+ prepend SSStackish
1107
+ end
1108
+
1109
+ if ENV["RP_STRTERM_DEBUG"] then
1110
+ class RubyLexer
1111
+ def d o
1112
+ $stderr.puts o.inspect
1113
+ end
1114
+
1115
+ alias old_lex_strterm= lex_strterm=
1116
+
1117
+ def lex_strterm= o
1118
+ self.old_lex_strterm= o
1119
+ where = caller.first.split(/:/).first(2).join(":")
1120
+ $stderr.puts
1121
+ d :lex_strterm => [o, where]
1122
+ end
1123
+ end
1124
+ end
1434
1125
 
1435
- if token == :tSTRING_END || token == :tREGEXP_END then
1436
- self.lineno = nil
1437
- self.lex_strterm = nil
1438
- self.lex_state = :expr_end
1126
+ require_relative "./ruby_lexer.rex.rb"
1127
+ require_relative "./ruby_lexer_strings.rb"
1128
+
1129
+ if ENV["RP_LINENO_DEBUG"] then
1130
+ class RubyLexer
1131
+ def d o
1132
+ $stderr.puts o.inspect
1439
1133
  end
1440
1134
 
1441
- return token
1135
+ alias old_lineno= lineno=
1136
+
1137
+ def lineno= n
1138
+ self.old_lineno= n
1139
+ where = caller.first.split(/:/).first(2).join(":")
1140
+ $stderr.puts
1141
+ d :lineno => [n, where]
1142
+ end
1442
1143
  end
1443
1144
  end