ruby_parser 3.0.0 → 3.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/.autotest +36 -19
  4. data/History.rdoc +1297 -0
  5. data/Manifest.txt +35 -7
  6. data/{README.txt → README.rdoc} +44 -14
  7. data/Rakefile +308 -110
  8. data/bin/ruby_parse +3 -1
  9. data/bin/ruby_parse_extract_error +36 -16
  10. data/compare/normalize.rb +218 -0
  11. data/debugging.md +190 -0
  12. data/gauntlet.md +107 -0
  13. data/lib/.document +1 -0
  14. data/lib/rp_extensions.rb +53 -0
  15. data/lib/rp_stringscanner.rb +33 -0
  16. data/lib/ruby20_parser.rb +10973 -0
  17. data/lib/ruby20_parser.y +2683 -0
  18. data/lib/ruby21_parser.rb +10980 -0
  19. data/lib/ruby21_parser.y +2700 -0
  20. data/lib/ruby22_parser.rb +11123 -0
  21. data/lib/ruby22_parser.y +2711 -0
  22. data/lib/ruby23_parser.rb +11132 -0
  23. data/lib/ruby23_parser.y +2713 -0
  24. data/lib/ruby24_parser.rb +11231 -0
  25. data/lib/ruby24_parser.y +2721 -0
  26. data/lib/ruby25_parser.rb +11231 -0
  27. data/lib/ruby25_parser.y +2721 -0
  28. data/lib/ruby26_parser.rb +11253 -0
  29. data/lib/ruby26_parser.y +2736 -0
  30. data/lib/ruby27_parser.rb +12980 -0
  31. data/lib/ruby27_parser.y +3324 -0
  32. data/lib/ruby30_parser.rb +13242 -0
  33. data/lib/ruby30_parser.y +3447 -0
  34. data/lib/ruby31_parser.rb +13622 -0
  35. data/lib/ruby31_parser.y +3481 -0
  36. data/lib/ruby3_parser.yy +3536 -0
  37. data/lib/ruby_lexer.rb +933 -1232
  38. data/lib/ruby_lexer.rex +185 -0
  39. data/lib/ruby_lexer.rex.rb +399 -0
  40. data/lib/ruby_lexer_strings.rb +638 -0
  41. data/lib/ruby_parser.rb +97 -3
  42. data/lib/ruby_parser.yy +3465 -0
  43. data/lib/ruby_parser_extras.rb +1216 -687
  44. data/test/test_ruby_lexer.rb +2249 -1092
  45. data/test/test_ruby_parser.rb +5156 -975
  46. data/test/test_ruby_parser_extras.rb +47 -77
  47. data/tools/munge.rb +250 -0
  48. data/tools/ripper.rb +44 -0
  49. data.tar.gz.sig +1 -1
  50. metadata +200 -155
  51. metadata.gz.sig +0 -0
  52. data/.gemtest +0 -0
  53. data/History.txt +0 -482
  54. data/lib/gauntlet_rubyparser.rb +0 -120
  55. data/lib/ruby18_parser.rb +0 -5747
  56. data/lib/ruby18_parser.y +0 -1873
  57. data/lib/ruby19_parser.rb +0 -6110
  58. data/lib/ruby19_parser.y +0 -2078
@@ -1,222 +1,261 @@
1
1
  # encoding: ASCII-8BIT
2
+ # frozen_string_literal: true
3
+ # TODO: remove encoding comment
2
4
 
3
- require 'stringio'
4
- require 'racc/parser'
5
- require 'sexp'
6
- require 'strscan'
7
- require 'ruby_lexer'
5
+ require "sexp"
6
+ require "ruby_lexer"
8
7
  require "timeout"
8
+ require "rp_extensions"
9
+ require "rp_stringscanner"
9
10
 
10
- # WHY do I have to do this?!?
11
- class Regexp
12
- ONCE = 0 unless defined? ONCE # FIX: remove this - it makes no sense
11
+ class Sexp
12
+ def check_line_numbers
13
+ raise "bad nil line for:\n%s" % [self.pretty_inspect] if nil_line?
14
+ raise "bad line number for:\n%s" % [self.pretty_inspect] unless
15
+ Integer === self.line &&
16
+ self.line >= 1 &&
17
+ self.line <= self.line_min
18
+ end
19
+
20
+ ##
21
+ # Returns the maximum line number of the children of self.
13
22
 
14
- unless defined? ENC_NONE then
15
- ENC_NONE = /x/n.options
16
- ENC_EUC = /x/e.options
17
- ENC_SJIS = /x/s.options
18
- ENC_UTF8 = /x/u.options
23
+ def line_min
24
+ @line_min ||= [self.deep_each.map(&:line).min, self.line].compact.min
19
25
  end
20
- end
21
26
 
22
- # I hate ruby 1.9 string changes
23
- class Fixnum
24
- def ord
25
- self
27
+ def nil_line?
28
+ self.deep_each.map(&:line).any?(&:nil?)
26
29
  end
27
- end unless "a"[0] == "a"
30
+ end
28
31
 
29
- class RPStringScanner < StringScanner
30
- # if ENV['TALLY'] then
31
- # alias :old_getch :getch
32
- # def getch
33
- # warn({:getch => caller[0]}.inspect)
34
- # old_getch
35
- # end
36
- # end
32
+ module RubyParserStuff
33
+ VERSION = "3.19.1"
37
34
 
38
- if "".respond_to? :encoding then
39
- def string_to_pos
40
- string.byteslice(0, pos)
41
- end
35
+ attr_accessor :lexer, :in_def, :in_single, :file
36
+ attr_accessor :in_kwarg
37
+ attr_reader :env, :comments
42
38
 
43
- def charpos
44
- string_to_pos.length
45
- end
46
- else
47
- alias :charpos :pos
39
+ ##
40
+ # Canonicalize conditionals. Eg:
41
+ #
42
+ # not x ? a : b
43
+ #
44
+ # becomes:
45
+ #
46
+ # x ? b : a
48
47
 
49
- def string_to_pos
50
- string[0..pos]
51
- end
52
- end
48
+ attr_accessor :canonicalize_conditions
53
49
 
54
- def current_line # HAHA fuck you (HACK)
55
- string_to_pos[/\A.*__LINE__/m].split(/\n/).size
56
- end
50
+ ##
51
+ # The last token type returned from #next_token
57
52
 
58
- def extra_lines_added
59
- @extra_lines_added ||= 0
60
- end
53
+ attr_accessor :last_token_type
61
54
 
62
- def extra_lines_added= val
63
- @extra_lines_added = val
64
- end
55
+ $good20 = []
65
56
 
66
- def lineno
67
- string[0...pos].count("\n") + 1 - extra_lines_added
57
+ %w[
58
+ ].map(&:to_i).each do |n|
59
+ $good20[n] = n
68
60
  end
69
61
 
70
- # TODO: once we get rid of these, we can make things like
71
- # TODO: current_line and lineno much more accurate and easy to do
62
+ def debug20 n, v = nil, r = nil
63
+ raise "not yet #{n} #{v.inspect} => #{r.inspect}" unless $good20[n]
64
+ end
72
65
 
73
- def unread_many str # TODO: remove this entirely - we should not need it
74
- warn({:unread_many => caller[0]}.inspect) if ENV['TALLY']
75
- self.extra_lines_added += str.count("\n")
76
- begin
77
- string[charpos, 0] = str
78
- rescue IndexError
79
- # HACK -- this is a bandaid on a dirty rag on an open festering wound
66
+ def self.deprecate old, new
67
+ define_method old do |*args|
68
+ warn "DEPRECATED: #{old} -> #{new} from #{caller.first}"
69
+ send new, *args
80
70
  end
81
71
  end
82
72
 
83
- if ENV['DEBUG'] then
84
- alias :old_getch :getch
85
- def getch
86
- c = self.old_getch
87
- p :getch => [c, caller.first]
88
- c
89
- end
73
+ ##
74
+ # for pure ruby systems only
75
+
76
+ def do_parse
77
+ _racc_do_parse_rb(_racc_setup, false)
78
+ end if ENV["PURE_RUBY"] || ENV["CHECK_LINE_NUMS"]
90
79
 
91
- alias :old_scan :scan
92
- def scan re
93
- s = old_scan re
94
- d :scan => [s, caller.first] if s
95
- s
80
+ if ENV["CHECK_LINE_NUMS"] then
81
+ def _racc_do_reduce arg, act
82
+ x = super
83
+
84
+ @racc_vstack.grep(Sexp).each do |sexp|
85
+ sexp.check_line_numbers
86
+ end
87
+ x
96
88
  end
97
89
  end
98
90
 
99
- def d o
100
- $stderr.puts o.inspect
101
- end
102
- end
91
+ ARG_TYPES = [:arglist, :call_args, :array, :args].map { |k|
92
+ [k, true]
93
+ }.to_h
103
94
 
104
- module RubyParserStuff
105
- VERSION = '3.0.0' unless constants.include? "VERSION" # SIGH
95
+ has_enc = "".respond_to? :encoding
106
96
 
107
- attr_accessor :lexer, :in_def, :in_single, :file
108
- attr_reader :env, :comments
97
+ # This is in sorted order of occurrence according to
98
+ # charlock_holmes against 500k files, with UTF_8 forced
99
+ # to the top.
100
+ #
101
+ # Overwrite this contstant if you need something different.
102
+ ENCODING_ORDER = [
103
+ Encoding::UTF_8, # moved to top to reflect default in 2.0
104
+ Encoding::ISO_8859_1,
105
+ Encoding::ISO_8859_2,
106
+ Encoding::ISO_8859_9,
107
+ Encoding::SHIFT_JIS,
108
+ Encoding::WINDOWS_1252,
109
+ Encoding::EUC_JP
110
+ ] if has_enc
109
111
 
110
- def syntax_error msg
111
- raise RubyParser::SyntaxError, msg
112
- end
112
+ JUMP_TYPE = [:return, :next, :break, :yield].map { |k| [k, true] }.to_h
113
113
 
114
- def arg_add(node1, node2) # TODO: nuke
115
- return s(:arglist, node2) unless node1
114
+ TAB_WIDTH = 8
116
115
 
117
- node1[0] = :arglist if node1[0] == :array
118
- return node1 << node2 if node1[0] == :arglist
116
+ def initialize(options = {})
117
+ super()
119
118
 
120
- return s(:arglist, node1, node2)
121
- end
119
+ v = self.class.name[/[23]\d/]
120
+ raise "Bad Class name #{self.class}" unless v
122
121
 
123
- def arg_blk_pass node1, node2 # TODO: nuke
124
- node1 = s(:arglist, node1) unless [:arglist, :array].include? node1.first
125
- node1 << node2 if node2
126
- node1
127
- end
122
+ self.lexer = RubyLexer.new v && v.to_i
123
+ self.lexer.parser = self
124
+ self.in_kwarg = false
128
125
 
129
- def arg_concat node1, node2 # TODO: nuke
130
- raise "huh" unless node2
131
- node1 << s(:splat, node2).compact
132
- node1
133
- end
126
+ @env = RubyParserStuff::Environment.new
127
+ @comments = []
134
128
 
135
- def clean_mlhs sexp
136
- case sexp.sexp_type
137
- when :masgn then
138
- if sexp.size == 2 and sexp[1].sexp_type == :array then
139
- s(:masgn, *sexp[1][1..-1].map { |sub| clean_mlhs sub })
140
- else
141
- sexp
142
- end
143
- when :gasgn, :iasgn, :lasgn, :cvasgn then
144
- if sexp.size == 2 then
145
- sexp.last
146
- else
147
- sexp # optional value
148
- end
149
- else
150
- raise "unsupported type: #{sexp.inspect}"
151
- end
152
- end
129
+ @canonicalize_conditions = true
153
130
 
154
- def block_var *args
155
- result = self.args args
156
- result[0] = :masgn
157
- result
131
+ self.reset
158
132
  end
159
133
 
160
- def block_var18 ary, splat, block
161
- ary ||= s(:array)
134
+ def arg_concat node1, node2 # TODO: nuke
135
+ raise "huh" unless node2
162
136
 
163
- if splat then
164
- splat = splat[1] unless Symbol === splat
165
- ary << "*#{splat}".to_sym
166
- end
137
+ splat = s(:splat, node2)
138
+ splat.line node2.line
167
139
 
168
- ary << "&#{block[1]}".to_sym if block
140
+ node1 << splat
141
+ end
169
142
 
170
- if ary.length > 2 or ary.splat then # HACK
171
- s(:masgn, *ary[1..-1])
172
- else
173
- ary.last
174
- end
143
+ def argl x
144
+ x = s(:arglist, x) if x and x.sexp_type == :array
145
+ x
175
146
  end
176
147
 
177
148
  def args args
178
149
  result = s(:args)
179
150
 
151
+ ss = args.grep Sexp
152
+ if ss.empty? then
153
+ result.line lexer.lineno
154
+ else
155
+ result.line ss.first.line
156
+ end
157
+
180
158
  args.each do |arg|
159
+ if arg.instance_of? Array and arg.size == 2 and arg.last.is_a? Numeric then
160
+ arg = arg.first
161
+ end
162
+
181
163
  case arg
182
164
  when Sexp then
183
165
  case arg.sexp_type
184
- when :args, :block, :array then
185
- result.concat arg[1..-1]
166
+ when :args, :block, :array, :call_args then # HACK call_args mismatch
167
+ rest = arg.sexp_body
168
+
169
+ rest.map! { |x|
170
+ if x.instance_of? Array and x.size == 2 and Numeric === x.last then
171
+ x.first
172
+ else
173
+ x
174
+ end
175
+ }
176
+
177
+ result.concat rest
178
+ when :forward_args then
179
+ self.env[:*] = :lvar # TODO: arg_var(p, idFWD_REST) ?
180
+ self.env[:**] = :lvar
181
+ self.env[:&] = :lvar
182
+
183
+ result << arg
186
184
  when :block_arg then
187
185
  result << :"&#{arg.last}"
188
- when :masgn then
186
+ when :shadow then
187
+ name = arg.last
188
+ self.env[name] = :lvar
189
+ if Sexp === result.last and result.last.sexp_type == :shadow then
190
+ result.last << name
191
+ else
192
+ result << arg
193
+ end
194
+ when :masgn, :block_pass, :hash then # HACK: remove. prolly call_args
189
195
  result << arg
190
196
  else
191
- raise "unhandled: #{arg.inspect}"
197
+ raise "unhandled: #{arg.sexp_type} in #{args.inspect}"
192
198
  end
193
199
  when Symbol then
200
+ name = arg.to_s.delete("&*")
201
+ self.env[name.to_sym] = :lvar unless name.empty?
194
202
  result << arg
195
- when ",", nil then
203
+ when true, false then
204
+ self.in_kwarg = arg
205
+ when ",", "|", ";", "(", ")", nil then
196
206
  # ignore
197
207
  else
198
- raise "unhandled: #{arg.inspect}"
208
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
199
209
  end
200
210
  end
201
211
 
202
212
  result
203
213
  end
204
214
 
205
- def aryset receiver, index
206
- index ||= []
207
- s(:attrasgn, receiver, :"[]=", *index[1..-1])
215
+ def end_args args
216
+ lexer.lex_state = RubyLexer::State::Values::EXPR_BEG
217
+ lexer.command_start = true
218
+ self.args args
208
219
  end
209
220
 
210
- def assignable(lhs, value = nil)
211
- id = lhs.to_sym
212
- id = id.to_sym if Sexp === id
221
+ def attrset_id? id
222
+ id =~ /^\[\]=$|^\w+=$/
223
+ end
213
224
 
214
- raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
225
+ def endless_method_name defn_or_defs
226
+ name = defn_or_defs[1]
227
+ name = defn_or_defs[2] unless Symbol === name
215
228
 
216
- raise SyntaxError, "Can't change the value of #{id}" if
217
- id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
229
+ if attrset_id? name then
230
+ yyerror "setter method cannot be defined in an endless method definition"
231
+ end
218
232
 
219
- result = case id.to_s
233
+ # TODO? token_info_drop(p, "def", loc->beg_pos);
234
+ end
235
+
236
+ def array_to_hash array
237
+ case array.sexp_type
238
+ when :kwsplat then
239
+ array
240
+ else
241
+ s(:hash, *array.sexp_body).line array.line
242
+ end
243
+ end
244
+
245
+ def aryset receiver, index
246
+ index ||= s()
247
+ l = receiver.line
248
+ result = s(:attrasgn, receiver, :"[]=",
249
+ *index.sexp_body).compact # [].sexp_body => nil
250
+ result.line = l
251
+ result
252
+ end
253
+
254
+ def assignable(lhs, value = nil)
255
+ id, line = lhs
256
+ id = id.to_sym
257
+
258
+ result = case id
220
259
  when /^@@/ then
221
260
  asgn = in_def || in_single > 0
222
261
  s((asgn ? :cvasgn : :cvdecl), id)
@@ -228,104 +267,231 @@ module RubyParserStuff
228
267
  s(:cdecl, id)
229
268
  else
230
269
  case self.env[id]
231
- when :lvar then
270
+ when :lvar, :dvar, nil then
232
271
  s(:lasgn, id)
233
- when :dvar, nil then
234
- if self.env.current[id] == :dvar then
235
- s(:lasgn, id)
236
- elsif self.env[id] == :dvar then
237
- self.env.use(id)
238
- s(:lasgn, id)
239
- elsif ! self.env.dynamic? then
240
- s(:lasgn, id)
241
- else
242
- s(:lasgn, id)
243
- end
244
272
  else
245
273
  raise "wtf? unknown type: #{self.env[id]}"
246
274
  end
247
275
  end
248
276
 
249
- self.env[id] ||= :lvar
277
+ self.env[id] ||= :lvar if result.sexp_type == :lasgn
250
278
 
251
279
  result << value if value
280
+ result.line line
281
+ result
282
+ end
252
283
 
253
- return result
284
+ def backref_assign_error ref
285
+ # TODO: need a test for this... obviously
286
+ case ref.sexp_type
287
+ when :nth_ref then
288
+ raise "write a test 2"
289
+ raise SyntaxError, "Can't set variable %p" % ref.last
290
+ when :back_ref then
291
+ raise "write a test 3"
292
+ raise SyntaxError, "Can't set back reference %p" % ref.last
293
+ else
294
+ raise "Unknown backref type: #{ref.inspect}"
295
+ end
254
296
  end
255
297
 
256
298
  def block_append(head, tail)
257
299
  return head if tail.nil?
258
300
  return tail if head.nil?
259
301
 
260
- case head[0]
261
- when :lit, :str then
262
- return tail
263
- end
264
-
265
302
  line = [head.line, tail.line].compact.min
266
303
 
267
304
  head = remove_begin(head)
268
- head = s(:block, head) unless head.node_type == :block
305
+ head = s(:block, head).line(line) unless head.sexp_type == :block
269
306
 
270
- head.line = line
307
+ # head.line = line
271
308
  head << tail
272
309
  end
273
310
 
311
+ def block_dup_check call_or_args, block
312
+ syntax_error "Both block arg and actual block given." if
313
+ block and call_or_args.block_pass?
314
+ end
315
+
316
+ def block_var *args
317
+ result = self.args args
318
+ result.sexp_type = :masgn
319
+ result
320
+ end
321
+
322
+ def call_args args
323
+ result = s(:call_args)
324
+
325
+ a = args.grep(Sexp).first
326
+ if a then
327
+ result.line a.line
328
+ else
329
+ result.line lexer.lineno
330
+ end
331
+
332
+ args.each do |arg|
333
+ if arg.instance_of? Array and arg.size == 2 and arg.last.is_a? Numeric then
334
+ arg = arg.first
335
+ end
336
+
337
+ case arg
338
+ when Sexp then
339
+ case arg.sexp_type
340
+ when :array, :args, :call_args then # HACK? remove array at some point
341
+ result.concat arg.sexp_body
342
+ else
343
+ result << arg
344
+ end
345
+ when Symbol then
346
+ result << arg
347
+ when Array then
348
+ id, _line = arg
349
+ result << id
350
+ when ",", nil, "(" then
351
+ # ignore
352
+ else
353
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
354
+ end
355
+ end
356
+
357
+ result
358
+ end
359
+
360
+ def clean_mlhs sexp
361
+ case sexp.sexp_type
362
+ when :masgn then
363
+ if sexp.size == 2 and sexp[1].sexp_type == :array then
364
+ s(:masgn, *sexp[1].sexp_body.map { |sub| clean_mlhs sub })
365
+ else
366
+ debug20 5
367
+ sexp
368
+ end
369
+ when :gasgn, :iasgn, :lasgn, :cvasgn then
370
+ if sexp.size == 2 then
371
+ sexp.last
372
+ else
373
+ debug20 7
374
+ sexp # optional value
375
+ end
376
+ else
377
+ raise "unsupported type: #{sexp.inspect}"
378
+ end
379
+ end
380
+
274
381
  def cond node
275
382
  return nil if node.nil?
276
383
  node = value_expr node
277
384
 
278
- case node.first
385
+ case node.sexp_type
279
386
  when :lit then
280
387
  if Regexp === node.last then
281
- return s(:match, node)
388
+ s(:match, node)
282
389
  else
283
- return node
390
+ node
284
391
  end
285
392
  when :and then
286
- return s(:and, cond(node[1]), cond(node[2]))
393
+ _, lhs, rhs = node
394
+ s(:and, cond(lhs), cond(rhs))
287
395
  when :or then
288
- return s(:or, cond(node[1]), cond(node[2]))
396
+ _, lhs, rhs = node
397
+ s(:or, cond(lhs), cond(rhs))
289
398
  when :dot2 then
290
399
  label = "flip#{node.hash}"
291
400
  env[label] = :lvar
292
- return s(:flip2, node[1], node[2])
401
+ _, lhs, rhs = node
402
+ s(:flip2, lhs, rhs) # TODO: recurse?
293
403
  when :dot3 then
294
404
  label = "flip#{node.hash}"
295
405
  env[label] = :lvar
296
- return s(:flip3, node[1], node[2])
406
+ _, lhs, rhs = node
407
+ s(:flip3, lhs, rhs)
297
408
  else
298
- return node
299
- end
409
+ node
410
+ end.line node.line
300
411
  end
301
412
 
302
- ##
303
- # for pure ruby systems only
413
+ def dedent sexp
414
+ dedent_count = dedent_size sexp
304
415
 
305
- def do_parse
306
- _racc_do_parse_rb(_racc_setup, false)
307
- end if ENV['PURE_RUBY']
416
+ skip_one = false
417
+ sexp.map { |obj|
418
+ case obj
419
+ when Symbol then
420
+ obj
421
+ when String then
422
+ obj.lines.map { |l| remove_whitespace_width l, dedent_count }.join
423
+ when Sexp then
424
+ case obj.sexp_type
425
+ when :evstr then
426
+ skip_one = true
427
+ obj
428
+ when :str then
429
+ _, str = obj
430
+ str = if skip_one then
431
+ skip_one = false
432
+ s1, *rest = str.lines
433
+ s1 + rest.map { |l| remove_whitespace_width l, dedent_count }.join
434
+ else
435
+ str.lines.map { |l| remove_whitespace_width l, dedent_count }.join
436
+ end
308
437
 
309
- def get_match_node lhs, rhs # TODO: rename to new_match
310
- if lhs then
311
- case lhs[0]
312
- when :dregx, :dregx_once then
313
- return s(:match2, lhs, rhs).line(lhs.line)
314
- when :lit then
315
- return s(:match2, lhs, rhs).line(lhs.line) if Regexp === lhs.last
438
+ s(:str, str).line obj.line
439
+ else
440
+ warn "unprocessed sexp %p" % [obj]
441
+ end
442
+ else
443
+ warn "unprocessed: %p" % [obj]
316
444
  end
317
- end
445
+ }
446
+ end
318
447
 
319
- if rhs then
320
- case rhs[0]
321
- when :dregx, :dregx_once then
322
- return s(:match3, rhs, lhs).line(lhs.line)
323
- when :lit then
324
- return s(:match3, rhs, lhs).line(lhs.line) if Regexp === rhs.last
448
+ def dedent_size sexp
449
+ skip_one = false
450
+ sexp.flat_map { |s|
451
+ case s
452
+ when Symbol then
453
+ next
454
+ when String then
455
+ s.lines
456
+ when Sexp then
457
+ case s.sexp_type
458
+ when :evstr then
459
+ skip_one = true
460
+ next
461
+ when :str then
462
+ _, str = s
463
+ lines = str.lines
464
+ if skip_one then
465
+ skip_one = false
466
+ lines.shift
467
+ end
468
+ lines
469
+ else
470
+ warn "unprocessed sexp %p" % [s]
471
+ end
472
+ else
473
+ warn "unprocessed: %p" % [s]
474
+ end.map { |l| whitespace_width l.chomp }
475
+ }.compact.min
476
+ end
477
+
478
+ def dedent_string string, width
479
+ characters_skipped = 0
480
+ indentation_skipped = 0
481
+
482
+ string.chars.each do |char|
483
+ break if indentation_skipped >= width
484
+ if char == " "
485
+ characters_skipped += 1
486
+ indentation_skipped += 1
487
+ elsif char == "\t"
488
+ proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
489
+ break if proposed > width
490
+ characters_skipped += 1
491
+ indentation_skipped = proposed
325
492
  end
326
493
  end
327
-
328
- return new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
494
+ string[characters_skipped..-1]
329
495
  end
330
496
 
331
497
  def gettable(id)
@@ -354,76 +520,151 @@ module RubyParserStuff
354
520
  result
355
521
  end
356
522
 
523
+ def hack_encoding str, extra = nil
524
+ encodings = ENCODING_ORDER.dup
525
+ encodings.unshift(extra) unless extra.nil?
526
+
527
+ # terrible, horrible, no good, very bad, last ditch effort.
528
+ encodings.each do |enc|
529
+ begin
530
+ str.force_encoding enc
531
+ if str.valid_encoding? then
532
+ str.encode! Encoding::UTF_8
533
+ break
534
+ end
535
+ rescue ArgumentError # unknown encoding name
536
+ # do nothing
537
+ rescue Encoding::InvalidByteSequenceError
538
+ # do nothing
539
+ rescue Encoding::UndefinedConversionError
540
+ # do nothing
541
+ end
542
+ end
543
+
544
+ # no amount of pain is enough for you.
545
+ raise "Bad encoding. Need a magic encoding comment." unless
546
+ str.encoding.name == "UTF-8"
547
+ end
548
+
357
549
  ##
358
- # Canonicalize conditionals. Eg:
359
- #
360
- # not x ? a : b
550
+ # Returns a UTF-8 encoded string after processing BOMs and magic
551
+ # encoding comments.
361
552
  #
362
- # becomes:
553
+ # Holy crap... ok. Here goes:
363
554
  #
364
- # x ? b : a
555
+ # Ruby's file handling and encoding support is insane. We need to be
556
+ # able to lex a file. The lexer file is explicitly UTF-8 to make
557
+ # things cleaner. This allows us to deal with extended chars in
558
+ # class and method names. In order to do this, we need to encode all
559
+ # input source files as UTF-8. First, we look for a UTF-8 BOM by
560
+ # looking at the first line while forcing its encoding to
561
+ # ASCII-8BIT. If we find a BOM, we strip it and set the expected
562
+ # encoding to UTF-8. Then, we search for a magic encoding comment.
563
+ # If found, it overrides the BOM. Finally, we force the encoding of
564
+ # the input string to whatever was found, and then encode that to
565
+ # UTF-8 for compatibility with the lexer.
365
566
 
366
- attr_accessor :canonicalize_conditions
567
+ def handle_encoding str
568
+ str = str.dup
569
+ has_enc = str.respond_to? :encoding
570
+ encoding = nil
367
571
 
368
- def initialize(options = {})
369
- super()
572
+ header = str.each_line.first(2)
573
+ header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
370
574
 
371
- v = self.class.name[/1[89]/]
372
- self.lexer = RubyLexer.new v && v.to_i
373
- self.lexer.parser = self
374
- @env = RubyParserStuff::Environment.new
375
- @comments = []
575
+ first = header.first || ""
576
+ encoding, str = +"utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/
376
577
 
377
- @canonicalize_conditions = true
578
+ encoding = $1.strip if header.find { |s|
579
+ s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
580
+ s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
581
+ }
378
582
 
379
- self.reset
583
+ if encoding then
584
+ if has_enc then
585
+ encoding.sub!(/utf-8-.+$/, "utf-8") # HACK for stupid emacs formats
586
+ hack_encoding str, encoding
587
+ else
588
+ warn "Skipping magic encoding comment"
589
+ end
590
+ else
591
+ # nothing specified... ugh. try to encode as utf-8
592
+ hack_encoding str if has_enc
593
+ end
594
+
595
+ str
596
+ end
597
+
598
+ def invert_block_call val
599
+ ret, iter = val
600
+ type, call = ret
601
+
602
+ iter.insert 1, call
603
+
604
+ ret = s(type).line ret.line
605
+
606
+ [iter, ret]
607
+ end
608
+
609
+ def inverted? val
610
+ JUMP_TYPE[val[0].sexp_type]
380
611
  end
381
612
 
382
613
  def list_append list, item # TODO: nuke me *sigh*
383
614
  return s(:array, item) unless list
384
- list = s(:array, list) unless Sexp === list && list.first == :array
615
+ list = s(:array, list) unless Sexp === list && list.sexp_type == :array
385
616
  list << item
386
617
  end
387
618
 
388
619
  def list_prepend item, list # TODO: nuke me *sigh*
389
- list = s(:array, list) unless Sexp === list && list[0] == :array
620
+ list = s(:array, list) unless Sexp === list && list.sexp_type == :array
390
621
  list.insert 1, item
391
622
  list
392
623
  end
393
624
 
394
- def literal_concat head, tail
625
+ def literal_concat head, tail # TODO: ugh. rewrite
395
626
  return tail unless head
396
627
  return head unless tail
397
628
 
398
- htype, ttype = head[0], tail[0]
629
+ htype, ttype = head.sexp_type, tail.sexp_type
399
630
 
400
- head = s(:dstr, '', head) if htype == :evstr
631
+ head = s(:dstr, "", head).line head.line if htype == :evstr
401
632
 
402
633
  case ttype
403
634
  when :str then
404
635
  if htype == :str
405
- head[-1] << tail[-1]
636
+ a, b = head.last, tail.last
637
+ b = b.dup.force_encoding a.encoding unless Encoding.compatible?(a, b)
638
+ a << b
406
639
  elsif htype == :dstr and head.size == 2 then
407
- head[-1] << tail[-1]
640
+ head.last << tail.last
408
641
  else
409
642
  head << tail
410
643
  end
411
644
  when :dstr then
412
645
  if htype == :str then
413
- tail[1] = head[-1] + tail[1]
646
+ lineno = head.line
647
+ tail[1] = head.last + tail[1]
414
648
  head = tail
649
+ head.line = lineno
415
650
  else
416
- tail[0] = :array
417
- tail[1] = s(:str, tail[1])
418
- tail.delete_at 1 if tail[1] == s(:str, '')
651
+ tail.sexp_type = :array
652
+ tail[1] = s(:str, tail[1]).line tail.line
653
+ tail.delete_at 1 if tail[1] == s(:str, "")
419
654
 
420
- head.push(*tail[1..-1])
655
+ head.push(*tail.sexp_body)
421
656
  end
422
657
  when :evstr then
423
- head[0] = :dstr if htype == :str
424
- if head.size == 2 and tail.size > 1 and tail[1][0] == :str then
425
- head[-1] << tail[1][-1]
426
- head[0] = :str if head.size == 2 # HACK ?
658
+ if htype == :str then
659
+ f, l = head.file, head.line
660
+ head = s(:dstr, *head.sexp_body)
661
+ head.file = f
662
+ head.line = l
663
+ end
664
+
665
+ if head.size == 2 and tail.size > 1 and tail[1].sexp_type == :str then
666
+ head.last << tail[1].last
667
+ head.sexp_type = :str if head.size == 2 # HACK ?
427
668
  else
428
669
  head.push(tail)
429
670
  end
@@ -435,109 +676,238 @@ module RubyParserStuff
435
676
  return head
436
677
  end
437
678
 
438
- def logop(type, left, right) # TODO: rename logical_op
679
+ def local_pop in_def
680
+ lexer.cond.pop # group = local_pop
681
+ lexer.cmdarg.pop
682
+ self.env.unextend
683
+ self.in_def = in_def
684
+ end
685
+
686
+ def logical_op type, left, right
439
687
  left = value_expr left
440
688
 
441
- if left and left[0] == type and not left.paren then
442
- node, second = left, nil
689
+ if left and left.sexp_type == type and not left.paren then
690
+ node, rhs = left, nil
443
691
 
444
- while (second = node[2]) && second[0] == type and not second.paren do
445
- node = second
692
+ loop do
693
+ _, _lhs, rhs = node
694
+ break unless rhs && rhs.sexp_type == type and not rhs.paren
695
+ node = rhs
446
696
  end
447
697
 
448
- node[2] = s(type, second, right)
698
+ node.pop
699
+ node << s(type, rhs, right).line(rhs.line)
449
700
 
450
701
  return left
451
702
  end
452
703
 
453
- return s(type, left, right)
704
+ result = s(type, left, right)
705
+ result.line left.line if left.line
706
+ result
454
707
  end
455
708
 
456
709
  def new_aref val
457
710
  val[2] ||= s(:arglist)
458
- val[2][0] = :arglist if val[2][0] == :array # REFACTOR
459
- if val[0].node_type == :self then
460
- result = new_call nil, :"[]", val[2]
711
+ val[2].sexp_type = :arglist if val[2].sexp_type == :array # REFACTOR
712
+ new_call val[0], :"[]", val[2]
713
+ end
714
+
715
+ def new_arg val
716
+ arg, = val
717
+
718
+ case arg
719
+ when Symbol then
720
+ result = s(:args, arg).line line
721
+ when Sexp then
722
+ result = arg
723
+ when Array then
724
+ (arg, line), = val
725
+ result = s(:args, arg).line line
726
+ else
727
+ debug20 32
728
+ raise "Unknown f_arg type: #{val.inspect}"
729
+ end
730
+
731
+ result
732
+ end
733
+
734
+ def new_array_pattern const, pre_arg, arypat, loc
735
+ result = s(:array_pat, const).line loc
736
+ result << pre_arg if pre_arg
737
+
738
+ if arypat && arypat.sexp_type == :array_TAIL then
739
+ result.concat arypat.sexp_body
740
+ else
741
+ raise "NO?: %p" % [arypat]
742
+ end
743
+
744
+ result
745
+ end
746
+
747
+ def array_pat_concat lhs, rhs
748
+ case lhs.sexp_type
749
+ when :PATTERN then
750
+ lhs.sexp_type = :array_pat
751
+ end
752
+
753
+ if rhs then
754
+ case rhs.sexp_type
755
+ when :array_pat, :array_TAIL, :PATTERN then
756
+ lhs.concat rhs.sexp_body
757
+ else
758
+ lhs << rhs
759
+ end
760
+ end
761
+ end
762
+
763
+ def new_array_pattern_tail pre_args, has_rest, rest_arg, post_args
764
+ # TODO: remove has_rest once all tests pass !!!
765
+ rest_arg = if has_rest then
766
+ :"*#{rest_arg}"
767
+ else
768
+ nil
769
+ end
770
+
771
+ result = s(:array_TAIL).line 666
772
+
773
+ array_pat_concat result, pre_args
774
+
775
+ result << rest_arg if rest_arg
776
+
777
+ array_pat_concat result, post_args
778
+
779
+ result
780
+ end
781
+
782
+ def new_assign lhs, rhs
783
+ return nil unless lhs
784
+
785
+ rhs = value_expr rhs
786
+
787
+ case lhs.sexp_type
788
+ when :lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn, :attrasgn, :safe_attrasgn then
789
+ lhs << rhs
790
+ when :const then
791
+ lhs.sexp_type = :cdecl
792
+ lhs << rhs
461
793
  else
462
- result = new_call val[0], :"[]", val[2]
794
+ raise "unknown lhs #{lhs.inspect} w/ #{rhs.inspect}"
463
795
  end
796
+
797
+ lhs
798
+ end
799
+
800
+ def new_attrasgn recv, meth, call_op = :"."
801
+ call_op = call_op.first if Array === call_op
802
+
803
+ meth = :"#{meth}="
804
+
805
+ result = case call_op.to_sym
806
+ when :"."
807
+ s(:attrasgn, recv, meth)
808
+ when :"&."
809
+ s(:safe_attrasgn, recv, meth)
810
+ else
811
+ raise "unknown call operator: `#{type.inspect}`"
812
+ end
813
+
814
+ result.line = recv.line
815
+ result
816
+ end
817
+
818
+ def new_begin val
819
+ _, lineno, body, _ = val
820
+
821
+ result = body ? s(:begin, body) : s(:nil)
822
+ result.line lineno
823
+
464
824
  result
465
825
  end
466
826
 
467
827
  def new_body val
468
- result = val[0]
828
+ body, resbody, elsebody, ensurebody = val
829
+
830
+ result = body
469
831
 
470
- if val[1] then
832
+ if resbody then
471
833
  result = s(:rescue)
472
- result << val[0] if val[0]
834
+ result << body if body
473
835
 
474
- resbody = val[1]
836
+ res = resbody
475
837
 
476
- while resbody do
477
- result << resbody
478
- resbody = resbody.resbody(true)
838
+ while res do
839
+ result << res
840
+ res = res.resbody(true)
479
841
  end
480
842
 
481
- result << val[2] if val[2]
843
+ result << elsebody if elsebody
844
+
845
+ result.line = (body || resbody).line
846
+ end
482
847
 
483
- result.line = (val[0] || val[1]).line
484
- elsif not val[2].nil? then
848
+ if elsebody and not resbody then
485
849
  warning("else without rescue is useless")
486
- result = block_append(result, val[2])
850
+ result = s(:begin, result).line result.line if result
851
+ result = block_append(result, elsebody)
487
852
  end
488
853
 
489
- result = s(:ensure, result, val[3]).compact if val[3]
490
- return result
491
- end
854
+ if ensurebody
855
+ lineno = (result || ensurebody).line
856
+ result = s(:ensure, result, ensurebody).compact.line lineno
857
+ end
492
858
 
493
- def argl x
494
- x = s(:arglist, x) if x and x[0] != :arglist
495
- x
859
+ result
496
860
  end
497
861
 
498
- def backref_assign_error ref
499
- # TODO: need a test for this... obviously
500
- case ref.first
501
- when :nth_ref then
502
- raise "write a test 2"
503
- raise SyntaxError, "Can't set variable %p" % ref.last
504
- when :back_ref then
505
- raise "write a test 3"
506
- raise SyntaxError, "Can't set back reference %p" % ref.last
507
- else
508
- raise "Unknown backref type: #{ref.inspect}"
509
- end
862
+ def new_brace_body args, body, lineno
863
+ new_iter(nil, args, body).line lineno
510
864
  end
511
865
 
512
- def new_call recv, meth, args = nil
513
- result = s(:call, recv, meth)
514
- result.line = recv.line if recv
866
+ def new_call recv, meth, args = nil, call_op = :"."
867
+ call_op = call_op.first if Array === call_op
868
+
869
+ result = case call_op.to_sym
870
+ when :"."
871
+ s(:call, recv, meth)
872
+ when :"&."
873
+ s(:safe_call, recv, meth)
874
+ else
875
+ raise "unknown call operator: `#{type.inspect}`"
876
+ end
515
877
 
516
878
  # TODO: need a test with f(&b) to produce block_pass
517
879
  # TODO: need a test with f(&b) { } to produce warning
518
880
 
519
- args ||= s(:arglist)
520
- args[0] = :arglist if args.first == :array
521
- args = s(:arglist, args) unless args.first == :arglist
881
+ if args
882
+ if ARG_TYPES[args.sexp_type] then
883
+ result.concat args.sexp_body
884
+ else
885
+ result << args
886
+ end
887
+ end
522
888
 
523
- # HACK quick hack to make this work quickly... easy to clean up above
524
- result.concat args[1..-1]
889
+ # line = result.grep(Sexp).map(&:line).compact.min
890
+ result.line = recv.line if recv
891
+ result.line ||= lexer.lineno
525
892
 
526
893
  result
527
894
  end
528
895
 
529
- def new_case expr, body
896
+ def new_in pat, body, cases, line
897
+ s(:in, pat, body, cases).line line
898
+ end
899
+
900
+ def new_case expr, body, line
530
901
  result = s(:case, expr)
531
- line = (expr || body).line
532
902
 
533
- while body and body.node_type == :when
903
+ while body and [:when, :in].include? body.sexp_type
534
904
  result << body
535
905
  body = body.delete_at 3
536
906
  end
537
907
 
538
908
  result[2..-1].each do |node|
539
909
  block = node.block(:delete)
540
- node.concat block[1..-1] if block
910
+ node.concat block.sexp_body if block
541
911
  end
542
912
 
543
913
  # else
@@ -549,13 +919,16 @@ module RubyParserStuff
549
919
  end
550
920
 
551
921
  def new_class val
922
+ # TODO: get line from class keyword
552
923
  line, path, superclass, body = val[1], val[2], val[3], val[5]
553
924
 
925
+ path = path.first if path.instance_of? Array
926
+
554
927
  result = s(:class, path, superclass)
555
928
 
556
929
  if body then
557
- if body.first == :block then
558
- result.push(*body[1..-1])
930
+ if body.sexp_type == :block then
931
+ result.push(*body.sexp_body)
559
932
  else
560
933
  result.push body
561
934
  end
@@ -572,53 +945,188 @@ module RubyParserStuff
572
945
  result
573
946
  end
574
947
 
948
+ def new_const_op_asgn val
949
+ lhs, (asgn_op, _), rhs = val
950
+ asgn_op = asgn_op.to_sym
951
+
952
+ result = case asgn_op
953
+ when :"||" then
954
+ s(:op_asgn_or, lhs, rhs)
955
+ when :"&&" then
956
+ s(:op_asgn_and, lhs, rhs)
957
+ else
958
+ s(:op_asgn, lhs, asgn_op, rhs)
959
+ end
960
+
961
+ result.line = lhs.line
962
+ result
963
+ end
964
+
575
965
  def new_defn val
576
- (_, line), name, args, body = val[0], val[1], val[3], val[4]
577
- body ||= s(:nil)
966
+ _, (name, line), in_def, args, body, _ = val
578
967
 
579
- result = s(:defn, name.to_sym, args)
968
+ body ||= s(:nil).line line
580
969
 
581
- if body then
582
- if body.first == :block then
583
- result.push(*body[1..-1])
970
+ args.line line
971
+
972
+ result = s(:defn, name.to_sym, args).line line
973
+
974
+ if body.sexp_type == :block then
975
+ result.push(*body.sexp_body)
976
+ else
977
+ result.push body
978
+ end
979
+
980
+ result.comments = self.comments.pop
981
+
982
+ [result, in_def]
983
+ end
984
+
985
+ def new_endless_defn val
986
+ (name, line, in_def), args, _, body, _, resbody = val
987
+
988
+ result =
989
+ if resbody then
990
+ s(:defn, name, args,
991
+ new_rescue(body,
992
+ new_resbody(s(:array).line(line),
993
+ resbody))).line line
584
994
  else
585
- result.push body
995
+ s(:defn, name, args, body).line line
586
996
  end
587
- end
588
997
 
589
- result.line = line
998
+ local_pop in_def
999
+ endless_method_name result
1000
+
590
1001
  result.comments = self.comments.pop
1002
+
591
1003
  result
592
1004
  end
593
1005
 
594
- def new_defs val
595
- recv, name, args, body = val[1], val[4], val[6], val[7]
596
-
597
- result = s(:defs, recv, name.to_sym, args)
1006
+ def new_endless_defs val
1007
+ (recv, (name, line, in_def)), args, _, body, _, resbody = val
598
1008
 
599
- if body then
600
- if body.first == :block then
601
- result.push(*body[1..-1])
1009
+ result =
1010
+ if resbody then
1011
+ s(:defs, recv, name, args,
1012
+ new_rescue(body,
1013
+ new_resbody(s(:array).line(line),
1014
+ resbody))).line line
602
1015
  else
603
- result.push body
1016
+ s(:defs, recv, name, args, body).line(line)
604
1017
  end
605
- end
606
1018
 
607
- result.line = recv.line
1019
+ self.in_single -= 1
1020
+ local_pop in_def
1021
+ endless_method_name result
1022
+
608
1023
  result.comments = self.comments.pop
1024
+
609
1025
  result
610
1026
  end
611
1027
 
1028
+ def new_defs val
1029
+ _, recv, (name, line), in_def, args, body, _ = val
1030
+
1031
+ body ||= s(:nil).line line
1032
+
1033
+ args.line line
1034
+
1035
+ result = s(:defs, recv, name.to_sym, args).line line
1036
+
1037
+ # TODO: remove_begin
1038
+ # TODO: reduce_nodes
1039
+
1040
+ if body.sexp_type == :block then
1041
+ result.push(*body.sexp_body)
1042
+ else
1043
+ result.push body
1044
+ end
1045
+
1046
+ result.comments = self.comments.pop
1047
+
1048
+ [result, in_def]
1049
+ end
1050
+
1051
+ def new_do_body args, body, lineno
1052
+ new_iter(nil, args, body).line(lineno)
1053
+ end
1054
+
1055
+ def new_find_pattern const, pat
1056
+ pat.sexp_type = :find_pat
1057
+ pat.insert 1, const
1058
+ end
1059
+
1060
+ def new_find_pattern_tail lhs, mid, rhs
1061
+ lhs_id, line = lhs
1062
+ rhs_id, _line = rhs
1063
+
1064
+ # TODO: fpinfo->pre_rest_arg = pre_rest_arg ? assignable(p, pre_rest_arg, 0, loc) : NODE_SPECIAL_NO_NAME_REST;
1065
+
1066
+ lhs_id = "*#{lhs_id}".to_sym
1067
+ rhs_id = "*#{rhs_id}".to_sym
1068
+
1069
+ mid.sexp_type = :array_pat # HACK?
1070
+
1071
+ s(:find_pat_TAIL, lhs_id, mid, rhs_id).line line
1072
+ end
1073
+
612
1074
  def new_for expr, var, body
613
1075
  result = s(:for, expr, var).line(var.line)
614
1076
  result << body if body
615
1077
  result
616
1078
  end
617
1079
 
1080
+ def new_hash val
1081
+ _, line, assocs = val
1082
+
1083
+ s(:hash).line(line).concat assocs.sexp_body
1084
+ end
1085
+
1086
+ def new_hash_pattern const, hash_pat, loc
1087
+ _, pat, kw_args, kw_rest_arg = hash_pat
1088
+
1089
+ line = (const||hash_pat).line
1090
+
1091
+ result = s(:hash_pat, const).line line
1092
+ result.concat pat.sexp_body if pat
1093
+ result << kw_args if kw_args
1094
+ result << kw_rest_arg if kw_rest_arg
1095
+ result
1096
+ end
1097
+
1098
+ def new_hash_pattern_tail kw_args, kw_rest_arg, line # TODO: remove line arg
1099
+ # kw_rest_arg = assignable(kw_rest_arg, nil).line line if kw_rest_arg
1100
+
1101
+ result = s(:hash_pat).line line
1102
+ result << kw_args
1103
+
1104
+ if kw_rest_arg then
1105
+ name = kw_rest_arg.value
1106
+ # TODO: I _hate_ this:
1107
+ assignable [name, kw_rest_arg.line] if name != :**
1108
+ result << kw_rest_arg
1109
+ end
1110
+
1111
+ result
1112
+ end
1113
+
1114
+ def push_pktbl
1115
+ end
1116
+
1117
+ def pop_pktbl
1118
+ end
1119
+
1120
+ def push_pvtbl
1121
+ end
1122
+
1123
+ def pop_pvtbl
1124
+ end
1125
+
618
1126
  def new_if c, t, f
619
1127
  l = [c.line, t && t.line, f && f.line].compact.min
620
1128
  c = cond c
621
- c, t, f = c.last, f, t if c[0] == :not and canonicalize_conditions
1129
+ c, t, f = c.last, f, t if c.sexp_type == :not and canonicalize_conditions
622
1130
  s(:if, c, t, f).line(l)
623
1131
  end
624
1132
 
@@ -633,98 +1141,198 @@ module RubyParserStuff
633
1141
  result << args
634
1142
  result << body if body
635
1143
 
636
- args[0] = :args unless args == 0
1144
+ result.line call.line if call
1145
+
1146
+ unless args == 0 then
1147
+ args.line call.line if call
1148
+ args.sexp_type = :args
1149
+ end
637
1150
 
638
1151
  result
639
1152
  end
640
1153
 
641
1154
  def new_masgn lhs, rhs, wrap = false
1155
+ _, ary = lhs
1156
+
1157
+ line = rhs.line
642
1158
  rhs = value_expr(rhs)
643
- rhs = lhs[1] ? s(:to_ary, rhs) : s(:array, rhs) if wrap
1159
+ rhs = ary ? s(:to_ary, rhs) : s(:array, rhs) if wrap
1160
+ rhs.line line if wrap
644
1161
 
645
- lhs.delete_at 1 if lhs[1].nil?
1162
+ lhs.delete_at 1 if ary.nil?
646
1163
  lhs << rhs
647
1164
 
648
1165
  lhs
649
1166
  end
650
1167
 
1168
+ def new_masgn_arg rhs, wrap = false
1169
+ rhs = value_expr(rhs)
1170
+ # HACK: could be array if lhs isn't right
1171
+ rhs = s(:to_ary, rhs).line rhs.line if wrap
1172
+ rhs
1173
+ end
1174
+
1175
+ def new_match lhs, rhs
1176
+ if lhs then
1177
+ case lhs.sexp_type
1178
+ when :dregx, :dregx_once then
1179
+ # TODO: no test coverage
1180
+ return s(:match2, lhs, rhs).line(lhs.line)
1181
+ when :lit then
1182
+ return s(:match2, lhs, rhs).line(lhs.line) if Regexp === lhs.last
1183
+ end
1184
+ end
1185
+
1186
+ if rhs then
1187
+ case rhs.sexp_type
1188
+ when :dregx, :dregx_once then
1189
+ # TODO: no test coverage
1190
+ return s(:match3, rhs, lhs).line(lhs.line)
1191
+ when :lit then
1192
+ return s(:match3, rhs, lhs).line(lhs.line) if Regexp === rhs.last
1193
+ end
1194
+ end
1195
+
1196
+ new_call(lhs, :"=~", argl(rhs)).line lhs.line
1197
+ end
1198
+
651
1199
  def new_module val
1200
+ # TODO: get line from module keyword
652
1201
  line, path, body = val[1], val[2], val[4]
653
1202
 
654
- result = s(:module, path)
1203
+ path = path.first if path.instance_of? Array
1204
+
1205
+ result = s(:module, path).line line
655
1206
 
656
1207
  if body then # REFACTOR?
657
- if body.first == :block then
658
- result.push(*body[1..-1])
1208
+ if body.sexp_type == :block then
1209
+ result.push(*body.sexp_body)
659
1210
  else
660
1211
  result.push body
661
1212
  end
662
1213
  end
663
1214
 
664
- result.line = line
665
1215
  result.comments = self.comments.pop
666
1216
  result
667
1217
  end
668
1218
 
669
1219
  def new_op_asgn val
670
- lhs, asgn_op, arg = val[0], val[1].to_sym, val[2]
671
- name = lhs.value
672
- arg = remove_begin(arg)
673
- result = case asgn_op # REFACTOR
1220
+ lhs, (op, _line), rhs = val
1221
+ op = op.to_sym
1222
+
1223
+ name = gettable(lhs.last).line lhs.line
1224
+ arg = remove_begin rhs
1225
+ result = case op # REFACTOR
674
1226
  when :"||" then
675
1227
  lhs << arg
676
- s(:op_asgn_or, self.gettable(name), lhs)
1228
+ s(:op_asgn_or, name, lhs).line lhs.line
677
1229
  when :"&&" then
678
1230
  lhs << arg
679
- s(:op_asgn_and, self.gettable(name), lhs)
1231
+ s(:op_asgn_and, name, lhs).line lhs.line
680
1232
  else
681
- # TODO: why [2] ?
682
- lhs[2] = new_call(self.gettable(name), asgn_op, argl(arg))
1233
+ lhs << new_call(name, op, argl(arg))
683
1234
  lhs
684
1235
  end
685
- result.line = lhs.line
1236
+
1237
+ result
1238
+ end
1239
+
1240
+ def new_op_asgn1 val
1241
+ lhs, _, args, _, (op, _), rhs = val
1242
+
1243
+ args.sexp_type = :arglist if args
1244
+
1245
+ result = s(:op_asgn1, lhs, args, op.to_sym, rhs)
1246
+ result.line lhs.line
1247
+ result
1248
+ end
1249
+
1250
+ def new_op_asgn2 val
1251
+ recv, (call_op, _), (meth, _), (op, _), arg = val
1252
+ meth = :"#{meth}="
1253
+
1254
+ result = case call_op.to_sym
1255
+ when :"."
1256
+ s(:op_asgn2, recv, meth, op.to_sym, arg)
1257
+ when :"&."
1258
+ s(:safe_op_asgn2, recv, meth, op.to_sym, arg)
1259
+ else
1260
+ raise "unknown call operator: `#{type.inspect}`"
1261
+ end
1262
+
1263
+ result.line = recv.line
686
1264
  result
687
1265
  end
688
1266
 
1267
+ def new_qsym_list
1268
+ s(:array).line lexer.lineno
1269
+ end
1270
+
1271
+ def new_qsym_list_entry val
1272
+ _, (str, line), _ = val
1273
+ s(:lit, str.to_sym).line line
1274
+ end
1275
+
1276
+ def new_qword_list
1277
+ s(:array).line lexer.lineno
1278
+ end
1279
+
1280
+ def new_qword_list_entry val
1281
+ _, (str, line), _ = val
1282
+ str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
1283
+ s(:str, str).line line
1284
+ end
1285
+
689
1286
  def new_regexp val
690
- node = val[1] || s(:str, '')
691
- options = val[2]
1287
+ (_, line), node, (options, _) = val
1288
+
1289
+ node ||= s(:str, "").line line
692
1290
 
693
1291
  o, k = 0, nil
694
1292
  options.split(//).uniq.each do |c| # FIX: this has a better home
695
1293
  v = {
696
- 'x' => Regexp::EXTENDED,
697
- 'i' => Regexp::IGNORECASE,
698
- 'm' => Regexp::MULTILINE,
699
- 'o' => Regexp::ONCE,
700
- 'n' => Regexp::ENC_NONE,
701
- 'e' => Regexp::ENC_EUC,
702
- 's' => Regexp::ENC_SJIS,
703
- 'u' => Regexp::ENC_UTF8,
1294
+ "x" => Regexp::EXTENDED,
1295
+ "i" => Regexp::IGNORECASE,
1296
+ "m" => Regexp::MULTILINE,
1297
+ "o" => Regexp::ONCE,
1298
+ "n" => Regexp::ENC_NONE,
1299
+ "e" => Regexp::ENC_EUC,
1300
+ "s" => Regexp::ENC_SJIS,
1301
+ "u" => Regexp::ENC_UTF8,
704
1302
  }[c]
705
1303
  raise "unknown regexp option: #{c}" unless v
706
1304
  o += v
707
- k = c if c =~ /[esu]/
708
1305
  end
709
1306
 
710
- case node[0]
1307
+ case node.sexp_type
711
1308
  when :str then
712
- node[0] = :lit
1309
+ node.sexp_type = :lit
713
1310
  node[1] = if k then
714
1311
  Regexp.new(node[1], o, k)
715
1312
  else
716
- Regexp.new(node[1], o)
717
- end rescue node[1] # HACK - drops options
1313
+ begin
1314
+ Regexp.new(node[1], o)
1315
+ rescue RegexpError => e
1316
+ warn "WARNING: #{e.message} for #{node[1].inspect} #{options.inspect}"
1317
+ begin
1318
+ warn "WARNING: trying to recover with ENC_UTF8"
1319
+ Regexp.new(node[1], Regexp::ENC_UTF8)
1320
+ rescue RegexpError => e
1321
+ warn "WARNING: trying to recover with ENC_NONE"
1322
+ Regexp.new(node[1], Regexp::ENC_NONE)
1323
+ end
1324
+ end
1325
+ end
718
1326
  when :dstr then
719
1327
  if options =~ /o/ then
720
- node[0] = :dregx_once
1328
+ node.sexp_type = :dregx_once
721
1329
  else
722
- node[0] = :dregx
1330
+ node.sexp_type = :dregx
723
1331
  end
724
1332
  node << o if o and o != 0
725
1333
  else
726
- node = s(:dregx, '', node);
727
- node[0] = :dregx_once if options =~ /o/
1334
+ node = s(:dregx, "", node).line line
1335
+ node.sexp_type = :dregx_once if options =~ /o/
728
1336
  node << o if o and o != 0
729
1337
  end
730
1338
 
@@ -732,12 +1340,17 @@ module RubyParserStuff
732
1340
  end
733
1341
 
734
1342
  def new_resbody cond, body
735
- if body && body.first == :block then
1343
+ if body && body.sexp_type == :block then
736
1344
  body.shift # remove block and splat it in directly
737
1345
  else
738
1346
  body = [body]
739
1347
  end
740
- s(:resbody, cond, *body)
1348
+
1349
+ s(:resbody, cond, *body).line cond.line
1350
+ end
1351
+
1352
+ def new_rescue body, resbody
1353
+ s(:rescue, body, resbody).line body.line
741
1354
  end
742
1355
 
743
1356
  def new_sclass val
@@ -746,8 +1359,8 @@ module RubyParserStuff
746
1359
  result = s(:sclass, recv)
747
1360
 
748
1361
  if body then
749
- if body.first == :block then
750
- result.push(*body[1..-1])
1362
+ if body.sexp_type == :block then
1363
+ result.push(*body.sexp_body)
751
1364
  else
752
1365
  result.push body
753
1366
  end
@@ -759,20 +1372,56 @@ module RubyParserStuff
759
1372
  result
760
1373
  end
761
1374
 
1375
+ def new_string val
1376
+ (str, line), = val
1377
+
1378
+ str.force_encoding("UTF-8")
1379
+ # TODO: remove:
1380
+ str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
1381
+ s(:str, str).line line
1382
+ end
1383
+
762
1384
  def new_super args
763
- if args && args.node_type == :block_pass then
764
- s(:super, args)
1385
+ if args && args.sexp_type == :block_pass then
1386
+ s(:super, args).line args.line
1387
+ else
1388
+ args ||= s(:arglist).line lexer.lineno
1389
+ s(:super, *args.sexp_body).line args.line
1390
+ end
1391
+ end
1392
+
1393
+ def new_symbol val
1394
+ name = val.last
1395
+ s(:lit, name.to_sym).line lexer.lineno
1396
+ end
1397
+
1398
+ def new_symbol_list
1399
+ # TODO: hunt down and try to remove ALL lexer.lineno usage!
1400
+ s(:array).line lexer.lineno
1401
+ end
1402
+
1403
+ def new_symbol_list_entry val
1404
+ _, sym, _ = val
1405
+
1406
+ sym ||= s(:str, "").line lexer.lineno
1407
+
1408
+ case sym.sexp_type
1409
+ when :dstr then
1410
+ sym.sexp_type = :dsym
1411
+ when :str then
1412
+ sym = s(:lit, sym.last.to_sym).line sym.line
765
1413
  else
766
- args ||= s(:arglist)
767
- s(:super, *args[1..-1])
1414
+ sym = s(:dsym, "", sym).line sym.line
768
1415
  end
1416
+
1417
+ sym
769
1418
  end
770
1419
 
771
1420
  def new_undef n, m = nil
772
1421
  if m then
773
- block_append(n, s(:undef, m))
1422
+ block_append(n, s(:undef, m).line(m.line))
774
1423
  else
775
- s(:undef, n)
1424
+ s(:undef, n).line n.line
776
1425
  end
777
1426
  end
778
1427
 
@@ -783,11 +1432,11 @@ module RubyParserStuff
783
1432
  def new_until_or_while type, block, expr, pre
784
1433
  other = type == :until ? :while : :until
785
1434
  line = [block && block.line, expr.line].compact.min
786
- block, pre = block.last, false if block && block[0] == :begin
1435
+ block, pre = block.last, false if block && block.sexp_type == :begin
787
1436
 
788
1437
  expr = cond expr
789
1438
 
790
- result = unless expr.first == :not and canonicalize_conditions then
1439
+ result = unless expr.sexp_type == :not and canonicalize_conditions then
791
1440
  s(type, expr, block, pre)
792
1441
  else
793
1442
  s(other, expr.last, block, pre)
@@ -805,147 +1454,78 @@ module RubyParserStuff
805
1454
  new_until_or_while :while, block, expr, pre
806
1455
  end
807
1456
 
808
- def new_xstring str
809
- if str then
810
- case str[0]
1457
+ def new_word_list
1458
+ s(:array).line lexer.lineno
1459
+ end
1460
+
1461
+ def new_word_list_entry val
1462
+ _, word, _ = val
1463
+ word.sexp_type == :evstr ? s(:dstr, "", word).line(word.line) : word
1464
+ end
1465
+
1466
+ def new_xstring val
1467
+ _, node = val
1468
+
1469
+ node ||= s(:str, "").line lexer.lineno
1470
+
1471
+ if node then
1472
+ case node.sexp_type
811
1473
  when :str
812
- str[0] = :xstr
1474
+ node.sexp_type = :xstr
813
1475
  when :dstr
814
- str[0] = :dxstr
1476
+ node.sexp_type = :dxstr
815
1477
  else
816
- str = s(:dxstr, '', str)
1478
+ node = s(:dxstr, "", node).line node.line
817
1479
  end
818
- str
819
- else
820
- s(:xstr, '')
821
1480
  end
1481
+
1482
+ node
822
1483
  end
823
1484
 
824
1485
  def new_yield args = nil
825
1486
  # TODO: raise args.inspect unless [:arglist].include? args.first # HACK
826
- raise "write a test 4" if args && args.node_type == :block_pass
1487
+ raise "write a test 4" if args && args.sexp_type == :block_pass
827
1488
  raise SyntaxError, "Block argument should not be given." if
828
- args && args.node_type == :block_pass
1489
+ args && args.sexp_type == :block_pass
829
1490
 
830
- args ||= s(:arglist)
1491
+ args ||= s(:arglist).line lexer.lineno
831
1492
 
832
- # TODO: I can prolly clean this up
833
- args[0] = :arglist if args.first == :array
834
- args = s(:arglist, args) unless args.first == :arglist
835
-
836
- return s(:yield, *args[1..-1])
837
- end
1493
+ args.sexp_type = :arglist if [:call_args, :array].include? args.sexp_type
1494
+ args = s(:arglist, args).line args.line unless args.sexp_type == :arglist
838
1495
 
839
- def next_token
840
- if self.lexer.advance then
841
- return self.lexer.token, self.lexer.yacc_value
842
- else
843
- return [false, '$end']
844
- end
1496
+ s(:yield, *args.sexp_body).line args.line
845
1497
  end
846
1498
 
847
- def node_assign(lhs, rhs) # TODO: rename new_assign
848
- return nil unless lhs
849
-
850
- rhs = value_expr rhs
851
-
852
- case lhs[0]
853
- when :gasgn, :iasgn, :lasgn, :masgn, :cdecl, :cvdecl, :cvasgn then
854
- lhs << rhs
855
- when :attrasgn, :call then
856
- args = lhs.pop unless Symbol === lhs.last
857
- lhs.concat arg_add(args, rhs)[1..-1]
858
- when :const then
859
- lhs[0] = :cdecl
860
- lhs << rhs
1499
+ def prev_value_to_lineno v
1500
+ s, n = v
1501
+ if String === s then
1502
+ n
861
1503
  else
862
- raise "unknown lhs #{lhs.inspect}"
1504
+ lexer.lineno
863
1505
  end
864
-
865
- lhs
866
1506
  end
867
1507
 
868
- ##
869
- # Returns a UTF-8 encoded string after processing BOMs and magic
870
- # encoding comments.
871
- #
872
- # Holy crap... ok. Here goes:
873
- #
874
- # Ruby's file handling and encoding support is insane. We need to be
875
- # able to lex a file. The lexer file is explicitly UTF-8 to make
876
- # things cleaner. This allows us to deal with extended chars in
877
- # class and method names. In order to do this, we need to encode all
878
- # input source files as UTF-8. First, we look for a UTF-8 BOM by
879
- # looking at the first line while forcing its encoding to
880
- # ASCII-8BIT. If we find a BOM, we strip it and set the expected
881
- # encoding to UTF-8. Then, we search for a magic encoding comment.
882
- # If found, it overrides the BOM. Finally, we force the encoding of
883
- # the input string to whatever was found, and then encode that to
884
- # UTF-8 for compatibility with the lexer.
885
-
886
- def handle_encoding str
887
- str = str.dup
888
- ruby19 = str.respond_to? :encoding
889
- encoding = nil
890
-
891
- header = str.lines.first(2)
892
- header.map! { |s| s.force_encoding "ASCII-8BIT" } if ruby19
893
-
894
- first = header.first || ""
895
- encoding, str = "utf-8", str[3..-1] if first =~ /\A\xEF\xBB\xBF/
896
-
897
- encoding = $1.strip if header.find { |s|
898
- s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
899
- s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
900
- }
1508
+ def next_token
1509
+ token = self.lexer.next_token
901
1510
 
902
- if encoding then
903
- if ruby19 then
904
- encoding.sub!(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats
905
- hack_encoding str, encoding
906
- else
907
- warn "Skipping magic encoding comment"
908
- end
1511
+ if token and token.first != RubyLexer::EOF then
1512
+ self.last_token_type = token
1513
+ return token
1514
+ elsif !token
1515
+ return self.lexer.next_token
909
1516
  else
910
- # nothing specified... ugh. try to encode as utf-8
911
- hack_encoding str if ruby19
1517
+ return [false, false]
912
1518
  end
913
-
914
- str
915
1519
  end
916
1520
 
917
- def hack_encoding str, extra = nil
918
- # this is in sorted order of occurrence according to
919
- # charlock_holmes against 500k files
920
- encodings = [
921
- extra,
922
- Encoding::ISO_8859_1,
923
- Encoding::UTF_8,
924
- Encoding::ISO_8859_2,
925
- Encoding::ISO_8859_9,
926
- Encoding::SHIFT_JIS,
927
- Encoding::WINDOWS_1252,
928
- Encoding::EUC_JP,
929
- ].compact
930
-
931
- # terrible, horrible, no good, very bad, last ditch effort.
932
- encodings.each do |enc|
933
- begin
934
- str.force_encoding enc
935
- if str.valid_encoding? then
936
- str.encode! Encoding::UTF_8
937
- break
938
- end
939
- rescue Encoding::InvalidByteSequenceError
940
- # do nothing
941
- rescue Encoding::UndefinedConversionError
942
- # do nothing
943
- end
944
- end
945
-
946
- # no amount of pain is enough for you.
947
- raise "Bad encoding. Need a magic encoding comment." unless
948
- str.encoding.name == "UTF-8"
1521
+ def on_error(et, ev, values)
1522
+ ev = ev.first if ev.instance_of?(Array) && ev.size == 2 && ev.last.is_a?(Integer)
1523
+ super
1524
+ rescue Racc::ParseError => e
1525
+ # I don't like how the exception obscures the error message
1526
+ e.message.replace "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1527
+ warn e.message if $DEBUG
1528
+ raise
949
1529
  end
950
1530
 
951
1531
  ##
@@ -953,56 +1533,60 @@ module RubyParserStuff
953
1533
  # Timeout::Error if it runs for more than +time+ seconds.
954
1534
 
955
1535
  def process(str, file = "(string)", time = 10)
1536
+ str.freeze
1537
+
956
1538
  Timeout.timeout time do
957
1539
  raise "bad val: #{str.inspect}" unless String === str
958
1540
 
959
- str = handle_encoding str
1541
+ self.lexer.string = handle_encoding str
960
1542
 
961
1543
  self.file = file.dup
962
- self.lexer.src = str
963
1544
 
964
- @yydebug = ENV.has_key? 'DEBUG'
1545
+ @yydebug = ENV.has_key? "DEBUG"
965
1546
 
966
1547
  do_parse
967
1548
  end
968
1549
  end
969
1550
 
970
- alias :parse :process
1551
+ alias parse process
971
1552
 
972
1553
  def remove_begin node
973
- oldnode = node
974
- if node and :begin == node[0] and node.size == 2 then
975
- node = node[-1]
976
- node.line = oldnode.line
977
- end
1554
+ line = node.line
1555
+
1556
+ node = node.last while node and node.sexp_type == :begin and node.size == 2
1557
+
1558
+ node = s(:nil) if node == s(:begin)
1559
+
1560
+ node.line ||= line
1561
+
978
1562
  node
979
1563
  end
980
1564
 
1565
+ alias value_expr remove_begin # TODO: for now..? could check the tree, but meh?
1566
+
981
1567
  def reset
982
1568
  lexer.reset
983
1569
  self.in_def = false
984
1570
  self.in_single = 0
985
1571
  self.env.reset
986
1572
  self.comments.clear
987
- end
988
-
989
- def block_dup_check call_or_args, block
990
- syntax_error "Both block arg and actual block given." if
991
- block and call_or_args.block_pass?
1573
+ self.last_token_type = nil
992
1574
  end
993
1575
 
994
1576
  def ret_args node
995
1577
  if node then
996
- raise "write a test 5" if node[0] == :block_pass
1578
+ raise "write a test 5" if node.sexp_type == :block_pass
997
1579
 
998
1580
  raise SyntaxError, "block argument should not be given" if
999
- node[0] == :block_pass
1581
+ node.sexp_type == :block_pass
1582
+
1583
+ node.sexp_type = :array if node.sexp_type == :call_args
1584
+ node = node.last if node.sexp_type == :array && node.size == 2
1000
1585
 
1001
- node = node.last if node[0] == :array && node.size == 2
1002
1586
  # HACK matz wraps ONE of the FOUR splats in a newline to
1003
1587
  # distinguish. I use paren for now. ugh
1004
- node = s(:svalue, node) if node[0] == :splat and not node.paren
1005
- node[0] = :svalue if node[0] == :arglist && node[1][0] == :splat
1588
+ node = s(:svalue, node).line node.line if node.sexp_type == :splat and not node.paren
1589
+ node.sexp_type = :svalue if node.sexp_type == :arglist && node[1].sexp_type == :splat
1006
1590
  end
1007
1591
 
1008
1592
  node
@@ -1010,23 +1594,35 @@ module RubyParserStuff
1010
1594
 
1011
1595
  def s(*args)
1012
1596
  result = Sexp.new(*args)
1013
- result.line ||= lexer.lineno if lexer.src # otherwise...
1597
+ # result.line ||= lexer.lineno if lexer.ss unless ENV["CHECK_LINE_NUMS"] # otherwise...
1014
1598
  result.file = self.file
1015
1599
  result
1016
1600
  end
1017
1601
 
1018
- def value_expr oldnode # HACK
1019
- node = remove_begin oldnode
1020
- node.line = oldnode.line if oldnode
1021
- node[2] = value_expr(node[2]) if node and node[0] == :if
1022
- node
1602
+ def debug n
1603
+ if ENV["PRY"] then
1604
+ require "pry"; binding.pry
1605
+ end
1606
+
1607
+ raise RubyParser::SyntaxError, "debug #{n}"
1608
+ end
1609
+
1610
+ def syntax_error msg
1611
+ raise RubyParser::SyntaxError, msg
1023
1612
  end
1024
1613
 
1614
+ alias yyerror syntax_error
1615
+
1025
1616
  def void_stmts node
1026
1617
  return nil unless node
1027
- return node unless node[0] == :block
1618
+ return node unless node.sexp_type == :block
1619
+
1620
+ if node.respond_to? :sexp_body= then
1621
+ node.sexp_body = node.sexp_body.map { |n| remove_begin n }
1622
+ else
1623
+ node[1..-1] = node[1..-1].map { |n| remove_begin(n) }
1624
+ end
1028
1625
 
1029
- node[1..-1] = node[1..-1].map { |n| remove_begin(n) }
1030
1626
  node
1031
1627
  end
1032
1628
 
@@ -1034,21 +1630,45 @@ module RubyParserStuff
1034
1630
  # do nothing for now
1035
1631
  end
1036
1632
 
1037
- def yyerror msg
1038
- warn msg
1039
- super()
1633
+ def whitespace_width line, remove_width = nil
1634
+ col = 0
1635
+ idx = 0
1636
+
1637
+ line.chars.each do |c|
1638
+ break if remove_width && col >= remove_width
1639
+ case c
1640
+ when " " then
1641
+ col += 1
1642
+ when "\t" then
1643
+ n = TAB_WIDTH * (col / TAB_WIDTH + 1)
1644
+ break if remove_width && n > remove_width
1645
+ col = n
1646
+ else
1647
+ break
1648
+ end
1649
+ idx += 1
1650
+ end
1651
+
1652
+ if remove_width then
1653
+ line[idx..-1]
1654
+ elsif line[idx].nil?
1655
+ nil
1656
+ else
1657
+ col
1658
+ end
1040
1659
  end
1041
1660
 
1042
- def on_error(et, ev, values)
1043
- super
1044
- rescue Racc::ParseError => e
1045
- # I don't like how the exception obscures the error message
1046
- msg = "# ERROR: %s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1047
- warn msg
1048
- raise
1661
+ alias remove_whitespace_width whitespace_width
1662
+
1663
+ def wrap type, node
1664
+ value, line = node
1665
+ value = value.to_sym if value.respond_to? :to_sym
1666
+ s(type, value).line line
1049
1667
  end
1050
1668
 
1051
1669
  class Keyword
1670
+ include RubyLexer::State::Values
1671
+
1052
1672
  class KWtable
1053
1673
  attr_accessor :name, :state, :id0, :id1
1054
1674
  def initialize(name, id=[], state=nil)
@@ -1061,80 +1681,75 @@ module RubyParserStuff
1061
1681
  ##
1062
1682
  # :stopdoc:
1063
1683
  #
1064
- # :expr_beg = ignore newline, +/- is a sign.
1065
- # :expr_end = newline significant, +/- is a operator.
1066
- # :expr_arg = newline significant, +/- is a operator.
1067
- # :expr_cmdarg = newline significant, +/- is a operator.
1068
- # :expr_endarg = newline significant, +/- is a operator.
1069
- # :expr_mid = newline significant, +/- is a operator.
1070
- # :expr_fname = ignore newline, no reserved words.
1071
- # :expr_dot = right after . or ::, no reserved words.
1072
- # :expr_class = immediate after class, no here document.
1684
+ # :expr_beg = ignore newline, +/- is a sign.
1685
+ # :expr_end = newline significant, +/- is an operator.
1686
+ # :expr_endarg = ditto, and unbound braces.
1687
+ # :expr_endfn = ditto, and unbound braces.
1688
+ # :expr_arg = newline significant, +/- is an operator.
1689
+ # :expr_cmdarg = ditto
1690
+ # :expr_mid = ditto
1691
+ # :expr_fname = ignore newline, no reserved words.
1692
+ # :expr_dot = right after . or ::, no reserved words.
1693
+ # :expr_class = immediate after class, no here document.
1694
+ # :expr_label = flag bit, label is allowed.
1695
+ # :expr_labeled = flag bit, just after a label.
1696
+ # :expr_fitem = symbol literal as FNAME.
1697
+ # :expr_value = :expr_beg -- work to remove. Need multi-state support.
1698
+
1699
+ expr_woot = EXPR_FNAME|EXPR_FITEM
1073
1700
 
1074
1701
  wordlist = [
1075
- ["end", [:kEND, :kEND ], :expr_end ],
1076
- ["else", [:kELSE, :kELSE ], :expr_beg ],
1077
- ["case", [:kCASE, :kCASE ], :expr_beg ],
1078
- ["ensure", [:kENSURE, :kENSURE ], :expr_beg ],
1079
- ["module", [:kMODULE, :kMODULE ], :expr_beg ],
1080
- ["elsif", [:kELSIF, :kELSIF ], :expr_beg ],
1081
- ["def", [:kDEF, :kDEF ], :expr_fname ],
1082
- ["rescue", [:kRESCUE, :kRESCUE_MOD ], :expr_mid ],
1083
- ["not", [:kNOT, :kNOT ], :expr_beg ],
1084
- ["then", [:kTHEN, :kTHEN ], :expr_beg ],
1085
- ["yield", [:kYIELD, :kYIELD ], :expr_arg ],
1086
- ["for", [:kFOR, :kFOR ], :expr_beg ],
1087
- ["self", [:kSELF, :kSELF ], :expr_end ],
1088
- ["false", [:kFALSE, :kFALSE ], :expr_end ],
1089
- ["retry", [:kRETRY, :kRETRY ], :expr_end ],
1090
- ["return", [:kRETURN, :kRETURN ], :expr_mid ],
1091
- ["true", [:kTRUE, :kTRUE ], :expr_end ],
1092
- ["if", [:kIF, :kIF_MOD ], :expr_beg ],
1093
- ["defined?", [:kDEFINED, :kDEFINED ], :expr_arg ],
1094
- ["super", [:kSUPER, :kSUPER ], :expr_arg ],
1095
- ["undef", [:kUNDEF, :kUNDEF ], :expr_fname ],
1096
- ["break", [:kBREAK, :kBREAK ], :expr_mid ],
1097
- ["in", [:kIN, :kIN ], :expr_beg ],
1098
- ["do", [:kDO, :kDO ], :expr_beg ],
1099
- ["nil", [:kNIL, :kNIL ], :expr_end ],
1100
- ["until", [:kUNTIL, :kUNTIL_MOD ], :expr_beg ],
1101
- ["unless", [:kUNLESS, :kUNLESS_MOD ], :expr_beg ],
1102
- ["or", [:kOR, :kOR ], :expr_beg ],
1103
- ["next", [:kNEXT, :kNEXT ], :expr_mid ],
1104
- ["when", [:kWHEN, :kWHEN ], :expr_beg ],
1105
- ["redo", [:kREDO, :kREDO ], :expr_end ],
1106
- ["and", [:kAND, :kAND ], :expr_beg ],
1107
- ["begin", [:kBEGIN, :kBEGIN ], :expr_beg ],
1108
- ["__LINE__", [:k__LINE__, :k__LINE__ ], :expr_end ],
1109
- ["class", [:kCLASS, :kCLASS ], :expr_class ],
1110
- ["__FILE__", [:k__FILE__, :k__FILE__ ], :expr_end ],
1111
- ["END", [:klEND, :klEND ], :expr_end ],
1112
- ["BEGIN", [:klBEGIN, :klBEGIN ], :expr_end ],
1113
- ["while", [:kWHILE, :kWHILE_MOD ], :expr_beg ],
1114
- ["alias", [:kALIAS, :kALIAS ], :expr_fname ],
1115
- ["__ENCODING__", [:k__ENCODING__, :k__ENCODING__], :expr_end],
1116
- ].map { |args| KWtable.new(*args) }
1702
+ ["alias", [:kALIAS, :kALIAS ], expr_woot ],
1703
+ ["and", [:kAND, :kAND ], EXPR_BEG ],
1704
+ ["begin", [:kBEGIN, :kBEGIN ], EXPR_BEG ],
1705
+ ["break", [:kBREAK, :kBREAK ], EXPR_MID ],
1706
+ ["case", [:kCASE, :kCASE ], EXPR_BEG ],
1707
+ ["class", [:kCLASS, :kCLASS ], EXPR_CLASS ],
1708
+ ["def", [:kDEF, :kDEF ], EXPR_FNAME ],
1709
+ ["defined?", [:kDEFINED, :kDEFINED ], EXPR_ARG ],
1710
+ ["do", [:kDO, :kDO ], EXPR_BEG ],
1711
+ ["else", [:kELSE, :kELSE ], EXPR_BEG ],
1712
+ ["elsif", [:kELSIF, :kELSIF ], EXPR_BEG ],
1713
+ ["end", [:kEND, :kEND ], EXPR_END ],
1714
+ ["ensure", [:kENSURE, :kENSURE ], EXPR_BEG ],
1715
+ ["false", [:kFALSE, :kFALSE ], EXPR_END ],
1716
+ ["for", [:kFOR, :kFOR ], EXPR_BEG ],
1717
+ ["if", [:kIF, :kIF_MOD ], EXPR_BEG ],
1718
+ ["in", [:kIN, :kIN ], EXPR_BEG ],
1719
+ ["module", [:kMODULE, :kMODULE ], EXPR_BEG ],
1720
+ ["next", [:kNEXT, :kNEXT ], EXPR_MID ],
1721
+ ["nil", [:kNIL, :kNIL ], EXPR_END ],
1722
+ ["not", [:kNOT, :kNOT ], EXPR_ARG ],
1723
+ ["or", [:kOR, :kOR ], EXPR_BEG ],
1724
+ ["redo", [:kREDO, :kREDO ], EXPR_END ],
1725
+ ["rescue", [:kRESCUE, :kRESCUE_MOD ], EXPR_MID ],
1726
+ ["retry", [:kRETRY, :kRETRY ], EXPR_END ],
1727
+ ["return", [:kRETURN, :kRETURN ], EXPR_MID ],
1728
+ ["self", [:kSELF, :kSELF ], EXPR_END ],
1729
+ ["super", [:kSUPER, :kSUPER ], EXPR_ARG ],
1730
+ ["then", [:kTHEN, :kTHEN ], EXPR_BEG ],
1731
+ ["true", [:kTRUE, :kTRUE ], EXPR_END ],
1732
+ ["undef", [:kUNDEF, :kUNDEF ], expr_woot ],
1733
+ ["unless", [:kUNLESS, :kUNLESS_MOD ], EXPR_BEG ],
1734
+ ["until", [:kUNTIL, :kUNTIL_MOD ], EXPR_BEG ],
1735
+ ["when", [:kWHEN, :kWHEN ], EXPR_BEG ],
1736
+ ["while", [:kWHILE, :kWHILE_MOD ], EXPR_BEG ],
1737
+ ["yield", [:kYIELD, :kYIELD ], EXPR_ARG ],
1738
+ ["BEGIN", [:klBEGIN, :klBEGIN ], EXPR_END ],
1739
+ ["END", [:klEND, :klEND ], EXPR_END ],
1740
+ ["__FILE__", [:k__FILE__, :k__FILE__ ], EXPR_END ],
1741
+ ["__LINE__", [:k__LINE__, :k__LINE__ ], EXPR_END ],
1742
+ ["__ENCODING__", [:k__ENCODING__, :k__ENCODING__], EXPR_END],
1743
+ ].map { |args|
1744
+ KWtable.new(*args)
1745
+ }
1117
1746
 
1118
1747
  # :startdoc:
1119
1748
 
1120
- WORDLIST18 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1121
- WORDLIST19 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1749
+ WORDLIST = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1122
1750
 
1123
- %w[and case elsif for if in module or unless until when while].each do |k|
1124
- WORDLIST19[k] = WORDLIST19[k].dup
1125
- WORDLIST19[k].state = :expr_value
1126
- end
1127
- %w[not].each do |k|
1128
- WORDLIST19[k] = WORDLIST19[k].dup
1129
- WORDLIST19[k].state = :expr_arg
1130
- end
1131
-
1132
- def self.keyword18 str # REFACTOR
1133
- WORDLIST18[str]
1134
- end
1135
-
1136
- def self.keyword19 str
1137
- WORDLIST19[str]
1751
+ def self.keyword str
1752
+ WORDLIST[str]
1138
1753
  end
1139
1754
  end
1140
1755
 
@@ -1159,55 +1774,28 @@ module RubyParserStuff
1159
1774
  @env.first
1160
1775
  end
1161
1776
 
1162
- def dynamic
1163
- idx = @dyn.index false
1164
- @env[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
1165
- end
1166
-
1167
- def dynamic?
1168
- @dyn[0] != false
1169
- end
1170
-
1171
1777
  def extend dyn = false
1172
1778
  @dyn.unshift dyn
1173
1779
  @env.unshift({})
1174
- @use.unshift({})
1175
1780
  end
1176
1781
 
1177
1782
  def initialize dyn = false
1178
1783
  @dyn = []
1179
1784
  @env = []
1180
- @use = []
1181
1785
  self.reset
1182
1786
  end
1183
1787
 
1184
1788
  def reset
1185
1789
  @dyn.clear
1186
1790
  @env.clear
1187
- @use.clear
1188
1791
  self.extend
1189
1792
  end
1190
1793
 
1191
1794
  def unextend
1192
1795
  @dyn.shift
1193
1796
  @env.shift
1194
- @use.shift
1195
1797
  raise "You went too far unextending env" if @env.empty?
1196
1798
  end
1197
-
1198
- def use id
1199
- @env.each_with_index do |env, i|
1200
- if env[id] then
1201
- @use[i][id] = true
1202
- end
1203
- end
1204
- end
1205
-
1206
- def used? id
1207
- idx = @dyn.index false # REFACTOR
1208
- u = @use[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
1209
- u[id]
1210
- end
1211
1799
  end
1212
1800
 
1213
1801
  class StackState
@@ -1215,10 +1803,10 @@ module RubyParserStuff
1215
1803
  attr_reader :stack
1216
1804
  attr_accessor :debug
1217
1805
 
1218
- def initialize(name)
1806
+ def initialize name, debug=false
1219
1807
  @name = name
1220
1808
  @stack = [false]
1221
- @debug = false
1809
+ @debug = debug
1222
1810
  end
1223
1811
 
1224
1812
  def inspect
@@ -1226,111 +1814,52 @@ module RubyParserStuff
1226
1814
  end
1227
1815
 
1228
1816
  def is_in_state
1229
- p :stack_is_in_state => [name, @stack.last, caller.first] if debug
1817
+ log :is_in_state if debug
1230
1818
  @stack.last
1231
1819
  end
1232
1820
 
1233
1821
  def lexpop
1234
- p :stack_lexpop => caller.first if debug
1235
1822
  raise if @stack.size == 0
1236
1823
  a = @stack.pop
1237
1824
  b = @stack.pop
1238
1825
  @stack.push(a || b)
1826
+ log :lexpop if debug
1827
+ end
1828
+
1829
+ def log action
1830
+ c = caller[1]
1831
+ c = caller[2] if c =~ /expr_result/
1832
+ warn "%s_stack.%s: %p at %s" % [name, action, @stack, c.clean_caller]
1833
+ nil
1239
1834
  end
1240
1835
 
1241
1836
  def pop
1242
1837
  r = @stack.pop
1243
- p :stack_pop => [name, r, @stack, caller.first] if debug
1244
- @stack.push false if @stack.size == 0
1838
+ @stack.push false if @stack.empty?
1839
+ log :pop if debug
1245
1840
  r
1246
1841
  end
1247
1842
 
1248
1843
  def push val
1249
1844
  @stack.push val
1250
- p :stack_push => [name, @stack, caller.first] if debug
1251
- nil
1845
+ log :push if debug
1252
1846
  end
1253
- end
1254
- end
1255
-
1256
- class Ruby19Parser < Racc::Parser
1257
- include RubyParserStuff
1258
- end
1259
-
1260
- class Ruby18Parser < Racc::Parser
1261
- include RubyParserStuff
1262
- end
1263
-
1264
- ##
1265
- # RubyParser is a compound parser that first attempts to parse using
1266
- # the 1.9 syntax parser and falls back to the 1.8 syntax parser on a
1267
- # parse error.
1268
-
1269
- class RubyParser
1270
- class SyntaxError < RuntimeError; end
1271
-
1272
- def initialize
1273
- @p18 = Ruby18Parser.new
1274
- @p19 = Ruby19Parser.new
1275
- end
1276
-
1277
- def process(s, f = "(string)") # parens for emacs *sigh*
1278
- Ruby19Parser.new.process s, f
1279
- rescue Racc::ParseError
1280
- Ruby18Parser.new.process s, f
1281
- end
1282
-
1283
- alias :parse :process
1284
1847
 
1285
- def reset
1286
- @p18.reset
1287
- @p19.reset
1288
- end
1289
- end
1290
-
1291
- ############################################################
1292
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
1293
-
1294
- unless "".respond_to?(:grep) then
1295
- class String
1296
- def grep re
1297
- lines.grep re
1848
+ def reset
1849
+ @stack = [false]
1850
+ log :reset if debug
1298
1851
  end
1299
- end
1300
- end
1301
-
1302
- class Sexp
1303
- attr_writer :paren
1304
-
1305
- def paren
1306
- @paren ||= false
1307
- end
1308
-
1309
- def value
1310
- raise "multi item sexp" if size > 2
1311
- last
1312
- end
1313
-
1314
- def to_sym
1315
- raise "no"
1316
- self.value.to_sym
1317
- end
1318
-
1319
- def add x
1320
- concat x
1321
- end
1322
1852
 
1323
- def add_all x
1324
- raise "no: #{self.inspect}.add_all #{x.inspect}" # TODO: need a test to trigger this
1325
- end
1853
+ def restore oldstate
1854
+ @stack.replace oldstate
1855
+ log :restore if debug
1856
+ end
1326
1857
 
1327
- def block_pass?
1328
- any? { |s| Sexp === s && s[0] == :block_pass }
1858
+ def store base = false
1859
+ result = @stack.dup
1860
+ @stack.replace [base]
1861
+ log :store if debug
1862
+ result
1863
+ end
1329
1864
  end
1330
-
1331
- alias :node_type :sexp_type
1332
- alias :values :sexp_body # TODO: retire
1333
1865
  end
1334
-
1335
- # END HACK
1336
- ############################################################