ruby_parser 3.13.1 → 3.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,135 +4,9 @@
4
4
  $DEBUG = true if ENV["DEBUG"]
5
5
 
6
6
  class RubyLexer
7
-
8
7
  # :stopdoc:
9
- HAS_ENC = "".respond_to? :encoding
10
-
11
- IDENT_CHAR = if HAS_ENC then
12
- /[\w\u0080-\u{10ffff}]/u
13
- else
14
- /[\w\x80-\xFF]/n
15
- end
16
-
17
8
  EOF = :eof_haha!
18
9
 
19
- # ruby constants for strings (should this be moved somewhere else?)
20
-
21
- STR_FUNC_BORING = 0x00
22
- STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
23
- STR_FUNC_EXPAND = 0x02
24
- STR_FUNC_REGEXP = 0x04
25
- STR_FUNC_QWORDS = 0x08
26
- STR_FUNC_SYMBOL = 0x10
27
- STR_FUNC_INDENT = 0x20 # <<-HEREDOC
28
- STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
29
-
30
- STR_SQUOTE = STR_FUNC_BORING
31
- STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
32
- STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
33
- STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
34
- STR_SSYM = STR_FUNC_SYMBOL
35
- STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
36
-
37
- class State
38
- attr_accessor :n
39
-
40
- def initialize o
41
- raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
42
-
43
- self.n = o
44
- end
45
-
46
- def == o
47
- o.class == self.class && o.n == self.n
48
- end
49
-
50
- def =~ v
51
- (self.n & v.n) != 0
52
- end
53
-
54
- def | v
55
- self.class.new(self.n | v.n)
56
- end
57
-
58
- def inspect
59
- return "EXPR_NONE" if n.zero?
60
- NAMES.map { |v,k| k if self =~ v }.compact.join "|"
61
- end
62
-
63
- module Values
64
- EXPR_NONE = State.new 0x0
65
- EXPR_BEG = State.new 0x1
66
- EXPR_END = State.new 0x2
67
- EXPR_ENDARG = State.new 0x4
68
- EXPR_ENDFN = State.new 0x8
69
- EXPR_ARG = State.new 0x10
70
- EXPR_CMDARG = State.new 0x20
71
- EXPR_MID = State.new 0x40
72
- EXPR_FNAME = State.new 0x80
73
- EXPR_DOT = State.new 0x100
74
- EXPR_CLASS = State.new 0x200
75
- EXPR_LABEL = State.new 0x400
76
- EXPR_LABELED = State.new 0x800
77
- EXPR_FITEM = State.new 0x1000
78
-
79
- EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
80
- EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
81
- EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
82
-
83
- # extra fake lex_state names to make things a bit cleaner
84
-
85
- EXPR_LAB = EXPR_ARG|EXPR_LABELED
86
- EXPR_NUM = EXPR_END|EXPR_ENDARG
87
- EXPR_PAR = EXPR_BEG|EXPR_LABEL
88
- EXPR_PAD = EXPR_BEG|EXPR_LABELED
89
- end
90
-
91
- include Values
92
-
93
- NAMES = {
94
- EXPR_NONE => "EXPR_NONE",
95
- EXPR_BEG => "EXPR_BEG",
96
- EXPR_END => "EXPR_END",
97
- EXPR_ENDARG => "EXPR_ENDARG",
98
- EXPR_ENDFN => "EXPR_ENDFN",
99
- EXPR_ARG => "EXPR_ARG",
100
- EXPR_CMDARG => "EXPR_CMDARG",
101
- EXPR_MID => "EXPR_MID",
102
- EXPR_FNAME => "EXPR_FNAME",
103
- EXPR_DOT => "EXPR_DOT",
104
- EXPR_CLASS => "EXPR_CLASS",
105
- EXPR_LABEL => "EXPR_LABEL",
106
- EXPR_LABELED => "EXPR_LABELED",
107
- EXPR_FITEM => "EXPR_FITEM",
108
- }
109
- end
110
-
111
- include State::Values
112
-
113
- if $DEBUG then
114
- def lex_state= o
115
- return if @lex_state == o
116
- raise ArgumentError, "bad state: %p" % [o] unless State === o
117
- if ENV["V"] then
118
- c = caller[0]
119
- c = caller[1] if c =~ /\b(expr_)?result\b/
120
- c = caller[2] if c =~ /\b(expr_)?result\b/
121
- warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
122
- else
123
- warn "lex_state: %p -> %p" % [lex_state, o]
124
- end
125
- @lex_state = o
126
- end
127
- else
128
- def lex_state= o
129
- raise ArgumentError, "bad state: %p" % [o] unless State === o
130
- @lex_state = o
131
- end
132
- end
133
-
134
- attr_reader :lex_state
135
-
136
10
  ESCAPES = {
137
11
  "a" => "\007",
138
12
  "b" => "\010",
@@ -149,6 +23,8 @@ class RubyLexer
149
23
  "c\?" => 127.chr,
150
24
  }
151
25
 
26
+ HAS_ENC = "".respond_to? :encoding
27
+
152
28
  TOKENS = {
153
29
  "!" => :tBANG,
154
30
  "!=" => :tNEQ,
@@ -165,13 +41,26 @@ class RubyLexer
165
41
  "->" => :tLAMBDA,
166
42
  }
167
43
 
168
- TAB_WIDTH = 8
169
-
170
- @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
44
+ @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
171
45
  @@regexp_cache[nil] = nil
172
46
 
47
+ if $DEBUG then
48
+ attr_reader :lex_state
49
+
50
+ def lex_state= o
51
+ return if @lex_state == o
52
+ raise ArgumentError, "bad state: %p" % [o] unless State === o
53
+
54
+ warn "lex_state: %p -> %p" % [lex_state, o]
55
+
56
+ @lex_state = o
57
+ end
58
+ end
59
+
173
60
  # :startdoc:
174
61
 
62
+ attr_accessor :lex_state unless $DEBUG
63
+
175
64
  attr_accessor :lineno # we're bypassing oedipus' lineno handling.
176
65
  attr_accessor :brace_nest
177
66
  attr_accessor :cmdarg
@@ -209,7 +98,7 @@ class RubyLexer
209
98
  end
210
99
 
211
100
  def arg_ambiguous
212
- self.warning("Ambiguous first argument. make sure.")
101
+ self.warning "Ambiguous first argument. make sure."
213
102
  end
214
103
 
215
104
  def arg_state
@@ -219,7 +108,12 @@ class RubyLexer
219
108
  def beginning_of_line?
220
109
  ss.bol?
221
110
  end
222
- alias :bol? :beginning_of_line? # to make .rex file more readable
111
+
112
+ alias bol? beginning_of_line? # to make .rex file more readable
113
+
114
+ def check re
115
+ ss.check re
116
+ end
223
117
 
224
118
  def comments # TODO: remove this... maybe comment_string + attr_accessor
225
119
  c = @comments.join
@@ -227,6 +121,12 @@ class RubyLexer
227
121
  c
228
122
  end
229
123
 
124
+ def eat_whitespace
125
+ r = scan(/\s+/)
126
+ self.extra_lineno += r.count("\n") if r
127
+ r
128
+ end
129
+
230
130
  def end_of_stream?
231
131
  ss.eos?
232
132
  end
@@ -245,12 +145,17 @@ class RubyLexer
245
145
  result EXPR_BEG, token, text
246
146
  end
247
147
 
148
+ def fixup_lineno extra = 0
149
+ self.lineno += self.extra_lineno + extra
150
+ self.extra_lineno = 0
151
+ end
152
+
248
153
  def heredoc here # TODO: rewrite / remove
249
154
  _, eos, func, last_line = here
250
155
 
251
- indent = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
252
- content_indent = (func & STR_FUNC_ICNTNT) != 0
253
- expand = (func & STR_FUNC_EXPAND) != 0
156
+ indent = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
157
+ expand = func =~ STR_FUNC_EXPAND
158
+ eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
254
159
  eos_re = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
255
160
  err_msg = "can't match #{eos_re.inspect} anywhere in "
256
161
 
@@ -259,30 +164,35 @@ class RubyLexer
259
164
  if beginning_of_line? && scan(eos_re) then
260
165
  self.lineno += 1
261
166
  ss.unread_many last_line # TODO: figure out how to remove this
262
- return :tSTRING_END, eos
167
+ return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
263
168
  end
264
169
 
265
170
  self.string_buffer = []
266
171
 
267
172
  if expand then
268
173
  case
269
- when scan(/#[$@]/) then
270
- ss.pos -= 1 # FIX omg stupid
174
+ when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
175
+ # TODO: !ISASCII
176
+ # ?! see parser_peek_variable_name
177
+ return :tSTRING_DVAR, matched
178
+ when scan(/#(?=\@\@?[a-zA-Z_])/) then
179
+ # TODO: !ISASCII
271
180
  return :tSTRING_DVAR, matched
272
181
  when scan(/#[{]/) then
182
+ self.command_start = true
273
183
  return :tSTRING_DBEG, matched
274
184
  when scan(/#/) then
275
- string_buffer << '#'
185
+ string_buffer << "#"
276
186
  end
277
187
 
278
188
  begin
279
- c = tokadd_string func, "\n", nil
189
+ c = tokadd_string func, eol, nil
280
190
 
281
191
  rb_compile_error err_msg if
282
192
  c == RubyLexer::EOF
283
193
 
284
- if c != "\n" then
285
- return :tSTRING_CONTENT, string_buffer.join.delete("\r")
194
+ if c != eol then
195
+ return :tSTRING_CONTENT, string_buffer.join
286
196
  else
287
197
  string_buffer << scan(/\n/)
288
198
  end
@@ -300,67 +210,24 @@ class RubyLexer
300
210
 
301
211
  string_content = begin
302
212
  s = string_buffer.join
303
- s.delete "\r"
304
- rescue ArgumentError
305
- s.b.delete("\r").force_encoding Encoding::UTF_8
213
+ s.b.force_encoding Encoding::UTF_8
306
214
  end
307
215
 
308
- string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
309
-
310
216
  return :tSTRING_CONTENT, string_content
311
217
  end
312
218
 
313
- def heredoc_dedent(string_content)
314
- width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
315
- heredoc_whitespace_indent_size whitespace
316
- end.min || 0
317
-
318
- string_content.split("\n", -1).map do |line|
319
- dedent_string line, width
320
- end.join "\n"
321
- end
322
-
323
- def dedent_string(string, width)
324
- characters_skipped = 0
325
- indentation_skipped = 0
326
-
327
- string.chars.each do |char|
328
- break if indentation_skipped >= width
329
- if char == ' '
330
- characters_skipped += 1
331
- indentation_skipped += 1
332
- elsif char == "\t"
333
- proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
334
- break if (proposed > width)
335
- characters_skipped += 1
336
- indentation_skipped = proposed
337
- end
338
- end
339
- string[characters_skipped..-1]
340
- end
341
-
342
- def heredoc_whitespace_indent_size(whitespace)
343
- whitespace.chars.inject 0 do |size, char|
344
- if char == "\t"
345
- size + TAB_WIDTH
346
- else
347
- size + 1
348
- end
349
- end
350
- end
351
-
352
219
  def heredoc_identifier # TODO: remove / rewrite
353
220
  term, func = nil, STR_FUNC_BORING
354
221
  self.string_buffer = []
355
222
 
356
- heredoc_indent_mods = '-'
223
+ heredoc_indent_mods = "-"
357
224
  heredoc_indent_mods += '\~' if ruby23plus?
358
225
 
359
226
  case
360
227
  when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
361
228
  term = ss[2]
362
- func |= STR_FUNC_INDENT unless ss[1].empty?
363
- func |= STR_FUNC_ICNTNT if ss[1] == '~'
229
+ func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
230
+ func |= STR_FUNC_ICNTNT if ss[1] == "~"
364
231
  func |= case term
365
232
  when "\'" then
366
233
  STR_SQUOTE
@@ -377,7 +244,7 @@ class RubyLexer
377
244
  func |= STR_DQUOTE
378
245
  unless ss[1].empty? then
379
246
  func |= STR_FUNC_INDENT
380
- func |= STR_FUNC_ICNTNT if ss[1] == '~'
247
+ func |= STR_FUNC_ICNTNT if ss[1] == "~"
381
248
  end
382
249
  string_buffer << ss[2]
383
250
  else
@@ -393,7 +260,7 @@ class RubyLexer
393
260
 
394
261
  self.lex_strterm = [:heredoc, string_buffer.join, func, line]
395
262
 
396
- if term == '`' then
263
+ if term == "`" then
397
264
  result nil, :tXSTRING_BEG, "`"
398
265
  else
399
266
  result nil, :tSTRING_BEG, "\""
@@ -404,26 +271,26 @@ class RubyLexer
404
271
  lex_state =~ EXPR_FNAME
405
272
  end
406
273
 
407
- def is_after_operator?
408
- lex_state =~ EXPR_FNAME|EXPR_DOT
409
- end
410
-
411
274
  def int_with_base base
412
275
  rb_compile_error "Invalid numeric format" if matched =~ /__/
413
276
 
414
277
  text = matched
415
278
  case
416
- when text.end_with?('ri')
279
+ when text.end_with?("ri")
417
280
  return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
418
- when text.end_with?('r')
281
+ when text.end_with?("r")
419
282
  return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
420
- when text.end_with?('i')
283
+ when text.end_with?("i")
421
284
  return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
422
285
  else
423
286
  return result(EXPR_NUM, :tINTEGER, text.to_i(base))
424
287
  end
425
288
  end
426
289
 
290
+ def is_after_operator?
291
+ lex_state =~ EXPR_FNAME|EXPR_DOT
292
+ end
293
+
427
294
  def is_arg?
428
295
  lex_state =~ EXPR_ARG_ANY
429
296
  end
@@ -436,15 +303,6 @@ class RubyLexer
436
303
  lex_state =~ EXPR_END_ANY
437
304
  end
438
305
 
439
- def lvar_defined? id
440
- # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
441
- self.parser.env[id.to_sym] == :lvar
442
- end
443
-
444
- def ruby22_label?
445
- ruby22plus? and is_label_possible?
446
- end
447
-
448
306
  def is_label_possible?
449
307
  (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
450
308
  end
@@ -461,6 +319,16 @@ class RubyLexer
461
319
  lpar_beg && lpar_beg == paren_nest
462
320
  end
463
321
 
322
+ def is_local_id id
323
+ # maybe just make this false for now
324
+ self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
325
+ end
326
+
327
+ def lvar_defined? id
328
+ # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
329
+ self.parser.env[id.to_sym] == :lvar
330
+ end
331
+
464
332
  def matched
465
333
  ss.matched
466
334
  end
@@ -469,6 +337,134 @@ class RubyLexer
469
337
  not is_end?
470
338
  end
471
339
 
340
+ def parse_quote # TODO: remove / rewrite
341
+ beg, nnd, short_hand, c = nil, nil, false, nil
342
+
343
+ if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
344
+ rb_compile_error "unknown type of %string" if ss.matched_size == 2
345
+ c, beg, short_hand = matched, getch, false
346
+ else # Short-hand (e.g. %{, %., %!, etc)
347
+ c, beg, short_hand = "Q", getch, true
348
+ end
349
+
350
+ if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
351
+ rb_compile_error "unterminated quoted string meets end of file"
352
+ end
353
+
354
+ # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
355
+ nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
356
+ nnd, beg = beg, "\0" if nnd.nil?
357
+
358
+ token_type, text = nil, "%#{c}#{beg}"
359
+ token_type, string_type = case c
360
+ when "Q" then
361
+ ch = short_hand ? nnd : c + beg
362
+ text = "%#{ch}"
363
+ [:tSTRING_BEG, STR_DQUOTE]
364
+ when "q" then
365
+ [:tSTRING_BEG, STR_SQUOTE]
366
+ when "W" then
367
+ eat_whitespace
368
+ [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
369
+ when "w" then
370
+ eat_whitespace
371
+ [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
372
+ when "x" then
373
+ [:tXSTRING_BEG, STR_XQUOTE]
374
+ when "r" then
375
+ [:tREGEXP_BEG, STR_REGEXP]
376
+ when "s" then
377
+ self.lex_state = EXPR_FNAME
378
+ [:tSYMBEG, STR_SSYM]
379
+ when "I" then
380
+ eat_whitespace
381
+ [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
382
+ when "i" then
383
+ eat_whitespace
384
+ [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
385
+ end
386
+
387
+ rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
388
+ token_type.nil?
389
+
390
+ raise "huh" unless string_type
391
+
392
+ string string_type, nnd, beg
393
+
394
+ return token_type, text
395
+ end
396
+
397
+ def parse_string quote # TODO: rewrite / remove
398
+ _, string_type, term, open = quote
399
+
400
+ space = false # FIX: remove these
401
+ func = string_type
402
+ paren = open
403
+ term_re = @@regexp_cache[term]
404
+
405
+ qwords = func =~ STR_FUNC_QWORDS
406
+ regexp = func =~ STR_FUNC_REGEXP
407
+ expand = func =~ STR_FUNC_EXPAND
408
+
409
+ unless func then # nil'ed from qwords below. *sigh*
410
+ return :tSTRING_END, nil
411
+ end
412
+
413
+ space = true if qwords and eat_whitespace
414
+
415
+ if self.string_nest == 0 && scan(/#{term_re}/) then
416
+ if qwords then
417
+ quote[1] = nil
418
+ return :tSPACE, nil
419
+ elsif regexp then
420
+ return :tREGEXP_END, self.regx_options
421
+ else
422
+ return :tSTRING_END, term
423
+ end
424
+ end
425
+
426
+ return :tSPACE, nil if space
427
+
428
+ self.string_buffer = []
429
+
430
+ if expand
431
+ case
432
+ when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
433
+ # TODO: !ISASCII
434
+ # ?! see parser_peek_variable_name
435
+ return :tSTRING_DVAR, nil
436
+ when scan(/#(?=\@\@?[a-zA-Z_])/) then
437
+ # TODO: !ISASCII
438
+ return :tSTRING_DVAR, nil
439
+ when scan(/#[{]/) then
440
+ self.command_start = true
441
+ return :tSTRING_DBEG, nil
442
+ when scan(/#/) then
443
+ string_buffer << "#"
444
+ end
445
+ end
446
+
447
+ if tokadd_string(func, term, paren) == RubyLexer::EOF then
448
+ if func =~ STR_FUNC_REGEXP then
449
+ rb_compile_error "unterminated regexp meets end of file"
450
+ else
451
+ rb_compile_error "unterminated string meets end of file"
452
+ end
453
+ end
454
+
455
+ return :tSTRING_CONTENT, string_buffer.join
456
+ end
457
+
458
+ def possibly_escape_string text, check
459
+ content = match[1]
460
+
461
+ if text =~ check then
462
+ content.gsub(ESC) { unescape $1 }
463
+ else
464
+ content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
465
+ end
466
+ end
467
+
472
468
  def process_amper text
473
469
  token = if is_arg? && space_seen && !check(/\s/) then
474
470
  warning("`&' interpreted as argument prefix")
@@ -503,20 +499,23 @@ class RubyLexer
503
499
  end
504
500
 
505
501
  def process_brace_close text
506
- # matching compare/parse23.y:8561
507
- cond.lexpop
508
- cmdarg.lexpop
509
-
510
502
  case matched
511
503
  when "}" then
512
504
  self.brace_nest -= 1
513
- self.lex_state = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6
514
-
515
505
  return :tSTRING_DEND, matched if brace_nest < 0
506
+ end
507
+
508
+ # matching compare/parse26.y:8099
509
+ cond.pop
510
+ cmdarg.pop
511
+
512
+ case matched
513
+ when "}" then
514
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
516
515
  return :tRCURLY, matched
517
516
  when "]" then
518
517
  self.paren_nest -= 1
519
- self.lex_state = EXPR_ENDARG
518
+ self.lex_state = ruby24minus? ? EXPR_ENDARG : EXPR_END
520
519
  return :tRBRACK, matched
521
520
  when ")" then
522
521
  self.paren_nest -= 1
@@ -527,30 +526,6 @@ class RubyLexer
527
526
  end
528
527
  end
529
528
 
530
- def process_colon1 text
531
- # ?: / then / when
532
- if is_end? || check(/\s/) then
533
- return result EXPR_BEG, :tCOLON, text
534
- end
535
-
536
- case
537
- when scan(/\'/) then
538
- string STR_SSYM
539
- when scan(/\"/) then
540
- string STR_DSYM
541
- end
542
-
543
- result EXPR_FNAME, :tSYMBEG, text
544
- end
545
-
546
- def process_colon2 text
547
- if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
548
- result EXPR_BEG, :tCOLON3, text
549
- else
550
- result EXPR_DOT, :tCOLON2, text
551
- end
552
- end
553
-
554
529
  def process_brace_open text
555
530
  # matching compare/parse23.y:8694
556
531
  self.brace_nest += 1
@@ -566,30 +541,54 @@ class RubyLexer
566
541
  when lex_state =~ EXPR_LABELED then
567
542
  :tLBRACE # hash
568
543
  when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
569
- :tLCURLY # block (primary) '{' in parse.y
544
+ :tLCURLY # block (primary) "{" in parse.y
570
545
  when lex_state =~ EXPR_ENDARG then
571
546
  :tLBRACE_ARG # block (expr)
572
547
  else
573
548
  :tLBRACE # hash
574
549
  end
575
550
 
576
- state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
577
- self.command_start = true if token != :tLBRACE
551
+ state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
552
+ self.command_start = true if token != :tLBRACE
553
+
554
+ cond.push false
555
+ cmdarg.push false
556
+ result state, token, text
557
+ end
558
+
559
+ def process_colon1 text
560
+ # ?: / then / when
561
+ if is_end? || check(/\s/) then
562
+ return result EXPR_BEG, :tCOLON, text
563
+ end
564
+
565
+ case
566
+ when scan(/\'/) then
567
+ string STR_SSYM
568
+ when scan(/\"/) then
569
+ string STR_DSYM
570
+ end
578
571
 
579
- cond.push false
580
- cmdarg.push false
581
- result state, token, text
572
+ result EXPR_FNAME, :tSYMBEG, text
573
+ end
574
+
575
+ def process_colon2 text
576
+ if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
577
+ result EXPR_BEG, :tCOLON3, text
578
+ else
579
+ result EXPR_DOT, :tCOLON2, text
580
+ end
582
581
  end
583
582
 
584
583
  def process_float text
585
584
  rb_compile_error "Invalid numeric format" if text =~ /__/
586
585
 
587
586
  case
588
- when text.end_with?('ri')
587
+ when text.end_with?("ri")
589
588
  return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
590
- when text.end_with?('i')
589
+ when text.end_with?("i")
591
590
  return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
592
- when text.end_with?('r')
591
+ when text.end_with?("r")
593
592
  return result EXPR_NUM, :tRATIONAL, Rational(text.chop)
594
593
  else
595
594
  return result EXPR_NUM, :tFLOAT, text.to_f
@@ -612,6 +611,24 @@ class RubyLexer
612
611
  result EXPR_END, tok_id, text
613
612
  end
614
613
 
614
+ def process_label text
615
+ symbol = possibly_escape_string text, /^\"/
616
+
617
+ result EXPR_LAB, :tLABEL, [symbol, self.lineno]
618
+ end
619
+
620
+ def process_label_or_string text
621
+ if @was_label && text =~ /:\Z/ then
622
+ @was_label = nil
623
+ return process_label text
624
+ elsif text =~ /:\Z/ then
625
+ ss.pos -= 1 # put back ":"
626
+ text = text[0..-2]
627
+ end
628
+
629
+ result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
630
+ end
631
+
615
632
  def process_lchevron text
616
633
  if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
617
634
  !is_end? &&
@@ -634,14 +651,14 @@ class RubyLexer
634
651
  c = matched
635
652
  hit = false
636
653
 
637
- if c == '#' then
654
+ if c == "#" then
638
655
  ss.pos -= 1
639
656
 
640
657
  # TODO: handle magic comments
641
658
  while scan(/\s*\#.*(\n+|\z)/) do
642
659
  hit = true
643
660
  self.lineno += matched.lines.to_a.size
644
- @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
661
+ @comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
645
662
  end
646
663
 
647
664
  return nil if end_of_stream?
@@ -697,7 +714,7 @@ class RubyLexer
697
714
  # "an argument list, not a decomposed argument")
698
715
  :tLPAREN2
699
716
  else
700
- :tLPAREN2 # plain '(' in parse.y
717
+ :tLPAREN2 # plain "(" in parse.y
701
718
  end
702
719
 
703
720
  self.paren_nest += 1
@@ -735,7 +752,7 @@ class RubyLexer
735
752
 
736
753
  return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
737
754
 
738
- if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
755
+ if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
739
756
  arg_ambiguous if is_arg?
740
757
 
741
758
  if check(/\d/) then
@@ -760,12 +777,12 @@ class RubyLexer
760
777
 
761
778
  if check(/\s|\v/) then
762
779
  unless is_arg? then
763
- c2 = { " " => 's',
764
- "\n" => 'n',
765
- "\t" => 't',
766
- "\v" => 'v',
767
- "\r" => 'r',
768
- "\f" => 'f' }[matched]
780
+ c2 = { " " => "s",
781
+ "\n" => "n",
782
+ "\t" => "t",
783
+ "\v" => "v",
784
+ "\r" => "r",
785
+ "\f" => "f" }[matched]
769
786
 
770
787
  if c2 then
771
788
  warning("invalid character syntax; use ?\\" + c2)
@@ -781,12 +798,22 @@ class RubyLexer
781
798
  c = if scan(/\\/) then
782
799
  self.read_escape
783
800
  else
784
- ss.getch
801
+ getch
785
802
  end
786
803
 
787
804
  result EXPR_END, :tSTRING, c
788
805
  end
789
806
 
807
+ def process_simple_string text
808
+ replacement = text[1..-2].gsub(ESC) {
809
+ unescape($1).b.force_encoding Encoding::UTF_8
810
+ }
811
+
812
+ replacement = replacement.b unless replacement.valid_encoding?
813
+
814
+ result EXPR_END, :tSTRING, replacement
815
+ end
816
+
790
817
  def process_slash text
791
818
  if is_beg? then
792
819
  string STR_REGEXP
@@ -838,43 +865,38 @@ class RubyLexer
838
865
  result EXPR_PAR, token, text
839
866
  end
840
867
 
841
- def possibly_escape_string text, check
842
- content = match[1]
843
-
844
- if text =~ check then
845
- content.gsub(ESC) { unescape $1 }
846
- else
847
- content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
848
- end
849
- end
850
-
851
- def process_symbol text
852
- symbol = possibly_escape_string text, /^:"/
868
+ def process_string # TODO: rewrite / remove
869
+ # matches top of parser_yylex in compare/parse23.y:8113
870
+ token = if lex_strterm[0] == :heredoc then
871
+ self.heredoc lex_strterm
872
+ else
873
+ self.parse_string lex_strterm
874
+ end
853
875
 
854
- result EXPR_END, :tSYMBOL, symbol
855
- end
876
+ token_type, c = token
856
877
 
857
- def was_label?
858
- @was_label = ruby22_label?
859
- true
860
- end
878
+ # matches parser_string_term from 2.3, but way off from 2.5
879
+ if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
880
+ if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
881
+ !cond.is_in_state) || is_arg?) &&
882
+ is_label_suffix? then
883
+ scan(/:/)
884
+ token_type = token[0] = :tLABEL_END
885
+ end
886
+ end
861
887
 
862
- def process_label_or_string text
863
- if @was_label && text =~ /:\Z/ then
864
- @was_label = nil
865
- return process_label text
866
- elsif text =~ /:\Z/ then
867
- ss.pos -= 1 # put back ":"
868
- text = text[0..-2]
888
+ if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
889
+ self.lex_strterm = nil
890
+ self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
869
891
  end
870
892
 
871
- result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
893
+ return token
872
894
  end
873
895
 
874
- def process_label text
875
- symbol = possibly_escape_string text, /^"/
896
+ def process_symbol text
897
+ symbol = possibly_escape_string text, /^:\"/ # stupid emacs
876
898
 
877
- result EXPR_LAB, :tLABEL, [symbol, self.lineno]
899
+ result EXPR_LIT, :tSYMBOL, symbol
878
900
  end
879
901
 
880
902
  def process_token text
@@ -902,6 +924,7 @@ class RubyLexer
902
924
 
903
925
  if is_label_possible? and is_label_suffix? then
904
926
  scan(/:/)
927
+ # TODO: propagate the lineno to ALL results
905
928
  return result EXPR_LAB, :tLABEL, [token, self.lineno]
906
929
  end
907
930
 
@@ -922,6 +945,8 @@ class RubyLexer
922
945
  EXPR_END
923
946
  end
924
947
 
948
+ tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
949
+
925
950
  if last_state !~ EXPR_DOT|EXPR_FNAME and
926
951
  (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
927
952
  lvar_defined?(token) then
@@ -945,18 +970,16 @@ class RubyLexer
945
970
  self.command_start = true if lex_state =~ EXPR_BEG
946
971
 
947
972
  case
948
- when keyword.id0 == :kDO then
973
+ when keyword.id0 == :kDO then # parse26.y line 7591
949
974
  case
950
975
  when lambda_beginning? then
951
976
  self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
952
- self.paren_nest -= 1
977
+ self.paren_nest -= 1 # TODO: question this?
953
978
  result lex_state, :kDO_LAMBDA, value
954
979
  when cond.is_in_state then
955
980
  result lex_state, :kDO_COND, value
956
981
  when cmdarg.is_in_state && state != EXPR_CMDARG then
957
982
  result lex_state, :kDO_BLOCK, value
958
- when state =~ EXPR_BEG|EXPR_ENDARG then
959
- result lex_state, :kDO_BLOCK, value
960
983
  else
961
984
  result lex_state, :kDO, value
962
985
  end
@@ -973,9 +996,9 @@ class RubyLexer
973
996
  ss.unscan # put back "_"
974
997
 
975
998
  if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
976
- return [RubyLexer::EOF, RubyLexer::EOF]
977
- elsif scan(/\_\w*/) then
978
- return process_token matched
999
+ [RubyLexer::EOF, RubyLexer::EOF]
1000
+ elsif scan(/#{IDENT_CHAR}+/) then
1001
+ process_token matched
979
1002
  end
980
1003
  end
981
1004
 
@@ -1012,7 +1035,7 @@ class RubyLexer
1012
1035
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
1013
1036
  # TODO: force encode everything to UTF-8?
1014
1037
  ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
1015
- when check(/M-\\[\\MCc]/) then
1038
+ when check(/M-\\./) then
1016
1039
  scan(/M-\\/) # eat it
1017
1040
  c = self.read_escape
1018
1041
  c[0] = (c[0].ord | 0x80).chr
@@ -1026,6 +1049,11 @@ class RubyLexer
1026
1049
  c = self.read_escape
1027
1050
  c[0] = (c[0].ord & 0x9f).chr
1028
1051
  c
1052
+ when check(/(C-|c)\\(?!u|\\)/) then
1053
+ scan(/(C-|c)\\/) # eat it
1054
+ c = read_escape
1055
+ c[0] = (c[0].ord & 0x9f).chr
1056
+ c
1029
1057
  when scan(/C-\?|c\?/) then
1030
1058
  127.chr
1031
1059
  when scan(/(C-|c)(.)/) then
@@ -1034,17 +1062,25 @@ class RubyLexer
1034
1062
  c
1035
1063
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
1036
1064
  matched
1037
- when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
1038
- [ss[1].delete("{}").to_i(16)].pack("U")
1039
- when scan(/u([0-9a-fA-F]{1,3})/) then
1065
+ when scan(/u(\h{4})/) then
1066
+ [ss[1].to_i(16)].pack("U")
1067
+ when scan(/u(\h{1,3})/) then
1040
1068
  rb_compile_error "Invalid escape character syntax"
1069
+ when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
1070
+ ss[1].split.map { |s| s.to_i(16) }.pack("U*")
1041
1071
  when scan(/[McCx0-9]/) || end_of_stream? then
1042
1072
  rb_compile_error("Invalid escape character syntax")
1043
1073
  else
1044
- ss.getch
1074
+ getch
1045
1075
  end.dup
1046
1076
  end
1047
1077
 
1078
+ def getch
1079
+ c = ss.getch
1080
+ c = ss.getch if c == "\r" && ss.peek(1) == "\n"
1081
+ c
1082
+ end
1083
+
1048
1084
  def regx_options # TODO: rewrite / remove
1049
1085
  good, bad = [], []
1050
1086
 
@@ -1084,23 +1120,24 @@ class RubyLexer
1084
1120
  [token, text]
1085
1121
  end
1086
1122
 
1087
- def scan re
1088
- ss.scan re
1123
+ def ruby22_label?
1124
+ ruby22plus? and is_label_possible?
1089
1125
  end
1090
1126
 
1091
- def check re
1092
- ss.check re
1127
+ def ruby22plus?
1128
+ parser.class.version >= 22
1093
1129
  end
1094
1130
 
1095
- def eat_whitespace
1096
- r = scan(/\s+/)
1097
- self.extra_lineno += r.count("\n") if r
1098
- r
1131
+ def ruby23plus?
1132
+ parser.class.version >= 23
1099
1133
  end
1100
1134
 
1101
- def fixup_lineno extra = 0
1102
- self.lineno += self.extra_lineno + extra
1103
- self.extra_lineno = 0
1135
+ def ruby24minus?
1136
+ parser.class.version <= 24
1137
+ end
1138
+
1139
+ def scan re
1140
+ ss.scan re
1104
1141
  end
1105
1142
 
1106
1143
  def scanner_class # TODO: design this out of oedipus_lex. or something.
@@ -1123,12 +1160,6 @@ class RubyLexer
1123
1160
  self.lex_strterm = [:strterm, type, beg, nnd]
1124
1161
  end
1125
1162
 
1126
- # TODO: consider
1127
- # def src= src
1128
- # raise "bad src: #{src.inspect}" unless String === src
1129
- # @src = RPStringScanner.new(src)
1130
- # end
1131
-
1132
1163
  def tokadd_escape term # TODO: rewrite / remove
1133
1164
  case
1134
1165
  when scan(/\\\n/) then
@@ -1158,22 +1189,24 @@ class RubyLexer
1158
1189
  end
1159
1190
 
1160
1191
  def tokadd_string(func, term, paren) # TODO: rewrite / remove
1161
- qwords = (func & STR_FUNC_QWORDS) != 0
1162
- escape = (func & STR_FUNC_ESCAPE) != 0
1163
- expand = (func & STR_FUNC_EXPAND) != 0
1164
- regexp = (func & STR_FUNC_REGEXP) != 0
1165
- symbol = (func & STR_FUNC_SYMBOL) != 0
1192
+ qwords = func =~ STR_FUNC_QWORDS
1193
+ escape = func =~ STR_FUNC_ESCAPE
1194
+ expand = func =~ STR_FUNC_EXPAND
1195
+ regexp = func =~ STR_FUNC_REGEXP
1196
+ symbol = func =~ STR_FUNC_SYMBOL
1166
1197
 
1167
1198
  paren_re = @@regexp_cache[paren]
1168
- term_re = @@regexp_cache[term]
1199
+ term_re = if term == "\n"
1200
+ /#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
1201
+ else
1202
+ @@regexp_cache[term]
1203
+ end
1169
1204
 
1170
1205
  until end_of_stream? do
1171
1206
  c = nil
1172
1207
  handled = true
1173
1208
 
1174
1209
  case
1175
- when paren_re && scan(paren_re) then
1176
- self.string_nest += 1
1177
1210
  when scan(term_re) then
1178
1211
  if self.string_nest == 0 then
1179
1212
  ss.pos -= 1
@@ -1181,6 +1214,8 @@ class RubyLexer
1181
1214
  else
1182
1215
  self.string_nest -= 1
1183
1216
  end
1217
+ when paren_re && scan(paren_re) then
1218
+ self.string_nest += 1
1184
1219
  when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
1185
1220
  ss.pos -= 1
1186
1221
  break
@@ -1195,7 +1230,7 @@ class RubyLexer
1195
1230
  string_buffer << "\n"
1196
1231
  next
1197
1232
  when qwords && scan(/\\\s/) then
1198
- c = ' '
1233
+ c = " "
1199
1234
  when expand && scan(/\\\n/) then
1200
1235
  next
1201
1236
  when regexp && check(/\\/) then
@@ -1220,12 +1255,16 @@ class RubyLexer
1220
1255
  end # top case
1221
1256
 
1222
1257
  unless handled then
1223
- t = Regexp.escape term
1224
- x = Regexp.escape(paren) if paren && paren != "\000"
1258
+ t = if term == "\n"
1259
+ Regexp.escape "\r\n"
1260
+ else
1261
+ Regexp.escape term
1262
+ end
1263
+ x = Regexp.escape paren if paren && paren != "\000"
1225
1264
  re = if qwords then
1226
- /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
1265
+ /[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
1227
1266
  else
1228
- /[^#{t}#{x}\#\0\\]+|./
1267
+ /[^#{t}#{x}\#\\]+|./
1229
1268
  end
1230
1269
 
1231
1270
  scan re
@@ -1265,10 +1304,12 @@ class RubyLexer
1265
1304
  s
1266
1305
  when /^[McCx0-9]/ then
1267
1306
  rb_compile_error("Invalid escape character syntax")
1268
- when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/ then
1307
+ when /u(\h{4})/ then
1269
1308
  [$1.delete("{}").to_i(16)].pack("U")
1270
- when /u([0-9a-fA-F]{1,3})/ then
1309
+ when /u(\h{1,3})/ then
1271
1310
  rb_compile_error("Invalid escape character syntax")
1311
+ when /u\{(\h+(?:\s+\h+)*)\}/ then
1312
+ $1.split.map { |s| s.to_i(16) }.pack("U*")
1272
1313
  else
1273
1314
  s
1274
1315
  end
@@ -1279,171 +1320,154 @@ class RubyLexer
1279
1320
  # do nothing for now
1280
1321
  end
1281
1322
 
1282
- def ruby22plus?
1283
- parser.class.version >= 22
1284
- end
1285
-
1286
- def ruby23plus?
1287
- parser.class.version >= 23
1323
+ def was_label?
1324
+ @was_label = ruby22_label?
1325
+ true
1288
1326
  end
1289
1327
 
1290
- def process_string # TODO: rewrite / remove
1291
- # matches top of parser_yylex in compare/parse23.y:8113
1292
- token = if lex_strterm[0] == :heredoc then
1293
- self.heredoc lex_strterm
1294
- else
1295
- self.parse_string lex_strterm
1296
- end
1328
+ class State
1329
+ attr_accessor :n
1330
+ attr_accessor :names
1297
1331
 
1298
- token_type, c = token
1332
+ # TODO: take a shared hash of strings for inspect/to_s
1333
+ def initialize o, names
1334
+ raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
1299
1335
 
1300
- # matches parser_string_term
1301
- if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
1302
- if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
1303
- !cond.is_in_state) || is_arg?) &&
1304
- is_label_suffix? then
1305
- scan(/:/)
1306
- token_type = token[0] = :tLABEL_END
1307
- end
1336
+ self.n = o
1337
+ self.names = names
1308
1338
  end
1309
1339
 
1310
- if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
1311
- self.lex_strterm = nil
1312
- self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
1340
+ def == o
1341
+ self.equal?(o) || (o.class == self.class && o.n == self.n)
1313
1342
  end
1314
1343
 
1315
- return token
1316
- end
1317
-
1318
- def parse_quote # TODO: remove / rewrite
1319
- beg, nnd, short_hand, c = nil, nil, false, nil
1320
-
1321
- if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
1322
- rb_compile_error "unknown type of %string" if ss.matched_size == 2
1323
- c, beg, short_hand = matched, ss.getch, false
1324
- else # Short-hand (e.g. %{, %., %!, etc)
1325
- c, beg, short_hand = 'Q', ss.getch, true
1344
+ def =~ v
1345
+ (self.n & v.n) != 0
1326
1346
  end
1327
1347
 
1328
- if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
1329
- rb_compile_error "unterminated quoted string meets end of file"
1348
+ def | v
1349
+ raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
1350
+ self.names == v.names
1351
+ self.class.new(self.n | v.n, self.names)
1330
1352
  end
1331
1353
 
1332
- # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
1333
- nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
1334
- nnd, beg = beg, "\0" if nnd.nil?
1335
-
1336
- token_type, text = nil, "%#{c}#{beg}"
1337
- token_type, string_type = case c
1338
- when 'Q' then
1339
- ch = short_hand ? nnd : c + beg
1340
- text = "%#{ch}"
1341
- [:tSTRING_BEG, STR_DQUOTE]
1342
- when 'q' then
1343
- [:tSTRING_BEG, STR_SQUOTE]
1344
- when 'W' then
1345
- eat_whitespace
1346
- [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1347
- when 'w' then
1348
- eat_whitespace
1349
- [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1350
- when 'x' then
1351
- [:tXSTRING_BEG, STR_XQUOTE]
1352
- when 'r' then
1353
- [:tREGEXP_BEG, STR_REGEXP]
1354
- when 's' then
1355
- self.lex_state = EXPR_FNAME
1356
- [:tSYMBEG, STR_SSYM]
1357
- when 'I' then
1358
- eat_whitespace
1359
- [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1360
- when 'i' then
1361
- eat_whitespace
1362
- [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1363
- end
1364
-
1365
- rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
1366
- token_type.nil?
1367
-
1368
- raise "huh" unless string_type
1369
-
1370
- string string_type, nnd, beg
1371
-
1372
- return token_type, text
1373
- end
1374
-
1375
- def parse_string quote # TODO: rewrite / remove
1376
- _, string_type, term, open = quote
1377
-
1378
- space = false # FIX: remove these
1379
- func = string_type
1380
- paren = open
1381
- term_re = @@regexp_cache[term]
1382
-
1383
- qwords = (func & STR_FUNC_QWORDS) != 0
1384
- regexp = (func & STR_FUNC_REGEXP) != 0
1385
- expand = (func & STR_FUNC_EXPAND) != 0
1354
+ def inspect
1355
+ return "Value(0)" if n.zero? # HACK?
1386
1356
 
1387
- unless func then # nil'ed from qwords below. *sigh*
1388
- return :tSTRING_END, nil
1357
+ names.map { |v, k| k if self =~ v }.
1358
+ compact.
1359
+ join("|").
1360
+ gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
1389
1361
  end
1390
1362
 
1391
- space = true if qwords and eat_whitespace
1363
+ alias to_s inspect
1392
1364
 
1393
- if self.string_nest == 0 && scan(/#{term_re}/) then
1394
- if qwords then
1395
- quote[1] = nil
1396
- return :tSPACE, nil
1397
- elsif regexp then
1398
- return :tREGEXP_END, self.regx_options
1399
- else
1400
- return :tSTRING_END, term
1401
- end
1402
- end
1365
+ module Values
1366
+ expr_names = {}
1367
+
1368
+ EXPR_NONE = State.new 0x0, expr_names
1369
+ EXPR_BEG = State.new 0x1, expr_names
1370
+ EXPR_END = State.new 0x2, expr_names
1371
+ EXPR_ENDARG = State.new 0x4, expr_names
1372
+ EXPR_ENDFN = State.new 0x8, expr_names
1373
+ EXPR_ARG = State.new 0x10, expr_names
1374
+ EXPR_CMDARG = State.new 0x20, expr_names
1375
+ EXPR_MID = State.new 0x40, expr_names
1376
+ EXPR_FNAME = State.new 0x80, expr_names
1377
+ EXPR_DOT = State.new 0x100, expr_names
1378
+ EXPR_CLASS = State.new 0x200, expr_names
1379
+ EXPR_LABEL = State.new 0x400, expr_names
1380
+ EXPR_LABELED = State.new 0x800, expr_names
1381
+ EXPR_FITEM = State.new 0x1000, expr_names
1403
1382
 
1404
- return :tSPACE, nil if space
1383
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
1384
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
1385
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
1405
1386
 
1406
- self.string_buffer = []
1387
+ # extra fake lex_state names to make things a bit cleaner
1407
1388
 
1408
- if expand
1409
- case
1410
- when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
1411
- # TODO: !ISASCII
1412
- # ?! see parser_peek_variable_name
1413
- return :tSTRING_DVAR, nil
1414
- when scan(/#(?=\@\@?[a-zA-Z_])/) then
1415
- # TODO: !ISASCII
1416
- return :tSTRING_DVAR, nil
1417
- when scan(/#[{]/) then
1418
- self.command_start = true
1419
- return :tSTRING_DBEG, nil
1420
- when scan(/#/) then
1421
- string_buffer << '#'
1422
- end
1423
- end
1389
+ EXPR_LAB = EXPR_ARG|EXPR_LABELED
1390
+ EXPR_LIT = EXPR_END|EXPR_ENDARG
1391
+ EXPR_PAR = EXPR_BEG|EXPR_LABEL
1392
+ EXPR_PAD = EXPR_BEG|EXPR_LABELED
1424
1393
 
1425
- if tokadd_string(func, term, paren) == RubyLexer::EOF then
1426
- rb_compile_error "unterminated string meets end of file"
1394
+ EXPR_NUM = EXPR_LIT
1395
+
1396
+ expr_names.merge!(EXPR_NONE => "EXPR_NONE",
1397
+ EXPR_BEG => "EXPR_BEG",
1398
+ EXPR_END => "EXPR_END",
1399
+ EXPR_ENDARG => "EXPR_ENDARG",
1400
+ EXPR_ENDFN => "EXPR_ENDFN",
1401
+ EXPR_ARG => "EXPR_ARG",
1402
+ EXPR_CMDARG => "EXPR_CMDARG",
1403
+ EXPR_MID => "EXPR_MID",
1404
+ EXPR_FNAME => "EXPR_FNAME",
1405
+ EXPR_DOT => "EXPR_DOT",
1406
+ EXPR_CLASS => "EXPR_CLASS",
1407
+ EXPR_LABEL => "EXPR_LABEL",
1408
+ EXPR_LABELED => "EXPR_LABELED",
1409
+ EXPR_FITEM => "EXPR_FITEM")
1410
+
1411
+ # ruby constants for strings
1412
+
1413
+ str_func_names = {}
1414
+
1415
+ STR_FUNC_BORING = State.new 0x00, str_func_names
1416
+ STR_FUNC_ESCAPE = State.new 0x01, str_func_names
1417
+ STR_FUNC_EXPAND = State.new 0x02, str_func_names
1418
+ STR_FUNC_REGEXP = State.new 0x04, str_func_names
1419
+ STR_FUNC_QWORDS = State.new 0x08, str_func_names
1420
+ STR_FUNC_SYMBOL = State.new 0x10, str_func_names
1421
+ STR_FUNC_INDENT = State.new 0x20, str_func_names # <<-HEREDOC
1422
+ STR_FUNC_LABEL = State.new 0x40, str_func_names
1423
+ STR_FUNC_LIST = State.new 0x4000, str_func_names
1424
+ STR_FUNC_TERM = State.new 0x8000, str_func_names
1425
+ STR_FUNC_ICNTNT = State.new 0x10000, str_func_names # <<~HEREDOC -- TODO: remove?
1426
+
1427
+ # TODO: check parser25.y on how they do STR_FUNC_INDENT
1428
+
1429
+ STR_SQUOTE = STR_FUNC_BORING
1430
+ STR_DQUOTE = STR_FUNC_EXPAND
1431
+ STR_XQUOTE = STR_FUNC_EXPAND
1432
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
1433
+ STR_SWORD = STR_FUNC_QWORDS | STR_FUNC_LIST
1434
+ STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
1435
+ STR_SSYM = STR_FUNC_SYMBOL
1436
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
1437
+
1438
+ str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
1439
+ STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
1440
+ STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
1441
+ STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
1442
+ STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
1443
+ STR_FUNC_INDENT => "STR_FUNC_INDENT",
1444
+ STR_FUNC_LABEL => "STR_FUNC_LABEL",
1445
+ STR_FUNC_LIST => "STR_FUNC_LIST",
1446
+ STR_FUNC_TERM => "STR_FUNC_TERM",
1447
+ STR_FUNC_ICNTNT => "STR_FUNC_ICNTNT",
1448
+ STR_SQUOTE => "STR_SQUOTE")
1427
1449
  end
1428
1450
 
1429
- return :tSTRING_CONTENT, string_buffer.join
1451
+ include Values
1430
1452
  end
1453
+
1454
+ include State::Values
1431
1455
  end
1432
1456
 
1433
1457
  require "ruby_lexer.rex"
1434
1458
 
1435
1459
  if ENV["RP_LINENO_DEBUG"] then
1436
1460
  class RubyLexer
1437
- alias :old_lineno= :lineno=
1438
-
1439
1461
  def d o
1440
1462
  $stderr.puts o.inspect
1441
1463
  end
1442
1464
 
1465
+ alias old_lineno= lineno=
1466
+
1443
1467
  def lineno= n
1444
1468
  self.old_lineno= n
1445
1469
  where = caller.first.split(/:/).first(2).join(":")
1446
- d :lineno => [n, where, ss && ss.rest[0,40]]
1470
+ d :lineno => [n, where, ss && ss.rest[0, 40]]
1447
1471
  end
1448
1472
  end
1449
1473
  end