parser 0.9.alpha → 0.9.alpha1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +21 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +29 -0
- data/Rakefile +15 -182
- data/lib/parser.rb +4 -0
- data/lib/parser/lexer.rl +1713 -0
- data/lib/parser/lexer_literal.rb +175 -0
- data/lib/parser/static_environment.rb +38 -0
- data/lib/parser/syntax_error.rb +3 -0
- data/parser.gemspec +25 -0
- data/test/{test_ruby_lexer.rb → test_lexer.rb} +77 -129
- data/test/test_static_environment.rb +46 -0
- metadata +39 -98
- data/.autotest +0 -50
- data/.gemtest +0 -0
- data/History.txt +0 -558
- data/Manifest.txt +0 -18
- data/README.txt +0 -87
- data/bin/ruby_parse +0 -96
- data/bin/ruby_parse_extract_error +0 -130
- data/lib/gauntlet_rubyparser.rb +0 -117
- data/lib/ruby18_parser.rb +0 -5706
- data/lib/ruby18_parser.y +0 -1846
- data/lib/ruby19_parser.rb +0 -6054
- data/lib/ruby19_parser.y +0 -2035
- data/lib/ruby_lexer.rb +0 -6789
- data/lib/ruby_parser.rb +0 -4
- data/lib/ruby_parser_extras.rb +0 -1148
- data/test/test_ruby_parser.rb +0 -1772
- data/test/test_ruby_parser_extras.rb +0 -228
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 384df81635da81957880f54cb589109db642c914
|
4
|
+
data.tar.gz: 101c991d44683e9ba699a3ec3deca74572fb7a09
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0724f1d86bbe49d1aa390c5bea5e3e6c859850b59be9ef0b78623f23cbe97cae94f75de5bc961cbdad1aabe6a1bb166e63dcac4103bb211a41c19ae86d8d3624
|
7
|
+
data.tar.gz: 1c64825e1d3a58b00d1a7038b599e161025b516435cde204e325061b1800416813f8ef2ab154f5b5dc99a1dc9cb1d11a9dfa33adb1e2e35f756c3fa10a9f5960
|
data/.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.output
|
19
|
+
lib/parser/lexer.rb
|
20
|
+
lib/parser/ruby18.rb
|
21
|
+
lib/parser/ruby19.rb
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
Copyright (c) 2013 Peter Zotov <whitequark@whitequark.org>
|
2
|
+
|
3
|
+
Parts of the source are derived from ruby_parser:
|
4
|
+
Copyright (c) Ryan Davis, seattle.rb
|
5
|
+
|
6
|
+
MIT License
|
7
|
+
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
9
|
+
a copy of this software and associated documentation files (the
|
10
|
+
"Software"), to deal in the Software without restriction, including
|
11
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
12
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
13
|
+
permit persons to whom the Software is furnished to do so, subject to
|
14
|
+
the following conditions:
|
15
|
+
|
16
|
+
The above copyright notice and this permission notice shall be
|
17
|
+
included in all copies or substantial portions of the Software.
|
18
|
+
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
22
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
23
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
24
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
25
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Parser
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/whitequark/parser.png?branch=master)](https://travis-ci.org/whitequark/parser)
|
4
|
+
[![Code Climate](https://codeclimate.com/github/whitequark/parser.png)](https://codeclimate.com/github/whitequark/parser)
|
5
|
+
|
6
|
+
Parser is a Ruby parser written in pure Ruby.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
$ gem install parser
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
TODO: Write usage instructions here
|
15
|
+
|
16
|
+
## Acknowledgements
|
17
|
+
|
18
|
+
The lexer testsuite is derived from [ruby_parser](http://github.com/seattlerb/ruby_parser).
|
19
|
+
|
20
|
+
The Bison parser rules are derived from [Ruby MRI](http://github.com/ruby/ruby) parse.y.
|
21
|
+
|
22
|
+
## Contributing
|
23
|
+
|
24
|
+
1. Make sure you have [Ragel 6.8](http://www.complang.org/ragel/) installed
|
25
|
+
2. Fork it
|
26
|
+
3. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
4. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
5. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
6. Create new Pull Request
|
data/Rakefile
CHANGED
@@ -1,192 +1,25 @@
|
|
1
|
-
|
1
|
+
require "bundler/gem_tasks"
|
2
2
|
|
3
|
-
|
4
|
-
require 'hoe'
|
3
|
+
task :default => [:generate, :test]
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
Hoe.add_include_dirs "../../sexp_processor/dev/lib"
|
11
|
-
|
12
|
-
Hoe.spec 'parser' do
|
13
|
-
developer 'Peter Zotov', 'whitequark@whitequark.org'
|
14
|
-
|
15
|
-
dependency 'sexp_processor', '~> 4.1'
|
16
|
-
|
17
|
-
self.racc_flags << " -t" if plugin?(:racc) && ENV["DEBUG"]
|
18
|
-
end
|
19
|
-
|
20
|
-
file "lib/ruby18_parser.rb" => "lib/ruby18_parser.y"
|
21
|
-
file "lib/ruby19_parser.rb" => "lib/ruby19_parser.y"
|
22
|
-
|
23
|
-
file "lib/ruby_lexer.rb" => "lib/ruby_lexer.rl" do |t|
|
24
|
-
sh "ragel -R #{t.prerequisites.first} -o #{t.name}"
|
25
|
-
end
|
26
|
-
|
27
|
-
task :clean do
|
28
|
-
rm_rf(Dir["**/*~"] +
|
29
|
-
Dir["**/*.diff"] +
|
30
|
-
Dir["coverage.info"] +
|
31
|
-
Dir["coverage"] +
|
32
|
-
Dir["lib/*.output"])
|
33
|
-
end
|
34
|
-
|
35
|
-
def next_num(glob)
|
36
|
-
num = Dir[glob].max[/\d+/].to_i + 1
|
37
|
-
end
|
38
|
-
|
39
|
-
desc "Compares PT to RP and deletes all files that match"
|
40
|
-
task :compare do
|
41
|
-
files = Dir["unit/**/*.rb"]
|
42
|
-
puts "Parsing #{files.size} files"
|
43
|
-
files.each do |file|
|
44
|
-
puts file
|
45
|
-
system "./cmp.rb -q #{file} && rm #{file}"
|
46
|
-
end
|
47
|
-
system 'find -d unit -type d -empty -exec rmdir {} \;'
|
48
|
-
end
|
49
|
-
|
50
|
-
desc "Compares PT to RP and stops on first failure"
|
51
|
-
task :find_bug do
|
52
|
-
files = Dir["unit/**/*.rb"]
|
53
|
-
puts "Parsing #{files.size} files"
|
54
|
-
files.each do |file|
|
55
|
-
puts file
|
56
|
-
sh "./cmp.rb -q #{file}"
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
task :sort do
|
61
|
-
sh 'grepsort "^ +def" lib/ruby_lexer.rb'
|
62
|
-
sh 'grepsort "^ +def (test|util)" test/test_ruby_lexer.rb'
|
63
|
-
end
|
64
|
-
|
65
|
-
task :loc do
|
66
|
-
loc1 = `wc -l ../1.0.0/lib/ruby_lexer.rb`[/\d+/]
|
67
|
-
flog1 = `flog -s ../1.0.0/lib/ruby_lexer.rb`[/\d+\.\d+/]
|
68
|
-
loc2 = `cat lib/ruby_lexer.rb lib/ruby_parser_extras.rb | wc -l`[/\d+/]
|
69
|
-
flog2 = `flog -s lib/ruby_lexer.rb lib/ruby_parser_extras.rb`[/\d+\.\d+/]
|
70
|
-
|
71
|
-
loc1, loc2, flog1, flog2 = loc1.to_i, loc2.to_i, flog1.to_f, flog2.to_f
|
72
|
-
|
73
|
-
puts "1.0.0: loc = #{loc1} flog = #{flog1}"
|
74
|
-
puts "dev : loc = #{loc2} flog = #{flog2}"
|
75
|
-
puts "delta: loc = #{loc2-loc1} flog = #{flog2-flog1}"
|
76
|
-
end
|
77
|
-
|
78
|
-
desc "Validate against all normal files in unit dir"
|
79
|
-
task :validate do
|
80
|
-
sh "./cmp.rb unit/*.rb"
|
81
|
-
end
|
82
|
-
|
83
|
-
def run_and_log cmd, prefix
|
84
|
-
files = ENV['FILES'] || 'unit/*.rb'
|
85
|
-
p, x = prefix, "txt"
|
86
|
-
n = Dir["#{p}.*.#{x}"].map { |s| s[/\d+/].to_i }.max + 1 rescue 1
|
87
|
-
f = "#{p}.#{n}.#{x}"
|
88
|
-
|
89
|
-
sh "#{cmd} #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g #{files} &> #{f}"
|
90
|
-
|
91
|
-
puts File.read(f)
|
92
|
-
end
|
93
|
-
|
94
|
-
desc "Benchmark against all normal files in unit dir"
|
95
|
-
task :benchmark do
|
96
|
-
run_and_log "ruby", "benchmark"
|
97
|
-
end
|
98
|
-
|
99
|
-
desc "Profile against all normal files in unit dir"
|
100
|
-
task :profile do
|
101
|
-
run_and_log "zenprofile", "profile"
|
102
|
-
end
|
103
|
-
|
104
|
-
desc "what was that command again?"
|
105
|
-
task :huh? do
|
106
|
-
puts "ruby #{Hoe::RUBY_FLAGS} bin/ruby_parse -q -g ..."
|
107
|
-
end
|
108
|
-
|
109
|
-
task :irb => [:isolate] do
|
110
|
-
sh "GEM_HOME=#{Gem.path.first} irb -rubygems -Ilib -rruby_parser;"
|
111
|
-
end
|
112
|
-
|
113
|
-
def (task(:phony)).timestamp
|
114
|
-
Time.at 0
|
115
|
-
end
|
116
|
-
|
117
|
-
task :isolate => :phony
|
118
|
-
|
119
|
-
file "lib/ruby18_parser.rb" => :isolate
|
120
|
-
file "lib/ruby19_parser.rb" => :isolate
|
121
|
-
|
122
|
-
task :compare18 do
|
123
|
-
sh "./yack.rb lib/ruby18_parser.output > racc18.txt"
|
124
|
-
sh "./yack.rb parse18.output > yacc18.txt"
|
125
|
-
sh "diff -du racc18.txt yacc18.txt || true"
|
126
|
-
puts
|
127
|
-
sh "diff -du racc18.txt yacc18.txt | wc -l"
|
128
|
-
end
|
129
|
-
|
130
|
-
task :compare19 do
|
131
|
-
sh "./yack.rb lib/ruby19_parser.output > racc19.txt"
|
132
|
-
sh "./yack.rb parse19.output > yacc19.txt"
|
133
|
-
sh "diff -du racc19.txt yacc19.txt || true"
|
134
|
-
puts
|
135
|
-
sh "diff -du racc19.txt yacc19.txt | wc -l"
|
136
|
-
end
|
137
|
-
|
138
|
-
task :debug => :isolate do
|
139
|
-
ENV["V"] ||= "19"
|
140
|
-
Rake.application[:parser].invoke # this way we can have DEBUG set
|
141
|
-
|
142
|
-
$: << "lib"
|
143
|
-
require 'ruby_parser'
|
144
|
-
require 'pp'
|
145
|
-
|
146
|
-
parser = if ENV["V"] == "18" then
|
147
|
-
Ruby18Parser.new
|
148
|
-
else
|
149
|
-
Ruby19Parser.new
|
150
|
-
end
|
151
|
-
|
152
|
-
time = (ENV["RP_TIMEOUT"] || 10).to_i
|
153
|
-
|
154
|
-
file = ENV["F"] || ENV["FILE"]
|
155
|
-
|
156
|
-
ruby = if file then
|
157
|
-
File.read(file)
|
158
|
-
else
|
159
|
-
file = "env"
|
160
|
-
ENV["R"] || ENV["RUBY"]
|
161
|
-
end
|
162
|
-
|
163
|
-
begin
|
164
|
-
pp parser.process(ruby, file, time)
|
165
|
-
rescue Racc::ParseError => e
|
166
|
-
p e
|
167
|
-
ss = parser.lexer.src
|
168
|
-
src = ss.string
|
169
|
-
lines = src[0..ss.pos].split(/\n/)
|
170
|
-
abort "on #{file}:#{lines.size}"
|
5
|
+
task :test do
|
6
|
+
$LOAD_PATH << File.expand_path('../lib/', __FILE__)
|
7
|
+
Dir["test/test_*.rb"].each do |file|
|
8
|
+
load file
|
171
9
|
end
|
172
10
|
end
|
173
11
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
task :extract => :isolate do
|
180
|
-
ENV["V"] ||= "19"
|
181
|
-
Rake.application[:parser].invoke # this way we can have DEBUG set
|
12
|
+
desc "Generate the Ragel lexer and Bison parser."
|
13
|
+
task :generate => %w(lib/parser/lexer.rb)
|
14
|
+
#lib/parser/ruby18.rb
|
15
|
+
#lib/parser/ruby19.rb)
|
182
16
|
|
183
|
-
|
17
|
+
task :build => :generate
|
184
18
|
|
185
|
-
|
19
|
+
rule '.rb' => '.rl' do |t|
|
20
|
+
sh "ragel -R #{t.source} -o #{t.name}"
|
186
21
|
end
|
187
22
|
|
188
|
-
|
189
|
-
sh "
|
23
|
+
rule '.rb' => '.y' do |t|
|
24
|
+
sh "racc #{t.source} -o #{t.name} -O"
|
190
25
|
end
|
191
|
-
|
192
|
-
# vim: syntax=Ruby
|
data/lib/parser.rb
ADDED
data/lib/parser/lexer.rl
ADDED
@@ -0,0 +1,1713 @@
|
|
1
|
+
%%machine lex; # % fix highlighting
|
2
|
+
|
3
|
+
#
|
4
|
+
# === BEFORE YOU START ===
|
5
|
+
#
|
6
|
+
# Remember two things about Ragel scanners:
|
7
|
+
#
|
8
|
+
# 1) Longest match wins.
|
9
|
+
#
|
10
|
+
# 2) If two matches have the same length, the first
|
11
|
+
# in source code wins.
|
12
|
+
#
|
13
|
+
# General rules of making Ragel and Bison happy:
|
14
|
+
#
|
15
|
+
# * `p` (position) and `@te` contain the index of the character
|
16
|
+
# they're pointing to ("current"), plus one. `@ts` contains the index
|
17
|
+
# of the corresponding character. The code for extracting matched token is:
|
18
|
+
#
|
19
|
+
# @source[@ts...@te]
|
20
|
+
#
|
21
|
+
# * If your input is `foooooooobar` and the rule is:
|
22
|
+
#
|
23
|
+
# 'f' 'o'+
|
24
|
+
#
|
25
|
+
# the result will be:
|
26
|
+
#
|
27
|
+
# foooooooobar
|
28
|
+
# ^ ts=0 ^ p=te=9
|
29
|
+
#
|
30
|
+
# * A Ragel lexer action should not emit more than one token, unless
|
31
|
+
# you know what you are doing.
|
32
|
+
#
|
33
|
+
# * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
|
34
|
+
#
|
35
|
+
# * If an action emits the token and transitions to another state, use
|
36
|
+
# these Ragel commands:
|
37
|
+
#
|
38
|
+
# emit($whatever)
|
39
|
+
# fnext $next_state; fbreak;
|
40
|
+
#
|
41
|
+
# * If an action does not emit a token:
|
42
|
+
#
|
43
|
+
# fgoto $next_state;
|
44
|
+
#
|
45
|
+
# * If an action features lookbehind, i.e. matches characters with the
|
46
|
+
# intent of passing them to another action:
|
47
|
+
#
|
48
|
+
# p = @ts - 1
|
49
|
+
# fgoto $next_state;
|
50
|
+
#
|
51
|
+
# or, if the lookbehind consists of a single character:
|
52
|
+
#
|
53
|
+
# fhold; fgoto $next_state;
|
54
|
+
#
|
55
|
+
# * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
|
56
|
+
# `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
|
57
|
+
# _will_ invoke the action `act`.
|
58
|
+
#
|
59
|
+
# * EOF is explicit and is matched by `c_eof`. If you want to introspect
|
60
|
+
# the state of the lexer, add this rule to the state:
|
61
|
+
#
|
62
|
+
# c_eof => do_eof;
|
63
|
+
#
|
64
|
+
# * If you proceed past EOF, the lexer will complain:
|
65
|
+
#
|
66
|
+
# NoMethodError: undefined method `ord' for nil:NilClass
|
67
|
+
#
|
68
|
+
|
69
|
+
require 'parser/lexer_literal'
|
70
|
+
require 'parser/syntax_error'
|
71
|
+
|
72
|
+
class Parser::Lexer
|
73
|
+
|
74
|
+
%% write data nofinal;
|
75
|
+
# %
|
76
|
+
|
77
|
+
attr_reader :source
|
78
|
+
attr_accessor :static_env
|
79
|
+
|
80
|
+
attr_reader :location, :comments
|
81
|
+
|
82
|
+
def initialize(version)
|
83
|
+
@version = version
|
84
|
+
|
85
|
+
reset
|
86
|
+
end
|
87
|
+
|
88
|
+
def reset(reset_state=true)
|
89
|
+
if reset_state
|
90
|
+
# Unit tests set state prior to resetting lexer.
|
91
|
+
@cs = self.class.lex_en_line_begin
|
92
|
+
end
|
93
|
+
|
94
|
+
# Ragel-internal variables:
|
95
|
+
@p = 0 # stream position (saved manually in #advance)
|
96
|
+
@ts = nil # token start
|
97
|
+
@te = nil # token end
|
98
|
+
@act = 0 # next action
|
99
|
+
|
100
|
+
@stack = [] # state stack
|
101
|
+
@top = 0 # state stack top pointer
|
102
|
+
|
103
|
+
@token_queue = []
|
104
|
+
@literal_stack = []
|
105
|
+
|
106
|
+
@newlines = [0] # sorted set of \n positions
|
107
|
+
@newline_s = nil # location of last encountered newline
|
108
|
+
@location = nil # location of last #advance'd token
|
109
|
+
|
110
|
+
@comments = "" # collected comments
|
111
|
+
|
112
|
+
@num_base = nil # last numeric base
|
113
|
+
@num_digits_s = nil # starting position of numeric digits
|
114
|
+
|
115
|
+
@escape_s = nil # starting position of current sequence
|
116
|
+
@escape = nil # last escaped sequence, as string
|
117
|
+
|
118
|
+
# See below the section on parsing heredocs.
|
119
|
+
@heredoc_e = nil
|
120
|
+
@herebody_s = nil
|
121
|
+
|
122
|
+
# Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
|
123
|
+
# encountered after a matching closing parenthesis.
|
124
|
+
@paren_nest = 0
|
125
|
+
@lambda_stack = []
|
126
|
+
end
|
127
|
+
|
128
|
+
def source=(source)
|
129
|
+
# Heredoc processing coupled with weird newline quirks
|
130
|
+
# require three '\0' (EOF) chars to be appended; after
|
131
|
+
# `p = @heredoc_s`, if `p` points at EOF, the FSM could
|
132
|
+
# not bail out early enough and will crash.
|
133
|
+
#
|
134
|
+
# Patches accepted.
|
135
|
+
#
|
136
|
+
@source = source.gsub(/\r\n/, "\n") + "\0\0\0"
|
137
|
+
end
|
138
|
+
|
139
|
+
LEX_STATES = {
|
140
|
+
:line_begin => lex_en_line_begin,
|
141
|
+
:expr_beg => lex_en_expr_beg,
|
142
|
+
:expr_value => lex_en_expr_value,
|
143
|
+
:expr_mid => lex_en_expr_mid,
|
144
|
+
:expr_dot => lex_en_expr_dot,
|
145
|
+
:expr_fname => lex_en_expr_fname,
|
146
|
+
:expr_end => lex_en_expr_end,
|
147
|
+
:expr_arg => lex_en_expr_arg,
|
148
|
+
:expr_endarg => lex_en_expr_endarg,
|
149
|
+
}
|
150
|
+
|
151
|
+
def state
|
152
|
+
LEX_STATES.invert.fetch(@cs, @cs)
|
153
|
+
end
|
154
|
+
|
155
|
+
def state=(state)
|
156
|
+
@cs = LEX_STATES.fetch(state)
|
157
|
+
end
|
158
|
+
|
159
|
+
# Return next token: [type, value].
|
160
|
+
def advance
|
161
|
+
if @token_queue.any?
|
162
|
+
return with_location(@token_queue.shift)
|
163
|
+
end
|
164
|
+
|
165
|
+
# Ugly, but dependent on Ragel output. Consider refactoring it somehow.
|
166
|
+
_lex_trans_keys = self.class.send :_lex_trans_keys
|
167
|
+
_lex_actions = self.class.send :_lex_actions
|
168
|
+
_lex_key_offsets = self.class.send :_lex_key_offsets
|
169
|
+
_lex_index_offsets = self.class.send :_lex_index_offsets
|
170
|
+
_lex_single_lengths = self.class.send :_lex_single_lengths
|
171
|
+
_lex_range_lengths = self.class.send :_lex_range_lengths
|
172
|
+
_lex_indicies = self.class.send :_lex_indicies
|
173
|
+
_lex_trans_targs = self.class.send :_lex_trans_targs
|
174
|
+
_lex_trans_actions = self.class.send :_lex_trans_actions
|
175
|
+
_lex_to_state_actions = self.class.send :_lex_to_state_actions
|
176
|
+
_lex_from_state_actions = self.class.send :_lex_from_state_actions
|
177
|
+
|
178
|
+
p, pe, eof = @p, @source.length + 1, nil
|
179
|
+
|
180
|
+
%% write exec;
|
181
|
+
# %
|
182
|
+
|
183
|
+
@p = p
|
184
|
+
|
185
|
+
if @token_queue.any?
|
186
|
+
with_location(@token_queue.shift)
|
187
|
+
elsif @cs == self.class.lex_error
|
188
|
+
with_location([ false, '$undefined', p, p + 1 ])
|
189
|
+
else
|
190
|
+
with_location([ false, '$end', p, p + 1 ])
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
# Like #advance, but also pretty-print the token and its position
|
195
|
+
# in the stream to `stdout`.
|
196
|
+
def advance_and_decorate
|
197
|
+
type, val = advance
|
198
|
+
|
199
|
+
puts decorate(location, "\e[0;32m#{type} #{val.inspect}\e[0m")
|
200
|
+
|
201
|
+
[type, val]
|
202
|
+
end
|
203
|
+
|
204
|
+
# Return the current collected comment block and clear the storage.
|
205
|
+
def clear_comments
|
206
|
+
comments = @comments
|
207
|
+
@comments = ""
|
208
|
+
|
209
|
+
comments
|
210
|
+
end
|
211
|
+
|
212
|
+
# Lex `str` for the Ruby version `version` with initial state `state`.
|
213
|
+
#
|
214
|
+
# The tokens displayed by this function are not the same as tokens
|
215
|
+
# consumed by parser, because the parser manipulates lexer state on
|
216
|
+
# its own.
|
217
|
+
def self.do(source, state=nil, version=19)
|
218
|
+
lex = new(version)
|
219
|
+
lex.source = source
|
220
|
+
lex.state = state if state
|
221
|
+
|
222
|
+
loop do
|
223
|
+
type, val = lex.advance_and_decorate
|
224
|
+
break if !type
|
225
|
+
end
|
226
|
+
|
227
|
+
puts "Lex state: #{lex.state}"
|
228
|
+
end
|
229
|
+
|
230
|
+
# Used by LexerLiteral to emit tokens for string content.
|
231
|
+
def emit(type, value = tok, s = @ts, e = @te)
|
232
|
+
if s.nil? || e.nil?
|
233
|
+
raise "broken #emit invocation in #{caller[0]}"
|
234
|
+
end
|
235
|
+
|
236
|
+
@token_queue << [ type, value, s, e ]
|
237
|
+
end
|
238
|
+
|
239
|
+
def emit_table(table, s = @ts, e = @te)
|
240
|
+
token = tok(s, e)
|
241
|
+
emit(table[token], token, s, e)
|
242
|
+
end
|
243
|
+
|
244
|
+
# shim
|
245
|
+
def lineno
|
246
|
+
@location[0] + 1
|
247
|
+
end
|
248
|
+
|
249
|
+
protected
|
250
|
+
|
251
|
+
def eof_char?(char)
|
252
|
+
[0x04, 0x1a, 0x00].include? char.ord
|
253
|
+
end
|
254
|
+
|
255
|
+
def ruby18?
|
256
|
+
@version == 18
|
257
|
+
end
|
258
|
+
|
259
|
+
def ruby19?
|
260
|
+
@version == 19
|
261
|
+
end
|
262
|
+
|
263
|
+
def tok(s = @ts, e = @te)
|
264
|
+
@source[s...e]
|
265
|
+
end
|
266
|
+
|
267
|
+
def record_newline(p)
|
268
|
+
@newlines = (@newlines + [p]).uniq.sort
|
269
|
+
end
|
270
|
+
|
271
|
+
def dissect_location(start, finish)
|
272
|
+
line_number = @newlines.rindex { |nl| start >= nl }
|
273
|
+
line_first_col = @newlines[line_number]
|
274
|
+
|
275
|
+
start_col = start - line_first_col
|
276
|
+
finish_col = finish - line_first_col
|
277
|
+
|
278
|
+
[ line_number, start_col, finish_col ]
|
279
|
+
end
|
280
|
+
|
281
|
+
def with_location(item)
|
282
|
+
type, value, start, finish = *item
|
283
|
+
|
284
|
+
@location = dissect_location(start, finish)
|
285
|
+
|
286
|
+
[ type, value ]
|
287
|
+
end
|
288
|
+
|
289
|
+
def decorate(location, message="")
|
290
|
+
line_number, from, to = location
|
291
|
+
|
292
|
+
line = @source.lines.drop(line_number).first
|
293
|
+
line[from...to] = "\e[4m#{line[from...to]}\e[0m"
|
294
|
+
|
295
|
+
tail_len = to - from - 1
|
296
|
+
tail = "~" * (tail_len >= 0 ? tail_len : 0)
|
297
|
+
decoration = "#{" " * from}\e[1;31m^#{tail}\e[0m #{message}"
|
298
|
+
|
299
|
+
[ line, decoration ]
|
300
|
+
end
|
301
|
+
|
302
|
+
def warning(message, start = @ts, finish = @te)
|
303
|
+
$stderr.puts "warning: #{message}"
|
304
|
+
$stderr.puts decorate(dissect_location(start, finish))
|
305
|
+
end
|
306
|
+
|
307
|
+
def error(message)
|
308
|
+
raise Parser::SyntaxError, message
|
309
|
+
end
|
310
|
+
|
311
|
+
#
|
312
|
+
# === LITERAL STACK ===
|
313
|
+
#
|
314
|
+
|
315
|
+
def push_literal(*args)
|
316
|
+
new_literal = Parser::LexerLiteral.new(self, *args)
|
317
|
+
@literal_stack.push(new_literal)
|
318
|
+
|
319
|
+
if new_literal.type == :tWORDS_BEG
|
320
|
+
self.class.lex_en_interp_words
|
321
|
+
elsif new_literal.type == :tQWORDS_BEG
|
322
|
+
self.class.lex_en_plain_words
|
323
|
+
elsif new_literal.interpolate?
|
324
|
+
self.class.lex_en_interp_string
|
325
|
+
else
|
326
|
+
self.class.lex_en_plain_string
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
def literal
|
331
|
+
@literal_stack[-1]
|
332
|
+
end
|
333
|
+
|
334
|
+
def pop_literal
|
335
|
+
old_literal = @literal_stack.pop
|
336
|
+
|
337
|
+
if old_literal.type == :tREGEXP_BEG
|
338
|
+
# Fetch modifiers.
|
339
|
+
self.class.lex_en_regexp_modifiers
|
340
|
+
else
|
341
|
+
self.class.lex_en_expr_end
|
342
|
+
end
|
343
|
+
end
|
344
|
+
|
345
|
+
# Mapping of strings to parser tokens.
|
346
|
+
|
347
|
+
PUNCTUATION = {
|
348
|
+
'=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
|
349
|
+
'!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
|
350
|
+
'-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
|
351
|
+
'%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
|
352
|
+
';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
|
353
|
+
'...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
|
354
|
+
'(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
|
355
|
+
':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
|
356
|
+
'-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
|
357
|
+
'**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
|
358
|
+
'!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
|
359
|
+
'>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
|
360
|
+
'<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
|
361
|
+
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
362
|
+
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
363
|
+
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
364
|
+
'do' => :kDO
|
365
|
+
}
|
366
|
+
|
367
|
+
PUNCTUATION_BEGIN = {
|
368
|
+
'&' => :tAMPER, '*' => :tSTAR, '+' => :tUPLUS,
|
369
|
+
'-' => :tUMINUS, '::' => :tCOLON3, '(' => :tLPAREN,
|
370
|
+
'{' => :tLBRACE, '[' => :tLBRACK,
|
371
|
+
}
|
372
|
+
|
373
|
+
KEYWORDS = {
|
374
|
+
'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
|
375
|
+
'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
|
376
|
+
'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
|
377
|
+
'BEGIN' => :klBEGIN, 'END' => :klEND,
|
378
|
+
}
|
379
|
+
|
380
|
+
%w(class module def undef begin end then elsif else ensure case when
|
381
|
+
for break next redo retry in do return yield super self nil true
|
382
|
+
false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
|
383
|
+
KEYWORDS[keyword] = :"k#{keyword.upcase}"
|
384
|
+
end
|
385
|
+
|
386
|
+
KEYWORDS_BEGIN = {
|
387
|
+
'if' => :kIF, 'unless' => :kUNLESS,
|
388
|
+
'while' => :kWHILE, 'until' => :kUNTIL,
|
389
|
+
'rescue' => :kRESCUE
|
390
|
+
}
|
391
|
+
|
392
|
+
%%{
|
393
|
+
# %
|
394
|
+
|
395
|
+
access @;
|
396
|
+
getkey @source[p].ord;
|
397
|
+
|
398
|
+
# === CHARACTER CLASSES ===
|
399
|
+
#
|
400
|
+
# Pay close attention to the differences between c_any and any.
|
401
|
+
# c_any does not include EOF and so will cause incorrect behavior
|
402
|
+
# for machine subtraction (any-except rules) and default transitions
|
403
|
+
# for scanners.
|
404
|
+
|
405
|
+
action do_nl {
|
406
|
+
# Record position of a newline for precise line and column reporting.
|
407
|
+
#
|
408
|
+
# This action is embedded directly into c_nl, as it is idempotent and
|
409
|
+
# there are no cases when we need to skip it.
|
410
|
+
record_newline(p + 1)
|
411
|
+
@newline_s = p
|
412
|
+
}
|
413
|
+
|
414
|
+
c_nl = '\n' $ do_nl;
|
415
|
+
c_space = [ \t\r\f\v];
|
416
|
+
c_space_nl = c_space | c_nl;
|
417
|
+
c_eof = 0x04 | 0x1a | 0; # ^D, ^Z, EOF
|
418
|
+
c_eol = c_nl | c_eof;
|
419
|
+
c_any = any - c_eof - zlen;
|
420
|
+
c_line = c_any - c_nl;
|
421
|
+
|
422
|
+
c_unicode = c_any - 0x00..0x7f;
|
423
|
+
c_lower = [a-z_] | c_unicode;
|
424
|
+
c_upper = [A-Z] | c_unicode;
|
425
|
+
c_alpha = c_lower | c_upper;
|
426
|
+
c_alnum = c_alpha | [0-9];
|
427
|
+
|
428
|
+
action do_eof {
|
429
|
+
# Sit at EOF indefinitely. #advance would return $eof each time.
|
430
|
+
# This allows to feed the lexer more data if needed; this is only used
|
431
|
+
# in tests.
|
432
|
+
#
|
433
|
+
# Note that this action is not embedded into e_eof like e_nl and e_bs
|
434
|
+
# below. This is due to the fact that scanner state at EOF is observed
|
435
|
+
# by tests, and encapsulating it in a rule would break the introspection.
|
436
|
+
fhold; fbreak;
|
437
|
+
}
|
438
|
+
|
439
|
+
#
|
440
|
+
# === TOKEN DEFINITIONS ===
|
441
|
+
#
|
442
|
+
|
443
|
+
# All operators are punctuation. There is more to punctuation
|
444
|
+
# than just operators. Operators can be overridden by user;
|
445
|
+
# punctuation can not.
|
446
|
+
|
447
|
+
# A list of operators which are valid in the function name context, but
|
448
|
+
# have different semantics in others.
|
449
|
+
operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' ;
|
450
|
+
|
451
|
+
# A list of operators which can occur within an assignment shortcut (+ → +=).
|
452
|
+
operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
|
453
|
+
'*' | '/' | '**' | '~' | '**' | '<<' | '>>' |
|
454
|
+
'%' ;
|
455
|
+
|
456
|
+
# A list of all user-definable operators not covered by groups above.
|
457
|
+
operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
|
458
|
+
'<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
|
459
|
+
|
460
|
+
# Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
|
461
|
+
# as they are ambiguous with interpolation `#{}` and should be counted.
|
462
|
+
# These braces are not present in punctuation lists.
|
463
|
+
|
464
|
+
# A list of punctuation which has different meaning when used at the
|
465
|
+
# beginning of expression.
|
466
|
+
punctuation_begin = '-' | '+' | '::' | '(' | '[' | '*' | '&' ;
|
467
|
+
|
468
|
+
# A list of all punctuation except punctuation_begin.
|
469
|
+
punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
|
470
|
+
'::' | '?' | ':' | '.' | '..' | '...' ;
|
471
|
+
|
472
|
+
# A list of keywords which have different meaning at the beginning of expression.
|
473
|
+
keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
|
474
|
+
|
475
|
+
# A list of keywords which accept an argument-like expression, i.e. have the
|
476
|
+
# same post-processing as method calls or commands. Example: `yield 1`,
|
477
|
+
# `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
|
478
|
+
keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
|
479
|
+
|
480
|
+
# A list of keywords which accept a literal function name as an argument.
|
481
|
+
keyword_with_fname = 'def' | 'undef' | 'alias' ;
|
482
|
+
|
483
|
+
# A list of keywords which accept an expression after them.
|
484
|
+
keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
|
485
|
+
'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
|
486
|
+
'and' | 'or' ;
|
487
|
+
|
488
|
+
# A list of keywords which accept a value, and treat the keywords from
|
489
|
+
# `keyword_modifier` list as modifiers.
|
490
|
+
keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
|
491
|
+
|
492
|
+
# A list of keywords which do not accept an expression after them.
|
493
|
+
keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
|
494
|
+
'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
|
495
|
+
'__LINE__' | '__ENCODING__';
|
496
|
+
|
497
|
+
# All keywords.
|
498
|
+
keyword = keyword_with_value | keyword_with_mid |
|
499
|
+
keyword_with_end | keyword_with_arg |
|
500
|
+
keyword_with_fname | keyword_modifier ;
|
501
|
+
|
502
|
+
constant = [A-Z] c_alnum*;
|
503
|
+
bareword = c_alpha c_alnum*;
|
504
|
+
|
505
|
+
call_or_var = c_lower c_alnum*;
|
506
|
+
class_var = '@@' bareword;
|
507
|
+
instance_var = '@' bareword;
|
508
|
+
global_var = '$'
|
509
|
+
( bareword | digit+
|
510
|
+
| [`'+~*$&?!@/\\;,.=:<>"] # `
|
511
|
+
| '-' [A-Za-z0-9_]?
|
512
|
+
)
|
513
|
+
;
|
514
|
+
|
515
|
+
# Ruby accepts (and fails on) variables with leading digit
|
516
|
+
# in literal context, but not in unquoted symbol body.
|
517
|
+
class_var_v = '@@' [0-9]? bareword;
|
518
|
+
instance_var_v = '@' [0-9]? bareword;
|
519
|
+
|
520
|
+
#
|
521
|
+
# === ESCAPE SEQUENCE PARSING ===
|
522
|
+
#
|
523
|
+
|
524
|
+
# Escape parsing code is a Ragel pattern, not a scanner, and therefore
|
525
|
+
# it shouldn't directly raise errors or perform other actions with side effects.
|
526
|
+
# In reality this would probably just mess up error reporting in pathological
|
527
|
+
# cases, through.
|
528
|
+
|
529
|
+
# The amount of code required to parse \M\C stuff correctly is ridiculous.
|
530
|
+
|
531
|
+
escaped_nl = "\\" c_nl;
|
532
|
+
|
533
|
+
action unicode_points {
|
534
|
+
@escape = ""
|
535
|
+
|
536
|
+
codepoints = tok(@escape_s + 2, p - 1)
|
537
|
+
codepoints.split(/[ \t]/).each do |codepoint_str|
|
538
|
+
codepoint = codepoint_str.to_i(16)
|
539
|
+
|
540
|
+
if codepoint >= 0x110000
|
541
|
+
@escape = lambda { error "invalid Unicode codepoint (too large)" }
|
542
|
+
break
|
543
|
+
end
|
544
|
+
|
545
|
+
@escape += codepoint.chr(Encoding::UTF_8)
|
546
|
+
end
|
547
|
+
}
|
548
|
+
|
549
|
+
action unescape_char {
|
550
|
+
@escape = {
|
551
|
+
'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f",
|
552
|
+
'n' => "\n", 'r' => "\r", 's' => "\s", 't' => "\t",
|
553
|
+
'v' => "\v", '\\' => "\\"
|
554
|
+
}.fetch(@source[p - 1], @source[p - 1])
|
555
|
+
}
|
556
|
+
|
557
|
+
action invalid_complex_escape {
|
558
|
+
@escape = lambda { error "invalid escape character syntax" }
|
559
|
+
}
|
560
|
+
|
561
|
+
action slash_c_char {
|
562
|
+
@escape = (@escape.ord & 0x9f).chr
|
563
|
+
}
|
564
|
+
|
565
|
+
action slash_m_char {
|
566
|
+
@escape = (@escape.ord | 0x80).chr
|
567
|
+
}
|
568
|
+
|
569
|
+
maybe_escaped_char = (
|
570
|
+
'\\' c_any %unescape_char
|
571
|
+
| ( c_any - [\\] ) % { @escape = @source[p - 1] }
|
572
|
+
);
|
573
|
+
|
574
|
+
maybe_escaped_ctrl_char = ( # why?!
|
575
|
+
'\\' c_any %unescape_char %slash_c_char
|
576
|
+
| '?' % { @escape = "\x7f" }
|
577
|
+
| ( c_any - [\\?] ) % { @escape = @source[p - 1] } %slash_c_char
|
578
|
+
);
|
579
|
+
|
580
|
+
escape = (
|
581
|
+
# \377
|
582
|
+
[0-7]{1,3}
|
583
|
+
% { @escape = tok(@escape_s, p).to_i(8).chr }
|
584
|
+
|
585
|
+
# \xff
|
586
|
+
| ( 'x' xdigit{1,2}
|
587
|
+
% { @escape = tok(@escape_s + 1, p).to_i(16).chr }
|
588
|
+
# \u263a
|
589
|
+
| 'u' xdigit{4}
|
590
|
+
% { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
|
591
|
+
)
|
592
|
+
|
593
|
+
# %q[\x]
|
594
|
+
| 'x' ( c_any - xdigit )
|
595
|
+
% { @escape = lambda { error "invalid hex escape" } }
|
596
|
+
|
597
|
+
# %q[\u123] %q[\u{12]
|
598
|
+
| 'u' ( c_any{0,4} -
|
599
|
+
xdigit{4} - # \u1234 is valid
|
600
|
+
( '{' xdigit{1,3} # \u{1 \u{12 \u{123 are valid
|
601
|
+
| '{' xdigit [ \t}] # \u{1. \u{1} are valid
|
602
|
+
| '{' xdigit{2} [ \t}] # \u{12. \u{12} are valid
|
603
|
+
)
|
604
|
+
)
|
605
|
+
% { @escape = lambda { error "invalid Unicode escape" } }
|
606
|
+
|
607
|
+
# \u{123 456}
|
608
|
+
| 'u{' ( xdigit{1,6} [ \t] )*
|
609
|
+
( xdigit{1,6} '}'
|
610
|
+
%unicode_points
|
611
|
+
| ( xdigit* ( c_any - xdigit - '}' )+ '}'
|
612
|
+
| ( c_any - '}' )* c_eof
|
613
|
+
| xdigit{7,}
|
614
|
+
) % { @escape = lambda { error "unterminated Unicode escape" } }
|
615
|
+
)
|
616
|
+
|
617
|
+
# \C-\a \cx
|
618
|
+
| ( 'C-' | 'c' ) escaped_nl?
|
619
|
+
maybe_escaped_ctrl_char
|
620
|
+
|
621
|
+
# \M-a
|
622
|
+
| 'M-' escaped_nl?
|
623
|
+
maybe_escaped_char
|
624
|
+
%slash_m_char
|
625
|
+
|
626
|
+
# \C-\M-f \M-\cf \c\M-f
|
627
|
+
| ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
|
628
|
+
| 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
|
629
|
+
maybe_escaped_ctrl_char
|
630
|
+
%slash_m_char
|
631
|
+
|
632
|
+
| 'C' c_any %invalid_complex_escape
|
633
|
+
| 'M' c_any %invalid_complex_escape
|
634
|
+
| ( 'M-\\C' | 'C-\\M' | 'cM' ) c_any %invalid_complex_escape
|
635
|
+
|
636
|
+
| ( c_any - [0-7xuCMc] ) %unescape_char
|
637
|
+
|
638
|
+
| c_eof % { error "escape sequence meets end of file" }
|
639
|
+
);
|
640
|
+
|
641
|
+
# Use rules in form of `e_bs escape' when you need to parse a sequence.
|
642
|
+
e_bs = '\\' % {
|
643
|
+
@escape_s = p
|
644
|
+
@escape = nil
|
645
|
+
};
|
646
|
+
|
647
|
+
#
|
648
|
+
# === STRING AND HEREDOC PARSING ===
|
649
|
+
#
|
650
|
+
|
651
|
+
# Heredoc parsing is quite a complex topic. First, consider that heredocs
|
652
|
+
# can be arbitrarily nested. For example:
|
653
|
+
#
|
654
|
+
# puts <<CODE
|
655
|
+
# the result is: #{<<RESULT.inspect
|
656
|
+
# i am a heredoc
|
657
|
+
# RESULT
|
658
|
+
# }
|
659
|
+
# CODE
|
660
|
+
#
|
661
|
+
# which, incidentally, evaluates to:
|
662
|
+
#
|
663
|
+
# the result is: " i am a heredoc\n"
|
664
|
+
#
|
665
|
+
# To parse them, lexer refers to two kinds (remember, nested heredocs)
|
666
|
+
# of positions in the input stream, namely @heredoc_e
|
667
|
+
# (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
|
668
|
+
#
|
669
|
+
# @heredoc_e is simply contained inside the corresponding LexerLiteral, and
|
670
|
+
# when the heredoc is closed, the lexing is restarted from that position.
|
671
|
+
#
|
672
|
+
# @herebody_s is quite more complex. First, @herebody_s changes after each
|
673
|
+
# heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
|
674
|
+
# contains the current line, and also when a heredoc is started, @herebody_s
|
675
|
+
# contains the position from which the heredoc will be lexed.
|
676
|
+
#
|
677
|
+
# Second, as (insanity) there are nested heredocs, we need to maintain a
|
678
|
+
# stack of these positions. Each time #push_literal is called, it saves current
|
679
|
+
# @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
|
680
|
+
# containing other heredocs) is closed, the previous value is restored.
|
681
|
+
|
682
|
+
e_heredoc_nl = c_nl $ {
|
683
|
+
# After every heredoc was parsed, @herebody_s contains the
|
684
|
+
# position of next token after all heredocs.
|
685
|
+
if @herebody_s
|
686
|
+
p = @herebody_s
|
687
|
+
@herebody_s = nil
|
688
|
+
end
|
689
|
+
};
|
690
|
+
|
691
|
+
action extend_string {
|
692
|
+
if literal.nest_and_try_closing tok, @ts, @te
|
693
|
+
fgoto *pop_literal;
|
694
|
+
else
|
695
|
+
literal.extend_string tok, @ts, @te
|
696
|
+
end
|
697
|
+
}
|
698
|
+
|
699
|
+
action extend_string_escaped {
|
700
|
+
if literal.nest_and_try_closing('\\', @ts, @ts + 1)
|
701
|
+
# If the literal is actually closed by the backslash,
|
702
|
+
# rewind the input prior to consuming the escape sequence.
|
703
|
+
p = @escape_s - 1
|
704
|
+
fgoto *pop_literal;
|
705
|
+
else
|
706
|
+
# Get the first character after the backslash.
|
707
|
+
escaped_char = @source[@escape_s]
|
708
|
+
|
709
|
+
if literal.munge_escape? escaped_char
|
710
|
+
# If this particular literal uses this character as an opening
|
711
|
+
# or closing delimiter, it is an escape sequence for that
|
712
|
+
# particular character. Write it without the backslash.
|
713
|
+
|
714
|
+
if literal.regexp?
|
715
|
+
# Regular expressions should have every escape sequence in its
|
716
|
+
# raw form.
|
717
|
+
literal.extend_string(tok, @ts, @te)
|
718
|
+
else
|
719
|
+
literal.extend_string(escaped_char, @ts, @te)
|
720
|
+
end
|
721
|
+
else
|
722
|
+
# It does not. So this is an actual escape sequence, yay!
|
723
|
+
# Two things to consider here.
|
724
|
+
#
|
725
|
+
# 1. The `escape' rule should be pure and so won't raise any
|
726
|
+
# errors by itself. Instead, it stores them in lambdas.
|
727
|
+
#
|
728
|
+
# 2. Non-interpolated literals do not go through the aforementioned
|
729
|
+
# rule. As \\ and \' (and variants) are munged, the full token
|
730
|
+
# should always be written for such literals.
|
731
|
+
|
732
|
+
@escape.call if @escape.respond_to? :call
|
733
|
+
|
734
|
+
if literal.regexp?
|
735
|
+
# Ditto. Also, expand escaped newlines.
|
736
|
+
literal.extend_string(tok.gsub("\\\n", ''), @ts, @te)
|
737
|
+
else
|
738
|
+
literal.extend_string(@escape || tok, @ts, @te)
|
739
|
+
end
|
740
|
+
end
|
741
|
+
end
|
742
|
+
}
|
743
|
+
|
744
|
+
# Extend a string with a newline or a EOF character.
|
745
|
+
# As heredoc closing line can immediately precede EOF, this action
|
746
|
+
# has to handle such case specially.
|
747
|
+
action extend_string_eol {
|
748
|
+
is_eof = eof_char? @source[p]
|
749
|
+
|
750
|
+
if literal.heredoc?
|
751
|
+
# Try ending the heredoc with the complete most recently
|
752
|
+
# scanned line. @herebody_s always refers to the start of such line.
|
753
|
+
if literal.nest_and_try_closing(tok(@herebody_s, @te - 1),
|
754
|
+
@herebody_s, @te - 1)
|
755
|
+
# Adjust @herebody_s to point to the next line.
|
756
|
+
@herebody_s = @te
|
757
|
+
|
758
|
+
# Continue regular lexing after the heredoc reference (<<END).
|
759
|
+
p = literal.heredoc_e - 1
|
760
|
+
fgoto *pop_literal;
|
761
|
+
else
|
762
|
+
# Ditto.
|
763
|
+
@herebody_s = @te
|
764
|
+
end
|
765
|
+
end
|
766
|
+
|
767
|
+
if is_eof
|
768
|
+
error "unterminated string meets end of file"
|
769
|
+
end
|
770
|
+
|
771
|
+
# A literal newline is appended if the heredoc was _not_ closed
|
772
|
+
# this time. See also LexerLiteral#nest_and_try_closing for rationale of
|
773
|
+
# calling #flush_string here.
|
774
|
+
literal.extend_string tok, @ts, @te
|
775
|
+
literal.flush_string
|
776
|
+
}
|
777
|
+
|
778
|
+
#
|
779
|
+
# === INTERPOLATION PARSING ===
|
780
|
+
#
|
781
|
+
|
782
|
+
# Interpolations with immediate variable names simply call into
|
783
|
+
# the corresponding machine.
|
784
|
+
|
785
|
+
interp_var =
|
786
|
+
'#' ( global_var | class_var_v | instance_var_v );
|
787
|
+
|
788
|
+
action extend_interp_var {
|
789
|
+
literal.flush_string
|
790
|
+
emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
|
791
|
+
|
792
|
+
p = @ts
|
793
|
+
fcall expr_variable;
|
794
|
+
}
|
795
|
+
|
796
|
+
# Interpolations with code blocks must match nested curly braces, as
|
797
|
+
# interpolation ending is ambiguous with a block ending. So, every
|
798
|
+
# opening and closing brace should be matched with e_[lr]brace rules,
|
799
|
+
# which automatically perform the counting.
|
800
|
+
#
|
801
|
+
# Note that interpolations can themselves be nested, so brace balance
|
802
|
+
# is tied to the innermost literal.
|
803
|
+
#
|
804
|
+
# Also note that literals themselves should not use e_[lr]brace rules
|
805
|
+
# when matching their opening and closing delimiters, as the amount of
|
806
|
+
# braces inside the characters of a string literal is independent.
|
807
|
+
|
808
|
+
interp_code = '#{';
|
809
|
+
|
810
|
+
e_lbrace = '{' % {
|
811
|
+
if literal
|
812
|
+
literal.start_interp_brace
|
813
|
+
end
|
814
|
+
};
|
815
|
+
|
816
|
+
e_rbrace = '}' % {
|
817
|
+
if literal
|
818
|
+
if literal.end_interp_brace_and_try_closing
|
819
|
+
emit(:tRCURLY, '}')
|
820
|
+
|
821
|
+
if literal.words?
|
822
|
+
emit(:tSPACE, nil)
|
823
|
+
end
|
824
|
+
|
825
|
+
if literal.saved_herebody_s
|
826
|
+
@herebody_s = literal.saved_herebody_s
|
827
|
+
end
|
828
|
+
|
829
|
+
fhold;
|
830
|
+
fnext *@stack.pop;
|
831
|
+
fbreak;
|
832
|
+
end
|
833
|
+
end
|
834
|
+
};
|
835
|
+
|
836
|
+
action extend_interp_code {
|
837
|
+
literal.flush_string
|
838
|
+
emit(:tSTRING_DBEG, '#{')
|
839
|
+
|
840
|
+
literal.saved_herebody_s = @herebody_s
|
841
|
+
@herebody_s = nil
|
842
|
+
|
843
|
+
literal.start_interp_brace
|
844
|
+
fcall expr_beg;
|
845
|
+
}
|
846
|
+
|
847
|
+
# Actual string parsers are simply combined from the primitives defined
|
848
|
+
# above.
|
849
|
+
|
850
|
+
interp_words := |*
|
851
|
+
interp_code => extend_interp_code;
|
852
|
+
interp_var => extend_interp_var;
|
853
|
+
e_bs escape => extend_string_escaped;
|
854
|
+
c_space_nl => { literal.flush_string };
|
855
|
+
c_eol => extend_string_eol;
|
856
|
+
c_any => extend_string;
|
857
|
+
*|;
|
858
|
+
|
859
|
+
interp_string := |*
|
860
|
+
interp_code => extend_interp_code;
|
861
|
+
interp_var => extend_interp_var;
|
862
|
+
e_bs escape => extend_string_escaped;
|
863
|
+
c_eol => extend_string_eol;
|
864
|
+
c_any => extend_string;
|
865
|
+
*|;
|
866
|
+
|
867
|
+
plain_words := |*
|
868
|
+
e_bs c_any => extend_string_escaped;
|
869
|
+
c_space_nl => { literal.flush_string };
|
870
|
+
c_eol => extend_string_eol;
|
871
|
+
c_any => extend_string;
|
872
|
+
*|;
|
873
|
+
|
874
|
+
plain_string := |*
|
875
|
+
e_bs c_any => extend_string_escaped;
|
876
|
+
c_eol => extend_string_eol;
|
877
|
+
c_any => extend_string;
|
878
|
+
*|;
|
879
|
+
|
880
|
+
regexp_modifiers := |*
|
881
|
+
[A-Za-z]+
|
882
|
+
=> {
|
883
|
+
unknown_options = tok.scan(/[^imxouesn]/)
|
884
|
+
if unknown_options.any?
|
885
|
+
error "unknown regexp options: #{unknown_options.join}"
|
886
|
+
end
|
887
|
+
|
888
|
+
emit(:tREGEXP_OPT)
|
889
|
+
fgoto expr_end;
|
890
|
+
};
|
891
|
+
|
892
|
+
any
|
893
|
+
=> {
|
894
|
+
emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
|
895
|
+
fhold; fgoto expr_end;
|
896
|
+
};
|
897
|
+
*|;
|
898
|
+
|
899
|
+
#
|
900
|
+
# === EXPRESSION PARSING ===
|
901
|
+
#
|
902
|
+
|
903
|
+
# These rules implement a form of manually defined lookahead.
|
904
|
+
# The default longest-match scanning does not work here due
|
905
|
+
# to sheer ambiguity.
|
906
|
+
|
907
|
+
ambiguous_ident_suffix = # actual parsed
|
908
|
+
[?!=] %{ tm = p } | # a? a?
|
909
|
+
'==' %{ tm = p - 2 } | # a==b a == b
|
910
|
+
'=~' %{ tm = p - 2 } | # a=~b a =~ b
|
911
|
+
'=>' %{ tm = p - 2 } | # a=>b a => b
|
912
|
+
'===' %{ tm = p - 3 } # a===b a === b
|
913
|
+
;
|
914
|
+
|
915
|
+
ambiguous_symbol_suffix = # actual parsed
|
916
|
+
ambiguous_ident_suffix |
|
917
|
+
'==>' %{ tm = p - 2 } # :a==>b :a= => b
|
918
|
+
;
|
919
|
+
|
920
|
+
# Ambiguous with 1.9 hash labels.
|
921
|
+
ambiguous_const_suffix = # actual parsed
|
922
|
+
'::' %{ tm = p - 2 } # A::B A :: B
|
923
|
+
;
|
924
|
+
|
925
|
+
# Ruby 1.9 lambdas require parentheses counting in order to
|
926
|
+
# emit correct opening kDO/tLBRACE.
|
927
|
+
|
928
|
+
e_lparen = '(' % {
|
929
|
+
@paren_nest += 1
|
930
|
+
};
|
931
|
+
|
932
|
+
e_rparen = ')' % {
|
933
|
+
@paren_nest -= 1
|
934
|
+
};
|
935
|
+
|
936
|
+
# Variable lexing code is accessed from both expressions and
|
937
|
+
# string interpolation related code.
|
938
|
+
#
|
939
|
+
expr_variable := |*
|
940
|
+
global_var
|
941
|
+
=> {
|
942
|
+
if tok =~ /^\$([1-9][0-9]*)$/
|
943
|
+
emit(:tNTH_REF, $1.to_i)
|
944
|
+
elsif tok =~ /^\$([&`'+])$/
|
945
|
+
emit(:tBACK_REF, $1.to_sym)
|
946
|
+
else
|
947
|
+
emit(:tGVAR)
|
948
|
+
end
|
949
|
+
|
950
|
+
fnext *@stack.pop; fbreak;
|
951
|
+
};
|
952
|
+
|
953
|
+
class_var_v
|
954
|
+
=> {
|
955
|
+
error "`#{tok}' is not allowed as a class variable name" if tok =~ /^@@[0-9]/
|
956
|
+
|
957
|
+
emit(:tCVAR)
|
958
|
+
fnext *@stack.pop; fbreak;
|
959
|
+
};
|
960
|
+
|
961
|
+
instance_var_v
|
962
|
+
=> {
|
963
|
+
error "`#{tok}' is not allowed as an instance variable name" if tok =~ /^@[0-9]/
|
964
|
+
|
965
|
+
emit(:tIVAR)
|
966
|
+
fnext *@stack.pop; fbreak;
|
967
|
+
};
|
968
|
+
*|;
|
969
|
+
|
970
|
+
# Literal function name in definition (e.g. `def class`).
|
971
|
+
# Keywords are returned as their respective tokens; this is used
|
972
|
+
# to support singleton def `def self.foo`. Global variables are
|
973
|
+
# returned as `tGVAR`; this is used in global variable alias
|
974
|
+
# statements `alias $a $b`. Symbols are returned verbatim; this
|
975
|
+
# is used in `alias :a :"b#{foo}"` and `undef :a`.
|
976
|
+
#
|
977
|
+
# Transitions to `expr_end` afterwards.
|
978
|
+
#
|
979
|
+
expr_fname := |*
|
980
|
+
keyword
|
981
|
+
=> { emit(KEYWORDS[tok]);
|
982
|
+
fnext expr_end; fbreak; };
|
983
|
+
|
984
|
+
bareword
|
985
|
+
=> { emit(:tIDENTIFIER)
|
986
|
+
fnext expr_end; fbreak; };
|
987
|
+
|
988
|
+
bareword ambiguous_ident_suffix
|
989
|
+
=> { emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
|
990
|
+
fnext expr_end; p = tm - 1; fbreak; };
|
991
|
+
|
992
|
+
operator_fname |
|
993
|
+
operator_arithmetic |
|
994
|
+
operator_rest
|
995
|
+
=> { emit_table(PUNCTUATION)
|
996
|
+
fnext expr_end; fbreak; };
|
997
|
+
|
998
|
+
':'
|
999
|
+
=> { fhold; fgoto expr_end; };
|
1000
|
+
|
1001
|
+
global_var
|
1002
|
+
=> { emit(:tGVAR)
|
1003
|
+
fbreak; };
|
1004
|
+
|
1005
|
+
c_space_nl+;
|
1006
|
+
|
1007
|
+
c_any
|
1008
|
+
=> { fhold; fgoto expr_end; };
|
1009
|
+
|
1010
|
+
c_eof => do_eof;
|
1011
|
+
*|;
|
1012
|
+
|
1013
|
+
# Literal function name in method call (e.g. `a.class`).
|
1014
|
+
#
|
1015
|
+
# Transitions to `expr_arg` afterwards.
|
1016
|
+
#
|
1017
|
+
expr_dot := |*
|
1018
|
+
bareword
|
1019
|
+
=> { emit(:tIDENTIFIER)
|
1020
|
+
fnext expr_arg; fbreak; };
|
1021
|
+
|
1022
|
+
bareword ambiguous_ident_suffix
|
1023
|
+
=> { emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
|
1024
|
+
fnext expr_arg; p = tm - 1; fbreak; };
|
1025
|
+
|
1026
|
+
operator_fname |
|
1027
|
+
operator_arithmetic |
|
1028
|
+
operator_rest
|
1029
|
+
=> { emit_table(PUNCTUATION)
|
1030
|
+
fnext expr_arg; fbreak; };
|
1031
|
+
|
1032
|
+
c_space_nl+;
|
1033
|
+
|
1034
|
+
c_any
|
1035
|
+
=> { fhold; fgoto expr_end; };
|
1036
|
+
|
1037
|
+
c_eof => do_eof;
|
1038
|
+
*|;
|
1039
|
+
|
1040
|
+
# The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
|
1041
|
+
# is consumed; the current expression is a command or method call.
|
1042
|
+
#
|
1043
|
+
expr_arg := |*
|
1044
|
+
#
|
1045
|
+
# COMMAND MODE SPECIFIC TOKENS
|
1046
|
+
#
|
1047
|
+
|
1048
|
+
# cmd (1 + 2)
|
1049
|
+
# See below the rationale about expr_endarg.
|
1050
|
+
c_space+ e_lparen
|
1051
|
+
=> { emit(:tLPAREN_ARG, '(', @te - 1, @te)
|
1052
|
+
fnext expr_beg; fbreak; };
|
1053
|
+
|
1054
|
+
# meth(1 + 2)
|
1055
|
+
# Regular method call.
|
1056
|
+
e_lparen
|
1057
|
+
=> { emit(:tLPAREN2)
|
1058
|
+
fnext expr_beg; fbreak; };
|
1059
|
+
|
1060
|
+
# meth [...]
|
1061
|
+
# Array argument. Compare with indexing `meth[...]`.
|
1062
|
+
c_space+ '['
|
1063
|
+
=> { emit(:tLBRACK, '[', @te - 1, @te);
|
1064
|
+
fnext expr_beg; fbreak; };
|
1065
|
+
|
1066
|
+
# cmd {}
|
1067
|
+
# Command: method call without parentheses.
|
1068
|
+
c_space* e_lbrace
|
1069
|
+
=> {
|
1070
|
+
if @lambda_stack.last == @paren_nest
|
1071
|
+
p = @ts - 1
|
1072
|
+
fgoto expr_end;
|
1073
|
+
else
|
1074
|
+
emit(:tLCURLY, '{', @te - 1, @te)
|
1075
|
+
fnext expr_value; fbreak;
|
1076
|
+
end
|
1077
|
+
};
|
1078
|
+
|
1079
|
+
# a.b
|
1080
|
+
# Dot-call.
|
1081
|
+
'.' | '::'
|
1082
|
+
=> { emit_table(PUNCTUATION);
|
1083
|
+
fnext expr_dot; fbreak; };
|
1084
|
+
|
1085
|
+
#
|
1086
|
+
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
|
1087
|
+
#
|
1088
|
+
|
1089
|
+
# a ?b
|
1090
|
+
# Character literal.
|
1091
|
+
c_space+ '?'
|
1092
|
+
=> { fhold; fgoto expr_beg; };
|
1093
|
+
|
1094
|
+
# x +1
|
1095
|
+
# Ambiguous unary operator or regexp literal.
|
1096
|
+
c_space+ [+\-/]
|
1097
|
+
=> {
|
1098
|
+
warning "ambiguous first argument; put parentheses or even spaces", @te - 1, @te
|
1099
|
+
fhold; fhold; fgoto expr_beg;
|
1100
|
+
};
|
1101
|
+
|
1102
|
+
# x *1
|
1103
|
+
# Ambiguous splat or block-pass.
|
1104
|
+
c_space+ [*&]
|
1105
|
+
=> {
|
1106
|
+
what = tok(@te - 1, @te)
|
1107
|
+
warning "`#{what}' interpreted as argument prefix", @te - 1, @te
|
1108
|
+
fhold; fgoto expr_beg;
|
1109
|
+
};
|
1110
|
+
|
1111
|
+
#
|
1112
|
+
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
|
1113
|
+
#
|
1114
|
+
|
1115
|
+
# a ? b
|
1116
|
+
# Ternary operator.
|
1117
|
+
c_space+ '?' c_space_nl
|
1118
|
+
=> { fhold; fhold; fgoto expr_end; };
|
1119
|
+
|
1120
|
+
# x + 1: Binary operator or operator-assignment.
|
1121
|
+
c_space* operator_arithmetic
|
1122
|
+
( '=' | c_space_nl )? |
|
1123
|
+
# x rescue y: Modifier keyword.
|
1124
|
+
c_space+ keyword_modifier |
|
1125
|
+
# Miscellanea.
|
1126
|
+
c_space* punctuation_end
|
1127
|
+
=> {
|
1128
|
+
p = @ts - 1
|
1129
|
+
fgoto expr_end;
|
1130
|
+
};
|
1131
|
+
|
1132
|
+
c_space* c_nl
|
1133
|
+
=> { fhold; fgoto expr_end; };
|
1134
|
+
|
1135
|
+
c_any
|
1136
|
+
=> { fhold; fgoto expr_beg; };
|
1137
|
+
|
1138
|
+
c_eof => do_eof;
|
1139
|
+
*|;
|
1140
|
+
|
1141
|
+
# The rationale for this state is pretty complex. Normally, if an argument
|
1142
|
+
# is passed to a command and then there is a block (tLCURLY...tRCURLY),
|
1143
|
+
# the block is attached to the innermost argument (`f` in `m f {}`), or it
|
1144
|
+
# is a parse error (`m 1 {}`). But there is a special case for passing a single
|
1145
|
+
# primary expression grouped with parentheses: if you write `m (1) {}` or
|
1146
|
+
# (2.0 only) `m () {}`, then the block is attached to `m`.
|
1147
|
+
#
|
1148
|
+
# Thus, we recognize the opening `(` of a command (remember, a command is
|
1149
|
+
# a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
|
1150
|
+
# `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
|
1151
|
+
# lexer's state to `expr_endarg`, which makes it emit the possibly following
|
1152
|
+
# `{` as `tLBRACE_ARG`.
|
1153
|
+
#
|
1154
|
+
# The default post-`expr_endarg` state is `expr_end`, so this state also handles
|
1155
|
+
# `do` (as `kDO_BLOCK` in `expr_beg`). (I have no clue why the parser cannot
|
1156
|
+
# just handle `kDO`.)
|
1157
|
+
expr_endarg := |*
|
1158
|
+
e_lbrace
|
1159
|
+
=> { emit(:tLBRACE_ARG)
|
1160
|
+
fnext expr_value; };
|
1161
|
+
|
1162
|
+
'do'
|
1163
|
+
=> { emit(:kDO_BLOCK)
|
1164
|
+
fnext expr_value; };
|
1165
|
+
|
1166
|
+
c_space*;
|
1167
|
+
|
1168
|
+
c_any
|
1169
|
+
=> { fhold; fgoto expr_end; };
|
1170
|
+
|
1171
|
+
c_eof => do_eof;
|
1172
|
+
*|;
|
1173
|
+
|
1174
|
+
# The rationale for this state is that several keywords accept value
|
1175
|
+
# (i.e. should transition to `expr_beg`), do not accept it like a command
|
1176
|
+
# (i.e. not an `expr_arg`), and must behave like a statement, that is,
|
1177
|
+
# accept a modifier if/while/etc.
|
1178
|
+
#
|
1179
|
+
expr_mid := |*
|
1180
|
+
keyword_modifier
|
1181
|
+
=> { emit_table(KEYWORDS)
|
1182
|
+
fnext expr_beg; fbreak; };
|
1183
|
+
|
1184
|
+
c_space+;
|
1185
|
+
|
1186
|
+
c_nl
|
1187
|
+
=> { fhold; fgoto expr_end; };
|
1188
|
+
|
1189
|
+
c_any
|
1190
|
+
=> { fhold; fgoto expr_beg; };
|
1191
|
+
|
1192
|
+
c_eof => do_eof;
|
1193
|
+
*|;
|
1194
|
+
|
1195
|
+
# Beginning of an expression.
|
1196
|
+
#
|
1197
|
+
# Don't fallthrough to this state from `c_any`; make sure to handle
|
1198
|
+
# `c_space* c_nl` and let `expr_end` handle the newline.
|
1199
|
+
# Otherwise code like `f\ndef x` gets glued together and the parser
|
1200
|
+
# explodes.
|
1201
|
+
#
|
1202
|
+
expr_beg := |*
|
1203
|
+
# Numeric processing. Converts:
|
1204
|
+
# +5 to [tINTEGER, 5]
|
1205
|
+
# -5 to [tUMINUS_NUM] [tINTEGER, 5]
|
1206
|
+
[+\-][0-9]
|
1207
|
+
=> {
|
1208
|
+
fhold;
|
1209
|
+
if tok.start_with? '-'
|
1210
|
+
emit(:tUMINUS_NUM, '-')
|
1211
|
+
fnext expr_end; fbreak;
|
1212
|
+
end
|
1213
|
+
};
|
1214
|
+
|
1215
|
+
# splat *a
|
1216
|
+
'*'
|
1217
|
+
=> { emit(:tSTAR)
|
1218
|
+
fbreak; };
|
1219
|
+
|
1220
|
+
#
|
1221
|
+
# STRING AND REGEXP LITERALS
|
1222
|
+
#
|
1223
|
+
|
1224
|
+
# a / 42
|
1225
|
+
# a % 42
|
1226
|
+
# a %= 42 (disambiguation with %=string=)
|
1227
|
+
[/%] c_space_nl | '%=' # /
|
1228
|
+
=> {
|
1229
|
+
fhold; fhold;
|
1230
|
+
fgoto expr_end;
|
1231
|
+
};
|
1232
|
+
|
1233
|
+
# /regexp/oui
|
1234
|
+
'/'
|
1235
|
+
=> {
|
1236
|
+
type, delimiter = tok, tok
|
1237
|
+
fgoto *push_literal(type, delimiter, @ts);
|
1238
|
+
};
|
1239
|
+
|
1240
|
+
# %<string>
|
1241
|
+
'%' ( c_any - [A-Za-z] )
|
1242
|
+
=> {
|
1243
|
+
type, delimiter = tok[0], tok[-1]
|
1244
|
+
fgoto *push_literal(type, delimiter, @ts);
|
1245
|
+
};
|
1246
|
+
|
1247
|
+
# %w(we are the people)
|
1248
|
+
'%' [A-Za-z]+ c_any
|
1249
|
+
=> {
|
1250
|
+
type, delimiter = tok[0..-2], tok[-1]
|
1251
|
+
fgoto *push_literal(type, delimiter, @ts);
|
1252
|
+
};
|
1253
|
+
|
1254
|
+
'%' c_eof
|
1255
|
+
=> {
|
1256
|
+
error "unterminated string meets end of file"
|
1257
|
+
};
|
1258
|
+
|
1259
|
+
# Heredoc start.
|
1260
|
+
# <<EOF | <<-END | <<"FOOBAR" | <<-`SMTH`
|
1261
|
+
'<<' '-'?
|
1262
|
+
( '"' ( c_any - c_nl - '"' )* '"'
|
1263
|
+
| "'" ( c_any - c_nl - "'" )* "'"
|
1264
|
+
| "`" ( c_any - c_nl - "`" )* "`"
|
1265
|
+
| bareword ) % { @heredoc_e = p }
|
1266
|
+
( c_any - c_nl )* c_nl % { new_herebody_s = p }
|
1267
|
+
=> {
|
1268
|
+
tok(@ts, @heredoc_e) =~ /^<<(-?)(["'`]?)(.*)\2$/
|
1269
|
+
|
1270
|
+
indent = !$1.empty?
|
1271
|
+
type = $2.empty? ? '"' : $2
|
1272
|
+
delimiter = $3
|
1273
|
+
|
1274
|
+
fnext *push_literal(type, delimiter, @ts, @heredoc_e, indent);
|
1275
|
+
|
1276
|
+
if @herebody_s.nil?
|
1277
|
+
@herebody_s = new_herebody_s
|
1278
|
+
end
|
1279
|
+
|
1280
|
+
p = @herebody_s - 1
|
1281
|
+
};
|
1282
|
+
|
1283
|
+
#
|
1284
|
+
# AMBIGUOUS TERNARY OPERATOR
|
1285
|
+
#
|
1286
|
+
|
1287
|
+
'?' ( e_bs escape
|
1288
|
+
| c_any - c_space_nl - e_bs % { @escape = nil }
|
1289
|
+
)
|
1290
|
+
=> {
|
1291
|
+
# Show an error if memorized.
|
1292
|
+
@escape.call if @escape.respond_to? :call
|
1293
|
+
|
1294
|
+
value = @escape || tok(@ts + 1)
|
1295
|
+
|
1296
|
+
if ruby18?
|
1297
|
+
emit(:tINTEGER, value.ord)
|
1298
|
+
else
|
1299
|
+
emit(:tSTRING, value)
|
1300
|
+
end
|
1301
|
+
|
1302
|
+
fbreak;
|
1303
|
+
};
|
1304
|
+
|
1305
|
+
'?' c_space_nl
|
1306
|
+
=> {
|
1307
|
+
escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
1308
|
+
"\v" => '\v', "\f" => '\f' }[tok[@ts + 1]]
|
1309
|
+
warning "invalid character syntax; use ?#{escape}", @ts
|
1310
|
+
|
1311
|
+
p = @ts - 1
|
1312
|
+
fgoto expr_end;
|
1313
|
+
};
|
1314
|
+
|
1315
|
+
'?' c_eof
|
1316
|
+
=> {
|
1317
|
+
error "incomplete character syntax"
|
1318
|
+
};
|
1319
|
+
|
1320
|
+
# f ?aa : b: Disambiguate with a character literal.
|
1321
|
+
'?' [A-Za-z_] bareword
|
1322
|
+
=> {
|
1323
|
+
p = @ts - 1
|
1324
|
+
fgoto expr_end;
|
1325
|
+
};
|
1326
|
+
|
1327
|
+
#
|
1328
|
+
# KEYWORDS AND PUNCTUATION
|
1329
|
+
#
|
1330
|
+
|
1331
|
+
# a(+b)
|
1332
|
+
punctuation_begin |
|
1333
|
+
# a({b=>c})
|
1334
|
+
e_lbrace |
|
1335
|
+
# a()
|
1336
|
+
e_lparen
|
1337
|
+
=> { emit_table(PUNCTUATION_BEGIN)
|
1338
|
+
fbreak; };
|
1339
|
+
|
1340
|
+
# rescue Exception => e: Block rescue.
|
1341
|
+
# Special because it should transition to expr_mid.
|
1342
|
+
'rescue'
|
1343
|
+
=> { emit_table(KEYWORDS_BEGIN)
|
1344
|
+
fnext expr_mid; fbreak; };
|
1345
|
+
|
1346
|
+
# if a: Statement if.
|
1347
|
+
keyword_modifier
|
1348
|
+
=> { emit_table(KEYWORDS_BEGIN)
|
1349
|
+
fnext expr_value; fbreak; };
|
1350
|
+
|
1351
|
+
#
|
1352
|
+
# RUBY 1.9 HASH LABELS
|
1353
|
+
#
|
1354
|
+
|
1355
|
+
bareword ':' ( c_any - ':' )
|
1356
|
+
=> {
|
1357
|
+
fhold;
|
1358
|
+
|
1359
|
+
if ruby18?
|
1360
|
+
emit(:tIDENTIFIER, tok(@ts, @te - 2), @ts, @te - 2)
|
1361
|
+
fhold; # continue as a symbol
|
1362
|
+
else
|
1363
|
+
emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
|
1364
|
+
end
|
1365
|
+
|
1366
|
+
fbreak;
|
1367
|
+
};
|
1368
|
+
|
1369
|
+
#
|
1370
|
+
# CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
|
1371
|
+
#
|
1372
|
+
|
1373
|
+
# foo= bar: Disambiguate with bareword rule below.
|
1374
|
+
bareword ambiguous_ident_suffix |
|
1375
|
+
# def foo: Disambiguate with bareword rule below.
|
1376
|
+
keyword
|
1377
|
+
=> { p = @ts - 1
|
1378
|
+
fgoto expr_end; };
|
1379
|
+
|
1380
|
+
# a = 42; a [42]: Indexing.
|
1381
|
+
# def a; end; a [42]: Array argument.
|
1382
|
+
call_or_var
|
1383
|
+
=> {
|
1384
|
+
emit(:tIDENTIFIER)
|
1385
|
+
|
1386
|
+
if @static_env && @static_env.declared?(tok.to_sym)
|
1387
|
+
fgoto expr_end;
|
1388
|
+
else
|
1389
|
+
fgoto expr_arg;
|
1390
|
+
end
|
1391
|
+
};
|
1392
|
+
|
1393
|
+
c_space_nl+;
|
1394
|
+
|
1395
|
+
# The following rules match most binary and all unary operators.
|
1396
|
+
# Rules for binary operators provide better error reporting.
|
1397
|
+
operator_arithmetic '=' |
|
1398
|
+
operator_rest |
|
1399
|
+
punctuation_end |
|
1400
|
+
c_any
|
1401
|
+
=> { p = @ts - 1; fgoto expr_end; };
|
1402
|
+
|
1403
|
+
c_eof => do_eof;
|
1404
|
+
*|;
|
1405
|
+
|
1406
|
+
# Like expr_beg, but no 1.9 label possible.
|
1407
|
+
#
|
1408
|
+
expr_value := |*
|
1409
|
+
# a:b: a(:b), a::B, A::B
|
1410
|
+
bareword ':'
|
1411
|
+
=> { p = @ts - 1
|
1412
|
+
fgoto expr_end; };
|
1413
|
+
|
1414
|
+
c_space_nl+;
|
1415
|
+
|
1416
|
+
c_any
|
1417
|
+
=> { fhold; fgoto expr_beg; };
|
1418
|
+
|
1419
|
+
c_eof => do_eof;
|
1420
|
+
*|;
|
1421
|
+
|
1422
|
+
expr_end := |*
|
1423
|
+
#
|
1424
|
+
# STABBY LAMBDA
|
1425
|
+
#
|
1426
|
+
|
1427
|
+
'->'
|
1428
|
+
=> {
|
1429
|
+
emit_table(PUNCTUATION)
|
1430
|
+
|
1431
|
+
@lambda_stack.push @paren_nest
|
1432
|
+
fbreak;
|
1433
|
+
};
|
1434
|
+
|
1435
|
+
e_lbrace | 'do'
|
1436
|
+
=> {
|
1437
|
+
if @lambda_stack.last == @paren_nest
|
1438
|
+
@lambda_stack.pop
|
1439
|
+
|
1440
|
+
if tok == '{'
|
1441
|
+
emit(:tLAMBEG)
|
1442
|
+
else
|
1443
|
+
emit(:kDO_LAMBDA)
|
1444
|
+
end
|
1445
|
+
else
|
1446
|
+
emit_table(PUNCTUATION)
|
1447
|
+
end
|
1448
|
+
|
1449
|
+
fnext expr_value; fbreak;
|
1450
|
+
};
|
1451
|
+
|
1452
|
+
#
|
1453
|
+
# KEYWORDS
|
1454
|
+
#
|
1455
|
+
|
1456
|
+
keyword_with_fname
|
1457
|
+
=> { emit_table(KEYWORDS)
|
1458
|
+
fnext expr_fname; fbreak; };
|
1459
|
+
|
1460
|
+
'class' c_space_nl '<<'
|
1461
|
+
=> { emit(:kCLASS, 'class', @ts, @ts + 5)
|
1462
|
+
emit(:tLSHFT, '<<', @te - 2, @te)
|
1463
|
+
fnext expr_beg; fbreak; };
|
1464
|
+
|
1465
|
+
# a if b:c: Syntax error.
|
1466
|
+
keyword_modifier
|
1467
|
+
=> { emit_table(KEYWORDS)
|
1468
|
+
fnext expr_beg; fbreak; };
|
1469
|
+
|
1470
|
+
# elsif b:c: elsif b(:c)
|
1471
|
+
keyword_with_value
|
1472
|
+
=> { emit_table(KEYWORDS)
|
1473
|
+
fnext expr_value; fbreak; };
|
1474
|
+
|
1475
|
+
keyword_with_mid
|
1476
|
+
=> { emit_table(KEYWORDS)
|
1477
|
+
fnext expr_mid; fbreak; };
|
1478
|
+
|
1479
|
+
keyword_with_arg
|
1480
|
+
=> {
|
1481
|
+
emit_table(KEYWORDS)
|
1482
|
+
|
1483
|
+
if ruby18? && tok == 'not'
|
1484
|
+
fnext expr_beg; fbreak;
|
1485
|
+
else
|
1486
|
+
fnext expr_arg; fbreak;
|
1487
|
+
end
|
1488
|
+
};
|
1489
|
+
|
1490
|
+
keyword_with_end
|
1491
|
+
=> { emit_table(KEYWORDS)
|
1492
|
+
fbreak; };
|
1493
|
+
|
1494
|
+
#
|
1495
|
+
# NUMERIC LITERALS
|
1496
|
+
#
|
1497
|
+
|
1498
|
+
( '0' [Xx] %{ @num_base = 16; @num_digits_s = p }
|
1499
|
+
( xdigit+ '_' )* xdigit* '_'?
|
1500
|
+
| '0' [Dd] %{ @num_base = 10; @num_digits_s = p }
|
1501
|
+
( digit+ '_' )* digit* '_'?
|
1502
|
+
| '0' [Oo] %{ @num_base = 8; @num_digits_s = p }
|
1503
|
+
( digit+ '_' )* digit* '_'?
|
1504
|
+
| '0' [Bb] %{ @num_base = 2; @num_digits_s = p }
|
1505
|
+
( [01]+ '_' )* [01]* '_'?
|
1506
|
+
| [1-9] %{ @num_base = 10; @num_digits_s = @ts }
|
1507
|
+
( '_' digit+ )* digit* '_'?
|
1508
|
+
| '0' %{ @num_base = 8; @num_digits_s = @ts }
|
1509
|
+
( '_' digit+ )* digit* '_'?
|
1510
|
+
)
|
1511
|
+
=> {
|
1512
|
+
digits = tok(@num_digits_s)
|
1513
|
+
|
1514
|
+
if digits.end_with? '_'
|
1515
|
+
error "trailing `_' in number"
|
1516
|
+
elsif digits.empty? && @num_base == 8 && ruby18?
|
1517
|
+
# 1.8 did not raise an error on 0o.
|
1518
|
+
digits = "0"
|
1519
|
+
elsif digits.empty?
|
1520
|
+
error "numeric literal without digits"
|
1521
|
+
elsif @num_base == 8 && digits =~ /[89]/
|
1522
|
+
error "invalid octal digit"
|
1523
|
+
end
|
1524
|
+
|
1525
|
+
emit(:tINTEGER, digits.to_i(@num_base))
|
1526
|
+
fbreak;
|
1527
|
+
};
|
1528
|
+
|
1529
|
+
# Floating point literals cannot start with 0 except when a dot
|
1530
|
+
# follows immediately, probably to avoid confusion with octal literals.
|
1531
|
+
( [1-9] [0-9]* ( '_' digit+ )* |
|
1532
|
+
'0'
|
1533
|
+
)?
|
1534
|
+
(
|
1535
|
+
'.' ( digit+ '_' )* digit+ |
|
1536
|
+
( '.' ( digit+ '_' )* digit+ )? [eE] [+\-]? ( digit+ '_' )* digit+
|
1537
|
+
)
|
1538
|
+
=> {
|
1539
|
+
if tok.start_with? '.'
|
1540
|
+
error "no .<digit> floating literal anymore; put 0 before dot"
|
1541
|
+
elsif tok =~ /^[eE]/
|
1542
|
+
# The rule above allows to specify floats as just `e10', which is
|
1543
|
+
# certainly not a float. Send a patch if you can do this better.
|
1544
|
+
emit(:tIDENTIFIER, tok)
|
1545
|
+
fbreak;
|
1546
|
+
end
|
1547
|
+
|
1548
|
+
emit(:tFLOAT, tok.to_f)
|
1549
|
+
fbreak;
|
1550
|
+
};
|
1551
|
+
|
1552
|
+
#
|
1553
|
+
# SYMBOL LITERALS
|
1554
|
+
#
|
1555
|
+
|
1556
|
+
# `echo foo` | :"bar" | :'baz'
|
1557
|
+
'`' | ':'? ['"] # '
|
1558
|
+
=> {
|
1559
|
+
type, delimiter = tok, tok[-1]
|
1560
|
+
fgoto *push_literal(type, delimiter, @ts);
|
1561
|
+
};
|
1562
|
+
|
1563
|
+
':' bareword ambiguous_symbol_suffix
|
1564
|
+
=> { emit(:tSYMBOL, tok(@ts + 1, tm))
|
1565
|
+
p = tm - 1; fbreak; };
|
1566
|
+
|
1567
|
+
':' ( bareword | global_var | class_var | instance_var |
|
1568
|
+
operator_fname | operator_arithmetic | operator_rest )
|
1569
|
+
=> { emit(:tSYMBOL, tok(@ts + 1))
|
1570
|
+
fbreak; };
|
1571
|
+
|
1572
|
+
#
|
1573
|
+
# CONSTANTS AND VARIABLES
|
1574
|
+
#
|
1575
|
+
|
1576
|
+
constant
|
1577
|
+
=> { emit(:tCONSTANT)
|
1578
|
+
fbreak; };
|
1579
|
+
|
1580
|
+
constant ambiguous_const_suffix
|
1581
|
+
=> { emit(:tCONSTANT, tok(@ts, tm))
|
1582
|
+
p = tm - 1; fbreak; };
|
1583
|
+
|
1584
|
+
global_var | class_var_v | instance_var_v
|
1585
|
+
=> { p = @ts - 1; fcall expr_variable; };
|
1586
|
+
|
1587
|
+
#
|
1588
|
+
# METHOD CALLS
|
1589
|
+
#
|
1590
|
+
|
1591
|
+
'.'
|
1592
|
+
=> { emit_table(PUNCTUATION)
|
1593
|
+
fnext expr_dot; fbreak; };
|
1594
|
+
|
1595
|
+
call_or_var
|
1596
|
+
=> { emit(:tIDENTIFIER)
|
1597
|
+
fnext expr_arg; fbreak; };
|
1598
|
+
|
1599
|
+
call_or_var [?!]
|
1600
|
+
=> { emit(:tFID)
|
1601
|
+
fnext expr_arg; fbreak; };
|
1602
|
+
|
1603
|
+
#
|
1604
|
+
# OPERATORS
|
1605
|
+
#
|
1606
|
+
|
1607
|
+
( e_lparen |
|
1608
|
+
operator_arithmetic |
|
1609
|
+
operator_rest
|
1610
|
+
) %{ tm = p } c_space_nl*
|
1611
|
+
=> { emit_table(PUNCTUATION, @ts, tm)
|
1612
|
+
fnext expr_beg; fbreak; };
|
1613
|
+
|
1614
|
+
e_rbrace | e_rparen | ']'
|
1615
|
+
=> { emit_table(PUNCTUATION)
|
1616
|
+
fbreak; };
|
1617
|
+
|
1618
|
+
operator_arithmetic '='
|
1619
|
+
=> { emit(:tOP_ASGN, tok(@ts, @te - 1))
|
1620
|
+
fnext expr_beg; fbreak; };
|
1621
|
+
|
1622
|
+
'?'
|
1623
|
+
=> { emit_table(PUNCTUATION)
|
1624
|
+
fnext expr_value; fbreak; };
|
1625
|
+
|
1626
|
+
punctuation_end
|
1627
|
+
=> { emit_table(PUNCTUATION)
|
1628
|
+
fnext expr_beg; fbreak; };
|
1629
|
+
|
1630
|
+
#
|
1631
|
+
# WHITESPACE
|
1632
|
+
#
|
1633
|
+
|
1634
|
+
'\\' e_heredoc_nl;
|
1635
|
+
'\\' ( any - c_nl ) {
|
1636
|
+
error "bare backslash only allowed before newline"
|
1637
|
+
};
|
1638
|
+
|
1639
|
+
'#' ( c_any - c_nl )*
|
1640
|
+
=> { @comments << tok(@ts, @te + 1) };
|
1641
|
+
|
1642
|
+
e_heredoc_nl
|
1643
|
+
=> { fgoto leading_dot; };
|
1644
|
+
|
1645
|
+
';'
|
1646
|
+
=> { emit_table(PUNCTUATION)
|
1647
|
+
fnext expr_value; fbreak; };
|
1648
|
+
|
1649
|
+
c_space+;
|
1650
|
+
|
1651
|
+
c_any
|
1652
|
+
=> {
|
1653
|
+
error "unexpected #{tok.inspect}"
|
1654
|
+
};
|
1655
|
+
|
1656
|
+
c_eof => do_eof;
|
1657
|
+
*|;
|
1658
|
+
|
1659
|
+
leading_dot := |*
|
1660
|
+
# Insane leading dots:
|
1661
|
+
# a #comment
|
1662
|
+
# .b: a.b
|
1663
|
+
c_space* '.' ( c_any - '.' )
|
1664
|
+
=> { fhold; fhold;
|
1665
|
+
fgoto expr_end; };
|
1666
|
+
|
1667
|
+
any
|
1668
|
+
=> { emit(:tNL, nil, @newline_s, @newline_s + 1)
|
1669
|
+
fnext line_begin; fhold; fbreak; };
|
1670
|
+
*|;
|
1671
|
+
|
1672
|
+
#
|
1673
|
+
# === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
|
1674
|
+
#
|
1675
|
+
|
1676
|
+
line_comment := |*
|
1677
|
+
'=end' c_line* c_nl
|
1678
|
+
=> { @comments << tok
|
1679
|
+
fgoto line_begin; };
|
1680
|
+
|
1681
|
+
c_line* c_nl
|
1682
|
+
=> { @comments << tok };
|
1683
|
+
|
1684
|
+
any
|
1685
|
+
=> {
|
1686
|
+
@comments = ""
|
1687
|
+
error "embedded document meats end of file (and they embark on a romantic journey)"
|
1688
|
+
};
|
1689
|
+
*|;
|
1690
|
+
|
1691
|
+
line_begin := |*
|
1692
|
+
c_space_nl+;
|
1693
|
+
|
1694
|
+
'#' c_line* c_eol
|
1695
|
+
=> { @comments << tok
|
1696
|
+
fhold; };
|
1697
|
+
|
1698
|
+
'=begin' ( c_space | c_eol )
|
1699
|
+
=> { @comments << tok(@ts, @te)
|
1700
|
+
fgoto line_comment; };
|
1701
|
+
|
1702
|
+
'__END__' c_eol
|
1703
|
+
=> { p = pe - 1 };
|
1704
|
+
|
1705
|
+
c_any
|
1706
|
+
=> { fhold; fgoto expr_value; };
|
1707
|
+
|
1708
|
+
c_eof => do_eof;
|
1709
|
+
*|;
|
1710
|
+
|
1711
|
+
}%%
|
1712
|
+
# %
|
1713
|
+
end
|