RubyGems - regexp_parser - Versions diffs - 0.1.6 → 0.2.0 - Mend

regexp_parser 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

checksums.yaml +4 -4
data/ChangeLog +57 -0
data/Gemfile +8 -0
data/LICENSE +1 -1
data/README.md +225 -206
data/Rakefile +9 -3
data/lib/regexp_parser.rb +7 -11
data/lib/regexp_parser/expression.rb +72 -14
data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
data/lib/regexp_parser/expression/classes/keep.rb +7 -0
data/lib/regexp_parser/expression/classes/set.rb +28 -7
data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
data/lib/regexp_parser/expression/methods/tests.rb +116 -0
data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
data/lib/regexp_parser/expression/quantifier.rb +10 -0
data/lib/regexp_parser/expression/sequence.rb +45 -0
data/lib/regexp_parser/expression/subexpression.rb +29 -1
data/lib/regexp_parser/lexer.rb +31 -8
data/lib/regexp_parser/parser.rb +118 -45
data/lib/regexp_parser/scanner.rb +1745 -1404
data/lib/regexp_parser/scanner/property.rl +57 -3
data/lib/regexp_parser/scanner/scanner.rl +161 -34
data/lib/regexp_parser/syntax.rb +12 -2
data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
data/lib/regexp_parser/syntax/tokens.rb +19 -2
data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
data/lib/regexp_parser/token.rb +23 -8
data/lib/regexp_parser/version.rb +5 -0
data/regexp_parser.gemspec +35 -0
data/test/expression/test_all.rb +6 -1
data/test/expression/test_base.rb +19 -0
data/test/expression/test_conditionals.rb +114 -0
data/test/expression/test_free_space.rb +33 -0
data/test/expression/test_set.rb +61 -0
data/test/expression/test_strfregexp.rb +214 -0
data/test/expression/test_subexpression.rb +24 -0
data/test/expression/test_tests.rb +99 -0
data/test/expression/test_to_h.rb +48 -0
data/test/expression/test_to_s.rb +46 -0
data/test/expression/test_traverse.rb +164 -0
data/test/lexer/test_all.rb +16 -3
data/test/lexer/test_conditionals.rb +101 -0
data/test/lexer/test_keep.rb +24 -0
data/test/lexer/test_literals.rb +51 -51
data/test/lexer/test_nesting.rb +62 -62
data/test/lexer/test_refcalls.rb +18 -20
data/test/parser/test_all.rb +18 -3
data/test/parser/test_alternation.rb +11 -14
data/test/parser/test_conditionals.rb +148 -0
data/test/parser/test_escapes.rb +29 -5
data/test/parser/test_free_space.rb +139 -0
data/test/parser/test_groups.rb +40 -0
data/test/parser/test_keep.rb +21 -0
data/test/scanner/test_all.rb +8 -2
data/test/scanner/test_conditionals.rb +166 -0
data/test/scanner/test_escapes.rb +8 -5
data/test/scanner/test_free_space.rb +133 -0
data/test/scanner/test_groups.rb +28 -0
data/test/scanner/test_keep.rb +33 -0
data/test/scanner/test_properties.rb +4 -0
data/test/scanner/test_scripts.rb +71 -1
data/test/syntax/ruby/test_1.9.3.rb +2 -2
data/test/syntax/ruby/test_2.0.0.rb +38 -0
data/test/syntax/ruby/test_2.2.0.rb +38 -0
data/test/syntax/ruby/test_all.rb +1 -8
data/test/syntax/ruby/test_files.rb +104 -0
data/test/test_all.rb +2 -1
data/test/token/test_all.rb +2 -0
data/test/token/test_token.rb +109 -0
metadata +75 -21
data/VERSION.yml +0 -5
data/lib/regexp_parser/ctype.rb +0 -48
data/test/syntax/ruby/test_2.x.rb +0 -46

data/test/expression/test_to_s.rb CHANGED Viewed

@@ -48,4 +48,50 @@ class ExpressionToS < Test::Unit::TestCase
     assert_equal( pattern, RP.parse(pattern).to_s )
   end
# Round-trip check for a free-spacing (/x) regexp: the string produced by the
# parsed tree's to_s must equal Regexp#source exactly. The "# ..." fragments
# inside %r{...}x are regexp-level comments, so they are part of the source
# text being compared and must survive the round trip.
+  def test_expression_to_s_multiline_source
+    multiline = %r{
+      \A
+      a?      # One letter
+      b{2,5}  # Another one
+      [c-g]+  # A set
+      \z
+    }x
+    assert_equal( multiline.source, RP.parse(multiline).to_s )
+  end
# Same pattern as the multiline source test, but the input handed to the parser
# is the String returned by Regexp#to_s (Ruby wraps the source in an
# options group such as "(?x-mi:...)") rather than the Regexp object itself.
# The parsed tree's to_s must reproduce that String unchanged.
+  def test_expression_to_s_multiline_to_s
+    multiline = %r{
+      \A
+      a?      # One letter
+      b{2,5}  # Another one
+      [c-g]+  # A set
+      \z
+    }x
+    assert_equal( multiline.to_s, RP.parse(multiline.to_s).to_s )
+  end
+  # Free spacing expressions that use spaces between quantifiers and their
+  # targets do not produce identical results due to the way quantifiers are
+  # applied to expressions (members, not nodes) and the merging of consecutive
+  # space nodes. This tests that they produce equivalent results.
# Equivalence is checked by behaviour instead of by text: a new Regexp is built
# from the parsed tree's to_s (with the EXTENDED flag re-applied, since to_s of
# the tree carries only the source) and both regexps must match the same
# substring of 'bbbcged'.
+  def test_expression_to_s_multiline_equivalence
+    multiline = %r{
+      \A
+      a   ?             # One letter
+      b {2,5}           # Another one
+      [c-g]  +          # A set
+      \z
+    }x
+    str  = 'bbbcged'
+    root = RP.parse(multiline)
+    assert_equal(
+      multiline.match(str)[0],
+      Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]
+    )
+  end
 end

data/test/expression/test_traverse.rb ADDED Viewed

@@ -0,0 +1,164 @@
+require File.expand_path("../../helpers", __FILE__)
# Tests for the tree-walking helpers called on a parsed root expression:
# traverse (plus its walk alias), each_expression and map. Each helper is
# exercised both with its default argument and with `true`, which the
# *_include_self tests show adds the root itself to what is yielded.
+class SubexpressionTraverse < Test::Unit::TestCase
# Tallies the three event kinds passed to the traverse block and pins the
# totals for this pattern: 7 :enter, the same number of :exit, and 8 :visit.
+  def test_subexpression_traverse
+    root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
+    enters = 0
+    visits = 0
+    exits  = 0
+    root.traverse {|event, exp, index|
+      enters += 1 if event == :enter
+      visits += 1 if event == :visit
+      exits  += 1 if event == :exit
+    }
+    assert_equal( 7, enters )
+    assert_equal( exits, enters )
+    assert_equal( 8, visits )
+  end
# Same pattern with traverse(true): one extra :enter/:exit pair (8 instead
# of 7) while the :visit count stays at 8.
+  def test_subexpression_traverse_include_self
+    root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
+    enters = 0
+    visits = 0
+    exits  = 0
+    root.traverse(true) {|event, exp, index|
+      enters += 1 if event == :enter
+      visits += 1 if event == :visit
+      exits  += 1 if event == :exit
+    }
+    assert_equal( 8, enters )
+    assert_equal( exits, enters )
+    assert_equal( 8, visits )
+  end
# Only checks that the root responds to :walk; it does not verify that walk
# behaves the same as traverse.
+  def test_subexpression_walk_alias
+    root = RP.parse(/abc/)
+    assert_equal( true, root.respond_to?(:walk) )
+  end
# Counts the block invocations made by each_expression: 10 for this pattern.
+  def test_subexpression_each_expression
+    root = RP.parse(/a(?x:b(c))|g[h-k]/)
+    count = 0
+    root.each_expression {|exp, index|
+      count += 1
+    }
+    assert_equal( 10, count )
+  end
# each_expression(true) yields 11 times. Note the pattern here uses the set
# [hi] whereas the test without include_self uses the range [h-k], so the two
# counts are not measured on the same input.
+  def test_subexpression_each_expression_include_self
+    root = RP.parse(/a(?x:b(c))|g[hi]/)
+    count = 0
+    root.each_expression(true) {|exp, index|
+      count += 1
+    }
+    assert_equal( 11, count )
+  end
# Pins the index argument for /a(b)c/ as [0, 1, 0, 2]; the repeated 0 for the
# expression inside the group suggests the index is the position within the
# parent expression, not a global counter.
+  def test_subexpression_each_expression_indices
+    root = RP.parse(/a(b)c/)
+    indices = []
+    root.each_expression {|exp, index| indices << index}
+    assert_equal( [0, 1, 0, 2], indices )
+  end
# With include_self the same sequence is expected with one extra leading 0.
+  def test_subexpression_each_expression_indices_include_self
+    root = RP.parse(/a(b)c/)
+    indices = []
+    root.each_expression(true) {|exp, index| indices << index}
+    assert_equal( [0, 0, 1, 0, 2], indices )
+  end
# Without a block, map must return an Array of 5 two-element Arrays whose first
# member is a Regexp::Expression::Base and whose last member is an integer.
+  def test_subexpression_map_without_block
+    root = RP.parse(/a(b([c-e]+))?/)
+    array = root.map
+    assert_equal( Array, array.class )
+    assert_equal( 5, array.length )
+    array.each do |item|
+      assert_equal( Array, item.class )
+      assert_equal( 2, item.length )
+      assert_equal( true, item.first.is_a?(Regexp::Expression::Base) )
# NOTE(review): Fixnum was folded into Integer in Ruby 2.4 (deprecated constant)
# and removed in Ruby 3.2, where this line raises NameError; Integer is the
# portable spelling. Left as published because this is a diff of the release.
+      assert_equal( true, item.last.is_a?(Fixnum) )
+    end
+  end
# map(true) without a block returns one more entry (6) than map without it.
+  def test_subexpression_map_without_block_include_self
+    root = RP.parse(/a(b([c-e]+))?/)
+    array = root.map(true)
+    assert_equal( Array, array.class )
+    assert_equal( 6, array.length )
+  end
# With a block, map collects the block's return values; here the block returns
# the index it was given.
+  def test_subexpression_map_indices
+    root = RP.parse(/a(b([c-e]+))?f*g/)
+    indices = root.map {|exp, index| index}
+    assert_equal( [0, 1, 0, 1, 0, 2, 3], indices )
+  end
# Same as the map indices test with one extra leading 0 from map(true).
+  def test_subexpression_map_indices_include_self
+    root = RP.parse(/a(b([c-e]+))?f*g/)
+    indices = root.map(true) {|exp, index| index}
+    assert_equal( [0, 0, 1, 0, 1, 0, 2, 3], indices )
+  end
# Collects [level, text] for expressions that report terminal?; the block
# returns nil for the others, which .compact then drops.
+  def test_subexpression_map_expressions
+    root = RP.parse(/a(b(c(d)))/)
+    levels = root.map {|exp, index|
+      [exp.level, exp.text] if exp.terminal?
+    }.compact
+    assert_equal(
+      [[0, 'a'], [1, 'b'], [2, 'c'], [3, 'd']],
+      levels
+    )
+  end
# Collects [level, to_s] for every expression including the root, whose level
# is expected to be nil. The block always returns an Array here, so the
# trailing .compact removes nothing.
+  def test_subexpression_map_expressions_include_self
+    root = RP.parse(/a(b(c(d)))/)
+    levels = root.map(true) {|exp, index|
+      [exp.level, exp.to_s]
+    }.compact
+    assert_equal( [
+        [nil, 'a(b(c(d)))'],
+        [0,   'a'],
+        [0,   '(b(c(d)))'],
+        [1,   'b'],
+        [1,   '(c(d))'],
+        [2,   'c'],
+        [2,   '(d)'],
+        [3,   'd']
+      ],
+      levels
+    )
+  end
+end

data/test/lexer/test_all.rb CHANGED Viewed

@@ -6,21 +6,34 @@ require File.expand_path("../../helpers", __FILE__)
   require File.expand_path("../test_#{tc}", __FILE__)
 end
# Loads test_conditionals.rb and test_keep.rb (siblings of this file) only when
# the running interpreter reports version 2.0.0 or later.
# NOTE(review): RUBY_VERSION is a String, so this is a lexicographic comparison;
# it holds for 1.x-9.x but would misorder a two-digit major ("10.0.0" sorts
# before "2.0.0"). Gem::Version.new(RUBY_VERSION) compares numerically.
+if RUBY_VERSION >= '2.0.0'
+  %w{conditionals keep}.each do|tc|
+    require File.expand_path("../test_#{tc}", __FILE__)
+  end
+end
 # Smoke tests for the lexer entry point. In this hunk every call site moves
 # from RL.scan to RL.lex, a check is added that each token's to_a has 8
 # elements, and a new test pins that RL.scan still returns the same result as
 # RL.lex for the same pattern.
 class TestRegexpLexer < Test::Unit::TestCase
   # The lexer result must be exactly an Array (assert_instance_of, not kind_of).
   def test_lexer_returns_an_array
-    assert_instance_of( Array, RL.scan('abc'))
+    assert_instance_of( Array, RL.lex('abc'))
   end
   # Every element must be a Regexp::Token, and (new in this version) every
   # token must convert to an 8-element array.
   def test_lexer_returns_tokens
-    tokens = RL.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
+    tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
     assert( tokens.all?{|token| token.kind_of?(Regexp::Token)},
           "Not all array members are tokens")
+    assert( tokens.all?{|token| token.to_a.length == 8},
+          "Not all tokens have a length of 8")
   end
   # Pins the total number of tokens produced for this pattern at 26.
   def test_lexer_token_count
-    tokens = RL.scan(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
+    tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
     assert_equal( 26, tokens.length )
   end
   # Backward-compatibility check: the old name RL.scan must give a result equal
   # to RL.lex for the same input.
+  def test_lexer_scan_alias
+    assert_equal( RL.lex(/a|b|c/), RL.scan(/a|b|c/) )
+  end
 end

data/test/lexer/test_conditionals.rb ADDED Viewed

@@ -0,0 +1,101 @@
+require File.expand_path("../../helpers", __FILE__)
# Lexer tests for conditional expressions of the form (?(cond)yes|no). Each
# expectation row is [token position, type, token, text, start, end, n1, n2, n3];
# test[1,8] drops the leading position and compares the remaining 8 fields
# with Token#to_a. Judging by the rows below, n1/n2/n3 appear to be nesting
# depths for groups, sets and conditionals respectively - confirm against
# Regexp::Token before relying on that reading.
+class LexerConditionals < Test::Unit::TestCase
# The whole class body is wrapped in this version check (String comparison);
# its matching `end` is the second-to-last line of the class.
+  if RUBY_VERSION >= '2.0'
+  # Basic lexer output and nesting tests
# The four patterns differ only in the group name (A-D), presumably so that each
# one is a distinct Hash key; identical keys would collapse into a single entry.
+  tests = {
+    '(?<A>a)(?(<A>)b|c)'  => [3, :conditional, :open,       '(?',     7,  9, 0, 0, 0],
+    '(?<B>a)(?(<B>)b|c)'  => [4, :conditional, :condition,  '(<B>)',  9, 14, 0, 0, 1],
+    '(?<C>a)(?(<C>)b|c)'  => [6, :conditional, :separator,  '|',     15, 16, 0, 0, 1],
+    '(?<D>a)(?(<D>)b|c)'  => [8, :conditional, :close,      ')',     17, 18, 0, 0, 0],
+  }
# Generates one test method per row, named from the row's type, token and a
# running counter (e.g. test_lexer_conditional_open_1).
+  count = 0
+  tests.each do |pattern, test|
+    define_method "test_lexer_#{test[1]}_#{test[2]}_#{count+=1}" do
+      tokens = RL.lex(pattern)
+      assert_equal( test[1,8], tokens[test[0]].to_a)
+    end
+  end
# Conditionals nested inside capture and named groups, with sets in the
# branches. Only the listed token positions are checked; positions missing
# from the table are not asserted.
+  def test_lexer_conditional_mixed_nesting
+    regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
+    tokens = RL.lex(regexp)
# NOTE(review): `expected` receives the return value of .each and is never read
# afterwards; the assertions run inside the block.
+    expected = [
+      [ 0, :group,       :capture,          '(',       0,  1, 0, 0, 0],
+      [ 1, :group,       :named,            '(?<A>',   1,  6, 1, 0, 0],
+      [ 5, :conditional, :open,             '(?',     13, 15, 2, 0, 0],
+      [ 6, :conditional, :condition,        '(<A>)',  15, 20, 2, 0, 1],
+      [ 8, :conditional, :separator,        '|',      21, 22, 2, 0, 1],
+      [10, :conditional, :open,             '(?',     23, 25, 3, 0, 1],
+      [11, :conditional, :condition,        '(<B>)',  25, 30, 3, 0, 2],
+      [12, :set,         :open,             '[',      30, 31, 3, 0, 2],
+      [13, :set,         :range,            'e-g',    31, 34, 3, 1, 2],
+      [14, :set,         :close,            ']',      34, 35, 3, 0, 2],
+      [15, :conditional, :separator,        '|',      35, 36, 3, 0, 2],
+      [19, :conditional, :close,            ')',      41, 42, 3, 0, 1],
+      [21, :conditional, :close,            ')',      43, 44, 2, 0, 0],
+      [22, :group,       :close,            ')',      44, 45, 1, 0, 0],
+      [23, :group,       :close,            ')',      45, 46, 0, 0, 0]
+    ].each do |test|
+      assert_equal( test[1,8], tokens[test[0]].to_a)
+    end
+  end
# Conditionals nested three deep in both branches, keyed on numbered groups.
# Only :conditional tokens are listed; the last column rises to 3 at the
# innermost level and returns to 0 at the final close.
+  def test_lexer_conditional_deep_nesting
+    regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
+    tokens = RL.lex(regexp)
# NOTE(review): as in the mixed-nesting test, `expected` is assigned but unused.
+    expected = [
+      [ 9, :conditional, :open,       '(?',    9, 11, 0, 0, 0],
+      [10, :conditional, :condition,  '(1)',  11, 14, 0, 0, 1],
+      [11, :conditional, :open,       '(?',   14, 16, 0, 0, 1],
+      [12, :conditional, :condition,  '(2)',  16, 19, 0, 0, 2],
+      [13, :conditional, :open,       '(?',   19, 21, 0, 0, 2],
+      [14, :conditional, :condition,  '(3)',  21, 24, 0, 0, 3],
+      [16, :conditional, :separator,  '|',    25, 26, 0, 0, 3],
+      [18, :conditional, :close,      ')',    27, 28, 0, 0, 2],
+      [19, :conditional, :close,      ')',    28, 29, 0, 0, 1],
+      [20, :conditional, :separator,  '|',    29, 30, 0, 0, 1],
+      [21, :conditional, :open,       '(?',   30, 32, 0, 0, 1],
+      [22, :conditional, :condition,  '(3)',  32, 35, 0, 0, 2],
+      [23, :conditional, :open,       '(?',   35, 37, 0, 0, 2],
+      [24, :conditional, :condition,  '(2)',  37, 40, 0, 0, 3],
+      [26, :conditional, :separator,  '|',    41, 42, 0, 0, 3],
+      [28, :conditional, :close,      ')',    43, 44, 0, 0, 2],
+      [29, :conditional, :separator,  '|',    44, 45, 0, 0, 2],
+      [30, :conditional, :open,       '(?',   45, 47, 0, 0, 2],
+      [31, :conditional, :condition,  '(1)',  47, 50, 0, 0, 3],
+      [33, :conditional, :separator,  '|',    51, 52, 0, 0, 3],
+      [35, :conditional, :close,      ')',    53, 54, 0, 0, 2],
+      [36, :conditional, :close,      ')',    54, 55, 0, 0, 1],
+      [37, :conditional, :close,      ')',    55, 56, 0, 0, 0]
+    ].each do |test|
+      assert_equal( test[1,8], tokens[test[0]].to_a)
+    end
+  end
+  end
+end

data/test/lexer/test_keep.rb ADDED Viewed

@@ -0,0 +1,24 @@
+require File.expand_path("../../helpers", __FILE__)
# Lexer tests for the \K escape: the token it produces must have type :keep
# and token :mark.
# NOTE(review): this class has no RUBY_VERSION guard of its own, unlike the
# `if RUBY_VERSION >= '2.0'` wrapper used in test_conditionals.rb; presumably it
# relies on the loader in test/lexer/test_all.rb to skip it on older Rubies.
+class LexerKeep < Test::Unit::TestCase
# \K between two literal runs: the keep mark is expected at token position 1.
+  def test_lex_keep_token
+    regexp = /ab\Kcd/
+    tokens = RL.lex(regexp)
+    assert_equal( :keep, tokens[1].type )
+    assert_equal( :mark, tokens[1].token )
+  end
# \K inside groups on both sides of an alternation. In the second group the
# leading `\\` is an escaped backslash, so the `\K` after it is still a keep
# mark; the marks are expected at token positions 2 and 9.
+  def test_lex_keep_nested
+    regexp = /(a\Kb)|(c\\\Kd)ef/
+    tokens = RL.lex(regexp)
+    assert_equal( :keep, tokens[2].type )
+    assert_equal( :mark, tokens[2].token )
+    assert_equal( :keep, tokens[9].type )
+    assert_equal( :mark, tokens[9].token )
+  end
+end

data/test/lexer/test_literals.rb CHANGED Viewed

@@ -7,86 +7,86 @@ class LexerLiterals < Test::Unit::TestCase
   # Expected lexer output per pattern: token position => the fields compared
   # against tokens[position].to_a further down in this file. Every row in this
   # hunk changes the same way: one trailing 0 is appended, so each expected
   # array grows from 7 to 8 elements. The two integers after the text advance
   # by the UTF-8 byte width of the characters (0, 2 for a single 2-byte
   # letter), i.e. they are byte offsets, not character offsets.
   tests = {
     # ascii, single byte characters
     'a' => {
-      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
+      0     => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
     },
     'ab+' => {
-      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
-      1     => [:literal,     :literal,       'b',        1, 2, 0, 0],
-      2     => [:quantifier,  :one_or_more,   '+',        2, 3, 0, 0],
+      0     => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
+      1     => [:literal,     :literal,       'b',        1, 2, 0, 0, 0],
+      2     => [:quantifier,  :one_or_more,   '+',        2, 3, 0, 0, 0],
     },
     # 2 byte wide characters, Arabic
     'ا' => {
-      0     => [:literal,     :literal,       'ا',        0, 2, 0, 0],
+      0     => [:literal,     :literal,       'ا',        0, 2, 0, 0, 0],
     },
     'aاbبcت' => {
-      0     => [:literal,     :literal,       'aاbبcت',   0, 9, 0, 0],
+      0     => [:literal,     :literal,       'aاbبcت',   0, 9, 0, 0, 0],
     },
     'aاbبت?' => {
-      0     => [:literal,     :literal,       'aاbب',     0, 6, 0, 0],
-      1     => [:literal,     :literal,       'ت',        6, 8, 0, 0],
-      2     => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0],
+      0     => [:literal,     :literal,       'aاbب',     0, 6, 0, 0, 0],
+      1     => [:literal,     :literal,       'ت',        6, 8, 0, 0, 0],
+      2     => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0, 0],
     },
     'aا?bبcت+' => {
-      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
-      1     => [:literal,     :literal,       'ا',        1, 3, 0, 0],
-      2     => [:quantifier,  :zero_or_one,   '?',        3, 4, 0, 0],
-      3     => [:literal,     :literal,       'bبc',      4, 8, 0, 0],
-      4     => [:literal,     :literal,       'ت',        8, 10, 0, 0],
-      5     => [:quantifier,  :one_or_more,   '+',        10, 11, 0, 0],
+      0     => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
+      1     => [:literal,     :literal,       'ا',        1, 3, 0, 0, 0],
+      2     => [:quantifier,  :zero_or_one,   '?',        3, 4, 0, 0, 0],
+      3     => [:literal,     :literal,       'bبc',      4, 8, 0, 0, 0],
+      4     => [:literal,     :literal,       'ت',        8, 10, 0, 0, 0],
+      5     => [:quantifier,  :one_or_more,   '+',        10, 11, 0, 0, 0],
     },
     'a(اbب+)cت?' => {
-      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
-      1     => [:group,       :capture,       '(',        1, 2, 0, 0],
-      2     => [:literal,     :literal,       'اb',       2, 5, 1, 0],
-      3     => [:literal,     :literal,       'ب',        5, 7, 1, 0],
-      4     => [:quantifier,  :one_or_more,   '+',        7, 8, 1, 0],
-      5     => [:group,       :close,         ')',        8, 9, 0, 0],
-      6     => [:literal,     :literal,       'c',        9, 10, 0, 0],
-      7     => [:literal,     :literal,       'ت',        10, 12, 0, 0],
-      8     => [:quantifier,  :zero_or_one,   '?',        12, 13, 0, 0],
+      0     => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
+      1     => [:group,       :capture,       '(',        1, 2, 0, 0, 0],
+      2     => [:literal,     :literal,       'اb',       2, 5, 1, 0, 0],
+      3     => [:literal,     :literal,       'ب',        5, 7, 1, 0, 0],
+      4     => [:quantifier,  :one_or_more,   '+',        7, 8, 1, 0, 0],
+      5     => [:group,       :close,         ')',        8, 9, 0, 0, 0],
+      6     => [:literal,     :literal,       'c',        9, 10, 0, 0, 0],
+      7     => [:literal,     :literal,       'ت',        10, 12, 0, 0, 0],
+      8     => [:quantifier,  :zero_or_one,   '?',        12, 13, 0, 0, 0],
     },
     # 3 byte wide characters, Japanese
     'ab?れます+cd' => {
-      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
-      1     => [:literal,     :literal,       'b',        1, 2, 0, 0],
-      2     => [:quantifier,  :zero_or_one,   '?',        2, 3, 0, 0],
-      3     => [:literal,     :literal,       'れま',     3, 9, 0, 0],
-      4     => [:literal,     :literal,       'す',       9, 12, 0, 0],
-      5     => [:quantifier,  :one_or_more,   '+',        12, 13, 0, 0],
-      6     => [:literal,     :literal,       'cd',       13, 15, 0, 0],
+      0     => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
+      1     => [:literal,     :literal,       'b',        1, 2, 0, 0, 0],
+      2     => [:quantifier,  :zero_or_one,   '?',        2, 3, 0, 0, 0],
+      3     => [:literal,     :literal,       'れま',     3, 9, 0, 0, 0],
+      4     => [:literal,     :literal,       'す',       9, 12, 0, 0, 0],
+      5     => [:quantifier,  :one_or_more,   '+',        12, 13, 0, 0, 0],
+      6     => [:literal,     :literal,       'cd',       13, 15, 0, 0, 0],
     },
     # 4 byte wide characters, Osmanya
     '𐒀𐒁?𐒂ab+𐒃' => {
-      0     => [:literal,     :literal,       '𐒀',        0, 4, 0, 0],
-      1     => [:literal,     :literal,       '𐒁',        4, 8, 0, 0],
-      2     => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0],
-      3     => [:literal,     :literal,       '𐒂a',       9, 14, 0, 0],
-      4     => [:literal,     :literal,       'b',        14, 15, 0, 0],
-      5     => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0],
-      6     => [:literal,     :literal,       '𐒃',        16, 20, 0, 0],
+      0     => [:literal,     :literal,       '𐒀',        0, 4, 0, 0, 0],
+      1     => [:literal,     :literal,       '𐒁',        4, 8, 0, 0, 0],
+      2     => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0, 0],
+      3     => [:literal,     :literal,       '𐒂a',       9, 14, 0, 0, 0],
+      4     => [:literal,     :literal,       'b',        14, 15, 0, 0, 0],
+      5     => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0, 0],
+      6     => [:literal,     :literal,       '𐒃',        16, 20, 0, 0, 0],
     },
     'mu𝄞?si*𝄫c+' => {
-      0     => [:literal,     :literal,       'mu',       0, 2, 0, 0],
-      1     => [:literal,     :literal,       '𝄞',        2, 6, 0, 0],
-      2     => [:quantifier,  :zero_or_one,   '?',        6, 7, 0, 0],
-      3     => [:literal,     :literal,       's',        7, 8, 0, 0],
-      4     => [:literal,     :literal,       'i',        8, 9, 0, 0],
-      5     => [:quantifier,  :zero_or_more,  '*',        9, 10, 0, 0],
-      6     => [:literal,     :literal,       '𝄫',        10, 14, 0, 0],
-      7     => [:literal,     :literal,       'c',        14, 15, 0, 0],
-      8     => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0],
+      0     => [:literal,     :literal,       'mu',       0, 2, 0, 0, 0],
+      1     => [:literal,     :literal,       '𝄞',        2, 6, 0, 0, 0],
+      2     => [:quantifier,  :zero_or_one,   '?',        6, 7, 0, 0, 0],
+      3     => [:literal,     :literal,       's',        7, 8, 0, 0, 0],
+      4     => [:literal,     :literal,       'i',        8, 9, 0, 0, 0],
+      5     => [:quantifier,  :zero_or_more,  '*',        9, 10, 0, 0, 0],
+      6     => [:literal,     :literal,       '𝄫',        10, 14, 0, 0, 0],
+      7     => [:literal,     :literal,       'c',        14, 15, 0, 0, 0],
+      8     => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0, 0],
     },
   }
@@ -94,7 +94,7 @@ class LexerLiterals < Test::Unit::TestCase
   tests.each do |pattern, checks|
     define_method "test_lex_literal_runs_#{count+=1}" do
-      tokens = RL.scan(pattern)
+      tokens = RL.lex(pattern)
       checks.each do |offset, token|
         assert_equal( token, tokens[offset].to_a )
       end
@@ -103,17 +103,17 @@ class LexerLiterals < Test::Unit::TestCase
   end
   # A lone 2-byte character followed by `+` must lex to exactly two tokens
   # (presumably the literal and its quantifier; only the count is asserted).
   # The only change in this hunk is the rename of the call from RL.scan to RL.lex.
   def test_lex_single_2_byte_char
-    tokens = RL.scan('ا+')
+    tokens = RL.lex('ا+')
     assert_equal( 2, tokens.length )
   end
   # A lone 3-byte character followed by `+` must lex to exactly two tokens
   # (presumably the literal and its quantifier; only the count is asserted).
   # The only change in this hunk is the rename of the call from RL.scan to RL.lex.
   def test_lex_single_3_byte_char
-    tokens = RL.scan('れ+')
+    tokens = RL.lex('れ+')
     assert_equal( 2, tokens.length )
   end
   # A lone 4-byte character followed by `+` must lex to exactly two tokens
   # (presumably the literal and its quantifier; only the count is asserted).
   # The only change in this hunk is the rename of the call from RL.scan to RL.lex.
   def test_lex_single_4_byte_char
-    tokens = RL.scan('𝄞+')
+    tokens = RL.lex('𝄞+')
     assert_equal( 2, tokens.length )
   end