RubyGems - regexp_parser - Versions diffs - 0.5.0 → 1.0.0 - Mend

regexp_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +242 -0
data/Gemfile +1 -0
data/README.md +21 -17
data/Rakefile +31 -0
data/lib/regexp_parser/expression.rb +11 -9
data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
data/lib/regexp_parser/expression/classes/backref.rb +21 -16
data/lib/regexp_parser/expression/classes/escape.rb +81 -10
data/lib/regexp_parser/expression/classes/group.rb +20 -20
data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
data/lib/regexp_parser/expression/classes/property.rb +6 -0
data/lib/regexp_parser/expression/classes/set.rb +10 -93
data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
data/lib/regexp_parser/expression/methods/tests.rb +4 -14
data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
data/lib/regexp_parser/expression/quantifier.rb +3 -4
data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
data/lib/regexp_parser/expression/subexpression.rb +6 -10
data/lib/regexp_parser/lexer.rb +13 -17
data/lib/regexp_parser/parser.rb +170 -116
data/lib/regexp_parser/scanner.rb +952 -2431
data/lib/regexp_parser/scanner/char_type.rl +31 -0
data/lib/regexp_parser/scanner/properties/long.yml +561 -0
data/lib/regexp_parser/scanner/properties/short.yml +225 -0
data/lib/regexp_parser/scanner/property.rl +7 -806
data/lib/regexp_parser/scanner/scanner.rl +112 -154
data/lib/regexp_parser/syntax/base.rb +4 -4
data/lib/regexp_parser/syntax/tokens.rb +1 -0
data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
data/lib/regexp_parser/version.rb +1 -1
data/regexp_parser.gemspec +2 -1
data/test/expression/test_base.rb +2 -1
data/test/expression/test_clone.rb +0 -57
data/test/expression/test_set.rb +31 -8
data/test/expression/test_strfregexp.rb +13 -4
data/test/expression/test_subexpression.rb +25 -0
data/test/expression/test_traverse.rb +25 -25
data/test/helpers.rb +1 -0
data/test/lexer/test_all.rb +1 -1
data/test/lexer/test_conditionals.rb +9 -7
data/test/lexer/test_nesting.rb +39 -21
data/test/lexer/test_refcalls.rb +4 -4
data/test/parser/set/test_intersections.rb +127 -0
data/test/parser/set/test_ranges.rb +111 -0
data/test/parser/test_all.rb +4 -1
data/test/parser/test_escapes.rb +41 -9
data/test/parser/test_groups.rb +22 -3
data/test/parser/test_posix_classes.rb +27 -0
data/test/parser/test_properties.rb +17 -290
data/test/parser/test_refcalls.rb +66 -26
data/test/parser/test_sets.rb +132 -129
data/test/scanner/test_all.rb +1 -7
data/test/scanner/test_conditionals.rb +16 -16
data/test/scanner/test_errors.rb +0 -30
data/test/scanner/test_escapes.rb +1 -2
data/test/scanner/test_free_space.rb +28 -28
data/test/scanner/test_groups.rb +35 -35
data/test/scanner/test_meta.rb +1 -1
data/test/scanner/test_properties.rb +87 -114
data/test/scanner/test_refcalls.rb +18 -18
data/test/scanner/test_scripts.rb +19 -351
data/test/scanner/test_sets.rb +87 -60
data/test/scanner/test_unicode_blocks.rb +4 -105
data/test/support/warning_extractor.rb +1 -1
data/test/syntax/test_syntax.rb +7 -0
data/test/syntax/versions/test_1.8.rb +2 -4
metadata +17 -7
data/ChangeLog +0 -325
data/test/scanner/test_emojis.rb +0 -31

data/test/parser/test_sets.rb CHANGED Viewed

@@ -1,176 +1,179 @@
 require File.expand_path("../../helpers", __FILE__)
 class TestParserSets < Test::Unit::TestCase
   def test_parse_set_basic
-    root = RP.parse('[a-c]+', :any)
-    exp  = root.expressions.at(0)
+    root = RP.parse('[ab]+')
+    exp  = root[0]
+    assert_equal CharacterSet, exp.class
+    assert_equal 2, exp.count
-    assert_equal true, exp.is_a?(CharacterSet)
-    assert_equal true, exp.include?('a-c')
+    assert_equal Literal, exp[0].class
+    assert_equal 'a', exp[0].text
+    assert_equal Literal, exp[1].class
+    assert_equal 'b', exp[1].text
-    assert_equal true,  exp.quantified?
-    assert_equal 1,     exp.quantifier.min
-    assert_equal(-1,    exp.quantifier.max)
+    assert       exp.quantified?
+    assert_equal 1, exp.quantifier.min
+    assert_equal(-1, exp.quantifier.max)
   end
-  def test_parse_set_posix_class
-    root = RP.parse('[[:digit:][:lower:]]+', 'ruby/1.9')
-    exp  = root.expressions.at(0)
+  def test_parse_set_char_type
+    root = RP.parse('[a\dc]')
+    exp  = root[0]
-    assert_equal true,  exp.is_a?(CharacterSet)
+    assert_equal CharacterSet, exp.class
+    assert_equal 3, exp.count
-    assert_equal true,  exp.include?('[:digit:]')
-    assert_equal true,  exp.include?('[:lower:]')
+    assert_equal CharacterType::Digit, exp[1].class
+    assert_equal '\d', exp[1].text
+  end
-    assert_equal true,  exp.matches?("6")
+  def test_parse_set_escape_sequence_backspace
+    root = RP.parse('[a\bc]')
+    exp  = root[0]
-    assert_equal true,  exp.matches?("v")
-    assert_equal false, exp.matches?("\x48")
+    assert_equal CharacterSet, exp.class
+    assert_equal 3, exp.count
+    assert_equal EscapeSequence::Backspace, exp[1].class
+    assert_equal '\b', exp[1].text
+    assert       exp.matches?('a')
+    assert       exp.matches?("\b")
+    refute       exp.matches?('b')
+    assert       exp.matches?('c')
   end
-  def test_parse_set_members
-    root = RP.parse('[ac-eh]', :any)
-    exp  = root.expressions.at(0)
+  def test_parse_set_escape_sequence_hex
+    root = RP.parse('[a\x20c]', :any)
+    exp  = root[0]
+    assert_equal CharacterSet, exp.class
+    assert_equal 3, exp.count
-    assert_equal true,  exp.include?('a')
-    assert_equal true,  exp.include?('c-e')
-    assert_equal true,  exp.include?('h')
-    assert_equal false, exp.include?(']')
+    assert_equal EscapeSequence::Hex, exp[1].class
+    assert_equal '\x20', exp[1].text
   end
-  def test_parse_hex_members
-    root = RP.parse('[\x20\x24-\x26\x28]', :any)
-    exp  = root.expressions.at(0)
+  def test_parse_set_escape_sequence_codepoint
+    root = RP.parse('[a\u0640]')
+    exp  = root[0]
-    assert_equal true,  exp.include?('\x20')
-    assert_equal true,  exp.include?('\x24-\x26')
-    assert_equal true,  exp.include?('\x28')
-    assert_equal false, exp.include?(']')
+    assert_equal CharacterSet, exp.class
+    assert_equal 2, exp.count
+    assert_equal EscapeSequence::Codepoint, exp[1].class
+    assert_equal '\u0640', exp[1].text
   end
-  def test_parse_chat_type_set_members
-    root = RP.parse('[\da-z]', :any)
-    exp  = root.expressions.at(0)
+  def test_parse_set_escape_sequence_codepoint_list
+    root = RP.parse('[a\u{41 1F60D}]')
+    exp  = root[0]
+    assert_equal CharacterSet, exp.class
+    assert_equal 2, exp.count
-    assert_equal true,  exp.include?('\d')
-    assert_equal true,  exp.include?('a-z')
+    assert_equal EscapeSequence::CodepointList, exp[1].class
+    assert_equal '\u{41 1F60D}', exp[1].text
   end
-  def test_parse_set_collating_sequence
-    root = RP.parse('[a[.span-ll.]h]', :any)
-    exp  = root.expressions.at(0)
+  def test_parse_set_posix_class
+    root = RP.parse('[[:digit:][:^lower:]]+')
+    exp  = root[0]
+    assert_equal CharacterSet, exp.class
+    assert_equal 2, exp.count
-    assert_equal true,  exp.include?('[.span-ll.]')
-    assert_equal false, exp.include?(']')
+    assert_equal PosixClass, exp[0].class
+    assert_equal '[:digit:]', exp[0].text
+    assert_equal PosixClass, exp[1].class
+    assert_equal '[:^lower:]', exp[1].text
   end
-  def test_parse_set_character_equivalents
-    root = RP.parse('[a[=e=]h]', :any)
-    exp  = root.expressions.at(0)
+  def test_parse_set_nesting
+    root = RP.parse('[a[b[c]d]e]')
+    exp = root[0]
+    assert_equal CharacterSet, exp.class
+    assert_equal 3, exp.count
+    assert_equal Literal, exp[0].class
+    assert_equal Literal, exp[2].class
+    subset1 = exp[1]
+    assert_equal CharacterSet, subset1.class
+    assert_equal 3, subset1.count
+    assert_equal Literal, subset1[0].class
+    assert_equal Literal, subset1[2].class
+    subset2 = subset1[1]
+    assert_equal CharacterSet, subset2.class
+    assert_equal 1, subset2.count
+    assert_equal Literal, subset2[0].class
+  end
-    assert_equal true,  exp.include?('[=e=]')
-    assert_equal false, exp.include?(']')
+  def test_parse_set_nesting_negative
+    root = RP.parse('[a[^b[c]]]')
+    exp  = root[0]
+    assert_equal CharacterSet, exp.class
+    assert_equal 2, exp.count
+    assert_equal Literal, exp[0].class
+    refute       exp.negative?
+    subset1 = exp[1]
+    assert_equal CharacterSet, subset1.class
+    assert_equal 2, subset1.count
+    assert_equal Literal, subset1[0].class
+    assert       subset1.negative?
+    subset2 = subset1[1]
+    assert_equal CharacterSet, subset2.class
+    assert_equal 1, subset2.count
+    assert_equal Literal, subset2[0].class
+    refute       subset2.negative?
   end
-  def test_parse_set_nesting_tos
+  def test_parse_set_nesting_to_s
     pattern = '[a[b[^c]]]'
-    root    = RP.parse(pattern, 'ruby/1.9')
+    root    = RP.parse(pattern)
     assert_equal pattern, root.to_s
   end
-  def test_parse_set_nesting_include
-    root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
-    exp  = root.expressions.at(0)
+  def test_parse_set_literals_are_not_merged
+    root = RP.parse("[#{'a' * 10}]")
+    exp  = root[0]
-    assert_equal true, exp.is_a?(CharacterSet)
-    assert_equal true, exp.include?('a')
-    assert_equal true, exp.include?('b')
-    assert_equal true, exp.include?('c')
+    assert_equal 10, exp.count
   end
-  def test_parse_set_nesting_include_at_depth
-    root = RP.parse('[a[b]c]', 'ruby/1.9')
+  def test_parse_set_whitespace_is_not_merged
+    root = RP.parse("[#{' ' * 10}]")
+    exp  = root[0]
-    exp = root.expressions.at(0)
-    assert_equal true,  exp.is_a?(CharacterSet)
-    assert_equal true,  exp.include?('a')
-    assert_equal true,  exp.include?('b')
-    assert_equal false, exp.include?('b', true) # should not include b directly
-    sub = exp.members.at(1)
-    assert_equal false, sub.include?('a')
-    assert_equal true,  sub.include?('b')
-    assert_equal true,  sub.include?('b', true)
-    assert_equal false, sub.include?('c')
+    assert_equal 10, exp.count
   end
-  def test_parse_set_nesting_include_at_depth_2
-    root = RP.parse('[a[b[c[d]e]f]g]', 'ruby/1.9')
-    exp = root.expressions.at(0)
-    assert_equal true,  exp.is_a?(CharacterSet)
-    assert_equal true,  exp.include?('a')
-    assert_equal true,  exp.include?('b')
-    assert_equal false, exp.include?('b', true) # should not include b directly
-    sub = exp.members.at(1)
-    assert_equal false, sub.include?('a')
-    assert_equal true,  sub.include?('b')
-    assert_equal true,  sub.include?('b', true)
-    assert_equal true,  sub.include?('f', true)
-    assert_equal true,  sub.include?('c')
-    assert_equal false, sub.include?('c', true)
-    sub2 = sub.members.at(1)
-    assert_equal false, sub2.include?('a')
-    assert_equal false, sub2.include?('b')
-    assert_equal true,  sub2.include?('c')
-    assert_equal true,  sub2.include?('c', true)
-    assert_equal true,  sub2.include?('e', true)
-    assert_equal true,  sub2.include?('d')
-    assert_equal false, sub2.include?('d', true)
-    sub3 = sub2.members.at(1)
-    assert_equal false, sub3.include?('a')
-    assert_equal false, sub3.include?('g')
-    assert_equal false, sub3.include?('b')
-    assert_equal false, sub3.include?('f')
-    assert_equal false, sub3.include?('c')
-    assert_equal false, sub3.include?('e')
-    assert_equal true,  sub3.include?('d')
-    assert_equal true,  sub3.include?('d', true)
-  end
-  # character subsets and negated posix classes are not available in ruby 1.8
-  if RUBY_VERSION >= '1.9'
-    def test_parse_set_nesting_matches
-      root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
-      exp  = root.expressions.at(0)
-      assert_equal true,  exp.matches?('b')
-      assert_equal false, exp.matches?('c')
-    end
-    def test_parse_set_nesting_not_matches
-      root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
-      exp  = root.expressions.at(0)
+  def test_parse_set_whitespace_is_not_merged_in_x_mode
+    root = RP.parse("(?x)[#{' ' * 10}]")
+    exp  = root[1]
-      assert_equal false, exp.matches?('c')
-    end
+    assert_equal 10, exp.count
+  end
-    def test_parse_set_negated_posix_class
-      root = RP.parse('[[:^xdigit:][:^lower:]]+', 'ruby/1.9')
-      exp  = root.expressions.at(0)
+  # TODO: Collations and equivalents need own exp class if they ever get enabled
+  def test_parse_set_collating_sequence
+    root = RP.parse('[a[.span-ll.]h]', :any)
+    exp  = root[0]
-      assert_equal true,  exp.is_a?(CharacterSet)
+    assert_equal '[.span-ll.]', exp[1].to_s
+  end
-      assert_equal true,  exp.include?('[:^xdigit:]')
-      assert_equal true,  exp.include?('[:^lower:]')
+  def test_parse_set_character_equivalents
+    root = RP.parse('[a[=e=]h]', :any)
+    exp  = root[0]
-      assert_equal true,  exp.matches?('GT')
-    end
+    assert_equal '[=e=]', exp[1].to_s
   end
 end

data/test/scanner/test_all.rb CHANGED Viewed

@@ -13,12 +13,6 @@ if RUBY_VERSION >= '2.0.0'
   end
 end
-if RUBY_VERSION >= '2.5.0'
-  %w{emojis}.each do|tc|
-    require File.expand_path("../test_#{tc}", __FILE__)
-  end
-end
 class TestRegexpScanner < Test::Unit::TestCase
   def test_scanner_returns_an_array
@@ -38,7 +32,7 @@ class TestRegexpScanner < Test::Unit::TestCase
   def test_scanner_token_count
     re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
-    assert_equal 26, RS.scan(re).length
+    assert_equal 28, RS.scan(re).length
   end
 end

data/test/scanner/test_conditionals.rb CHANGED Viewed

@@ -4,22 +4,22 @@ class ScannerConditionals < Test::Unit::TestCase
   # Basic conditional scan token tests
   tests = {
-    /(a)(?(1)T|F)/        => [3,  :conditional,  :open,              '(?',   3,  5],
-    /(a)(?(1)T|F)/        => [4,  :conditional,  :condition_open,    '(',    5,  6],
-    /(a)(?(1)T|F)/        => [5,  :conditional,  :condition,         '3',    6,  7],
-    /(a)(?(1)T|F)/        => [6,  :conditional,  :condition_close,   ')',    7,  8],
-    /(a)(?(1)T|F)/        => [7,  :literal,      :literal,           'T',    8,  9],
-    /(a)(?(1)T|F)/        => [8,  :conditional,  :separator,         '|',    9,  10],
-    /(a)(?(1)T|F)/        => [9,  :literal,      :literal,           'F',    10, 11],
-    /(a)(?(1)T|F)/        => [10, :conditional,  :close,             ')',    11, 12],
-    /(a)(?(1)TRUE)/       => [8,  :conditional,  :close,             ')',    12, 13],
-    /(a)(?(1)TRUE|)/      => [8,  :conditional,  :separator,         '|',    12, 13],
-    /(a)(?(1)TRUE|)/      => [9,  :conditional,  :close,             ')',    13, 14],
-    /(?<N>A)(?(<N>)T|F)/  => [5,  :conditional,  :condition,         '<N>',  10, 13],
-    /(?'N'A)(?('N')T|F)/  => [5,  :conditional,  :condition,         "'N'",  10, 13],
+    /(a)(?(1)T|F)1/       => [3,  :conditional,  :open,              '(?',   3,  5],
+    /(a)(?(1)T|F)2/       => [4,  :conditional,  :condition_open,    '(',    5,  6],
+    /(a)(?(1)T|F)3/       => [5,  :conditional,  :condition,         '1',    6,  7],
+    /(a)(?(1)T|F)4/       => [6,  :conditional,  :condition_close,   ')',    7,  8],
+    /(a)(?(1)T|F)5/       => [7,  :literal,      :literal,           'T',    8,  9],
+    /(a)(?(1)T|F)6/       => [8,  :conditional,  :separator,         '|',    9,  10],
+    /(a)(?(1)T|F)7/       => [9,  :literal,      :literal,           'F',    10, 11],
+    /(a)(?(1)T|F)8/       => [10, :conditional,  :close,             ')',    11, 12],
+    /(a)(?(1)TRUE)9/      => [8,  :conditional,  :close,             ')',    12, 13],
+    /(a)(?(1)TRUE|)10/    => [8,  :conditional,  :separator,         '|',    12, 13],
+    /(a)(?(1)TRUE|)11/    => [9,  :conditional,  :close,             ')',    13, 14],
+    /(?<N>A)(?(<N>)T|F)1/ => [5,  :conditional,  :condition,         '<N>',  10, 13],
+    /(?'N'A)(?('N')T|F)2/ => [5,  :conditional,  :condition,         "'N'",  10, 13],
   }
   tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|

data/test/scanner/test_errors.rb CHANGED Viewed

@@ -46,18 +46,6 @@ class ScannerErrors < Test::Unit::TestCase
     assert_raise( RS::PrematureEndError ) { RS.scan('\x') }
   end
-  def test_scanner_eof_in_wide_hex_escape
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{0') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{02') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{024') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{0246') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468A') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468AC') }
-    assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468ACE') }
-  end
   def test_scanner_eof_in_codepoint_escape
     assert_raise( RS::PrematureEndError ) { RS.scan('\u') }
     assert_raise( RS::PrematureEndError ) { RS.scan('\u0') }
@@ -94,24 +82,6 @@ class ScannerErrors < Test::Unit::TestCase
     assert_raise( RS::InvalidSequenceError ) { RS.scan('\xZ0') }
   end
-  def test_scanner_invalid_wide_hex_escape
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ }') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ A }') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0-}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{Z00}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{000Z}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00ZZ}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0X}') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00X') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00XYZ') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000XYZ') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACED') }
-    assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACE]') }
-  end
   def test_scanner_invalid_named_group
     assert_raise( RS::InvalidGroupError ) { RS.scan("(?'')") }
     assert_raise( RS::InvalidGroupError ) { RS.scan("(?''empty-name)") }

data/test/scanner/test_escapes.rb CHANGED Viewed

@@ -22,10 +22,9 @@ class ScannerEscapes < Test::Unit::TestCase
     'a\x24c'          => [1, :escape,  :hex,              '\x24',           1,  5],
     'a\x0640c'        => [1, :escape,  :hex,              '\x06',           1,  5],
-    'a\x{0640}c'      => [1, :escape,  :hex_wide,         '\x{0640}',       1,  9],
     'a\u0640c'        => [1, :escape,  :codepoint,        '\u0640',         1,  7],
     'a\u{640 0641}c'  => [1, :escape,  :codepoint_list,   '\u{640 0641}',   1,  13],
+    'a\u{10FFFF}c'    => [1, :escape,  :codepoint_list,   '\u{10FFFF}',     1,  11],
     /a\cBc/           => [1, :escape,  :control,          '\cB',            1,  4],
     /a\C-bc/          => [1, :escape,  :control,          '\C-b',           1,  5],

data/test/scanner/test_free_space.rb CHANGED Viewed

@@ -159,34 +159,34 @@ class ScannerFreeSpace < Test::Unit::TestCase
     regexp = /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/
     tokens = RS.scan(regexp)
     [
-      [ 0, :group,      :capture,      '(',       0,  1],
-      [ 1, :literal,    :literal,      'a ',      1,  3],
-      [ 2, :group,      :capture,      '(',       3,  4],
-      [ 3, :literal,    :literal,      'b',       4,  5],
-      [ 4, :group,      :capture,      '(',       5,  6],
-      [ 5, :group,      :options,      '(?x',     6,  9],
-      [ 6, :group,      :close,        ')',       9,  10],
-      [ 7, :free_space, :whitespace,   ' ',       10, 11],
-      [ 8, :group,      :capture,      '(',       11, 12],
-      [ 9, :literal,    :literal,      'c',       12, 13],
-      [10, :free_space, :whitespace,   ' ',       13, 14],
-      [11, :literal,    :literal,      'd',       14, 15],
-      [12, :group,      :close,        ')',       15, 16],
-      [13, :free_space, :whitespace,   ' ',       16, 17],
-      [14, :group,      :capture,      '(',       17, 18],
-      [15, :group,      :options,      '(?-x',    18, 22],
-      [16, :group,      :close,        ')',       22, 23],
-      [17, :group,      :capture,      '(',       23, 24],
-      [18, :literal,    :literal,      'e f',     24, 27],
-      [19, :group,      :close,        ')',       27, 28],
-      [20, :literal,    :literal,      ' ',       28, 29],
-      [21, :group,      :close,        ')',       29, 30],
-      [22, :literal,    :literal,      'g',       30, 31],
-      [23, :group,      :close,        ')',       31, 32],
-      [24, :literal,    :literal,      ' h',      32, 34],
-      [25, :group,      :close,        ')',       34, 35],
-      [26, :literal,    :literal,      'i j',     35, 38],
-      [27, :group,      :close,        ')',       38, 39]
+      [ 0, :group,      :capture,        '(',     0,  1],
+      [ 1, :literal,    :literal,        'a ',    1,  3],
+      [ 2, :group,      :capture,        '(',     3,  4],
+      [ 3, :literal,    :literal,        'b',     4,  5],
+      [ 4, :group,      :capture,        '(',     5,  6],
+      [ 5, :group,      :options_switch, '(?x',   6,  9],
+      [ 6, :group,      :close,           ')',    9,  10],
+      [ 7, :free_space, :whitespace,      ' ',    10, 11],
+      [ 8, :group,      :capture,         '(',    11, 12],
+      [ 9, :literal,    :literal,         'c',    12, 13],
+      [10, :free_space, :whitespace,      ' ',    13, 14],
+      [11, :literal,    :literal,         'd',    14, 15],
+      [12, :group,      :close,           ')',    15, 16],
+      [13, :free_space, :whitespace,      ' ',    16, 17],
+      [14, :group,      :capture,         '(',    17, 18],
+      [15, :group,      :options_switch, '(?-x',  18, 22],
+      [16, :group,      :close,          ')',     22, 23],
+      [17, :group,      :capture,        '(',     23, 24],
+      [18, :literal,    :literal,        'e f',   24, 27],
+      [19, :group,      :close,          ')',     27, 28],
+      [20, :literal,    :literal,        ' ',     28, 29],
+      [21, :group,      :close,          ')',     29, 30],
+      [22, :literal,    :literal,        'g',     30, 31],
+      [23, :group,      :close,          ')',     31, 32],
+      [24, :literal,    :literal,        ' h',    32, 34],
+      [25, :group,      :close,          ')',     34, 35],
+      [26, :literal,    :literal,        'i j',   35, 38],
+      [27, :group,      :close,          ')',     38, 39]
     ].each do |index, type, token, text, ts, te|
       result = tokens[index]