RubyGems - regexp_parser - Versions diffs - 0.1.6 → 0.2.0 - Mend

regexp_parser 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

checksums.yaml +4 -4
data/ChangeLog +57 -0
data/Gemfile +8 -0
data/LICENSE +1 -1
data/README.md +225 -206
data/Rakefile +9 -3
data/lib/regexp_parser.rb +7 -11
data/lib/regexp_parser/expression.rb +72 -14
data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
data/lib/regexp_parser/expression/classes/keep.rb +7 -0
data/lib/regexp_parser/expression/classes/set.rb +28 -7
data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
data/lib/regexp_parser/expression/methods/tests.rb +116 -0
data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
data/lib/regexp_parser/expression/quantifier.rb +10 -0
data/lib/regexp_parser/expression/sequence.rb +45 -0
data/lib/regexp_parser/expression/subexpression.rb +29 -1
data/lib/regexp_parser/lexer.rb +31 -8
data/lib/regexp_parser/parser.rb +118 -45
data/lib/regexp_parser/scanner.rb +1745 -1404
data/lib/regexp_parser/scanner/property.rl +57 -3
data/lib/regexp_parser/scanner/scanner.rl +161 -34
data/lib/regexp_parser/syntax.rb +12 -2
data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
data/lib/regexp_parser/syntax/tokens.rb +19 -2
data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
data/lib/regexp_parser/token.rb +23 -8
data/lib/regexp_parser/version.rb +5 -0
data/regexp_parser.gemspec +35 -0
data/test/expression/test_all.rb +6 -1
data/test/expression/test_base.rb +19 -0
data/test/expression/test_conditionals.rb +114 -0
data/test/expression/test_free_space.rb +33 -0
data/test/expression/test_set.rb +61 -0
data/test/expression/test_strfregexp.rb +214 -0
data/test/expression/test_subexpression.rb +24 -0
data/test/expression/test_tests.rb +99 -0
data/test/expression/test_to_h.rb +48 -0
data/test/expression/test_to_s.rb +46 -0
data/test/expression/test_traverse.rb +164 -0
data/test/lexer/test_all.rb +16 -3
data/test/lexer/test_conditionals.rb +101 -0
data/test/lexer/test_keep.rb +24 -0
data/test/lexer/test_literals.rb +51 -51
data/test/lexer/test_nesting.rb +62 -62
data/test/lexer/test_refcalls.rb +18 -20
data/test/parser/test_all.rb +18 -3
data/test/parser/test_alternation.rb +11 -14
data/test/parser/test_conditionals.rb +148 -0
data/test/parser/test_escapes.rb +29 -5
data/test/parser/test_free_space.rb +139 -0
data/test/parser/test_groups.rb +40 -0
data/test/parser/test_keep.rb +21 -0
data/test/scanner/test_all.rb +8 -2
data/test/scanner/test_conditionals.rb +166 -0
data/test/scanner/test_escapes.rb +8 -5
data/test/scanner/test_free_space.rb +133 -0
data/test/scanner/test_groups.rb +28 -0
data/test/scanner/test_keep.rb +33 -0
data/test/scanner/test_properties.rb +4 -0
data/test/scanner/test_scripts.rb +71 -1
data/test/syntax/ruby/test_1.9.3.rb +2 -2
data/test/syntax/ruby/test_2.0.0.rb +38 -0
data/test/syntax/ruby/test_2.2.0.rb +38 -0
data/test/syntax/ruby/test_all.rb +1 -8
data/test/syntax/ruby/test_files.rb +104 -0
data/test/test_all.rb +2 -1
data/test/token/test_all.rb +2 -0
data/test/token/test_token.rb +109 -0
metadata +75 -21
data/VERSION.yml +0 -5
data/lib/regexp_parser/ctype.rb +0 -48
data/test/syntax/ruby/test_2.x.rb +0 -46

data/lib/regexp_parser/scanner/property.rl CHANGED Viewed

@@ -11,7 +11,6 @@
                           'cntrl'i | 'digit'i | 'graph'i | 'lower'i | 'print'i |
                           'punct'i | 'space'i | 'upper'i | 'word'i  | 'xdigit'i;
-  # TODO: are these case-insensitive?
   property_name_posix   = 'any'i | 'assigned'i | 'newline'i;
   property_name         = property_name_unicode | property_name_posix;
@@ -39,7 +38,9 @@
   property_age          = 'age=1.1'i | 'age=2.0'i | 'age=2.1'i |
                           'age=3.0'i | 'age=3.1'i | 'age=3.2'i |
                           'age=4.0'i | 'age=4.1'i | 'age=5.0'i |
-                          'age=5.1'i | 'age=5.2'i | 'age=6.0'i;
+                          'age=5.1'i | 'age=5.2'i | 'age=6.0'i |
+                          'age=6.1'i | 'age=6.2'i | 'age=6.3'i |
+                          'age=7.0'i;
   property_script       = (alpha | space | '_')+; # everything else
@@ -222,6 +223,14 @@
         self.emit(type, :age_5_2,     text, ts-1, te)
       when 'age=6.0'
         self.emit(type, :age_6_0,     text, ts-1, te)
+      when 'age=6.1'
+        self.emit(type, :age_6_1,     text, ts-1, te)
+      when 'age=6.2'
+        self.emit(type, :age_6_2,     text, ts-1, te)
+      when 'age=6.3'
+        self.emit(type, :age_6_3,     text, ts-1, te)
+      when 'age=7.0'
+        self.emit(type, :age_7_0,     text, ts-1, te)
       # Derived Properties
       when 'ahex', 'asciihexdigit'
@@ -327,8 +336,9 @@
       when 'xidc', 'xidcontinue'
         self.emit(type, :xid_continue,                    text, ts-1, te)
       # Scripts
+      when 'aghb', 'caucasianalbanian'
+        self.emit(type, :script_caucasian_albanian,       text, ts-1, te)
       when 'arab', 'arabic'
         self.emit(type, :script_arabic,                   text, ts-1, te)
       when 'armi', 'imperialaramaic'
@@ -341,6 +351,8 @@
         self.emit(type, :script_balinese,                 text, ts-1, te)
       when 'bamu', 'bamum'
         self.emit(type, :script_bamum,                    text, ts-1, te)
+      when 'bass', 'bassavah'
+        self.emit(type, :script_bassa_vah,                text, ts-1, te)
       when 'batk', 'batak'
         self.emit(type, :script_batak,                    text, ts-1, te)
       when 'beng', 'bengali'
@@ -373,8 +385,12 @@
         self.emit(type, :script_devanagari,               text, ts-1, te)
       when 'dsrt', 'deseret'
         self.emit(type, :script_deseret,                  text, ts-1, te)
+      when 'dupl', 'duployan'
+        self.emit(type, :script_duployan,                 text, ts-1, te)
       when 'egyp', 'egyptianhieroglyphs'
         self.emit(type, :script_egyptian_hieroglyphs,     text, ts-1, te)
+      when 'elba', 'elbasan'
+        self.emit(type, :script_elbasan,                  text, ts-1, te)
       when 'ethi', 'ethiopic'
         self.emit(type, :script_ethiopic,                 text, ts-1, te)
       when 'geor', 'georgian'
@@ -383,6 +399,8 @@
         self.emit(type, :script_glagolitic,               text, ts-1, te)
       when 'goth', 'gothic'
         self.emit(type, :script_gothic,                   text, ts-1, te)
+      when 'gran', 'grantha'
+        self.emit(type, :script_grantha,                  text, ts-1, te)
       when 'grek', 'greek'
         self.emit(type, :script_greek,                    text, ts-1, te)
       when 'gujr', 'gujarati'
@@ -399,6 +417,8 @@
         self.emit(type, :script_hebrew,                   text, ts-1, te)
       when 'hira', 'hiragana'
         self.emit(type, :script_hiragana,                 text, ts-1, te)
+      when 'hmng', 'pahawhhmong'
+        self.emit(type, :script_pahawh_hmong,             text, ts-1, te)
       when 'hrkt', 'katakanaorhiragana'
         self.emit(type, :script_katakana_or_hiragana,     text, ts-1, te)
       when 'ital', 'olditalic'
@@ -413,6 +433,8 @@
         self.emit(type, :script_kharoshthi,               text, ts-1, te)
       when 'khmr', 'khmer'
         self.emit(type, :script_khmer,                    text, ts-1, te)
+      when 'khoj', 'khojki'
+        self.emit(type, :script_khojki,                   text, ts-1, te)
       when 'knda', 'kannada'
         self.emit(type, :script_kannada,                  text, ts-1, te)
       when 'kthi', 'kaithi'
@@ -427,6 +449,8 @@
         self.emit(type, :script_lepcha,                   text, ts-1, te)
       when 'limb', 'limbu'
         self.emit(type, :script_limbu,                    text, ts-1, te)
+      when 'lina', 'lineara'
+        self.emit(type, :script_linear_a,                 text, ts-1, te)
       when 'linb', 'linearb'
         self.emit(type, :script_linear_b,                 text, ts-1, te)
       when 'lisu'
@@ -437,14 +461,28 @@
         self.emit(type, :script_lydian,                   text, ts-1, te)
       when 'mlym', 'malayalam'
         self.emit(type, :script_malayalam,                text, ts-1, te)
+      when 'mahj', 'mahajani'
+        self.emit(type, :script_mahajani,                 text, ts-1, te)
       when 'mand', 'mandaic'
         self.emit(type, :script_mandaic,                  text, ts-1, te)
+      when 'mani', 'manichaean'
+        self.emit(type, :script_manichaean,               text, ts-1, te)
+      when 'mend', 'mendekikakui'
+        self.emit(type, :script_mende_kikakui,            text, ts-1, te)
+      when 'modi'
+        self.emit(type, :script_modi,                     text, ts-1, te)
       when 'mong', 'mongolian'
         self.emit(type, :script_mongolian,                text, ts-1, te)
+      when 'mroo', 'mro'
+        self.emit(type, :script_mro,                      text, ts-1, te)
       when 'mtei', 'meeteimayek'
         self.emit(type, :script_meetei_mayek,             text, ts-1, te)
       when 'mymr', 'myanmar'
         self.emit(type, :script_myanmar,                  text, ts-1, te)
+      when 'narb', 'oldnortharabian'
+        self.emit(type, :script_old_north_arabian,        text, ts-1, te)
+      when 'nbat', 'nabataean'
+        self.emit(type, :script_nabataean,                text, ts-1, te)
       when 'nkoo', 'nko'
         self.emit(type, :script_nko,                      text, ts-1, te)
       when 'ogam', 'ogham'
@@ -457,10 +495,18 @@
         self.emit(type, :script_oriya,                    text, ts-1, te)
       when 'osma', 'osmanya'
         self.emit(type, :script_osmanya,                  text, ts-1, te)
+      when 'palm', 'palmyrene'
+        self.emit(type, :script_palmyrene,                text, ts-1, te)
+      when 'pauc', 'paucinhau'
+        self.emit(type, :script_pau_cin_hau,              text, ts-1, te)
+      when 'perm', 'oldpermic'
+        self.emit(type, :script_old_permic,               text, ts-1, te)
       when 'phag', 'phagspa'
         self.emit(type, :script_phags_pa,                 text, ts-1, te)
       when 'phli', 'inscriptionalpahlavi'
         self.emit(type, :script_inscriptional_pahlavi,    text, ts-1, te)
+      when 'phlp', 'psalterpahlavi'
+        self.emit(type, :script_psalter_pahlavi,          text, ts-1, te)
       when 'phnx', 'phoenician'
         self.emit(type, :script_phoenician,               text, ts-1, te)
       when 'prti', 'inscriptionalparthian'
@@ -477,6 +523,10 @@
         self.emit(type, :script_saurashtra,               text, ts-1, te)
       when 'shaw', 'shavian'
         self.emit(type, :script_shavian,                  text, ts-1, te)
+      when 'sidd', 'siddham'
+        self.emit(type, :script_siddham,                  text, ts-1, te)
+      when 'sind', 'khudawadi'
+        self.emit(type, :script_khudawadi,                text, ts-1, te)
       when 'sinh', 'sinhala'
         self.emit(type, :script_sinhala,                  text, ts-1, te)
       when 'sund', 'sundanese'
@@ -507,10 +557,14 @@
         self.emit(type, :script_thai,                     text, ts-1, te)
       when 'tibt', 'tibetan'
         self.emit(type, :script_tibetan,                  text, ts-1, te)
+      when 'tirh', 'tirhuta'
+        self.emit(type, :script_tirhuta,                  text, ts-1, te)
       when 'ugar', 'ugaritic'
         self.emit(type, :script_ugaritic,                 text, ts-1, te)
       when 'vaii', 'vai'
         self.emit(type, :script_vai,                      text, ts-1, te)
+      when 'wara', 'warangciti'
+        self.emit(type, :script_warang_citi,              text, ts-1, te)
       when 'xpeo', 'oldpersian'
         self.emit(type, :script_old_persian,              text, ts-1, te)
       when 'xsux', 'cuneiform'

data/lib/regexp_parser/scanner/scanner.rl CHANGED Viewed

@@ -20,6 +20,8 @@
   set_close             = ']';
   brackets              = set_open | set_close;
+  comment               = ('#' . [^\n]* . '\n');
   class_name_posix      = 'alnum' | 'alpha' | 'blank' |
                           'cntrl' | 'digit' | 'graph' |
                           'lower' | 'print' | 'punct' |
@@ -74,6 +76,8 @@
                           quantifier_possessive | quantifier_interval;
+  conditional           = '(?(';
   group_comment         = '?#' . [^)]+ . group_close;
   group_atomic          = '?>';
@@ -84,23 +88,28 @@
   assertion_lookbehind  = '?<=';
   assertion_nlookbehind = '?<!';
-  group_options         = '?' . [\-mix];
+  group_options         = '?' . [\-mixdau];
   group_ref             = [gk];
-  group_name            = (alnum . (alnum+)?)?;
+  group_name_char       = (alnum | '_');
+  group_name_id         = (group_name_char . (group_name_char+)?)?;
   group_number          = '-'? . [1-9] . ([0-9]+)?;
   group_level           = [+\-] . [0-9]+;
-  group_named           = ('?<' . group_name . '>') | ("?'" . group_name . "'");
+  group_name            = ('<' . group_name_id . '>') | ("'" . group_name_id . "'");
+  group_lookup          = group_name | group_number;
-  group_name_ref        = group_ref . (('<' . group_name . group_level? '>') |
-                                       ("'" . group_name . group_level? "'"));
+  group_named           = ('?' . group_name );
+  group_name_ref        = group_ref . (('<' . group_name_id . group_level? '>') |
+                                       ("'" . group_name_id . group_level? "'"));
   group_number_ref      = group_ref . (('<' . group_number . group_level? '>') |
                                        ("'" . group_number . group_level? "'"));
   group_type            = group_atomic | group_passive | group_named;
   assertion_type        = assertion_lookahead  | assertion_nlookahead |
                           assertion_lookbehind | assertion_nlookbehind;
@@ -133,8 +142,8 @@
   }
   # group (nesting) and set open/close actions
-  action group_opened { group_depth += 1; in_group = true }
-  action group_closed { group_depth -= 1; in_group = group_depth > 0 ? true : false }
+  action group_opened { @group_depth += 1; @in_group = true }
+  action group_closed { @group_depth -= 1; @in_group = @group_depth > 0 ? true : false }
   # Character set scanner, continues consuming characters until it meets the
   # closing bracket of the set.
@@ -410,6 +419,22 @@
   *|;
+  # conditional expressions scanner
+  # --------------------------------------------------------------------------
+  conditional_expression := |*
+    group_lookup . ')' {
+      text = text(data, ts, te-1).first
+      emit(:conditional, :condition, text, ts, te-1)
+      emit(:conditional, :condition_close, ')', te-1, te)
+    };
+    any {
+      fhold;
+      fcall main;
+    };
+  *|;
   # Main scanner
   # --------------------------------------------------------------------------
   main := |*
@@ -421,7 +446,12 @@
     };
     alternation {
-      emit(:meta, :alternation, *text(data, ts, te))
+      if in_conditional and conditional_stack.length > 0 and
+         conditional_stack.last[1] == @group_depth
+        emit(:conditional, :separator, *text(data, ts, te))
+      else
+        emit(:meta, :alternation, *text(data, ts, te))
+      end
     };
     # Anchors
@@ -434,6 +464,10 @@
       emit(:anchor, :eol, *text(data, ts, te))
     };
+    backslash . 'K' > (backslashed, 4) {
+      emit(:keep, :mark, *text(data, ts, te))
+    };
     backslash . anchor_char > (backslashed, 3) {
       case text = text(data, ts, te).first
       when '\\A'; emit(:anchor, :bos,                text, ts, te)
@@ -481,6 +515,23 @@
       fcall character_set;
     };
+    # Conditional expression
+    #   (?(condition)Y|N)   conditional expression
+    # ------------------------------------------------------------------------
+    conditional {
+      text = text(data, ts, te).first
+      in_conditional = true unless in_conditional
+      conditional_depth += 1
+      conditional_stack << [conditional_depth, @group_depth]
+      emit(:conditional, :open, text[0..-2], ts, te-1)
+      emit(:conditional, :condition_open, '(', te-1, te)
+      fcall conditional_expression;
+    };
     # (?#...) comments: parsed as a single expression, without introducing a
     # new nesting level. Comments may not include parentheses, escaped or not.
     # special case for close, action performed on all transitions to get the
@@ -491,12 +542,15 @@
     };
     # Expression options:
-    #   (?imx-imx)          option on/off
+    #   (?imxdau-imx)         option on/off
     #                         i: ignore case
     #                         m: multi-line (dot(.) match newline)
     #                         x: extended form
+    #                         d: default class rules (1.9 compatible)
+    #                         a: ASCII class rules (\s, \w, etc.)
+    #                         u: Unicode class rules (\s, \w, etc.)
     #
-    #   (?imx-imx:subexp)   option on/off for subexp
+    #   (?imxdau-imx:subexp)  option on/off for subexp
     # ------------------------------------------------------------------------
     group_open . group_options >group_opened {
       p = scan_options(p, data, ts, te)
@@ -551,7 +605,29 @@
     };
     group_close @group_closed {
-      emit(:group, :close, *text(data, ts, te))
+      if in_conditional and conditional_stack.last and
+         conditional_stack.last[1] == (@group_depth + 1)
+        emit(:conditional, :close, *text(data, ts, te))
+        conditional_stack.pop
+        if conditional_stack.length == 0
+          in_conditional = false
+        end
+      else
+        if @spacing_stack.length > 1 and
+          @spacing_stack.last[1] == (@group_depth + 1)
+          @spacing_stack.pop
+          @free_spacing = @spacing_stack.last[0]
+          if @spacing_stack.length == 1
+            @in_options = false
+          end
+        end
+        emit(:group, :close, *text(data, ts, te))
+      end
     };
@@ -662,10 +738,26 @@
       fcall escape_sequence;
     };
+    comment {
+      if @free_spacing
+        emit(:free_space, :comment, *text(data, ts, te))
+      else
+        append_literal(data, ts, te)
+      end
+    };
+    space+ {
+      if @free_spacing
+        emit(:free_space, :whitespace, *text(data, ts, te))
+      else
+        append_literal(data, ts, te)
+      end
+    };
     # Literal: any run of ASCII (printable or non-printable), and/or UTF-8,
     # except meta characters.
     # ------------------------------------------------------------------------
-    ascii_print+    |
+    (ascii_print -- space)+    |
     ascii_nonprint+ |
     utf8_2_byte+    |
     utf8_3_byte+    |
@@ -683,11 +775,7 @@ module Regexp::Scanner
   %% write data;
   # General scanner error (catch all)
-  class ScannerError < StandardError
-    def initialize(what)
-      super what
-    end
-  end
+  class ScannerError < StandardError; end
   # Base for all scanner validation errors
   class ValidationError < StandardError
@@ -717,6 +805,13 @@ module Regexp::Scanner
     end
   end
+  # Invalid groupOption. Used for inline options.
+  class InvalidGroupOption < ValidationError
+    def initialize(option, text)
+      super "Invalid group option #{option} in #{text}"
+    end
+  end
   # Invalid back reference. Used for name and number refs/calls.
   class InvalidBackrefError < ValidationError
     def initialize(what, reason)
@@ -737,18 +832,29 @@ module Regexp::Scanner
   #
   # This method may raise errors if a syntax error is encountered.
   # --------------------------------------------------------------------------
-  def self.scan(input, &block)
+  def self.scan(input_object, &block)
     top, stack = 0, []
-    input = input.source if input.is_a?(Regexp)
+    if input_object.is_a?(Regexp)
+      input    = input_object.source
+      @free_spacing  = (input_object.options & Regexp::EXTENDED != 0)
+    else
+      input   = input_object
+      @free_spacing = false
+    end
     data  = input.unpack("c*") if input.is_a?(String)
     eof   = data.length
     @tokens = []
     @block  = block_given? ? block : nil
-    in_group, group_depth = false, 0
+    @in_group, @group_depth = false, 0
+    @in_options, @spacing_stack = false, [[@free_spacing, 0]]
     in_set,   set_depth, set_type   = false, 0, :set
+    in_conditional, conditional_depth, conditional_stack = false, 0, []
     %% write init;
     %% write exec;
@@ -759,7 +865,7 @@ module Regexp::Scanner
     end
     raise PrematureEndError.new("(missing group closing paranthesis) "+
-          "[#{in_group}:#{group_depth}]") if in_group
+          "[#{@in_group}:#{@group_depth}]") if @in_group
     raise PrematureEndError.new("(missing set closing bracket) "+
           "[#{in_set}:#{set_depth}]") if in_set
@@ -779,13 +885,19 @@ module Regexp::Scanner
     options_char, options_length = true, 0
-    # Copy while we have option characters, the maximum is 7, for (?mix-mix,
-    # even though it doesn't make sense it is possible.
-    while options_char and options_length < 7
+    # Copy while we have option characters. There is no maximum length,
+    # as ruby allows things like '(?xxxxxxxxx-xxxxxxxxxxxxx:abc)'.
+    negative_options = false
+    while options_char
       if data[te + options_length]
         c = data[te + options_length].chr
-        if c =~ /[-mix]/
+        if c =~ /[-mixdau]/
+          negative_options = true if c == '-'
+          raise InvalidGroupOption.new(c, text) if negative_options and
+            c =~ /[dau]/
           text << c ; p += 1 ; options_length += 1
         else
           options_char = false
@@ -801,11 +913,11 @@ module Regexp::Scanner
       if c == ':'
         # Include the ':' in the options text
         text << c ; p += 1 ; options_length += 1
-        emit(:group, :options, text, ts, te + options_length)
+        emit_options(text, ts, te + options_length)
       elsif c == ')'
         # Don't include the closing ')', let group_close handle it.
-        emit(:group, :options, text, ts, te + options_length)
+        emit_options(text, ts, te + options_length)
       else
         # Plain Regexp reports this as 'undefined group option'
@@ -849,6 +961,27 @@ module Regexp::Scanner
     emit(:literal, :literal, text, ts, te)
   end
+  def self.emit_options(text, ts, te)
+    if text =~ /\(\?([mixdau]+)?-?([mix]+)?:/
+      positive, negative = $1, $2
+      if positive =~ /x/
+        @free_spacing = true
+      end
+      # If the x appears in both, treat it like ruby does, the second cancels
+      # the first.
+      if negative =~ /x/
+        @free_spacing = false
+      end
+    end
+    @in_options = true
+    @spacing_stack << [@free_spacing, @group_depth]
+    emit(:group, :options, text, ts, te)
+  end
   # Emits an array with the details of the scanned pattern
   def self.emit(type, token, text, ts, te)
     #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
@@ -876,13 +1009,7 @@ module Regexp::Scanner
       error = ValidationError.new('expression')
     end
-    # TODO: configuration option to treat scanner level validation
-    # errors as warnings or ignore them
-    if false # @@config.validation_warn
-      $stderr.puts error.to_s # unless @@config.validation_ignore
-    else
-      raise error # unless @@config.validation_ignore
-    end
+    raise error # unless @@config.validation_ignore
   end
   # Used for references with an empty name or number