regexp_parser 1.7.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Gemfile +9 -3
 - data/LICENSE +1 -1
 - data/Rakefile +6 -70
 - data/lib/regexp_parser/error.rb +4 -0
 - data/lib/regexp_parser/expression/base.rb +76 -0
 - data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
 - data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
 - data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
 - data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
 - data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
 - data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
 - data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
 - data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
 - data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
 - data/lib/regexp_parser/expression/classes/group.rb +28 -15
 - data/lib/regexp_parser/expression/classes/keep.rb +2 -0
 - data/lib/regexp_parser/expression/classes/literal.rb +1 -5
 - data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
 - data/lib/regexp_parser/expression/classes/root.rb +4 -19
 - data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +11 -12
 - data/lib/regexp_parser/expression/methods/construct.rb +41 -0
 - data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
 - data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
 - data/lib/regexp_parser/expression/methods/negative.rb +20 -0
 - data/lib/regexp_parser/expression/methods/parts.rb +23 -0
 - data/lib/regexp_parser/expression/methods/printing.rb +26 -0
 - data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
 - data/lib/regexp_parser/expression/methods/tests.rb +47 -1
 - data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
 - data/lib/regexp_parser/expression/quantifier.rb +57 -17
 - data/lib/regexp_parser/expression/sequence.rb +11 -47
 - data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
 - data/lib/regexp_parser/expression/shared.rb +111 -0
 - data/lib/regexp_parser/expression/subexpression.rb +27 -19
 - data/lib/regexp_parser/expression.rb +15 -141
 - data/lib/regexp_parser/lexer.rb +83 -41
 - data/lib/regexp_parser/parser.rb +372 -429
 - data/lib/regexp_parser/scanner/char_type.rl +11 -11
 - data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
 - data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
 - data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
 - data/lib/regexp_parser/scanner/properties/long.csv +651 -0
 - data/lib/regexp_parser/scanner/properties/short.csv +249 -0
 - data/lib/regexp_parser/scanner/property.rl +4 -4
 - data/lib/regexp_parser/scanner/scanner.rl +303 -368
 - data/lib/regexp_parser/scanner.rb +1423 -1674
 - data/lib/regexp_parser/syntax/any.rb +2 -7
 - data/lib/regexp_parser/syntax/base.rb +92 -67
 - data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
 - data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
 - data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
 - data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
 - data/lib/regexp_parser/syntax/token/escape.rb +33 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
 - data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
 - data/lib/regexp_parser/syntax/token/meta.rb +20 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
 - data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
 - data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
 - data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
 - data/lib/regexp_parser/syntax/token.rb +45 -0
 - data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
 - data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
 - data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
 - data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
 - data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
 - data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
 - data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
 - data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
 - data/lib/regexp_parser/syntax/versions.rb +3 -1
 - data/lib/regexp_parser/syntax.rb +8 -6
 - data/lib/regexp_parser/token.rb +9 -20
 - data/lib/regexp_parser/version.rb +1 -1
 - data/lib/regexp_parser.rb +0 -2
 - data/regexp_parser.gemspec +19 -23
 - metadata +53 -171
 - data/CHANGELOG.md +0 -349
 - data/README.md +0 -470
 - data/lib/regexp_parser/scanner/properties/long.yml +0 -594
 - data/lib/regexp_parser/scanner/properties/short.yml +0 -237
 - data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
 - data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
 - data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
 - data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
 - data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
 - data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
 - data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
 - data/lib/regexp_parser/syntax/tokens.rb +0 -45
 - data/spec/expression/base_spec.rb +0 -94
 - data/spec/expression/clone_spec.rb +0 -120
 - data/spec/expression/conditional_spec.rb +0 -89
 - data/spec/expression/free_space_spec.rb +0 -27
 - data/spec/expression/methods/match_length_spec.rb +0 -161
 - data/spec/expression/methods/match_spec.rb +0 -25
 - data/spec/expression/methods/strfregexp_spec.rb +0 -224
 - data/spec/expression/methods/tests_spec.rb +0 -99
 - data/spec/expression/methods/traverse_spec.rb +0 -161
 - data/spec/expression/options_spec.rb +0 -128
 - data/spec/expression/root_spec.rb +0 -9
 - data/spec/expression/sequence_spec.rb +0 -9
 - data/spec/expression/subexpression_spec.rb +0 -50
 - data/spec/expression/to_h_spec.rb +0 -26
 - data/spec/expression/to_s_spec.rb +0 -100
 - data/spec/lexer/all_spec.rb +0 -22
 - data/spec/lexer/conditionals_spec.rb +0 -53
 - data/spec/lexer/escapes_spec.rb +0 -14
 - data/spec/lexer/keep_spec.rb +0 -10
 - data/spec/lexer/literals_spec.rb +0 -89
 - data/spec/lexer/nesting_spec.rb +0 -99
 - data/spec/lexer/refcalls_spec.rb +0 -55
 - data/spec/parser/all_spec.rb +0 -43
 - data/spec/parser/alternation_spec.rb +0 -88
 - data/spec/parser/anchors_spec.rb +0 -17
 - data/spec/parser/conditionals_spec.rb +0 -179
 - data/spec/parser/errors_spec.rb +0 -30
 - data/spec/parser/escapes_spec.rb +0 -121
 - data/spec/parser/free_space_spec.rb +0 -130
 - data/spec/parser/groups_spec.rb +0 -108
 - data/spec/parser/keep_spec.rb +0 -6
 - data/spec/parser/posix_classes_spec.rb +0 -8
 - data/spec/parser/properties_spec.rb +0 -115
 - data/spec/parser/quantifiers_spec.rb +0 -51
 - data/spec/parser/refcalls_spec.rb +0 -112
 - data/spec/parser/set/intersections_spec.rb +0 -127
 - data/spec/parser/set/ranges_spec.rb +0 -111
 - data/spec/parser/sets_spec.rb +0 -178
 - data/spec/parser/types_spec.rb +0 -18
 - data/spec/scanner/all_spec.rb +0 -18
 - data/spec/scanner/anchors_spec.rb +0 -21
 - data/spec/scanner/conditionals_spec.rb +0 -128
 - data/spec/scanner/errors_spec.rb +0 -68
 - data/spec/scanner/escapes_spec.rb +0 -53
 - data/spec/scanner/free_space_spec.rb +0 -133
 - data/spec/scanner/groups_spec.rb +0 -52
 - data/spec/scanner/keep_spec.rb +0 -10
 - data/spec/scanner/literals_spec.rb +0 -49
 - data/spec/scanner/meta_spec.rb +0 -18
 - data/spec/scanner/properties_spec.rb +0 -64
 - data/spec/scanner/quantifiers_spec.rb +0 -20
 - data/spec/scanner/refcalls_spec.rb +0 -36
 - data/spec/scanner/sets_spec.rb +0 -102
 - data/spec/scanner/types_spec.rb +0 -14
 - data/spec/spec_helper.rb +0 -15
 - data/spec/support/runner.rb +0 -42
 - data/spec/support/shared_examples.rb +0 -77
 - data/spec/support/warning_extractor.rb +0 -60
 - data/spec/syntax/syntax_spec.rb +0 -48
 - data/spec/syntax/syntax_token_map_spec.rb +0 -23
 - data/spec/syntax/versions/1.8.6_spec.rb +0 -17
 - data/spec/syntax/versions/1.9.1_spec.rb +0 -10
 - data/spec/syntax/versions/1.9.3_spec.rb +0 -9
 - data/spec/syntax/versions/2.0.0_spec.rb +0 -13
 - data/spec/syntax/versions/2.2.0_spec.rb +0 -9
 - data/spec/syntax/versions/aliases_spec.rb +0 -37
 - data/spec/token/token_spec.rb +0 -85
 - /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
 
| 
         @@ -3,6 +3,11 @@ 
     | 
|
| 
       3 
3 
     | 
    
         
             
              include re_char_type "char_type.rl";
         
     | 
| 
       4 
4 
     | 
    
         
             
              include re_property  "property.rl";
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
      
 6 
     | 
    
         
            +
              utf8_2_byte           = (0xc2..0xdf 0x80..0xbf);
         
     | 
| 
      
 7 
     | 
    
         
            +
              utf8_3_byte           = (0xe0..0xef 0x80..0xbf 0x80..0xbf);
         
     | 
| 
      
 8 
     | 
    
         
            +
              utf8_4_byte           = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
         
     | 
| 
      
 9 
     | 
    
         
            +
              utf8_multibyte        = utf8_2_byte | utf8_3_byte | utf8_4_byte;
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
       6 
11 
     | 
    
         
             
              dot                   = '.';
         
     | 
| 
       7 
12 
     | 
    
         
             
              backslash             = '\\';
         
     | 
| 
       8 
13 
     | 
    
         
             
              alternation           = '|';
         
     | 
| 
         @@ -15,26 +20,15 @@ 
     | 
|
| 
       15 
20 
     | 
    
         | 
| 
       16 
21 
     | 
    
         
             
              group_open            = '(';
         
     | 
| 
       17 
22 
     | 
    
         
             
              group_close           = ')';
         
     | 
| 
       18 
     | 
    
         
            -
               
     | 
| 
      
 23 
     | 
    
         
            +
              parentheses           = group_open | group_close;
         
     | 
| 
       19 
24 
     | 
    
         | 
| 
       20 
25 
     | 
    
         
             
              set_open              = '[';
         
     | 
| 
       21 
26 
     | 
    
         
             
              set_close             = ']';
         
     | 
| 
       22 
27 
     | 
    
         
             
              brackets              = set_open | set_close;
         
     | 
| 
       23 
28 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
              comment               = ('#' . [^\n]* . '\n');
         
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
              class_name_posix      = 'alnum' | 'alpha' | 'blank' |
         
     | 
| 
       27 
     | 
    
         
            -
                                      'cntrl' | 'digit' | 'graph' |
         
     | 
| 
       28 
     | 
    
         
            -
                                      'lower' | 'print' | 'punct' |
         
     | 
| 
       29 
     | 
    
         
            -
                                      'space' | 'upper' | 'xdigit' |
         
     | 
| 
       30 
     | 
    
         
            -
                                      'word'  | 'ascii';
         
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
              class_posix           = ('[:' . '^'? . class_name_posix . ':]');
         
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
      
 29 
     | 
    
         
            +
              comment               = ('#' . [^\n]* . '\n'?);
         
     | 
| 
       34 
30 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
               
     | 
| 
       36 
     | 
    
         
            -
              collating_sequence    = '[.' . (alpha | [\-])+ . '.]';
         
     | 
| 
       37 
     | 
    
         
            -
              character_equivalent  = '[=' . alpha . '=]';
         
     | 
| 
      
 31 
     | 
    
         
            +
              class_posix           = ('[:' . '^'? . [^\[\]]* . ':]');
         
     | 
| 
       38 
32 
     | 
    
         | 
| 
       39 
33 
     | 
    
         
             
              line_anchor           = beginning_of_line | end_of_line;
         
     | 
| 
       40 
34 
     | 
    
         
             
              anchor_char           = [AbBzZG];
         
     | 
| 
         @@ -53,21 +47,20 @@ 
     | 
|
| 
       53 
47 
     | 
    
         | 
| 
       54 
48 
     | 
    
         
             
              meta_sequence         = 'M-' . (backslash . ('c' | 'C-'))? . backslash? . any;
         
     | 
| 
       55 
49 
     | 
    
         | 
| 
      
 50 
     | 
    
         
            +
              sequence_char         = [CMcux];
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
       56 
52 
     | 
    
         
             
              zero_or_one           = '?' | '??' | '?+';
         
     | 
| 
       57 
53 
     | 
    
         
             
              zero_or_more          = '*' | '*?' | '*+';
         
     | 
| 
       58 
54 
     | 
    
         
             
              one_or_more           = '+' | '+?' | '++';
         
     | 
| 
       59 
55 
     | 
    
         | 
| 
       60 
56 
     | 
    
         
             
              quantifier_greedy     = '?'  | '*'  | '+';
         
     | 
| 
       61 
     | 
    
         
            -
              quantifier_reluctant  = '??' | '*?' | '+?';
         
     | 
| 
       62 
     | 
    
         
            -
              quantifier_possessive = '?+' | '*+' | '++';
         
     | 
| 
       63 
     | 
    
         
            -
              quantifier_mode       = '?'  | '+';
         
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
              quantifier_interval   = range_open . (digit+)? . ','? . (digit+)? .
         
     | 
| 
       66 
     | 
    
         
            -
                                      range_close . quantifier_mode?;
         
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
              quantifiers           = quantifier_greedy | quantifier_reluctant |
         
     | 
| 
       69 
     | 
    
         
            -
                                      quantifier_possessive | quantifier_interval;
         
     | 
| 
       70 
57 
     | 
    
         | 
| 
      
 58 
     | 
    
         
            +
              quantity_exact        = (digit+);
         
     | 
| 
      
 59 
     | 
    
         
            +
              quantity_minimum      = (digit+) . ',';
         
     | 
| 
      
 60 
     | 
    
         
            +
              quantity_maximum      = ',' . (digit+);
         
     | 
| 
      
 61 
     | 
    
         
            +
              quantity_range        = (digit+) . ',' . (digit+);
         
     | 
| 
      
 62 
     | 
    
         
            +
              quantifier_interval   = range_open . ( quantity_exact | quantity_minimum |
         
     | 
| 
      
 63 
     | 
    
         
            +
                                      quantity_maximum | quantity_range ) . range_close;
         
     | 
| 
       71 
64 
     | 
    
         | 
| 
       72 
65 
     | 
    
         
             
              conditional           = '(?(';
         
     | 
| 
       73 
66 
     | 
    
         | 
| 
         @@ -85,22 +78,22 @@ 
     | 
|
| 
       85 
78 
     | 
    
         
             
              # try to treat every other group head as options group, like Ruby
         
     | 
| 
       86 
79 
     | 
    
         
             
              group_options         = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
         
     | 
| 
       87 
80 
     | 
    
         | 
| 
       88 
     | 
    
         
            -
               
     | 
| 
       89 
     | 
    
         
            -
               
     | 
| 
       90 
     | 
    
         
            -
               
     | 
| 
       91 
     | 
    
         
            -
              group_number          = '-'? . [1-9] . ([0-9]+)?;
         
     | 
| 
      
 81 
     | 
    
         
            +
              group_name_id_ab      = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
         
     | 
| 
      
 82 
     | 
    
         
            +
              group_name_id_sq      = ([^0-9\-']  | utf8_multibyte) . ([^'] | utf8_multibyte)*;
         
     | 
| 
      
 83 
     | 
    
         
            +
              group_number          = '-'? . [0-9]+;
         
     | 
| 
       92 
84 
     | 
    
         
             
              group_level           = [+\-] . [0-9]+;
         
     | 
| 
       93 
85 
     | 
    
         | 
| 
       94 
     | 
    
         
            -
              group_name            = ('<' .  
     | 
| 
      
 86 
     | 
    
         
            +
              group_name            = ('<' . group_name_id_ab? . '>') |
         
     | 
| 
      
 87 
     | 
    
         
            +
                                      ("'" . group_name_id_sq? . "'");
         
     | 
| 
       95 
88 
     | 
    
         
             
              group_lookup          = group_name | group_number;
         
     | 
| 
       96 
89 
     | 
    
         | 
| 
       97 
90 
     | 
    
         
             
              group_named           = ('?' . group_name );
         
     | 
| 
       98 
91 
     | 
    
         | 
| 
       99 
     | 
    
         
            -
               
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
      
 92 
     | 
    
         
            +
              group_ref_body        = (('<' . (group_name_id_ab? | group_number) . group_level? '>') |
         
     | 
| 
      
 93 
     | 
    
         
            +
                                       ("'" . (group_name_id_sq? | group_number) . group_level? "'"));
         
     | 
| 
       101 
94 
     | 
    
         | 
| 
       102 
     | 
    
         
            -
               
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
      
 95 
     | 
    
         
            +
              group_ref             = 'k' . group_ref_body;
         
     | 
| 
      
 96 
     | 
    
         
            +
              group_call            = 'g' . group_ref_body;
         
     | 
| 
       104 
97 
     | 
    
         | 
| 
       105 
98 
     | 
    
         
             
              group_type            = group_atomic | group_passive | group_absence | group_named;
         
     | 
| 
       106 
99 
     | 
    
         | 
| 
         @@ -111,32 +104,33 @@ 
     | 
|
| 
       111 
104 
     | 
    
         | 
| 
       112 
105 
     | 
    
         
             
              # characters that 'break' a literal
         
     | 
| 
       113 
106 
     | 
    
         
             
              meta_char             = dot | backslash | alternation |
         
     | 
| 
       114 
     | 
    
         
            -
                                      curlies |  
     | 
| 
      
 107 
     | 
    
         
            +
                                      curlies | parentheses | brackets |
         
     | 
| 
       115 
108 
     | 
    
         
             
                                      line_anchor | quantifier_greedy;
         
     | 
| 
       116 
109 
     | 
    
         | 
| 
       117 
     | 
    
         
            -
               
     | 
| 
       118 
     | 
    
         
            -
              ascii_nonprint        = (0x01..0x1f | 0x7f);
         
     | 
| 
      
 110 
     | 
    
         
            +
              literal_delimiters    = ']' | '}';
         
     | 
| 
       119 
111 
     | 
    
         | 
| 
       120 
     | 
    
         
            -
               
     | 
| 
       121 
     | 
    
         
            -
               
     | 
| 
       122 
     | 
    
         
            -
              utf8_4_byte           = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
         
     | 
| 
      
 112 
     | 
    
         
            +
              ascii_print           = ((0x20..0x7e) - meta_char - '#');
         
     | 
| 
      
 113 
     | 
    
         
            +
              ascii_nonprint        = (0x01..0x1f | 0x7f);
         
     | 
| 
       123 
114 
     | 
    
         | 
| 
       124 
115 
     | 
    
         
             
              non_literal_escape    = char_type_char | anchor_char | escaped_ascii |
         
     | 
| 
       125 
     | 
    
         
            -
                                       
     | 
| 
      
 116 
     | 
    
         
            +
                                      keep_mark | sequence_char;
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
      
 118 
     | 
    
         
            +
              # escapes that also work within a character set
         
     | 
| 
      
 119 
     | 
    
         
            +
              set_escape            = backslash | brackets | escaped_ascii |
         
     | 
| 
      
 120 
     | 
    
         
            +
                                      octal_sequence | property_char |
         
     | 
| 
      
 121 
     | 
    
         
            +
                                      sequence_char | single_codepoint_char_type;
         
     | 
| 
       126 
122 
     | 
    
         | 
| 
       127 
     | 
    
         
            -
              non_set_escape        = (anchor_char - 'b') | group_ref | keep_mark |
         
     | 
| 
       128 
     | 
    
         
            -
                                      multi_codepoint_char_type | [0-9cCM];
         
     | 
| 
       129 
123 
     | 
    
         | 
| 
       130 
124 
     | 
    
         
             
              # EOF error, used where it can be detected
         
     | 
| 
       131 
125 
     | 
    
         
             
              action premature_end_error {
         
     | 
| 
       132 
     | 
    
         
            -
                text =  
     | 
| 
       133 
     | 
    
         
            -
                raise PrematureEndError.new( 
     | 
| 
      
 126 
     | 
    
         
            +
                text = copy(data, ts ? ts-1 : 0, -1)
         
     | 
| 
      
 127 
     | 
    
         
            +
                raise PrematureEndError.new(text)
         
     | 
| 
       134 
128 
     | 
    
         
             
              }
         
     | 
| 
       135 
129 
     | 
    
         | 
| 
       136 
130 
     | 
    
         
             
              # Invalid sequence error, used from sequences, like escapes and sets
         
     | 
| 
       137 
131 
     | 
    
         
             
              action invalid_sequence_error {
         
     | 
| 
       138 
     | 
    
         
            -
                text =  
     | 
| 
       139 
     | 
    
         
            -
                 
     | 
| 
      
 132 
     | 
    
         
            +
                text = copy(data, ts ? ts-1 : 0, -1)
         
     | 
| 
      
 133 
     | 
    
         
            +
                raise ValidationError.for(:sequence, 'sequence', text)
         
     | 
| 
       140 
134 
     | 
    
         
             
              }
         
     | 
| 
       141 
135 
     | 
    
         | 
| 
       142 
136 
     | 
    
         
             
              # group (nesting) and set open/close actions
         
     | 
| 
         @@ -150,7 +144,7 @@ 
     | 
|
| 
       150 
144 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       151 
145 
     | 
    
         
             
              character_set := |*
         
     | 
| 
       152 
146 
     | 
    
         
             
                set_close > (set_meta, 2) @set_closed {
         
     | 
| 
       153 
     | 
    
         
            -
                  emit(:set, :close,  
     | 
| 
      
 147 
     | 
    
         
            +
                  emit(:set, :close, copy(data, ts, te))
         
     | 
| 
       154 
148 
     | 
    
         
             
                  if in_set?
         
     | 
| 
       155 
149 
     | 
    
         
             
                    fret;
         
     | 
| 
       156 
150 
     | 
    
         
             
                  else
         
     | 
| 
         @@ -159,8 +153,8 @@ 
     | 
|
| 
       159 
153 
     | 
    
         
             
                };
         
     | 
| 
       160 
154 
     | 
    
         | 
| 
       161 
155 
     | 
    
         
             
                '-]' @set_closed { # special case, emits two tokens
         
     | 
| 
       162 
     | 
    
         
            -
                  emit(:literal, :literal,  
     | 
| 
       163 
     | 
    
         
            -
                  emit(:set, :close,  
     | 
| 
      
 156 
     | 
    
         
            +
                  emit(:literal, :literal, '-')
         
     | 
| 
      
 157 
     | 
    
         
            +
                  emit(:set, :close, ']')
         
     | 
| 
       164 
158 
     | 
    
         
             
                  if in_set?
         
     | 
| 
       165 
159 
     | 
    
         
             
                    fret;
         
     | 
| 
       166 
160 
     | 
    
         
             
                  else
         
     | 
| 
         @@ -169,33 +163,32 @@ 
     | 
|
| 
       169 
163 
     | 
    
         
             
                };
         
     | 
| 
       170 
164 
     | 
    
         | 
| 
       171 
165 
     | 
    
         
             
                '-&&' { # special case, emits two tokens
         
     | 
| 
       172 
     | 
    
         
            -
                  emit(:literal, :literal, '-' 
     | 
| 
       173 
     | 
    
         
            -
                  emit(:set, :intersection, '&&' 
     | 
| 
      
 166 
     | 
    
         
            +
                  emit(:literal, :literal, '-')
         
     | 
| 
      
 167 
     | 
    
         
            +
                  emit(:set, :intersection, '&&')
         
     | 
| 
       174 
168 
     | 
    
         
             
                };
         
     | 
| 
       175 
169 
     | 
    
         | 
| 
       176 
170 
     | 
    
         
             
                '^' {
         
     | 
| 
       177 
     | 
    
         
            -
                   
     | 
| 
       178 
     | 
    
         
            -
             
     | 
| 
       179 
     | 
    
         
            -
                    emit(:set, :negate, text, ts, te)
         
     | 
| 
      
 171 
     | 
    
         
            +
                  if prev_token[1] == :open
         
     | 
| 
      
 172 
     | 
    
         
            +
                    emit(:set, :negate, '^')
         
     | 
| 
       180 
173 
     | 
    
         
             
                  else
         
     | 
| 
       181 
     | 
    
         
            -
                    emit(:literal, :literal,  
     | 
| 
      
 174 
     | 
    
         
            +
                    emit(:literal, :literal, '^')
         
     | 
| 
       182 
175 
     | 
    
         
             
                  end
         
     | 
| 
       183 
176 
     | 
    
         
             
                };
         
     | 
| 
       184 
177 
     | 
    
         | 
| 
       185 
178 
     | 
    
         
             
                '-' {
         
     | 
| 
       186 
     | 
    
         
            -
                   
     | 
| 
       187 
     | 
    
         
            -
                  #  
     | 
| 
       188 
     | 
    
         
            -
                  if  
     | 
| 
       189 
     | 
    
         
            -
                    emit(:literal, :literal,  
     | 
| 
      
 179 
     | 
    
         
            +
                  # ranges cant start with the opening bracket, a subset, or
         
     | 
| 
      
 180 
     | 
    
         
            +
                  # intersection/negation/range operators
         
     | 
| 
      
 181 
     | 
    
         
            +
                  if prev_token[0] == :set
         
     | 
| 
      
 182 
     | 
    
         
            +
                    emit(:literal, :literal, '-')
         
     | 
| 
       190 
183 
     | 
    
         
             
                  else
         
     | 
| 
       191 
     | 
    
         
            -
                    emit(:set, :range,  
     | 
| 
      
 184 
     | 
    
         
            +
                    emit(:set, :range, '-')
         
     | 
| 
       192 
185 
     | 
    
         
             
                  end
         
     | 
| 
       193 
186 
     | 
    
         
             
                };
         
     | 
| 
       194 
187 
     | 
    
         | 
| 
       195 
188 
     | 
    
         
             
                # Unlike ranges, intersections can start or end at set boundaries, whereupon
         
     | 
| 
       196 
189 
     | 
    
         
             
                # they match nothing: r = /[a&&]/; [r =~ ?a, r =~ ?&] # => [nil, nil]
         
     | 
| 
       197 
190 
     | 
    
         
             
                '&&' {
         
     | 
| 
       198 
     | 
    
         
            -
                  emit(:set, :intersection,  
     | 
| 
      
 191 
     | 
    
         
            +
                  emit(:set, :intersection, '&&')
         
     | 
| 
       199 
192 
     | 
    
         
             
                };
         
     | 
| 
       200 
193 
     | 
    
         | 
| 
       201 
194 
     | 
    
         
             
                backslash {
         
     | 
| 
         @@ -203,59 +196,60 @@ 
     | 
|
| 
       203 
196 
     | 
    
         
             
                };
         
     | 
| 
       204 
197 
     | 
    
         | 
| 
       205 
198 
     | 
    
         
             
                set_open >(open_bracket, 1) >set_opened {
         
     | 
| 
       206 
     | 
    
         
            -
                  emit(:set, :open,  
     | 
| 
      
 199 
     | 
    
         
            +
                  emit(:set, :open, '[')
         
     | 
| 
       207 
200 
     | 
    
         
             
                  fcall character_set;
         
     | 
| 
       208 
201 
     | 
    
         
             
                };
         
     | 
| 
       209 
202 
     | 
    
         | 
| 
       210 
     | 
    
         
            -
                class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error) 
     | 
| 
       211 
     | 
    
         
            -
                  text =  
     | 
| 
      
 203 
     | 
    
         
            +
                class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error) {
         
     | 
| 
      
 204 
     | 
    
         
            +
                  text = copy(data, ts, te)
         
     | 
| 
       212 
205 
     | 
    
         | 
| 
       213 
206 
     | 
    
         
             
                  type = :posixclass
         
     | 
| 
       214 
207 
     | 
    
         
             
                  class_name = text[2..-3]
         
     | 
| 
       215 
     | 
    
         
            -
                  if class_name[0] 
     | 
| 
      
 208 
     | 
    
         
            +
                  if class_name[0] == '^'
         
     | 
| 
       216 
209 
     | 
    
         
             
                    class_name = class_name[1..-1]
         
     | 
| 
       217 
210 
     | 
    
         
             
                    type = :nonposixclass
         
     | 
| 
       218 
211 
     | 
    
         
             
                  end
         
     | 
| 
       219 
212 
     | 
    
         | 
| 
       220 
     | 
    
         
            -
                   
     | 
| 
       221 
     | 
    
         
            -
             
     | 
| 
       222 
     | 
    
         
            -
             
     | 
| 
       223 
     | 
    
         
            -
                collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
         
     | 
| 
       224 
     | 
    
         
            -
                  emit(:set, :collation, *text(data, ts, te))
         
     | 
| 
       225 
     | 
    
         
            -
                };
         
     | 
| 
      
 213 
     | 
    
         
            +
                  unless self.class.posix_classes.include?(class_name)
         
     | 
| 
      
 214 
     | 
    
         
            +
                    raise ValidationError.for(:posix_class, text)
         
     | 
| 
      
 215 
     | 
    
         
            +
                  end
         
     | 
| 
       226 
216 
     | 
    
         | 
| 
       227 
     | 
    
         
            -
             
     | 
| 
       228 
     | 
    
         
            -
                  emit(:set, :equivalent, *text(data, ts, te))
         
     | 
| 
      
 217 
     | 
    
         
            +
                  emit(type, class_name.to_sym, text)
         
     | 
| 
       229 
218 
     | 
    
         
             
                };
         
     | 
| 
       230 
219 
     | 
    
         | 
| 
       231 
220 
     | 
    
         
             
                meta_char > (set_meta, 1) {
         
     | 
| 
       232 
     | 
    
         
            -
                  emit(:literal, :literal,  
     | 
| 
      
 221 
     | 
    
         
            +
                  emit(:literal, :literal, copy(data, ts, te))
         
     | 
| 
       233 
222 
     | 
    
         
             
                };
         
     | 
| 
       234 
223 
     | 
    
         | 
| 
       235 
     | 
    
         
            -
                any 
     | 
| 
       236 
     | 
    
         
            -
             
     | 
| 
       237 
     | 
    
         
            -
             
     | 
| 
       238 
     | 
    
         
            -
                utf8_3_byte    |
         
     | 
| 
       239 
     | 
    
         
            -
                utf8_4_byte    {
         
     | 
| 
       240 
     | 
    
         
            -
                  char, *rest = *text(data, ts, te)
         
     | 
| 
       241 
     | 
    
         
            -
                  char.force_encoding('utf-8') if char.respond_to?(:force_encoding)
         
     | 
| 
       242 
     | 
    
         
            -
                  emit(:literal, :literal, char, *rest)
         
     | 
| 
      
 224 
     | 
    
         
            +
                any | ascii_nonprint | utf8_multibyte {
         
     | 
| 
      
 225 
     | 
    
         
            +
                  text = copy(data, ts, te)
         
     | 
| 
      
 226 
     | 
    
         
            +
                  emit(:literal, :literal, text)
         
     | 
| 
       243 
227 
     | 
    
         
             
                };
         
     | 
| 
       244 
228 
     | 
    
         
             
              *|;
         
     | 
| 
       245 
229 
     | 
    
         | 
| 
       246 
230 
     | 
    
         
             
              # set escapes scanner
         
     | 
| 
       247 
231 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       248 
232 
     | 
    
         
             
              set_escape_sequence := |*
         
     | 
| 
       249 
     | 
    
         
            -
                 
     | 
| 
       250 
     | 
    
         
            -
             
     | 
| 
      
 233 
     | 
    
         
            +
                # Special case: in sets, octal sequences have higher priority than backrefs
         
     | 
| 
      
 234 
     | 
    
         
            +
                octal_sequence {
         
     | 
| 
      
 235 
     | 
    
         
            +
                  emit(:escape, :octal, copy(data, ts-1, te))
         
     | 
| 
       251 
236 
     | 
    
         
             
                  fret;
         
     | 
| 
       252 
237 
     | 
    
         
             
                };
         
     | 
| 
       253 
238 
     | 
    
         | 
| 
       254 
     | 
    
         
            -
                 
     | 
| 
      
 239 
     | 
    
         
            +
                # Scan all other escapes that work in sets with the generic escape scanner
         
     | 
| 
      
 240 
     | 
    
         
            +
                set_escape > (escaped_set_alpha, 2) {
         
     | 
| 
       255 
241 
     | 
    
         
             
                  fhold;
         
     | 
| 
       256 
242 
     | 
    
         
             
                  fnext character_set;
         
     | 
| 
       257 
243 
     | 
    
         
             
                  fcall escape_sequence;
         
     | 
| 
       258 
244 
     | 
    
         
             
                };
         
     | 
| 
      
 245 
     | 
    
         
            +
             
     | 
| 
      
 246 
     | 
    
         
            +
                # Treat all remaining escapes - those not supported in sets - as literal.
         
     | 
| 
      
 247 
     | 
    
         
            +
                # (This currently includes \^, \-, \&, \:, although these could potentially
         
     | 
| 
      
 248 
     | 
    
         
            +
                # be meta chars when not escaped, depending on their position in the set.)
         
     | 
| 
      
 249 
     | 
    
         
            +
                any > (escaped_set_alpha, 1) {
         
     | 
| 
      
 250 
     | 
    
         
            +
                  emit(:escape, :literal, copy(data, ts-1, te))
         
     | 
| 
      
 251 
     | 
    
         
            +
                  fret;
         
     | 
| 
      
 252 
     | 
    
         
            +
                };
         
     | 
| 
       259 
253 
     | 
    
         
             
              *|;
         
     | 
| 
       260 
254 
     | 
    
         | 
| 
       261 
255 
     | 
    
         | 
| 
         @@ -263,33 +257,40 @@ 
     | 
|
| 
       263 
257 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       264 
258 
     | 
    
         
             
              escape_sequence := |*
         
     | 
| 
       265 
259 
     | 
    
         
             
                [1-9] {
         
     | 
| 
       266 
     | 
    
         
            -
                  text =  
     | 
| 
       267 
     | 
    
         
            -
                  emit(:backref, :number, text 
     | 
| 
      
 260 
     | 
    
         
            +
                  text = copy(data, ts-1, te)
         
     | 
| 
      
 261 
     | 
    
         
            +
                  emit(:backref, :number, text)
         
     | 
| 
       268 
262 
     | 
    
         
             
                  fret;
         
     | 
| 
       269 
263 
     | 
    
         
             
                };
         
     | 
| 
       270 
264 
     | 
    
         | 
| 
       271 
265 
     | 
    
         
             
                octal_sequence {
         
     | 
| 
       272 
     | 
    
         
            -
                  emit(:escape, :octal,  
     | 
| 
      
 266 
     | 
    
         
            +
                  emit(:escape, :octal, copy(data, ts-1, te))
         
     | 
| 
      
 267 
     | 
    
         
            +
                  fret;
         
     | 
| 
      
 268 
     | 
    
         
            +
                };
         
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
      
 270 
     | 
    
         
            +
                [8-9] . [0-9] { # special case, emits two tokens
         
     | 
| 
      
 271 
     | 
    
         
            +
                  text = copy(data, ts-1, te)
         
     | 
| 
      
 272 
     | 
    
         
            +
                  emit(:escape, :literal, text[0, 2])
         
     | 
| 
      
 273 
     | 
    
         
            +
                  emit(:literal, :literal, text[2])
         
     | 
| 
       273 
274 
     | 
    
         
             
                  fret;
         
     | 
| 
       274 
275 
     | 
    
         
             
                };
         
     | 
| 
       275 
276 
     | 
    
         | 
| 
       276 
277 
     | 
    
         
             
                meta_char {
         
     | 
| 
       277 
     | 
    
         
            -
                  case text =  
     | 
| 
       278 
     | 
    
         
            -
                  when '\.';  emit(:escape, :dot,               text 
     | 
| 
       279 
     | 
    
         
            -
                  when '\|';  emit(:escape, :alternation,       text 
     | 
| 
       280 
     | 
    
         
            -
                  when '\^';  emit(:escape, :bol,               text 
     | 
| 
       281 
     | 
    
         
            -
                  when '\$';  emit(:escape, :eol,               text 
     | 
| 
       282 
     | 
    
         
            -
                  when '\?';  emit(:escape, :zero_or_one,       text 
     | 
| 
       283 
     | 
    
         
            -
                  when '\*';  emit(:escape, :zero_or_more,      text 
     | 
| 
       284 
     | 
    
         
            -
                  when '\+';  emit(:escape, :one_or_more,       text 
     | 
| 
       285 
     | 
    
         
            -
                  when '\(';  emit(:escape, :group_open,        text 
     | 
| 
       286 
     | 
    
         
            -
                  when '\)';  emit(:escape, :group_close,       text 
     | 
| 
       287 
     | 
    
         
            -
                  when '\{';  emit(:escape, :interval_open,     text 
     | 
| 
       288 
     | 
    
         
            -
                  when '\}';  emit(:escape, :interval_close,    text 
     | 
| 
       289 
     | 
    
         
            -
                  when '\[';  emit(:escape, :set_open,          text 
     | 
| 
       290 
     | 
    
         
            -
                  when '\]';  emit(:escape, :set_close,         text 
     | 
| 
      
 278 
     | 
    
         
            +
                  case text = copy(data, ts-1, te)
         
     | 
| 
      
 279 
     | 
    
         
            +
                  when '\.';  emit(:escape, :dot,               text)
         
     | 
| 
      
 280 
     | 
    
         
            +
                  when '\|';  emit(:escape, :alternation,       text)
         
     | 
| 
      
 281 
     | 
    
         
            +
                  when '\^';  emit(:escape, :bol,               text)
         
     | 
| 
      
 282 
     | 
    
         
            +
                  when '\$';  emit(:escape, :eol,               text)
         
     | 
| 
      
 283 
     | 
    
         
            +
                  when '\?';  emit(:escape, :zero_or_one,       text)
         
     | 
| 
      
 284 
     | 
    
         
            +
                  when '\*';  emit(:escape, :zero_or_more,      text)
         
     | 
| 
      
 285 
     | 
    
         
            +
                  when '\+';  emit(:escape, :one_or_more,       text)
         
     | 
| 
      
 286 
     | 
    
         
            +
                  when '\(';  emit(:escape, :group_open,        text)
         
     | 
| 
      
 287 
     | 
    
         
            +
                  when '\)';  emit(:escape, :group_close,       text)
         
     | 
| 
      
 288 
     | 
    
         
            +
                  when '\{';  emit(:escape, :interval_open,     text)
         
     | 
| 
      
 289 
     | 
    
         
            +
                  when '\}';  emit(:escape, :interval_close,    text)
         
     | 
| 
      
 290 
     | 
    
         
            +
                  when '\[';  emit(:escape, :set_open,          text)
         
     | 
| 
      
 291 
     | 
    
         
            +
                  when '\]';  emit(:escape, :set_close,         text)
         
     | 
| 
       291 
292 
     | 
    
         
             
                  when "\\\\";
         
     | 
| 
       292 
     | 
    
         
            -
                    emit(:escape, :backslash, text 
     | 
| 
      
 293 
     | 
    
         
            +
                    emit(:escape, :backslash, text)
         
     | 
| 
       293 
294 
     | 
    
         
             
                  end
         
     | 
| 
       294 
295 
     | 
    
         
             
                  fret;
         
     | 
| 
       295 
296 
     | 
    
         
             
                };
         
     | 
| 
         @@ -297,31 +298,31 @@ 
     | 
|
| 
       297 
298 
     | 
    
         
             
                escaped_ascii > (escaped_alpha, 7) {
         
     | 
| 
       298 
299 
     | 
    
         
             
                  # \b is emitted as backspace only when inside a character set, otherwise
         
     | 
| 
       299 
300 
     | 
    
         
             
                  # it is a word boundary anchor. A syntax might "normalize" it if needed.
         
     | 
| 
       300 
     | 
    
         
            -
                  case text =  
     | 
| 
       301 
     | 
    
         
            -
                  when '\a'; emit(:escape, :bell,           text 
     | 
| 
       302 
     | 
    
         
            -
                  when '\b'; emit(:escape, :backspace,      text 
     | 
| 
       303 
     | 
    
         
            -
                  when '\e'; emit(:escape, :escape,         text 
     | 
| 
       304 
     | 
    
         
            -
                  when '\f'; emit(:escape, :form_feed,      text 
     | 
| 
       305 
     | 
    
         
            -
                  when '\n'; emit(:escape, :newline,        text 
     | 
| 
       306 
     | 
    
         
            -
                  when '\r'; emit(:escape, :carriage,       text 
     | 
| 
       307 
     | 
    
         
            -
                  when '\t'; emit(:escape, :tab,            text 
     | 
| 
       308 
     | 
    
         
            -
                  when '\v'; emit(:escape, :vertical_tab,   text 
     | 
| 
      
 301 
     | 
    
         
            +
                  case text = copy(data, ts-1, te)
         
     | 
| 
      
 302 
     | 
    
         
            +
                  when '\a'; emit(:escape, :bell,           text)
         
     | 
| 
      
 303 
     | 
    
         
            +
                  when '\b'; emit(:escape, :backspace,      text)
         
     | 
| 
      
 304 
     | 
    
         
            +
                  when '\e'; emit(:escape, :escape,         text)
         
     | 
| 
      
 305 
     | 
    
         
            +
                  when '\f'; emit(:escape, :form_feed,      text)
         
     | 
| 
      
 306 
     | 
    
         
            +
                  when '\n'; emit(:escape, :newline,        text)
         
     | 
| 
      
 307 
     | 
    
         
            +
                  when '\r'; emit(:escape, :carriage,       text)
         
     | 
| 
      
 308 
     | 
    
         
            +
                  when '\t'; emit(:escape, :tab,            text)
         
     | 
| 
      
 309 
     | 
    
         
            +
                  when '\v'; emit(:escape, :vertical_tab,   text)
         
     | 
| 
       309 
310 
     | 
    
         
             
                  end
         
     | 
| 
       310 
311 
     | 
    
         
             
                  fret;
         
     | 
| 
       311 
312 
     | 
    
         
             
                };
         
     | 
| 
       312 
313 
     | 
    
         | 
| 
       313 
314 
     | 
    
         
             
                codepoint_sequence > (escaped_alpha, 6) $eof(premature_end_error) {
         
     | 
| 
       314 
     | 
    
         
            -
                  text =  
     | 
| 
       315 
     | 
    
         
            -
                  if text[2] 
     | 
| 
       316 
     | 
    
         
            -
                    emit(:escape, :codepoint_list, text 
     | 
| 
      
 315 
     | 
    
         
            +
                  text = copy(data, ts-1, te)
         
     | 
| 
      
 316 
     | 
    
         
            +
                  if text[2] == '{'
         
     | 
| 
      
 317 
     | 
    
         
            +
                    emit(:escape, :codepoint_list, text)
         
     | 
| 
       317 
318 
     | 
    
         
             
                  else
         
     | 
| 
       318 
     | 
    
         
            -
                    emit(:escape, :codepoint,      text 
     | 
| 
      
 319 
     | 
    
         
            +
                    emit(:escape, :codepoint,      text)
         
     | 
| 
       319 
320 
     | 
    
         
             
                  end
         
     | 
| 
       320 
321 
     | 
    
         
             
                  fret;
         
     | 
| 
       321 
322 
     | 
    
         
             
                };
         
     | 
| 
       322 
323 
     | 
    
         | 
| 
       323 
     | 
    
         
            -
                hex_sequence > (escaped_alpha, 5)  
     | 
| 
       324 
     | 
    
         
            -
                  emit(:escape, :hex,  
     | 
| 
      
 324 
     | 
    
         
            +
                hex_sequence > (escaped_alpha, 5) @eof(premature_end_error) {
         
     | 
| 
      
 325 
     | 
    
         
            +
                  emit(:escape, :hex, copy(data, ts-1, te))
         
     | 
| 
       325 
326 
     | 
    
         
             
                  fret;
         
     | 
| 
       326 
327 
     | 
    
         
             
                };
         
     | 
| 
       327 
328 
     | 
    
         | 
| 
         @@ -351,8 +352,8 @@ 
     | 
|
| 
       351 
352 
     | 
    
         
             
                  fcall unicode_property;
         
     | 
| 
       352 
353 
     | 
    
         
             
                };
         
     | 
| 
       353 
354 
     | 
    
         | 
| 
       354 
     | 
    
         
            -
                (any -- non_literal_escape) > (escaped_alpha, 1) 
     | 
| 
       355 
     | 
    
         
            -
                  emit(:escape, :literal,  
     | 
| 
      
 355 
     | 
    
         
            +
                (any -- non_literal_escape) | utf8_multibyte > (escaped_alpha, 1) {
         
     | 
| 
      
 356 
     | 
    
         
            +
                  emit(:escape, :literal, copy(data, ts-1, te))
         
     | 
| 
       356 
357 
     | 
    
         
             
                  fret;
         
     | 
| 
       357 
358 
     | 
    
         
             
                };
         
     | 
| 
       358 
359 
     | 
    
         
             
              *|;
         
     | 
| 
         @@ -362,9 +363,10 @@ 
     | 
|
| 
       362 
363 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       363 
364 
     | 
    
         
             
              conditional_expression := |*
         
     | 
| 
       364 
365 
     | 
    
         
             
                group_lookup . ')' {
         
     | 
| 
       365 
     | 
    
         
            -
                  text =  
     | 
| 
       366 
     | 
    
         
            -
                   
     | 
| 
       367 
     | 
    
         
            -
                  emit(:conditional, : 
     | 
| 
      
 366 
     | 
    
         
            +
                  text = copy(data, ts, te-1)
         
     | 
| 
      
 367 
     | 
    
         
            +
                  text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
         
     | 
| 
      
 368 
     | 
    
         
            +
                  emit(:conditional, :condition, text)
         
     | 
| 
      
 369 
     | 
    
         
            +
                  emit(:conditional, :condition_close, ')')
         
     | 
| 
       368 
370 
     | 
    
         
             
                };
         
     | 
| 
       369 
371 
     | 
    
         | 
| 
       370 
372 
     | 
    
         
             
                any {
         
     | 
| 
         @@ -381,46 +383,50 @@ 
     | 
|
| 
       381 
383 
     | 
    
         
             
                # Meta characters
         
     | 
| 
       382 
384 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       383 
385 
     | 
    
         
             
                dot {
         
     | 
| 
       384 
     | 
    
         
            -
                  emit(:meta, :dot,  
     | 
| 
      
 386 
     | 
    
         
            +
                  emit(:meta, :dot, copy(data, ts, te))
         
     | 
| 
       385 
387 
     | 
    
         
             
                };
         
     | 
| 
       386 
388 
     | 
    
         | 
| 
       387 
389 
     | 
    
         
             
                alternation {
         
     | 
| 
       388 
390 
     | 
    
         
             
                  if conditional_stack.last == group_depth
         
     | 
| 
       389 
     | 
    
         
            -
                    emit(:conditional, :separator,  
     | 
| 
      
 391 
     | 
    
         
            +
                    emit(:conditional, :separator, copy(data, ts, te))
         
     | 
| 
       390 
392 
     | 
    
         
             
                  else
         
     | 
| 
       391 
     | 
    
         
            -
                    emit(:meta, :alternation,  
     | 
| 
      
 393 
     | 
    
         
            +
                    emit(:meta, :alternation, copy(data, ts, te))
         
     | 
| 
       392 
394 
     | 
    
         
             
                  end
         
     | 
| 
       393 
395 
     | 
    
         
             
                };
         
     | 
| 
       394 
396 
     | 
    
         | 
| 
       395 
397 
     | 
    
         
             
                # Anchors
         
     | 
| 
       396 
398 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       397 
399 
     | 
    
         
             
                beginning_of_line {
         
     | 
| 
       398 
     | 
    
         
            -
                  emit(:anchor, :bol,  
     | 
| 
      
 400 
     | 
    
         
            +
                  emit(:anchor, :bol, copy(data, ts, te))
         
     | 
| 
       399 
401 
     | 
    
         
             
                };
         
     | 
| 
       400 
402 
     | 
    
         | 
| 
       401 
403 
     | 
    
         
             
                end_of_line {
         
     | 
| 
       402 
     | 
    
         
            -
                  emit(:anchor, :eol,  
     | 
| 
      
 404 
     | 
    
         
            +
                  emit(:anchor, :eol, copy(data, ts, te))
         
     | 
| 
       403 
405 
     | 
    
         
             
                };
         
     | 
| 
       404 
406 
     | 
    
         | 
| 
       405 
407 
     | 
    
         
             
                backslash . keep_mark > (backslashed, 4) {
         
     | 
| 
       406 
     | 
    
         
            -
                  emit(:keep, :mark,  
     | 
| 
      
 408 
     | 
    
         
            +
                  emit(:keep, :mark, copy(data, ts, te))
         
     | 
| 
       407 
409 
     | 
    
         
             
                };
         
     | 
| 
       408 
410 
     | 
    
         | 
| 
       409 
411 
     | 
    
         
             
                backslash . anchor_char > (backslashed, 3) {
         
     | 
| 
       410 
     | 
    
         
            -
                  case text =  
     | 
| 
       411 
     | 
    
         
            -
                  when ' 
     | 
| 
       412 
     | 
    
         
            -
                  when ' 
     | 
| 
       413 
     | 
    
         
            -
                  when ' 
     | 
| 
       414 
     | 
    
         
            -
                  when ' 
     | 
| 
       415 
     | 
    
         
            -
                  when ' 
     | 
| 
       416 
     | 
    
         
            -
                  when ' 
     | 
| 
      
 412 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 413 
     | 
    
         
            +
                  when '\A';  emit(:anchor, :bos,                text)
         
     | 
| 
      
 414 
     | 
    
         
            +
                  when '\z';  emit(:anchor, :eos,                text)
         
     | 
| 
      
 415 
     | 
    
         
            +
                  when '\Z';  emit(:anchor, :eos_ob_eol,         text)
         
     | 
| 
      
 416 
     | 
    
         
            +
                  when '\b';  emit(:anchor, :word_boundary,      text)
         
     | 
| 
      
 417 
     | 
    
         
            +
                  when '\B';  emit(:anchor, :nonword_boundary,   text)
         
     | 
| 
      
 418 
     | 
    
         
            +
                  when '\G';  emit(:anchor, :match_start,        text)
         
     | 
| 
       417 
419 
     | 
    
         
             
                  end
         
     | 
| 
       418 
420 
     | 
    
         
             
                };
         
     | 
| 
       419 
421 
     | 
    
         | 
| 
      
 422 
     | 
    
         
            +
                literal_delimiters {
         
     | 
| 
      
 423 
     | 
    
         
            +
                  append_literal(data, ts, te)
         
     | 
| 
      
 424 
     | 
    
         
            +
                };
         
     | 
| 
      
 425 
     | 
    
         
            +
             
     | 
| 
       420 
426 
     | 
    
         
             
                # Character sets
         
     | 
| 
       421 
427 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       422 
428 
     | 
    
         
             
                set_open >set_opened {
         
     | 
| 
       423 
     | 
    
         
            -
                  emit(:set, :open,  
     | 
| 
      
 429 
     | 
    
         
            +
                  emit(:set, :open, copy(data, ts, te))
         
     | 
| 
       424 
430 
     | 
    
         
             
                  fcall character_set;
         
     | 
| 
       425 
431 
     | 
    
         
             
                };
         
     | 
| 
       426 
432 
     | 
    
         | 
| 
         @@ -429,23 +435,22 @@ 
     | 
|
| 
       429 
435 
     | 
    
         
             
                #   (?(condition)Y|N)   conditional expression
         
     | 
| 
       430 
436 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       431 
437 
     | 
    
         
             
                conditional {
         
     | 
| 
       432 
     | 
    
         
            -
                  text =  
     | 
| 
      
 438 
     | 
    
         
            +
                  text = copy(data, ts, te)
         
     | 
| 
       433 
439 
     | 
    
         | 
| 
       434 
440 
     | 
    
         
             
                  conditional_stack << group_depth
         
     | 
| 
       435 
441 
     | 
    
         | 
| 
       436 
     | 
    
         
            -
                  emit(:conditional, :open, text[0..-2] 
     | 
| 
       437 
     | 
    
         
            -
                  emit(:conditional, :condition_open, '(' 
     | 
| 
      
 442 
     | 
    
         
            +
                  emit(:conditional, :open, text[0..-2])
         
     | 
| 
      
 443 
     | 
    
         
            +
                  emit(:conditional, :condition_open, '(')
         
     | 
| 
       438 
444 
     | 
    
         
             
                  fcall conditional_expression;
         
     | 
| 
       439 
445 
     | 
    
         
             
                };
         
     | 
| 
       440 
446 
     | 
    
         | 
| 
       441 
447 
     | 
    
         | 
| 
       442 
448 
     | 
    
         
             
                # (?#...) comments: parsed as a single expression, without introducing a
         
     | 
| 
       443 
449 
     | 
    
         
             
                # new nesting level. Comments may not include parentheses, escaped or not.
         
     | 
| 
       444 
     | 
    
         
            -
                # special case for close 
     | 
| 
       445 
     | 
    
         
            -
                # correct closing count.
         
     | 
| 
      
 450 
     | 
    
         
            +
                # special case for close to get the correct closing count.
         
     | 
| 
       446 
451 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       447 
     | 
    
         
            -
                group_open . group_comment  
     | 
| 
       448 
     | 
    
         
            -
                  emit(:group, :comment,  
     | 
| 
      
 452 
     | 
    
         
            +
                (group_open . group_comment) @group_closed {
         
     | 
| 
      
 453 
     | 
    
         
            +
                  emit(:group, :comment, copy(data, ts, te))
         
     | 
| 
       449 
454 
     | 
    
         
             
                };
         
     | 
| 
       450 
455 
     | 
    
         | 
| 
       451 
456 
     | 
    
         
             
                # Expression options:
         
     | 
| 
         @@ -459,12 +464,12 @@ 
     | 
|
| 
       459 
464 
     | 
    
         
             
                #
         
     | 
| 
       460 
465 
     | 
    
         
             
                #   (?imxdau-imx:subexp)  option on/off for subexp
         
     | 
| 
       461 
466 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       462 
     | 
    
         
            -
                group_open . group_options >group_opened {
         
     | 
| 
       463 
     | 
    
         
            -
                  text =  
     | 
| 
      
 467 
     | 
    
         
            +
                (group_open . group_options) >group_opened {
         
     | 
| 
      
 468 
     | 
    
         
            +
                  text = copy(data, ts, te)
         
     | 
| 
       464 
469 
     | 
    
         
             
                  if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
         
     | 
| 
       465 
     | 
    
         
            -
                    raise  
     | 
| 
      
 470 
     | 
    
         
            +
                    raise ValidationError.for(:group_option, $1 || "-#{$2}", text)
         
     | 
| 
       466 
471 
     | 
    
         
             
                  end
         
     | 
| 
       467 
     | 
    
         
            -
                  emit_options(text 
     | 
| 
      
 472 
     | 
    
         
            +
                  emit_options(text)
         
     | 
| 
       468 
473 
     | 
    
         
             
                };
         
     | 
| 
       469 
474 
     | 
    
         | 
| 
       470 
475 
     | 
    
         
             
                # Assertions
         
     | 
| 
         @@ -473,12 +478,12 @@ 
     | 
|
| 
       473 
478 
     | 
    
         
             
                #   (?<=subexp)         look-behind
         
     | 
| 
       474 
479 
     | 
    
         
             
                #   (?<!subexp)         negative look-behind
         
     | 
| 
       475 
480 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       476 
     | 
    
         
            -
                group_open . assertion_type >group_opened {
         
     | 
| 
       477 
     | 
    
         
            -
                  case text =  
     | 
| 
       478 
     | 
    
         
            -
                  when '(?=';  emit(:assertion, :lookahead,    text 
     | 
| 
       479 
     | 
    
         
            -
                  when '(?!';  emit(:assertion, :nlookahead,   text 
     | 
| 
       480 
     | 
    
         
            -
                  when '(?<='; emit(:assertion, :lookbehind,   text 
     | 
| 
       481 
     | 
    
         
            -
                  when '(?<!'; emit(:assertion, :nlookbehind,  text 
     | 
| 
      
 481 
     | 
    
         
            +
                (group_open . assertion_type) >group_opened {
         
     | 
| 
      
 482 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 483 
     | 
    
         
            +
                  when '(?=';  emit(:assertion, :lookahead,    text)
         
     | 
| 
      
 484 
     | 
    
         
            +
                  when '(?!';  emit(:assertion, :nlookahead,   text)
         
     | 
| 
      
 485 
     | 
    
         
            +
                  when '(?<='; emit(:assertion, :lookbehind,   text)
         
     | 
| 
      
 486 
     | 
    
         
            +
                  when '(?<!'; emit(:assertion, :nlookbehind,  text)
         
     | 
| 
       482 
487 
     | 
    
         
             
                  end
         
     | 
| 
       483 
488 
     | 
    
         
             
                };
         
     | 
| 
       484 
489 
     | 
    
         | 
| 
         @@ -490,106 +495,78 @@ 
     | 
|
| 
       490 
495 
     | 
    
         
             
                #   (?'name'subexp)     named group (single quoted version)
         
     | 
| 
       491 
496 
     | 
    
         
             
                #   (subexp)            captured group
         
     | 
| 
       492 
497 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       493 
     | 
    
         
            -
                group_open . group_type >group_opened {
         
     | 
| 
       494 
     | 
    
         
            -
                  case text =  
     | 
| 
       495 
     | 
    
         
            -
                  when '(?:';  emit(:group, :passive,      text 
     | 
| 
       496 
     | 
    
         
            -
                  when '(?>';  emit(:group, :atomic,       text 
     | 
| 
       497 
     | 
    
         
            -
                  when '(?~';  emit(:group, :absence,      text 
     | 
| 
      
 498 
     | 
    
         
            +
                (group_open . group_type) >group_opened {
         
     | 
| 
      
 499 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 500 
     | 
    
         
            +
                  when '(?:';  emit(:group, :passive,      text)
         
     | 
| 
      
 501 
     | 
    
         
            +
                  when '(?>';  emit(:group, :atomic,       text)
         
     | 
| 
      
 502 
     | 
    
         
            +
                  when '(?~';  emit(:group, :absence,      text)
         
     | 
| 
       498 
503 
     | 
    
         | 
| 
       499 
504 
     | 
    
         
             
                  when /^\(\?(?:<>|'')/
         
     | 
| 
       500 
     | 
    
         
            -
                     
     | 
| 
      
 505 
     | 
    
         
            +
                    raise ValidationError.for(:group, 'named group', 'name is empty')
         
     | 
| 
       501 
506 
     | 
    
         | 
| 
       502 
     | 
    
         
            -
                  when /^\( 
     | 
| 
       503 
     | 
    
         
            -
                    emit(:group, :named_ab,  text 
     | 
| 
      
 507 
     | 
    
         
            +
                  when /^\(\?<[^>]+>/
         
     | 
| 
      
 508 
     | 
    
         
            +
                    emit(:group, :named_ab,  text)
         
     | 
| 
       504 
509 
     | 
    
         | 
| 
       505 
     | 
    
         
            -
                  when /^\(\?' 
     | 
| 
       506 
     | 
    
         
            -
                    emit(:group, :named_sq,  text 
     | 
| 
      
 510 
     | 
    
         
            +
                  when /^\(\?'[^']+'/
         
     | 
| 
      
 511 
     | 
    
         
            +
                    emit(:group, :named_sq,  text)
         
     | 
| 
       507 
512 
     | 
    
         | 
| 
       508 
513 
     | 
    
         
             
                  end
         
     | 
| 
       509 
514 
     | 
    
         
             
                };
         
     | 
| 
       510 
515 
     | 
    
         | 
| 
       511 
516 
     | 
    
         
             
                group_open @group_opened {
         
     | 
| 
       512 
     | 
    
         
            -
                  text =  
     | 
| 
       513 
     | 
    
         
            -
                  emit(:group, :capture, text 
     | 
| 
      
 517 
     | 
    
         
            +
                  text = copy(data, ts, te)
         
     | 
| 
      
 518 
     | 
    
         
            +
                  emit(:group, :capture, text)
         
     | 
| 
       514 
519 
     | 
    
         
             
                };
         
     | 
| 
       515 
520 
     | 
    
         | 
| 
       516 
521 
     | 
    
         
             
                group_close @group_closed {
         
     | 
| 
       517 
522 
     | 
    
         
             
                  if conditional_stack.last == group_depth + 1
         
     | 
| 
       518 
523 
     | 
    
         
             
                    conditional_stack.pop
         
     | 
| 
       519 
     | 
    
         
            -
                    emit(:conditional, :close,  
     | 
| 
       520 
     | 
    
         
            -
                   
     | 
| 
      
 524 
     | 
    
         
            +
                    emit(:conditional, :close, ')')
         
     | 
| 
      
 525 
     | 
    
         
            +
                  elsif group_depth >= 0
         
     | 
| 
       521 
526 
     | 
    
         
             
                    if spacing_stack.length > 1 &&
         
     | 
| 
       522 
527 
     | 
    
         
             
                       spacing_stack.last[:depth] == group_depth + 1
         
     | 
| 
       523 
528 
     | 
    
         
             
                      spacing_stack.pop
         
     | 
| 
       524 
529 
     | 
    
         
             
                      self.free_spacing = spacing_stack.last[:free_spacing]
         
     | 
| 
       525 
530 
     | 
    
         
             
                    end
         
     | 
| 
       526 
531 
     | 
    
         | 
| 
       527 
     | 
    
         
            -
                    emit(:group, :close,  
     | 
| 
      
 532 
     | 
    
         
            +
                    emit(:group, :close, ')')
         
     | 
| 
      
 533 
     | 
    
         
            +
                  else
         
     | 
| 
      
 534 
     | 
    
         
            +
                    raise ValidationError.for(:group, 'group', 'unmatched close parenthesis')
         
     | 
| 
       528 
535 
     | 
    
         
             
                  end
         
     | 
| 
       529 
536 
     | 
    
         
             
                };
         
     | 
| 
       530 
537 
     | 
    
         | 
| 
       531 
538 
     | 
    
         | 
| 
       532 
539 
     | 
    
         
             
                # Group backreference, named and numbered
         
     | 
| 
       533 
540 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       534 
     | 
    
         
            -
                backslash . ( 
     | 
| 
       535 
     | 
    
         
            -
                  case text =  
     | 
| 
       536 
     | 
    
         
            -
                  when /^\\([ 
     | 
| 
       537 
     | 
    
         
            -
                     
     | 
| 
       538 
     | 
    
         
            -
             
     | 
| 
       539 
     | 
    
         
            -
             
     | 
| 
       540 
     | 
    
         
            -
             
     | 
| 
       541 
     | 
    
         
            -
             
     | 
| 
       542 
     | 
    
         
            -
             
     | 
| 
       543 
     | 
    
         
            -
             
     | 
| 
       544 
     | 
    
         
            -
             
     | 
| 
       545 
     | 
    
         
            -
             
     | 
| 
       546 
     | 
    
         
            -
                   
     | 
| 
       547 
     | 
    
         
            -
                     
     | 
| 
       548 
     | 
    
         
            -
             
     | 
| 
       549 
     | 
    
         
            -
             
     | 
| 
       550 
     | 
    
         
            -
                      emit(:backref, :name_call_sq, text, ts, te)
         
     | 
| 
       551 
     | 
    
         
            -
                    end
         
     | 
| 
       552 
     | 
    
         
            -
             
     | 
| 
       553 
     | 
    
         
            -
                  when /^\\([gk])<\d+>/ # angle-brackets
         
     | 
| 
       554 
     | 
    
         
            -
                    if $1 == 'k'
         
     | 
| 
       555 
     | 
    
         
            -
                      emit(:backref, :number_ref_ab, text, ts, te)
         
     | 
| 
       556 
     | 
    
         
            -
                    else
         
     | 
| 
       557 
     | 
    
         
            -
                      emit(:backref, :number_call_ab, text, ts, te)
         
     | 
| 
       558 
     | 
    
         
            -
                    end
         
     | 
| 
       559 
     | 
    
         
            -
             
     | 
| 
       560 
     | 
    
         
            -
                  when /^\\([gk])'\d+'/ # single quotes
         
     | 
| 
       561 
     | 
    
         
            -
                    if $1 == 'k'
         
     | 
| 
       562 
     | 
    
         
            -
                      emit(:backref, :number_ref_sq, text, ts, te)
         
     | 
| 
       563 
     | 
    
         
            -
                    else
         
     | 
| 
       564 
     | 
    
         
            -
                      emit(:backref, :number_call_sq, text, ts, te)
         
     | 
| 
       565 
     | 
    
         
            -
                    end
         
     | 
| 
       566 
     | 
    
         
            -
             
     | 
| 
       567 
     | 
    
         
            -
                  when /^\\(?:g<\+|g<-|(k)<-)\d+>/ # angle-brackets
         
     | 
| 
       568 
     | 
    
         
            -
                    if $1 == 'k'
         
     | 
| 
       569 
     | 
    
         
            -
                      emit(:backref, :number_rel_ref_ab, text, ts, te)
         
     | 
| 
       570 
     | 
    
         
            -
                    else
         
     | 
| 
       571 
     | 
    
         
            -
                      emit(:backref, :number_rel_call_ab, text, ts, te)
         
     | 
| 
       572 
     | 
    
         
            -
                    end
         
     | 
| 
       573 
     | 
    
         
            -
             
     | 
| 
       574 
     | 
    
         
            -
                  when /^\\(?:g'\+|g'-|(k)'-)\d+'/ # single quotes
         
     | 
| 
       575 
     | 
    
         
            -
                    if $1 == 'k'
         
     | 
| 
       576 
     | 
    
         
            -
                      emit(:backref, :number_rel_ref_sq, text, ts, te)
         
     | 
| 
       577 
     | 
    
         
            -
                    else
         
     | 
| 
       578 
     | 
    
         
            -
                      emit(:backref, :number_rel_call_sq, text, ts, te)
         
     | 
| 
       579 
     | 
    
         
            -
                    end
         
     | 
| 
       580 
     | 
    
         
            -
             
     | 
| 
       581 
     | 
    
         
            -
                  when /^\\k<[^\d+\-]\w*[+\-]\d+>/ # angle-brackets
         
     | 
| 
       582 
     | 
    
         
            -
                    emit(:backref, :name_recursion_ref_ab, text, ts, te)
         
     | 
| 
       583 
     | 
    
         
            -
             
     | 
| 
       584 
     | 
    
         
            -
                  when /^\\k'[^\d+\-]\w*[+\-]\d+'/ # single-quotes
         
     | 
| 
       585 
     | 
    
         
            -
                    emit(:backref, :name_recursion_ref_sq, text, ts, te)
         
     | 
| 
       586 
     | 
    
         
            -
             
     | 
| 
       587 
     | 
    
         
            -
                  when /^\\([gk])<[+\-]?\d+[+\-]\d+>/ # angle-brackets
         
     | 
| 
       588 
     | 
    
         
            -
                    emit(:backref, :number_recursion_ref_ab, text, ts, te)
         
     | 
| 
       589 
     | 
    
         
            -
             
     | 
| 
       590 
     | 
    
         
            -
                  when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
         
     | 
| 
       591 
     | 
    
         
            -
                    emit(:backref, :number_recursion_ref_sq, text, ts, te)
         
     | 
| 
      
 541 
     | 
    
         
            +
                backslash . (group_ref) > (backslashed, 4) {
         
     | 
| 
      
 542 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 543 
     | 
    
         
            +
                  when /^\\k(.)[^0-9\-][^+\-]*['>]$/
         
     | 
| 
      
 544 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
         
     | 
| 
      
 545 
     | 
    
         
            +
                  when /^\\k(.)0*[1-9]\d*['>]$/
         
     | 
| 
      
 546 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
         
     | 
| 
      
 547 
     | 
    
         
            +
                  when /^\\k(.)-0*[1-9]\d*['>]$/
         
     | 
| 
      
 548 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
         
     | 
| 
      
 549 
     | 
    
         
            +
                  when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
         
     | 
| 
      
 550 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
         
     | 
| 
      
 551 
     | 
    
         
            +
                  when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
         
     | 
| 
      
 552 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
         
     | 
| 
      
 553 
     | 
    
         
            +
                  else
         
     | 
| 
      
 554 
     | 
    
         
            +
                    raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
         
     | 
| 
      
 555 
     | 
    
         
            +
                  end
         
     | 
| 
      
 556 
     | 
    
         
            +
                };
         
     | 
| 
       592 
557 
     | 
    
         | 
| 
      
 558 
     | 
    
         
            +
                # Group call, named and numbered
         
     | 
| 
      
 559 
     | 
    
         
            +
                # ------------------------------------------------------------------------
         
     | 
| 
      
 560 
     | 
    
         
            +
                backslash . (group_call) > (backslashed, 4) {
         
     | 
| 
      
 561 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 562 
     | 
    
         
            +
                  when /^\\g(.)[^0-9+\-].*['>]$/
         
     | 
| 
      
 563 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
         
     | 
| 
      
 564 
     | 
    
         
            +
                  when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
         
     | 
| 
      
 565 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
         
     | 
| 
      
 566 
     | 
    
         
            +
                  when /^\\g(.)[+-]0*[1-9]\d*/
         
     | 
| 
      
 567 
     | 
    
         
            +
                    emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
         
     | 
| 
      
 568 
     | 
    
         
            +
                  else
         
     | 
| 
      
 569 
     | 
    
         
            +
                    raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
         
     | 
| 
       593 
570 
     | 
    
         
             
                  end
         
     | 
| 
       594 
571 
     | 
    
         
             
                };
         
     | 
| 
       595 
572 
     | 
    
         | 
| 
         @@ -597,31 +574,36 @@ 
     | 
|
| 
       597 
574 
     | 
    
         
             
                # Quantifiers
         
     | 
| 
       598 
575 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       599 
576 
     | 
    
         
             
                zero_or_one {
         
     | 
| 
       600 
     | 
    
         
            -
                  case text =  
     | 
| 
       601 
     | 
    
         
            -
                  when '?' ;  emit(:quantifier, :zero_or_one,            text 
     | 
| 
       602 
     | 
    
         
            -
                  when '??';  emit(:quantifier, :zero_or_one_reluctant,  text 
     | 
| 
       603 
     | 
    
         
            -
                  when '?+';  emit(:quantifier, :zero_or_one_possessive, text 
     | 
| 
      
 577 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 578 
     | 
    
         
            +
                  when '?' ;  emit(:quantifier, :zero_or_one,            text)
         
     | 
| 
      
 579 
     | 
    
         
            +
                  when '??';  emit(:quantifier, :zero_or_one_reluctant,  text)
         
     | 
| 
      
 580 
     | 
    
         
            +
                  when '?+';  emit(:quantifier, :zero_or_one_possessive, text)
         
     | 
| 
       604 
581 
     | 
    
         
             
                  end
         
     | 
| 
       605 
582 
     | 
    
         
             
                };
         
     | 
| 
       606 
583 
     | 
    
         | 
| 
       607 
584 
     | 
    
         
             
                zero_or_more {
         
     | 
| 
       608 
     | 
    
         
            -
                  case text =  
     | 
| 
       609 
     | 
    
         
            -
                  when '*' ;  emit(:quantifier, :zero_or_more,            text 
     | 
| 
       610 
     | 
    
         
            -
                  when '*?';  emit(:quantifier, :zero_or_more_reluctant,  text 
     | 
| 
       611 
     | 
    
         
            -
                  when '*+';  emit(:quantifier, :zero_or_more_possessive, text 
     | 
| 
      
 585 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 586 
     | 
    
         
            +
                  when '*' ;  emit(:quantifier, :zero_or_more,            text)
         
     | 
| 
      
 587 
     | 
    
         
            +
                  when '*?';  emit(:quantifier, :zero_or_more_reluctant,  text)
         
     | 
| 
      
 588 
     | 
    
         
            +
                  when '*+';  emit(:quantifier, :zero_or_more_possessive, text)
         
     | 
| 
       612 
589 
     | 
    
         
             
                  end
         
     | 
| 
       613 
590 
     | 
    
         
             
                };
         
     | 
| 
       614 
591 
     | 
    
         | 
| 
       615 
592 
     | 
    
         
             
                one_or_more {
         
     | 
| 
       616 
     | 
    
         
            -
                  case text =  
     | 
| 
       617 
     | 
    
         
            -
                  when '+' ;  emit(:quantifier, :one_or_more,            text 
     | 
| 
       618 
     | 
    
         
            -
                  when '+?';  emit(:quantifier, :one_or_more_reluctant,  text 
     | 
| 
       619 
     | 
    
         
            -
                  when '++';  emit(:quantifier, :one_or_more_possessive, text 
     | 
| 
      
 593 
     | 
    
         
            +
                  case text = copy(data, ts, te)
         
     | 
| 
      
 594 
     | 
    
         
            +
                  when '+' ;  emit(:quantifier, :one_or_more,            text)
         
     | 
| 
      
 595 
     | 
    
         
            +
                  when '+?';  emit(:quantifier, :one_or_more_reluctant,  text)
         
     | 
| 
      
 596 
     | 
    
         
            +
                  when '++';  emit(:quantifier, :one_or_more_possessive, text)
         
     | 
| 
       620 
597 
     | 
    
         
             
                  end
         
     | 
| 
       621 
598 
     | 
    
         
             
                };
         
     | 
| 
       622 
599 
     | 
    
         | 
| 
       623 
     | 
    
         
            -
                quantifier_interval 
     | 
| 
       624 
     | 
    
         
            -
                  emit(:quantifier, :interval,  
     | 
| 
      
 600 
     | 
    
         
            +
                quantifier_interval {
         
     | 
| 
      
 601 
     | 
    
         
            +
                  emit(:quantifier, :interval, copy(data, ts, te))
         
     | 
| 
      
 602 
     | 
    
         
            +
                };
         
     | 
| 
      
 603 
     | 
    
         
            +
             
     | 
| 
      
 604 
     | 
    
         
            +
                # Catch unmatched curly braces as literals
         
     | 
| 
      
 605 
     | 
    
         
            +
                range_open {
         
     | 
| 
      
 606 
     | 
    
         
            +
                  append_literal(data, ts, te)
         
     | 
| 
       625 
607 
     | 
    
         
             
                };
         
     | 
| 
       626 
608 
     | 
    
         | 
| 
       627 
609 
     | 
    
         
             
                # Escaped sequences
         
     | 
| 
         @@ -632,15 +614,17 @@ 
     | 
|
| 
       632 
614 
     | 
    
         | 
| 
       633 
615 
     | 
    
         
             
                comment {
         
     | 
| 
       634 
616 
     | 
    
         
             
                  if free_spacing
         
     | 
| 
       635 
     | 
    
         
            -
                    emit(:free_space, :comment,  
     | 
| 
      
 617 
     | 
    
         
            +
                    emit(:free_space, :comment, copy(data, ts, te))
         
     | 
| 
       636 
618 
     | 
    
         
             
                  else
         
     | 
| 
       637 
     | 
    
         
            -
                     
     | 
| 
      
 619 
     | 
    
         
            +
                    # consume only the pound sign (#) and backtrack to do regular scanning
         
     | 
| 
      
 620 
     | 
    
         
            +
                    append_literal(data, ts, ts + 1)
         
     | 
| 
      
 621 
     | 
    
         
            +
                    fexec ts + 1;
         
     | 
| 
       638 
622 
     | 
    
         
             
                  end
         
     | 
| 
       639 
623 
     | 
    
         
             
                };
         
     | 
| 
       640 
624 
     | 
    
         | 
| 
       641 
625 
     | 
    
         
             
                space+ {
         
     | 
| 
       642 
626 
     | 
    
         
             
                  if free_spacing
         
     | 
| 
       643 
     | 
    
         
            -
                    emit(:free_space, :whitespace,  
     | 
| 
      
 627 
     | 
    
         
            +
                    emit(:free_space, :whitespace, copy(data, ts, te))
         
     | 
| 
       644 
628 
     | 
    
         
             
                  else
         
     | 
| 
       645 
629 
     | 
    
         
             
                    append_literal(data, ts, te)
         
     | 
| 
       646 
630 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -649,105 +633,47 @@ 
     | 
|
| 
       649 
633 
     | 
    
         
             
                # Literal: any run of ASCII (printable or non-printable), and/or UTF-8,
         
     | 
| 
       650 
634 
     | 
    
         
             
                # except meta characters.
         
     | 
| 
       651 
635 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       652 
     | 
    
         
            -
                (ascii_print -- space)+ 
     | 
| 
       653 
     | 
    
         
            -
                ascii_nonprint+ |
         
     | 
| 
       654 
     | 
    
         
            -
                utf8_2_byte+    |
         
     | 
| 
       655 
     | 
    
         
            -
                utf8_3_byte+    |
         
     | 
| 
       656 
     | 
    
         
            -
                utf8_4_byte+    {
         
     | 
| 
      
 636 
     | 
    
         
            +
                (ascii_print -- space)+ | ascii_nonprint+ | utf8_multibyte+ {
         
     | 
| 
       657 
637 
     | 
    
         
             
                  append_literal(data, ts, te)
         
     | 
| 
       658 
638 
     | 
    
         
             
                };
         
     | 
| 
       659 
639 
     | 
    
         | 
| 
       660 
640 
     | 
    
         
             
              *|;
         
     | 
| 
       661 
641 
     | 
    
         
             
            }%%
         
     | 
| 
       662 
642 
     | 
    
         | 
| 
       663 
     | 
    
         
            -
             
     | 
| 
       664 
     | 
    
         
            -
             
     | 
| 
      
 643 
     | 
    
         
            +
            require 'regexp_parser/scanner/errors/scanner_error'
         
     | 
| 
      
 644 
     | 
    
         
            +
            require 'regexp_parser/scanner/errors/premature_end_error'
         
     | 
| 
      
 645 
     | 
    
         
            +
            require 'regexp_parser/scanner/errors/validation_error'
         
     | 
| 
       665 
646 
     | 
    
         | 
| 
       666 
647 
     | 
    
         
             
            class Regexp::Scanner
         
     | 
| 
       667 
     | 
    
         
            -
              # General scanner error (catch all)
         
     | 
| 
       668 
     | 
    
         
            -
              class ScannerError < StandardError; end
         
     | 
| 
       669 
     | 
    
         
            -
             
     | 
| 
       670 
     | 
    
         
            -
              # Base for all scanner validation errors
         
     | 
| 
       671 
     | 
    
         
            -
              class ValidationError < StandardError
         
     | 
| 
       672 
     | 
    
         
            -
                def initialize(reason)
         
     | 
| 
       673 
     | 
    
         
            -
                  super reason
         
     | 
| 
       674 
     | 
    
         
            -
                end
         
     | 
| 
       675 
     | 
    
         
            -
              end
         
     | 
| 
       676 
     | 
    
         
            -
             
     | 
| 
       677 
     | 
    
         
            -
              # Unexpected end of pattern
         
     | 
| 
       678 
     | 
    
         
            -
              class PrematureEndError < ScannerError
         
     | 
| 
       679 
     | 
    
         
            -
                def initialize(where = '')
         
     | 
| 
       680 
     | 
    
         
            -
                  super "Premature end of pattern at #{where}"
         
     | 
| 
       681 
     | 
    
         
            -
                end
         
     | 
| 
       682 
     | 
    
         
            -
              end
         
     | 
| 
       683 
     | 
    
         
            -
             
     | 
| 
       684 
     | 
    
         
            -
              # Invalid sequence format. Used for escape sequences, mainly.
         
     | 
| 
       685 
     | 
    
         
            -
              class InvalidSequenceError < ValidationError
         
     | 
| 
       686 
     | 
    
         
            -
                def initialize(what = 'sequence', where = '')
         
     | 
| 
       687 
     | 
    
         
            -
                  super "Invalid #{what} at #{where}"
         
     | 
| 
       688 
     | 
    
         
            -
                end
         
     | 
| 
       689 
     | 
    
         
            -
              end
         
     | 
| 
       690 
     | 
    
         
            -
             
     | 
| 
       691 
     | 
    
         
            -
              # Invalid group. Used for named groups.
         
     | 
| 
       692 
     | 
    
         
            -
              class InvalidGroupError < ValidationError
         
     | 
| 
       693 
     | 
    
         
            -
                def initialize(what, reason)
         
     | 
| 
       694 
     | 
    
         
            -
                  super "Invalid #{what}, #{reason}."
         
     | 
| 
       695 
     | 
    
         
            -
                end
         
     | 
| 
       696 
     | 
    
         
            -
              end
         
     | 
| 
       697 
     | 
    
         
            -
             
     | 
| 
       698 
     | 
    
         
            -
              # Invalid groupOption. Used for inline options.
         
     | 
| 
       699 
     | 
    
         
            -
              class InvalidGroupOption < ValidationError
         
     | 
| 
       700 
     | 
    
         
            -
                def initialize(option, text)
         
     | 
| 
       701 
     | 
    
         
            -
                  super "Invalid group option #{option} in #{text}"
         
     | 
| 
       702 
     | 
    
         
            -
                end
         
     | 
| 
       703 
     | 
    
         
            -
              end
         
     | 
| 
       704 
     | 
    
         
            -
             
     | 
| 
       705 
     | 
    
         
            -
              # Invalid back reference. Used for name a number refs/calls.
         
     | 
| 
       706 
     | 
    
         
            -
              class InvalidBackrefError < ValidationError
         
     | 
| 
       707 
     | 
    
         
            -
                def initialize(what, reason)
         
     | 
| 
       708 
     | 
    
         
            -
                  super "Invalid back reference #{what}, #{reason}"
         
     | 
| 
       709 
     | 
    
         
            -
                end
         
     | 
| 
       710 
     | 
    
         
            -
              end
         
     | 
| 
       711 
     | 
    
         
            -
             
     | 
| 
       712 
     | 
    
         
            -
              # The property name was not recognized by the scanner.
         
     | 
| 
       713 
     | 
    
         
            -
              class UnknownUnicodePropertyError < ValidationError
         
     | 
| 
       714 
     | 
    
         
            -
                def initialize(name)
         
     | 
| 
       715 
     | 
    
         
            -
                  super "Unknown unicode character property name #{name}"
         
     | 
| 
       716 
     | 
    
         
            -
                end
         
     | 
| 
       717 
     | 
    
         
            -
              end
         
     | 
| 
       718 
     | 
    
         
            -
             
     | 
| 
       719 
648 
     | 
    
         
             
              # Scans the given regular expression text, or Regexp object and collects the
         
     | 
| 
       720 
649 
     | 
    
         
             
              # emitted token into an array that gets returned at the end. If a block is
         
     | 
| 
       721 
650 
     | 
    
         
             
              # given, it gets called for each emitted token.
         
     | 
| 
       722 
651 
     | 
    
         
             
              #
         
     | 
| 
       723 
652 
     | 
    
         
             
              # This method may raise errors if a syntax error is encountered.
         
     | 
| 
       724 
653 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       725 
     | 
    
         
            -
              def self.scan(input_object, &block)
         
     | 
| 
       726 
     | 
    
         
            -
                new.scan(input_object, &block)
         
     | 
| 
      
 654 
     | 
    
         
            +
              def self.scan(input_object, options: nil, collect_tokens: true, &block)
         
     | 
| 
      
 655 
     | 
    
         
            +
                new.scan(input_object, options: options, collect_tokens: collect_tokens, &block)
         
     | 
| 
       727 
656 
     | 
    
         
             
              end
         
     | 
| 
       728 
657 
     | 
    
         | 
| 
       729 
     | 
    
         
            -
              def scan(input_object, &block)
         
     | 
| 
       730 
     | 
    
         
            -
                self. 
     | 
| 
      
 658 
     | 
    
         
            +
              def scan(input_object, options: nil, collect_tokens: true, &block)
         
     | 
| 
      
 659 
     | 
    
         
            +
                self.collect_tokens = collect_tokens
         
     | 
| 
      
 660 
     | 
    
         
            +
                self.literal_run = nil
         
     | 
| 
       731 
661 
     | 
    
         
             
                stack = []
         
     | 
| 
       732 
662 
     | 
    
         | 
| 
       733 
     | 
    
         
            -
                 
     | 
| 
       734 
     | 
    
         
            -
             
     | 
| 
       735 
     | 
    
         
            -
                  self.free_spacing = (input_object.options & Regexp::EXTENDED != 0)
         
     | 
| 
       736 
     | 
    
         
            -
                else
         
     | 
| 
       737 
     | 
    
         
            -
                  input = input_object
         
     | 
| 
       738 
     | 
    
         
            -
                  self.free_spacing = false
         
     | 
| 
       739 
     | 
    
         
            -
                end
         
     | 
| 
      
 663 
     | 
    
         
            +
                input = input_object.is_a?(Regexp) ? input_object.source : input_object
         
     | 
| 
      
 664 
     | 
    
         
            +
                self.free_spacing = free_spacing?(input_object, options)
         
     | 
| 
       740 
665 
     | 
    
         
             
                self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
         
     | 
| 
       741 
666 
     | 
    
         | 
| 
       742 
     | 
    
         
            -
                data  = input.unpack("c*") 
     | 
| 
      
 667 
     | 
    
         
            +
                data  = input.unpack("c*")
         
     | 
| 
       743 
668 
     | 
    
         
             
                eof   = data.length
         
     | 
| 
       744 
669 
     | 
    
         | 
| 
       745 
670 
     | 
    
         
             
                self.tokens = []
         
     | 
| 
       746 
     | 
    
         
            -
                self.block  =  
     | 
| 
      
 671 
     | 
    
         
            +
                self.block  = block
         
     | 
| 
       747 
672 
     | 
    
         | 
| 
       748 
673 
     | 
    
         
             
                self.set_depth = 0
         
     | 
| 
       749 
674 
     | 
    
         
             
                self.group_depth = 0
         
     | 
| 
       750 
675 
     | 
    
         
             
                self.conditional_stack = []
         
     | 
| 
      
 676 
     | 
    
         
            +
                self.char_pos = 0
         
     | 
| 
       751 
677 
     | 
    
         | 
| 
       752 
678 
     | 
    
         
             
                %% write data;
         
     | 
| 
       753 
679 
     | 
    
         
             
                %% write init;
         
     | 
| 
         @@ -757,7 +683,7 @@ class Regexp::Scanner 
     | 
|
| 
       757 
683 
     | 
    
         
             
                testEof = testEof
         
     | 
| 
       758 
684 
     | 
    
         | 
| 
       759 
685 
     | 
    
         
             
                if cs == re_scanner_error
         
     | 
| 
       760 
     | 
    
         
            -
                  text =  
     | 
| 
      
 686 
     | 
    
         
            +
                  text = copy(data, ts ? ts-1 : 0, -1)
         
     | 
| 
       761 
687 
     | 
    
         
             
                  raise ScannerError.new("Scan error at '#{text}'")
         
     | 
| 
       762 
688 
     | 
    
         
             
                end
         
     | 
| 
       763 
689 
     | 
    
         | 
| 
         @@ -767,40 +693,76 @@ class Regexp::Scanner 
     | 
|
| 
       767 
693 
     | 
    
         
             
                      "[#{set_depth}]") if in_set?
         
     | 
| 
       768 
694 
     | 
    
         | 
| 
       769 
695 
     | 
    
         
             
                # when the entire expression is a literal run
         
     | 
| 
       770 
     | 
    
         
            -
                emit_literal if  
     | 
| 
      
 696 
     | 
    
         
            +
                emit_literal if literal_run
         
     | 
| 
       771 
697 
     | 
    
         | 
| 
       772 
698 
     | 
    
         
             
                tokens
         
     | 
| 
       773 
699 
     | 
    
         
             
              end
         
     | 
| 
       774 
700 
     | 
    
         | 
| 
       775 
701 
     | 
    
         
             
              # lazy-load property maps when first needed
         
     | 
| 
       776 
     | 
    
         
            -
              require 'yaml'
         
     | 
| 
       777 
     | 
    
         
            -
              PROP_MAPS_DIR = File.expand_path('../scanner/properties', __FILE__)
         
     | 
| 
       778 
     | 
    
         
            -
             
     | 
| 
       779 
702 
     | 
    
         
             
              def self.short_prop_map
         
     | 
| 
       780 
     | 
    
         
            -
                @short_prop_map ||=  
     | 
| 
      
 703 
     | 
    
         
            +
                @short_prop_map ||= parse_prop_map('short')
         
     | 
| 
       781 
704 
     | 
    
         
             
              end
         
     | 
| 
       782 
705 
     | 
    
         | 
| 
       783 
706 
     | 
    
         
             
              def self.long_prop_map
         
     | 
| 
       784 
     | 
    
         
            -
                @long_prop_map ||=  
     | 
| 
      
 707 
     | 
    
         
            +
                @long_prop_map ||= parse_prop_map('long')
         
     | 
| 
      
 708 
     | 
    
         
            +
              end
         
     | 
| 
      
 709 
     | 
    
         
            +
             
     | 
| 
      
 710 
     | 
    
         
            +
              def self.parse_prop_map(name)
         
     | 
| 
      
 711 
     | 
    
         
            +
                File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
         
     | 
| 
      
 712 
     | 
    
         
            +
              end
         
     | 
| 
      
 713 
     | 
    
         
            +
             
     | 
| 
      
 714 
     | 
    
         
            +
              def self.posix_classes
         
     | 
| 
      
 715 
     | 
    
         
            +
                %w[alnum alpha ascii blank cntrl digit graph
         
     | 
| 
      
 716 
     | 
    
         
            +
                   lower print punct space upper word xdigit]
         
     | 
| 
       785 
717 
     | 
    
         
             
              end
         
     | 
| 
       786 
718 
     | 
    
         | 
| 
       787 
719 
     | 
    
         
             
              # Emits an array with the details of the scanned pattern
         
     | 
| 
       788 
     | 
    
         
            -
              def emit(type, token, text 
     | 
| 
      
 720 
     | 
    
         
            +
              def emit(type, token, text)
         
     | 
| 
       789 
721 
     | 
    
         
             
                #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
         
     | 
| 
       790 
722 
     | 
    
         | 
| 
       791 
     | 
    
         
            -
                emit_literal if  
     | 
| 
      
 723 
     | 
    
         
            +
                emit_literal if literal_run
         
     | 
| 
      
 724 
     | 
    
         
            +
             
     | 
| 
      
 725 
     | 
    
         
            +
                # Ragel runs with byte-based indices (ts, te). These are of little value to
         
     | 
| 
      
 726 
     | 
    
         
            +
                # end-users, so we keep track of char-based indices and emit those instead.
         
     | 
| 
      
 727 
     | 
    
         
            +
                ts_char_pos = char_pos
         
     | 
| 
      
 728 
     | 
    
         
            +
                te_char_pos = char_pos + text.length
         
     | 
| 
      
 729 
     | 
    
         
            +
             
     | 
| 
      
 730 
     | 
    
         
            +
                tok = [type, token, text, ts_char_pos, te_char_pos]
         
     | 
| 
      
 731 
     | 
    
         
            +
             
     | 
| 
      
 732 
     | 
    
         
            +
                self.prev_token = tok
         
     | 
| 
      
 733 
     | 
    
         
            +
             
     | 
| 
      
 734 
     | 
    
         
            +
                self.char_pos = te_char_pos
         
     | 
| 
       792 
735 
     | 
    
         | 
| 
       793 
736 
     | 
    
         
             
                if block
         
     | 
| 
       794 
     | 
    
         
            -
                  block.call type, token, text,  
     | 
| 
      
 737 
     | 
    
         
            +
                  block.call type, token, text, ts_char_pos, te_char_pos
         
     | 
| 
      
 738 
     | 
    
         
            +
                  # TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect if no block given
         
     | 
| 
      
 739 
     | 
    
         
            +
                  tokens << tok if collect_tokens
         
     | 
| 
      
 740 
     | 
    
         
            +
                elsif collect_tokens
         
     | 
| 
      
 741 
     | 
    
         
            +
                  tokens << tok
         
     | 
| 
       795 
742 
     | 
    
         
             
                end
         
     | 
| 
       796 
     | 
    
         
            -
             
     | 
| 
       797 
     | 
    
         
            -
                tokens << [type, token, text, ts, te]
         
     | 
| 
       798 
743 
     | 
    
         
             
              end
         
     | 
| 
       799 
744 
     | 
    
         | 
| 
      
 745 
     | 
    
         
            +
              attr_accessor :literal_run # only public for #||= to work on ruby <= 2.5
         
     | 
| 
      
 746 
     | 
    
         
            +
             
     | 
| 
       800 
747 
     | 
    
         
             
              private
         
     | 
| 
       801 
748 
     | 
    
         | 
| 
       802 
     | 
    
         
            -
              attr_accessor : 
     | 
| 
       803 
     | 
    
         
            -
                            : 
     | 
| 
      
 749 
     | 
    
         
            +
              attr_accessor :block,
         
     | 
| 
      
 750 
     | 
    
         
            +
                            :collect_tokens, :tokens, :prev_token,
         
     | 
| 
      
 751 
     | 
    
         
            +
                            :free_spacing, :spacing_stack,
         
     | 
| 
      
 752 
     | 
    
         
            +
                            :group_depth, :set_depth, :conditional_stack,
         
     | 
| 
      
 753 
     | 
    
         
            +
                            :char_pos
         
     | 
| 
      
 754 
     | 
    
         
            +
             
     | 
| 
      
 755 
     | 
    
         
            +
              def free_spacing?(input_object, options)
         
     | 
| 
      
 756 
     | 
    
         
            +
                if options && !input_object.is_a?(String)
         
     | 
| 
      
 757 
     | 
    
         
            +
                  raise ArgumentError, 'options cannot be supplied unless scanning a String'
         
     | 
| 
      
 758 
     | 
    
         
            +
                end
         
     | 
| 
      
 759 
     | 
    
         
            +
             
     | 
| 
      
 760 
     | 
    
         
            +
                options = input_object.options if input_object.is_a?(::Regexp)
         
     | 
| 
      
 761 
     | 
    
         
            +
             
     | 
| 
      
 762 
     | 
    
         
            +
                return false unless options
         
     | 
| 
      
 763 
     | 
    
         
            +
             
     | 
| 
      
 764 
     | 
    
         
            +
                options & Regexp::EXTENDED != 0
         
     | 
| 
      
 765 
     | 
    
         
            +
              end
         
     | 
| 
       804 
766 
     | 
    
         | 
| 
       805 
767 
     | 
    
         
             
              def in_group?
         
     | 
| 
       806 
768 
     | 
    
         
             
                group_depth > 0
         
     | 
| 
         @@ -811,36 +773,24 @@ class Regexp::Scanner 
     | 
|
| 
       811 
773 
     | 
    
         
             
              end
         
     | 
| 
       812 
774 
     | 
    
         | 
| 
       813 
775 
     | 
    
         
             
              # Copy from ts to te from data as text
         
     | 
| 
       814 
     | 
    
         
            -
              def copy(data,  
     | 
| 
       815 
     | 
    
         
            -
                data[ 
     | 
| 
       816 
     | 
    
         
            -
              end
         
     | 
| 
       817 
     | 
    
         
            -
             
     | 
| 
       818 
     | 
    
         
            -
              # Copy from ts to te from data as text, returning an array with the text
         
     | 
| 
       819 
     | 
    
         
            -
              #  and the offsets used to copy it.
         
     | 
| 
       820 
     | 
    
         
            -
              def text(data, ts, te, soff = 0)
         
     | 
| 
       821 
     | 
    
         
            -
                [copy(data, ts-soff..te-1), ts-soff, te]
         
     | 
| 
      
 776 
     | 
    
         
            +
              def copy(data, ts, te)
         
     | 
| 
      
 777 
     | 
    
         
            +
                data[ts...te].pack('c*').force_encoding('utf-8')
         
     | 
| 
       822 
778 
     | 
    
         
             
              end
         
     | 
| 
       823 
779 
     | 
    
         | 
| 
       824 
780 
     | 
    
         
             
              # Appends one or more characters to the literal buffer, to be emitted later
         
     | 
| 
       825 
     | 
    
         
            -
              # by a call to emit_literal. 
     | 
| 
      
 781 
     | 
    
         
            +
              # by a call to emit_literal.
         
     | 
| 
       826 
782 
     | 
    
         
             
              def append_literal(data, ts, te)
         
     | 
| 
       827 
     | 
    
         
            -
                self. 
     | 
| 
       828 
     | 
    
         
            -
                literal << text(data, ts, te)
         
     | 
| 
      
 783 
     | 
    
         
            +
                (self.literal_run ||= []) << copy(data, ts, te)
         
     | 
| 
       829 
784 
     | 
    
         
             
              end
         
     | 
| 
       830 
785 
     | 
    
         | 
| 
       831 
     | 
    
         
            -
              # Emits the literal run collected by calls to the append_literal method 
     | 
| 
       832 
     | 
    
         
            -
              # using the total start (ts) and end (te) offsets of the run.
         
     | 
| 
      
 786 
     | 
    
         
            +
              # Emits the literal run collected by calls to the append_literal method.
         
     | 
| 
       833 
787 
     | 
    
         
             
              def emit_literal
         
     | 
| 
       834 
     | 
    
         
            -
                 
     | 
| 
       835 
     | 
    
         
            -
                 
     | 
| 
       836 
     | 
    
         
            -
             
     | 
| 
       837 
     | 
    
         
            -
                text.force_encoding('utf-8') if text.respond_to?(:force_encoding)
         
     | 
| 
       838 
     | 
    
         
            -
             
     | 
| 
       839 
     | 
    
         
            -
                self.literal = nil
         
     | 
| 
       840 
     | 
    
         
            -
                emit(:literal, :literal, text, ts, te)
         
     | 
| 
      
 788 
     | 
    
         
            +
                text = literal_run.join
         
     | 
| 
      
 789 
     | 
    
         
            +
                self.literal_run = nil
         
     | 
| 
      
 790 
     | 
    
         
            +
                emit(:literal, :literal, text)
         
     | 
| 
       841 
791 
     | 
    
         
             
              end
         
     | 
| 
       842 
792 
     | 
    
         | 
| 
       843 
     | 
    
         
            -
              def emit_options(text 
     | 
| 
      
 793 
     | 
    
         
            +
              def emit_options(text)
         
     | 
| 
       844 
794 
     | 
    
         
             
                token = nil
         
     | 
| 
       845 
795 
     | 
    
         | 
| 
       846 
796 
     | 
    
         
             
                # Ruby allows things like '(?-xxxx)' or '(?xx-xx--xx-:abc)'.
         
     | 
| 
         @@ -866,28 +816,13 @@ class Regexp::Scanner 
     | 
|
| 
       866 
816 
     | 
    
         
             
                  token = :options_switch
         
     | 
| 
       867 
817 
     | 
    
         
             
                end
         
     | 
| 
       868 
818 
     | 
    
         | 
| 
       869 
     | 
    
         
            -
                emit(:group, token, text 
     | 
| 
      
 819 
     | 
    
         
            +
                emit(:group, token, text)
         
     | 
| 
       870 
820 
     | 
    
         
             
              end
         
     | 
| 
       871 
821 
     | 
    
         | 
| 
       872 
822 
     | 
    
         
             
              def emit_meta_control_sequence(data, ts, te, token)
         
     | 
| 
       873 
823 
     | 
    
         
             
                if data.last < 0x00 || data.last > 0x7F
         
     | 
| 
       874 
     | 
    
         
            -
                   
     | 
| 
       875 
     | 
    
         
            -
                end
         
     | 
| 
       876 
     | 
    
         
            -
                emit(:escape, token, *text(data, ts, te, 1))
         
     | 
| 
       877 
     | 
    
         
            -
              end
         
     | 
| 
       878 
     | 
    
         
            -
             
     | 
| 
       879 
     | 
    
         
            -
              # Centralizes and unifies the handling of validation related
         
     | 
| 
       880 
     | 
    
         
            -
              # errors.
         
     | 
| 
       881 
     | 
    
         
            -
              def validation_error(type, what, reason)
         
     | 
| 
       882 
     | 
    
         
            -
                case type
         
     | 
| 
       883 
     | 
    
         
            -
                when :group
         
     | 
| 
       884 
     | 
    
         
            -
                  error = InvalidGroupError.new(what, reason)
         
     | 
| 
       885 
     | 
    
         
            -
                when :backref
         
     | 
| 
       886 
     | 
    
         
            -
                  error = InvalidBackrefError.new(what, reason)
         
     | 
| 
       887 
     | 
    
         
            -
                when :sequence
         
     | 
| 
       888 
     | 
    
         
            -
                  error = InvalidSequenceError.new(what, reason)
         
     | 
| 
      
 824 
     | 
    
         
            +
                  raise ValidationError.for(:sequence, 'escape', token.to_s)
         
     | 
| 
       889 
825 
     | 
    
         
             
                end
         
     | 
| 
       890 
     | 
    
         
            -
             
     | 
| 
       891 
     | 
    
         
            -
                raise error # unless @@config.validation_ignore
         
     | 
| 
      
 826 
     | 
    
         
            +
                emit(:escape, token, copy(data, ts-1, te))
         
     | 
| 
       892 
827 
     | 
    
         
             
              end
         
     | 
| 
       893 
828 
     | 
    
         
             
            end # module Regexp::Scanner
         
     |