regexp_parser 1.3.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +53 -1
 - data/Gemfile +3 -3
 - data/README.md +10 -14
 - data/Rakefile +3 -4
 - data/lib/regexp_parser/expression.rb +28 -53
 - data/lib/regexp_parser/expression/classes/backref.rb +18 -10
 - data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
 - data/lib/regexp_parser/expression/classes/escape.rb +0 -4
 - data/lib/regexp_parser/expression/classes/group.rb +4 -2
 - data/lib/regexp_parser/expression/classes/keep.rb +1 -3
 - data/lib/regexp_parser/expression/methods/match.rb +13 -0
 - data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
 - data/lib/regexp_parser/expression/methods/options.rb +35 -0
 - data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
 - data/lib/regexp_parser/expression/methods/tests.rb +6 -15
 - data/lib/regexp_parser/expression/quantifier.rb +2 -2
 - data/lib/regexp_parser/expression/sequence.rb +3 -6
 - data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
 - data/lib/regexp_parser/expression/subexpression.rb +3 -5
 - data/lib/regexp_parser/lexer.rb +30 -44
 - data/lib/regexp_parser/parser.rb +47 -24
 - data/lib/regexp_parser/scanner.rb +1159 -1329
 - data/lib/regexp_parser/scanner/char_type.rl +0 -3
 - data/lib/regexp_parser/scanner/properties/long.yml +34 -1
 - data/lib/regexp_parser/scanner/properties/short.yml +12 -0
 - data/lib/regexp_parser/scanner/scanner.rl +82 -190
 - data/lib/regexp_parser/syntax/tokens.rb +2 -10
 - data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
 - data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
 - data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
 - data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
 - data/lib/regexp_parser/version.rb +1 -1
 - data/regexp_parser.gemspec +3 -3
 - data/spec/expression/base_spec.rb +94 -0
 - data/spec/expression/clone_spec.rb +120 -0
 - data/spec/expression/conditional_spec.rb +89 -0
 - data/spec/expression/free_space_spec.rb +27 -0
 - data/spec/expression/methods/match_length_spec.rb +154 -0
 - data/spec/expression/methods/match_spec.rb +25 -0
 - data/spec/expression/methods/strfregexp_spec.rb +224 -0
 - data/spec/expression/methods/tests_spec.rb +99 -0
 - data/spec/expression/methods/traverse_spec.rb +140 -0
 - data/spec/expression/options_spec.rb +128 -0
 - data/spec/expression/root_spec.rb +9 -0
 - data/spec/expression/sequence_spec.rb +9 -0
 - data/spec/expression/subexpression_spec.rb +50 -0
 - data/spec/expression/to_h_spec.rb +26 -0
 - data/spec/expression/to_s_spec.rb +100 -0
 - data/spec/lexer/all_spec.rb +22 -0
 - data/spec/lexer/conditionals_spec.rb +53 -0
 - data/spec/lexer/escapes_spec.rb +14 -0
 - data/spec/lexer/keep_spec.rb +10 -0
 - data/spec/lexer/literals_spec.rb +89 -0
 - data/spec/lexer/nesting_spec.rb +99 -0
 - data/spec/lexer/refcalls_spec.rb +55 -0
 - data/spec/parser/all_spec.rb +43 -0
 - data/spec/parser/alternation_spec.rb +88 -0
 - data/spec/parser/anchors_spec.rb +17 -0
 - data/spec/parser/conditionals_spec.rb +179 -0
 - data/spec/parser/errors_spec.rb +30 -0
 - data/spec/parser/escapes_spec.rb +121 -0
 - data/spec/parser/free_space_spec.rb +130 -0
 - data/spec/parser/groups_spec.rb +108 -0
 - data/spec/parser/keep_spec.rb +6 -0
 - data/spec/parser/posix_classes_spec.rb +8 -0
 - data/spec/parser/properties_spec.rb +115 -0
 - data/spec/parser/quantifiers_spec.rb +51 -0
 - data/spec/parser/refcalls_spec.rb +112 -0
 - data/spec/parser/set/intersections_spec.rb +127 -0
 - data/spec/parser/set/ranges_spec.rb +111 -0
 - data/spec/parser/sets_spec.rb +178 -0
 - data/spec/parser/types_spec.rb +18 -0
 - data/spec/scanner/all_spec.rb +18 -0
 - data/spec/scanner/anchors_spec.rb +21 -0
 - data/spec/scanner/conditionals_spec.rb +128 -0
 - data/spec/scanner/errors_spec.rb +68 -0
 - data/spec/scanner/escapes_spec.rb +53 -0
 - data/spec/scanner/free_space_spec.rb +133 -0
 - data/spec/scanner/groups_spec.rb +52 -0
 - data/spec/scanner/keep_spec.rb +10 -0
 - data/spec/scanner/literals_spec.rb +49 -0
 - data/spec/scanner/meta_spec.rb +18 -0
 - data/spec/scanner/properties_spec.rb +64 -0
 - data/spec/scanner/quantifiers_spec.rb +20 -0
 - data/spec/scanner/refcalls_spec.rb +36 -0
 - data/spec/scanner/sets_spec.rb +102 -0
 - data/spec/scanner/types_spec.rb +14 -0
 - data/spec/spec_helper.rb +15 -0
 - data/{test → spec}/support/runner.rb +9 -8
 - data/spec/support/shared_examples.rb +77 -0
 - data/{test → spec}/support/warning_extractor.rb +5 -7
 - data/spec/syntax/syntax_spec.rb +48 -0
 - data/spec/syntax/syntax_token_map_spec.rb +23 -0
 - data/spec/syntax/versions/1.8.6_spec.rb +17 -0
 - data/spec/syntax/versions/1.9.1_spec.rb +10 -0
 - data/spec/syntax/versions/1.9.3_spec.rb +9 -0
 - data/spec/syntax/versions/2.0.0_spec.rb +13 -0
 - data/spec/syntax/versions/2.2.0_spec.rb +9 -0
 - data/spec/syntax/versions/aliases_spec.rb +37 -0
 - data/spec/token/token_spec.rb +85 -0
 - metadata +144 -143
 - data/test/expression/test_all.rb +0 -12
 - data/test/expression/test_base.rb +0 -90
 - data/test/expression/test_clone.rb +0 -89
 - data/test/expression/test_conditionals.rb +0 -113
 - data/test/expression/test_free_space.rb +0 -35
 - data/test/expression/test_set.rb +0 -84
 - data/test/expression/test_strfregexp.rb +0 -230
 - data/test/expression/test_subexpression.rb +0 -58
 - data/test/expression/test_tests.rb +0 -99
 - data/test/expression/test_to_h.rb +0 -59
 - data/test/expression/test_to_s.rb +0 -104
 - data/test/expression/test_traverse.rb +0 -161
 - data/test/helpers.rb +0 -10
 - data/test/lexer/test_all.rb +0 -41
 - data/test/lexer/test_conditionals.rb +0 -127
 - data/test/lexer/test_keep.rb +0 -24
 - data/test/lexer/test_literals.rb +0 -130
 - data/test/lexer/test_nesting.rb +0 -132
 - data/test/lexer/test_refcalls.rb +0 -56
 - data/test/parser/set/test_intersections.rb +0 -127
 - data/test/parser/set/test_ranges.rb +0 -111
 - data/test/parser/test_all.rb +0 -64
 - data/test/parser/test_alternation.rb +0 -92
 - data/test/parser/test_anchors.rb +0 -34
 - data/test/parser/test_conditionals.rb +0 -187
 - data/test/parser/test_errors.rb +0 -63
 - data/test/parser/test_escapes.rb +0 -134
 - data/test/parser/test_free_space.rb +0 -139
 - data/test/parser/test_groups.rb +0 -289
 - data/test/parser/test_keep.rb +0 -21
 - data/test/parser/test_posix_classes.rb +0 -27
 - data/test/parser/test_properties.rb +0 -133
 - data/test/parser/test_quantifiers.rb +0 -301
 - data/test/parser/test_refcalls.rb +0 -186
 - data/test/parser/test_sets.rb +0 -179
 - data/test/parser/test_types.rb +0 -50
 - data/test/scanner/test_all.rb +0 -38
 - data/test/scanner/test_anchors.rb +0 -38
 - data/test/scanner/test_conditionals.rb +0 -184
 - data/test/scanner/test_errors.rb +0 -91
 - data/test/scanner/test_escapes.rb +0 -56
 - data/test/scanner/test_free_space.rb +0 -200
 - data/test/scanner/test_groups.rb +0 -79
 - data/test/scanner/test_keep.rb +0 -35
 - data/test/scanner/test_literals.rb +0 -89
 - data/test/scanner/test_meta.rb +0 -40
 - data/test/scanner/test_properties.rb +0 -312
 - data/test/scanner/test_quantifiers.rb +0 -37
 - data/test/scanner/test_refcalls.rb +0 -52
 - data/test/scanner/test_scripts.rb +0 -53
 - data/test/scanner/test_sets.rb +0 -119
 - data/test/scanner/test_types.rb +0 -35
 - data/test/scanner/test_unicode_blocks.rb +0 -30
 - data/test/support/disable_autotest.rb +0 -8
 - data/test/syntax/test_all.rb +0 -6
 - data/test/syntax/test_syntax.rb +0 -61
 - data/test/syntax/test_syntax_token_map.rb +0 -25
 - data/test/syntax/versions/test_1.8.rb +0 -55
 - data/test/syntax/versions/test_1.9.1.rb +0 -36
 - data/test/syntax/versions/test_1.9.3.rb +0 -32
 - data/test/syntax/versions/test_2.0.0.rb +0 -37
 - data/test/syntax/versions/test_2.2.0.rb +0 -32
 - data/test/syntax/versions/test_aliases.rb +0 -129
 - data/test/syntax/versions/test_all.rb +0 -5
 - data/test/test_all.rb +0 -5
 - data/test/token/test_all.rb +0 -2
 - data/test/token/test_token.rb +0 -107
 
| 
         @@ -21,9 +21,6 @@ 
     | 
|
| 
       21 
21 
     | 
    
         
             
                  when '\W'; emit(:type, :nonword,    text, ts - 1, te)
         
     | 
| 
       22 
22 
     | 
    
         
             
                  when '\R'; emit(:type, :linebreak,  text, ts - 1, te)
         
     | 
| 
       23 
23 
     | 
    
         
             
                  when '\X'; emit(:type, :xgrapheme,  text, ts - 1, te)
         
     | 
| 
       24 
     | 
    
         
            -
                  else
         
     | 
| 
       25 
     | 
    
         
            -
                    raise ScannerError.new(
         
     | 
| 
       26 
     | 
    
         
            -
                      "Unexpected character in type at #{text} (char #{ts})")
         
     | 
| 
       27 
24 
     | 
    
         
             
                  end
         
     | 
| 
       28 
25 
     | 
    
         
             
                  fret;
         
     | 
| 
       29 
26 
     | 
    
         
             
                };
         
     | 
| 
         @@ -5,6 +5,9 @@ 
     | 
|
| 
       5 
5 
     | 
    
         
             
            adlam: adlam
         
     | 
| 
       6 
6 
     | 
    
         
             
            age=1.1: age=1.1
         
     | 
| 
       7 
7 
     | 
    
         
             
            age=10.0: age=10.0
         
     | 
| 
      
 8 
     | 
    
         
            +
            age=11.0: age=11.0
         
     | 
| 
      
 9 
     | 
    
         
            +
            age=12.0: age=12.0
         
     | 
| 
      
 10 
     | 
    
         
            +
            age=12.1: age=12.1
         
     | 
| 
       8 
11 
     | 
    
         
             
            age=2.0: age=2.0
         
     | 
| 
       9 
12 
     | 
    
         
             
            age=2.1: age=2.1
         
     | 
| 
       10 
13 
     | 
    
         
             
            age=3.0: age=3.0
         
     | 
| 
         @@ -63,7 +66,6 @@ changeswhenuppercased: changes_when_uppercased 
     | 
|
| 
       63 
66 
     | 
    
         
             
            cherokee: cherokee
         
     | 
| 
       64 
67 
     | 
    
         
             
            closepunctuation: close_punctuation
         
     | 
| 
       65 
68 
     | 
    
         
             
            cntrl: cntrl
         
     | 
| 
       66 
     | 
    
         
            -
            combiningmark: combining_mark
         
     | 
| 
       67 
69 
     | 
    
         
             
            common: common
         
     | 
| 
       68 
70 
     | 
    
         
             
            connectorpunctuation: connector_punctuation
         
     | 
| 
       69 
71 
     | 
    
         
             
            control: control
         
     | 
| 
         @@ -81,9 +83,11 @@ deseret: deseret 
     | 
|
| 
       81 
83 
     | 
    
         
             
            devanagari: devanagari
         
     | 
| 
       82 
84 
     | 
    
         
             
            diacritic: diacritic
         
     | 
| 
       83 
85 
     | 
    
         
             
            digit: digit
         
     | 
| 
      
 86 
     | 
    
         
            +
            dogra: dogra
         
     | 
| 
       84 
87 
     | 
    
         
             
            duployan: duployan
         
     | 
| 
       85 
88 
     | 
    
         
             
            egyptianhieroglyphs: egyptian_hieroglyphs
         
     | 
| 
       86 
89 
     | 
    
         
             
            elbasan: elbasan
         
     | 
| 
      
 90 
     | 
    
         
            +
            elymaic: elymaic
         
     | 
| 
       87 
91 
     | 
    
         
             
            emoji: emoji
         
     | 
| 
       88 
92 
     | 
    
         
             
            emojicomponent: emoji_component
         
     | 
| 
       89 
93 
     | 
    
         
             
            emojimodifier: emoji_modifier
         
     | 
| 
         @@ -104,9 +108,11 @@ graphemeextend: grapheme_extend 
     | 
|
| 
       104 
108 
     | 
    
         
             
            graphemelink: grapheme_link
         
     | 
| 
       105 
109 
     | 
    
         
             
            greek: greek
         
     | 
| 
       106 
110 
     | 
    
         
             
            gujarati: gujarati
         
     | 
| 
      
 111 
     | 
    
         
            +
            gunjalagondi: gunjala_gondi
         
     | 
| 
       107 
112 
     | 
    
         
             
            gurmukhi: gurmukhi
         
     | 
| 
       108 
113 
     | 
    
         
             
            han: han
         
     | 
| 
       109 
114 
     | 
    
         
             
            hangul: hangul
         
     | 
| 
      
 115 
     | 
    
         
            +
            hanifirohingya: hanifi_rohingya
         
     | 
| 
       110 
116 
     | 
    
         
             
            hanunoo: hanunoo
         
     | 
| 
       111 
117 
     | 
    
         
             
            hatran: hatran
         
     | 
| 
       112 
118 
     | 
    
         
             
            hebrew: hebrew
         
     | 
| 
         @@ -160,6 +166,7 @@ inchakma: in_chakma 
     | 
|
| 
       160 
166 
     | 
    
         
             
            incham: in_cham
         
     | 
| 
       161 
167 
     | 
    
         
             
            incherokee: in_cherokee
         
     | 
| 
       162 
168 
     | 
    
         
             
            incherokeesupplement: in_cherokee_supplement
         
     | 
| 
      
 169 
     | 
    
         
            +
            inchesssymbols: in_chess_symbols
         
     | 
| 
       163 
170 
     | 
    
         
             
            incjkcompatibility: in_cjk_compatibility
         
     | 
| 
       164 
171 
     | 
    
         
             
            incjkcompatibilityforms: in_cjk_compatibility_forms
         
     | 
| 
       165 
172 
     | 
    
         
             
            incjkcompatibilityideographs: in_cjk_compatibility_ideographs
         
     | 
| 
         @@ -197,11 +204,14 @@ indeseret: in_deseret 
     | 
|
| 
       197 
204 
     | 
    
         
             
            indevanagari: in_devanagari
         
     | 
| 
       198 
205 
     | 
    
         
             
            indevanagariextended: in_devanagari_extended
         
     | 
| 
       199 
206 
     | 
    
         
             
            indingbats: in_dingbats
         
     | 
| 
      
 207 
     | 
    
         
            +
            indogra: in_dogra
         
     | 
| 
       200 
208 
     | 
    
         
             
            indominotiles: in_domino_tiles
         
     | 
| 
       201 
209 
     | 
    
         
             
            induployan: in_duployan
         
     | 
| 
       202 
210 
     | 
    
         
             
            inearlydynasticcuneiform: in_early_dynastic_cuneiform
         
     | 
| 
      
 211 
     | 
    
         
            +
            inegyptianhieroglyphformatcontrols: in_egyptian_hieroglyph_format_controls
         
     | 
| 
       203 
212 
     | 
    
         
             
            inegyptianhieroglyphs: in_egyptian_hieroglyphs
         
     | 
| 
       204 
213 
     | 
    
         
             
            inelbasan: in_elbasan
         
     | 
| 
      
 214 
     | 
    
         
            +
            inelymaic: in_elymaic
         
     | 
| 
       205 
215 
     | 
    
         
             
            inemoticons: in_emoticons
         
     | 
| 
       206 
216 
     | 
    
         
             
            inenclosedalphanumerics: in_enclosed_alphanumerics
         
     | 
| 
       207 
217 
     | 
    
         
             
            inenclosedalphanumericsupplement: in_enclosed_alphanumeric_supplement
         
     | 
| 
         @@ -215,6 +225,7 @@ ingeneralpunctuation: in_general_punctuation 
     | 
|
| 
       215 
225 
     | 
    
         
             
            ingeometricshapes: in_geometric_shapes
         
     | 
| 
       216 
226 
     | 
    
         
             
            ingeometricshapesextended: in_geometric_shapes_extended
         
     | 
| 
       217 
227 
     | 
    
         
             
            ingeorgian: in_georgian
         
     | 
| 
      
 228 
     | 
    
         
            +
            ingeorgianextended: in_georgian_extended
         
     | 
| 
       218 
229 
     | 
    
         
             
            ingeorgiansupplement: in_georgian_supplement
         
     | 
| 
       219 
230 
     | 
    
         
             
            inglagolitic: in_glagolitic
         
     | 
| 
       220 
231 
     | 
    
         
             
            inglagoliticsupplement: in_glagolitic_supplement
         
     | 
| 
         @@ -223,6 +234,7 @@ ingrantha: in_grantha 
     | 
|
| 
       223 
234 
     | 
    
         
             
            ingreekandcoptic: in_greek_and_coptic
         
     | 
| 
       224 
235 
     | 
    
         
             
            ingreekextended: in_greek_extended
         
     | 
| 
       225 
236 
     | 
    
         
             
            ingujarati: in_gujarati
         
     | 
| 
      
 237 
     | 
    
         
            +
            ingunjalagondi: in_gunjala_gondi
         
     | 
| 
       226 
238 
     | 
    
         
             
            ingurmukhi: in_gurmukhi
         
     | 
| 
       227 
239 
     | 
    
         
             
            inhalfwidthandfullwidthforms: in_halfwidth_and_fullwidth_forms
         
     | 
| 
       228 
240 
     | 
    
         
             
            inhangulcompatibilityjamo: in_hangul_compatibility_jamo
         
     | 
| 
         @@ -230,6 +242,7 @@ inhanguljamo: in_hangul_jamo 
     | 
|
| 
       230 
242 
     | 
    
         
             
            inhanguljamoextendeda: in_hangul_jamo_extended_a
         
     | 
| 
       231 
243 
     | 
    
         
             
            inhanguljamoextendedb: in_hangul_jamo_extended_b
         
     | 
| 
       232 
244 
     | 
    
         
             
            inhangulsyllables: in_hangul_syllables
         
     | 
| 
      
 245 
     | 
    
         
            +
            inhanifirohingya: in_hanifi_rohingya
         
     | 
| 
       233 
246 
     | 
    
         
             
            inhanunoo: in_hanunoo
         
     | 
| 
       234 
247 
     | 
    
         
             
            inhatran: in_hatran
         
     | 
| 
       235 
248 
     | 
    
         
             
            inhebrew: in_hebrew
         
     | 
| 
         @@ -240,6 +253,7 @@ inhiragana: in_hiragana 
     | 
|
| 
       240 
253 
     | 
    
         
             
            inideographicdescriptioncharacters: in_ideographic_description_characters
         
     | 
| 
       241 
254 
     | 
    
         
             
            inideographicsymbolsandpunctuation: in_ideographic_symbols_and_punctuation
         
     | 
| 
       242 
255 
     | 
    
         
             
            inimperialaramaic: in_imperial_aramaic
         
     | 
| 
      
 256 
     | 
    
         
            +
            inindicsiyaqnumbers: in_indic_siyaq_numbers
         
     | 
| 
       243 
257 
     | 
    
         
             
            ininscriptionalpahlavi: in_inscriptional_pahlavi
         
     | 
| 
       244 
258 
     | 
    
         
             
            ininscriptionalparthian: in_inscriptional_parthian
         
     | 
| 
       245 
259 
     | 
    
         
             
            inipaextensions: in_ipa_extensions
         
     | 
| 
         @@ -279,6 +293,7 @@ inlycian: in_lycian 
     | 
|
| 
       279 
293 
     | 
    
         
             
            inlydian: in_lydian
         
     | 
| 
       280 
294 
     | 
    
         
             
            inmahajani: in_mahajani
         
     | 
| 
       281 
295 
     | 
    
         
             
            inmahjongtiles: in_mahjong_tiles
         
     | 
| 
      
 296 
     | 
    
         
            +
            inmakasar: in_makasar
         
     | 
| 
       282 
297 
     | 
    
         
             
            inmalayalam: in_malayalam
         
     | 
| 
       283 
298 
     | 
    
         
             
            inmandaic: in_mandaic
         
     | 
| 
       284 
299 
     | 
    
         
             
            inmanichaean: in_manichaean
         
     | 
| 
         @@ -286,6 +301,8 @@ inmarchen: in_marchen 
     | 
|
| 
       286 
301 
     | 
    
         
             
            inmasaramgondi: in_masaram_gondi
         
     | 
| 
       287 
302 
     | 
    
         
             
            inmathematicalalphanumericsymbols: in_mathematical_alphanumeric_symbols
         
     | 
| 
       288 
303 
     | 
    
         
             
            inmathematicaloperators: in_mathematical_operators
         
     | 
| 
      
 304 
     | 
    
         
            +
            inmayannumerals: in_mayan_numerals
         
     | 
| 
      
 305 
     | 
    
         
            +
            inmedefaidrin: in_medefaidrin
         
     | 
| 
       289 
306 
     | 
    
         
             
            inmeeteimayek: in_meetei_mayek
         
     | 
| 
       290 
307 
     | 
    
         
             
            inmeeteimayekextensions: in_meetei_mayek_extensions
         
     | 
| 
       291 
308 
     | 
    
         
             
            inmendekikakui: in_mende_kikakui
         
     | 
| 
         @@ -309,12 +326,14 @@ inmyanmar: in_myanmar 
     | 
|
| 
       309 
326 
     | 
    
         
             
            inmyanmarextendeda: in_myanmar_extended_a
         
     | 
| 
       310 
327 
     | 
    
         
             
            inmyanmarextendedb: in_myanmar_extended_b
         
     | 
| 
       311 
328 
     | 
    
         
             
            innabataean: in_nabataean
         
     | 
| 
      
 329 
     | 
    
         
            +
            innandinagari: in_nandinagari
         
     | 
| 
       312 
330 
     | 
    
         
             
            innewa: in_newa
         
     | 
| 
       313 
331 
     | 
    
         
             
            innewtailue: in_new_tai_lue
         
     | 
| 
       314 
332 
     | 
    
         
             
            innko: in_nko
         
     | 
| 
       315 
333 
     | 
    
         
             
            innoblock: in_no_block
         
     | 
| 
       316 
334 
     | 
    
         
             
            innumberforms: in_number_forms
         
     | 
| 
       317 
335 
     | 
    
         
             
            innushu: in_nushu
         
     | 
| 
      
 336 
     | 
    
         
            +
            innyiakengpuachuehmong: in_nyiakeng_puachue_hmong
         
     | 
| 
       318 
337 
     | 
    
         
             
            inogham: in_ogham
         
     | 
| 
       319 
338 
     | 
    
         
             
            inolchiki: in_ol_chiki
         
     | 
| 
       320 
339 
     | 
    
         
             
            inoldhungarian: in_old_hungarian
         
     | 
| 
         @@ -322,6 +341,7 @@ inolditalic: in_old_italic 
     | 
|
| 
       322 
341 
     | 
    
         
             
            inoldnortharabian: in_old_north_arabian
         
     | 
| 
       323 
342 
     | 
    
         
             
            inoldpermic: in_old_permic
         
     | 
| 
       324 
343 
     | 
    
         
             
            inoldpersian: in_old_persian
         
     | 
| 
      
 344 
     | 
    
         
            +
            inoldsogdian: in_old_sogdian
         
     | 
| 
       325 
345 
     | 
    
         
             
            inoldsoutharabian: in_old_south_arabian
         
     | 
| 
       326 
346 
     | 
    
         
             
            inoldturkic: in_old_turkic
         
     | 
| 
       327 
347 
     | 
    
         
             
            inopticalcharacterrecognition: in_optical_character_recognition
         
     | 
| 
         @@ -329,6 +349,7 @@ inoriya: in_oriya 
     | 
|
| 
       329 
349 
     | 
    
         
             
            inornamentaldingbats: in_ornamental_dingbats
         
     | 
| 
       330 
350 
     | 
    
         
             
            inosage: in_osage
         
     | 
| 
       331 
351 
     | 
    
         
             
            inosmanya: in_osmanya
         
     | 
| 
      
 352 
     | 
    
         
            +
            inottomansiyaqnumbers: in_ottoman_siyaq_numbers
         
     | 
| 
       332 
353 
     | 
    
         
             
            inpahawhhmong: in_pahawh_hmong
         
     | 
| 
       333 
354 
     | 
    
         
             
            inpalmyrene: in_palmyrene
         
     | 
| 
       334 
355 
     | 
    
         
             
            inpaucinhau: in_pau_cin_hau
         
     | 
| 
         @@ -354,6 +375,8 @@ insiddham: in_siddham 
     | 
|
| 
       354 
375 
     | 
    
         
             
            insinhala: in_sinhala
         
     | 
| 
       355 
376 
     | 
    
         
             
            insinhalaarchaicnumbers: in_sinhala_archaic_numbers
         
     | 
| 
       356 
377 
     | 
    
         
             
            insmallformvariants: in_small_form_variants
         
     | 
| 
      
 378 
     | 
    
         
            +
            insmallkanaextension: in_small_kana_extension
         
     | 
| 
      
 379 
     | 
    
         
            +
            insogdian: in_sogdian
         
     | 
| 
       357 
380 
     | 
    
         
             
            insorasompeng: in_sora_sompeng
         
     | 
| 
       358 
381 
     | 
    
         
             
            insoyombo: in_soyombo
         
     | 
| 
       359 
382 
     | 
    
         
             
            inspacingmodifierletters: in_spacing_modifier_letters
         
     | 
| 
         @@ -371,6 +394,7 @@ insupplementaryprivateuseareaa: in_supplementary_private_use_area_a 
     | 
|
| 
       371 
394 
     | 
    
         
             
            insupplementaryprivateuseareab: in_supplementary_private_use_area_b
         
     | 
| 
       372 
395 
     | 
    
         
             
            insuttonsignwriting: in_sutton_signwriting
         
     | 
| 
       373 
396 
     | 
    
         
             
            insylotinagri: in_syloti_nagri
         
     | 
| 
      
 397 
     | 
    
         
            +
            insymbolsandpictographsextendeda: in_symbols_and_pictographs_extended_a
         
     | 
| 
       374 
398 
     | 
    
         
             
            insyriac: in_syriac
         
     | 
| 
       375 
399 
     | 
    
         
             
            insyriacsupplement: in_syriac_supplement
         
     | 
| 
       376 
400 
     | 
    
         
             
            intagalog: in_tagalog
         
     | 
| 
         @@ -382,6 +406,7 @@ intaiviet: in_tai_viet 
     | 
|
| 
       382 
406 
     | 
    
         
             
            intaixuanjingsymbols: in_tai_xuan_jing_symbols
         
     | 
| 
       383 
407 
     | 
    
         
             
            intakri: in_takri
         
     | 
| 
       384 
408 
     | 
    
         
             
            intamil: in_tamil
         
     | 
| 
      
 409 
     | 
    
         
            +
            intamilsupplement: in_tamil_supplement
         
     | 
| 
       385 
410 
     | 
    
         
             
            intangut: in_tangut
         
     | 
| 
       386 
411 
     | 
    
         
             
            intangutcomponents: in_tangut_components
         
     | 
| 
       387 
412 
     | 
    
         
             
            intelugu: in_telugu
         
     | 
| 
         @@ -399,6 +424,7 @@ invariationselectors: in_variation_selectors 
     | 
|
| 
       399 
424 
     | 
    
         
             
            invariationselectorssupplement: in_variation_selectors_supplement
         
     | 
| 
       400 
425 
     | 
    
         
             
            invedicextensions: in_vedic_extensions
         
     | 
| 
       401 
426 
     | 
    
         
             
            inverticalforms: in_vertical_forms
         
     | 
| 
      
 427 
     | 
    
         
            +
            inwancho: in_wancho
         
     | 
| 
       402 
428 
     | 
    
         
             
            inwarangciti: in_warang_citi
         
     | 
| 
       403 
429 
     | 
    
         
             
            inyijinghexagramsymbols: in_yijing_hexagram_symbols
         
     | 
| 
       404 
430 
     | 
    
         
             
            inyiradicals: in_yi_radicals
         
     | 
| 
         @@ -431,6 +457,7 @@ lowercaseletter: lowercase_letter 
     | 
|
| 
       431 
457 
     | 
    
         
             
            lycian: lycian
         
     | 
| 
       432 
458 
     | 
    
         
             
            lydian: lydian
         
     | 
| 
       433 
459 
     | 
    
         
             
            mahajani: mahajani
         
     | 
| 
      
 460 
     | 
    
         
            +
            makasar: makasar
         
     | 
| 
       434 
461 
     | 
    
         
             
            malayalam: malayalam
         
     | 
| 
       435 
462 
     | 
    
         
             
            mandaic: mandaic
         
     | 
| 
       436 
463 
     | 
    
         
             
            manichaean: manichaean
         
     | 
| 
         @@ -439,6 +466,7 @@ mark: mark 
     | 
|
| 
       439 
466 
     | 
    
         
             
            masaramgondi: masaram_gondi
         
     | 
| 
       440 
467 
     | 
    
         
             
            math: math
         
     | 
| 
       441 
468 
     | 
    
         
             
            mathsymbol: math_symbol
         
     | 
| 
      
 469 
     | 
    
         
            +
            medefaidrin: medefaidrin
         
     | 
| 
       442 
470 
     | 
    
         
             
            meeteimayek: meetei_mayek
         
     | 
| 
       443 
471 
     | 
    
         
             
            mendekikakui: mende_kikakui
         
     | 
| 
       444 
472 
     | 
    
         
             
            meroiticcursive: meroitic_cursive
         
     | 
| 
         @@ -452,6 +480,7 @@ mro: mro 
     | 
|
| 
       452 
480 
     | 
    
         
             
            multani: multani
         
     | 
| 
       453 
481 
     | 
    
         
             
            myanmar: myanmar
         
     | 
| 
       454 
482 
     | 
    
         
             
            nabataean: nabataean
         
     | 
| 
      
 483 
     | 
    
         
            +
            nandinagari: nandinagari
         
     | 
| 
       455 
484 
     | 
    
         
             
            newa: newa
         
     | 
| 
       456 
485 
     | 
    
         
             
            newline: newline
         
     | 
| 
       457 
486 
     | 
    
         
             
            newtailue: new_tai_lue
         
     | 
| 
         @@ -460,6 +489,7 @@ noncharactercodepoint: noncharacter_code_point 
     | 
|
| 
       460 
489 
     | 
    
         
             
            nonspacingmark: nonspacing_mark
         
     | 
| 
       461 
490 
     | 
    
         
             
            number: number
         
     | 
| 
       462 
491 
     | 
    
         
             
            nushu: nushu
         
     | 
| 
      
 492 
     | 
    
         
            +
            nyiakengpuachuehmong: nyiakeng_puachue_hmong
         
     | 
| 
       463 
493 
     | 
    
         
             
            ogham: ogham
         
     | 
| 
       464 
494 
     | 
    
         
             
            olchiki: ol_chiki
         
     | 
| 
       465 
495 
     | 
    
         
             
            oldhungarian: old_hungarian
         
     | 
| 
         @@ -467,6 +497,7 @@ olditalic: old_italic 
     | 
|
| 
       467 
497 
     | 
    
         
             
            oldnortharabian: old_north_arabian
         
     | 
| 
       468 
498 
     | 
    
         
             
            oldpermic: old_permic
         
     | 
| 
       469 
499 
     | 
    
         
             
            oldpersian: old_persian
         
     | 
| 
      
 500 
     | 
    
         
            +
            oldsogdian: old_sogdian
         
     | 
| 
       470 
501 
     | 
    
         
             
            oldsoutharabian: old_south_arabian
         
     | 
| 
       471 
502 
     | 
    
         
             
            oldturkic: old_turkic
         
     | 
| 
       472 
503 
     | 
    
         
             
            openpunctuation: open_punctuation
         
     | 
| 
         @@ -515,6 +546,7 @@ siddham: siddham 
     | 
|
| 
       515 
546 
     | 
    
         
             
            signwriting: signwriting
         
     | 
| 
       516 
547 
     | 
    
         
             
            sinhala: sinhala
         
     | 
| 
       517 
548 
     | 
    
         
             
            softdotted: soft_dotted
         
     | 
| 
      
 549 
     | 
    
         
            +
            sogdian: sogdian
         
     | 
| 
       518 
550 
     | 
    
         
             
            sorasompeng: sora_sompeng
         
     | 
| 
       519 
551 
     | 
    
         
             
            soyombo: soyombo
         
     | 
| 
       520 
552 
     | 
    
         
             
            space: space
         
     | 
| 
         @@ -550,6 +582,7 @@ uppercase: uppercase 
     | 
|
| 
       550 
582 
     | 
    
         
             
            uppercaseletter: uppercase_letter
         
     | 
| 
       551 
583 
     | 
    
         
             
            vai: vai
         
     | 
| 
       552 
584 
     | 
    
         
             
            variationselector: variation_selector
         
     | 
| 
      
 585 
     | 
    
         
            +
            wancho: wancho
         
     | 
| 
       553 
586 
     | 
    
         
             
            warangciti: warang_citi
         
     | 
| 
       554 
587 
     | 
    
         
             
            whitespace: white_space
         
     | 
| 
       555 
588 
     | 
    
         
             
            word: word
         
     | 
| 
         @@ -31,6 +31,7 @@ cher: cherokee 
     | 
|
| 
       31 
31 
     | 
    
         
             
            ci: case_ignorable
         
     | 
| 
       32 
32 
     | 
    
         
             
            cn: unassigned
         
     | 
| 
       33 
33 
     | 
    
         
             
            co: private_use
         
     | 
| 
      
 34 
     | 
    
         
            +
            combiningmark: mark
         
     | 
| 
       34 
35 
     | 
    
         
             
            copt: coptic
         
     | 
| 
       35 
36 
     | 
    
         
             
            cprt: cypriot
         
     | 
| 
       36 
37 
     | 
    
         
             
            cs: surrogate
         
     | 
| 
         @@ -44,14 +45,17 @@ dep: deprecated 
     | 
|
| 
       44 
45 
     | 
    
         
             
            deva: devanagari
         
     | 
| 
       45 
46 
     | 
    
         
             
            di: default_ignorable_code_point
         
     | 
| 
       46 
47 
     | 
    
         
             
            dia: diacritic
         
     | 
| 
      
 48 
     | 
    
         
            +
            dogr: dogra
         
     | 
| 
       47 
49 
     | 
    
         
             
            dsrt: deseret
         
     | 
| 
       48 
50 
     | 
    
         
             
            dupl: duployan
         
     | 
| 
       49 
51 
     | 
    
         
             
            egyp: egyptian_hieroglyphs
         
     | 
| 
       50 
52 
     | 
    
         
             
            elba: elbasan
         
     | 
| 
      
 53 
     | 
    
         
            +
            elym: elymaic
         
     | 
| 
       51 
54 
     | 
    
         
             
            ethi: ethiopic
         
     | 
| 
       52 
55 
     | 
    
         
             
            ext: extender
         
     | 
| 
       53 
56 
     | 
    
         
             
            geor: georgian
         
     | 
| 
       54 
57 
     | 
    
         
             
            glag: glagolitic
         
     | 
| 
      
 58 
     | 
    
         
            +
            gong: gunjala_gondi
         
     | 
| 
       55 
59 
     | 
    
         
             
            gonm: masaram_gondi
         
     | 
| 
       56 
60 
     | 
    
         
             
            goth: gothic
         
     | 
| 
       57 
61 
     | 
    
         
             
            gran: grantha
         
     | 
| 
         @@ -70,6 +74,7 @@ hex: hex_digit 
     | 
|
| 
       70 
74 
     | 
    
         
             
            hira: hiragana
         
     | 
| 
       71 
75 
     | 
    
         
             
            hluw: anatolian_hieroglyphs
         
     | 
| 
       72 
76 
     | 
    
         
             
            hmng: pahawh_hmong
         
     | 
| 
      
 77 
     | 
    
         
            +
            hmnp: nyiakeng_puachue_hmong
         
     | 
| 
       73 
78 
     | 
    
         
             
            hung: old_hungarian
         
     | 
| 
       74 
79 
     | 
    
         
             
            idc: id_continue
         
     | 
| 
       75 
80 
     | 
    
         
             
            ideo: ideographic
         
     | 
| 
         @@ -105,11 +110,13 @@ lyci: lycian 
     | 
|
| 
       105 
110 
     | 
    
         
             
            lydi: lydian
         
     | 
| 
       106 
111 
     | 
    
         
             
            m: mark
         
     | 
| 
       107 
112 
     | 
    
         
             
            mahj: mahajani
         
     | 
| 
      
 113 
     | 
    
         
            +
            maka: makasar
         
     | 
| 
       108 
114 
     | 
    
         
             
            mand: mandaic
         
     | 
| 
       109 
115 
     | 
    
         
             
            mani: manichaean
         
     | 
| 
       110 
116 
     | 
    
         
             
            marc: marchen
         
     | 
| 
       111 
117 
     | 
    
         
             
            mc: spacing_mark
         
     | 
| 
       112 
118 
     | 
    
         
             
            me: enclosing_mark
         
     | 
| 
      
 119 
     | 
    
         
            +
            medf: medefaidrin
         
     | 
| 
       113 
120 
     | 
    
         
             
            mend: mende_kikakui
         
     | 
| 
       114 
121 
     | 
    
         
             
            merc: meroitic_cursive
         
     | 
| 
       115 
122 
     | 
    
         
             
            mero: meroitic_hieroglyphs
         
     | 
| 
         @@ -121,6 +128,7 @@ mtei: meetei_mayek 
     | 
|
| 
       121 
128 
     | 
    
         
             
            mult: multani
         
     | 
| 
       122 
129 
     | 
    
         
             
            mymr: myanmar
         
     | 
| 
       123 
130 
     | 
    
         
             
            n: number
         
     | 
| 
      
 131 
     | 
    
         
            +
            nand: nandinagari
         
     | 
| 
       124 
132 
     | 
    
         
             
            narb: old_north_arabian
         
     | 
| 
       125 
133 
     | 
    
         
             
            nbat: nabataean
         
     | 
| 
       126 
134 
     | 
    
         
             
            nchar: noncharacter_code_point
         
     | 
| 
         @@ -168,6 +176,7 @@ qaai: inherited 
     | 
|
| 
       168 
176 
     | 
    
         
             
            qmark: quotation_mark
         
     | 
| 
       169 
177 
     | 
    
         
             
            ri: regional_indicator
         
     | 
| 
       170 
178 
     | 
    
         
             
            rjng: rejang
         
     | 
| 
      
 179 
     | 
    
         
            +
            rohg: hanifi_rohingya
         
     | 
| 
       171 
180 
     | 
    
         
             
            runr: runic
         
     | 
| 
       172 
181 
     | 
    
         
             
            s: symbol
         
     | 
| 
       173 
182 
     | 
    
         
             
            samr: samaritan
         
     | 
| 
         @@ -184,6 +193,8 @@ sinh: sinhala 
     | 
|
| 
       184 
193 
     | 
    
         
             
            sk: modifier_symbol
         
     | 
| 
       185 
194 
     | 
    
         
             
            sm: math_symbol
         
     | 
| 
       186 
195 
     | 
    
         
             
            so: other_symbol
         
     | 
| 
      
 196 
     | 
    
         
            +
            sogd: sogdian
         
     | 
| 
      
 197 
     | 
    
         
            +
            sogo: old_sogdian
         
     | 
| 
       187 
198 
     | 
    
         
             
            sora: sora_sompeng
         
     | 
| 
       188 
199 
     | 
    
         
             
            soyo: soyombo
         
     | 
| 
       189 
200 
     | 
    
         
             
            sterm: sentence_terminal
         
     | 
| 
         @@ -209,6 +220,7 @@ uideo: unified_ideograph 
     | 
|
| 
       209 
220 
     | 
    
         
             
            vaii: vai
         
     | 
| 
       210 
221 
     | 
    
         
             
            vs: variation_selector
         
     | 
| 
       211 
222 
     | 
    
         
             
            wara: warang_citi
         
     | 
| 
      
 223 
     | 
    
         
            +
            wcho: wancho
         
     | 
| 
       212 
224 
     | 
    
         
             
            wspace: white_space
         
     | 
| 
       213 
225 
     | 
    
         
             
            xidc: xid_continue
         
     | 
| 
       214 
226 
     | 
    
         
             
            xids: xid_start
         
     | 
| 
         @@ -49,9 +49,9 @@ 
     | 
|
| 
       49 
49 
     | 
    
         
             
              codepoint_list        = 'u{' . xdigit{1,6} . (space . xdigit{1,6})* . '}';
         
     | 
| 
       50 
50 
     | 
    
         
             
              codepoint_sequence    = codepoint_single | codepoint_list;
         
     | 
| 
       51 
51 
     | 
    
         | 
| 
       52 
     | 
    
         
            -
              control_sequence      = ('c' | 'C-') . (backslash . 'M-') 
     | 
| 
      
 52 
     | 
    
         
            +
              control_sequence      = ('c' | 'C-') . (backslash . 'M-')? . backslash? . any;
         
     | 
| 
       53 
53 
     | 
    
         | 
| 
       54 
     | 
    
         
            -
              meta_sequence         = 'M-' . (backslash .  
     | 
| 
      
 54 
     | 
    
         
            +
              meta_sequence         = 'M-' . (backslash . ('c' | 'C-'))? . backslash? . any;
         
     | 
| 
       55 
55 
     | 
    
         | 
| 
       56 
56 
     | 
    
         
             
              zero_or_one           = '?' | '??' | '?+';
         
     | 
| 
       57 
57 
     | 
    
         
             
              zero_or_more          = '*' | '*?' | '*+';
         
     | 
| 
         @@ -82,7 +82,8 @@ 
     | 
|
| 
       82 
82 
     | 
    
         
             
              assertion_lookbehind  = '?<=';
         
     | 
| 
       83 
83 
     | 
    
         
             
              assertion_nlookbehind = '?<!';
         
     | 
| 
       84 
84 
     | 
    
         | 
| 
       85 
     | 
    
         
            -
               
     | 
| 
      
 85 
     | 
    
         
            +
              # try to treat every other group head as options group, like Ruby
         
     | 
| 
      
 86 
     | 
    
         
            +
              group_options         = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
         
     | 
| 
       86 
87 
     | 
    
         | 
| 
       87 
88 
     | 
    
         
             
              group_ref             = [gk];
         
     | 
| 
       88 
89 
     | 
    
         
             
              group_name_char       = (alnum | '_');
         
     | 
| 
         @@ -135,41 +136,35 @@ 
     | 
|
| 
       135 
136 
     | 
    
         
             
              # Invalid sequence error, used from sequences, like escapes and sets
         
     | 
| 
       136 
137 
     | 
    
         
             
              action invalid_sequence_error {
         
     | 
| 
       137 
138 
     | 
    
         
             
                text = ts ? copy(data, ts-1..-1) : data.pack('c*')
         
     | 
| 
       138 
     | 
    
         
            -
                 
     | 
| 
      
 139 
     | 
    
         
            +
                validation_error(:sequence, 'sequence', text)
         
     | 
| 
       139 
140 
     | 
    
         
             
              }
         
     | 
| 
       140 
141 
     | 
    
         | 
| 
       141 
142 
     | 
    
         
             
              # group (nesting) and set open/close actions
         
     | 
| 
       142 
     | 
    
         
            -
              action group_opened { self.group_depth = group_depth + 1 
     | 
| 
       143 
     | 
    
         
            -
              action group_closed { self.group_depth = group_depth - 1 
     | 
| 
      
 143 
     | 
    
         
            +
              action group_opened { self.group_depth = group_depth + 1 }
         
     | 
| 
      
 144 
     | 
    
         
            +
              action group_closed { self.group_depth = group_depth - 1 }
         
     | 
| 
      
 145 
     | 
    
         
            +
              action set_opened   { self.set_depth   = set_depth   + 1 }
         
     | 
| 
      
 146 
     | 
    
         
            +
              action set_closed   { self.set_depth   = set_depth   - 1 }
         
     | 
| 
       144 
147 
     | 
    
         | 
| 
       145 
148 
     | 
    
         
             
              # Character set scanner, continues consuming characters until it meets the
         
     | 
| 
       146 
149 
     | 
    
         
             
              # closing bracket of the set.
         
     | 
| 
       147 
150 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       148 
151 
     | 
    
         
             
              character_set := |*
         
     | 
| 
       149 
     | 
    
         
            -
                set_close > (set_meta, 2) {
         
     | 
| 
       150 
     | 
    
         
            -
                  set_depth -= 1
         
     | 
| 
       151 
     | 
    
         
            -
                  in_set = set_depth > 0 ? true : false
         
     | 
| 
       152 
     | 
    
         
            -
             
     | 
| 
      
 152 
     | 
    
         
            +
                set_close > (set_meta, 2) @set_closed {
         
     | 
| 
       153 
153 
     | 
    
         
             
                  emit(:set, :close, *text(data, ts, te))
         
     | 
| 
       154 
     | 
    
         
            -
             
     | 
| 
       155 
     | 
    
         
            -
                  if set_depth == 0
         
     | 
| 
       156 
     | 
    
         
            -
                    fgoto main;
         
     | 
| 
       157 
     | 
    
         
            -
                  else
         
     | 
| 
      
 154 
     | 
    
         
            +
                  if in_set?
         
     | 
| 
       158 
155 
     | 
    
         
             
                    fret;
         
     | 
| 
      
 156 
     | 
    
         
            +
                  else
         
     | 
| 
      
 157 
     | 
    
         
            +
                    fgoto main;
         
     | 
| 
       159 
158 
     | 
    
         
             
                  end
         
     | 
| 
       160 
159 
     | 
    
         
             
                };
         
     | 
| 
       161 
160 
     | 
    
         | 
| 
       162 
     | 
    
         
            -
                '-]' { # special case, emits two tokens
         
     | 
| 
       163 
     | 
    
         
            -
                   
     | 
| 
       164 
     | 
    
         
            -
                   
     | 
| 
       165 
     | 
    
         
            -
             
     | 
| 
       166 
     | 
    
         
            -
                  emit(:literal, :literal, copy(data, ts..te-2), ts, te)
         
     | 
| 
       167 
     | 
    
         
            -
                  emit(:set, :close, copy(data, ts+1..te-1), ts, te)
         
     | 
| 
       168 
     | 
    
         
            -
             
     | 
| 
       169 
     | 
    
         
            -
                  if set_depth == 0
         
     | 
| 
       170 
     | 
    
         
            -
                    fgoto main;
         
     | 
| 
       171 
     | 
    
         
            -
                  else
         
     | 
| 
      
 161 
     | 
    
         
            +
                '-]' @set_closed { # special case, emits two tokens
         
     | 
| 
      
 162 
     | 
    
         
            +
                  emit(:literal, :literal, copy(data, ts..te-2), ts, te - 1)
         
     | 
| 
      
 163 
     | 
    
         
            +
                  emit(:set, :close, copy(data, ts+1..te-1), ts + 1, te)
         
     | 
| 
      
 164 
     | 
    
         
            +
                  if in_set?
         
     | 
| 
       172 
165 
     | 
    
         
             
                    fret;
         
     | 
| 
      
 166 
     | 
    
         
            +
                  else
         
     | 
| 
      
 167 
     | 
    
         
            +
                    fgoto main;
         
     | 
| 
       173 
168 
     | 
    
         
             
                  end
         
     | 
| 
       174 
169 
     | 
    
         
             
                };
         
     | 
| 
       175 
170 
     | 
    
         | 
| 
         @@ -207,14 +202,12 @@ 
     | 
|
| 
       207 
202 
     | 
    
         
             
                  fcall set_escape_sequence;
         
     | 
| 
       208 
203 
     | 
    
         
             
                };
         
     | 
| 
       209 
204 
     | 
    
         | 
| 
       210 
     | 
    
         
            -
                set_open >(open_bracket, 1) {
         
     | 
| 
       211 
     | 
    
         
            -
                  set_depth += 1
         
     | 
| 
       212 
     | 
    
         
            -
             
     | 
| 
      
 205 
     | 
    
         
            +
                set_open >(open_bracket, 1) >set_opened {
         
     | 
| 
       213 
206 
     | 
    
         
             
                  emit(:set, :open, *text(data, ts, te))
         
     | 
| 
       214 
207 
     | 
    
         
             
                  fcall character_set;
         
     | 
| 
       215 
208 
     | 
    
         
             
                };
         
     | 
| 
       216 
209 
     | 
    
         | 
| 
       217 
     | 
    
         
            -
                class_posix >(open_bracket, 1) @eof(premature_end_error) 
     | 
| 
      
 210 
     | 
    
         
            +
                class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
         
     | 
| 
       218 
211 
     | 
    
         
             
                  text = text(data, ts, te).first
         
     | 
| 
       219 
212 
     | 
    
         | 
| 
       220 
213 
     | 
    
         
             
                  type = :posixclass
         
     | 
| 
         @@ -227,11 +220,11 @@ 
     | 
|
| 
       227 
220 
     | 
    
         
             
                  emit(type, class_name.to_sym, text, ts, te)
         
     | 
| 
       228 
221 
     | 
    
         
             
                };
         
     | 
| 
       229 
222 
     | 
    
         | 
| 
       230 
     | 
    
         
            -
                collating_sequence >(open_bracket, 1) @eof(premature_end_error) 
     | 
| 
      
 223 
     | 
    
         
            +
                collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
         
     | 
| 
       231 
224 
     | 
    
         
             
                  emit(:set, :collation, *text(data, ts, te))
         
     | 
| 
       232 
225 
     | 
    
         
             
                };
         
     | 
| 
       233 
226 
     | 
    
         | 
| 
       234 
     | 
    
         
            -
                character_equivalent >(open_bracket, 1) @eof(premature_end_error) 
     | 
| 
      
 227 
     | 
    
         
            +
                character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
         
     | 
| 
       235 
228 
     | 
    
         
             
                  emit(:set, :equivalent, *text(data, ts, te))
         
     | 
| 
       236 
229 
     | 
    
         
             
                };
         
     | 
| 
       237 
230 
     | 
    
         | 
| 
         @@ -337,44 +330,24 @@ 
     | 
|
| 
       337 
330 
     | 
    
         
             
                };
         
     | 
| 
       338 
331 
     | 
    
         | 
| 
       339 
332 
     | 
    
         
             
                control_sequence >(escaped_alpha, 4) $eof(premature_end_error) {
         
     | 
| 
       340 
     | 
    
         
            -
                   
     | 
| 
       341 
     | 
    
         
            -
                    c = data[te].chr
         
     | 
| 
       342 
     | 
    
         
            -
                    if c =~ /[\x00-\x7F]/
         
     | 
| 
       343 
     | 
    
         
            -
                      emit(:escape, :control, copy(data, ts-1..te), ts-1, te+1)
         
     | 
| 
       344 
     | 
    
         
            -
                      p += 1
         
     | 
| 
       345 
     | 
    
         
            -
                    else
         
     | 
| 
       346 
     | 
    
         
            -
                      raise InvalidSequenceError.new("control sequence")
         
     | 
| 
       347 
     | 
    
         
            -
                    end
         
     | 
| 
       348 
     | 
    
         
            -
                  else
         
     | 
| 
       349 
     | 
    
         
            -
                    raise PrematureEndError.new("control sequence")
         
     | 
| 
       350 
     | 
    
         
            -
                  end
         
     | 
| 
      
 333 
     | 
    
         
            +
                  emit_meta_control_sequence(data, ts, te, :control)
         
     | 
| 
       351 
334 
     | 
    
         
             
                  fret;
         
     | 
| 
       352 
335 
     | 
    
         
             
                };
         
     | 
| 
       353 
336 
     | 
    
         | 
| 
       354 
337 
     | 
    
         
             
                meta_sequence >(backslashed, 3) $eof(premature_end_error) {
         
     | 
| 
       355 
     | 
    
         
            -
                   
     | 
| 
       356 
     | 
    
         
            -
                    c = data[te].chr
         
     | 
| 
       357 
     | 
    
         
            -
                    if c =~ /[\x00-\x7F]/
         
     | 
| 
       358 
     | 
    
         
            -
                      emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1)
         
     | 
| 
       359 
     | 
    
         
            -
                      p += 1
         
     | 
| 
       360 
     | 
    
         
            -
                    else
         
     | 
| 
       361 
     | 
    
         
            -
                      raise InvalidSequenceError.new("meta sequence")
         
     | 
| 
       362 
     | 
    
         
            -
                    end
         
     | 
| 
       363 
     | 
    
         
            -
                  else
         
     | 
| 
       364 
     | 
    
         
            -
                    raise PrematureEndError.new("meta sequence")
         
     | 
| 
       365 
     | 
    
         
            -
                  end
         
     | 
| 
      
 338 
     | 
    
         
            +
                  emit_meta_control_sequence(data, ts, te, :meta_sequence)
         
     | 
| 
       366 
339 
     | 
    
         
             
                  fret;
         
     | 
| 
       367 
340 
     | 
    
         
             
                };
         
     | 
| 
       368 
341 
     | 
    
         | 
| 
       369 
342 
     | 
    
         
             
                char_type_char > (escaped_alpha, 2) {
         
     | 
| 
       370 
343 
     | 
    
         
             
                  fhold;
         
     | 
| 
       371 
     | 
    
         
            -
                  fnext *(in_set ? fentry(character_set) : fentry(main));
         
     | 
| 
      
 344 
     | 
    
         
            +
                  fnext *(in_set? ? fentry(character_set) : fentry(main));
         
     | 
| 
       372 
345 
     | 
    
         
             
                  fcall char_type;
         
     | 
| 
       373 
346 
     | 
    
         
             
                };
         
     | 
| 
       374 
347 
     | 
    
         | 
| 
       375 
348 
     | 
    
         
             
                property_char > (escaped_alpha, 2) {
         
     | 
| 
       376 
349 
     | 
    
         
             
                  fhold;
         
     | 
| 
       377 
     | 
    
         
            -
                  fnext *(in_set ? fentry(character_set) : fentry(main));
         
     | 
| 
      
 350 
     | 
    
         
            +
                  fnext *(in_set? ? fentry(character_set) : fentry(main));
         
     | 
| 
       378 
351 
     | 
    
         
             
                  fcall unicode_property;
         
     | 
| 
       379 
352 
     | 
    
         
             
                };
         
     | 
| 
       380 
353 
     | 
    
         | 
| 
         @@ -412,8 +385,7 @@ 
     | 
|
| 
       412 
385 
     | 
    
         
             
                };
         
     | 
| 
       413 
386 
     | 
    
         | 
| 
       414 
387 
     | 
    
         
             
                alternation {
         
     | 
| 
       415 
     | 
    
         
            -
                  if  
     | 
| 
       416 
     | 
    
         
            -
                     conditional_stack.last[1] == group_depth
         
     | 
| 
      
 388 
     | 
    
         
            +
                  if conditional_stack.last == group_depth
         
     | 
| 
       417 
389 
     | 
    
         
             
                    emit(:conditional, :separator, *text(data, ts, te))
         
     | 
| 
       418 
390 
     | 
    
         
             
                  else
         
     | 
| 
       419 
391 
     | 
    
         
             
                    emit(:meta, :alternation, *text(data, ts, te))
         
     | 
| 
         @@ -442,18 +414,12 @@ 
     | 
|
| 
       442 
414 
     | 
    
         
             
                  when '\\b'; emit(:anchor, :word_boundary,      text, ts, te)
         
     | 
| 
       443 
415 
     | 
    
         
             
                  when '\\B'; emit(:anchor, :nonword_boundary,   text, ts, te)
         
     | 
| 
       444 
416 
     | 
    
         
             
                  when '\\G'; emit(:anchor, :match_start,        text, ts, te)
         
     | 
| 
       445 
     | 
    
         
            -
                  else
         
     | 
| 
       446 
     | 
    
         
            -
                    raise ScannerError.new(
         
     | 
| 
       447 
     | 
    
         
            -
                      "Unexpected character in anchor at #{text} (char #{ts})")
         
     | 
| 
       448 
417 
     | 
    
         
             
                  end
         
     | 
| 
       449 
418 
     | 
    
         
             
                };
         
     | 
| 
       450 
419 
     | 
    
         | 
| 
       451 
420 
     | 
    
         
             
                # Character sets
         
     | 
| 
       452 
421 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       453 
     | 
    
         
            -
                set_open {
         
     | 
| 
       454 
     | 
    
         
            -
                  set_depth += 1
         
     | 
| 
       455 
     | 
    
         
            -
                  in_set = true
         
     | 
| 
       456 
     | 
    
         
            -
             
     | 
| 
      
 422 
     | 
    
         
            +
                set_open >set_opened {
         
     | 
| 
       457 
423 
     | 
    
         
             
                  emit(:set, :open, *text(data, ts, te))
         
     | 
| 
       458 
424 
     | 
    
         
             
                  fcall character_set;
         
     | 
| 
       459 
425 
     | 
    
         
             
                };
         
     | 
| 
         @@ -465,9 +431,7 @@ 
     | 
|
| 
       465 
431 
     | 
    
         
             
                conditional {
         
     | 
| 
       466 
432 
     | 
    
         
             
                  text = text(data, ts, te).first
         
     | 
| 
       467 
433 
     | 
    
         | 
| 
       468 
     | 
    
         
            -
                   
     | 
| 
       469 
     | 
    
         
            -
                  conditional_depth += 1
         
     | 
| 
       470 
     | 
    
         
            -
                  conditional_stack << [conditional_depth, group_depth]
         
     | 
| 
      
 434 
     | 
    
         
            +
                  conditional_stack << group_depth
         
     | 
| 
       471 
435 
     | 
    
         | 
| 
       472 
436 
     | 
    
         
             
                  emit(:conditional, :open, text[0..-2], ts, te-1)
         
     | 
| 
       473 
437 
     | 
    
         
             
                  emit(:conditional, :condition_open, '(', te-1, te)
         
     | 
| 
         @@ -496,7 +460,11 @@ 
     | 
|
| 
       496 
460 
     | 
    
         
             
                #   (?imxdau-imx:subexp)  option on/off for subexp
         
     | 
| 
       497 
461 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       498 
462 
     | 
    
         
             
                group_open . group_options >group_opened {
         
     | 
| 
       499 
     | 
    
         
            -
                   
     | 
| 
      
 463 
     | 
    
         
            +
                  text = text(data, ts, te).first
         
     | 
| 
      
 464 
     | 
    
         
            +
                  if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
         
     | 
| 
      
 465 
     | 
    
         
            +
                    raise InvalidGroupOption.new($1 || "-#{$2}", text)
         
     | 
| 
      
 466 
     | 
    
         
            +
                  end
         
     | 
| 
      
 467 
     | 
    
         
            +
                  emit_options(text, ts, te)
         
     | 
| 
       500 
468 
     | 
    
         
             
                };
         
     | 
| 
       501 
469 
     | 
    
         | 
| 
       502 
470 
     | 
    
         
             
                # Assertions
         
     | 
| 
         @@ -528,19 +496,15 @@ 
     | 
|
| 
       528 
496 
     | 
    
         
             
                  when '(?>';  emit(:group, :atomic,       text, ts, te)
         
     | 
| 
       529 
497 
     | 
    
         
             
                  when '(?~';  emit(:group, :absence,      text, ts, te)
         
     | 
| 
       530 
498 
     | 
    
         | 
| 
       531 
     | 
    
         
            -
                  when /^\( 
     | 
| 
       532 
     | 
    
         
            -
                     
     | 
| 
      
 499 
     | 
    
         
            +
                  when /^\(\?(?:<>|'')/
         
     | 
| 
      
 500 
     | 
    
         
            +
                    validation_error(:group, 'named group', 'name is empty')
         
     | 
| 
       533 
501 
     | 
    
         | 
| 
      
 502 
     | 
    
         
            +
                  when /^\(\?<\w*>/
         
     | 
| 
       534 
503 
     | 
    
         
             
                    emit(:group, :named_ab,  text, ts, te)
         
     | 
| 
       535 
504 
     | 
    
         | 
| 
       536 
     | 
    
         
            -
                  when /^\(\?' 
     | 
| 
       537 
     | 
    
         
            -
                    empty_name_error(:group, 'named group (sq)') if $1.empty?
         
     | 
| 
       538 
     | 
    
         
            -
             
     | 
| 
      
 505 
     | 
    
         
            +
                  when /^\(\?'\w*'/
         
     | 
| 
       539 
506 
     | 
    
         
             
                    emit(:group, :named_sq,  text, ts, te)
         
     | 
| 
       540 
507 
     | 
    
         | 
| 
       541 
     | 
    
         
            -
                  else
         
     | 
| 
       542 
     | 
    
         
            -
                    raise ScannerError.new(
         
     | 
| 
       543 
     | 
    
         
            -
                      "Unknown subexpression group format '#{text}'")
         
     | 
| 
       544 
508 
     | 
    
         
             
                  end
         
     | 
| 
       545 
509 
     | 
    
         
             
                };
         
     | 
| 
       546 
510 
     | 
    
         | 
| 
         @@ -550,20 +514,13 @@ 
     | 
|
| 
       550 
514 
     | 
    
         
             
                };
         
     | 
| 
       551 
515 
     | 
    
         | 
| 
       552 
516 
     | 
    
         
             
                group_close @group_closed {
         
     | 
| 
       553 
     | 
    
         
            -
                  if  
     | 
| 
       554 
     | 
    
         
            -
                     conditional_stack.last[1] == (group_depth + 1)
         
     | 
| 
       555 
     | 
    
         
            -
             
     | 
| 
       556 
     | 
    
         
            -
                    emit(:conditional, :close, *text(data, ts, te))
         
     | 
| 
      
 517 
     | 
    
         
            +
                  if conditional_stack.last == group_depth + 1
         
     | 
| 
       557 
518 
     | 
    
         
             
                    conditional_stack.pop
         
     | 
| 
       558 
     | 
    
         
            -
             
     | 
| 
       559 
     | 
    
         
            -
                    if conditional_stack.length == 0
         
     | 
| 
       560 
     | 
    
         
            -
                      in_conditional = false
         
     | 
| 
       561 
     | 
    
         
            -
                    end
         
     | 
| 
      
 519 
     | 
    
         
            +
                    emit(:conditional, :close, *text(data, ts, te))
         
     | 
| 
       562 
520 
     | 
    
         
             
                  else
         
     | 
| 
       563 
     | 
    
         
            -
                    if spacing_stack.length > 1  
     | 
| 
       564 
     | 
    
         
            -
             
     | 
| 
      
 521 
     | 
    
         
            +
                    if spacing_stack.length > 1 &&
         
     | 
| 
      
 522 
     | 
    
         
            +
                       spacing_stack.last[:depth] == group_depth + 1
         
     | 
| 
       565 
523 
     | 
    
         
             
                      spacing_stack.pop
         
     | 
| 
       566 
     | 
    
         
            -
             
     | 
| 
       567 
524 
     | 
    
         
             
                      self.free_spacing = spacing_stack.last[:free_spacing]
         
     | 
| 
       568 
525 
     | 
    
         
             
                    end
         
     | 
| 
       569 
526 
     | 
    
         | 
| 
         @@ -576,11 +533,8 @@ 
     | 
|
| 
       576 
533 
     | 
    
         
             
                # ------------------------------------------------------------------------
         
     | 
| 
       577 
534 
     | 
    
         
             
                backslash . (group_name_ref | group_number_ref) > (backslashed, 4) {
         
     | 
| 
       578 
535 
     | 
    
         
             
                  case text = text(data, ts, te).first
         
     | 
| 
       579 
     | 
    
         
            -
                  when /^\\([gk]) 
     | 
| 
       580 
     | 
    
         
            -
                     
     | 
| 
       581 
     | 
    
         
            -
             
     | 
| 
       582 
     | 
    
         
            -
                  when /^\\([gk])''/ # single quotes
         
     | 
| 
       583 
     | 
    
         
            -
                    empty_backref_error("ref/call (sq)")
         
     | 
| 
      
 536 
     | 
    
         
            +
                  when /^\\([gk])(<>|'')/ # angle brackets
         
     | 
| 
      
 537 
     | 
    
         
            +
                    validation_error(:backref, 'ref/call', 'ref ID is empty')
         
     | 
| 
       584 
538 
     | 
    
         | 
| 
       585 
539 
     | 
    
         
             
                  when /^\\([gk])<[^\d+-]\w*>/ # angle-brackets
         
     | 
| 
       586 
540 
     | 
    
         
             
                    if $1 == 'k'
         
     | 
| 
         @@ -636,9 +590,6 @@ 
     | 
|
| 
       636 
590 
     | 
    
         
             
                  when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
         
     | 
| 
       637 
591 
     | 
    
         
             
                    emit(:backref, :number_recursion_ref_sq, text, ts, te)
         
     | 
| 
       638 
592 
     | 
    
         | 
| 
       639 
     | 
    
         
            -
                  else
         
     | 
| 
       640 
     | 
    
         
            -
                    raise ScannerError.new(
         
     | 
| 
       641 
     | 
    
         
            -
                      "Unknown backreference format '#{text}'")
         
     | 
| 
       642 
593 
     | 
    
         
             
                  end
         
     | 
| 
       643 
594 
     | 
    
         
             
                };
         
     | 
| 
       644 
595 
     | 
    
         | 
| 
         @@ -786,7 +737,7 @@ class Regexp::Scanner 
     | 
|
| 
       786 
737 
     | 
    
         
             
                  input = input_object
         
     | 
| 
       787 
738 
     | 
    
         
             
                  self.free_spacing = false
         
     | 
| 
       788 
739 
     | 
    
         
             
                end
         
     | 
| 
       789 
     | 
    
         
            -
             
     | 
| 
      
 740 
     | 
    
         
            +
                self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
         
     | 
| 
       790 
741 
     | 
    
         | 
| 
       791 
742 
     | 
    
         
             
                data  = input.unpack("c*") if input.is_a?(String)
         
     | 
| 
       792 
743 
     | 
    
         
             
                eof   = data.length
         
     | 
| 
         @@ -794,15 +745,9 @@ class Regexp::Scanner 
     | 
|
| 
       794 
745 
     | 
    
         
             
                self.tokens = []
         
     | 
| 
       795 
746 
     | 
    
         
             
                self.block  = block_given? ? block : nil
         
     | 
| 
       796 
747 
     | 
    
         | 
| 
       797 
     | 
    
         
            -
                self. 
     | 
| 
      
 748 
     | 
    
         
            +
                self.set_depth = 0
         
     | 
| 
       798 
749 
     | 
    
         
             
                self.group_depth = 0
         
     | 
| 
       799 
     | 
    
         
            -
                self. 
     | 
| 
       800 
     | 
    
         
            -
             
     | 
| 
       801 
     | 
    
         
            -
                in_set = false
         
     | 
| 
       802 
     | 
    
         
            -
                set_depth = 0
         
     | 
| 
       803 
     | 
    
         
            -
                in_conditional = false
         
     | 
| 
       804 
     | 
    
         
            -
                conditional_depth = 0
         
     | 
| 
       805 
     | 
    
         
            -
                conditional_stack = []
         
     | 
| 
      
 750 
     | 
    
         
            +
                self.conditional_stack = []
         
     | 
| 
       806 
751 
     | 
    
         | 
| 
       807 
752 
     | 
    
         
             
                %% write data;
         
     | 
| 
       808 
753 
     | 
    
         
             
                %% write init;
         
     | 
| 
         @@ -817,9 +762,9 @@ class Regexp::Scanner 
     | 
|
| 
       817 
762 
     | 
    
         
             
                end
         
     | 
| 
       818 
763 
     | 
    
         | 
| 
       819 
764 
     | 
    
         
             
                raise PrematureEndError.new("(missing group closing paranthesis) "+
         
     | 
| 
       820 
     | 
    
         
            -
                      "[#{ 
     | 
| 
      
 765 
     | 
    
         
            +
                      "[#{group_depth}]") if in_group?
         
     | 
| 
       821 
766 
     | 
    
         
             
                raise PrematureEndError.new("(missing set closing bracket) "+
         
     | 
| 
       822 
     | 
    
         
            -
                      "[#{ 
     | 
| 
      
 767 
     | 
    
         
            +
                      "[#{set_depth}]") if in_set?
         
     | 
| 
       823 
768 
     | 
    
         | 
| 
       824 
769 
     | 
    
         
             
                # when the entire expression is a literal run
         
     | 
| 
       825 
770 
     | 
    
         
             
                emit_literal if literal
         
     | 
| 
         @@ -854,62 +799,15 @@ class Regexp::Scanner 
     | 
|
| 
       854 
799 
     | 
    
         | 
| 
       855 
800 
     | 
    
         
             
              private
         
     | 
| 
       856 
801 
     | 
    
         | 
| 
       857 
     | 
    
         
            -
              attr_accessor :tokens, :literal, :block,
         
     | 
| 
       858 
     | 
    
         
            -
                            : 
     | 
| 
       859 
     | 
    
         
            -
                            :free_spacing, :spacing_stack
         
     | 
| 
       860 
     | 
    
         
            -
             
     | 
| 
       861 
     | 
    
         
            -
              # Ragel's regex-based scan of the group options introduced a lot of
         
     | 
| 
       862 
     | 
    
         
            -
              # ambiguity, so we just ask it to find the beginning of what looks
         
     | 
| 
       863 
     | 
    
         
            -
              # like an options run and handle the rest in here.
         
     | 
| 
       864 
     | 
    
         
            -
              def scan_options(p, data, ts, te)
         
     | 
| 
       865 
     | 
    
         
            -
                text = text(data, ts, te).first
         
     | 
| 
       866 
     | 
    
         
            -
             
     | 
| 
       867 
     | 
    
         
            -
                options_char, options_length = true, 0
         
     | 
| 
       868 
     | 
    
         
            -
             
     | 
| 
       869 
     | 
    
         
            -
                # Copy while we have option characters. There is no maximum length,
         
     | 
| 
       870 
     | 
    
         
            -
                # as ruby allows things like '(?xxxxxxxxx-xxxxxxxxxxxxx:abc)'.
         
     | 
| 
       871 
     | 
    
         
            -
                negative_options = false
         
     | 
| 
       872 
     | 
    
         
            -
                while options_char
         
     | 
| 
       873 
     | 
    
         
            -
                  if data[te + options_length]
         
     | 
| 
       874 
     | 
    
         
            -
                    c = data[te + options_length].chr
         
     | 
| 
       875 
     | 
    
         
            -
             
     | 
| 
       876 
     | 
    
         
            -
                    if c =~ /[-mixdau]/
         
     | 
| 
       877 
     | 
    
         
            -
                      negative_options = true if c == '-'
         
     | 
| 
      
 802 
     | 
    
         
            +
              attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
         
     | 
| 
      
 803 
     | 
    
         
            +
                            :group_depth, :set_depth, :conditional_stack
         
     | 
| 
       878 
804 
     | 
    
         | 
| 
       879 
     | 
    
         
            -
             
     | 
| 
       880 
     | 
    
         
            -
             
     | 
| 
       881 
     | 
    
         
            -
             
     | 
| 
       882 
     | 
    
         
            -
                      text << c ; p += 1 ; options_length += 1
         
     | 
| 
       883 
     | 
    
         
            -
                    else
         
     | 
| 
       884 
     | 
    
         
            -
                      options_char = false
         
     | 
| 
       885 
     | 
    
         
            -
                    end
         
     | 
| 
       886 
     | 
    
         
            -
                  else
         
     | 
| 
       887 
     | 
    
         
            -
                    raise PrematureEndError.new("expression options `#{text}'")
         
     | 
| 
       888 
     | 
    
         
            -
                  end
         
     | 
| 
       889 
     | 
    
         
            -
                end
         
     | 
| 
       890 
     | 
    
         
            -
             
     | 
| 
       891 
     | 
    
         
            -
                if data[te + options_length]
         
     | 
| 
       892 
     | 
    
         
            -
                  c = data[te + options_length].chr
         
     | 
| 
       893 
     | 
    
         
            -
             
     | 
| 
       894 
     | 
    
         
            -
                  if c == ':'
         
     | 
| 
       895 
     | 
    
         
            -
                    # Include the ':' in the options text
         
     | 
| 
       896 
     | 
    
         
            -
                    text << c ; p += 1 ; options_length += 1
         
     | 
| 
       897 
     | 
    
         
            -
                    emit_options(text, ts, te + options_length)
         
     | 
| 
       898 
     | 
    
         
            -
             
     | 
| 
       899 
     | 
    
         
            -
                  elsif c == ')'
         
     | 
| 
       900 
     | 
    
         
            -
                    # Don't include the closing ')', let group_close handle it.
         
     | 
| 
       901 
     | 
    
         
            -
                    emit_options(text, ts, te + options_length)
         
     | 
| 
       902 
     | 
    
         
            -
             
     | 
| 
       903 
     | 
    
         
            -
                  else
         
     | 
| 
       904 
     | 
    
         
            -
                    # Plain Regexp reports this as 'undefined group option'
         
     | 
| 
       905 
     | 
    
         
            -
                    raise ScannerError.new(
         
     | 
| 
       906 
     | 
    
         
            -
                      "Unexpected `#{c}' in options sequence, ':' or ')' expected")
         
     | 
| 
       907 
     | 
    
         
            -
                  end
         
     | 
| 
       908 
     | 
    
         
            -
                else
         
     | 
| 
       909 
     | 
    
         
            -
                  raise PrematureEndError.new("expression options `#{text}'")
         
     | 
| 
       910 
     | 
    
         
            -
                end
         
     | 
| 
      
 805 
     | 
    
         
            +
              def in_group?
         
     | 
| 
      
 806 
     | 
    
         
            +
                group_depth > 0
         
     | 
| 
      
 807 
     | 
    
         
            +
              end
         
     | 
| 
       911 
808 
     | 
    
         | 
| 
       912 
     | 
    
         
            -
             
     | 
| 
      
 809 
     | 
    
         
            +
              def in_set?
         
     | 
| 
      
 810 
     | 
    
         
            +
                set_depth > 0
         
     | 
| 
       913 
811 
     | 
    
         
             
              end
         
     | 
| 
       914 
812 
     | 
    
         | 
| 
       915 
813 
     | 
    
         
             
              # Copy from ts to te from data as text
         
     | 
| 
         @@ -945,32 +843,39 @@ class Regexp::Scanner 
     | 
|
| 
       945 
843 
     | 
    
         
             
              def emit_options(text, ts, te)
         
     | 
| 
       946 
844 
     | 
    
         
             
                token = nil
         
     | 
| 
       947 
845 
     | 
    
         | 
| 
       948 
     | 
    
         
            -
                 
     | 
| 
       949 
     | 
    
         
            -
             
     | 
| 
      
 846 
     | 
    
         
            +
                # Ruby allows things like '(?-xxxx)' or '(?xx-xx--xx-:abc)'.
         
     | 
| 
      
 847 
     | 
    
         
            +
                text =~ /\(\?([mixdau]*)(-(?:[mix]*))*(:)?/
         
     | 
| 
      
 848 
     | 
    
         
            +
                positive, negative, group_local = $1, $2, $3
         
     | 
| 
       950 
849 
     | 
    
         | 
| 
       951 
     | 
    
         
            -
             
     | 
| 
       952 
     | 
    
         
            -
             
     | 
| 
       953 
     | 
    
         
            -
             
     | 
| 
      
 850 
     | 
    
         
            +
                if positive.include?('x')
         
     | 
| 
      
 851 
     | 
    
         
            +
                  self.free_spacing = true
         
     | 
| 
      
 852 
     | 
    
         
            +
                end
         
     | 
| 
       954 
853 
     | 
    
         | 
| 
       955 
     | 
    
         
            -
             
     | 
| 
       956 
     | 
    
         
            -
             
     | 
| 
       957 
     | 
    
         
            -
             
     | 
| 
       958 
     | 
    
         
            -
             
     | 
| 
       959 
     | 
    
         
            -
             
     | 
| 
      
 854 
     | 
    
         
            +
                # If the x appears in both, treat it like ruby does, the second cancels
         
     | 
| 
      
 855 
     | 
    
         
            +
                # the first.
         
     | 
| 
      
 856 
     | 
    
         
            +
                if negative && negative.include?('x')
         
     | 
| 
      
 857 
     | 
    
         
            +
                  self.free_spacing = false
         
     | 
| 
      
 858 
     | 
    
         
            +
                end
         
     | 
| 
       960 
859 
     | 
    
         | 
| 
       961 
     | 
    
         
            -
             
     | 
| 
       962 
     | 
    
         
            -
             
     | 
| 
       963 
     | 
    
         
            -
             
     | 
| 
       964 
     | 
    
         
            -
             
     | 
| 
       965 
     | 
    
         
            -
             
     | 
| 
       966 
     | 
    
         
            -
             
     | 
| 
       967 
     | 
    
         
            -
             
     | 
| 
       968 
     | 
    
         
            -
                  end
         
     | 
| 
      
 860 
     | 
    
         
            +
                if group_local
         
     | 
| 
      
 861 
     | 
    
         
            +
                  spacing_stack << {:free_spacing => free_spacing, :depth => group_depth}
         
     | 
| 
      
 862 
     | 
    
         
            +
                  token = :options
         
     | 
| 
      
 863 
     | 
    
         
            +
                else
         
     | 
| 
      
 864 
     | 
    
         
            +
                  # switch for parent group level
         
     | 
| 
      
 865 
     | 
    
         
            +
                  spacing_stack.last[:free_spacing] = free_spacing
         
     | 
| 
      
 866 
     | 
    
         
            +
                  token = :options_switch
         
     | 
| 
       969 
867 
     | 
    
         
             
                end
         
     | 
| 
       970 
868 
     | 
    
         | 
| 
       971 
869 
     | 
    
         
             
                emit(:group, token, text, ts, te)
         
     | 
| 
       972 
870 
     | 
    
         
             
              end
         
     | 
| 
       973 
871 
     | 
    
         | 
| 
      
 872 
     | 
    
         
            +
              def emit_meta_control_sequence(data, ts, te, token)
         
     | 
| 
      
 873 
     | 
    
         
            +
                if data.last < 0x00 || data.last > 0x7F
         
     | 
| 
      
 874 
     | 
    
         
            +
                  validation_error(:sequence, 'escape', token.to_s)
         
     | 
| 
      
 875 
     | 
    
         
            +
                end
         
     | 
| 
      
 876 
     | 
    
         
            +
                emit(:escape, token, *text(data, ts, te, 1))
         
     | 
| 
      
 877 
     | 
    
         
            +
              end
         
     | 
| 
      
 878 
     | 
    
         
            +
             
     | 
| 
       974 
879 
     | 
    
         
             
              # Centralizes and unifies the handling of validation related
         
     | 
| 
       975 
880 
     | 
    
         
             
              # errors.
         
     | 
| 
       976 
881 
     | 
    
         
             
              def validation_error(type, what, reason)
         
     | 
| 
         @@ -981,21 +886,8 @@ class Regexp::Scanner 
     | 
|
| 
       981 
886 
     | 
    
         
             
                  error = InvalidBackrefError.new(what, reason)
         
     | 
| 
       982 
887 
     | 
    
         
             
                when :sequence
         
     | 
| 
       983 
888 
     | 
    
         
             
                  error = InvalidSequenceError.new(what, reason)
         
     | 
| 
       984 
     | 
    
         
            -
                else
         
     | 
| 
       985 
     | 
    
         
            -
                  error = ValidationError.new('expression')
         
     | 
| 
       986 
889 
     | 
    
         
             
                end
         
     | 
| 
       987 
890 
     | 
    
         | 
| 
       988 
891 
     | 
    
         
             
                raise error # unless @@config.validation_ignore
         
     | 
| 
       989 
892 
     | 
    
         
             
              end
         
     | 
| 
       990 
     | 
    
         
            -
             
     | 
| 
       991 
     | 
    
         
            -
              # Used for references with an empty name or number
         
     | 
| 
       992 
     | 
    
         
            -
              def empty_backref_error(type, what)
         
     | 
| 
       993 
     | 
    
         
            -
                validation_error(:backref, what, 'ref ID is empty')
         
     | 
| 
       994 
     | 
    
         
            -
              end
         
     | 
| 
       995 
     | 
    
         
            -
             
     | 
| 
       996 
     | 
    
         
            -
              # Used for named expressions with an empty name
         
     | 
| 
       997 
     | 
    
         
            -
              def empty_name_error(type, what)
         
     | 
| 
       998 
     | 
    
         
            -
                validation_error(type, what, 'name is empty')
         
     | 
| 
       999 
     | 
    
         
            -
              end
         
     | 
| 
       1000 
     | 
    
         
            -
             
     | 
| 
       1001 
893 
     | 
    
         
             
            end # module Regexp::Scanner
         
     |