regexp_parser 2.6.2 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +67 -0
 - data/Gemfile +2 -2
 - data/README.md +32 -29
 - data/lib/regexp_parser/expression/base.rb +0 -7
 - data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
 - data/lib/regexp_parser/expression/classes/backreference.rb +4 -2
 - data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
 - data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
 - data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
 - data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
 - data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
 - data/lib/regexp_parser/expression/classes/group.rb +0 -22
 - data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
 - data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
 - data/lib/regexp_parser/expression/methods/construct.rb +2 -4
 - data/lib/regexp_parser/expression/methods/parts.rb +23 -0
 - data/lib/regexp_parser/expression/methods/printing.rb +26 -0
 - data/lib/regexp_parser/expression/methods/tests.rb +40 -3
 - data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
 - data/lib/regexp_parser/expression/quantifier.rb +30 -17
 - data/lib/regexp_parser/expression/sequence.rb +5 -10
 - data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
 - data/lib/regexp_parser/expression/shared.rb +37 -20
 - data/lib/regexp_parser/expression/subexpression.rb +20 -15
 - data/lib/regexp_parser/expression.rb +2 -0
 - data/lib/regexp_parser/lexer.rb +76 -36
 - data/lib/regexp_parser/parser.rb +97 -97
 - data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
 - data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
 - data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
 - data/lib/regexp_parser/scanner/mapping.rb +89 -0
 - data/lib/regexp_parser/scanner/property.rl +2 -2
 - data/lib/regexp_parser/scanner/scanner.rl +90 -169
 - data/lib/regexp_parser/scanner.rb +1157 -1330
 - data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
 - data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
 - data/lib/regexp_parser/syntax/token/escape.rb +3 -1
 - data/lib/regexp_parser/syntax/token/meta.rb +9 -2
 - data/lib/regexp_parser/syntax/token/unicode_property.rb +3 -0
 - data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
 - data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
 - data/lib/regexp_parser/syntax/versions.rb +2 -0
 - data/lib/regexp_parser/version.rb +1 -1
 - metadata +10 -3
 
    
        data/lib/regexp_parser/parser.rb
    CHANGED
    
    | 
         @@ -18,11 +18,11 @@ class Regexp::Parser 
     | 
|
| 
       18 
18 
     | 
    
         
             
                end
         
     | 
| 
       19 
19 
     | 
    
         
             
              end
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
              def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
     | 
| 
      
 21 
     | 
    
         
            +
              def self.parse(input, syntax = nil, options: nil, &block)
         
     | 
| 
       22 
22 
     | 
    
         
             
                new.parse(input, syntax, options: options, &block)
         
     | 
| 
       23 
23 
     | 
    
         
             
              end
         
     | 
| 
       24 
24 
     | 
    
         | 
| 
       25 
     | 
    
         
            -
              def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
     | 
| 
      
 25 
     | 
    
         
            +
              def parse(input, syntax = nil, options: nil, &block)
         
     | 
| 
       26 
26 
     | 
    
         
             
                root = Root.construct(options: extract_options(input, options))
         
     | 
| 
       27 
27 
     | 
    
         | 
| 
       28 
28 
     | 
    
         
             
                self.root = root
         
     | 
| 
         @@ -35,7 +35,7 @@ class Regexp::Parser 
     | 
|
| 
       35 
35 
     | 
    
         | 
| 
       36 
36 
     | 
    
         
             
                self.captured_group_counts = Hash.new(0)
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
       38 
     | 
    
         
            -
                Regexp::Lexer.scan(input, syntax, options: options) do |token|
         
     | 
| 
      
 38 
     | 
    
         
            +
                Regexp::Lexer.scan(input, syntax, options: options, collect_tokens: false) do |token|
         
     | 
| 
       39 
39 
     | 
    
         
             
                  parse_token(token)
         
     | 
| 
       40 
40 
     | 
    
         
             
                end
         
     | 
| 
       41 
41 
     | 
    
         | 
| 
         @@ -232,7 +232,7 @@ class Regexp::Parser 
     | 
|
| 
       232 
232 
     | 
    
         
             
                  node << Backreference::NameRecursionLevel.new(token, active_opts)
         
     | 
| 
       233 
233 
     | 
    
         
             
                when :name_call
         
     | 
| 
       234 
234 
     | 
    
         
             
                  node << Backreference::NameCall.new(token, active_opts)
         
     | 
| 
       235 
     | 
    
         
            -
                when :number, :number_ref
         
     | 
| 
      
 235 
     | 
    
         
            +
                when :number, :number_ref # TODO: split in v3.0.0
         
     | 
| 
       236 
236 
     | 
    
         
             
                  node << Backreference::Number.new(token, active_opts)
         
     | 
| 
       237 
237 
     | 
    
         
             
                when :number_recursion_ref
         
     | 
| 
       238 
238 
     | 
    
         
             
                  node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
         
     | 
| 
         @@ -272,9 +272,9 @@ class Regexp::Parser 
     | 
|
| 
       272 
272 
     | 
    
         
             
                  nest_conditional(Conditional::Expression.new(token, active_opts))
         
     | 
| 
       273 
273 
     | 
    
         
             
                when :condition
         
     | 
| 
       274 
274 
     | 
    
         
             
                  conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
         
     | 
| 
       275 
     | 
    
         
            -
                  conditional_nesting.last.add_sequence(active_opts)
         
     | 
| 
      
 275 
     | 
    
         
            +
                  conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
         
     | 
| 
       276 
276 
     | 
    
         
             
                when :separator
         
     | 
| 
       277 
     | 
    
         
            -
                  conditional_nesting.last.add_sequence(active_opts)
         
     | 
| 
      
 277 
     | 
    
         
            +
                  conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
         
     | 
| 
       278 
278 
     | 
    
         
             
                  self.node = conditional_nesting.last.branches.last
         
     | 
| 
       279 
279 
     | 
    
         
             
                when :close
         
     | 
| 
       280 
280 
     | 
    
         
             
                  conditional_nesting.pop
         
     | 
| 
         @@ -322,6 +322,7 @@ class Regexp::Parser 
     | 
|
| 
       322 
322 
     | 
    
         | 
| 
       323 
323 
     | 
    
         
             
                when :control
         
     | 
| 
       324 
324 
     | 
    
         
             
                  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
         
     | 
| 
      
 325 
     | 
    
         
            +
                    # TODO: emit :meta_control_sequence token in v3.0.0
         
     | 
| 
       325 
326 
     | 
    
         
             
                    node << EscapeSequence::MetaControl.new(token, active_opts)
         
     | 
| 
       326 
327 
     | 
    
         
             
                  else
         
     | 
| 
       327 
328 
     | 
    
         
             
                    node << EscapeSequence::Control.new(token, active_opts)
         
     | 
| 
         @@ -329,6 +330,7 @@ class Regexp::Parser 
     | 
|
| 
       329 
330 
     | 
    
         | 
| 
       330 
331 
     | 
    
         
             
                when :meta_sequence
         
     | 
| 
       331 
332 
     | 
    
         
             
                  if token.text =~ /\A\\M-\\[Cc]/
         
     | 
| 
      
 333 
     | 
    
         
            +
                    # TODO: emit :meta_control_sequence token in v3.0.0:
         
     | 
| 
       332 
334 
     | 
    
         
             
                    node << EscapeSequence::MetaControl.new(token, active_opts)
         
     | 
| 
       333 
335 
     | 
    
         
             
                  else
         
     | 
| 
       334 
336 
     | 
    
         
             
                    node << EscapeSequence::Meta.new(token, active_opts)
         
     | 
| 
         @@ -349,11 +351,7 @@ class Regexp::Parser 
     | 
|
| 
       349 
351 
     | 
    
         
             
                when :comment
         
     | 
| 
       350 
352 
     | 
    
         
             
                  node << Comment.new(token, active_opts)
         
     | 
| 
       351 
353 
     | 
    
         
             
                when :whitespace
         
     | 
| 
       352 
     | 
    
         
            -
                  if node.last.is_a?(WhiteSpace)
     | 
| 
       353 
     | 
    
         
            -
                    node.last.merge(WhiteSpace.new(token, active_opts))
         
     | 
| 
       354 
     | 
    
         
            -
                  else
         
     | 
| 
       355 
     | 
    
         
            -
                    node << WhiteSpace.new(token, active_opts)
         
     | 
| 
       356 
     | 
    
         
            -
                  end
         
     | 
| 
      
 354 
     | 
    
         
            +
                  node << WhiteSpace.new(token, active_opts)
         
     | 
| 
       357 
355 
     | 
    
         
             
                else
         
     | 
| 
       358 
356 
     | 
    
         
             
                  raise UnknownTokenError.new('FreeSpace', token)
         
     | 
| 
       359 
357 
     | 
    
         
             
                end
         
     | 
| 
         @@ -379,98 +377,98 @@ class Regexp::Parser 
     | 
|
| 
       379 
377 
     | 
    
         
             
              end
         
     | 
| 
       380 
378 
     | 
    
         | 
| 
       381 
379 
     | 
    
         
             
              def sequence_operation(klass, token)
         
     | 
| 
       382 
     | 
    
         
            -
                unless node.is_a?(klass)
     | 
| 
      
 380 
     | 
    
         
            +
                unless node.instance_of?(klass)
         
     | 
| 
       383 
381 
     | 
    
         
             
                  operator = klass.new(token, active_opts)
         
     | 
| 
       384 
     | 
    
         
            -
                  sequence = operator.add_sequence(active_opts)
         
     | 
| 
      
 382 
     | 
    
         
            +
                  sequence = operator.add_sequence(active_opts, { ts: token.ts })
         
     | 
| 
       385 
383 
     | 
    
         
             
                  sequence.expressions = node.expressions
         
     | 
| 
       386 
384 
     | 
    
         
             
                  node.expressions = []
         
     | 
| 
       387 
385 
     | 
    
         
             
                  nest(operator)
         
     | 
| 
       388 
386 
     | 
    
         
             
                end
         
     | 
| 
       389 
     | 
    
         
            -
                node.add_sequence(active_opts)
         
     | 
| 
      
 387 
     | 
    
         
            +
                node.add_sequence(active_opts, { ts: token.te })
         
     | 
| 
       390 
388 
     | 
    
         
             
              end
         
     | 
| 
       391 
389 
     | 
    
         | 
| 
       392 
390 
     | 
    
         
             
              def posixclass(token)
         
     | 
| 
       393 
391 
     | 
    
         
             
                node << PosixClass.new(token, active_opts)
         
     | 
| 
       394 
392 
     | 
    
         
             
              end
         
     | 
| 
       395 
393 
     | 
    
         | 
| 
       396 
     | 
    
         
            -
              include Regexp::Expression::UnicodeProperty
     | 
| 
       397 
     | 
    
         
            -
              UPTokens = Regexp::Syntax::Token::UnicodeProperty
     | 
| 
      
 394 
     | 
    
         
            +
              UP = Regexp::Expression::Property
         
     | 
| 
      
 395 
     | 
    
         
            +
              UPTokens = Regexp::Syntax::Token::Property
         
     | 
| 
       398 
396 
     | 
    
         | 
| 
       399 
397 
     | 
    
         
             
              def property(token)
         
     | 
| 
       400 
398 
     | 
    
         
             
                case token.token
         
     | 
| 
       401 
     | 
    
         
            -
                when :alnum;                  node << Alnum.new(token, active_opts)
         
     | 
| 
       402 
     | 
    
         
            -
                when :alpha;                  node << Alpha.new(token, active_opts)
         
     | 
| 
       403 
     | 
    
         
            -
                when :ascii;                  node << Ascii.new(token, active_opts)
         
     | 
| 
       404 
     | 
    
         
            -
                when :blank;                  node << Blank.new(token, active_opts)
         
     | 
| 
       405 
     | 
    
         
            -
                when :cntrl;                  node << Cntrl.new(token, active_opts)
         
     | 
| 
       406 
     | 
    
         
            -
                when :digit;                  node << Digit.new(token, active_opts)
         
     | 
| 
       407 
     | 
    
         
            -
                when :graph;                  node << Graph.new(token, active_opts)
         
     | 
| 
       408 
     | 
    
         
            -
                when :lower;                  node << Lower.new(token, active_opts)
         
     | 
| 
       409 
     | 
    
         
            -
                when :print;                  node << Print.new(token, active_opts)
         
     | 
| 
       410 
     | 
    
         
            -
                when :punct;                  node << Punct.new(token, active_opts)
         
     | 
| 
       411 
     | 
    
         
            -
                when :space;                  node << Space.new(token, active_opts)
         
     | 
| 
       412 
     | 
    
         
            -
                when :upper;                  node << Upper.new(token, active_opts)
         
     | 
| 
       413 
     | 
    
         
            -
                when :word;                   node << Word.new(token, active_opts)
         
     | 
| 
       414 
     | 
    
         
            -
                when :xdigit;                 node << Xdigit.new(token, active_opts)
         
     | 
| 
       415 
     | 
    
         
            -
                when :xposixpunct;            node << XPosixPunct.new(token, active_opts)
         
     | 
| 
      
 399 
     | 
    
         
            +
                when :alnum;                  node << UP::Alnum.new(token, active_opts)
         
     | 
| 
      
 400 
     | 
    
         
            +
                when :alpha;                  node << UP::Alpha.new(token, active_opts)
         
     | 
| 
      
 401 
     | 
    
         
            +
                when :ascii;                  node << UP::Ascii.new(token, active_opts)
         
     | 
| 
      
 402 
     | 
    
         
            +
                when :blank;                  node << UP::Blank.new(token, active_opts)
         
     | 
| 
      
 403 
     | 
    
         
            +
                when :cntrl;                  node << UP::Cntrl.new(token, active_opts)
         
     | 
| 
      
 404 
     | 
    
         
            +
                when :digit;                  node << UP::Digit.new(token, active_opts)
         
     | 
| 
      
 405 
     | 
    
         
            +
                when :graph;                  node << UP::Graph.new(token, active_opts)
         
     | 
| 
      
 406 
     | 
    
         
            +
                when :lower;                  node << UP::Lower.new(token, active_opts)
         
     | 
| 
      
 407 
     | 
    
         
            +
                when :print;                  node << UP::Print.new(token, active_opts)
         
     | 
| 
      
 408 
     | 
    
         
            +
                when :punct;                  node << UP::Punct.new(token, active_opts)
         
     | 
| 
      
 409 
     | 
    
         
            +
                when :space;                  node << UP::Space.new(token, active_opts)
         
     | 
| 
      
 410 
     | 
    
         
            +
                when :upper;                  node << UP::Upper.new(token, active_opts)
         
     | 
| 
      
 411 
     | 
    
         
            +
                when :word;                   node << UP::Word.new(token, active_opts)
         
     | 
| 
      
 412 
     | 
    
         
            +
                when :xdigit;                 node << UP::Xdigit.new(token, active_opts)
         
     | 
| 
      
 413 
     | 
    
         
            +
                when :xposixpunct;            node << UP::XPosixPunct.new(token, active_opts)
         
     | 
| 
       416 
414 
     | 
    
         | 
| 
       417 
415 
     | 
    
         
             
                # only in Oniguruma (old rubies)
         
     | 
| 
       418 
     | 
    
         
            -
                when :newline;                node << Newline.new(token, active_opts)
         
     | 
| 
       419 
     | 
    
         
            -
             
     | 
| 
       420 
     | 
    
         
            -
                when :any;                    node << Any.new(token, active_opts)
         
     | 
| 
       421 
     | 
    
         
            -
                when :assigned;               node << Assigned.new(token, active_opts)
         
     | 
| 
       422 
     | 
    
         
            -
             
     | 
| 
       423 
     | 
    
         
            -
                when :letter;                 node << Letter::Any.new(token, active_opts)
         
     | 
| 
       424 
     | 
    
         
            -
                when :cased_letter;           node << Letter::Cased.new(token, active_opts)
         
     | 
| 
       425 
     | 
    
         
            -
                when :uppercase_letter;       node << Letter::Uppercase.new(token, active_opts)
         
     | 
| 
       426 
     | 
    
         
            -
                when :lowercase_letter;       node << Letter::Lowercase.new(token, active_opts)
         
     | 
| 
       427 
     | 
    
         
            -
                when :titlecase_letter;       node << Letter::Titlecase.new(token, active_opts)
         
     | 
| 
       428 
     | 
    
         
            -
                when :modifier_letter;        node << Letter::Modifier.new(token, active_opts)
         
     | 
| 
       429 
     | 
    
         
            -
                when :other_letter;           node << Letter::Other.new(token, active_opts)
         
     | 
| 
       430 
     | 
    
         
            -
             
     | 
| 
       431 
     | 
    
         
            -
                when :mark;                   node << Mark::Any.new(token, active_opts)
         
     | 
| 
       432 
     | 
    
         
            -
                when :combining_mark;         node << Mark::Combining.new(token, active_opts)
         
     | 
| 
       433 
     | 
    
         
            -
                when :nonspacing_mark;        node << Mark::Nonspacing.new(token, active_opts)
         
     | 
| 
       434 
     | 
    
         
            -
                when :spacing_mark;           node << Mark::Spacing.new(token, active_opts)
         
     | 
| 
       435 
     | 
    
         
            -
                when :enclosing_mark;         node << Mark::Enclosing.new(token, active_opts)
         
     | 
| 
       436 
     | 
    
         
            -
             
     | 
| 
       437 
     | 
    
         
            -
                when :number;                 node << Number::Any.new(token, active_opts)
         
     | 
| 
       438 
     | 
    
         
            -
                when :decimal_number;         node << Number::Decimal.new(token, active_opts)
         
     | 
| 
       439 
     | 
    
         
            -
                when :letter_number;          node << Number::Letter.new(token, active_opts)
         
     | 
| 
       440 
     | 
    
         
            -
                when :other_number;           node << Number::Other.new(token, active_opts)
         
     | 
| 
       441 
     | 
    
         
            -
             
     | 
| 
       442 
     | 
    
         
            -
                when :punctuation;            node << Punctuation::Any.new(token, active_opts)
         
     | 
| 
       443 
     | 
    
         
            -
                when :connector_punctuation;  node << Punctuation::Connector.new(token, active_opts)
         
     | 
| 
       444 
     | 
    
         
            -
                when :dash_punctuation;       node << Punctuation::Dash.new(token, active_opts)
         
     | 
| 
       445 
     | 
    
         
            -
                when :open_punctuation;       node << Punctuation::Open.new(token, active_opts)
         
     | 
| 
       446 
     | 
    
         
            -
                when :close_punctuation;      node << Punctuation::Close.new(token, active_opts)
         
     | 
| 
       447 
     | 
    
         
            -
                when :initial_punctuation;    node << Punctuation::Initial.new(token, active_opts)
         
     | 
| 
       448 
     | 
    
         
            -
                when :final_punctuation;      node << Punctuation::Final.new(token, active_opts)
         
     | 
| 
       449 
     | 
    
         
            -
                when :other_punctuation;      node << Punctuation::Other.new(token, active_opts)
         
     | 
| 
       450 
     | 
    
         
            -
             
     | 
| 
       451 
     | 
    
         
            -
                when :separator;              node << Separator::Any.new(token, active_opts)
         
     | 
| 
       452 
     | 
    
         
            -
                when :space_separator;        node << Separator::Space.new(token, active_opts)
         
     | 
| 
       453 
     | 
    
         
            -
                when :line_separator;         node << Separator::Line.new(token, active_opts)
         
     | 
| 
       454 
     | 
    
         
            -
                when :paragraph_separator;    node << Separator::Paragraph.new(token, active_opts)
         
     | 
| 
       455 
     | 
    
         
            -
             
     | 
| 
       456 
     | 
    
         
            -
                when :symbol;                 node << Symbol::Any.new(token, active_opts)
         
     | 
| 
       457 
     | 
    
         
            -
                when :math_symbol;            node << Symbol::Math.new(token, active_opts)
         
     | 
| 
       458 
     | 
    
         
            -
                when :currency_symbol;        node << Symbol::Currency.new(token, active_opts)
         
     | 
| 
       459 
     | 
    
         
            -
                when :modifier_symbol;        node << Symbol::Modifier.new(token, active_opts)
         
     | 
| 
       460 
     | 
    
         
            -
                when :other_symbol;           node << Symbol::Other.new(token, active_opts)
         
     | 
| 
       461 
     | 
    
         
            -
             
     | 
| 
       462 
     | 
    
         
            -
                when :other;                  node << Codepoint::Any.new(token, active_opts)
         
     | 
| 
       463 
     | 
    
         
            -
                when :control;                node << Codepoint::Control.new(token, active_opts)
         
     | 
| 
       464 
     | 
    
         
            -
                when :format;                 node << Codepoint::Format.new(token, active_opts)
         
     | 
| 
       465 
     | 
    
         
            -
                when :surrogate;              node << Codepoint::Surrogate.new(token, active_opts)
         
     | 
| 
       466 
     | 
    
         
            -
                when :private_use;            node << Codepoint::PrivateUse.new(token, active_opts)
         
     | 
| 
       467 
     | 
    
         
            -
                when :unassigned;             node << Codepoint::Unassigned.new(token, active_opts)
         
     | 
| 
       468 
     | 
    
         
            -
             
     | 
| 
       469 
     | 
    
         
            -
                when *UPTokens::Age;          node << Age.new(token, active_opts)
         
     | 
| 
       470 
     | 
    
         
            -
                when *UPTokens::Derived;      node << Derived.new(token, active_opts)
         
     | 
| 
       471 
     | 
    
         
            -
                when *UPTokens::Emoji;        node << Emoji.new(token, active_opts)
         
     | 
| 
       472 
     | 
    
         
            -
                when *UPTokens::Script;       node << Script.new(token, active_opts)
         
     | 
| 
       473 
     | 
    
         
            -
                when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
         
     | 
| 
      
 416 
     | 
    
         
            +
                when :newline;                node << UP::Newline.new(token, active_opts)
         
     | 
| 
      
 417 
     | 
    
         
            +
             
     | 
| 
      
 418 
     | 
    
         
            +
                when :any;                    node << UP::Any.new(token, active_opts)
         
     | 
| 
      
 419 
     | 
    
         
            +
                when :assigned;               node << UP::Assigned.new(token, active_opts)
         
     | 
| 
      
 420 
     | 
    
         
            +
             
     | 
| 
      
 421 
     | 
    
         
            +
                when :letter;                 node << UP::Letter::Any.new(token, active_opts)
         
     | 
| 
      
 422 
     | 
    
         
            +
                when :cased_letter;           node << UP::Letter::Cased.new(token, active_opts)
         
     | 
| 
      
 423 
     | 
    
         
            +
                when :uppercase_letter;       node << UP::Letter::Uppercase.new(token, active_opts)
         
     | 
| 
      
 424 
     | 
    
         
            +
                when :lowercase_letter;       node << UP::Letter::Lowercase.new(token, active_opts)
         
     | 
| 
      
 425 
     | 
    
         
            +
                when :titlecase_letter;       node << UP::Letter::Titlecase.new(token, active_opts)
         
     | 
| 
      
 426 
     | 
    
         
            +
                when :modifier_letter;        node << UP::Letter::Modifier.new(token, active_opts)
         
     | 
| 
      
 427 
     | 
    
         
            +
                when :other_letter;           node << UP::Letter::Other.new(token, active_opts)
         
     | 
| 
      
 428 
     | 
    
         
            +
             
     | 
| 
      
 429 
     | 
    
         
            +
                when :mark;                   node << UP::Mark::Any.new(token, active_opts)
         
     | 
| 
      
 430 
     | 
    
         
            +
                when :combining_mark;         node << UP::Mark::Combining.new(token, active_opts)
         
     | 
| 
      
 431 
     | 
    
         
            +
                when :nonspacing_mark;        node << UP::Mark::Nonspacing.new(token, active_opts)
         
     | 
| 
      
 432 
     | 
    
         
            +
                when :spacing_mark;           node << UP::Mark::Spacing.new(token, active_opts)
         
     | 
| 
      
 433 
     | 
    
         
            +
                when :enclosing_mark;         node << UP::Mark::Enclosing.new(token, active_opts)
         
     | 
| 
      
 434 
     | 
    
         
            +
             
     | 
| 
      
 435 
     | 
    
         
            +
                when :number;                 node << UP::Number::Any.new(token, active_opts)
         
     | 
| 
      
 436 
     | 
    
         
            +
                when :decimal_number;         node << UP::Number::Decimal.new(token, active_opts)
         
     | 
| 
      
 437 
     | 
    
         
            +
                when :letter_number;          node << UP::Number::Letter.new(token, active_opts)
         
     | 
| 
      
 438 
     | 
    
         
            +
                when :other_number;           node << UP::Number::Other.new(token, active_opts)
         
     | 
| 
      
 439 
     | 
    
         
            +
             
     | 
| 
      
 440 
     | 
    
         
            +
                when :punctuation;            node << UP::Punctuation::Any.new(token, active_opts)
         
     | 
| 
      
 441 
     | 
    
         
            +
                when :connector_punctuation;  node << UP::Punctuation::Connector.new(token, active_opts)
         
     | 
| 
      
 442 
     | 
    
         
            +
                when :dash_punctuation;       node << UP::Punctuation::Dash.new(token, active_opts)
         
     | 
| 
      
 443 
     | 
    
         
            +
                when :open_punctuation;       node << UP::Punctuation::Open.new(token, active_opts)
         
     | 
| 
      
 444 
     | 
    
         
            +
                when :close_punctuation;      node << UP::Punctuation::Close.new(token, active_opts)
         
     | 
| 
      
 445 
     | 
    
         
            +
                when :initial_punctuation;    node << UP::Punctuation::Initial.new(token, active_opts)
         
     | 
| 
      
 446 
     | 
    
         
            +
                when :final_punctuation;      node << UP::Punctuation::Final.new(token, active_opts)
         
     | 
| 
      
 447 
     | 
    
         
            +
                when :other_punctuation;      node << UP::Punctuation::Other.new(token, active_opts)
         
     | 
| 
      
 448 
     | 
    
         
            +
             
     | 
| 
      
 449 
     | 
    
         
            +
                when :separator;              node << UP::Separator::Any.new(token, active_opts)
         
     | 
| 
      
 450 
     | 
    
         
            +
                when :space_separator;        node << UP::Separator::Space.new(token, active_opts)
         
     | 
| 
      
 451 
     | 
    
         
            +
                when :line_separator;         node << UP::Separator::Line.new(token, active_opts)
         
     | 
| 
      
 452 
     | 
    
         
            +
                when :paragraph_separator;    node << UP::Separator::Paragraph.new(token, active_opts)
         
     | 
| 
      
 453 
     | 
    
         
            +
             
     | 
| 
      
 454 
     | 
    
         
            +
                when :symbol;                 node << UP::Symbol::Any.new(token, active_opts)
         
     | 
| 
      
 455 
     | 
    
         
            +
                when :math_symbol;            node << UP::Symbol::Math.new(token, active_opts)
         
     | 
| 
      
 456 
     | 
    
         
            +
                when :currency_symbol;        node << UP::Symbol::Currency.new(token, active_opts)
         
     | 
| 
      
 457 
     | 
    
         
            +
                when :modifier_symbol;        node << UP::Symbol::Modifier.new(token, active_opts)
         
     | 
| 
      
 458 
     | 
    
         
            +
                when :other_symbol;           node << UP::Symbol::Other.new(token, active_opts)
         
     | 
| 
      
 459 
     | 
    
         
            +
             
     | 
| 
      
 460 
     | 
    
         
            +
                when :other;                  node << UP::Codepoint::Any.new(token, active_opts)
         
     | 
| 
      
 461 
     | 
    
         
            +
                when :control;                node << UP::Codepoint::Control.new(token, active_opts)
         
     | 
| 
      
 462 
     | 
    
         
            +
                when :format;                 node << UP::Codepoint::Format.new(token, active_opts)
         
     | 
| 
      
 463 
     | 
    
         
            +
                when :surrogate;              node << UP::Codepoint::Surrogate.new(token, active_opts)
         
     | 
| 
      
 464 
     | 
    
         
            +
                when :private_use;            node << UP::Codepoint::PrivateUse.new(token, active_opts)
         
     | 
| 
      
 465 
     | 
    
         
            +
                when :unassigned;             node << UP::Codepoint::Unassigned.new(token, active_opts)
         
     | 
| 
      
 466 
     | 
    
         
            +
             
     | 
| 
      
 467 
     | 
    
         
            +
                when *UPTokens::Age;          node << UP::Age.new(token, active_opts)
         
     | 
| 
      
 468 
     | 
    
         
            +
                when *UPTokens::Derived;      node << UP::Derived.new(token, active_opts)
         
     | 
| 
      
 469 
     | 
    
         
            +
                when *UPTokens::Emoji;        node << UP::Emoji.new(token, active_opts)
         
     | 
| 
      
 470 
     | 
    
         
            +
                when *UPTokens::Script;       node << UP::Script.new(token, active_opts)
         
     | 
| 
      
 471 
     | 
    
         
            +
                when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
         
     | 
| 
       474 
472 
     | 
    
         | 
| 
       475 
473 
     | 
    
         
             
                else
         
     | 
| 
       476 
474 
     | 
    
         
             
                  raise UnknownTokenError.new('UnicodeProperty', token)
         
     | 
| 
         @@ -478,8 +476,7 @@ class Regexp::Parser 
     | 
|
| 
       478 
476 
     | 
    
         
             
              end
         
     | 
| 
       479 
477 
     | 
    
         | 
| 
       480 
478 
     | 
    
         
             
              def quantifier(token)
         
     | 
| 
       481 
     | 
    
         
            -
                target_node = node. 
     | 
| 
       482 
     | 
    
         
            -
                target_node or raise ParserError, "No valid target found for '#{token.text}'"
         
     | 
| 
      
 479 
     | 
    
         
            +
                target_node = node.extract_quantifier_target(token.text)
         
     | 
| 
       483 
480 
     | 
    
         | 
| 
       484 
481 
     | 
    
         
             
                # in case of chained quantifiers, wrap target in an implicit passive group
         
     | 
| 
       485 
482 
     | 
    
         
             
                # description of the problem: https://github.com/ammar/regexp_parser/issues/3
         
     | 
| 
         @@ -527,6 +524,8 @@ class Regexp::Parser 
     | 
|
| 
       527 
524 
     | 
    
         
             
              end
         
     | 
| 
       528 
525 
     | 
    
         | 
| 
       529 
526 
     | 
    
         
             
              def open_set(token)
         
     | 
| 
      
 527 
     | 
    
         
            +
                # TODO: this and Quantifier are the only cases where Expression#token
         
     | 
| 
      
 528 
     | 
    
         
            +
                # does not match the scanner/lexer output. Fix in v3.0.0.
         
     | 
| 
       530 
529 
     | 
    
         
             
                token.token = :character
         
     | 
| 
       531 
530 
     | 
    
         
             
                nest(CharacterSet.new(token, active_opts))
         
     | 
| 
       532 
531 
     | 
    
         
             
              end
         
     | 
| 
         @@ -541,7 +540,7 @@ class Regexp::Parser 
     | 
|
| 
       541 
540 
     | 
    
         | 
| 
       542 
541 
     | 
    
         
             
              def range(token)
         
     | 
| 
       543 
542 
     | 
    
         
             
                exp = CharacterSet::Range.new(token, active_opts)
         
     | 
| 
       544 
     | 
    
         
            -
                scope = node.last. 
     | 
| 
      
 543 
     | 
    
         
            +
                scope = node.last.instance_of?(CharacterSet::IntersectedSequence) ? node.last : node
         
     | 
| 
       545 
544 
     | 
    
         
             
                exp << scope.expressions.pop
         
     | 
| 
       546 
545 
     | 
    
         
             
                nest(exp)
         
     | 
| 
       547 
546 
     | 
    
         
             
              end
         
     | 
| 
         @@ -568,7 +567,7 @@ class Regexp::Parser 
     | 
|
| 
       568 
567 
     | 
    
         
             
              end
         
     | 
| 
       569 
568 
     | 
    
         | 
| 
       570 
569 
     | 
    
         
             
              def close_completed_character_set_range
         
     | 
| 
       571 
     | 
    
         
            -
                decrease_nesting if node. 
     | 
| 
      
 570 
     | 
    
         
            +
                decrease_nesting if node.instance_of?(CharacterSet::Range) && node.complete?
         
     | 
| 
       572 
571 
     | 
    
         
             
              end
         
     | 
| 
       573 
572 
     | 
    
         | 
| 
       574 
573 
     | 
    
         
             
              def active_opts
         
     | 
| 
         @@ -579,17 +578,18 @@ class Regexp::Parser 
     | 
|
| 
       579 
578 
     | 
    
         
             
              # an instance of Backreference::Number, its #referenced_expression is set to
         
     | 
| 
       580 
579 
     | 
    
         
             
              # the instance of Group::Capture that it refers to via its number.
         
     | 
| 
       581 
580 
     | 
    
         
             
              def assign_referenced_expressions
         
     | 
| 
       582 
     | 
    
         
            -
                # find all referencable expressions
         
     | 
| 
      
 581 
     | 
    
         
            +
                # find all referencable and referring expressions
         
     | 
| 
       583 
582 
     | 
    
         
             
                targets = { 0 => root }
         
     | 
| 
      
 583 
     | 
    
         
            +
                referrers = []
         
     | 
| 
       584 
584 
     | 
    
         
             
                root.each_expression do |exp|
         
     | 
| 
       585 
585 
     | 
    
         
             
                  exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
         
     | 
| 
      
 586 
     | 
    
         
            +
                  referrers << exp if exp.referential?
         
     | 
| 
       586 
587 
     | 
    
         
             
                end
         
     | 
| 
       587 
     | 
    
         
            -
                # assign  
     | 
| 
       588 
     | 
    
         
            -
                 
     | 
| 
       589 
     | 
    
         
            -
             
     | 
| 
       590 
     | 
    
         
            -
             
     | 
| 
      
 588 
     | 
    
         
            +
                # assign referenced expression to referring expressions
         
     | 
| 
      
 589 
     | 
    
         
            +
                # (in a second iteration because there might be forward references)
         
     | 
| 
      
 590 
     | 
    
         
            +
                referrers.each do |exp|
         
     | 
| 
       591 
591 
     | 
    
         
             
                  exp.referenced_expression = targets[exp.reference] ||
         
     | 
| 
       592 
     | 
    
         
            -
                    raise(ParserError, "Invalid reference 
     | 
| 
      
 592 
     | 
    
         
            +
                    raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
         
     | 
| 
       593 
593 
     | 
    
         
             
                end
         
     | 
| 
       594 
594 
     | 
    
         
             
              end
         
     | 
| 
       595 
595 
     | 
    
         
             
            end # module Regexp::Parser
         
     | 
| 
         @@ -0,0 +1,63 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Regexp::Scanner
         
     | 
| 
      
 2 
     | 
    
         
            +
              # Base for all scanner validation errors
         
     | 
| 
      
 3 
     | 
    
         
            +
              class ValidationError < ScannerError
         
     | 
| 
      
 4 
     | 
    
         
            +
                # Centralizes and unifies the handling of validation related errors.
         
     | 
| 
      
 5 
     | 
    
         
            +
                def self.for(type, problem, reason = nil)
         
     | 
| 
      
 6 
     | 
    
         
            +
                  types.fetch(type).new(problem, reason)
         
     | 
| 
      
 7 
     | 
    
         
            +
                end
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                def self.types
         
     | 
| 
      
 10 
     | 
    
         
            +
                  @types ||= {
         
     | 
| 
      
 11 
     | 
    
         
            +
                    backref:      InvalidBackrefError,
         
     | 
| 
      
 12 
     | 
    
         
            +
                    group:        InvalidGroupError,
         
     | 
| 
      
 13 
     | 
    
         
            +
                    group_option: InvalidGroupOption,
         
     | 
| 
      
 14 
     | 
    
         
            +
                    posix_class:  UnknownPosixClassError,
         
     | 
| 
      
 15 
     | 
    
         
            +
                    property:     UnknownUnicodePropertyError,
         
     | 
| 
      
 16 
     | 
    
         
            +
                    sequence:     InvalidSequenceError,
         
     | 
| 
      
 17 
     | 
    
         
            +
                  }
         
     | 
| 
      
 18 
     | 
    
         
            +
                end
         
     | 
| 
      
 19 
     | 
    
         
            +
              end
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
              # Invalid sequence format. Used for escape sequences, mainly.
         
     | 
| 
      
 22 
     | 
    
         
            +
              class InvalidSequenceError < ValidationError
         
     | 
| 
      
 23 
     | 
    
         
            +
                def initialize(what = 'sequence', where = '')
         
     | 
| 
      
 24 
     | 
    
         
            +
                  super "Invalid #{what} at #{where}"
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
              end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
              # Invalid group. Used for named groups.
         
     | 
| 
      
 29 
     | 
    
         
            +
              class InvalidGroupError < ValidationError
         
     | 
| 
      
 30 
     | 
    
         
            +
                def initialize(what, reason)
         
     | 
| 
      
 31 
     | 
    
         
            +
                  super "Invalid #{what}, #{reason}."
         
     | 
| 
      
 32 
     | 
    
         
            +
                end
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
              # Invalid group option. Used for inline options.
         
     | 
| 
      
 36 
     | 
    
         
            +
              # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
         
     | 
| 
      
 37 
     | 
    
         
            +
              class InvalidGroupOption < ValidationError
         
     | 
| 
      
 38 
     | 
    
         
            +
                def initialize(option, text)
         
     | 
| 
      
 39 
     | 
    
         
            +
                  super "Invalid group option #{option} in #{text}"
         
     | 
| 
      
 40 
     | 
    
         
            +
                end
         
     | 
| 
      
 41 
     | 
    
         
            +
              end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
              # Invalid back reference. Used for name and number refs/calls.
         
     | 
| 
      
 44 
     | 
    
         
            +
              class InvalidBackrefError < ValidationError
         
     | 
| 
      
 45 
     | 
    
         
            +
                def initialize(what, reason)
         
     | 
| 
      
 46 
     | 
    
         
            +
                  super "Invalid back reference #{what}, #{reason}"
         
     | 
| 
      
 47 
     | 
    
         
            +
                end
         
     | 
| 
      
 48 
     | 
    
         
            +
              end
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
              # The property name was not recognized by the scanner.
         
     | 
| 
      
 51 
     | 
    
         
            +
              class UnknownUnicodePropertyError < ValidationError
         
     | 
| 
      
 52 
     | 
    
         
            +
                def initialize(name, _)
         
     | 
| 
      
 53 
     | 
    
         
            +
                  super "Unknown unicode character property name #{name}"
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
              end
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
              # The POSIX class name was not recognized by the scanner.
         
     | 
| 
      
 58 
     | 
    
         
            +
              class UnknownPosixClassError < ValidationError
         
     | 
| 
      
 59 
     | 
    
         
            +
                def initialize(text, _)
         
     | 
| 
      
 60 
     | 
    
         
            +
                  super "Unknown POSIX class #{text}"
         
     | 
| 
      
 61 
     | 
    
         
            +
                end
         
     | 
| 
      
 62 
     | 
    
         
            +
              end
         
     | 
| 
      
 63 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,89 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # mapping for simple cases with a 1:1 relation between text and token
         
     | 
| 
      
 2 
     | 
    
         
            +
            class Regexp::Scanner
         
     | 
| 
      
 3 
     | 
    
         
            +
              MAPPING = {
         
     | 
| 
      
 4 
     | 
    
         
            +
                anchor: {
         
     | 
| 
      
 5 
     | 
    
         
            +
                  '\A' => :bos,
         
     | 
| 
      
 6 
     | 
    
         
            +
                  '\B' => :nonword_boundary,
         
     | 
| 
      
 7 
     | 
    
         
            +
                  '\G' => :match_start,
         
     | 
| 
      
 8 
     | 
    
         
            +
                  '\Z' => :eos_ob_eol,
         
     | 
| 
      
 9 
     | 
    
         
            +
                  '\b' => :word_boundary,
         
     | 
| 
      
 10 
     | 
    
         
            +
                  '\z' => :eos,
         
     | 
| 
      
 11 
     | 
    
         
            +
                },
         
     | 
| 
      
 12 
     | 
    
         
            +
                assertion: {
         
     | 
| 
      
 13 
     | 
    
         
            +
                  '(?='  => :lookahead,
         
     | 
| 
      
 14 
     | 
    
         
            +
                  '(?!'  => :nlookahead,
         
     | 
| 
      
 15 
     | 
    
         
            +
                  '(?<=' => :lookbehind,
         
     | 
| 
      
 16 
     | 
    
         
            +
                  '(?<!' => :nlookbehind,
         
     | 
| 
      
 17 
     | 
    
         
            +
                },
         
     | 
| 
      
 18 
     | 
    
         
            +
                conditional: {
         
     | 
| 
      
 19 
     | 
    
         
            +
                  '(?' => :open,
         
     | 
| 
      
 20 
     | 
    
         
            +
                },
         
     | 
| 
      
 21 
     | 
    
         
            +
                escape: {
         
     | 
| 
      
 22 
     | 
    
         
            +
                  '\.'   => :dot,
         
     | 
| 
      
 23 
     | 
    
         
            +
                  '\|'   => :alternation,
         
     | 
| 
      
 24 
     | 
    
         
            +
                  '\^'   => :bol,
         
     | 
| 
      
 25 
     | 
    
         
            +
                  '\$'   => :eol,
         
     | 
| 
      
 26 
     | 
    
         
            +
                  '\?'   => :zero_or_one,
         
     | 
| 
      
 27 
     | 
    
         
            +
                  '\*'   => :zero_or_more,
         
     | 
| 
      
 28 
     | 
    
         
            +
                  '\+'   => :one_or_more,
         
     | 
| 
      
 29 
     | 
    
         
            +
                  '\('   => :group_open,
         
     | 
| 
      
 30 
     | 
    
         
            +
                  '\)'   => :group_close,
         
     | 
| 
      
 31 
     | 
    
         
            +
                  '\{'   => :interval_open,
         
     | 
| 
      
 32 
     | 
    
         
            +
                  '\}'   => :interval_close,
         
     | 
| 
      
 33 
     | 
    
         
            +
                  '\['   => :set_open,
         
     | 
| 
      
 34 
     | 
    
         
            +
                  '\]'   => :set_close,
         
     | 
| 
      
 35 
     | 
    
         
            +
                  '\\\\' => :backslash,
         
     | 
| 
      
 36 
     | 
    
         
            +
                  '\a'   => :bell,
         
     | 
| 
      
 37 
     | 
    
         
            +
                  '\b'   => :backspace,
         
     | 
| 
      
 38 
     | 
    
         
            +
                  '\e'   => :escape,
         
     | 
| 
      
 39 
     | 
    
         
            +
                  '\f'   => :form_feed,
         
     | 
| 
      
 40 
     | 
    
         
            +
                  '\n'   => :newline,
         
     | 
| 
      
 41 
     | 
    
         
            +
                  '\r'   => :carriage,
         
     | 
| 
      
 42 
     | 
    
         
            +
                  '\t'   => :tab,
         
     | 
| 
      
 43 
     | 
    
         
            +
                  '\v'   => :vertical_tab,
         
     | 
| 
      
 44 
     | 
    
         
            +
                },
         
     | 
| 
      
 45 
     | 
    
         
            +
                group: {
         
     | 
| 
      
 46 
     | 
    
         
            +
                  '(?:' => :passive,
         
     | 
| 
      
 47 
     | 
    
         
            +
                  '(?>' => :atomic,
         
     | 
| 
      
 48 
     | 
    
         
            +
                  '(?~' => :absence,
         
     | 
| 
      
 49 
     | 
    
         
            +
                },
         
     | 
| 
      
 50 
     | 
    
         
            +
                meta: {
         
     | 
| 
      
 51 
     | 
    
         
            +
                  '|' => :alternation,
         
     | 
| 
      
 52 
     | 
    
         
            +
                  '.' => :dot,
         
     | 
| 
      
 53 
     | 
    
         
            +
                },
         
     | 
| 
      
 54 
     | 
    
         
            +
                quantifier: {
         
     | 
| 
      
 55 
     | 
    
         
            +
                  '?'  => :zero_or_one,
         
     | 
| 
      
 56 
     | 
    
         
            +
                  '??' => :zero_or_one_reluctant,
         
     | 
| 
      
 57 
     | 
    
         
            +
                  '?+' => :zero_or_one_possessive,
         
     | 
| 
      
 58 
     | 
    
         
            +
                  '*'  => :zero_or_more,
         
     | 
| 
      
 59 
     | 
    
         
            +
                  '*?' => :zero_or_more_reluctant,
         
     | 
| 
      
 60 
     | 
    
         
            +
                  '*+' => :zero_or_more_possessive,
         
     | 
| 
      
 61 
     | 
    
         
            +
                  '+'  => :one_or_more,
         
     | 
| 
      
 62 
     | 
    
         
            +
                  '+?' => :one_or_more_reluctant,
         
     | 
| 
      
 63 
     | 
    
         
            +
                  '++' => :one_or_more_possessive,
         
     | 
| 
      
 64 
     | 
    
         
            +
                },
         
     | 
| 
      
 65 
     | 
    
         
            +
                set: {
         
     | 
| 
      
 66 
     | 
    
         
            +
                  '['  => :character,
         
     | 
| 
      
 67 
     | 
    
         
            +
                  '-'  => :range,
         
     | 
| 
      
 68 
     | 
    
         
            +
                  '&&' => :intersection,
         
     | 
| 
      
 69 
     | 
    
         
            +
                },
         
     | 
| 
      
 70 
     | 
    
         
            +
                type: {
         
     | 
| 
      
 71 
     | 
    
         
            +
                  '\d' => :digit,
         
     | 
| 
      
 72 
     | 
    
         
            +
                  '\D' => :nondigit,
         
     | 
| 
      
 73 
     | 
    
         
            +
                  '\h' => :hex,
         
     | 
| 
      
 74 
     | 
    
         
            +
                  '\H' => :nonhex,
         
     | 
| 
      
 75 
     | 
    
         
            +
                  '\s' => :space,
         
     | 
| 
      
 76 
     | 
    
         
            +
                  '\S' => :nonspace,
         
     | 
| 
      
 77 
     | 
    
         
            +
                  '\w' => :word,
         
     | 
| 
      
 78 
     | 
    
         
            +
                  '\W' => :nonword,
         
     | 
| 
      
 79 
     | 
    
         
            +
                  '\R' => :linebreak,
         
     | 
| 
      
 80 
     | 
    
         
            +
                  '\X' => :xgrapheme,
         
     | 
| 
      
 81 
     | 
    
         
            +
                }
         
     | 
| 
      
 82 
     | 
    
         
            +
              }
         
     | 
| 
      
 83 
     | 
    
         
            +
              ANCHOR_MAPPING     = MAPPING[:anchor]
         
     | 
| 
      
 84 
     | 
    
         
            +
              ASSERTION_MAPPING  = MAPPING[:assertion]
         
     | 
| 
      
 85 
     | 
    
         
            +
              ESCAPE_MAPPING     = MAPPING[:escape]
         
     | 
| 
      
 86 
     | 
    
         
            +
              GROUP_MAPPING      = MAPPING[:group]
         
     | 
| 
      
 87 
     | 
    
         
            +
              QUANTIFIER_MAPPING = MAPPING[:quantifier]
         
     | 
| 
      
 88 
     | 
    
         
            +
              TYPE_MAPPING       = MAPPING[:type]
         
     | 
| 
      
 89 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -17,10 +17,10 @@ 
     | 
|
| 
       17 
17 
     | 
    
         
             
                  text = copy(data, ts-1, te)
         
     | 
| 
       18 
18 
     | 
    
         
             
                  type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
         
     | 
| 
       19 
19 
     | 
    
         | 
| 
       20 
     | 
    
         
            -
                  name =  
     | 
| 
      
 20 
     | 
    
         
            +
                  name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
         
     | 
| 
       21 
21 
     | 
    
         | 
| 
       22 
22 
     | 
    
         
             
                  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
         
     | 
| 
       23 
     | 
    
         
            -
                   
     | 
| 
      
 23 
     | 
    
         
            +
                  raise ValidationError.for(:property, name) unless token
         
     | 
| 
       24 
24 
     | 
    
         | 
| 
       25 
25 
     | 
    
         
             
                  self.emit(type, token.to_sym, text)
         
     | 
| 
       26 
26 
     | 
    
         |