regexp_parser 2.1.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +94 -6
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +40 -30
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +1 -1
- data/lib/regexp_parser/expression/base.rb +75 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +1 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -2
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +2 -2
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -6
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/root.rb +3 -6
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -2
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +10 -1
- data/lib/regexp_parser/expression/quantifier.rb +41 -23
- data/lib/regexp_parser/expression/sequence.rb +9 -24
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
- data/lib/regexp_parser/expression/shared.rb +85 -0
- data/lib/regexp_parser/expression/subexpression.rb +11 -8
- data/lib/regexp_parser/expression.rb +10 -132
- data/lib/regexp_parser/lexer.rb +8 -6
- data/lib/regexp_parser/parser.rb +21 -72
- data/lib/regexp_parser/scanner/properties/long.csv +622 -0
- data/lib/regexp_parser/scanner/properties/short.csv +246 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +48 -35
- data/lib/regexp_parser/scanner.rb +735 -801
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +91 -66
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +717 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +37 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -64
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -16
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
| @@ -1,12 +1,9 @@ | |
| 1 1 | 
             
            module Regexp::Expression
         | 
| 2 | 
            -
             | 
| 3 2 | 
             
              class Root < Regexp::Expression::Subexpression
         | 
| 4 3 | 
             
                def self.build(options = {})
         | 
| 5 | 
            -
                   | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
                def self.build_token
         | 
| 9 | 
            -
                  Regexp::Token.new(:expression, :root, '', 0)
         | 
| 4 | 
            +
                  warn "`#{self.class}.build(options)` is deprecated and will raise in "\
         | 
| 5 | 
            +
                       "regexp_parser v3.0.0. Please use `.construct(options: options)`."
         | 
| 6 | 
            +
                  construct(options: options)
         | 
| 10 7 | 
             
                end
         | 
| 11 8 | 
             
              end
         | 
| 12 9 | 
             
            end
         | 
| @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            module Regexp::Expression
         | 
| 2 | 
            -
             | 
| 2 | 
            +
              # TODO: unify name with token :property, one way or the other, in v3.0.0
         | 
| 3 3 | 
             
              module UnicodeProperty
         | 
| 4 4 | 
             
                class Base < Regexp::Expression::Base
         | 
| 5 5 | 
             
                  def negative?
         | 
| @@ -116,5 +116,4 @@ module Regexp::Expression | |
| 116 116 | 
             
                class Script  < UnicodeProperty::Base; end
         | 
| 117 117 | 
             
                class Block   < UnicodeProperty::Base; end
         | 
| 118 118 | 
             
              end
         | 
| 119 | 
            -
             | 
| 120 119 | 
             
            end # module Regexp::Expression
         | 
| @@ -0,0 +1,43 @@ | |
| 1 | 
            +
            module Regexp::Expression
         | 
| 2 | 
            +
              module Shared
         | 
| 3 | 
            +
                module ClassMethods
         | 
| 4 | 
            +
                  # Convenience method to init a valid Expression without a Regexp::Token
         | 
| 5 | 
            +
                  def construct(params = {})
         | 
| 6 | 
            +
                    attrs = construct_defaults.merge(params)
         | 
| 7 | 
            +
                    options = attrs.delete(:options)
         | 
| 8 | 
            +
                    token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
         | 
| 9 | 
            +
                    token = Regexp::Token.new(*token_args)
         | 
| 10 | 
            +
                    raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                    new(token, options)
         | 
| 13 | 
            +
                  end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                  def construct_defaults
         | 
| 16 | 
            +
                    if self == Root
         | 
| 17 | 
            +
                      { type: :expression, token: :root, ts: 0 }
         | 
| 18 | 
            +
                    elsif self < Sequence
         | 
| 19 | 
            +
                      { type: :expression, token: :sequence }
         | 
| 20 | 
            +
                    else
         | 
| 21 | 
            +
                      { type: token_class::Type }
         | 
| 22 | 
            +
                    end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
         | 
| 23 | 
            +
                  end
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                  def token_class
         | 
| 26 | 
            +
                    if self == Root || self < Sequence
         | 
| 27 | 
            +
                      nil # no token class because these objects are Parser-generated
         | 
| 28 | 
            +
                    # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
         | 
| 29 | 
            +
                    elsif self == Alternation || self == CharacterType::Any
         | 
| 30 | 
            +
                      Regexp::Syntax::Token::Meta
         | 
| 31 | 
            +
                    elsif self <= EscapeSequence::Base
         | 
| 32 | 
            +
                      Regexp::Syntax::Token::Escape
         | 
| 33 | 
            +
                    else
         | 
| 34 | 
            +
                      Regexp::Syntax::Token.const_get(name.split('::')[2])
         | 
| 35 | 
            +
                    end
         | 
| 36 | 
            +
                  end
         | 
| 37 | 
            +
                end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                def token_class
         | 
| 40 | 
            +
                  self.class.token_class
         | 
| 41 | 
            +
                end
         | 
| 42 | 
            +
              end
         | 
| 43 | 
            +
            end
         | 
| @@ -112,7 +112,7 @@ module Regexp::Expression | |
| 112 112 | 
             
                end
         | 
| 113 113 |  | 
| 114 114 | 
             
                def inner_match_length
         | 
| 115 | 
            -
                  dummy = Regexp::Expression::Root. | 
| 115 | 
            +
                  dummy = Regexp::Expression::Root.construct
         | 
| 116 116 | 
             
                  dummy.expressions = expressions.map(&:clone)
         | 
| 117 117 | 
             
                  dummy.quantifier = quantifier && quantifier.clone
         | 
| 118 118 | 
             
                  dummy.match_length
         | 
| @@ -43,7 +43,7 @@ module Regexp::Expression | |
| 43 43 |  | 
| 44 44 | 
             
                  # Order is important! Fields that use other fields in their
         | 
| 45 45 | 
             
                  # definition must appear before the fields they use.
         | 
| 46 | 
            -
                  part_keys = %w | 
| 46 | 
            +
                  part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
         | 
| 47 47 | 
             
                  part.keys.each {|k| part[k] = "<?#{k}?>"}
         | 
| 48 48 |  | 
| 49 49 | 
             
                  part['>'] = print_level ? ('  ' * (print_level + indent_offset)) : ''
         | 
| @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            module Regexp::Expression
         | 
| 2 | 
            -
               | 
| 2 | 
            +
              module Shared
         | 
| 3 3 |  | 
| 4 4 | 
             
                # Test if this expression has the given test_type, which can be either
         | 
| 5 5 | 
             
                # a symbol or an array of symbols to check against the expression's type.
         | 
| @@ -93,5 +93,14 @@ module Regexp::Expression | |
| 93 93 | 
             
                          "Array, Hash, or Symbol expected, #{scope.class.name} given"
         | 
| 94 94 | 
             
                  end
         | 
| 95 95 | 
             
                end
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                # Deep-compare two expressions for equality.
         | 
| 98 | 
            +
                def ==(other)
         | 
| 99 | 
            +
                  other.class == self.class &&
         | 
| 100 | 
            +
                    other.to_s == to_s &&
         | 
| 101 | 
            +
                    other.options == options
         | 
| 102 | 
            +
                end
         | 
| 103 | 
            +
                alias :=== :==
         | 
| 104 | 
            +
                alias :eql? :==
         | 
| 96 105 | 
             
              end
         | 
| 97 106 | 
             
            end
         | 
| @@ -1,26 +1,24 @@ | |
| 1 1 | 
             
            module Regexp::Expression
         | 
| 2 | 
            +
              # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
         | 
| 3 | 
            +
              # call super in #initialize, but raise in #quantifier= and #quantify,
         | 
| 4 | 
            +
              # or introduce an Expression::Quantifiable intermediate class.
         | 
| 5 | 
            +
              # Or actually allow chaining as a more concise but tricky solution than PR#69.
         | 
| 2 6 | 
             
              class Quantifier
         | 
| 3 | 
            -
                 | 
| 7 | 
            +
                include Regexp::Expression::Shared
         | 
| 4 8 |  | 
| 5 | 
            -
                 | 
| 9 | 
            +
                MODES = %i[greedy possessive reluctant]
         | 
| 6 10 |  | 
| 7 | 
            -
                 | 
| 8 | 
            -
                  @token = token
         | 
| 9 | 
            -
                  @text  = text
         | 
| 10 | 
            -
                  @mode  = mode
         | 
| 11 | 
            -
                  @min   = min
         | 
| 12 | 
            -
                  @max   = max
         | 
| 13 | 
            -
                end
         | 
| 11 | 
            +
                attr_reader :min, :max, :mode
         | 
| 14 12 |  | 
| 15 | 
            -
                def  | 
| 16 | 
            -
                   | 
| 17 | 
            -
                  super
         | 
| 18 | 
            -
                end
         | 
| 13 | 
            +
                def initialize(*args)
         | 
| 14 | 
            +
                  deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
         | 
| 19 15 |  | 
| 20 | 
            -
             | 
| 21 | 
            -
                   | 
| 16 | 
            +
                  init_from_token_and_options(*args)
         | 
| 17 | 
            +
                  @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
         | 
| 18 | 
            +
                  @min, @max = minmax
         | 
| 19 | 
            +
                  # TODO: remove in v3.0.0, stop removing parts of #token (?)
         | 
| 20 | 
            +
                  self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
         | 
| 22 21 | 
             
                end
         | 
| 23 | 
            -
                alias :to_str :to_s
         | 
| 24 22 |  | 
| 25 23 | 
             
                def to_h
         | 
| 26 24 | 
             
                  {
         | 
| @@ -41,13 +39,33 @@ module Regexp::Expression | |
| 41 39 | 
             
                end
         | 
| 42 40 | 
             
                alias :lazy? :reluctant?
         | 
| 43 41 |  | 
| 44 | 
            -
                 | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 42 | 
            +
                private
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                def deprecated_old_init(token, text, min, max, mode = :greedy)
         | 
| 45 | 
            +
                  warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
         | 
| 46 | 
            +
                       "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
         | 
| 47 | 
            +
                       "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
         | 
| 48 | 
            +
                       "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
         | 
| 49 | 
            +
                       "will be derived automatically.\n"\
         | 
| 50 | 
            +
                       "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
         | 
| 51 | 
            +
                       "This is consistent with how Expression::Base instances are created. "
         | 
| 52 | 
            +
                  @token = token
         | 
| 53 | 
            +
                  @text  = text
         | 
| 54 | 
            +
                  @min   = min
         | 
| 55 | 
            +
                  @max   = max
         | 
| 56 | 
            +
                  @mode  = mode
         | 
| 57 | 
            +
                end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                def minmax
         | 
| 60 | 
            +
                  case token
         | 
| 61 | 
            +
                  when /zero_or_one/  then [0, 1]
         | 
| 62 | 
            +
                  when /zero_or_more/ then [0, -1]
         | 
| 63 | 
            +
                  when /one_or_more/  then [1, -1]
         | 
| 64 | 
            +
                  when :interval
         | 
| 65 | 
            +
                    int_min = text[/\{(\d*)/, 1]
         | 
| 66 | 
            +
                    int_max = text[/,?(\d*)\}/, 1]
         | 
| 67 | 
            +
                    [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
         | 
| 68 | 
            +
                  end
         | 
| 50 69 | 
             
                end
         | 
| 51 | 
            -
                alias :eq :==
         | 
| 52 70 | 
             
              end
         | 
| 53 71 | 
             
            end
         | 
| @@ -1,5 +1,4 @@ | |
| 1 1 | 
             
            module Regexp::Expression
         | 
| 2 | 
            -
             | 
| 3 2 | 
             
              # A sequence of expressions. Differs from a Subexpression by how it handles
         | 
| 4 3 | 
             
              # quantifiers, as it applies them to its last element instead of itself as
         | 
| 5 4 | 
             
              # a whole subexpression.
         | 
| @@ -8,31 +7,17 @@ module Regexp::Expression | |
| 8 7 | 
             
              # branches, and CharacterSet::Intersection intersected sequences.
         | 
| 9 8 | 
             
              class Sequence < Regexp::Expression::Subexpression
         | 
| 10 9 | 
             
                class << self
         | 
| 11 | 
            -
                  def add_to( | 
| 12 | 
            -
                    sequence =  | 
| 13 | 
            -
                       | 
| 14 | 
            -
                       | 
| 15 | 
            -
                      params[:conditional_level] ||  | 
| 10 | 
            +
                  def add_to(exp, params = {}, active_opts = {})
         | 
| 11 | 
            +
                    sequence = construct(
         | 
| 12 | 
            +
                      level:             exp.level,
         | 
| 13 | 
            +
                      set_level:         exp.set_level,
         | 
| 14 | 
            +
                      conditional_level: params[:conditional_level] || exp.conditional_level,
         | 
| 16 15 | 
             
                    )
         | 
| 17 | 
            -
                    sequence.nesting_level =  | 
| 16 | 
            +
                    sequence.nesting_level = exp.nesting_level + 1
         | 
| 18 17 | 
             
                    sequence.options = active_opts
         | 
| 19 | 
            -
                     | 
| 18 | 
            +
                    exp.expressions << sequence
         | 
| 20 19 | 
             
                    sequence
         | 
| 21 20 | 
             
                  end
         | 
| 22 | 
            -
             | 
| 23 | 
            -
                  def at_levels(level, set_level, conditional_level)
         | 
| 24 | 
            -
                    token = Regexp::Token.new(
         | 
| 25 | 
            -
                      :expression,
         | 
| 26 | 
            -
                      :sequence,
         | 
| 27 | 
            -
                      '',
         | 
| 28 | 
            -
                      nil, # ts
         | 
| 29 | 
            -
                      nil, # te
         | 
| 30 | 
            -
                      level,
         | 
| 31 | 
            -
                      set_level,
         | 
| 32 | 
            -
                      conditional_level
         | 
| 33 | 
            -
                    )
         | 
| 34 | 
            -
                    new(token)
         | 
| 35 | 
            -
                  end
         | 
| 36 21 | 
             
                end
         | 
| 37 22 |  | 
| 38 23 | 
             
                def starts_at
         | 
| @@ -40,12 +25,12 @@ module Regexp::Expression | |
| 40 25 | 
             
                end
         | 
| 41 26 | 
             
                alias :ts :starts_at
         | 
| 42 27 |  | 
| 43 | 
            -
                def quantify( | 
| 28 | 
            +
                def quantify(*args)
         | 
| 44 29 | 
             
                  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
         | 
| 45 30 | 
             
                  target or raise Regexp::Parser::Error,
         | 
| 46 31 | 
             
                    "No valid target found for '#{text}' quantifier"
         | 
| 47 32 |  | 
| 48 | 
            -
                  target.quantify( | 
| 33 | 
            +
                  target.quantify(*args)
         | 
| 49 34 | 
             
                end
         | 
| 50 35 | 
             
              end
         | 
| 51 36 | 
             
            end
         | 
| @@ -0,0 +1,85 @@ | |
| 1 | 
            +
            module Regexp::Expression
         | 
| 2 | 
            +
              module Shared
         | 
| 3 | 
            +
                module ClassMethods; end # filled in ./methods/*.rb
         | 
| 4 | 
            +
             | 
| 5 | 
            +
                def self.included(mod)
         | 
| 6 | 
            +
                  mod.class_eval do
         | 
| 7 | 
            +
                    extend Shared::ClassMethods
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                    attr_accessor :type, :token, :text, :ts, :te,
         | 
| 10 | 
            +
                                  :level, :set_level, :conditional_level,
         | 
| 11 | 
            +
                                  :options, :quantifier
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                    attr_reader   :nesting_level
         | 
| 14 | 
            +
                  end
         | 
| 15 | 
            +
                end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                def init_from_token_and_options(token, options = {})
         | 
| 18 | 
            +
                  self.type              = token.type
         | 
| 19 | 
            +
                  self.token             = token.token
         | 
| 20 | 
            +
                  self.text              = token.text
         | 
| 21 | 
            +
                  self.ts                = token.ts
         | 
| 22 | 
            +
                  self.te                = token.te
         | 
| 23 | 
            +
                  self.level             = token.level
         | 
| 24 | 
            +
                  self.set_level         = token.set_level
         | 
| 25 | 
            +
                  self.conditional_level = token.conditional_level
         | 
| 26 | 
            +
                  self.nesting_level     = 0
         | 
| 27 | 
            +
                  self.options           = options || {}
         | 
| 28 | 
            +
                end
         | 
| 29 | 
            +
                private :init_from_token_and_options
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                def initialize_copy(orig)
         | 
| 32 | 
            +
                  self.text       = orig.text.dup         if orig.text
         | 
| 33 | 
            +
                  self.options    = orig.options.dup      if orig.options
         | 
| 34 | 
            +
                  self.quantifier = orig.quantifier.clone if orig.quantifier
         | 
| 35 | 
            +
                  super
         | 
| 36 | 
            +
                end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                def starts_at
         | 
| 39 | 
            +
                  ts
         | 
| 40 | 
            +
                end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                def base_length
         | 
| 43 | 
            +
                  to_s(:base).length
         | 
| 44 | 
            +
                end
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                def full_length
         | 
| 47 | 
            +
                  to_s.length
         | 
| 48 | 
            +
                end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                def to_s(format = :full)
         | 
| 51 | 
            +
                  "#{parts.join}#{quantifier_affix(format)}"
         | 
| 52 | 
            +
                end
         | 
| 53 | 
            +
                alias :to_str :to_s
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                def parts
         | 
| 56 | 
            +
                  [text.dup]
         | 
| 57 | 
            +
                end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                def quantifier_affix(expression_format)
         | 
| 60 | 
            +
                  quantifier.to_s if quantified? && expression_format != :base
         | 
| 61 | 
            +
                end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                def quantified?
         | 
| 64 | 
            +
                  !quantifier.nil?
         | 
| 65 | 
            +
                end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                def offset
         | 
| 68 | 
            +
                  [starts_at, full_length]
         | 
| 69 | 
            +
                end
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                def coded_offset
         | 
| 72 | 
            +
                  '@%d+%d' % offset
         | 
| 73 | 
            +
                end
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                def terminal?
         | 
| 76 | 
            +
                  !respond_to?(:expressions)
         | 
| 77 | 
            +
                end
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                def nesting_level=(lvl)
         | 
| 80 | 
            +
                  @nesting_level = lvl
         | 
| 81 | 
            +
                  quantifier && quantifier.nesting_level = lvl
         | 
| 82 | 
            +
                  terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
         | 
| 83 | 
            +
                end
         | 
| 84 | 
            +
              end
         | 
| 85 | 
            +
            end
         | 
| @@ -1,14 +1,12 @@ | |
| 1 1 | 
             
            module Regexp::Expression
         | 
| 2 | 
            -
             | 
| 3 2 | 
             
              class Subexpression < Regexp::Expression::Base
         | 
| 4 3 | 
             
                include Enumerable
         | 
| 5 4 |  | 
| 6 5 | 
             
                attr_accessor :expressions
         | 
| 7 6 |  | 
| 8 7 | 
             
                def initialize(token, options = {})
         | 
| 9 | 
            -
                  super
         | 
| 10 | 
            -
             | 
| 11 8 | 
             
                  self.expressions = []
         | 
| 9 | 
            +
                  super
         | 
| 12 10 | 
             
                end
         | 
| 13 11 |  | 
| 14 12 | 
             
                # Override base method to clone the expressions as well.
         | 
| @@ -44,16 +42,21 @@ module Regexp::Expression | |
| 44 42 | 
             
                  ts + to_s.length
         | 
| 45 43 | 
             
                end
         | 
| 46 44 |  | 
| 47 | 
            -
                def to_s(format = :full)
         | 
| 48 | 
            -
                   | 
| 49 | 
            -
                  "#{expressions.join}#{quantifier_affix(format)}"
         | 
| 45 | 
            +
                def parts
         | 
| 46 | 
            +
                  expressions
         | 
| 50 47 | 
             
                end
         | 
| 51 48 |  | 
| 52 49 | 
             
                def to_h
         | 
| 53 | 
            -
                  attributes.merge({
         | 
| 50 | 
            +
                  attributes.merge(
         | 
| 54 51 | 
             
                    text:        to_s(:base),
         | 
| 55 52 | 
             
                    expressions: expressions.map(&:to_h)
         | 
| 56 | 
            -
                  })
         | 
| 53 | 
            +
                  )
         | 
| 54 | 
            +
                end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                private
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                def intersperse(expressions, separator)
         | 
| 59 | 
            +
                  expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
         | 
| 57 60 | 
             
                end
         | 
| 58 61 | 
             
              end
         | 
| 59 62 | 
             
            end
         | 
| @@ -1,130 +1,7 @@ | |
| 1 1 | 
             
            require 'regexp_parser/error'
         | 
| 2 2 |  | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 5 | 
            -
                attr_accessor :type, :token
         | 
| 6 | 
            -
                attr_accessor :text, :ts
         | 
| 7 | 
            -
                attr_accessor :level, :set_level, :conditional_level, :nesting_level
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                attr_accessor :quantifier
         | 
| 10 | 
            -
                attr_accessor :options
         | 
| 11 | 
            -
             | 
| 12 | 
            -
                def initialize(token, options = {})
         | 
| 13 | 
            -
                  self.type              = token.type
         | 
| 14 | 
            -
                  self.token             = token.token
         | 
| 15 | 
            -
                  self.text              = token.text
         | 
| 16 | 
            -
                  self.ts                = token.ts
         | 
| 17 | 
            -
                  self.level             = token.level
         | 
| 18 | 
            -
                  self.set_level         = token.set_level
         | 
| 19 | 
            -
                  self.conditional_level = token.conditional_level
         | 
| 20 | 
            -
                  self.nesting_level     = 0
         | 
| 21 | 
            -
                  self.quantifier        = nil
         | 
| 22 | 
            -
                  self.options           = options
         | 
| 23 | 
            -
                end
         | 
| 24 | 
            -
             | 
| 25 | 
            -
                def initialize_copy(orig)
         | 
| 26 | 
            -
                  self.text       = (orig.text       ? orig.text.dup         : nil)
         | 
| 27 | 
            -
                  self.options    = (orig.options    ? orig.options.dup      : nil)
         | 
| 28 | 
            -
                  self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
         | 
| 29 | 
            -
                  super
         | 
| 30 | 
            -
                end
         | 
| 31 | 
            -
             | 
| 32 | 
            -
                def to_re(format = :full)
         | 
| 33 | 
            -
                  ::Regexp.new(to_s(format))
         | 
| 34 | 
            -
                end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                alias :starts_at :ts
         | 
| 37 | 
            -
             | 
| 38 | 
            -
                def base_length
         | 
| 39 | 
            -
                  to_s(:base).length
         | 
| 40 | 
            -
                end
         | 
| 41 | 
            -
             | 
| 42 | 
            -
                def full_length
         | 
| 43 | 
            -
                  to_s.length
         | 
| 44 | 
            -
                end
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                def offset
         | 
| 47 | 
            -
                  [starts_at, full_length]
         | 
| 48 | 
            -
                end
         | 
| 49 | 
            -
             | 
| 50 | 
            -
                def coded_offset
         | 
| 51 | 
            -
                  '@%d+%d' % offset
         | 
| 52 | 
            -
                end
         | 
| 53 | 
            -
             | 
| 54 | 
            -
                def to_s(format = :full)
         | 
| 55 | 
            -
                  "#{text}#{quantifier_affix(format)}"
         | 
| 56 | 
            -
                end
         | 
| 57 | 
            -
             | 
| 58 | 
            -
                def quantifier_affix(expression_format)
         | 
| 59 | 
            -
                  quantifier.to_s if quantified? && expression_format != :base
         | 
| 60 | 
            -
                end
         | 
| 61 | 
            -
             | 
| 62 | 
            -
                def terminal?
         | 
| 63 | 
            -
                  !respond_to?(:expressions)
         | 
| 64 | 
            -
                end
         | 
| 65 | 
            -
             | 
| 66 | 
            -
                def quantify(token, text, min = nil, max = nil, mode = :greedy)
         | 
| 67 | 
            -
                  self.quantifier = Quantifier.new(token, text, min, max, mode)
         | 
| 68 | 
            -
                end
         | 
| 69 | 
            -
             | 
| 70 | 
            -
                def unquantified_clone
         | 
| 71 | 
            -
                  clone.tap { |exp| exp.quantifier = nil }
         | 
| 72 | 
            -
                end
         | 
| 73 | 
            -
             | 
| 74 | 
            -
                def quantified?
         | 
| 75 | 
            -
                  !quantifier.nil?
         | 
| 76 | 
            -
                end
         | 
| 77 | 
            -
             | 
| 78 | 
            -
                # Deprecated. Prefer `#repetitions` which has a more uniform interface.
         | 
| 79 | 
            -
                def quantity
         | 
| 80 | 
            -
                  return [nil,nil] unless quantified?
         | 
| 81 | 
            -
                  [quantifier.min, quantifier.max]
         | 
| 82 | 
            -
                end
         | 
| 83 | 
            -
             | 
| 84 | 
            -
                def repetitions
         | 
| 85 | 
            -
                  return 1..1 unless quantified?
         | 
| 86 | 
            -
                  min = quantifier.min
         | 
| 87 | 
            -
                  max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
         | 
| 88 | 
            -
                  range = min..max
         | 
| 89 | 
            -
                  # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
         | 
| 90 | 
            -
                  if RUBY_VERSION.to_f < 2.7
         | 
| 91 | 
            -
                    range.define_singleton_method(:minmax) { [min, max] }
         | 
| 92 | 
            -
                  end
         | 
| 93 | 
            -
                  range
         | 
| 94 | 
            -
                end
         | 
| 95 | 
            -
             | 
| 96 | 
            -
                def greedy?
         | 
| 97 | 
            -
                  quantified? and quantifier.greedy?
         | 
| 98 | 
            -
                end
         | 
| 99 | 
            -
             | 
| 100 | 
            -
                def reluctant?
         | 
| 101 | 
            -
                  quantified? and quantifier.reluctant?
         | 
| 102 | 
            -
                end
         | 
| 103 | 
            -
                alias :lazy? :reluctant?
         | 
| 104 | 
            -
             | 
| 105 | 
            -
                def possessive?
         | 
| 106 | 
            -
                  quantified? and quantifier.possessive?
         | 
| 107 | 
            -
                end
         | 
| 108 | 
            -
             | 
| 109 | 
            -
                def attributes
         | 
| 110 | 
            -
                  {
         | 
| 111 | 
            -
                    type:              type,
         | 
| 112 | 
            -
                    token:             token,
         | 
| 113 | 
            -
                    text:              to_s(:base),
         | 
| 114 | 
            -
                    starts_at:         ts,
         | 
| 115 | 
            -
                    length:            full_length,
         | 
| 116 | 
            -
                    level:             level,
         | 
| 117 | 
            -
                    set_level:         set_level,
         | 
| 118 | 
            -
                    conditional_level: conditional_level,
         | 
| 119 | 
            -
                    options:           options,
         | 
| 120 | 
            -
                    quantifier:        quantified? ? quantifier.to_h : nil,
         | 
| 121 | 
            -
                  }
         | 
| 122 | 
            -
                end
         | 
| 123 | 
            -
                alias :to_h :attributes
         | 
| 124 | 
            -
              end
         | 
| 125 | 
            -
             | 
| 126 | 
            -
            end # module Regexp::Expression
         | 
| 127 | 
            -
             | 
| 3 | 
            +
            require 'regexp_parser/expression/shared'
         | 
| 4 | 
            +
            require 'regexp_parser/expression/base'
         | 
| 128 5 | 
             
            require 'regexp_parser/expression/quantifier'
         | 
| 129 6 | 
             
            require 'regexp_parser/expression/subexpression'
         | 
| 130 7 | 
             
            require 'regexp_parser/expression/sequence'
         | 
| @@ -132,21 +9,22 @@ require 'regexp_parser/expression/sequence_operation' | |
| 132 9 |  | 
| 133 10 | 
             
            require 'regexp_parser/expression/classes/alternation'
         | 
| 134 11 | 
             
            require 'regexp_parser/expression/classes/anchor'
         | 
| 135 | 
            -
            require 'regexp_parser/expression/classes/backref'
         | 
| 12 | 
            +
            require 'regexp_parser/expression/classes/backreference'
         | 
| 13 | 
            +
            require 'regexp_parser/expression/classes/character_set'
         | 
| 14 | 
            +
            require 'regexp_parser/expression/classes/character_set/intersection'
         | 
| 15 | 
            +
            require 'regexp_parser/expression/classes/character_set/range'
         | 
| 16 | 
            +
            require 'regexp_parser/expression/classes/character_type'
         | 
| 136 17 | 
             
            require 'regexp_parser/expression/classes/conditional'
         | 
| 137 | 
            -
            require 'regexp_parser/expression/classes/escape'
         | 
| 18 | 
            +
            require 'regexp_parser/expression/classes/escape_sequence'
         | 
| 138 19 | 
             
            require 'regexp_parser/expression/classes/free_space'
         | 
| 139 20 | 
             
            require 'regexp_parser/expression/classes/group'
         | 
| 140 21 | 
             
            require 'regexp_parser/expression/classes/keep'
         | 
| 141 22 | 
             
            require 'regexp_parser/expression/classes/literal'
         | 
| 142 23 | 
             
            require 'regexp_parser/expression/classes/posix_class'
         | 
| 143 | 
            -
            require 'regexp_parser/expression/classes/property'
         | 
| 144 24 | 
             
            require 'regexp_parser/expression/classes/root'
         | 
| 145 | 
            -
            require 'regexp_parser/expression/classes/set'
         | 
| 146 | 
            -
            require 'regexp_parser/expression/classes/set/intersection'
         | 
| 147 | 
            -
            require 'regexp_parser/expression/classes/set/range'
         | 
| 148 | 
            -
            require 'regexp_parser/expression/classes/type'
         | 
| 25 | 
            +
            require 'regexp_parser/expression/classes/unicode_property'
         | 
| 149 26 |  | 
| 27 | 
            +
            require 'regexp_parser/expression/methods/construct'
         | 
| 150 28 | 
             
            require 'regexp_parser/expression/methods/match'
         | 
| 151 29 | 
             
            require 'regexp_parser/expression/methods/match_length'
         | 
| 152 30 | 
             
            require 'regexp_parser/expression/methods/options'
         | 
    
        data/lib/regexp_parser/lexer.rb
    CHANGED
    
    | @@ -4,19 +4,21 @@ | |
| 4 4 | 
             
            # given syntax flavor.
         | 
| 5 5 | 
             
            class Regexp::Lexer
         | 
| 6 6 |  | 
| 7 | 
            -
              OPENING_TOKENS = [
         | 
| 8 | 
            -
                :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
         | 
| 9 | 
            -
                :atomic, :options, :options_switch, :named, :absence
         | 
| 7 | 
            +
              OPENING_TOKENS = %i[
         | 
| 8 | 
            +
                capture passive lookahead nlookahead lookbehind nlookbehind
         | 
| 9 | 
            +
                atomic options options_switch named absence
         | 
| 10 10 | 
             
              ].freeze
         | 
| 11 11 |  | 
| 12 | 
            -
              CLOSING_TOKENS = [:close].freeze
         | 
| 12 | 
            +
              CLOSING_TOKENS = %i[close].freeze
         | 
| 13 | 
            +
             | 
| 14 | 
            +
              CONDITION_TOKENS = %i[condition condition_close].freeze
         | 
| 13 15 |  | 
| 14 16 | 
             
              def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
         | 
| 15 17 | 
             
                new.lex(input, syntax, options: options, &block)
         | 
| 16 18 | 
             
              end
         | 
| 17 19 |  | 
| 18 20 | 
             
              def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
         | 
| 19 | 
            -
                syntax = Regexp::Syntax.new(syntax)
         | 
| 21 | 
            +
                syntax = Regexp::Syntax.for(syntax)
         | 
| 20 22 |  | 
| 21 23 | 
             
                self.tokens = []
         | 
| 22 24 | 
             
                self.nesting = 0
         | 
| @@ -40,7 +42,7 @@ class Regexp::Lexer | |
| 40 42 | 
             
                                              nesting, set_nesting, conditional_nesting)
         | 
| 41 43 |  | 
| 42 44 | 
             
                  current = merge_condition(current) if type == :conditional and
         | 
| 43 | 
            -
                    [:condition, :condition_close].include?(token)
         | 
| 45 | 
            +
                    CONDITION_TOKENS.include?(token)
         | 
| 44 46 |  | 
| 45 47 | 
             
                  last.next = current if last
         | 
| 46 48 | 
             
                  current.previous = last if last
         |