RubyGems - regexp_parser - Versions diffs - 2.3.0 → 2.5.0 - Mend

regexp_parser 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +63 -6
data/Gemfile +1 -0
data/README.md +12 -6
data/lib/regexp_parser/error.rb +1 -1
data/lib/regexp_parser/expression/base.rb +9 -57
data/lib/regexp_parser/expression/classes/backreference.rb +1 -0
data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
data/lib/regexp_parser/expression/classes/group.rb +6 -6
data/lib/regexp_parser/expression/classes/keep.rb +2 -0
data/lib/regexp_parser/expression/classes/root.rb +3 -5
data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
data/lib/regexp_parser/expression/methods/construct.rb +43 -0
data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
data/lib/regexp_parser/expression/methods/tests.rb +10 -1
data/lib/regexp_parser/expression/quantifier.rb +41 -23
data/lib/regexp_parser/expression/sequence.rb +9 -23
data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
data/lib/regexp_parser/expression/shared.rb +85 -0
data/lib/regexp_parser/expression/subexpression.rb +11 -7
data/lib/regexp_parser/expression.rb +4 -2
data/lib/regexp_parser/parser.rb +21 -72
data/lib/regexp_parser/scanner/property.rl +1 -1
data/lib/regexp_parser/scanner/scanner.rl +42 -31
data/lib/regexp_parser/scanner.rb +725 -793
data/lib/regexp_parser/syntax/token/escape.rb +1 -1
data/lib/regexp_parser/syntax/token/unicode_property.rb +0 -5
data/lib/regexp_parser/version.rb +1 -1
metadata +10 -8

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 369b108d8410e12bd6af5c659f58cb56c583e48780c1b35b6270bb21cc6a4ee7
-  data.tar.gz: 30cd2c0823ae154a2db04c705f898f252774ec8ab9ef304833c5e3546ba7406a
+  metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
+  data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
 SHA512:
-  metadata.gz: 4104bec7dd02a7ea099de9aeacb766fb1a2db50cb52bd84f44e4bde93431d436b75d0f1b3f4d62242713a1eeca3f4d8c0be034270d515979aad8ad2d504880b0
-  data.tar.gz: 11deb2d7c8a6fad3fa9cb18b3f29cae15bab7e12e6cbbc968706dd02c16b0d1a6b1d69f05a5f665f7b46947315b0ea4ecda62dab8ddca8b5ef71f521b877da74
+  metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
+  data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,62 @@
 ## [Unreleased]
+### Added
+- `Regexp::Expression::Base.construct` and `.token_class` methods
+## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
+  - they used to be treated as reluctant or possessive mode indicators
+  - however, Ruby does not support these modes for interval quantifiers
+  - they are now treated as chained quantifiers instead, as Ruby does it
+  - c.f. [#3](https://github.com/ammar/regexp_parser/issues/3)
+- fixed `Expression::Base#nesting_level` for some tree rewrite cases
+  - e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
+- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
+  - they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
+  - they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
+### Added
+- added `Expression::Base#==` for (deep) comparison of expressions
+- added `Expression::Base#parts`
+  - returns the text elements and subexpressions of an expression
+  - e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
+- added `Expression::Base#te` (a.k.a. token end index)
+  - `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
+- made some `Expression::Base` methods available on `Quantifier` instances, too
+  - `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
+  - `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
+  - `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
+  - this allows a more unified handling with `Expression::Base` instances
+- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
+- added a deprecation warning for initializing Quantifiers with 4+ arguments:
+    Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
+    is deprecated.
+    It will no longer be supported in regexp_parser v3.0.0.
+    Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
+    with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
+    will be derived automatically.
+    Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
+    This is consistent with how Expression::Base instances are created.
+## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- removed five inexistent unicode properties from `Syntax#features`
+  - these were never supported by Ruby or the `Regexp::Scanner`
+  - thanks to [Markus Schirp](https://github.com/mbj) for the report
 ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
 ### Added
@@ -180,7 +237,7 @@
 ### Added
-- `Expression#each_expression` and `#traverse` can now be called without a block
+- `Expression::Base#each_expression` and `#traverse` can now be called without a block
   * this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
   * thanks to [Masataka Kuwabara](https://github.com/pocke)
@@ -206,7 +263,7 @@
 - Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
 - Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
 - Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
-- Fixed `Expression#match` and `#=~` not working with a single argument
+- Fixed `Expression::Base#match` and `#=~` not working with a single argument
 ### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
@@ -214,15 +271,15 @@
 - Added `#referenced_expression` for backrefs, subexp calls and conditionals
   * returns the `Group` expression that is being referenced via name or number
-- Added `Expression#repetitions`
+- Added `Expression::Base#repetitions`
   * returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
   * like `#quantity` but with a more uniform interface
-- Added `Expression#match_length`
+- Added `Expression::Base#match_length`
   * allows to inspect and iterate over String lengths matched by the Expression
 ### Fixed
-- Fixed `Expression#clone` "direction"
+- Fixed `Expression::Base#clone` "direction"
   * it used to dup ivars onto the callee, leaving only the clone referencing the original objects
   * this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
 - Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
@@ -384,7 +441,7 @@ This release includes several breaking changes, mostly to character sets, #map a
 - Fixed a thread safety issue (issue #45)
 - Some public class methods that were only reliable for
   internal use are now private instance methods (PR #46)
-- Improved the usefulness of Expression#options (issue #43) -
+- Improved the usefulness of Expression::Base#options (issue #43) -
   #options and derived methods such as #i?, #m? and #x? are now
   defined for all Expressions that are affected by such flags.
 - Fixed scanning of whitespace following (?x) (commit 5c94bd2)

data/Gemfile CHANGED Viewed

@@ -8,6 +8,7 @@ group :development, :test do
   gem 'regexp_property_values', '~> 1.3'
   gem 'rspec', '~> 3.10'
   if RUBY_VERSION.to_f >= 2.7
+    gem 'benchmark-ips', '~> 2.1'
     gem 'gouteur'
     gem 'rubocop', '~> 1.7'
   end

data/README.md CHANGED Viewed

@@ -367,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
 | **POSIX Classes**                     | `[:alpha:]`, `[:^digit:]`                               | &#x2713; |
 | **Quantifiers**                       |                                                         | &#x22f1; |
 | &emsp;&nbsp;_**Greedy**_              | `?`, `*`, `+`, `{m,M}`                                  | &#x2713; |
-| &emsp;&nbsp;_**Reluctant** (Lazy)_    | `??`, `*?`, `+?`, `{m,M}?`                              | &#x2713; |
-| &emsp;&nbsp;_**Possessive**_          | `?+`, `*+`, `++`, `{m,M}+`                              | &#x2713; |
+| &emsp;&nbsp;_**Reluctant** (Lazy)_    | `??`, `*?`, `+?` \[1\]                                  | &#x2713; |
+| &emsp;&nbsp;_**Possessive**_          | `?+`, `*+`, `++` \[1\]                                  | &#x2713; |
 | **String Escapes**                    |                                                         | &#x22f1; |
-| &emsp;&nbsp;_**Control** \[1\]_       | `\C-C`, `\cD`                                           | &#x2713; |
+| &emsp;&nbsp;_**Control** \[2\]_       | `\C-C`, `\cD`                                           | &#x2713; |
 | &emsp;&nbsp;_**Hex**_                 | `\x20`, `\x{701230}`                                    | &#x2713; |
-| &emsp;&nbsp;_**Meta** \[1\]_          | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C`        | &#x2713; |
+| &emsp;&nbsp;_**Meta** \[2\]_          | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C`        | &#x2713; |
 | &emsp;&nbsp;_**Octal**_               | `\0`, `\01`, `\012`                                     | &#x2713; |
 | &emsp;&nbsp;_**Unicode**_             | `\uHHHH`, `\u{H+ H+}`                                   | &#x2713; |
 | **Unicode Properties**                | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
@@ -384,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
 | &emsp;&nbsp;_**Scripts**_             | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}`              | &#x2713; |
 | &emsp;&nbsp;_**Simple**_              | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}`               | &#x2713; |
-**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
+**\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
+quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
+[#69](https://github.com/ammar/regexp_parser/pull/69).
+**\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
  https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
 scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
@@ -443,12 +447,14 @@ Projects using regexp_parser.
 - [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
-- [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
+- [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
 - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
 - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
+- [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
 - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
 - [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.

data/lib/regexp_parser/error.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 class Regexp::Parser
-  # base class for all gem-specific errors (inherited but never raised itself)
+  # base class for all gem-specific errors
   class Error < StandardError; end
 end

data/lib/regexp_parser/expression/base.rb CHANGED Viewed

@@ -1,29 +1,15 @@
 module Regexp::Expression
   class Base
-    attr_accessor :type, :token
-    attr_accessor :text, :ts
-    attr_accessor :level, :set_level, :conditional_level, :nesting_level
-    attr_accessor :quantifier
-    attr_accessor :options
+    include Regexp::Expression::Shared
     def initialize(token, options = {})
-      self.type              = token.type
-      self.token             = token.token
-      self.text              = token.text
-      self.ts                = token.ts
-      self.level             = token.level
-      self.set_level         = token.set_level
-      self.conditional_level = token.conditional_level
-      self.nesting_level     = 0
-      self.quantifier        = nil
-      self.options           = options
+      init_from_token_and_options(token, options)
     end
     def initialize_copy(orig)
-      self.text       = (orig.text       ? orig.text.dup         : nil)
-      self.options    = (orig.options    ? orig.options.dup      : nil)
-      self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
+      self.text       = orig.text.dup         if orig.text
+      self.options    = orig.options.dup      if orig.options
+      self.quantifier = orig.quantifier.clone if orig.quantifier
       super
     end
@@ -31,48 +17,14 @@ module Regexp::Expression
       ::Regexp.new(to_s(format))
     end
-    alias :starts_at :ts
-    def base_length
-      to_s(:base).length
-    end
-    def full_length
-      to_s.length
-    end
-    def offset
-      [starts_at, full_length]
-    end
-    def coded_offset
-      '@%d+%d' % offset
-    end
-    def to_s(format = :full)
-      "#{text}#{quantifier_affix(format)}"
-    end
-    def quantifier_affix(expression_format)
-      quantifier.to_s if quantified? && expression_format != :base
-    end
-    def terminal?
-      !respond_to?(:expressions)
-    end
-    def quantify(token, text, min = nil, max = nil, mode = :greedy)
-      self.quantifier = Quantifier.new(token, text, min, max, mode)
+    def quantify(*args)
+      self.quantifier = Quantifier.new(*args)
     end
     def unquantified_clone
       clone.tap { |exp| exp.quantifier = nil }
     end
-    def quantified?
-      !quantifier.nil?
-    end
     # Deprecated. Prefer `#repetitions` which has a more uniform interface.
     def quantity
       return [nil,nil] unless quantified?
@@ -104,7 +56,7 @@ module Regexp::Expression
       quantified? and quantifier.possessive?
     end
-    def attributes
+    def to_h
       {
         type:              type,
         token:             token,
@@ -118,6 +70,6 @@ module Regexp::Expression
         quantifier:        quantified? ? quantifier.to_h : nil,
       }
     end
-    alias :to_h :attributes
+    alias :attributes :to_h
   end
 end

data/lib/regexp_parser/expression/classes/backreference.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 module Regexp::Expression
+  # TODO: unify name with token :backref, one way or the other, in v3.0.0
   module Backreference
     class Base < Regexp::Expression::Base
       attr_accessor :referenced_expression

data/lib/regexp_parser/expression/classes/character_set/range.rb CHANGED Viewed

@@ -16,8 +16,8 @@ module Regexp::Expression
         count == 2
       end
-      def to_s(_format = :full)
-        expressions.join(text)
+      def parts
+        intersperse(expressions, text.dup)
       end
     end
   end

data/lib/regexp_parser/expression/classes/character_set.rb CHANGED Viewed

@@ -20,8 +20,8 @@ module Regexp::Expression
       self.closed = true
     end
-    def to_s(format = :full)
-      "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
+    def parts
+      ["#{text}#{'^' if negated?}", *expressions, ']']
     end
   end
 end # module Regexp::Expression

data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} RENAMED Viewed

File without changes

data/lib/regexp_parser/expression/classes/conditional.rb CHANGED Viewed

@@ -55,8 +55,8 @@ module Regexp::Expression
         condition.reference
       end
-      def to_s(format = :full)
-        "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
+      def parts
+        [text.dup, condition, *intersperse(branches, '|'), ')']
       end
       def initialize_copy(orig)

data/lib/regexp_parser/expression/classes/free_space.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module Regexp::Expression
   class FreeSpace < Regexp::Expression::Base
-    def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
+    def quantify(*_args)
       raise Regexp::Parser::Error, 'Can not quantify a free space object'
     end
   end

data/lib/regexp_parser/expression/classes/group.rb CHANGED Viewed

@@ -1,8 +1,8 @@
 module Regexp::Expression
   module Group
     class Base < Regexp::Expression::Subexpression
-      def to_s(format = :full)
-        "#{text}#{expressions.join})#{quantifier_affix(format)}"
+      def parts
+        [text.dup, *expressions, ')']
       end
       def capturing?; false end
@@ -18,9 +18,9 @@ module Regexp::Expression
         super
       end
-      def to_s(format = :full)
+      def parts
         if implicit?
-          "#{expressions.join}#{quantifier_affix(format)}"
+          expressions
         else
           super
         end
@@ -65,8 +65,8 @@ module Regexp::Expression
     end
     class Comment < Group::Base
-      def to_s(_format = :full)
-        text.dup
+      def parts
+        [text.dup]
       end
       def comment?; true end

data/lib/regexp_parser/expression/classes/keep.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 module Regexp::Expression
   module Keep
+    # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
+    #       that contains all expressions to its left.
     class Mark < Regexp::Expression::Base; end
   end
 end

data/lib/regexp_parser/expression/classes/root.rb CHANGED Viewed

@@ -1,11 +1,9 @@
 module Regexp::Expression
   class Root < Regexp::Expression::Subexpression
     def self.build(options = {})
-      new(build_token, options)
-    end
-    def self.build_token
-      Regexp::Token.new(:expression, :root, '', 0)
+      warn "`#{self.class}.build(options)` is deprecated and will raise in "\
+           "regexp_parser v3.0.0. Please use `.construct(options: options)`."
+      construct(options: options)
     end
   end
 end

data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} RENAMED Viewed

@@ -1,4 +1,5 @@
 module Regexp::Expression
+  # TODO: unify name with token :property, one way or the other, in v3.0.0
   module UnicodeProperty
     class Base < Regexp::Expression::Base
       def negative?

data/lib/regexp_parser/expression/methods/construct.rb ADDED Viewed

@@ -0,0 +1,43 @@
+module Regexp::Expression
+  module Shared
+    module ClassMethods
+      # Convenience method to init a valid Expression without a Regexp::Token
+      def construct(params = {})
+        attrs = construct_defaults.merge(params)
+        options = attrs.delete(:options)
+        token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
+        token = Regexp::Token.new(*token_args)
+        raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
+        new(token, options)
+      end
+      def construct_defaults
+        if self == Root
+          { type: :expression, token: :root, ts: 0 }
+        elsif self < Sequence
+          { type: :expression, token: :sequence }
+        else
+          { type: token_class::Type }
+        end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
+      end
+      def token_class
+        if self == Root || self < Sequence
+          nil # no token class because these objects are Parser-generated
+        # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
+        elsif self == Alternation || self == CharacterType::Any
+          Regexp::Syntax::Token::Meta
+        elsif self <= EscapeSequence::Base
+          Regexp::Syntax::Token::Escape
+        else
+          Regexp::Syntax::Token.const_get(name.split('::')[2])
+        end
+      end
+    end
+    def token_class
+      self.class.token_class
+    end
+  end
+end

data/lib/regexp_parser/expression/methods/match_length.rb CHANGED Viewed

@@ -112,7 +112,7 @@ module Regexp::Expression
     end
     def inner_match_length
-      dummy = Regexp::Expression::Root.build
+      dummy = Regexp::Expression::Root.construct
       dummy.expressions = expressions.map(&:clone)
       dummy.quantifier = quantifier && quantifier.clone
       dummy.match_length

data/lib/regexp_parser/expression/methods/tests.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Regexp::Expression
-  class Base
+  module Shared
     # Test if this expression has the given test_type, which can be either
     # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,14 @@ module Regexp::Expression
               "Array, Hash, or Symbol expected, #{scope.class.name} given"
       end
     end
+    # Deep-compare two expressions for equality.
+    def ==(other)
+      other.class == self.class &&
+        other.to_s == to_s &&
+        other.options == options
+    end
+    alias :=== :==
+    alias :eql? :==
   end
 end

data/lib/regexp_parser/expression/quantifier.rb CHANGED Viewed

@@ -1,26 +1,24 @@
 module Regexp::Expression
+  # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
+  # call super in #initialize, but raise in #quantifier= and #quantify,
+  # or introduce an Expression::Quantifiable intermediate class.
+  # Or actually allow chaining as a more concise but tricky solution than PR#69.
   class Quantifier
+    include Regexp::Expression::Shared
     MODES = %i[greedy possessive reluctant]
-    attr_reader :token, :text, :min, :max, :mode
+    attr_reader :min, :max, :mode
-    def initialize(token, text, min, max, mode)
-      @token = token
-      @text  = text
-      @mode  = mode
-      @min   = min
-      @max   = max
-    end
-    def initialize_copy(orig)
-      @text = orig.text.dup
-      super
-    end
+    def initialize(*args)
+      deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
-    def to_s
-      text.dup
+      init_from_token_and_options(*args)
+      @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
+      @min, @max = minmax
+      # TODO: remove in v3.0.0, stop removing parts of #token (?)
+      self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
     end
-    alias :to_str :to_s
     def to_h
       {
@@ -41,13 +39,33 @@ module Regexp::Expression
     end
     alias :lazy? :reluctant?
-    def ==(other)
-      other.class == self.class &&
-        other.token == token &&
-        other.mode == mode &&
-        other.min == min &&
-        other.max == max
+    private
+    def deprecated_old_init(token, text, min, max, mode = :greedy)
+      warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
+           "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
+           "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
+           "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
+           "will be derived automatically.\n"\
+           "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
+           "This is consistent with how Expression::Base instances are created. "
+      @token = token
+      @text  = text
+      @min   = min
+      @max   = max
+      @mode  = mode
+    end
+    def minmax
+      case token
+      when /zero_or_one/  then [0, 1]
+      when /zero_or_more/ then [0, -1]
+      when /one_or_more/  then [1, -1]
+      when :interval
+        int_min = text[/\{(\d*)/, 1]
+        int_max = text[/,?(\d*)\}/, 1]
+        [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
+      end
     end
-    alias :eq :==
   end
 end

data/lib/regexp_parser/expression/sequence.rb CHANGED Viewed

@@ -7,31 +7,17 @@ module Regexp::Expression
   # branches, and CharacterSet::Intersection intersected sequences.
   class Sequence < Regexp::Expression::Subexpression
     class << self
-      def add_to(subexpression, params = {}, active_opts = {})
-        sequence = at_levels(
-          subexpression.level,
-          subexpression.set_level,
-          params[:conditional_level] || subexpression.conditional_level
+      def add_to(exp, params = {}, active_opts = {})
+        sequence = construct(
+          level:             exp.level,
+          set_level:         exp.set_level,
+          conditional_level: params[:conditional_level] || exp.conditional_level,
         )
-        sequence.nesting_level = subexpression.nesting_level + 1
+        sequence.nesting_level = exp.nesting_level + 1
         sequence.options = active_opts
-        subexpression.expressions << sequence
+        exp.expressions << sequence
         sequence
       end
-      def at_levels(level, set_level, conditional_level)
-        token = Regexp::Token.new(
-          :expression,
-          :sequence,
-          '',
-          nil, # ts
-          nil, # te
-          level,
-          set_level,
-          conditional_level
-        )
-        new(token)
-      end
     end
     def starts_at
@@ -39,12 +25,12 @@ module Regexp::Expression
     end
     alias :ts :starts_at
-    def quantify(token, text, min = nil, max = nil, mode = :greedy)
+    def quantify(*args)
       target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
       target or raise Regexp::Parser::Error,
         "No valid target found for '#{text}' quantifier"
-      target.quantify(token, text, min, max, mode)
+      target.quantify(*args)
     end
   end
 end

data/lib/regexp_parser/expression/sequence_operation.rb CHANGED Viewed

@@ -18,8 +18,8 @@ module Regexp::Expression
       self.class::OPERAND.add_to(self, {}, active_opts)
     end
-    def to_s(format = :full)
-      sequences.map { |e| e.to_s(format) }.join(text)
+    def parts
+      intersperse(expressions, text.dup)
     end
   end
 end