RubyGems - regexp_parser - Versions diffs - 2.6.2 → 2.8.0 - Mend

regexp_parser 2.6.2 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/CHANGELOG.md +67 -0
data/Gemfile +2 -2
data/README.md +32 -29
data/lib/regexp_parser/expression/base.rb +0 -7
data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
data/lib/regexp_parser/expression/classes/backreference.rb +4 -2
data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
data/lib/regexp_parser/expression/classes/group.rb +0 -22
data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
data/lib/regexp_parser/expression/methods/construct.rb +2 -4
data/lib/regexp_parser/expression/methods/parts.rb +23 -0
data/lib/regexp_parser/expression/methods/printing.rb +26 -0
data/lib/regexp_parser/expression/methods/tests.rb +40 -3
data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
data/lib/regexp_parser/expression/quantifier.rb +30 -17
data/lib/regexp_parser/expression/sequence.rb +5 -10
data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
data/lib/regexp_parser/expression/shared.rb +37 -20
data/lib/regexp_parser/expression/subexpression.rb +20 -15
data/lib/regexp_parser/expression.rb +2 -0
data/lib/regexp_parser/lexer.rb +76 -36
data/lib/regexp_parser/parser.rb +97 -97
data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
data/lib/regexp_parser/scanner/mapping.rb +89 -0
data/lib/regexp_parser/scanner/property.rl +2 -2
data/lib/regexp_parser/scanner/scanner.rl +90 -169
data/lib/regexp_parser/scanner.rb +1157 -1330
data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
data/lib/regexp_parser/syntax/token/escape.rb +3 -1
data/lib/regexp_parser/syntax/token/meta.rb +9 -2
data/lib/regexp_parser/syntax/token/unicode_property.rb +3 -0
data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
data/lib/regexp_parser/syntax/versions.rb +2 -0
data/lib/regexp_parser/version.rb +1 -1
metadata +10 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 66568005494b517613155277c6be4731eb8a26bb9b48a692a9430507286ce583
-  data.tar.gz: d1fc6c6f1a0c7f939c51703ac844c2dbb134f96e0e55780646cb7e3e87d7a652
+  metadata.gz: bed928e92928d8f595241456658e516f3afd2474196ca4d6fdbb849c072d5024
+  data.tar.gz: 48d50057af6883cd2d67050fc05aed79e87342f6067eb80734729a8440c08a69
 SHA512:
-  metadata.gz: b955b2215b71c94497e52841142fab8c2b9930d0d6cea6ea2b3eeb8ed9fe84575e2f34aae3a6051af2b56429f98cf070b9151805f2cb93ddb511ec1e0e50dd7c
-  data.tar.gz: 3a4f083942b66ddb4b67ab33f14bb1c0b724a60c2b30605059d32ce3648e9cb46e31e797b7a526a2028c1e018d73365f5ef955256de4e63397d6ea105714ff12
+  metadata.gz: 455e79dd780d7d5c130fae56140158615195601f68ea9eb83367d0b9faaf631586bbf12f5b9243d16bb42d29eeb57ba595f87a3b4604b32af059dc9a72c4d6d4
+  data.tar.gz: 37216a681eda06118b7317e64cab14cb06e39e4923433225598b60b8b36684ab831e4d90960516adbfdaa16811b274c2181eb38a13ddd259fb6790cbeef99ebf

data/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,73 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Added
+- `Regexp::Expression::Shared#ends_at`
+  * e.g. `parse(/a +/x)[0].ends_at # => 3`
+  * e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
+- `Regexp::Expression::Shared#{capturing?,comment?}`
+  * previously only available on capturing and comment groups
+- `Regexp::Expression::Shared#{decorative?}`
+  * true for decorations: comment groups as well as comments and whitespace in x-mode
+- `Regexp::Expression::Shared#parent`
+- new format argument `:original` for `Regexp::Expression::Base#to_s`
+  * includes decorative elements between node and its quantifier
+  * e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
+  * using it is not needed when calling `Root#to_s` as Root can't be quantified
+- support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
+  * in this case, only the expressions are passed to the block, no indices
+- support calling test methods at Expression class level
+  - `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
+  - e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
+### Fixed
+- `Regexp::Expression::Shared#full_length` with whitespace before quantifier
+  * e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
+- `Subexpression#to_s` output with children with whitespace before their quantifier
+  * e.g. `parse(/a + /x).to_s` used to yield `"a+  "`, now it yields `"a + "`
+  * calling `#to_s` on sub-nodes still omits such decorative interludes by default
+    - use new `#to_s` format `:original` to include it
+    - e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
+- fixed `Subexpression#te` behaving differently from other expressions
+  * only `Subexpression#te` used to include the quantifier
+  * now `#te` is the end index without quantifier, as for other expressions
+- fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
+  * e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
+  * e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
+- fixed nested comment groups breaking local x-options
+  * e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
+- fixed nested comment groups breaking conditionals
+  * e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
+- fixed quantifiers after comment groups being mis-assigned to that group
+  * e.g. in `/a(?#foo){3}/` (matches 'aaa')
+- fixed Scanner accepting two cases of invalid Regexp syntax
+  * unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
+  * these are a `SyntaxError` in Ruby, so could only be passed as a String
+  * they now raise a `Regexp::Scanner::ScannerError`
+- fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
+- reduced verbosity of inspect / pretty print output
+## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Added
+- `Regexp::Lexer.lex` now streams tokens when called with a block
+  * it can now take arbitrarily large input, just like `Regexp::Scanner`
+  * this also slightly improves `Regexp::Parser.parse` performance
+  * note: `Regexp::Parser.parse` still does not and will not support streaming
+- improved performance of `Subexpression#each_expression`
+- minor improvements to `Regexp::Scanner` performance
+- overall improvement of parse performance: about 10% for large Regexps
+### Fixed
+- parsing of octal escape sequences in sets, e.g. `[\141]`
+  * thanks to [Randy Stauner](https://github.com/rwstauner) for the report
 ## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:janosch84@gmail.com)
 ### Fixed

data/Gemfile CHANGED Viewed

@@ -3,13 +3,13 @@ source 'https://rubygems.org'
 gemspec
 group :development, :test do
-  gem 'ice_nine', '~> 0.11.2'
+  gem 'leto', '~> 2.0'
   gem 'rake', '~> 13.0'
   gem 'regexp_property_values', '~> 1.3'
   gem 'rspec', '~> 3.10'
   if RUBY_VERSION.to_f >= 2.7
     gem 'benchmark-ips', '~> 2.1'
-    gem 'gouteur'
+    gem 'gouteur', '~> 1.1'
     gem 'rubocop', '~> 1.7'
   end
 end

data/README.md CHANGED Viewed

@@ -67,7 +67,7 @@ called with the results as follows:
 * **Scanner**: the block gets passed the results as they are scanned. See the
   example in the next section for details.
-* **Lexer**: after completion, the block gets passed the tokens one by one.
+* **Lexer**: the block gets passed the tokens one by one as they are scanned.
   _The result of the block is returned._
 * **Parser**: after completion, the block gets passed the root expression.
@@ -126,7 +126,7 @@ parts of the pattern:
 ```ruby
 Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
-#=> ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
+# => ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
 ```
@@ -248,7 +248,7 @@ by a quantifier that only applies to it.
 ```ruby
 Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
-#=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
+# => ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
 ```
 #### Notes
@@ -262,7 +262,7 @@ Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
 ### Parser
 Sits on top of the lexer and transforms the "stream" of Token objects emitted
 by it into a tree of Expression objects represented by an instance of the
-Expression::Root class.
+`Expression::Root` class.
 See the [Expression Objects](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
 wiki page for attributes and methods.
@@ -270,6 +270,34 @@ wiki page for attributes and methods.
 #### Example
+This example uses the tree traversal method `#each_expression`
+and the method `#strfregexp` to print each object in the tree.
+```ruby
+include_root  = true
+indent_offset = include_root ? 1 : 0
+tree.each_expression(include_root) do |exp|
+  puts exp.strfregexp("%>> %c", indent_offset)
+end
+# Output
+# > Regexp::Expression::Root
+#   > Regexp::Expression::Literal
+#   > Regexp::Expression::Group::Capture
+#     > Regexp::Expression::Literal
+#     > Regexp::Expression::Group::Capture
+#       > Regexp::Expression::Literal
+#     > Regexp::Expression::Literal
+#   > Regexp::Expression::Group::Named
+#     > Regexp::Expression::CharacterSet
+```
+_Note: quantifiers do not appear in the output because they are members of the
+Expression class. See the next section for details._
+Another example, using `#traverse` for a more fine-grained tree traversal:
 ```ruby
 require 'regexp_parser'
@@ -295,34 +323,9 @@ end
 # exit: group `(?<name>[0-9]+)`
 ```
-Another example, using each_expression and strfregexp to print the object tree.
 _See the traverse.rb and strfregexp.rb files under `lib/regexp_parser/expression/methods`
 for more information on these methods._
-```ruby
-include_root  = true
-indent_offset = include_root ? 1 : 0
-tree.each_expression(include_root) do |exp, level_index|
-  puts exp.strfregexp("%>> %c", indent_offset)
-end
-# Output
-# > Regexp::Expression::Root
-#   > Regexp::Expression::Literal
-#   > Regexp::Expression::Group::Capture
-#     > Regexp::Expression::Literal
-#     > Regexp::Expression::Group::Capture
-#       > Regexp::Expression::Literal
-#     > Regexp::Expression::Literal
-#   > Regexp::Expression::Group::Named
-#     > Regexp::Expression::CharacterSet
-```
-_Note: quantifiers do not appear in the output because they are members of the
-Expression class. See the next section for details._
 ---

data/lib/regexp_parser/expression/base.rb CHANGED Viewed

@@ -6,13 +6,6 @@ module Regexp::Expression
       init_from_token_and_options(token, options)
     end
-    def initialize_copy(orig)
-      self.text       = orig.text.dup         if orig.text
-      self.options    = orig.options.dup      if orig.options
-      self.quantifier = orig.quantifier.clone if orig.quantifier
-      super
-    end
     def to_re(format = :full)
       if set_level > 0
         warn "Calling #to_re on character set members is deprecated - "\

data/lib/regexp_parser/expression/classes/alternation.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Regexp::Expression
-  # A sequence of expressions, used by Alternation as one of its alternative.
+  # A sequence of expressions, used by Alternation as one of its alternatives.
   class Alternative < Regexp::Expression::Sequence; end
   class Alternation < Regexp::Expression::SequenceOperation

data/lib/regexp_parser/expression/classes/backreference.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
-  # TODO: unify name with token :backref, one way or the other, in v3.0.0
   module Backreference
     class Base < Regexp::Expression::Base
       attr_accessor :referenced_expression
@@ -27,7 +26,7 @@ module Regexp::Expression
       alias reference number
       def initialize(token, options = {})
-        @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
+        @number = token.text[/-?\d+/].to_i
         super
       end
     end
@@ -70,4 +69,7 @@ module Regexp::Expression
       end
     end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Backref = Backreference
 end

data/lib/regexp_parser/expression/classes/character_set/range.rb CHANGED Viewed

@@ -1,10 +1,9 @@
 module Regexp::Expression
   class CharacterSet < Regexp::Expression::Subexpression
     class Range < Regexp::Expression::Subexpression
-      def starts_at
-        expressions.first.starts_at
+      def ts
+        (head = expressions.first) ? head.ts : @ts
       end
-      alias :ts :starts_at
       def <<(exp)
         complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
       def complete?
         count == 2
       end
-      def parts
-        intersperse(expressions, text.dup)
-      end
     end
   end
 end

data/lib/regexp_parser/expression/classes/character_set.rb CHANGED Viewed

@@ -19,9 +19,8 @@ module Regexp::Expression
     def close
       self.closed = true
     end
-    def parts
-      ["#{text}#{'^' if negated?}", *expressions, ']']
-    end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Set = CharacterSet
 end # module Regexp::Expression

data/lib/regexp_parser/expression/classes/conditional.rb CHANGED Viewed

@@ -31,9 +31,9 @@ module Regexp::Expression
         expressions.last << exp
       end
-      def add_sequence(active_opts = {})
+      def add_sequence(active_opts = {}, params = { ts: 0 })
         raise TooManyBranches.new if branches.length == 2
-        params = { conditional_level: conditional_level + 1 }
+        params = params.merge({ conditional_level: conditional_level + 1 })
         Branch.add_to(self, params, active_opts)
       end
       alias :branch :add_sequence
@@ -55,10 +55,6 @@ module Regexp::Expression
         condition.reference
       end
-      def parts
-        [text.dup, condition, *intersperse(branches, '|'), ')']
-      end
       def initialize_copy(orig)
         self.referenced_expression = orig.referenced_expression.dup
         super

data/lib/regexp_parser/expression/classes/escape_sequence.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
-  # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
   module EscapeSequence
     class Base < Regexp::Expression::Base
       def codepoint
@@ -97,4 +96,7 @@ module Regexp::Expression
       end
     end
   end
+  # alias for symmetry between Token::* and Expression::*
+  Escape = EscapeSequence
 end

data/lib/regexp_parser/expression/classes/free_space.rb CHANGED Viewed

@@ -5,10 +5,12 @@ module Regexp::Expression
     end
   end
-  class Comment < Regexp::Expression::FreeSpace; end
+  class Comment < Regexp::Expression::FreeSpace
+  end
   class WhiteSpace < Regexp::Expression::FreeSpace
     def merge(exp)
+      warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
       text << exp.text
     end
   end

data/lib/regexp_parser/expression/classes/group.rb CHANGED Viewed

@@ -1,13 +1,6 @@
 module Regexp::Expression
   module Group
     class Base < Regexp::Expression::Subexpression
-      def parts
-        [text.dup, *expressions, ')']
-      end
-      def capturing?; false end
-      def comment?; false end
     end
     class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
         super
       end
-      def parts
-        if implicit?
-          expressions
-        else
-          super
-        end
-      end
       def implicit?
         @implicit
       end
@@ -55,8 +40,6 @@ module Regexp::Expression
     class Capture < Group::Base
       attr_accessor :number, :number_at_level
       alias identifier number
-      def capturing?; true end
     end
     class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
     end
     class Comment < Group::Base
-      def parts
-        [text.dup]
-      end
-      def comment?; true end
     end
   end

data/lib/regexp_parser/expression/classes/posix_class.rb CHANGED Viewed

@@ -5,7 +5,11 @@ module Regexp::Expression
     end
     def name
-      token.to_s
+      text[/\w+/]
     end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Posixclass    = PosixClass
+  Nonposixclass = PosixClass
 end

data/lib/regexp_parser/expression/classes/unicode_property.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
-  # TODO: unify name with token :property, one way or the other, in v3.0.0
   module UnicodeProperty
     class Base < Regexp::Expression::Base
       def negative?
@@ -11,7 +10,7 @@ module Regexp::Expression
       end
       def shortcut
-        (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
+        Regexp::Scanner.short_prop_map.key(token.to_s)
       end
     end
@@ -116,4 +115,8 @@ module Regexp::Expression
     class Script  < UnicodeProperty::Base; end
     class Block   < UnicodeProperty::Base; end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Property    = UnicodeProperty
+  Nonproperty = UnicodeProperty
 end # module Regexp::Expression

data/lib/regexp_parser/expression/methods/construct.rb CHANGED Viewed

@@ -25,11 +25,9 @@ module Regexp::Expression
       def token_class
         if self == Root || self < Sequence
           nil # no token class because these objects are Parser-generated
-        # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
-        elsif self == Alternation || self == CharacterType::Any
+        # TODO: synch exp class, token class & type names for this in v3.0.0
+        elsif self == CharacterType::Any
           Regexp::Syntax::Token::Meta
-        elsif self <= EscapeSequence::Base
-          Regexp::Syntax::Token::Escape
         else
           Regexp::Syntax::Token.const_get(name.split('::')[2])
         end

data/lib/regexp_parser/expression/methods/parts.rb ADDED Viewed

@@ -0,0 +1,23 @@
+module Regexp::Expression
+  module Shared
+    # default implementation
+    def parts
+      [text.dup]
+    end
+    private
+    def intersperse(expressions, separator)
+      expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
+    end
+  end
+  CharacterSet.class_eval            { def parts; ["#{text}#{'^' if negated?}", *expressions, ']']        end }
+  CharacterSet::Range.class_eval     { def parts; intersperse(expressions, text.dup)                      end }
+  Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
+  Group::Base.class_eval             { def parts; [text.dup, *expressions, ')']                           end }
+  Group::Passive.class_eval          { def parts; implicit? ? expressions : super                         end }
+  Group::Comment.class_eval          { def parts; [text.dup]                                              end }
+  Subexpression.class_eval           { def parts; expressions                                             end }
+  SequenceOperation.class_eval       { def parts; intersperse(expressions, text.dup)                      end }
+end

data/lib/regexp_parser/expression/methods/printing.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module Regexp::Expression
+  module Shared
+    def inspect
+      [
+        "#<#{self.class}",
+        pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
+        ">"
+      ].join
+    end
+    # Make pretty-print work despite #inspect implementation.
+    def pretty_print(q)
+      q.pp_object(self)
+    end
+    # Called by pretty_print (ruby/pp) and #inspect.
+    def pretty_print_instance_variables
+      [
+        (:@text unless text.to_s.empty?),
+        (:@quantifier if quantified?),
+        (:@options unless options.empty?),
+        (:@expressions unless terminal?),
+      ].compact
+    end
+  end
+end

data/lib/regexp_parser/expression/methods/tests.rb CHANGED Viewed

@@ -95,12 +95,49 @@ module Regexp::Expression
     end
     # Deep-compare two expressions for equality.
+    #
+    # When changing the conditions, please make sure to update
+    # #pretty_print_instance_variables so that it includes all relevant values.
     def ==(other)
-      other.class == self.class &&
-        other.to_s == to_s &&
-        other.options == options
+      self.class   == other.class &&
+        text       == other.text &&
+        quantifier == other.quantifier &&
+        options    == other.options &&
+        (terminal? || expressions == other.expressions)
     end
     alias :=== :==
     alias :eql? :==
+    def optional?
+      quantified? && quantifier.min == 0
+    end
+    def quantified?
+      !quantifier.nil?
+    end
   end
+  Shared.class_eval                     { def terminal?; self.class.terminal? end }
+  Shared::ClassMethods.class_eval       { def terminal?; true  end }
+  Subexpression.instance_eval           { def terminal?; false end }
+  Shared.class_eval                     { def capturing?; self.class.capturing? end }
+  Shared::ClassMethods.class_eval       { def capturing?; false end }
+  Group::Capture.instance_eval          { def capturing?; true  end }
+  Shared.class_eval                     { def comment?; self.class.comment? end }
+  Shared::ClassMethods.class_eval       { def comment?; false end }
+  Comment.instance_eval                 { def comment?; true  end }
+  Group::Comment.instance_eval          { def comment?; true  end }
+  Shared.class_eval                     { def decorative?; self.class.decorative? end }
+  Shared::ClassMethods.class_eval       { def decorative?; false end }
+  FreeSpace.instance_eval               { def decorative?; true  end }
+  Group::Comment.instance_eval          { def decorative?; true  end }
+  Shared.class_eval                     { def referential?; self.class.referential? end }
+  Shared::ClassMethods.class_eval       { def referential?; false end }
+  Backreference::Base.instance_eval     { def referential?; true  end }
+  Conditional::Condition.instance_eval  { def referential?; true  end }
+  Conditional::Expression.instance_eval { def referential?; true  end }
 end

data/lib/regexp_parser/expression/methods/traverse.rb CHANGED Viewed

@@ -1,6 +1,22 @@
 module Regexp::Expression
   class Subexpression < Regexp::Expression::Base
+    # Traverses the expression, passing each recursive child to the
+    # given block.
+    # If the block takes two arguments, the indices of the children within
+    # their parents are also passed to it.
+    def each_expression(include_self = false, &block)
+      return enum_for(__method__, include_self) unless block
+      if block.arity == 1
+        block.call(self) if include_self
+        each_expression_without_index(&block)
+      else
+        block.call(self, 0) if include_self
+        each_expression_with_index(&block)
+      end
+    end
     # Traverses the subexpression (depth-first, pre-order) and calls the given
     # block for each expression with three arguments; the traversal event,
     # the expression, and the index of the expression within its parent.
@@ -34,31 +50,31 @@ module Regexp::Expression
     end
     alias :walk :traverse
-    # Iterates over the expressions of this expression as an array, passing
-    # the expression and its index within its parent to the given block.
-    def each_expression(include_self = false)
-      return enum_for(__method__, include_self) unless block_given?
-      traverse(include_self) do |event, exp, index|
-        yield(exp, index) unless event == :exit
-      end
-    end
     # Returns a new array with the results of calling the given block once
     # for every expression. If a block is not given, returns an array with
     # each expression and its level index as an array.
-    def flat_map(include_self = false)
-      result = []
+    def flat_map(include_self = false, &block)
+      case block && block.arity
+      when nil then each_expression(include_self).to_a
+      when 2   then each_expression(include_self).map(&block)
+      else          each_expression(include_self).map { |exp| block.call(exp) }
+      end
+    end
-      each_expression(include_self) do |exp, index|
-        if block_given?
-          result << yield(exp, index)
-        else
-          result << [exp, index]
-        end
+    protected
+    def each_expression_with_index(&block)
+      each_with_index do |exp, index|
+        block.call(exp, index)
+        exp.each_expression_with_index(&block) unless exp.terminal?
       end
+    end
-      result
+    def each_expression_without_index(&block)
+      each do |exp|
+        block.call(exp)
+        exp.each_expression_without_index(&block) unless exp.terminal?
+      end
     end
   end
 end