RubyGems - regexp_parser - Versions diffs - 2.7.0 → 2.8.1 - Mend

regexp_parser 2.7.0 → 2.8.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/CHANGELOG.md +62 -3
data/Gemfile +3 -3
data/LICENSE +1 -1
data/README.md +33 -30
data/lib/regexp_parser/expression/base.rb +0 -7
data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
data/lib/regexp_parser/expression/classes/group.rb +0 -22
data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
data/lib/regexp_parser/expression/methods/construct.rb +2 -4
data/lib/regexp_parser/expression/methods/parts.rb +23 -0
data/lib/regexp_parser/expression/methods/printing.rb +26 -0
data/lib/regexp_parser/expression/methods/tests.rb +40 -3
data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
data/lib/regexp_parser/expression/quantifier.rb +30 -17
data/lib/regexp_parser/expression/sequence.rb +5 -9
data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
data/lib/regexp_parser/expression/shared.rb +37 -24
data/lib/regexp_parser/expression/subexpression.rb +20 -18
data/lib/regexp_parser/expression.rb +2 -0
data/lib/regexp_parser/lexer.rb +15 -7
data/lib/regexp_parser/parser.rb +85 -86
data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
data/lib/regexp_parser/scanner/properties/long.csv +11 -0
data/lib/regexp_parser/scanner/properties/short.csv +2 -0
data/lib/regexp_parser/scanner/property.rl +1 -1
data/lib/regexp_parser/scanner/scanner.rl +35 -129
data/lib/regexp_parser/scanner.rb +1084 -1303
data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
data/lib/regexp_parser/syntax/token/escape.rb +3 -1
data/lib/regexp_parser/syntax/token/meta.rb +9 -2
data/lib/regexp_parser/syntax/token/unicode_property.rb +17 -1
data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
data/lib/regexp_parser/version.rb +1 -1
metadata +9 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 04af46818e9d560362fea9b3fd24802b557ac145ed95f6e02580dd7cf5e8ddfc
-  data.tar.gz: 75b7d30241f48ddf90c8cd68228fa928904ab6055ea755f4bdcf28361e645a4b
+  metadata.gz: e1426faee272654c45e3da8e262e94cfdbcf134dbad7804aed8cd945334c07be
+  data.tar.gz: 37eec721839fe2ebfc25c9d614756289b59ee766f5e7e60ecf4839b554bbb93e
 SHA512:
-  metadata.gz: 407025a9b14af76463260fca2a48f9fef4ab863e3dddf3f7f54101c1348611afa49d9973e850d9e1c84d6e5faf8f1a9d3d2da5dceaefe8dc4fefe7069ecd9280
-  data.tar.gz: 9f3d2eb4264318511a82e9034c4c4a8a8e73e67e427945f0c9f745fd37b2f2f0ae8e30ba942f0920da3109b59436a5518dfc5e2f7669317de0214a0deb6f0e07
+  metadata.gz: abed9d7f387634b5e16eb19cbfd5d9aab03288dd4d284b1c52688f958714479783275c5418ee623607ced96b301124ab82dff546e7e4146c7c5ec7feae3e089d
+  data.tar.gz: 62c0757df1c73df52fcf71ef8de666ab9a51a4a8145e71321424ab0ff8408cb2b707cf154dae64ebbcc5a9c8a12ee377a3eadab7549432a9d0e6ee0e65afddd1

data/CHANGELOG.md CHANGED Viewed

@@ -5,14 +5,73 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+## [2.8.1] - 2023-06-10 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Fixed
+- support for extpict unicode property, added in Ruby 2.6
+- support for 10 unicode script/block properties added in Ruby 3.2
+## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
+### Added
+- `Regexp::Expression::Shared#ends_at`
+  * e.g. `parse(/a +/x)[0].ends_at # => 3`
+  * e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
+- `Regexp::Expression::Shared#{capturing?,comment?}`
+  * previously only available on capturing and comment groups
+- `Regexp::Expression::Shared#{decorative?}`
+  * true for decorations: comment groups as well as comments and whitespace in x-mode
+- `Regexp::Expression::Shared#parent`
+- new format argument `:original` for `Regexp::Expression::Base#to_s`
+  * includes decorative elements between node and its quantifier
+  * e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
+  * using it is not needed when calling `Root#to_s` as Root can't be quantified
+- support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
+  * in this case, only the expressions are passed to the block, no indices
+- support calling test methods at Expression class level
+  - `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
+  - e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
+### Fixed
+- `Regexp::Expression::Shared#full_length` with whitespace before quantifier
+  * e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
+- `Subexpression#to_s` output with children with whitespace before their quantifier
+  * e.g. `parse(/a + /x).to_s` used to yield `"a+  "`, now it yields `"a + "`
+  * calling `#to_s` on sub-nodes still omits such decorative interludes by default
+    - use new `#to_s` format `:original` to include it
+    - e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
+- fixed `Subexpression#te` behaving differently from other expressions
+  * only `Subexpression#te` used to include the quantifier
+  * now `#te` is the end index without quantifier, as for other expressions
+- fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
+  * e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
+  * e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
+- fixed nested comment groups breaking local x-options
+  * e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
+- fixed nested comment groups breaking conditionals
+  * e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
+- fixed quantifiers after comment groups being mis-assigned to that group
+  * e.g. in `/a(?#foo){3}/` (matches 'aaa')
+- fixed Scanner accepting two cases of invalid Regexp syntax
+  * unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
+  * these are a `SyntaxError` in Ruby, so could only be passed as a String
+  * they now raise a `Regexp::Scanner::ScannerError`
+- fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
+- reduced verbosity of inspect / pretty print output
 ## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
 ### Added
 - `Regexp::Lexer.lex` now streams tokens when called with a block
-  - it can now take arbitrarily large input, just like `Regexp::Scanner`
-  - this also slightly improves `Regexp::Parser.parse` performance
-  - note: `Regexp::Parser.parse` still does not and will not support streaming
+  * it can now take arbitrarily large input, just like `Regexp::Scanner`
+  * this also slightly improves `Regexp::Parser.parse` performance
+  * note: `Regexp::Parser.parse` still does not and will not support streaming
 - improved performance of `Subexpression#each_expression`
 - minor improvements to `Regexp::Scanner` performance
 - overall improvement of parse performance: about 10% for large Regexps

data/Gemfile CHANGED Viewed

@@ -3,13 +3,13 @@ source 'https://rubygems.org'
 gemspec
 group :development, :test do
-  gem 'ice_nine', '~> 0.11.2'
+  gem 'leto', '~> 2.0'
   gem 'rake', '~> 13.0'
-  gem 'regexp_property_values', '~> 1.3'
+  gem 'regexp_property_values', '~> 1.4'
   gem 'rspec', '~> 3.10'
   if RUBY_VERSION.to_f >= 2.7
     gem 'benchmark-ips', '~> 2.1'
-    gem 'gouteur'
+    gem 'gouteur', '~> 1.1'
     gem 'rubocop', '~> 1.7'
   end
 end

data/LICENSE CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2010, 2012-2022,  Ammar Ali
+Copyright (c) 2010, 2012-2023,  Ammar Ali
 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation

data/README.md CHANGED Viewed

@@ -67,7 +67,7 @@ called with the results as follows:
 * **Scanner**: the block gets passed the results as they are scanned. See the
   example in the next section for details.
-* **Lexer**: after completion, the block gets passed the tokens one by one.
+* **Lexer**: the block gets passed the tokens one by one as they are scanned.
   _The result of the block is returned._
 * **Parser**: after completion, the block gets passed the root expression.
@@ -126,7 +126,7 @@ parts of the pattern:
 ```ruby
 Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
-#=> ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
+# => ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
 ```
@@ -248,7 +248,7 @@ by a quantifier that only applies to it.
 ```ruby
 Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
-#=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
+# => ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
 ```
 #### Notes
@@ -262,7 +262,7 @@ Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
 ### Parser
 Sits on top of the lexer and transforms the "stream" of Token objects emitted
 by it into a tree of Expression objects represented by an instance of the
-Expression::Root class.
+`Expression::Root` class.
 See the [Expression Objects](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
 wiki page for attributes and methods.
@@ -270,6 +270,34 @@ wiki page for attributes and methods.
 #### Example
+This example uses the tree traversal method `#each_expression`
+and the method `#strfregexp` to print each object in the tree.
+```ruby
+include_root  = true
+indent_offset = include_root ? 1 : 0
+tree.each_expression(include_root) do |exp|
+  puts exp.strfregexp("%>> %c", indent_offset)
+end
+# Output
+# > Regexp::Expression::Root
+#   > Regexp::Expression::Literal
+#   > Regexp::Expression::Group::Capture
+#     > Regexp::Expression::Literal
+#     > Regexp::Expression::Group::Capture
+#       > Regexp::Expression::Literal
+#     > Regexp::Expression::Literal
+#   > Regexp::Expression::Group::Named
+#     > Regexp::Expression::CharacterSet
+```
+_Note: quantifiers do not appear in the output because they are members of the
+Expression class. See the next section for details._
+Another example, using `#traverse` for a more fine-grained tree traversal:
 ```ruby
 require 'regexp_parser'
@@ -295,34 +323,9 @@ end
 # exit: group `(?<name>[0-9]+)`
 ```
-Another example, using each_expression and strfregexp to print the object tree.
 _See the traverse.rb and strfregexp.rb files under `lib/regexp_parser/expression/methods`
 for more information on these methods._
-```ruby
-include_root  = true
-indent_offset = include_root ? 1 : 0
-tree.each_expression(include_root) do |exp, level_index|
-  puts exp.strfregexp("%>> %c", indent_offset)
-end
-# Output
-# > Regexp::Expression::Root
-#   > Regexp::Expression::Literal
-#   > Regexp::Expression::Group::Capture
-#     > Regexp::Expression::Literal
-#     > Regexp::Expression::Group::Capture
-#       > Regexp::Expression::Literal
-#     > Regexp::Expression::Literal
-#   > Regexp::Expression::Group::Named
-#     > Regexp::Expression::CharacterSet
-```
-_Note: quantifiers do not appear in the output because they are members of the
-Expression class. See the next section for details._
 ---
@@ -500,4 +503,4 @@ Documentation and books used while working on this project.
 ---
 ##### Copyright
-_Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
+_Copyright (c) 2010-2023 Ammar Ali. See LICENSE file for details._

data/lib/regexp_parser/expression/base.rb CHANGED Viewed

@@ -6,13 +6,6 @@ module Regexp::Expression
       init_from_token_and_options(token, options)
     end
-    def initialize_copy(orig)
-      self.text       = orig.text.dup         if orig.text
-      self.options    = orig.options.dup      if orig.options
-      self.quantifier = orig.quantifier.clone if orig.quantifier
-      super
-    end
     def to_re(format = :full)
       if set_level > 0
         warn "Calling #to_re on character set members is deprecated - "\

data/lib/regexp_parser/expression/classes/alternation.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Regexp::Expression
-  # A sequence of expressions, used by Alternation as one of its alternative.
+  # A sequence of expressions, used by Alternation as one of its alternatives.
   class Alternative < Regexp::Expression::Sequence; end
   class Alternation < Regexp::Expression::SequenceOperation

data/lib/regexp_parser/expression/classes/backreference.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
-  # TODO: unify name with token :backref, one way or the other, in v3.0.0
   module Backreference
     class Base < Regexp::Expression::Base
       attr_accessor :referenced_expression
@@ -20,10 +19,6 @@ module Regexp::Expression
         super
       end
-      def referential?
-        true
-      end
     end
     class Number < Backreference::Base
@@ -31,7 +26,7 @@ module Regexp::Expression
       alias reference number
       def initialize(token, options = {})
-        @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
+        @number = token.text[/-?\d+/].to_i
         super
       end
     end
@@ -74,4 +69,7 @@ module Regexp::Expression
       end
     end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Backref = Backreference
 end

data/lib/regexp_parser/expression/classes/character_set/range.rb CHANGED Viewed

@@ -1,10 +1,9 @@
 module Regexp::Expression
   class CharacterSet < Regexp::Expression::Subexpression
     class Range < Regexp::Expression::Subexpression
-      def starts_at
-        expressions.first.starts_at
+      def ts
+        (head = expressions.first) ? head.ts : @ts
       end
-      alias :ts :starts_at
       def <<(exp)
         complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
       def complete?
         count == 2
       end
-      def parts
-        intersperse(expressions, text.dup)
-      end
     end
   end
 end

data/lib/regexp_parser/expression/classes/character_set.rb CHANGED Viewed

@@ -19,9 +19,8 @@ module Regexp::Expression
     def close
       self.closed = true
     end
-    def parts
-      ["#{text}#{'^' if negated?}", *expressions, ']']
-    end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Set = CharacterSet
 end # module Regexp::Expression

data/lib/regexp_parser/expression/classes/conditional.rb CHANGED Viewed

@@ -20,10 +20,6 @@ module Regexp::Expression
         self.referenced_expression = orig.referenced_expression.dup
         super
       end
-      def referential?
-        true
-      end
     end
     class Branch < Regexp::Expression::Sequence; end
@@ -35,9 +31,9 @@ module Regexp::Expression
         expressions.last << exp
       end
-      def add_sequence(active_opts = {})
+      def add_sequence(active_opts = {}, params = { ts: 0 })
         raise TooManyBranches.new if branches.length == 2
-        params = { conditional_level: conditional_level + 1 }
+        params = params.merge({ conditional_level: conditional_level + 1 })
         Branch.add_to(self, params, active_opts)
       end
       alias :branch :add_sequence
@@ -59,14 +55,6 @@ module Regexp::Expression
         condition.reference
       end
-      def referential?
-        true
-      end
-      def parts
-        [text.dup, condition, *intersperse(branches, '|'), ')']
-      end
       def initialize_copy(orig)
         self.referenced_expression = orig.referenced_expression.dup
         super

data/lib/regexp_parser/expression/classes/escape_sequence.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
-  # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
   module EscapeSequence
     class Base < Regexp::Expression::Base
       def codepoint
@@ -97,4 +96,7 @@ module Regexp::Expression
       end
     end
   end
+  # alias for symmetry between Token::* and Expression::*
+  Escape = EscapeSequence
 end

data/lib/regexp_parser/expression/classes/free_space.rb CHANGED Viewed

@@ -5,10 +5,12 @@ module Regexp::Expression
     end
   end
-  class Comment < Regexp::Expression::FreeSpace; end
+  class Comment < Regexp::Expression::FreeSpace
+  end
   class WhiteSpace < Regexp::Expression::FreeSpace
     def merge(exp)
+      warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
       text << exp.text
     end
   end

data/lib/regexp_parser/expression/classes/group.rb CHANGED Viewed

@@ -1,13 +1,6 @@
 module Regexp::Expression
   module Group
     class Base < Regexp::Expression::Subexpression
-      def parts
-        [text.dup, *expressions, ')']
-      end
-      def capturing?; false end
-      def comment?; false end
     end
     class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
         super
       end
-      def parts
-        if implicit?
-          expressions
-        else
-          super
-        end
-      end
       def implicit?
         @implicit
       end
@@ -55,8 +40,6 @@ module Regexp::Expression
     class Capture < Group::Base
       attr_accessor :number, :number_at_level
       alias identifier number
-      def capturing?; true end
     end
     class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
     end
     class Comment < Group::Base
-      def parts
-        [text.dup]
-      end
-      def comment?; true end
     end
   end

data/lib/regexp_parser/expression/classes/posix_class.rb CHANGED Viewed

@@ -5,7 +5,11 @@ module Regexp::Expression
     end
     def name
-      token.to_s
+      text[/\w+/]
     end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Posixclass    = PosixClass
+  Nonposixclass = PosixClass
 end

data/lib/regexp_parser/expression/classes/unicode_property.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 module Regexp::Expression
-  # TODO: unify name with token :property, one way or the other, in v3.0.0
   module UnicodeProperty
     class Base < Regexp::Expression::Base
       def negative?
@@ -11,7 +10,7 @@ module Regexp::Expression
       end
       def shortcut
-        (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
+        Regexp::Scanner.short_prop_map.key(token.to_s)
       end
     end
@@ -116,4 +115,8 @@ module Regexp::Expression
     class Script  < UnicodeProperty::Base; end
     class Block   < UnicodeProperty::Base; end
   end
+  # alias for symmetry between token symbol and Expression class name
+  Property    = UnicodeProperty
+  Nonproperty = UnicodeProperty
 end # module Regexp::Expression

data/lib/regexp_parser/expression/methods/construct.rb CHANGED Viewed

@@ -25,11 +25,9 @@ module Regexp::Expression
       def token_class
         if self == Root || self < Sequence
           nil # no token class because these objects are Parser-generated
-        # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
-        elsif self == Alternation || self == CharacterType::Any
+        # TODO: synch exp class, token class & type names for this in v3.0.0
+        elsif self == CharacterType::Any
           Regexp::Syntax::Token::Meta
-        elsif self <= EscapeSequence::Base
-          Regexp::Syntax::Token::Escape
         else
           Regexp::Syntax::Token.const_get(name.split('::')[2])
         end

data/lib/regexp_parser/expression/methods/parts.rb ADDED Viewed

@@ -0,0 +1,23 @@
+module Regexp::Expression
+  module Shared
+    # default implementation
+    def parts
+      [text.dup]
+    end
+    private
+    def intersperse(expressions, separator)
+      expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
+    end
+  end
+  CharacterSet.class_eval            { def parts; ["#{text}#{'^' if negated?}", *expressions, ']']        end }
+  CharacterSet::Range.class_eval     { def parts; intersperse(expressions, text.dup)                      end }
+  Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
+  Group::Base.class_eval             { def parts; [text.dup, *expressions, ')']                           end }
+  Group::Passive.class_eval          { def parts; implicit? ? expressions : super                         end }
+  Group::Comment.class_eval          { def parts; [text.dup]                                              end }
+  Subexpression.class_eval           { def parts; expressions                                             end }
+  SequenceOperation.class_eval       { def parts; intersperse(expressions, text.dup)                      end }
+end

data/lib/regexp_parser/expression/methods/printing.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module Regexp::Expression
+  module Shared
+    def inspect
+      [
+        "#<#{self.class}",
+        pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
+        ">"
+      ].join
+    end
+    # Make pretty-print work despite #inspect implementation.
+    def pretty_print(q)
+      q.pp_object(self)
+    end
+    # Called by pretty_print (ruby/pp) and #inspect.
+    def pretty_print_instance_variables
+      [
+        (:@text unless text.to_s.empty?),
+        (:@quantifier if quantified?),
+        (:@options unless options.empty?),
+        (:@expressions unless terminal?),
+      ].compact
+    end
+  end
+end

data/lib/regexp_parser/expression/methods/tests.rb CHANGED Viewed

@@ -95,12 +95,49 @@ module Regexp::Expression
     end
     # Deep-compare two expressions for equality.
+    #
+    # When changing the conditions, please make sure to update
+    # #pretty_print_instance_variables so that it includes all relevant values.
     def ==(other)
-      other.class == self.class &&
-        other.to_s == to_s &&
-        other.options == options
+      self.class   == other.class &&
+        text       == other.text &&
+        quantifier == other.quantifier &&
+        options    == other.options &&
+        (terminal? || expressions == other.expressions)
     end
     alias :=== :==
     alias :eql? :==
+    def optional?
+      quantified? && quantifier.min == 0
+    end
+    def quantified?
+      !quantifier.nil?
+    end
   end
+  Shared.class_eval                     { def terminal?; self.class.terminal? end }
+  Shared::ClassMethods.class_eval       { def terminal?; true  end }
+  Subexpression.instance_eval           { def terminal?; false end }
+  Shared.class_eval                     { def capturing?; self.class.capturing? end }
+  Shared::ClassMethods.class_eval       { def capturing?; false end }
+  Group::Capture.instance_eval          { def capturing?; true  end }
+  Shared.class_eval                     { def comment?; self.class.comment? end }
+  Shared::ClassMethods.class_eval       { def comment?; false end }
+  Comment.instance_eval                 { def comment?; true  end }
+  Group::Comment.instance_eval          { def comment?; true  end }
+  Shared.class_eval                     { def decorative?; self.class.decorative? end }
+  Shared::ClassMethods.class_eval       { def decorative?; false end }
+  FreeSpace.instance_eval               { def decorative?; true  end }
+  Group::Comment.instance_eval          { def decorative?; true  end }
+  Shared.class_eval                     { def referential?; self.class.referential? end }
+  Shared::ClassMethods.class_eval       { def referential?; false end }
+  Backreference::Base.instance_eval     { def referential?; true  end }
+  Conditional::Condition.instance_eval  { def referential?; true  end }
+  Conditional::Expression.instance_eval { def referential?; true  end }
 end

data/lib/regexp_parser/expression/methods/traverse.rb CHANGED Viewed

@@ -1,6 +1,22 @@
 module Regexp::Expression
   class Subexpression < Regexp::Expression::Base
+    # Traverses the expression, passing each recursive child to the
+    # given block.
+    # If the block takes two arguments, the indices of the children within
+    # their parents are also passed to it.
+    def each_expression(include_self = false, &block)
+      return enum_for(__method__, include_self) unless block
+      if block.arity == 1
+        block.call(self) if include_self
+        each_expression_without_index(&block)
+      else
+        block.call(self, 0) if include_self
+        each_expression_with_index(&block)
+      end
+    end
     # Traverses the subexpression (depth-first, pre-order) and calls the given
     # block for each expression with three arguments; the traversal event,
     # the expression, and the index of the expression within its parent.
@@ -34,34 +50,31 @@ module Regexp::Expression
     end
     alias :walk :traverse
-    # Iterates over the expressions of this expression as an array, passing
-    # the expression and its index within its parent to the given block.
-    def each_expression(include_self = false, &block)
-      return enum_for(__method__, include_self) unless block_given?
+    # Returns a new array with the results of calling the given block once
+    # for every expression. If a block is not given, returns an array with
+    # each expression and its level index as an array.
+    def flat_map(include_self = false, &block)
+      case block && block.arity
+      when nil then each_expression(include_self).to_a
+      when 2   then each_expression(include_self).map(&block)
+      else          each_expression(include_self).map { |exp| block.call(exp) }
+      end
+    end
-      block.call(self, 0) if include_self
+    protected
+    def each_expression_with_index(&block)
       each_with_index do |exp, index|
         block.call(exp, index)
-        exp.each_expression(&block) unless exp.terminal?
+        exp.each_expression_with_index(&block) unless exp.terminal?
       end
     end
-    # Returns a new array with the results of calling the given block once
-    # for every expression. If a block is not given, returns an array with
-    # each expression and its level index as an array.
-    def flat_map(include_self = false)
-      result = []
-      each_expression(include_self) do |exp, index|
-        if block_given?
-          result << yield(exp, index)
-        else
-          result << [exp, index]
-        end
+    def each_expression_without_index(&block)
+      each do |exp|
+        block.call(exp)
+        exp.each_expression_without_index(&block) unless exp.terminal?
       end
-      result
     end
   end
 end