regexp_parser 2.6.0 → 2.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +17 -3
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
  18. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  19. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  20. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  21. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  22. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  23. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  24. data/lib/regexp_parser/expression/sequence.rb +5 -10
  25. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  26. data/lib/regexp_parser/expression/shared.rb +37 -20
  27. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  28. data/lib/regexp_parser/expression.rb +34 -31
  29. data/lib/regexp_parser/lexer.rb +76 -36
  30. data/lib/regexp_parser/parser.rb +101 -100
  31. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  32. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  33. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  34. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  35. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  36. data/lib/regexp_parser/scanner/property.rl +2 -2
  37. data/lib/regexp_parser/scanner/scanner.rl +101 -172
  38. data/lib/regexp_parser/scanner.rb +1132 -1283
  39. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  40. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  41. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  42. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  43. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  44. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  45. data/lib/regexp_parser/syntax/token.rb +13 -13
  46. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  47. data/lib/regexp_parser/syntax/versions.rb +3 -1
  48. data/lib/regexp_parser/syntax.rb +1 -1
  49. data/lib/regexp_parser/version.rb +1 -1
  50. data/lib/regexp_parser.rb +6 -6
  51. data/regexp_parser.gemspec +5 -5
  52. metadata +14 -8
  53. data/CHANGELOG.md +0 -601
  54. data/README.md +0 -503
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cadf1761e17469c6bf76db652a4f6fc97a3d33b7eaa46e6ea16f95ee6661743d
4
- data.tar.gz: 3d6252f67f201b3cb6a3b94721c65b39abfe7b13bf0097fc9144498f6fdf8837
3
+ metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
4
+ data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
5
5
  SHA512:
6
- metadata.gz: 3fb24f56b5d8da354aa5825dc2e9432c7e8bd836c9c2a7009c8883e367fb8ca61020a04854c714cacff913281b1156b4663334696edcb1d7e9239d8c8184d439
7
- data.tar.gz: e793b72a9394e26bf0b9e6cb58c7536b72c30562382713f8b60735969f3b3b9b3aea78bf45efa661397d7141c2684a6df2b32cc8b449c413ea9d11c90c5396db
6
+ metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
7
+ data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
7
- gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.3'
6
+ gem 'leto', '~> 2.1'
7
+ gem 'rake', '~> 13.1'
8
+ gem 'regexp_property_values', '~> 1.5'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'benchmark-ips', '~> 2.1'
12
- gem 'gouteur'
13
- gem 'rubocop', '~> 1.7'
12
+ gem 'gouteur', '~> 1.1'
13
+ gem 'rubocop', '~> 1.59'
14
14
  end
15
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2022, Ammar Ali
1
+ Copyright (c) 2010, 2012-2024, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
@@ -6,13 +6,6 @@ module Regexp::Expression
6
6
  init_from_token_and_options(token, options)
7
7
  end
8
8
 
9
- def initialize_copy(orig)
10
- self.text = orig.text.dup if orig.text
11
- self.options = orig.options.dup if orig.options
12
- self.quantifier = orig.quantifier.clone if orig.quantifier
13
- super
14
- end
15
-
16
9
  def to_re(format = :full)
17
10
  if set_level > 0
18
11
  warn "Calling #to_re on character set members is deprecated - "\
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,11 +1,22 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :backref, one way or the other, in v3.0.0
3
2
  module Backreference
4
3
  class Base < Regexp::Expression::Base
5
4
  attr_accessor :referenced_expression
6
5
 
7
6
  def initialize_copy(orig)
8
- self.referenced_expression = orig.referenced_expression.dup
7
+ exp_id = [self.class, self.starts_at]
8
+
9
+ # prevent infinite recursion for recursive subexp calls
10
+ copied = @@copied ||= {}
11
+ self.referenced_expression =
12
+ if copied[exp_id]
13
+ orig.referenced_expression
14
+ else
15
+ copied[exp_id] = true
16
+ orig.referenced_expression.dup
17
+ end
18
+ copied.clear
19
+
9
20
  super
10
21
  end
11
22
  end
@@ -15,7 +26,7 @@ module Regexp::Expression
15
26
  alias reference number
16
27
 
17
28
  def initialize(token, options = {})
18
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
29
+ @number = token.text[/-?\d+/].to_i
19
30
  super
20
31
  end
21
32
  end
@@ -58,4 +69,7 @@ module Regexp::Expression
58
69
  end
59
70
  end
60
71
  end
72
+
73
+ # alias for symmetry between token symbol and Expression class name
74
+ Backref = Backreference
61
75
  end
@@ -1,10 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
9
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
15
14
  def complete?
16
15
  count == 2
17
16
  end
18
-
19
- def parts
20
- intersperse(expressions, text.dup)
21
- end
22
17
  end
23
18
  end
24
19
  end
@@ -1,10 +1,7 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  attr_accessor :closed, :negative
4
-
5
- alias :negative? :negative
6
- alias :negated? :negative
7
- alias :closed? :closed
4
+ alias :closed? :closed
8
5
 
9
6
  def initialize(token, options = {})
10
7
  self.negative = false
@@ -19,9 +16,8 @@ module Regexp::Expression
19
16
  def close
20
17
  self.closed = true
21
18
  end
22
-
23
- def parts
24
- ["#{text}#{'^' if negated?}", *expressions, ']']
25
- end
26
19
  end
20
+
21
+ # alias for symmetry between token symbol and Expression class name
22
+ Set = CharacterSet
27
23
  end # module Regexp::Expression
@@ -31,9 +31,9 @@ module Regexp::Expression
31
31
  expressions.last << exp
32
32
  end
33
33
 
34
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
35
35
  raise TooManyBranches.new if branches.length == 2
36
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
37
37
  Branch.add_to(self, params, active_opts)
38
38
  end
39
39
  alias :branch :add_sequence
@@ -55,10 +55,6 @@ module Regexp::Expression
55
55
  condition.reference
56
56
  end
57
57
 
58
- def parts
59
- [text.dup, condition, *intersperse(branches, '|'), ')']
60
- end
61
-
62
58
  def initialize_copy(orig)
63
59
  self.referenced_expression = orig.referenced_expression.dup
64
60
  super
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
3
2
  module EscapeSequence
4
3
  class Base < Regexp::Expression::Base
5
4
  def codepoint
@@ -97,4 +96,7 @@ module Regexp::Expression
97
96
  end
98
97
  end
99
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
100
102
  end
@@ -5,10 +5,12 @@ module Regexp::Expression
5
5
  end
6
6
  end
7
7
 
8
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
9
10
 
10
11
  class WhiteSpace < Regexp::Expression::FreeSpace
11
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
12
14
  text << exp.text
13
15
  end
14
16
  end
@@ -1,13 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def parts
5
- [text.dup, *expressions, ')']
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
4
  end
12
5
 
13
6
  class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
18
11
  super
19
12
  end
20
13
 
21
- def parts
22
- if implicit?
23
- expressions
24
- else
25
- super
26
- end
27
- end
28
-
29
14
  def implicit?
30
15
  @implicit
31
16
  end
@@ -55,8 +40,6 @@ module Regexp::Expression
55
40
  class Capture < Group::Base
56
41
  attr_accessor :number, :number_at_level
57
42
  alias identifier number
58
-
59
- def capturing?; true end
60
43
  end
61
44
 
62
45
  class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
75
58
  end
76
59
 
77
60
  class Comment < Group::Base
78
- def parts
79
- [text.dup]
80
- end
81
-
82
- def comment?; true end
83
61
  end
84
62
  end
85
63
 
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
- # TOOD: in regexp_parser v3.0.0 this should possibly be a Subexpression
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
4
  # that contains all expressions to its left.
5
5
  class Mark < Regexp::Expression::Base; end
6
6
  end
@@ -1,11 +1,11 @@
1
1
  module Regexp::Expression
2
2
  class PosixClass < Regexp::Expression::Base
3
- def negative?
4
- type == :nonposixclass
5
- end
6
-
7
3
  def name
8
- token.to_s
4
+ text[/\w+/]
9
5
  end
10
6
  end
7
+
8
+ # alias for symmetry between token symbol and Expression class name
9
+ Posixclass = PosixClass
10
+ Nonposixclass = PosixClass
11
11
  end
@@ -1,17 +1,12 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :property, one way or the other, in v3.0.0
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
- def negative?
6
- type == :nonproperty
7
- end
8
-
9
4
  def name
10
5
  text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
6
  end
12
7
 
13
8
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
9
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
10
  end
16
11
  end
17
12
 
@@ -110,10 +105,15 @@ module Regexp::Expression
110
105
  class Unassigned < Codepoint::Base; end
111
106
  end
112
107
 
113
- class Age < UnicodeProperty::Base; end
114
- class Derived < UnicodeProperty::Base; end
115
- class Emoji < UnicodeProperty::Base; end
116
- class Script < UnicodeProperty::Base; end
117
- class Block < UnicodeProperty::Base; end
108
+ class Age < UnicodeProperty::Base; end
109
+ class Block < UnicodeProperty::Base; end
110
+ class Derived < UnicodeProperty::Base; end
111
+ class Emoji < UnicodeProperty::Base; end
112
+ class Enumerated < UnicodeProperty::Base; end
113
+ class Script < UnicodeProperty::Base; end
118
114
  end
115
+
116
+ # alias for symmetry between token symbol and Expression class name
117
+ Property = UnicodeProperty
118
+ Nonproperty = UnicodeProperty
119
119
  end # module Regexp::Expression
@@ -25,11 +25,9 @@ module Regexp::Expression
25
25
  def token_class
26
26
  if self == Root || self < Sequence
27
27
  nil # no token class because these objects are Parser-generated
28
- # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
- elsif self == Alternation || self == CharacterType::Any
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
30
  Regexp::Syntax::Token::Meta
31
- elsif self <= EscapeSequence::Base
32
- Regexp::Syntax::Token::Escape
33
31
  else
34
32
  Regexp::Syntax::Token.const_get(name.split('::')[2])
35
33
  end
@@ -63,16 +63,20 @@ class Regexp::MatchLength
63
63
  end
64
64
 
65
65
  def to_re
66
- "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
66
+ /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
67
67
  end
68
68
 
69
69
  private
70
70
 
71
71
  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
72
72
 
73
- def test_regexp
74
- @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
75
- regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
73
+ if Regexp.method_defined?(:match?) # ruby >= 2.4
74
+ def test_regexp
75
+ @test_regexp ||= /^#{to_re}$/
76
+ end
77
+ else
78
+ def test_regexp
79
+ @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
76
80
  end
77
81
  end
78
82
  end
@@ -0,0 +1,20 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def negative?
4
+ false
5
+ end
6
+
7
+ # not an alias so as to respect overrides of #negative?
8
+ def negated?
9
+ negative?
10
+ end
11
+ end
12
+
13
+ Anchor::NonWordBoundary.class_eval { def negative?; true end }
14
+ Assertion::NegativeLookahead.class_eval { def negative?; true end }
15
+ Assertion::NegativeLookbehind.class_eval { def negative?; true end }
16
+ CharacterSet.class_eval { def negative?; negative end }
17
+ CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
18
+ PosixClass.class_eval { def negative?; type == :nonposixclass end }
19
+ UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
20
+ end
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
@@ -95,12 +95,49 @@ module Regexp::Expression
95
95
  end
96
96
 
97
97
  # Deep-compare two expressions for equality.
98
+ #
99
+ # When changing the conditions, please make sure to update
100
+ # #pretty_print_instance_variables so that it includes all relevant values.
98
101
  def ==(other)
99
- other.class == self.class &&
100
- other.to_s == to_s &&
101
- other.options == options
102
+ self.class == other.class &&
103
+ text == other.text &&
104
+ quantifier == other.quantifier &&
105
+ options == other.options &&
106
+ (terminal? || expressions == other.expressions)
102
107
  end
103
108
  alias :=== :==
104
109
  alias :eql? :==
110
+
111
+ def optional?
112
+ quantified? && quantifier.min == 0
113
+ end
114
+
115
+ def quantified?
116
+ !quantifier.nil?
117
+ end
105
118
  end
119
+
120
+ Shared.class_eval { def terminal?; self.class.terminal? end }
121
+ Shared::ClassMethods.class_eval { def terminal?; true end }
122
+ Subexpression.instance_eval { def terminal?; false end }
123
+
124
+ Shared.class_eval { def capturing?; self.class.capturing? end }
125
+ Shared::ClassMethods.class_eval { def capturing?; false end }
126
+ Group::Capture.instance_eval { def capturing?; true end }
127
+
128
+ Shared.class_eval { def comment?; self.class.comment? end }
129
+ Shared::ClassMethods.class_eval { def comment?; false end }
130
+ Comment.instance_eval { def comment?; true end }
131
+ Group::Comment.instance_eval { def comment?; true end }
132
+
133
+ Shared.class_eval { def decorative?; self.class.decorative? end }
134
+ Shared::ClassMethods.class_eval { def decorative?; false end }
135
+ FreeSpace.instance_eval { def decorative?; true end }
136
+ Group::Comment.instance_eval { def decorative?; true end }
137
+
138
+ Shared.class_eval { def referential?; self.class.referential? end }
139
+ Shared::ClassMethods.class_eval { def referential?; false end }
140
+ Backreference::Base.instance_eval { def referential?; true end }
141
+ Conditional::Condition.instance_eval { def referential?; true end }
142
+ Conditional::Expression.instance_eval { def referential?; true end }
106
143
  end
@@ -1,6 +1,22 @@
1
1
  module Regexp::Expression
2
2
  class Subexpression < Regexp::Expression::Base
3
3
 
4
+ # Traverses the expression, passing each recursive child to the
5
+ # given block.
6
+ # If the block takes two arguments, the indices of the children within
7
+ # their parents are also passed to it.
8
+ def each_expression(include_self = false, &block)
9
+ return enum_for(__method__, include_self) unless block
10
+
11
+ if block.arity == 1
12
+ block.call(self) if include_self
13
+ each_expression_without_index(&block)
14
+ else
15
+ block.call(self, 0) if include_self
16
+ each_expression_with_index(&block)
17
+ end
18
+ end
19
+
4
20
  # Traverses the subexpression (depth-first, pre-order) and calls the given
5
21
  # block for each expression with three arguments; the traversal event,
6
22
  # the expression, and the index of the expression within its parent.
@@ -34,31 +50,31 @@ module Regexp::Expression
34
50
  end
35
51
  alias :walk :traverse
36
52
 
37
- # Iterates over the expressions of this expression as an array, passing
38
- # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false)
40
- return enum_for(__method__, include_self) unless block_given?
41
-
42
- traverse(include_self) do |event, exp, index|
43
- yield(exp, index) unless event == :exit
44
- end
45
- end
46
-
47
53
  # Returns a new array with the results of calling the given block once
48
54
  # for every expression. If a block is not given, returns an array with
49
55
  # each expression and its level index as an array.
50
- def flat_map(include_self = false)
51
- result = []
56
+ def flat_map(include_self = false, &block)
57
+ case block && block.arity
58
+ when nil then each_expression(include_self).to_a
59
+ when 2 then each_expression(include_self).map(&block)
60
+ else each_expression(include_self).map { |exp| block.call(exp) }
61
+ end
62
+ end
52
63
 
53
- each_expression(include_self) do |exp, index|
54
- if block_given?
55
- result << yield(exp, index)
56
- else
57
- result << [exp, index]
58
- end
64
+ protected
65
+
66
+ def each_expression_with_index(&block)
67
+ each_with_index do |exp, index|
68
+ block.call(exp, index)
69
+ exp.each_expression_with_index(&block) unless exp.terminal?
59
70
  end
71
+ end
60
72
 
61
- result
73
+ def each_expression_without_index(&block)
74
+ each do |exp|
75
+ block.call(exp)
76
+ exp.each_expression_without_index(&block) unless exp.terminal?
77
+ end
62
78
  end
63
79
  end
64
80
  end
@@ -8,14 +8,10 @@ module Regexp::Expression
8
8
 
9
9
  MODES = %i[greedy possessive reluctant]
10
10
 
11
- attr_reader :min, :max, :mode
12
-
13
11
  def initialize(*args)
14
12
  deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
15
13
 
16
14
  init_from_token_and_options(*args)
17
- @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
- @min, @max = minmax
19
15
  # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
16
  self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
21
17
  end
@@ -39,9 +35,21 @@ module Regexp::Expression
39
35
  end
40
36
  alias :lazy? :reluctant?
41
37
 
38
+ def min
39
+ derived_data[:min]
40
+ end
41
+
42
+ def max
43
+ derived_data[:max]
44
+ end
45
+
46
+ def mode
47
+ derived_data[:mode]
48
+ end
49
+
42
50
  private
43
51
 
44
- def deprecated_old_init(token, text, min, max, mode = :greedy)
52
+ def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
45
53
  warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
54
  "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
55
  "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
@@ -51,20 +59,25 @@ module Regexp::Expression
51
59
  "This is consistent with how Expression::Base instances are created. "
52
60
  @token = token
53
61
  @text = text
54
- @min = min
55
- @max = max
56
- @mode = mode
57
62
  end
58
63
 
59
- def minmax
60
- case token
61
- when /zero_or_one/ then [0, 1]
62
- when /zero_or_more/ then [0, -1]
63
- when /one_or_more/ then [1, -1]
64
- when :interval
65
- int_min = text[/\{(\d*)/, 1]
66
- int_max = text[/,?(\d*)\}/, 1]
67
- [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
64
+ def derived_data
65
+ @derived_data ||= begin
66
+ min, max =
67
+ case text[0]
68
+ when '?'; [0, 1]
69
+ when '*'; [0, -1]
70
+ when '+'; [1, -1]
71
+ else
72
+ int_min = text[/\{(\d*)/, 1]
73
+ int_max = text[/,?(\d*)\}/, 1]
74
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
75
+ end
76
+
77
+ mod = text[/.([?+])/, 1]
78
+ mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
79
+
80
+ { min: min, max: max, mode: mode }
68
81
  end
69
82
  end
70
83
  end
@@ -12,25 +12,20 @@ module Regexp::Expression
12
12
  level: exp.level,
13
13
  set_level: exp.set_level,
14
14
  conditional_level: params[:conditional_level] || exp.conditional_level,
15
+ ts: params[:ts],
15
16
  )
16
- sequence.nesting_level = exp.nesting_level + 1
17
17
  sequence.options = active_opts
18
18
  exp.expressions << sequence
19
19
  sequence
20
20
  end
21
21
  end
22
22
 
23
- def starts_at
24
- expressions.first.starts_at
23
+ def ts
24
+ (head = expressions.first) ? head.ts : @ts
25
25
  end
26
- alias :ts :starts_at
27
26
 
28
- def quantify(*args)
29
- target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
30
- target or raise Regexp::Parser::Error,
31
- "No valid target found for '#{text}' quantifier"
32
-
33
- target.quantify(*args)
27
+ def quantify(token, *args)
28
+ extract_quantifier_target(token.text).quantify(token, *args)
34
29
  end
35
30
  end
36
31
  end