regexp_parser 2.7.0 → 2.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +5 -5
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -9
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -24
- data/lib/regexp_parser/expression/subexpression.rb +20 -18
- data/lib/regexp_parser/expression.rb +34 -31
- data/lib/regexp_parser/lexer.rb +15 -7
- data/lib/regexp_parser/parser.rb +91 -91
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +29 -0
- data/lib/regexp_parser/scanner/properties/short.csv +3 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +44 -130
- data/lib/regexp_parser/scanner.rb +1096 -1297
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +5 -5
- metadata +14 -8
- data/CHANGELOG.md +0 -632
- data/README.md +0 -503
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
|
4
|
+
data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
|
7
|
+
data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem '
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
|
-
gem 'gouteur'
|
13
|
-
gem 'rubocop', '~> 1.
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
@@ -6,13 +6,6 @@ module Regexp::Expression
|
|
6
6
|
init_from_token_and_options(token, options)
|
7
7
|
end
|
8
8
|
|
9
|
-
def initialize_copy(orig)
|
10
|
-
self.text = orig.text.dup if orig.text
|
11
|
-
self.options = orig.options.dup if orig.options
|
12
|
-
self.quantifier = orig.quantifier.clone if orig.quantifier
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
9
|
def to_re(format = :full)
|
17
10
|
if set_level > 0
|
18
11
|
warn "Calling #to_re on character set members is deprecated - "\
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :backref, one way or the other, in v3.0.0
|
3
2
|
module Backreference
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
attr_accessor :referenced_expression
|
@@ -20,10 +19,6 @@ module Regexp::Expression
|
|
20
19
|
|
21
20
|
super
|
22
21
|
end
|
23
|
-
|
24
|
-
def referential?
|
25
|
-
true
|
26
|
-
end
|
27
22
|
end
|
28
23
|
|
29
24
|
class Number < Backreference::Base
|
@@ -31,7 +26,7 @@ module Regexp::Expression
|
|
31
26
|
alias reference number
|
32
27
|
|
33
28
|
def initialize(token, options = {})
|
34
|
-
@number = token.text[
|
29
|
+
@number = token.text[/-?\d+/].to_i
|
35
30
|
super
|
36
31
|
end
|
37
32
|
end
|
@@ -74,4 +69,7 @@ module Regexp::Expression
|
|
74
69
|
end
|
75
70
|
end
|
76
71
|
end
|
72
|
+
|
73
|
+
# alias for symmetry between token symbol and Expression class name
|
74
|
+
Backref = Backreference
|
77
75
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
9
|
complete? and raise Regexp::Parser::Error,
|
@@ -15,10 +14,6 @@ module Regexp::Expression
|
|
15
14
|
def complete?
|
16
15
|
count == 2
|
17
16
|
end
|
18
|
-
|
19
|
-
def parts
|
20
|
-
intersperse(expressions, text.dup)
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
@@ -19,9 +16,8 @@ module Regexp::Expression
|
|
19
16
|
def close
|
20
17
|
self.closed = true
|
21
18
|
end
|
22
|
-
|
23
|
-
def parts
|
24
|
-
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
|
-
end
|
26
19
|
end
|
20
|
+
|
21
|
+
# alias for symmetry between token symbol and Expression class name
|
22
|
+
Set = CharacterSet
|
27
23
|
end # module Regexp::Expression
|
@@ -20,10 +20,6 @@ module Regexp::Expression
|
|
20
20
|
self.referenced_expression = orig.referenced_expression.dup
|
21
21
|
super
|
22
22
|
end
|
23
|
-
|
24
|
-
def referential?
|
25
|
-
true
|
26
|
-
end
|
27
23
|
end
|
28
24
|
|
29
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -35,9 +31,9 @@ module Regexp::Expression
|
|
35
31
|
expressions.last << exp
|
36
32
|
end
|
37
33
|
|
38
|
-
def add_sequence(active_opts = {})
|
34
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
39
35
|
raise TooManyBranches.new if branches.length == 2
|
40
|
-
params = { conditional_level: conditional_level + 1 }
|
36
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
41
37
|
Branch.add_to(self, params, active_opts)
|
42
38
|
end
|
43
39
|
alias :branch :add_sequence
|
@@ -59,14 +55,6 @@ module Regexp::Expression
|
|
59
55
|
condition.reference
|
60
56
|
end
|
61
57
|
|
62
|
-
def referential?
|
63
|
-
true
|
64
|
-
end
|
65
|
-
|
66
|
-
def parts
|
67
|
-
[text.dup, condition, *intersperse(branches, '|'), ')']
|
68
|
-
end
|
69
|
-
|
70
58
|
def initialize_copy(orig)
|
71
59
|
self.referenced_expression = orig.referenced_expression.dup
|
72
60
|
super
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
2
|
module EscapeSequence
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def codepoint
|
@@ -97,4 +96,7 @@ module Regexp::Expression
|
|
97
96
|
end
|
98
97
|
end
|
99
98
|
end
|
99
|
+
|
100
|
+
# alias for symmetry between Token::* and Expression::*
|
101
|
+
Escape = EscapeSequence
|
100
102
|
end
|
@@ -5,10 +5,12 @@ module Regexp::Expression
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
9
10
|
|
10
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
11
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
12
14
|
text << exp.text
|
13
15
|
end
|
14
16
|
end
|
@@ -1,13 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def parts
|
5
|
-
[text.dup, *expressions, ')']
|
6
|
-
end
|
7
|
-
|
8
|
-
def capturing?; false end
|
9
|
-
|
10
|
-
def comment?; false end
|
11
4
|
end
|
12
5
|
|
13
6
|
class Passive < Group::Base
|
@@ -18,14 +11,6 @@ module Regexp::Expression
|
|
18
11
|
super
|
19
12
|
end
|
20
13
|
|
21
|
-
def parts
|
22
|
-
if implicit?
|
23
|
-
expressions
|
24
|
-
else
|
25
|
-
super
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
14
|
def implicit?
|
30
15
|
@implicit
|
31
16
|
end
|
@@ -55,8 +40,6 @@ module Regexp::Expression
|
|
55
40
|
class Capture < Group::Base
|
56
41
|
attr_accessor :number, :number_at_level
|
57
42
|
alias identifier number
|
58
|
-
|
59
|
-
def capturing?; true end
|
60
43
|
end
|
61
44
|
|
62
45
|
class Named < Group::Capture
|
@@ -75,11 +58,6 @@ module Regexp::Expression
|
|
75
58
|
end
|
76
59
|
|
77
60
|
class Comment < Group::Base
|
78
|
-
def parts
|
79
|
-
[text.dup]
|
80
|
-
end
|
81
|
-
|
82
|
-
def comment?; true end
|
83
61
|
end
|
84
62
|
end
|
85
63
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Keep
|
3
|
-
#
|
3
|
+
# TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
|
4
4
|
# that contains all expressions to its left.
|
5
5
|
class Mark < Regexp::Expression::Base; end
|
6
6
|
end
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class PosixClass < Regexp::Expression::Base
|
3
|
-
def negative?
|
4
|
-
type == :nonposixclass
|
5
|
-
end
|
6
|
-
|
7
3
|
def name
|
8
|
-
|
4
|
+
text[/\w+/]
|
9
5
|
end
|
10
6
|
end
|
7
|
+
|
8
|
+
# alias for symmetry between token symbol and Expression class name
|
9
|
+
Posixclass = PosixClass
|
10
|
+
Nonposixclass = PosixClass
|
11
11
|
end
|
@@ -1,17 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :property, one way or the other, in v3.0.0
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
|
-
def negative?
|
6
|
-
type == :nonproperty
|
7
|
-
end
|
8
|
-
|
9
4
|
def name
|
10
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
6
|
end
|
12
7
|
|
13
8
|
def shortcut
|
14
|
-
|
9
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
10
|
end
|
16
11
|
end
|
17
12
|
|
@@ -110,10 +105,15 @@ module Regexp::Expression
|
|
110
105
|
class Unassigned < Codepoint::Base; end
|
111
106
|
end
|
112
107
|
|
113
|
-
class Age
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
117
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
118
114
|
end
|
115
|
+
|
116
|
+
# alias for symmetry between token symbol and Expression class name
|
117
|
+
Property = UnicodeProperty
|
118
|
+
Nonproperty = UnicodeProperty
|
119
119
|
end # module Regexp::Expression
|
@@ -25,11 +25,9 @@ module Regexp::Expression
|
|
25
25
|
def token_class
|
26
26
|
if self == Root || self < Sequence
|
27
27
|
nil # no token class because these objects are Parser-generated
|
28
|
-
# TODO: synch exp
|
29
|
-
elsif self ==
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
30
|
Regexp::Syntax::Token::Meta
|
31
|
-
elsif self <= EscapeSequence::Base
|
32
|
-
Regexp::Syntax::Token::Escape
|
33
31
|
else
|
34
32
|
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
33
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -95,12 +95,49 @@ module Regexp::Expression
|
|
95
95
|
end
|
96
96
|
|
97
97
|
# Deep-compare two expressions for equality.
|
98
|
+
#
|
99
|
+
# When changing the conditions, please make sure to update
|
100
|
+
# #pretty_print_instance_variables so that it includes all relevant values.
|
98
101
|
def ==(other)
|
99
|
-
|
100
|
-
other.
|
101
|
-
other.
|
102
|
+
self.class == other.class &&
|
103
|
+
text == other.text &&
|
104
|
+
quantifier == other.quantifier &&
|
105
|
+
options == other.options &&
|
106
|
+
(terminal? || expressions == other.expressions)
|
102
107
|
end
|
103
108
|
alias :=== :==
|
104
109
|
alias :eql? :==
|
110
|
+
|
111
|
+
def optional?
|
112
|
+
quantified? && quantifier.min == 0
|
113
|
+
end
|
114
|
+
|
115
|
+
def quantified?
|
116
|
+
!quantifier.nil?
|
117
|
+
end
|
105
118
|
end
|
119
|
+
|
120
|
+
Shared.class_eval { def terminal?; self.class.terminal? end }
|
121
|
+
Shared::ClassMethods.class_eval { def terminal?; true end }
|
122
|
+
Subexpression.instance_eval { def terminal?; false end }
|
123
|
+
|
124
|
+
Shared.class_eval { def capturing?; self.class.capturing? end }
|
125
|
+
Shared::ClassMethods.class_eval { def capturing?; false end }
|
126
|
+
Group::Capture.instance_eval { def capturing?; true end }
|
127
|
+
|
128
|
+
Shared.class_eval { def comment?; self.class.comment? end }
|
129
|
+
Shared::ClassMethods.class_eval { def comment?; false end }
|
130
|
+
Comment.instance_eval { def comment?; true end }
|
131
|
+
Group::Comment.instance_eval { def comment?; true end }
|
132
|
+
|
133
|
+
Shared.class_eval { def decorative?; self.class.decorative? end }
|
134
|
+
Shared::ClassMethods.class_eval { def decorative?; false end }
|
135
|
+
FreeSpace.instance_eval { def decorative?; true end }
|
136
|
+
Group::Comment.instance_eval { def decorative?; true end }
|
137
|
+
|
138
|
+
Shared.class_eval { def referential?; self.class.referential? end }
|
139
|
+
Shared::ClassMethods.class_eval { def referential?; false end }
|
140
|
+
Backreference::Base.instance_eval { def referential?; true end }
|
141
|
+
Conditional::Condition.instance_eval { def referential?; true end }
|
142
|
+
Conditional::Expression.instance_eval { def referential?; true end }
|
106
143
|
end
|
@@ -1,6 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Subexpression < Regexp::Expression::Base
|
3
3
|
|
4
|
+
# Traverses the expression, passing each recursive child to the
|
5
|
+
# given block.
|
6
|
+
# If the block takes two arguments, the indices of the children within
|
7
|
+
# their parents are also passed to it.
|
8
|
+
def each_expression(include_self = false, &block)
|
9
|
+
return enum_for(__method__, include_self) unless block
|
10
|
+
|
11
|
+
if block.arity == 1
|
12
|
+
block.call(self) if include_self
|
13
|
+
each_expression_without_index(&block)
|
14
|
+
else
|
15
|
+
block.call(self, 0) if include_self
|
16
|
+
each_expression_with_index(&block)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
4
20
|
# Traverses the subexpression (depth-first, pre-order) and calls the given
|
5
21
|
# block for each expression with three arguments; the traversal event,
|
6
22
|
# the expression, and the index of the expression within its parent.
|
@@ -34,34 +50,31 @@ module Regexp::Expression
|
|
34
50
|
end
|
35
51
|
alias :walk :traverse
|
36
52
|
|
37
|
-
#
|
38
|
-
#
|
39
|
-
|
40
|
-
|
53
|
+
# Returns a new array with the results of calling the given block once
|
54
|
+
# for every expression. If a block is not given, returns an array with
|
55
|
+
# each expression and its level index as an array.
|
56
|
+
def flat_map(include_self = false, &block)
|
57
|
+
case block && block.arity
|
58
|
+
when nil then each_expression(include_self).to_a
|
59
|
+
when 2 then each_expression(include_self).map(&block)
|
60
|
+
else each_expression(include_self).map { |exp| block.call(exp) }
|
61
|
+
end
|
62
|
+
end
|
41
63
|
|
42
|
-
|
64
|
+
protected
|
43
65
|
|
66
|
+
def each_expression_with_index(&block)
|
44
67
|
each_with_index do |exp, index|
|
45
68
|
block.call(exp, index)
|
46
|
-
exp.
|
69
|
+
exp.each_expression_with_index(&block) unless exp.terminal?
|
47
70
|
end
|
48
71
|
end
|
49
72
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
result = []
|
55
|
-
|
56
|
-
each_expression(include_self) do |exp, index|
|
57
|
-
if block_given?
|
58
|
-
result << yield(exp, index)
|
59
|
-
else
|
60
|
-
result << [exp, index]
|
61
|
-
end
|
73
|
+
def each_expression_without_index(&block)
|
74
|
+
each do |exp|
|
75
|
+
block.call(exp)
|
76
|
+
exp.each_expression_without_index(&block) unless exp.terminal?
|
62
77
|
end
|
63
|
-
|
64
|
-
result
|
65
78
|
end
|
66
79
|
end
|
67
80
|
end
|
@@ -8,14 +8,10 @@ module Regexp::Expression
|
|
8
8
|
|
9
9
|
MODES = %i[greedy possessive reluctant]
|
10
10
|
|
11
|
-
attr_reader :min, :max, :mode
|
12
|
-
|
13
11
|
def initialize(*args)
|
14
12
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
15
13
|
|
16
14
|
init_from_token_and_options(*args)
|
17
|
-
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
|
-
@min, @max = minmax
|
19
15
|
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
16
|
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
21
17
|
end
|
@@ -39,9 +35,21 @@ module Regexp::Expression
|
|
39
35
|
end
|
40
36
|
alias :lazy? :reluctant?
|
41
37
|
|
38
|
+
def min
|
39
|
+
derived_data[:min]
|
40
|
+
end
|
41
|
+
|
42
|
+
def max
|
43
|
+
derived_data[:max]
|
44
|
+
end
|
45
|
+
|
46
|
+
def mode
|
47
|
+
derived_data[:mode]
|
48
|
+
end
|
49
|
+
|
42
50
|
private
|
43
51
|
|
44
|
-
def deprecated_old_init(token, text,
|
52
|
+
def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
|
45
53
|
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
54
|
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
55
|
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
@@ -51,20 +59,25 @@ module Regexp::Expression
|
|
51
59
|
"This is consistent with how Expression::Base instances are created. "
|
52
60
|
@token = token
|
53
61
|
@text = text
|
54
|
-
@min = min
|
55
|
-
@max = max
|
56
|
-
@mode = mode
|
57
62
|
end
|
58
63
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
def derived_data
|
65
|
+
@derived_data ||= begin
|
66
|
+
min, max =
|
67
|
+
case text[0]
|
68
|
+
when '?'; [0, 1]
|
69
|
+
when '*'; [0, -1]
|
70
|
+
when '+'; [1, -1]
|
71
|
+
else
|
72
|
+
int_min = text[/\{(\d*)/, 1]
|
73
|
+
int_max = text[/,?(\d*)\}/, 1]
|
74
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
75
|
+
end
|
76
|
+
|
77
|
+
mod = text[/.([?+])/, 1]
|
78
|
+
mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
|
79
|
+
|
80
|
+
{ min: min, max: max, mode: mode }
|
68
81
|
end
|
69
82
|
end
|
70
83
|
end
|
@@ -12,6 +12,7 @@ module Regexp::Expression
|
|
12
12
|
level: exp.level,
|
13
13
|
set_level: exp.set_level,
|
14
14
|
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
|
+
ts: params[:ts],
|
15
16
|
)
|
16
17
|
sequence.options = active_opts
|
17
18
|
exp.expressions << sequence
|
@@ -19,17 +20,12 @@ module Regexp::Expression
|
|
19
20
|
end
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
-
expressions.first.
|
23
|
+
def ts
|
24
|
+
(head = expressions.first) ? head.ts : @ts
|
24
25
|
end
|
25
|
-
alias :ts :starts_at
|
26
26
|
|
27
|
-
def quantify(*args)
|
28
|
-
|
29
|
-
target or raise Regexp::Parser::Error,
|
30
|
-
"No valid target found for '#{text}' quantifier"
|
31
|
-
|
32
|
-
target.quantify(*args)
|
27
|
+
def quantify(token, *args)
|
28
|
+
extract_quantifier_target(token.text).quantify(token, *args)
|
33
29
|
end
|
34
30
|
end
|
35
31
|
end
|