regexp_parser 2.6.0 → 2.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +5 -5
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +17 -3
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -10
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -20
- data/lib/regexp_parser/expression/subexpression.rb +20 -15
- data/lib/regexp_parser/expression.rb +34 -31
- data/lib/regexp_parser/lexer.rb +76 -36
- data/lib/regexp_parser/parser.rb +101 -100
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +29 -0
- data/lib/regexp_parser/scanner/properties/short.csv +3 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +101 -172
- data/lib/regexp_parser/scanner.rb +1132 -1283
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +5 -5
- metadata +14 -8
- data/CHANGELOG.md +0 -601
- data/README.md +0 -503
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
|
4
|
+
data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
|
7
|
+
data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem '
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
|
-
gem 'gouteur'
|
13
|
-
gem 'rubocop', '~> 1.
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
@@ -6,13 +6,6 @@ module Regexp::Expression
|
|
6
6
|
init_from_token_and_options(token, options)
|
7
7
|
end
|
8
8
|
|
9
|
-
def initialize_copy(orig)
|
10
|
-
self.text = orig.text.dup if orig.text
|
11
|
-
self.options = orig.options.dup if orig.options
|
12
|
-
self.quantifier = orig.quantifier.clone if orig.quantifier
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
9
|
def to_re(format = :full)
|
17
10
|
if set_level > 0
|
18
11
|
warn "Calling #to_re on character set members is deprecated - "\
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -1,11 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :backref, one way or the other, in v3.0.0
|
3
2
|
module Backreference
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
attr_accessor :referenced_expression
|
6
5
|
|
7
6
|
def initialize_copy(orig)
|
8
|
-
|
7
|
+
exp_id = [self.class, self.starts_at]
|
8
|
+
|
9
|
+
# prevent infinite recursion for recursive subexp calls
|
10
|
+
copied = @@copied ||= {}
|
11
|
+
self.referenced_expression =
|
12
|
+
if copied[exp_id]
|
13
|
+
orig.referenced_expression
|
14
|
+
else
|
15
|
+
copied[exp_id] = true
|
16
|
+
orig.referenced_expression.dup
|
17
|
+
end
|
18
|
+
copied.clear
|
19
|
+
|
9
20
|
super
|
10
21
|
end
|
11
22
|
end
|
@@ -15,7 +26,7 @@ module Regexp::Expression
|
|
15
26
|
alias reference number
|
16
27
|
|
17
28
|
def initialize(token, options = {})
|
18
|
-
@number = token.text[
|
29
|
+
@number = token.text[/-?\d+/].to_i
|
19
30
|
super
|
20
31
|
end
|
21
32
|
end
|
@@ -58,4 +69,7 @@ module Regexp::Expression
|
|
58
69
|
end
|
59
70
|
end
|
60
71
|
end
|
72
|
+
|
73
|
+
# alias for symmetry between token symbol and Expression class name
|
74
|
+
Backref = Backreference
|
61
75
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
9
|
complete? and raise Regexp::Parser::Error,
|
@@ -15,10 +14,6 @@ module Regexp::Expression
|
|
15
14
|
def complete?
|
16
15
|
count == 2
|
17
16
|
end
|
18
|
-
|
19
|
-
def parts
|
20
|
-
intersperse(expressions, text.dup)
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
@@ -19,9 +16,8 @@ module Regexp::Expression
|
|
19
16
|
def close
|
20
17
|
self.closed = true
|
21
18
|
end
|
22
|
-
|
23
|
-
def parts
|
24
|
-
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
|
-
end
|
26
19
|
end
|
20
|
+
|
21
|
+
# alias for symmetry between token symbol and Expression class name
|
22
|
+
Set = CharacterSet
|
27
23
|
end # module Regexp::Expression
|
@@ -31,9 +31,9 @@ module Regexp::Expression
|
|
31
31
|
expressions.last << exp
|
32
32
|
end
|
33
33
|
|
34
|
-
def add_sequence(active_opts = {})
|
34
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
35
35
|
raise TooManyBranches.new if branches.length == 2
|
36
|
-
params = { conditional_level: conditional_level + 1 }
|
36
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
37
37
|
Branch.add_to(self, params, active_opts)
|
38
38
|
end
|
39
39
|
alias :branch :add_sequence
|
@@ -55,10 +55,6 @@ module Regexp::Expression
|
|
55
55
|
condition.reference
|
56
56
|
end
|
57
57
|
|
58
|
-
def parts
|
59
|
-
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
|
-
end
|
61
|
-
|
62
58
|
def initialize_copy(orig)
|
63
59
|
self.referenced_expression = orig.referenced_expression.dup
|
64
60
|
super
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
2
|
module EscapeSequence
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def codepoint
|
@@ -97,4 +96,7 @@ module Regexp::Expression
|
|
97
96
|
end
|
98
97
|
end
|
99
98
|
end
|
99
|
+
|
100
|
+
# alias for symmetry between Token::* and Expression::*
|
101
|
+
Escape = EscapeSequence
|
100
102
|
end
|
@@ -5,10 +5,12 @@ module Regexp::Expression
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
9
10
|
|
10
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
11
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
12
14
|
text << exp.text
|
13
15
|
end
|
14
16
|
end
|
@@ -1,13 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def parts
|
5
|
-
[text.dup, *expressions, ')']
|
6
|
-
end
|
7
|
-
|
8
|
-
def capturing?; false end
|
9
|
-
|
10
|
-
def comment?; false end
|
11
4
|
end
|
12
5
|
|
13
6
|
class Passive < Group::Base
|
@@ -18,14 +11,6 @@ module Regexp::Expression
|
|
18
11
|
super
|
19
12
|
end
|
20
13
|
|
21
|
-
def parts
|
22
|
-
if implicit?
|
23
|
-
expressions
|
24
|
-
else
|
25
|
-
super
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
14
|
def implicit?
|
30
15
|
@implicit
|
31
16
|
end
|
@@ -55,8 +40,6 @@ module Regexp::Expression
|
|
55
40
|
class Capture < Group::Base
|
56
41
|
attr_accessor :number, :number_at_level
|
57
42
|
alias identifier number
|
58
|
-
|
59
|
-
def capturing?; true end
|
60
43
|
end
|
61
44
|
|
62
45
|
class Named < Group::Capture
|
@@ -75,11 +58,6 @@ module Regexp::Expression
|
|
75
58
|
end
|
76
59
|
|
77
60
|
class Comment < Group::Base
|
78
|
-
def parts
|
79
|
-
[text.dup]
|
80
|
-
end
|
81
|
-
|
82
|
-
def comment?; true end
|
83
61
|
end
|
84
62
|
end
|
85
63
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Keep
|
3
|
-
#
|
3
|
+
# TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
|
4
4
|
# that contains all expressions to its left.
|
5
5
|
class Mark < Regexp::Expression::Base; end
|
6
6
|
end
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class PosixClass < Regexp::Expression::Base
|
3
|
-
def negative?
|
4
|
-
type == :nonposixclass
|
5
|
-
end
|
6
|
-
|
7
3
|
def name
|
8
|
-
|
4
|
+
text[/\w+/]
|
9
5
|
end
|
10
6
|
end
|
7
|
+
|
8
|
+
# alias for symmetry between token symbol and Expression class name
|
9
|
+
Posixclass = PosixClass
|
10
|
+
Nonposixclass = PosixClass
|
11
11
|
end
|
@@ -1,17 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :property, one way or the other, in v3.0.0
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
|
-
def negative?
|
6
|
-
type == :nonproperty
|
7
|
-
end
|
8
|
-
|
9
4
|
def name
|
10
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
6
|
end
|
12
7
|
|
13
8
|
def shortcut
|
14
|
-
|
9
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
10
|
end
|
16
11
|
end
|
17
12
|
|
@@ -110,10 +105,15 @@ module Regexp::Expression
|
|
110
105
|
class Unassigned < Codepoint::Base; end
|
111
106
|
end
|
112
107
|
|
113
|
-
class Age
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
117
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
118
114
|
end
|
115
|
+
|
116
|
+
# alias for symmetry between token symbol and Expression class name
|
117
|
+
Property = UnicodeProperty
|
118
|
+
Nonproperty = UnicodeProperty
|
119
119
|
end # module Regexp::Expression
|
@@ -25,11 +25,9 @@ module Regexp::Expression
|
|
25
25
|
def token_class
|
26
26
|
if self == Root || self < Sequence
|
27
27
|
nil # no token class because these objects are Parser-generated
|
28
|
-
# TODO: synch exp
|
29
|
-
elsif self ==
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
30
|
Regexp::Syntax::Token::Meta
|
31
|
-
elsif self <= EscapeSequence::Base
|
32
|
-
Regexp::Syntax::Token::Escape
|
33
31
|
else
|
34
32
|
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
33
|
end
|
@@ -63,16 +63,20 @@ class Regexp::MatchLength
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def to_re
|
66
|
-
|
66
|
+
/(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
70
70
|
|
71
71
|
attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
if Regexp.method_defined?(:match?) # ruby >= 2.4
|
74
|
+
def test_regexp
|
75
|
+
@test_regexp ||= /^#{to_re}$/
|
76
|
+
end
|
77
|
+
else
|
78
|
+
def test_regexp
|
79
|
+
@test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -95,12 +95,49 @@ module Regexp::Expression
|
|
95
95
|
end
|
96
96
|
|
97
97
|
# Deep-compare two expressions for equality.
|
98
|
+
#
|
99
|
+
# When changing the conditions, please make sure to update
|
100
|
+
# #pretty_print_instance_variables so that it includes all relevant values.
|
98
101
|
def ==(other)
|
99
|
-
|
100
|
-
other.
|
101
|
-
other.
|
102
|
+
self.class == other.class &&
|
103
|
+
text == other.text &&
|
104
|
+
quantifier == other.quantifier &&
|
105
|
+
options == other.options &&
|
106
|
+
(terminal? || expressions == other.expressions)
|
102
107
|
end
|
103
108
|
alias :=== :==
|
104
109
|
alias :eql? :==
|
110
|
+
|
111
|
+
def optional?
|
112
|
+
quantified? && quantifier.min == 0
|
113
|
+
end
|
114
|
+
|
115
|
+
def quantified?
|
116
|
+
!quantifier.nil?
|
117
|
+
end
|
105
118
|
end
|
119
|
+
|
120
|
+
Shared.class_eval { def terminal?; self.class.terminal? end }
|
121
|
+
Shared::ClassMethods.class_eval { def terminal?; true end }
|
122
|
+
Subexpression.instance_eval { def terminal?; false end }
|
123
|
+
|
124
|
+
Shared.class_eval { def capturing?; self.class.capturing? end }
|
125
|
+
Shared::ClassMethods.class_eval { def capturing?; false end }
|
126
|
+
Group::Capture.instance_eval { def capturing?; true end }
|
127
|
+
|
128
|
+
Shared.class_eval { def comment?; self.class.comment? end }
|
129
|
+
Shared::ClassMethods.class_eval { def comment?; false end }
|
130
|
+
Comment.instance_eval { def comment?; true end }
|
131
|
+
Group::Comment.instance_eval { def comment?; true end }
|
132
|
+
|
133
|
+
Shared.class_eval { def decorative?; self.class.decorative? end }
|
134
|
+
Shared::ClassMethods.class_eval { def decorative?; false end }
|
135
|
+
FreeSpace.instance_eval { def decorative?; true end }
|
136
|
+
Group::Comment.instance_eval { def decorative?; true end }
|
137
|
+
|
138
|
+
Shared.class_eval { def referential?; self.class.referential? end }
|
139
|
+
Shared::ClassMethods.class_eval { def referential?; false end }
|
140
|
+
Backreference::Base.instance_eval { def referential?; true end }
|
141
|
+
Conditional::Condition.instance_eval { def referential?; true end }
|
142
|
+
Conditional::Expression.instance_eval { def referential?; true end }
|
106
143
|
end
|
@@ -1,6 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Subexpression < Regexp::Expression::Base
|
3
3
|
|
4
|
+
# Traverses the expression, passing each recursive child to the
|
5
|
+
# given block.
|
6
|
+
# If the block takes two arguments, the indices of the children within
|
7
|
+
# their parents are also passed to it.
|
8
|
+
def each_expression(include_self = false, &block)
|
9
|
+
return enum_for(__method__, include_self) unless block
|
10
|
+
|
11
|
+
if block.arity == 1
|
12
|
+
block.call(self) if include_self
|
13
|
+
each_expression_without_index(&block)
|
14
|
+
else
|
15
|
+
block.call(self, 0) if include_self
|
16
|
+
each_expression_with_index(&block)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
4
20
|
# Traverses the subexpression (depth-first, pre-order) and calls the given
|
5
21
|
# block for each expression with three arguments; the traversal event,
|
6
22
|
# the expression, and the index of the expression within its parent.
|
@@ -34,31 +50,31 @@ module Regexp::Expression
|
|
34
50
|
end
|
35
51
|
alias :walk :traverse
|
36
52
|
|
37
|
-
# Iterates over the expressions of this expression as an array, passing
|
38
|
-
# the expression and its index within its parent to the given block.
|
39
|
-
def each_expression(include_self = false)
|
40
|
-
return enum_for(__method__, include_self) unless block_given?
|
41
|
-
|
42
|
-
traverse(include_self) do |event, exp, index|
|
43
|
-
yield(exp, index) unless event == :exit
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
53
|
# Returns a new array with the results of calling the given block once
|
48
54
|
# for every expression. If a block is not given, returns an array with
|
49
55
|
# each expression and its level index as an array.
|
50
|
-
def flat_map(include_self = false)
|
51
|
-
|
56
|
+
def flat_map(include_self = false, &block)
|
57
|
+
case block && block.arity
|
58
|
+
when nil then each_expression(include_self).to_a
|
59
|
+
when 2 then each_expression(include_self).map(&block)
|
60
|
+
else each_expression(include_self).map { |exp| block.call(exp) }
|
61
|
+
end
|
62
|
+
end
|
52
63
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
64
|
+
protected
|
65
|
+
|
66
|
+
def each_expression_with_index(&block)
|
67
|
+
each_with_index do |exp, index|
|
68
|
+
block.call(exp, index)
|
69
|
+
exp.each_expression_with_index(&block) unless exp.terminal?
|
59
70
|
end
|
71
|
+
end
|
60
72
|
|
61
|
-
|
73
|
+
def each_expression_without_index(&block)
|
74
|
+
each do |exp|
|
75
|
+
block.call(exp)
|
76
|
+
exp.each_expression_without_index(&block) unless exp.terminal?
|
77
|
+
end
|
62
78
|
end
|
63
79
|
end
|
64
80
|
end
|
@@ -8,14 +8,10 @@ module Regexp::Expression
|
|
8
8
|
|
9
9
|
MODES = %i[greedy possessive reluctant]
|
10
10
|
|
11
|
-
attr_reader :min, :max, :mode
|
12
|
-
|
13
11
|
def initialize(*args)
|
14
12
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
15
13
|
|
16
14
|
init_from_token_and_options(*args)
|
17
|
-
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
|
-
@min, @max = minmax
|
19
15
|
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
16
|
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
21
17
|
end
|
@@ -39,9 +35,21 @@ module Regexp::Expression
|
|
39
35
|
end
|
40
36
|
alias :lazy? :reluctant?
|
41
37
|
|
38
|
+
def min
|
39
|
+
derived_data[:min]
|
40
|
+
end
|
41
|
+
|
42
|
+
def max
|
43
|
+
derived_data[:max]
|
44
|
+
end
|
45
|
+
|
46
|
+
def mode
|
47
|
+
derived_data[:mode]
|
48
|
+
end
|
49
|
+
|
42
50
|
private
|
43
51
|
|
44
|
-
def deprecated_old_init(token, text,
|
52
|
+
def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
|
45
53
|
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
54
|
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
55
|
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
@@ -51,20 +59,25 @@ module Regexp::Expression
|
|
51
59
|
"This is consistent with how Expression::Base instances are created. "
|
52
60
|
@token = token
|
53
61
|
@text = text
|
54
|
-
@min = min
|
55
|
-
@max = max
|
56
|
-
@mode = mode
|
57
62
|
end
|
58
63
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
def derived_data
|
65
|
+
@derived_data ||= begin
|
66
|
+
min, max =
|
67
|
+
case text[0]
|
68
|
+
when '?'; [0, 1]
|
69
|
+
when '*'; [0, -1]
|
70
|
+
when '+'; [1, -1]
|
71
|
+
else
|
72
|
+
int_min = text[/\{(\d*)/, 1]
|
73
|
+
int_max = text[/,?(\d*)\}/, 1]
|
74
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
75
|
+
end
|
76
|
+
|
77
|
+
mod = text[/.([?+])/, 1]
|
78
|
+
mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
|
79
|
+
|
80
|
+
{ min: min, max: max, mode: mode }
|
68
81
|
end
|
69
82
|
end
|
70
83
|
end
|
@@ -12,25 +12,20 @@ module Regexp::Expression
|
|
12
12
|
level: exp.level,
|
13
13
|
set_level: exp.set_level,
|
14
14
|
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
|
+
ts: params[:ts],
|
15
16
|
)
|
16
|
-
sequence.nesting_level = exp.nesting_level + 1
|
17
17
|
sequence.options = active_opts
|
18
18
|
exp.expressions << sequence
|
19
19
|
sequence
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
24
|
-
expressions.first.
|
23
|
+
def ts
|
24
|
+
(head = expressions.first) ? head.ts : @ts
|
25
25
|
end
|
26
|
-
alias :ts :starts_at
|
27
26
|
|
28
|
-
def quantify(*args)
|
29
|
-
|
30
|
-
target or raise Regexp::Parser::Error,
|
31
|
-
"No valid target found for '#{text}' quantifier"
|
32
|
-
|
33
|
-
target.quantify(*args)
|
27
|
+
def quantify(token, *args)
|
28
|
+
extract_quantifier_target(token.text).quantify(token, *args)
|
34
29
|
end
|
35
30
|
end
|
36
31
|
end
|