regexp_parser 2.1.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +94 -6
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +40 -30
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +1 -1
- data/lib/regexp_parser/expression/base.rb +75 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +1 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -2
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +2 -2
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -6
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/root.rb +3 -6
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -2
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +10 -1
- data/lib/regexp_parser/expression/quantifier.rb +41 -23
- data/lib/regexp_parser/expression/sequence.rb +9 -24
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
- data/lib/regexp_parser/expression/shared.rb +85 -0
- data/lib/regexp_parser/expression/subexpression.rb +11 -8
- data/lib/regexp_parser/expression.rb +10 -132
- data/lib/regexp_parser/lexer.rb +8 -6
- data/lib/regexp_parser/parser.rb +21 -72
- data/lib/regexp_parser/scanner/properties/long.csv +622 -0
- data/lib/regexp_parser/scanner/properties/short.csv +246 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +48 -35
- data/lib/regexp_parser/scanner.rb +735 -801
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +91 -66
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +717 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +37 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -64
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -16
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
@@ -1,12 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Root < Regexp::Expression::Subexpression
|
4
3
|
def self.build(options = {})
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def self.build_token
|
9
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
10
7
|
end
|
11
8
|
end
|
12
9
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
# TODO: unify name with token :property, one way or the other, in v3.0.0
|
3
3
|
module UnicodeProperty
|
4
4
|
class Base < Regexp::Expression::Base
|
5
5
|
def negative?
|
@@ -116,5 +116,4 @@ module Regexp::Expression
|
|
116
116
|
class Script < UnicodeProperty::Base; end
|
117
117
|
class Block < UnicodeProperty::Base; end
|
118
118
|
end
|
119
|
-
|
120
119
|
end # module Regexp::Expression
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
|
29
|
+
elsif self == Alternation || self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
elsif self <= EscapeSequence::Base
|
32
|
+
Regexp::Syntax::Token::Escape
|
33
|
+
else
|
34
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def token_class
|
40
|
+
self.class.token_class
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -112,7 +112,7 @@ module Regexp::Expression
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.
|
115
|
+
dummy = Regexp::Expression::Root.construct
|
116
116
|
dummy.expressions = expressions.map(&:clone)
|
117
117
|
dummy.quantifier = quantifier && quantifier.clone
|
118
118
|
dummy.match_length
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
module Shared
|
3
3
|
|
4
4
|
# Test if this expression has the given test_type, which can be either
|
5
5
|
# a symbol or an array of symbols to check against the expression's type.
|
@@ -93,5 +93,14 @@ module Regexp::Expression
|
|
93
93
|
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
94
94
|
end
|
95
95
|
end
|
96
|
+
|
97
|
+
# Deep-compare two expressions for equality.
|
98
|
+
def ==(other)
|
99
|
+
other.class == self.class &&
|
100
|
+
other.to_s == to_s &&
|
101
|
+
other.options == options
|
102
|
+
end
|
103
|
+
alias :=== :==
|
104
|
+
alias :eql? :==
|
96
105
|
end
|
97
106
|
end
|
@@ -1,26 +1,24 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
3
|
-
|
7
|
+
include Regexp::Expression::Shared
|
4
8
|
|
5
|
-
|
9
|
+
MODES = %i[greedy possessive reluctant]
|
6
10
|
|
7
|
-
|
8
|
-
@token = token
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
11
|
+
attr_reader :min, :max, :mode
|
14
12
|
|
15
|
-
def
|
16
|
-
|
17
|
-
super
|
18
|
-
end
|
13
|
+
def initialize(*args)
|
14
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
15
|
|
20
|
-
|
21
|
-
|
16
|
+
init_from_token_and_options(*args)
|
17
|
+
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
|
+
@min, @max = minmax
|
19
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
21
|
end
|
23
|
-
alias :to_str :to_s
|
24
22
|
|
25
23
|
def to_h
|
26
24
|
{
|
@@ -41,13 +39,33 @@ module Regexp::Expression
|
|
41
39
|
end
|
42
40
|
alias :lazy? :reluctant?
|
43
41
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically.\n"\
|
50
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
51
|
+
"This is consistent with how Expression::Base instances are created. "
|
52
|
+
@token = token
|
53
|
+
@text = text
|
54
|
+
@min = min
|
55
|
+
@max = max
|
56
|
+
@mode = mode
|
57
|
+
end
|
58
|
+
|
59
|
+
def minmax
|
60
|
+
case token
|
61
|
+
when /zero_or_one/ then [0, 1]
|
62
|
+
when /zero_or_more/ then [0, -1]
|
63
|
+
when /one_or_more/ then [1, -1]
|
64
|
+
when :interval
|
65
|
+
int_min = text[/\{(\d*)/, 1]
|
66
|
+
int_max = text[/,?(\d*)\}/, 1]
|
67
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
68
|
+
end
|
50
69
|
end
|
51
|
-
alias :eq :==
|
52
70
|
end
|
53
71
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
# A sequence of expressions. Differs from a Subexpression by how it handles
|
4
3
|
# quantifiers, as it applies them to its last element instead of itself as
|
5
4
|
# a whole subexpression.
|
@@ -8,31 +7,17 @@ module Regexp::Expression
|
|
8
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
9
8
|
class Sequence < Regexp::Expression::Subexpression
|
10
9
|
class << self
|
11
|
-
def add_to(
|
12
|
-
sequence =
|
13
|
-
|
14
|
-
|
15
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
16
15
|
)
|
17
|
-
sequence.nesting_level =
|
16
|
+
sequence.nesting_level = exp.nesting_level + 1
|
18
17
|
sequence.options = active_opts
|
19
|
-
|
18
|
+
exp.expressions << sequence
|
20
19
|
sequence
|
21
20
|
end
|
22
|
-
|
23
|
-
def at_levels(level, set_level, conditional_level)
|
24
|
-
token = Regexp::Token.new(
|
25
|
-
:expression,
|
26
|
-
:sequence,
|
27
|
-
'',
|
28
|
-
nil, # ts
|
29
|
-
nil, # te
|
30
|
-
level,
|
31
|
-
set_level,
|
32
|
-
conditional_level
|
33
|
-
)
|
34
|
-
new(token)
|
35
|
-
end
|
36
21
|
end
|
37
22
|
|
38
23
|
def starts_at
|
@@ -40,12 +25,12 @@ module Regexp::Expression
|
|
40
25
|
end
|
41
26
|
alias :ts :starts_at
|
42
27
|
|
43
|
-
def quantify(
|
28
|
+
def quantify(*args)
|
44
29
|
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
45
30
|
target or raise Regexp::Parser::Error,
|
46
31
|
"No valid target found for '#{text}' quantifier"
|
47
32
|
|
48
|
-
target.quantify(
|
33
|
+
target.quantify(*args)
|
49
34
|
end
|
50
35
|
end
|
51
36
|
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
4
|
+
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_eval do
|
7
|
+
extend Shared::ClassMethods
|
8
|
+
|
9
|
+
attr_accessor :type, :token, :text, :ts, :te,
|
10
|
+
:level, :set_level, :conditional_level,
|
11
|
+
:options, :quantifier
|
12
|
+
|
13
|
+
attr_reader :nesting_level
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def init_from_token_and_options(token, options = {})
|
18
|
+
self.type = token.type
|
19
|
+
self.token = token.token
|
20
|
+
self.text = token.text
|
21
|
+
self.ts = token.ts
|
22
|
+
self.te = token.te
|
23
|
+
self.level = token.level
|
24
|
+
self.set_level = token.set_level
|
25
|
+
self.conditional_level = token.conditional_level
|
26
|
+
self.nesting_level = 0
|
27
|
+
self.options = options || {}
|
28
|
+
end
|
29
|
+
private :init_from_token_and_options
|
30
|
+
|
31
|
+
def initialize_copy(orig)
|
32
|
+
self.text = orig.text.dup if orig.text
|
33
|
+
self.options = orig.options.dup if orig.options
|
34
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
35
|
+
super
|
36
|
+
end
|
37
|
+
|
38
|
+
def starts_at
|
39
|
+
ts
|
40
|
+
end
|
41
|
+
|
42
|
+
def base_length
|
43
|
+
to_s(:base).length
|
44
|
+
end
|
45
|
+
|
46
|
+
def full_length
|
47
|
+
to_s.length
|
48
|
+
end
|
49
|
+
|
50
|
+
def to_s(format = :full)
|
51
|
+
"#{parts.join}#{quantifier_affix(format)}"
|
52
|
+
end
|
53
|
+
alias :to_str :to_s
|
54
|
+
|
55
|
+
def parts
|
56
|
+
[text.dup]
|
57
|
+
end
|
58
|
+
|
59
|
+
def quantifier_affix(expression_format)
|
60
|
+
quantifier.to_s if quantified? && expression_format != :base
|
61
|
+
end
|
62
|
+
|
63
|
+
def quantified?
|
64
|
+
!quantifier.nil?
|
65
|
+
end
|
66
|
+
|
67
|
+
def offset
|
68
|
+
[starts_at, full_length]
|
69
|
+
end
|
70
|
+
|
71
|
+
def coded_offset
|
72
|
+
'@%d+%d' % offset
|
73
|
+
end
|
74
|
+
|
75
|
+
def terminal?
|
76
|
+
!respond_to?(:expressions)
|
77
|
+
end
|
78
|
+
|
79
|
+
def nesting_level=(lvl)
|
80
|
+
@nesting_level = lvl
|
81
|
+
quantifier && quantifier.nesting_level = lvl
|
82
|
+
terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
@@ -1,14 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Subexpression < Regexp::Expression::Base
|
4
3
|
include Enumerable
|
5
4
|
|
6
5
|
attr_accessor :expressions
|
7
6
|
|
8
7
|
def initialize(token, options = {})
|
9
|
-
super
|
10
|
-
|
11
8
|
self.expressions = []
|
9
|
+
super
|
12
10
|
end
|
13
11
|
|
14
12
|
# Override base method to clone the expressions as well.
|
@@ -44,16 +42,21 @@ module Regexp::Expression
|
|
44
42
|
ts + to_s.length
|
45
43
|
end
|
46
44
|
|
47
|
-
def
|
48
|
-
|
49
|
-
"#{expressions.join}#{quantifier_affix(format)}"
|
45
|
+
def parts
|
46
|
+
expressions
|
50
47
|
end
|
51
48
|
|
52
49
|
def to_h
|
53
|
-
attributes.merge(
|
50
|
+
attributes.merge(
|
54
51
|
text: to_s(:base),
|
55
52
|
expressions: expressions.map(&:to_h)
|
56
|
-
|
53
|
+
)
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def intersperse(expressions, separator)
|
59
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
57
60
|
end
|
58
61
|
end
|
59
62
|
end
|
@@ -1,130 +1,7 @@
|
|
1
1
|
require 'regexp_parser/error'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
attr_accessor :type, :token
|
6
|
-
attr_accessor :text, :ts
|
7
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
8
|
-
|
9
|
-
attr_accessor :quantifier
|
10
|
-
attr_accessor :options
|
11
|
-
|
12
|
-
def initialize(token, options = {})
|
13
|
-
self.type = token.type
|
14
|
-
self.token = token.token
|
15
|
-
self.text = token.text
|
16
|
-
self.ts = token.ts
|
17
|
-
self.level = token.level
|
18
|
-
self.set_level = token.set_level
|
19
|
-
self.conditional_level = token.conditional_level
|
20
|
-
self.nesting_level = 0
|
21
|
-
self.quantifier = nil
|
22
|
-
self.options = options
|
23
|
-
end
|
24
|
-
|
25
|
-
def initialize_copy(orig)
|
26
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
27
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
28
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
29
|
-
super
|
30
|
-
end
|
31
|
-
|
32
|
-
def to_re(format = :full)
|
33
|
-
::Regexp.new(to_s(format))
|
34
|
-
end
|
35
|
-
|
36
|
-
alias :starts_at :ts
|
37
|
-
|
38
|
-
def base_length
|
39
|
-
to_s(:base).length
|
40
|
-
end
|
41
|
-
|
42
|
-
def full_length
|
43
|
-
to_s.length
|
44
|
-
end
|
45
|
-
|
46
|
-
def offset
|
47
|
-
[starts_at, full_length]
|
48
|
-
end
|
49
|
-
|
50
|
-
def coded_offset
|
51
|
-
'@%d+%d' % offset
|
52
|
-
end
|
53
|
-
|
54
|
-
def to_s(format = :full)
|
55
|
-
"#{text}#{quantifier_affix(format)}"
|
56
|
-
end
|
57
|
-
|
58
|
-
def quantifier_affix(expression_format)
|
59
|
-
quantifier.to_s if quantified? && expression_format != :base
|
60
|
-
end
|
61
|
-
|
62
|
-
def terminal?
|
63
|
-
!respond_to?(:expressions)
|
64
|
-
end
|
65
|
-
|
66
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
67
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
68
|
-
end
|
69
|
-
|
70
|
-
def unquantified_clone
|
71
|
-
clone.tap { |exp| exp.quantifier = nil }
|
72
|
-
end
|
73
|
-
|
74
|
-
def quantified?
|
75
|
-
!quantifier.nil?
|
76
|
-
end
|
77
|
-
|
78
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
79
|
-
def quantity
|
80
|
-
return [nil,nil] unless quantified?
|
81
|
-
[quantifier.min, quantifier.max]
|
82
|
-
end
|
83
|
-
|
84
|
-
def repetitions
|
85
|
-
return 1..1 unless quantified?
|
86
|
-
min = quantifier.min
|
87
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
88
|
-
range = min..max
|
89
|
-
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
90
|
-
if RUBY_VERSION.to_f < 2.7
|
91
|
-
range.define_singleton_method(:minmax) { [min, max] }
|
92
|
-
end
|
93
|
-
range
|
94
|
-
end
|
95
|
-
|
96
|
-
def greedy?
|
97
|
-
quantified? and quantifier.greedy?
|
98
|
-
end
|
99
|
-
|
100
|
-
def reluctant?
|
101
|
-
quantified? and quantifier.reluctant?
|
102
|
-
end
|
103
|
-
alias :lazy? :reluctant?
|
104
|
-
|
105
|
-
def possessive?
|
106
|
-
quantified? and quantifier.possessive?
|
107
|
-
end
|
108
|
-
|
109
|
-
def attributes
|
110
|
-
{
|
111
|
-
type: type,
|
112
|
-
token: token,
|
113
|
-
text: to_s(:base),
|
114
|
-
starts_at: ts,
|
115
|
-
length: full_length,
|
116
|
-
level: level,
|
117
|
-
set_level: set_level,
|
118
|
-
conditional_level: conditional_level,
|
119
|
-
options: options,
|
120
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
121
|
-
}
|
122
|
-
end
|
123
|
-
alias :to_h :attributes
|
124
|
-
end
|
125
|
-
|
126
|
-
end # module Regexp::Expression
|
127
|
-
|
3
|
+
require 'regexp_parser/expression/shared'
|
4
|
+
require 'regexp_parser/expression/base'
|
128
5
|
require 'regexp_parser/expression/quantifier'
|
129
6
|
require 'regexp_parser/expression/subexpression'
|
130
7
|
require 'regexp_parser/expression/sequence'
|
@@ -132,21 +9,22 @@ require 'regexp_parser/expression/sequence_operation'
|
|
132
9
|
|
133
10
|
require 'regexp_parser/expression/classes/alternation'
|
134
11
|
require 'regexp_parser/expression/classes/anchor'
|
135
|
-
require 'regexp_parser/expression/classes/
|
12
|
+
require 'regexp_parser/expression/classes/backreference'
|
13
|
+
require 'regexp_parser/expression/classes/character_set'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
15
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
136
17
|
require 'regexp_parser/expression/classes/conditional'
|
137
|
-
require 'regexp_parser/expression/classes/
|
18
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
138
19
|
require 'regexp_parser/expression/classes/free_space'
|
139
20
|
require 'regexp_parser/expression/classes/group'
|
140
21
|
require 'regexp_parser/expression/classes/keep'
|
141
22
|
require 'regexp_parser/expression/classes/literal'
|
142
23
|
require 'regexp_parser/expression/classes/posix_class'
|
143
|
-
require 'regexp_parser/expression/classes/property'
|
144
24
|
require 'regexp_parser/expression/classes/root'
|
145
|
-
require 'regexp_parser/expression/classes/
|
146
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
147
|
-
require 'regexp_parser/expression/classes/set/range'
|
148
|
-
require 'regexp_parser/expression/classes/type'
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
149
26
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
150
28
|
require 'regexp_parser/expression/methods/match'
|
151
29
|
require 'regexp_parser/expression/methods/match_length'
|
152
30
|
require 'regexp_parser/expression/methods/options'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,19 +4,21 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
OPENING_TOKENS = [
|
8
|
-
|
9
|
-
|
7
|
+
OPENING_TOKENS = %i[
|
8
|
+
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
+
atomic options options_switch named absence
|
10
10
|
].freeze
|
11
11
|
|
12
|
-
CLOSING_TOKENS = [
|
12
|
+
CLOSING_TOKENS = %i[close].freeze
|
13
|
+
|
14
|
+
CONDITION_TOKENS = %i[condition condition_close].freeze
|
13
15
|
|
14
16
|
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
15
17
|
new.lex(input, syntax, options: options, &block)
|
16
18
|
end
|
17
19
|
|
18
20
|
def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
19
|
-
syntax = Regexp::Syntax.
|
21
|
+
syntax = Regexp::Syntax.for(syntax)
|
20
22
|
|
21
23
|
self.tokens = []
|
22
24
|
self.nesting = 0
|
@@ -40,7 +42,7 @@ class Regexp::Lexer
|
|
40
42
|
nesting, set_nesting, conditional_nesting)
|
41
43
|
|
42
44
|
current = merge_condition(current) if type == :conditional and
|
43
|
-
|
45
|
+
CONDITION_TOKENS.include?(token)
|
44
46
|
|
45
47
|
last.next = current if last
|
46
48
|
current.previous = last if last
|