regexp_parser 1.7.0 → 2.8.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +303 -368
- data/lib/regexp_parser/scanner.rb +1423 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +19 -23
- metadata +52 -171
- data/CHANGELOG.md +0 -349
- data/README.md +0 -470
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation, e.g. "atomic group", "hex escape", "word type", ..
|
4
|
+
def human_name
|
5
|
+
[token, type].compact.join(' ').tr('_', ' ')
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
Alternation.class_eval { def human_name; 'alternation' end }
|
10
|
+
Alternative.class_eval { def human_name; 'alternative' end }
|
11
|
+
Anchor::BOL.class_eval { def human_name; 'beginning of line' end }
|
12
|
+
Anchor::BOS.class_eval { def human_name; 'beginning of string' end }
|
13
|
+
Anchor::EOL.class_eval { def human_name; 'end of line' end }
|
14
|
+
Anchor::EOS.class_eval { def human_name; 'end of string' end }
|
15
|
+
Anchor::EOSobEOL.class_eval { def human_name; 'newline-ready end of string' end }
|
16
|
+
Anchor::MatchStart.class_eval { def human_name; 'match start' end }
|
17
|
+
Anchor::NonWordBoundary.class_eval { def human_name; 'no word boundary' end }
|
18
|
+
Anchor::WordBoundary.class_eval { def human_name; 'word boundary' end }
|
19
|
+
Assertion::Lookahead.class_eval { def human_name; 'lookahead' end }
|
20
|
+
Assertion::Lookbehind.class_eval { def human_name; 'lookbehind' end }
|
21
|
+
Assertion::NegativeLookahead.class_eval { def human_name; 'negative lookahead' end }
|
22
|
+
Assertion::NegativeLookbehind.class_eval { def human_name; 'negative lookbehind' end }
|
23
|
+
Backreference::Name.class_eval { def human_name; 'backreference by name' end }
|
24
|
+
Backreference::NameCall.class_eval { def human_name; 'subexpression call by name' end }
|
25
|
+
Backreference::Number.class_eval { def human_name; 'backreference' end }
|
26
|
+
Backreference::NumberRelative.class_eval { def human_name; 'relative backreference' end }
|
27
|
+
Backreference::NumberCall.class_eval { def human_name; 'subexpression call' end }
|
28
|
+
Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
|
29
|
+
CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence' end }
|
30
|
+
CharacterSet::Intersection.class_eval { def human_name; 'intersection' end }
|
31
|
+
CharacterSet::Range.class_eval { def human_name; 'character range' end }
|
32
|
+
CharacterType::Any.class_eval { def human_name; 'match-all' end }
|
33
|
+
Comment.class_eval { def human_name; 'comment' end }
|
34
|
+
Conditional::Branch.class_eval { def human_name; 'conditional branch' end }
|
35
|
+
Conditional::Condition.class_eval { def human_name; 'condition' end }
|
36
|
+
Conditional::Expression.class_eval { def human_name; 'conditional' end }
|
37
|
+
Group::Capture.class_eval { def human_name; "capture group #{number}" end }
|
38
|
+
Group::Named.class_eval { def human_name; 'named capture group' end }
|
39
|
+
Keep::Mark.class_eval { def human_name; 'keep-mark lookbehind' end }
|
40
|
+
Literal.class_eval { def human_name; 'literal' end }
|
41
|
+
Root.class_eval { def human_name; 'root' end }
|
42
|
+
WhiteSpace.class_eval { def human_name; 'free space' end }
|
43
|
+
end
|
@@ -10,7 +10,7 @@ class Regexp::MatchLength
|
|
10
10
|
self.exp_class = exp.class
|
11
11
|
self.min_rep = exp.repetitions.min
|
12
12
|
self.max_rep = exp.repetitions.max
|
13
|
-
if base = opts[:base]
|
13
|
+
if (base = opts[:base])
|
14
14
|
self.base_min = base
|
15
15
|
self.base_max = base
|
16
16
|
self.reify = ->{ '.' * base }
|
@@ -32,7 +32,7 @@ class Regexp::MatchLength
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
def endless_each
|
35
|
+
def endless_each
|
36
36
|
return enum_for(__method__) unless block_given?
|
37
37
|
(min..max).each { |num| yield(num) if include?(num) }
|
38
38
|
end
|
@@ -63,16 +63,20 @@ class Regexp::MatchLength
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def to_re
|
66
|
-
|
66
|
+
/(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
70
70
|
|
71
71
|
attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
if Regexp.method_defined?(:match?) # ruby >= 2.4
|
74
|
+
def test_regexp
|
75
|
+
@test_regexp ||= /^#{to_re}$/
|
76
|
+
end
|
77
|
+
else
|
78
|
+
def test_regexp
|
79
|
+
@test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
@@ -112,7 +116,7 @@ module Regexp::Expression
|
|
112
116
|
end
|
113
117
|
|
114
118
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.
|
119
|
+
dummy = Regexp::Expression::Root.construct
|
116
120
|
dummy.expressions = expressions.map(&:clone)
|
117
121
|
dummy.quantifier = quantifier && quantifier.clone
|
118
122
|
dummy.match_length
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
module Shared
|
3
3
|
|
4
4
|
# Test if this expression has the given test_type, which can be either
|
5
5
|
# a symbol or an array of symbols to check against the expression's type.
|
@@ -93,5 +93,51 @@ module Regexp::Expression
|
|
93
93
|
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
94
94
|
end
|
95
95
|
end
|
96
|
+
|
97
|
+
# Deep-compare two expressions for equality.
|
98
|
+
#
|
99
|
+
# When changing the conditions, please make sure to update
|
100
|
+
# #pretty_print_instance_variables so that it includes all relevant values.
|
101
|
+
def ==(other)
|
102
|
+
self.class == other.class &&
|
103
|
+
text == other.text &&
|
104
|
+
quantifier == other.quantifier &&
|
105
|
+
options == other.options &&
|
106
|
+
(terminal? || expressions == other.expressions)
|
107
|
+
end
|
108
|
+
alias :=== :==
|
109
|
+
alias :eql? :==
|
110
|
+
|
111
|
+
def optional?
|
112
|
+
quantified? && quantifier.min == 0
|
113
|
+
end
|
114
|
+
|
115
|
+
def quantified?
|
116
|
+
!quantifier.nil?
|
117
|
+
end
|
96
118
|
end
|
119
|
+
|
120
|
+
Shared.class_eval { def terminal?; self.class.terminal? end }
|
121
|
+
Shared::ClassMethods.class_eval { def terminal?; true end }
|
122
|
+
Subexpression.instance_eval { def terminal?; false end }
|
123
|
+
|
124
|
+
Shared.class_eval { def capturing?; self.class.capturing? end }
|
125
|
+
Shared::ClassMethods.class_eval { def capturing?; false end }
|
126
|
+
Group::Capture.instance_eval { def capturing?; true end }
|
127
|
+
|
128
|
+
Shared.class_eval { def comment?; self.class.comment? end }
|
129
|
+
Shared::ClassMethods.class_eval { def comment?; false end }
|
130
|
+
Comment.instance_eval { def comment?; true end }
|
131
|
+
Group::Comment.instance_eval { def comment?; true end }
|
132
|
+
|
133
|
+
Shared.class_eval { def decorative?; self.class.decorative? end }
|
134
|
+
Shared::ClassMethods.class_eval { def decorative?; false end }
|
135
|
+
FreeSpace.instance_eval { def decorative?; true end }
|
136
|
+
Group::Comment.instance_eval { def decorative?; true end }
|
137
|
+
|
138
|
+
Shared.class_eval { def referential?; self.class.referential? end }
|
139
|
+
Shared::ClassMethods.class_eval { def referential?; false end }
|
140
|
+
Backreference::Base.instance_eval { def referential?; true end }
|
141
|
+
Conditional::Condition.instance_eval { def referential?; true end }
|
142
|
+
Conditional::Expression.instance_eval { def referential?; true end }
|
97
143
|
end
|
@@ -1,6 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Subexpression < Regexp::Expression::Base
|
3
3
|
|
4
|
+
# Traverses the expression, passing each recursive child to the
|
5
|
+
# given block.
|
6
|
+
# If the block takes two arguments, the indices of the children within
|
7
|
+
# their parents are also passed to it.
|
8
|
+
def each_expression(include_self = false, &block)
|
9
|
+
return enum_for(__method__, include_self) unless block
|
10
|
+
|
11
|
+
if block.arity == 1
|
12
|
+
block.call(self) if include_self
|
13
|
+
each_expression_without_index(&block)
|
14
|
+
else
|
15
|
+
block.call(self, 0) if include_self
|
16
|
+
each_expression_with_index(&block)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
4
20
|
# Traverses the subexpression (depth-first, pre-order) and calls the given
|
5
21
|
# block for each expression with three arguments; the traversal event,
|
6
22
|
# the expression, and the index of the expression within its parent.
|
@@ -34,31 +50,31 @@ module Regexp::Expression
|
|
34
50
|
end
|
35
51
|
alias :walk :traverse
|
36
52
|
|
37
|
-
# Iterates over the expressions of this expression as an array, passing
|
38
|
-
# the expression and its index within its parent to the given block.
|
39
|
-
def each_expression(include_self = false, &block)
|
40
|
-
return enum_for(__method__, include_self) unless block_given?
|
41
|
-
|
42
|
-
traverse(include_self) do |event, exp, index|
|
43
|
-
yield(exp, index) unless event == :exit
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
53
|
# Returns a new array with the results of calling the given block once
|
48
54
|
# for every expression. If a block is not given, returns an array with
|
49
55
|
# each expression and its level index as an array.
|
50
56
|
def flat_map(include_self = false, &block)
|
51
|
-
|
57
|
+
case block && block.arity
|
58
|
+
when nil then each_expression(include_self).to_a
|
59
|
+
when 2 then each_expression(include_self).map(&block)
|
60
|
+
else each_expression(include_self).map { |exp| block.call(exp) }
|
61
|
+
end
|
62
|
+
end
|
52
63
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
64
|
+
protected
|
65
|
+
|
66
|
+
def each_expression_with_index(&block)
|
67
|
+
each_with_index do |exp, index|
|
68
|
+
block.call(exp, index)
|
69
|
+
exp.each_expression_with_index(&block) unless exp.terminal?
|
59
70
|
end
|
71
|
+
end
|
60
72
|
|
61
|
-
|
73
|
+
def each_expression_without_index(&block)
|
74
|
+
each do |exp|
|
75
|
+
block.call(exp)
|
76
|
+
exp.each_expression_without_index(&block) unless exp.terminal?
|
77
|
+
end
|
62
78
|
end
|
63
79
|
end
|
64
80
|
end
|
@@ -1,26 +1,20 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
3
|
-
|
7
|
+
include Regexp::Expression::Shared
|
4
8
|
|
5
|
-
|
9
|
+
MODES = %i[greedy possessive reluctant]
|
6
10
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize_clone(orig)
|
16
|
-
@text = orig.text.dup
|
17
|
-
super
|
18
|
-
end
|
11
|
+
def initialize(*args)
|
12
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
13
|
|
20
|
-
|
21
|
-
|
14
|
+
init_from_token_and_options(*args)
|
15
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
16
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
17
|
end
|
23
|
-
alias :to_str :to_s
|
24
18
|
|
25
19
|
def to_h
|
26
20
|
{
|
@@ -40,5 +34,51 @@ module Regexp::Expression
|
|
40
34
|
RUBY
|
41
35
|
end
|
42
36
|
alias :lazy? :reluctant?
|
37
|
+
|
38
|
+
def min
|
39
|
+
derived_data[:min]
|
40
|
+
end
|
41
|
+
|
42
|
+
def max
|
43
|
+
derived_data[:max]
|
44
|
+
end
|
45
|
+
|
46
|
+
def mode
|
47
|
+
derived_data[:mode]
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
|
53
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
54
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
55
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
56
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
57
|
+
"will be derived automatically.\n"\
|
58
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
59
|
+
"This is consistent with how Expression::Base instances are created. "
|
60
|
+
@token = token
|
61
|
+
@text = text
|
62
|
+
end
|
63
|
+
|
64
|
+
def derived_data
|
65
|
+
@derived_data ||= begin
|
66
|
+
min, max =
|
67
|
+
case text[0]
|
68
|
+
when '?'; [0, 1]
|
69
|
+
when '*'; [0, -1]
|
70
|
+
when '+'; [1, -1]
|
71
|
+
else
|
72
|
+
int_min = text[/\{(\d*)/, 1]
|
73
|
+
int_max = text[/,?(\d*)\}/, 1]
|
74
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
75
|
+
end
|
76
|
+
|
77
|
+
mod = text[/.([?+])/, 1]
|
78
|
+
mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
|
79
|
+
|
80
|
+
{ min: min, max: max, mode: mode }
|
81
|
+
end
|
82
|
+
end
|
43
83
|
end
|
44
84
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
# A sequence of expressions. Differs from a Subexpressions by how it handles
|
4
3
|
# quantifiers, as it applies them to its last element instead of itself as
|
5
4
|
# a whole subexpression.
|
@@ -7,61 +6,26 @@ module Regexp::Expression
|
|
7
6
|
# Used as the base class for the Alternation alternatives, Conditional
|
8
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
9
8
|
class Sequence < Regexp::Expression::Subexpression
|
10
|
-
# TODO: this override is here for backwards compatibility, remove in 2.0.0
|
11
|
-
def initialize(*args)
|
12
|
-
if args.count == 3
|
13
|
-
warn('WARNING: Sequence.new without a Regexp::Token argument is '\
|
14
|
-
'deprecated and will be removed in 2.0.0.')
|
15
|
-
return self.class.at_levels(*args)
|
16
|
-
end
|
17
|
-
super
|
18
|
-
end
|
19
|
-
|
20
9
|
class << self
|
21
|
-
def add_to(
|
22
|
-
sequence =
|
23
|
-
|
24
|
-
|
25
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
|
+
ts: params[:ts],
|
26
16
|
)
|
27
|
-
sequence.nesting_level = subexpression.nesting_level + 1
|
28
17
|
sequence.options = active_opts
|
29
|
-
|
18
|
+
exp.expressions << sequence
|
30
19
|
sequence
|
31
20
|
end
|
32
|
-
|
33
|
-
def at_levels(level, set_level, conditional_level)
|
34
|
-
token = Regexp::Token.new(
|
35
|
-
:expression,
|
36
|
-
:sequence,
|
37
|
-
'',
|
38
|
-
nil, # ts
|
39
|
-
nil, # te
|
40
|
-
level,
|
41
|
-
set_level,
|
42
|
-
conditional_level
|
43
|
-
)
|
44
|
-
new(token)
|
45
|
-
end
|
46
21
|
end
|
47
22
|
|
48
|
-
def
|
49
|
-
expressions.first.
|
23
|
+
def ts
|
24
|
+
(head = expressions.first) ? head.ts : @ts
|
50
25
|
end
|
51
|
-
alias :ts :starts_at
|
52
|
-
|
53
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
54
|
-
offset = -1
|
55
|
-
target = expressions[offset]
|
56
|
-
while target.is_a?(FreeSpace)
|
57
|
-
target = expressions[offset -= 1]
|
58
|
-
end
|
59
26
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
target.quantify(token, text, min, max, mode)
|
27
|
+
def quantify(token, *args)
|
28
|
+
extract_quantifier_target(token.text).quantify(token, *args)
|
64
29
|
end
|
65
30
|
end
|
66
|
-
|
67
31
|
end
|
@@ -5,21 +5,16 @@ module Regexp::Expression
|
|
5
5
|
alias :operands :expressions
|
6
6
|
alias :operator :text
|
7
7
|
|
8
|
-
def
|
9
|
-
expressions.first.
|
8
|
+
def ts
|
9
|
+
(head = expressions.first) ? head.ts : @ts
|
10
10
|
end
|
11
|
-
alias :ts :starts_at
|
12
11
|
|
13
12
|
def <<(exp)
|
14
13
|
expressions.last << exp
|
15
14
|
end
|
16
15
|
|
17
|
-
def add_sequence(active_opts = {})
|
18
|
-
self.class::OPERAND.add_to(self,
|
19
|
-
end
|
20
|
-
|
21
|
-
def to_s(format = :full)
|
22
|
-
sequences.map { |e| e.to_s(format) }.join(text)
|
16
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
17
|
+
self.class::OPERAND.add_to(self, params, active_opts)
|
23
18
|
end
|
24
19
|
end
|
25
20
|
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
4
|
+
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_eval do
|
7
|
+
extend Shared::ClassMethods
|
8
|
+
|
9
|
+
attr_accessor :type, :token, :text, :ts, :te,
|
10
|
+
:level, :set_level, :conditional_level,
|
11
|
+
:options, :parent,
|
12
|
+
:custom_to_s_handling, :pre_quantifier_decorations
|
13
|
+
|
14
|
+
attr_reader :nesting_level, :quantifier
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def init_from_token_and_options(token, options = {})
|
19
|
+
self.type = token.type
|
20
|
+
self.token = token.token
|
21
|
+
self.text = token.text
|
22
|
+
self.ts = token.ts
|
23
|
+
self.te = token.te
|
24
|
+
self.level = token.level
|
25
|
+
self.set_level = token.set_level
|
26
|
+
self.conditional_level = token.conditional_level
|
27
|
+
self.nesting_level = 0
|
28
|
+
self.options = options || {}
|
29
|
+
end
|
30
|
+
private :init_from_token_and_options
|
31
|
+
|
32
|
+
def initialize_copy(orig)
|
33
|
+
self.text = orig.text.dup if orig.text
|
34
|
+
self.options = orig.options.dup if orig.options
|
35
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
36
|
+
self.parent = nil # updated by Subexpression#initialize_copy
|
37
|
+
if orig.pre_quantifier_decorations
|
38
|
+
self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
|
39
|
+
end
|
40
|
+
super
|
41
|
+
end
|
42
|
+
|
43
|
+
def starts_at
|
44
|
+
ts
|
45
|
+
end
|
46
|
+
|
47
|
+
def ends_at(include_quantifier = true)
|
48
|
+
ts + (include_quantifier ? full_length : base_length)
|
49
|
+
end
|
50
|
+
|
51
|
+
def base_length
|
52
|
+
to_s(:base).length
|
53
|
+
end
|
54
|
+
|
55
|
+
def full_length
|
56
|
+
to_s(:original).length
|
57
|
+
end
|
58
|
+
|
59
|
+
# #to_s reproduces the original source, as an unparser would.
|
60
|
+
#
|
61
|
+
# It takes an optional format argument.
|
62
|
+
#
|
63
|
+
# Example:
|
64
|
+
#
|
65
|
+
# lit = Regexp::Parser.parse(/a +/x)[0]
|
66
|
+
#
|
67
|
+
# lit.to_s # => 'a+' # default; with quantifier
|
68
|
+
# lit.to_s(:full) # => 'a+' # default; with quantifier
|
69
|
+
# lit.to_s(:base) # => 'a' # without quantifier
|
70
|
+
# lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
|
71
|
+
#
|
72
|
+
def to_s(format = :full)
|
73
|
+
base = parts.each_with_object(''.dup) do |part, buff|
|
74
|
+
if part.instance_of?(String)
|
75
|
+
buff << part
|
76
|
+
elsif !part.custom_to_s_handling
|
77
|
+
buff << part.to_s(:original)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
"#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
|
81
|
+
end
|
82
|
+
alias :to_str :to_s
|
83
|
+
|
84
|
+
def pre_quantifier_decoration(expression_format = :original)
|
85
|
+
pre_quantifier_decorations.to_a.join if expression_format == :original
|
86
|
+
end
|
87
|
+
|
88
|
+
def quantifier_affix(expression_format = :full)
|
89
|
+
quantifier.to_s if quantified? && expression_format != :base
|
90
|
+
end
|
91
|
+
|
92
|
+
def offset
|
93
|
+
[starts_at, full_length]
|
94
|
+
end
|
95
|
+
|
96
|
+
def coded_offset
|
97
|
+
'@%d+%d' % offset
|
98
|
+
end
|
99
|
+
|
100
|
+
def nesting_level=(lvl)
|
101
|
+
@nesting_level = lvl
|
102
|
+
quantifier && quantifier.nesting_level = lvl
|
103
|
+
terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
|
104
|
+
end
|
105
|
+
|
106
|
+
def quantifier=(qtf)
|
107
|
+
@quantifier = qtf
|
108
|
+
@repetitions = nil # clear memoized value
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -1,29 +1,25 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Subexpression < Regexp::Expression::Base
|
4
3
|
include Enumerable
|
5
4
|
|
6
5
|
attr_accessor :expressions
|
7
6
|
|
8
7
|
def initialize(token, options = {})
|
9
|
-
super
|
10
|
-
|
11
8
|
self.expressions = []
|
9
|
+
super
|
12
10
|
end
|
13
11
|
|
14
12
|
# Override base method to clone the expressions as well.
|
15
|
-
def
|
16
|
-
self.expressions = orig.expressions.map
|
13
|
+
def initialize_copy(orig)
|
14
|
+
self.expressions = orig.expressions.map do |exp|
|
15
|
+
exp.clone.tap { |copy| copy.parent = self }
|
16
|
+
end
|
17
17
|
super
|
18
18
|
end
|
19
19
|
|
20
20
|
def <<(exp)
|
21
|
-
|
22
|
-
|
23
|
-
else
|
24
|
-
exp.nesting_level = nesting_level + 1
|
25
|
-
expressions << exp
|
26
|
-
end
|
21
|
+
exp.parent = self
|
22
|
+
expressions << exp
|
27
23
|
end
|
28
24
|
|
29
25
|
%w[[] at each empty? fetch index join last length values_at].each do |method|
|
@@ -41,19 +37,31 @@ module Regexp::Expression
|
|
41
37
|
end
|
42
38
|
|
43
39
|
def te
|
44
|
-
ts +
|
45
|
-
end
|
46
|
-
|
47
|
-
def to_s(format = :full)
|
48
|
-
# Note: the format does not get passed down to subexpressions.
|
49
|
-
"#{expressions.join}#{quantifier_affix(format)}"
|
40
|
+
ts + base_length
|
50
41
|
end
|
51
42
|
|
52
43
|
def to_h
|
53
|
-
attributes.merge(
|
44
|
+
attributes.merge(
|
54
45
|
text: to_s(:base),
|
55
46
|
expressions: expressions.map(&:to_h)
|
56
|
-
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
def extract_quantifier_target(quantifier_description)
|
51
|
+
pre_quantifier_decorations = []
|
52
|
+
target = expressions.reverse.find do |exp|
|
53
|
+
if exp.decorative?
|
54
|
+
exp.custom_to_s_handling = true
|
55
|
+
pre_quantifier_decorations << exp.text
|
56
|
+
next
|
57
|
+
end
|
58
|
+
exp
|
59
|
+
end
|
60
|
+
target or raise Regexp::Parser::ParserError,
|
61
|
+
"No valid target found for '#{quantifier_description}' quantifier"
|
62
|
+
|
63
|
+
target.pre_quantifier_decorations = pre_quantifier_decorations
|
64
|
+
target
|
57
65
|
end
|
58
66
|
end
|
59
67
|
end
|