regexp_parser 1.3.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +53 -1
- data/Gemfile +3 -3
- data/README.md +10 -14
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +28 -53
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -6
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +30 -44
- data/lib/regexp_parser/parser.rb +47 -24
- data/lib/regexp_parser/scanner.rb +1159 -1329
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +34 -1
- data/lib/regexp_parser/scanner/properties/short.yml +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +82 -190
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +3 -3
- data/spec/expression/base_spec.rb +94 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +154 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +99 -0
- data/spec/expression/methods/traverse_spec.rb +140 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/spec/lexer/conditionals_spec.rb +53 -0
- data/spec/lexer/escapes_spec.rb +14 -0
- data/spec/lexer/keep_spec.rb +10 -0
- data/spec/lexer/literals_spec.rb +89 -0
- data/spec/lexer/nesting_spec.rb +99 -0
- data/spec/lexer/refcalls_spec.rb +55 -0
- data/spec/parser/all_spec.rb +43 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/spec/parser/anchors_spec.rb +17 -0
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +30 -0
- data/spec/parser/escapes_spec.rb +121 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +108 -0
- data/spec/parser/keep_spec.rb +6 -0
- data/spec/parser/posix_classes_spec.rb +8 -0
- data/spec/parser/properties_spec.rb +115 -0
- data/spec/parser/quantifiers_spec.rb +51 -0
- data/spec/parser/refcalls_spec.rb +112 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/spec/parser/types_spec.rb +18 -0
- data/spec/scanner/all_spec.rb +18 -0
- data/spec/scanner/anchors_spec.rb +21 -0
- data/spec/scanner/conditionals_spec.rb +128 -0
- data/spec/scanner/errors_spec.rb +68 -0
- data/spec/scanner/escapes_spec.rb +53 -0
- data/spec/scanner/free_space_spec.rb +133 -0
- data/spec/scanner/groups_spec.rb +52 -0
- data/spec/scanner/keep_spec.rb +10 -0
- data/spec/scanner/literals_spec.rb +49 -0
- data/spec/scanner/meta_spec.rb +18 -0
- data/spec/scanner/properties_spec.rb +64 -0
- data/spec/scanner/quantifiers_spec.rb +20 -0
- data/spec/scanner/refcalls_spec.rb +36 -0
- data/spec/scanner/sets_spec.rb +102 -0
- data/spec/scanner/types_spec.rb +14 -0
- data/spec/spec_helper.rb +15 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/spec/support/shared_examples.rb +77 -0
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +48 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +17 -0
- data/spec/syntax/versions/1.9.1_spec.rb +10 -0
- data/spec/syntax/versions/1.9.3_spec.rb +9 -0
- data/spec/syntax/versions/2.0.0_spec.rb +13 -0
- data/spec/syntax/versions/2.2.0_spec.rb +9 -0
- data/spec/syntax/versions/aliases_spec.rb +37 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +144 -143
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_conditionals.rb +0 -127
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_literals.rb +0 -130
- data/test/lexer/test_nesting.rb +0 -132
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_anchors.rb +0 -34
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -133
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/parser/test_types.rb +0 -50
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_anchors.rb +0 -38
- data/test/scanner/test_conditionals.rb +0 -184
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_escapes.rb +0 -56
- data/test/scanner/test_free_space.rb +0 -200
- data/test/scanner/test_groups.rb +0 -79
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_literals.rb +0 -89
- data/test/scanner/test_meta.rb +0 -40
- data/test/scanner/test_properties.rb +0 -312
- data/test/scanner/test_quantifiers.rb +0 -37
- data/test/scanner/test_refcalls.rb +0 -52
- data/test/scanner/test_scripts.rb +0 -53
- data/test/scanner/test_sets.rb +0 -119
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
@@ -0,0 +1,172 @@
|
|
1
|
+
# Describes the set of string lengths that an expression can match.
#
# A MatchLength is built from an expression's repetition range
# (`exp.repetitions`) and a "base" length range for a single repetition.
# It is Enumerable over the matchable lengths between #min and #max.
class Regexp::MatchLength
  include Enumerable

  # Returns the MatchLength of +obj+.
  #
  # +obj+ is used as-is when it is a Regexp::Expression::Base; anything else
  # is handed to Regexp::Parser.parse first.
  def self.of(obj)
    exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
    exp.match_length
  end

  # exp  - the expression being described; its class and `repetitions`
  #        (min/max) are recorded.
  # opts - either `base: n` for a fixed per-repetition length, or all of
  #        `base_min:`, `base_max:` and `reify:` (a callable returning a
  #        regexp source string that matches one repetition).
  #
  # Raises KeyError when neither `:base` nor the full triple is given.
  def initialize(exp, opts = {})
    self.exp_class = exp.class
    self.min_rep   = exp.repetitions.min
    self.max_rep   = exp.repetitions.max

    base = opts[:base]
    if base
      self.base_min = base
      self.base_max = base
      self.reify    = ->{ '.' * base }
    else
      self.base_min = opts.fetch(:base_min)
      self.base_max = opts.fetch(:base_max)
      self.reify    = opts.fetch(:reify)
    end
  end

  # Yields each matchable length in ascending order, stopping after
  # `opts[:limit]` lengths (default 1000).
  #
  # Without a block an Enumerator is returned. The options are forwarded to
  # it explicitly; otherwise a caller-supplied :limit would be silently
  # dropped and the default of 1000 used instead.
  def each(opts = {})
    return enum_for(__method__, opts) unless block_given?

    limit = opts[:limit] || 1000
    yielded = 0
    (min..max).each do |num|
      next unless include?(num)

      yield(num)
      break if (yielded += 1) >= limit
    end
  end

  # Like #each, but without any limit on the number of yielded lengths.
  # Does not terminate on its own when #max is infinite.
  def endless_each(&block)
    return enum_for(__method__) unless block_given?

    (min..max).each { |num| yield(num) if include?(num) }
  end

  # True if a string of +length+ characters can be matched.
  # Negative lengths can never match (and cannot be used to build a probe
  # string), so they return false instead of raising.
  def include?(length)
    return false if length < 0

    test_regexp.match?('X' * length)
  end

  # True if only a single length can be matched.
  def fixed?
    min == max
  end

  # Smallest matchable length: minimum repetitions times minimum base length.
  def min
    min_rep * base_min
  end

  # Largest matchable length: maximum repetitions times maximum base length.
  #
  # A zero factor is short-circuited because `0 * Float::INFINITY` is NaN,
  # which would break `min..max` and make #fixed? report false.
  def max
    return 0 if max_rep.zero? || base_max.zero?

    max_rep * base_max
  end

  # Returns [min, max]. Overrides Enumerable#minmax so that no iteration
  # is needed.
  def minmax
    [min, max]
  end

  def inspect
    type = exp_class.name.sub('Regexp::Expression::', '')
    "#<#{self.class}<#{type}> min=#{min} max=#{max}>"
  end

  # Regexp source that matches any string of a matchable length: the reified
  # base wrapped in a group and quantified with the repetition range. The
  # upper bound is left open when the maximum repetition count is infinite.
  def to_re
    "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
  end

  private

  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify

  # Memoized, whole-string-anchored Regexp built from #to_re.
  # Regexp#match? is added on the instance when it does not respond to it.
  def test_regexp
    @test_regexp ||= Regexp.new("\\A#{to_re}\\z").tap do |regexp|
      regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
    end
  end
end
|
79
|
+
|
80
|
+
# Adds #match_length to the expression classes. Each definition builds a
# MatchLength from the expression itself plus a base length (or base range)
# for a single repetition.
module Regexp::Expression
  MatchLength = Regexp::MatchLength

  # Expressions reported with a base length of exactly 1.
  base_one_classes = [
    CharacterSet,
    CharacterSet::Intersection,
    CharacterSet::IntersectedSequence,
    CharacterSet::Range,
    CharacterType::Base,
    EscapeSequence::Base,
    PosixClass,
    UnicodeProperty::Base,
  ]
  base_one_classes.each do |klass|
    klass.class_eval do
      def match_length
        MatchLength.new(self, base: 1)
      end
    end
  end

  class Literal
    # Base length is the length of the literal's text.
    def match_length
      MatchLength.new(self, base: text.length)
    end
  end

  class Subexpression
    # Base range is the sum of the children's minimums and maximums; the
    # reified pattern is the children's patterns concatenated.
    def match_length
      minimums = map { |child| child.match_length.min }
      maximums = map { |child| child.match_length.max }
      MatchLength.new(
        self,
        base_min: minimums.reduce(0, :+),
        base_max: maximums.reduce(0, :+),
        reify: ->{ map { |child| child.match_length.to_re }.join }
      )
    end

    # MatchLength computed on a freshly built Root that receives clones of
    # this expression's children and of its quantifier (if any).
    def inner_match_length
      stand_in = Regexp::Expression::Root.build
      stand_in.expressions = expressions.map { |child| child.clone }
      stand_in.quantifier = quantifier && quantifier.clone
      stand_in.match_length
    end
  end

  # Branching expressions: base range spans the smallest minimum to the
  # largest maximum of the children; the reified pattern joins the
  # children's patterns with '|'.
  branching_classes = [
    Alternation,
    Conditional::Expression,
  ]
  branching_classes.each do |klass|
    klass.class_eval do
      def match_length
        minimums = map { |child| child.match_length.min }
        maximums = map { |child| child.match_length.max }
        MatchLength.new(
          self,
          base_min: minimums.min,
          base_max: maximums.max,
          reify: ->{ map { |child| child.match_length.to_re }.join('|') }
        )
      end
    end
  end

  # Expressions reported with a base length of 0.
  base_zero_classes = [
    Anchor::Base,
    Assertion::Base,
    Conditional::Condition,
    FreeSpace,
    Keep::Mark,
  ]
  base_zero_classes.each do |klass|
    klass.class_eval do
      def match_length
        MatchLength.new(self, base: 0)
      end
    end
  end

  class Backreference::Base
    # Delegates to an unquantified clone of the referenced expression.
    # Raises ArgumentError when no referenced expression has been assigned.
    def match_length
      raise ArgumentError, 'Missing referenced_expression - not parsed?' if referenced_expression.nil?

      referenced_expression.unquantified_clone.match_length
    end
  end

  class EscapeSequence::CodepointList
    # Base length is the number of codepoints in the list.
    def match_length
      MatchLength.new(self, base: codepoints.count)
    end
  end

  # Special case. Absence group can match 0.. chars, irrespective of content.
  # TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
  class Group::Absence
    def match_length
      MatchLength.new(self, base_min: 0, base_max: Float::INFINITY, reify: ->{ '.*' })
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Predicates for the option flags of an expression. Each one is true only
# when the corresponding entry of #options is exactly `true`.
module Regexp::Expression
  class Base
    # Is the `m` flag set?
    def multiline?
      options[:m] == true
    end
    alias_method :m?, :multiline?

    # Is the `i` flag set?
    def case_insensitive?
      options[:i] == true
    end
    alias_method :i?, :case_insensitive?
    alias_method :ignore_case?, :case_insensitive?

    # Is the `x` flag set?
    def free_spacing?
      options[:x] == true
    end
    alias_method :x?, :free_spacing?
    alias_method :extended?, :free_spacing?

    # Is the `d` flag set?
    def default_classes?
      options[:d] == true
    end
    alias_method :d?, :default_classes?

    # Is the `a` flag set?
    def ascii_classes?
      options[:a] == true
    end
    alias_method :a?, :ascii_classes?

    # Is the `u` flag set?
    def unicode_classes?
      options[:u] == true
    end
    alias_method :u?, :unicode_classes?
  end
end
|
@@ -75,32 +75,23 @@ module Regexp::Expression
|
|
75
75
|
def one_of?(scope, top = true)
|
76
76
|
case scope
|
77
77
|
when Array
|
78
|
-
|
79
|
-
return (scope.include?(token) or scope.include?(:*))
|
80
|
-
else
|
81
|
-
return scope.include?(token)
|
82
|
-
end
|
78
|
+
scope.include?(:*) || scope.include?(token)
|
83
79
|
|
84
80
|
when Hash
|
85
81
|
if scope.has_key?(:*)
|
86
82
|
test_type = scope.has_key?(type) ? type : :*
|
87
|
-
|
83
|
+
one_of?(scope[test_type], false)
|
88
84
|
else
|
89
|
-
|
85
|
+
scope.has_key?(type) && one_of?(scope[type], false)
|
90
86
|
end
|
91
87
|
|
92
88
|
when Symbol
|
93
|
-
|
94
|
-
|
95
|
-
return is?(scope) unless top
|
96
|
-
return type?(scope) if top
|
89
|
+
scope.equal?(:*) || (top ? type?(scope) : is?(scope))
|
97
90
|
|
98
91
|
else
|
99
|
-
raise
|
92
|
+
raise ArgumentError,
|
93
|
+
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
100
94
|
end
|
101
|
-
|
102
|
-
false
|
103
95
|
end
|
104
|
-
|
105
96
|
end
|
106
97
|
end
|
@@ -18,13 +18,14 @@ module Regexp::Expression
|
|
18
18
|
end
|
19
19
|
|
20
20
|
class << self
|
21
|
-
def add_to(subexpression,
|
21
|
+
def add_to(subexpression, params = {}, active_opts = {})
|
22
22
|
sequence = at_levels(
|
23
23
|
subexpression.level,
|
24
24
|
subexpression.set_level,
|
25
|
-
|
25
|
+
params[:conditional_level] || subexpression.conditional_level
|
26
26
|
)
|
27
27
|
sequence.nesting_level = subexpression.nesting_level + 1
|
28
|
+
sequence.options = active_opts
|
28
29
|
subexpression.expressions << sequence
|
29
30
|
sequence
|
30
31
|
end
|
@@ -44,10 +45,6 @@ module Regexp::Expression
|
|
44
45
|
end
|
45
46
|
end
|
46
47
|
|
47
|
-
def text
|
48
|
-
to_s
|
49
|
-
end
|
50
|
-
|
51
48
|
def starts_at
|
52
49
|
expressions.first.starts_at
|
53
50
|
end
|
@@ -14,12 +14,8 @@ module Regexp::Expression
|
|
14
14
|
expressions.last << exp
|
15
15
|
end
|
16
16
|
|
17
|
-
def add_sequence
|
18
|
-
self.class::OPERAND.add_to(self)
|
19
|
-
end
|
20
|
-
|
21
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
22
|
-
sequences.last.last.quantify(token, text, min, max, mode)
|
17
|
+
def add_sequence(active_opts = {})
|
18
|
+
self.class::OPERAND.add_to(self, {}, active_opts)
|
23
19
|
end
|
24
20
|
|
25
21
|
def to_s(format = :full)
|
@@ -12,8 +12,8 @@ module Regexp::Expression
|
|
12
12
|
end
|
13
13
|
|
14
14
|
# Override base method to clone the expressions as well.
|
15
|
-
def initialize_clone(
|
16
|
-
|
15
|
+
def initialize_clone(orig)
|
16
|
+
self.expressions = orig.expressions.map(&:clone)
|
17
17
|
super
|
18
18
|
end
|
19
19
|
|
@@ -46,9 +46,7 @@ module Regexp::Expression
|
|
46
46
|
|
47
47
|
def to_s(format = :full)
|
48
48
|
# Note: the format does not get passed down to subexpressions.
|
49
|
-
#
|
50
|
-
# in Expression::Sequence, causing infinite recursion. Clean-up needed.
|
51
|
-
"#{@text}#{expressions.join}#{quantifier_affix(format)}"
|
49
|
+
"#{expressions.join}#{quantifier_affix(format)}"
|
52
50
|
end
|
53
51
|
|
54
52
|
def to_h
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -22,6 +22,7 @@ class Regexp::Lexer
|
|
22
22
|
self.nesting = 0
|
23
23
|
self.set_nesting = 0
|
24
24
|
self.conditional_nesting = 0
|
25
|
+
self.shift = 0
|
25
26
|
|
26
27
|
last = nil
|
27
28
|
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
|
@@ -30,15 +31,13 @@ class Regexp::Lexer
|
|
30
31
|
|
31
32
|
ascend(type, token)
|
32
33
|
|
33
|
-
|
34
|
-
last
|
35
|
-
|
36
|
-
|
37
|
-
nesting, set_nesting, conditional_nesting)
|
34
|
+
if type == :quantifier and last
|
35
|
+
break_literal(last) if last.type == :literal
|
36
|
+
break_codepoint_list(last) if last.token == :codepoint_list
|
37
|
+
end
|
38
38
|
|
39
|
-
current =
|
40
|
-
|
41
|
-
last and last.type == :literal
|
39
|
+
current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
|
40
|
+
nesting, set_nesting, conditional_nesting)
|
42
41
|
|
43
42
|
current = merge_condition(current) if type == :conditional and
|
44
43
|
[:condition, :condition_close].include?(token)
|
@@ -65,7 +64,7 @@ class Regexp::Lexer
|
|
65
64
|
|
66
65
|
private
|
67
66
|
|
68
|
-
attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting
|
67
|
+
attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting, :shift
|
69
68
|
|
70
69
|
def ascend(type, token)
|
71
70
|
case type
|
@@ -92,44 +91,31 @@ class Regexp::Lexer
|
|
92
91
|
# called by scan to break a literal run that is longer than one character
|
93
92
|
# into two separate tokens when it is followed by a quantifier
|
94
93
|
def break_literal(token)
|
95
|
-
|
96
|
-
if
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
last_length = last.length
|
106
|
-
end
|
107
|
-
|
108
|
-
tokens.pop
|
109
|
-
tokens << Regexp::Token.new(:literal, :literal, lead, token.ts,
|
110
|
-
(token.te - last_length), nesting, set_nesting, conditional_nesting)
|
111
|
-
|
112
|
-
tokens << Regexp::Token.new(:literal, :literal, last,
|
113
|
-
(token.ts + lead_length),
|
114
|
-
token.te, nesting, set_nesting, conditional_nesting)
|
115
|
-
end
|
94
|
+
lead, last, _ = token.text.partition(/.\z/mu)
|
95
|
+
return if lead.empty?
|
96
|
+
|
97
|
+
tokens.pop
|
98
|
+
tokens << Regexp::Token.new(:literal, :literal, lead,
|
99
|
+
token.ts, (token.te - last.bytesize),
|
100
|
+
nesting, set_nesting, conditional_nesting)
|
101
|
+
tokens << Regexp::Token.new(:literal, :literal, last,
|
102
|
+
(token.ts + lead.bytesize), token.te,
|
103
|
+
nesting, set_nesting, conditional_nesting)
|
116
104
|
end
|
117
105
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
106
|
+
def break_codepoint_list(token)
|
107
|
+
lead, _, tail = token.text.rpartition(' ')
|
108
|
+
return if lead.empty?
|
109
|
+
|
110
|
+
tokens.pop
|
111
|
+
tokens << Regexp::Token.new(:escape, :codepoint_list, lead + '}',
|
112
|
+
token.ts, (token.te - tail.length),
|
113
|
+
nesting, set_nesting, conditional_nesting)
|
114
|
+
tokens << Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
|
115
|
+
(token.ts + lead.length + 1), (token.te + 3),
|
116
|
+
nesting, set_nesting, conditional_nesting)
|
122
117
|
|
123
|
-
|
124
|
-
:literal,
|
125
|
-
:literal,
|
126
|
-
last.text + current.text,
|
127
|
-
last.ts,
|
128
|
-
current.te,
|
129
|
-
nesting,
|
130
|
-
set_nesting,
|
131
|
-
conditional_nesting,
|
132
|
-
)
|
118
|
+
self.shift = shift + 3 # one space less, but extra \, u, {, and }
|
133
119
|
end
|
134
120
|
|
135
121
|
def merge_condition(current)
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -39,6 +39,8 @@ class Regexp::Parser
|
|
39
39
|
parse_token(token)
|
40
40
|
end
|
41
41
|
|
42
|
+
assign_referenced_expressions
|
43
|
+
|
42
44
|
if block_given?
|
43
45
|
block.call(root)
|
44
46
|
else
|
@@ -163,14 +165,18 @@ class Regexp::Parser
|
|
163
165
|
node << Backreference::NameCall.new(token, active_opts)
|
164
166
|
when :number, :number_ref
|
165
167
|
node << Backreference::Number.new(token, active_opts)
|
166
|
-
when :number_rel_ref
|
167
|
-
node << Backreference::NumberRelative.new(token, active_opts)
|
168
168
|
when :number_recursion_ref
|
169
169
|
node << Backreference::NumberRecursionLevel.new(token, active_opts)
|
170
170
|
when :number_call
|
171
171
|
node << Backreference::NumberCall.new(token, active_opts)
|
172
|
+
when :number_rel_ref
|
173
|
+
node << Backreference::NumberRelative.new(token, active_opts).tap do |exp|
|
174
|
+
assign_effective_number(exp)
|
175
|
+
end
|
172
176
|
when :number_rel_call
|
173
|
-
node << Backreference::NumberCallRelative.new(token, active_opts)
|
177
|
+
node << Backreference::NumberCallRelative.new(token, active_opts).tap do |exp|
|
178
|
+
assign_effective_number(exp)
|
179
|
+
end
|
174
180
|
else
|
175
181
|
raise UnknownTokenError.new('Backreference', token)
|
176
182
|
end
|
@@ -209,9 +215,9 @@ class Regexp::Parser
|
|
209
215
|
nest_conditional(Conditional::Expression.new(token, active_opts))
|
210
216
|
when :condition
|
211
217
|
conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
|
212
|
-
conditional_nesting.last.
|
218
|
+
conditional_nesting.last.add_sequence(active_opts)
|
213
219
|
when :separator
|
214
|
-
conditional_nesting.last.
|
220
|
+
conditional_nesting.last.add_sequence(active_opts)
|
215
221
|
self.node = conditional_nesting.last.branches.last
|
216
222
|
when :close
|
217
223
|
conditional_nesting.pop
|
@@ -229,7 +235,7 @@ class Regexp::Parser
|
|
229
235
|
end
|
230
236
|
|
231
237
|
def posixclass(token)
|
232
|
-
node << PosixClass.new(token)
|
238
|
+
node << PosixClass.new(token, active_opts)
|
233
239
|
end
|
234
240
|
|
235
241
|
include Regexp::Expression::UnicodeProperty
|
@@ -491,6 +497,9 @@ class Regexp::Parser
|
|
491
497
|
end
|
492
498
|
end
|
493
499
|
|
500
|
+
MOD_FLAGS = %w[i m x].map(&:to_sym)
|
501
|
+
ENC_FLAGS = %w[a d u].map(&:to_sym)
|
502
|
+
|
494
503
|
def options_group(token)
|
495
504
|
positive, negative = token.text.split('-', 2)
|
496
505
|
negative ||= ''
|
@@ -499,23 +508,23 @@ class Regexp::Parser
|
|
499
508
|
opt_changes = {}
|
500
509
|
new_active_opts = active_opts.dup
|
501
510
|
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
opt_changes[flag.to_sym] = new_active_opts[flag.to_sym] = true
|
511
|
+
MOD_FLAGS.each do |flag|
|
512
|
+
if positive.include?(flag.to_s)
|
513
|
+
opt_changes[flag] = new_active_opts[flag] = true
|
506
514
|
end
|
507
|
-
if negative.include?(flag)
|
508
|
-
opt_changes[flag
|
509
|
-
new_active_opts.delete(flag
|
515
|
+
if negative.include?(flag.to_s)
|
516
|
+
opt_changes[flag] = false
|
517
|
+
new_active_opts.delete(flag)
|
510
518
|
end
|
511
519
|
end
|
512
520
|
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
521
|
+
if (enc_flag = positive.reverse[/[adu]/])
|
522
|
+
enc_flag = enc_flag.to_sym
|
523
|
+
(ENC_FLAGS - [enc_flag]).each do |other|
|
524
|
+
opt_changes[other] = false if new_active_opts[other]
|
525
|
+
new_active_opts.delete(other)
|
526
|
+
end
|
527
|
+
opt_changes[enc_flag] = new_active_opts[enc_flag] = true
|
519
528
|
end
|
520
529
|
|
521
530
|
options_stack << new_active_opts
|
@@ -600,16 +609,14 @@ class Regexp::Parser
|
|
600
609
|
end
|
601
610
|
|
602
611
|
def sequence_operation(klass, token)
|
603
|
-
|
604
|
-
self.node = node.last
|
605
|
-
elsif !node.is_a?(klass)
|
612
|
+
unless node.is_a?(klass)
|
606
613
|
operator = klass.new(token, active_opts)
|
607
|
-
sequence = operator.add_sequence
|
614
|
+
sequence = operator.add_sequence(active_opts)
|
608
615
|
sequence.expressions = node.expressions
|
609
616
|
node.expressions = []
|
610
617
|
nest(operator)
|
611
618
|
end
|
612
|
-
node.add_sequence
|
619
|
+
node.add_sequence(active_opts)
|
613
620
|
end
|
614
621
|
|
615
622
|
def active_opts
|
@@ -627,4 +634,20 @@ class Regexp::Parser
|
|
627
634
|
def count_captured_group
|
628
635
|
captured_group_counts[node.level] += 1
|
629
636
|
end
|
637
|
+
|
638
|
+
# Stores on +exp+ the number obtained by adding its own (relative) number to
# the total count of groups captured so far; negative numbers get an extra
# +1 added.
def assign_effective_number(exp)
  negative_adjustment = exp.number < 0 ? 1 : 0
  exp.effective_number = exp.number + total_captured_group_count + negative_adjustment
end
|
642
|
+
|
643
|
+
# Links every expression that responds to #reference with the capture group
# it refers to.
#
# First pass: index all Group::Capture expressions under root by identifier
# (a later group with the same identifier replaces an earlier one).
# Second pass: look each reference up in that index; unknown references are
# assigned nil.
def assign_referenced_expressions
  captures_by_identifier = {}

  root.each_expression do |node|
    captures_by_identifier[node.identifier] = node if node.is_a?(Group::Capture)
  end

  root.each_expression do |node|
    next unless node.respond_to?(:reference)

    node.referenced_expression = captures_by_identifier[node.reference]
  end
end
|
630
653
|
end # module Regexp::Parser
|