regexp_parser 1.3.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -1
- data/Gemfile +3 -3
- data/README.md +12 -19
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +28 -53
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -6
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +30 -44
- data/lib/regexp_parser/parser.rb +47 -24
- data/lib/regexp_parser/scanner.rb +1228 -1367
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +34 -1
- data/lib/regexp_parser/scanner/properties/short.yml +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +101 -194
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +3 -3
- data/spec/expression/base_spec.rb +94 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +161 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +99 -0
- data/spec/expression/methods/traverse_spec.rb +161 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/spec/lexer/conditionals_spec.rb +53 -0
- data/spec/lexer/delimiters_spec.rb +68 -0
- data/spec/lexer/escapes_spec.rb +14 -0
- data/spec/lexer/keep_spec.rb +10 -0
- data/spec/lexer/literals_spec.rb +89 -0
- data/spec/lexer/nesting_spec.rb +99 -0
- data/spec/lexer/refcalls_spec.rb +55 -0
- data/spec/parser/all_spec.rb +43 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/spec/parser/anchors_spec.rb +17 -0
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +30 -0
- data/spec/parser/escapes_spec.rb +121 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +108 -0
- data/spec/parser/keep_spec.rb +6 -0
- data/spec/parser/posix_classes_spec.rb +8 -0
- data/spec/parser/properties_spec.rb +115 -0
- data/spec/parser/quantifiers_spec.rb +52 -0
- data/spec/parser/refcalls_spec.rb +112 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/spec/parser/types_spec.rb +18 -0
- data/spec/scanner/all_spec.rb +18 -0
- data/spec/scanner/anchors_spec.rb +21 -0
- data/spec/scanner/conditionals_spec.rb +128 -0
- data/spec/scanner/delimiters_spec.rb +52 -0
- data/spec/scanner/errors_spec.rb +67 -0
- data/spec/scanner/escapes_spec.rb +53 -0
- data/spec/scanner/free_space_spec.rb +133 -0
- data/spec/scanner/groups_spec.rb +52 -0
- data/spec/scanner/keep_spec.rb +10 -0
- data/spec/scanner/literals_spec.rb +49 -0
- data/spec/scanner/meta_spec.rb +18 -0
- data/spec/scanner/properties_spec.rb +64 -0
- data/spec/scanner/quantifiers_spec.rb +20 -0
- data/spec/scanner/refcalls_spec.rb +36 -0
- data/spec/scanner/sets_spec.rb +102 -0
- data/spec/scanner/types_spec.rb +14 -0
- data/spec/spec_helper.rb +15 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/spec/support/shared_examples.rb +77 -0
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +48 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +17 -0
- data/spec/syntax/versions/1.9.1_spec.rb +10 -0
- data/spec/syntax/versions/1.9.3_spec.rb +9 -0
- data/spec/syntax/versions/2.0.0_spec.rb +13 -0
- data/spec/syntax/versions/2.2.0_spec.rb +9 -0
- data/spec/syntax/versions/aliases_spec.rb +37 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +151 -146
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_conditionals.rb +0 -127
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_literals.rb +0 -130
- data/test/lexer/test_nesting.rb +0 -132
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_anchors.rb +0 -34
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -133
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/parser/test_types.rb +0 -50
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_anchors.rb +0 -38
- data/test/scanner/test_conditionals.rb +0 -184
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_escapes.rb +0 -56
- data/test/scanner/test_free_space.rb +0 -200
- data/test/scanner/test_groups.rb +0 -79
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_literals.rb +0 -89
- data/test/scanner/test_meta.rb +0 -40
- data/test/scanner/test_properties.rb +0 -312
- data/test/scanner/test_quantifiers.rb +0 -37
- data/test/scanner/test_refcalls.rb +0 -52
- data/test/scanner/test_scripts.rb +0 -53
- data/test/scanner/test_sets.rb +0 -119
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
@@ -0,0 +1,172 @@
|
|
1
|
+
# Describes the range of string lengths that an expression can match.
#
# The range is derived from the repetition bounds of the expression
# (`exp.repetitions.min` / `exp.repetitions.max`) multiplied by the length
# of a single repetition, which is supplied through the options hash.
class Regexp::MatchLength
  include Enumerable

  # Returns the match length of +obj+. Anything that is not already a
  # Regexp::Expression::Base is passed through Regexp::Parser.parse first.
  def self.of(obj)
    exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
    exp.match_length
  end

  # exp  - object responding to #repetitions, whose result responds to
  #        #min and #max.
  # opts - either `base: n` for a fixed single-repetition length of n, or
  #        all of `base_min:`, `base_max:` and `reify:` (a callable that
  #        returns a regexp source string for one repetition).
  #
  # Raises KeyError if `base:` is absent and one of the other keys is missing.
  def initialize(exp, opts = {})
    self.exp_class = exp.class
    self.min_rep = exp.repetitions.min
    self.max_rep = exp.repetitions.max
    # Note: a base of 0 is truthy in Ruby, so zero-length bases take this path.
    if base = opts[:base]
      self.base_min = base
      self.base_max = base
      self.reify = ->{ '.' * base }
    else
      self.base_min = opts.fetch(:base_min)
      self.base_max = opts.fetch(:base_max)
      self.reify = opts.fetch(:reify)
    end
  end

  # Yields each matchable length between #min and #max, in ascending order,
  # stopping after `opts[:limit]` lengths (1000 if no limit is given).
  # Returns an Enumerator when called without a block.
  def each(opts = {})
    return enum_for(__method__, opts) unless block_given?
    limit = opts[:limit] || 1000
    yielded = 0
    (min..max).each do |num|
      next unless include?(num)
      yield(num)
      break if (yielded += 1) >= limit
    end
  end

  # Like #each, but without a limit on the number of yielded lengths.
  # Returns an Enumerator when called without a block.
  def endless_each
    return enum_for(__method__) unless block_given?
    (min..max).each { |num| yield(num) if include?(num) }
  end

  # True if a string of +length+ characters satisfies the reified pattern.
  def include?(length)
    test_regexp.match?('X' * length)
  end

  # True if only a single length can be matched.
  def fixed?
    min == max
  end

  # Smallest matchable length.
  def min
    scale(min_rep, base_min)
  end

  # Largest matchable length (Float::INFINITY if unbounded).
  def max
    scale(max_rep, base_max)
  end

  def minmax
    [min, max]
  end

  def inspect
    type = exp_class.name.sub('Regexp::Expression::', '')
    "#<#{self.class}<#{type}> min=#{min} max=#{max}>"
  end

  # Regexp source that matches exactly the lengths described by this object.
  # An infinite upper repetition bound is rendered as an open `{n,}` range.
  def to_re
    "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
  end

  private

  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify

  # Multiplies a repetition count by a base length.
  #
  # `0 * Float::INFINITY` (zero repetitions of an unbounded base, or
  # unbounded repetitions of a zero-width base) evaluates to NaN, which
  # cannot be used as a range boundary. Such a product means "no characters
  # at all", so it is reported as 0.
  def scale(repetitions, base)
    product = repetitions * base
    product.is_a?(Float) && product.nan? ? 0 : product
  end

  # Memoized regexp used by #include?. Anchored with \A and \z so the
  # whole test string has to match, not just one line of it.
  def test_regexp
    @test_regexp ||= Regexp.new("\\A#{to_re}\\z").tap do |regexp|
      # Regexp#match? is missing on older Rubies; add a per-object fallback.
      regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
    end
  end
end
|
79
|
+
|
80
|
+
module Regexp::Expression
  MatchLength = Regexp::MatchLength

  # Expression classes whose match length is built with a base of 1.
  single_length_classes = [
    CharacterSet,
    CharacterSet::Intersection,
    CharacterSet::IntersectedSequence,
    CharacterSet::Range,
    CharacterType::Base,
    EscapeSequence::Base,
    PosixClass,
    UnicodeProperty::Base,
  ]
  single_length_classes.each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(self, base: 1)
      end
    RUBY
  end

  class Literal
    # The base length of a literal is the length of its text.
    def match_length
      MatchLength.new(self, base: text.length)
    end
  end

  class Subexpression
    # Sums the minimum and maximum lengths of all child expressions and
    # concatenates their reified patterns. The children are visited again
    # each time the reify lambda is called.
    def match_length
      MatchLength.new(
        self,
        base_min: map { |child| child.match_length.min }.reduce(0, :+),
        base_max: map { |child| child.match_length.max }.reduce(0, :+),
        reify: -> { map { |child| child.match_length.to_re }.join }
      )
    end

    # Computes the match length of a freshly built Root that receives
    # clones of this expression's children and of its quantifier (if any).
    def inner_match_length
      stand_in = Regexp::Expression::Root.build
      stand_in.expressions = expressions.map(&:clone)
      stand_in.quantifier = quantifier && quantifier.clone
      stand_in.match_length
    end
  end

  # Expression classes whose children are alternatives: the smallest child
  # minimum and the largest child maximum are used, and the reified child
  # patterns are joined with '|'.
  branching_classes = [
    Alternation,
    Conditional::Expression,
  ]
  branching_classes.each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(
          self,
          base_min: map { |branch| branch.match_length.min }.min,
          base_max: map { |branch| branch.match_length.max }.max,
          reify: -> { map { |branch| branch.match_length.to_re }.join('|') }
        )
      end
    RUBY
  end

  # Expression classes whose match length is built with a base of 0.
  zero_length_classes = [
    Anchor::Base,
    Assertion::Base,
    Conditional::Condition,
    FreeSpace,
    Keep::Mark,
  ]
  zero_length_classes.each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(self, base: 0)
      end
    RUBY
  end

  class Backreference::Base
    # Delegates to an unquantified clone of the referenced expression.
    #
    # Raises ArgumentError if no referenced expression has been assigned.
    def match_length
      raise ArgumentError, 'Missing referenced_expression - not parsed?' if referenced_expression.nil?

      referenced_expression.unquantified_clone.match_length
    end
  end

  class EscapeSequence::CodepointList
    # The base length is the number of codepoints in the list.
    def match_length
      MatchLength.new(self, base: codepoints.count)
    end
  end

  # Special case. Absence group can match 0.. chars, irrespective of content.
  # TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
  class Group::Absence
    def match_length
      MatchLength.new(self, base_min: 0, base_max: Float::INFINITY, reify: -> { '.*' })
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Regexp::Expression
  class Base
    # Each predicate below is true only when the corresponding key of
    # #options holds exactly `true`; any other value yields false.

    # Option :m
    def multiline?
      options[:m] == true
    end
    alias_method :m?, :multiline?

    # Option :i
    def case_insensitive?
      options[:i] == true
    end
    alias_method :i?, :case_insensitive?
    alias_method :ignore_case?, :case_insensitive?

    # Option :x
    def free_spacing?
      options[:x] == true
    end
    alias_method :x?, :free_spacing?
    alias_method :extended?, :free_spacing?

    # Option :d
    def default_classes?
      options[:d] == true
    end
    alias_method :d?, :default_classes?

    # Option :a
    def ascii_classes?
      options[:a] == true
    end
    alias_method :a?, :ascii_classes?

    # Option :u
    def unicode_classes?
      options[:u] == true
    end
    alias_method :u?, :unicode_classes?
  end
end
|
@@ -75,32 +75,23 @@ module Regexp::Expression
|
|
75
75
|
def one_of?(scope, top = true)
|
76
76
|
case scope
|
77
77
|
when Array
|
78
|
-
|
79
|
-
return (scope.include?(token) or scope.include?(:*))
|
80
|
-
else
|
81
|
-
return scope.include?(token)
|
82
|
-
end
|
78
|
+
scope.include?(:*) || scope.include?(token)
|
83
79
|
|
84
80
|
when Hash
|
85
81
|
if scope.has_key?(:*)
|
86
82
|
test_type = scope.has_key?(type) ? type : :*
|
87
|
-
|
83
|
+
one_of?(scope[test_type], false)
|
88
84
|
else
|
89
|
-
|
85
|
+
scope.has_key?(type) && one_of?(scope[type], false)
|
90
86
|
end
|
91
87
|
|
92
88
|
when Symbol
|
93
|
-
|
94
|
-
|
95
|
-
return is?(scope) unless top
|
96
|
-
return type?(scope) if top
|
89
|
+
scope.equal?(:*) || (top ? type?(scope) : is?(scope))
|
97
90
|
|
98
91
|
else
|
99
|
-
raise
|
92
|
+
raise ArgumentError,
|
93
|
+
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
100
94
|
end
|
101
|
-
|
102
|
-
false
|
103
95
|
end
|
104
|
-
|
105
96
|
end
|
106
97
|
end
|
@@ -14,7 +14,7 @@ module Regexp::Expression
|
|
14
14
|
#
|
15
15
|
# Returns self.
|
16
16
|
def traverse(include_self = false, &block)
|
17
|
-
|
17
|
+
return enum_for(__method__, include_self) unless block_given?
|
18
18
|
|
19
19
|
block.call(:enter, self, 0) if include_self
|
20
20
|
|
@@ -37,6 +37,8 @@ module Regexp::Expression
|
|
37
37
|
# Iterates over the expressions of this expression as an array, passing
|
38
38
|
# the expression and its index within its parent to the given block.
|
39
39
|
def each_expression(include_self = false, &block)
|
40
|
+
return enum_for(__method__, include_self) unless block_given?
|
41
|
+
|
40
42
|
traverse(include_self) do |event, exp, index|
|
41
43
|
yield(exp, index) unless event == :exit
|
42
44
|
end
|
@@ -18,13 +18,14 @@ module Regexp::Expression
|
|
18
18
|
end
|
19
19
|
|
20
20
|
class << self
|
21
|
-
def add_to(subexpression,
|
21
|
+
def add_to(subexpression, params = {}, active_opts = {})
|
22
22
|
sequence = at_levels(
|
23
23
|
subexpression.level,
|
24
24
|
subexpression.set_level,
|
25
|
-
|
25
|
+
params[:conditional_level] || subexpression.conditional_level
|
26
26
|
)
|
27
27
|
sequence.nesting_level = subexpression.nesting_level + 1
|
28
|
+
sequence.options = active_opts
|
28
29
|
subexpression.expressions << sequence
|
29
30
|
sequence
|
30
31
|
end
|
@@ -44,10 +45,6 @@ module Regexp::Expression
|
|
44
45
|
end
|
45
46
|
end
|
46
47
|
|
47
|
-
def text
|
48
|
-
to_s
|
49
|
-
end
|
50
|
-
|
51
48
|
def starts_at
|
52
49
|
expressions.first.starts_at
|
53
50
|
end
|
@@ -14,12 +14,8 @@ module Regexp::Expression
|
|
14
14
|
expressions.last << exp
|
15
15
|
end
|
16
16
|
|
17
|
-
def add_sequence
|
18
|
-
self.class::OPERAND.add_to(self)
|
19
|
-
end
|
20
|
-
|
21
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
22
|
-
sequences.last.last.quantify(token, text, min, max, mode)
|
17
|
+
def add_sequence(active_opts = {})
|
18
|
+
self.class::OPERAND.add_to(self, {}, active_opts)
|
23
19
|
end
|
24
20
|
|
25
21
|
def to_s(format = :full)
|
@@ -12,8 +12,8 @@ module Regexp::Expression
|
|
12
12
|
end
|
13
13
|
|
14
14
|
# Override base method to clone the expressions as well.
|
15
|
-
def initialize_clone(
|
16
|
-
|
15
|
+
def initialize_clone(orig)
|
16
|
+
self.expressions = orig.expressions.map(&:clone)
|
17
17
|
super
|
18
18
|
end
|
19
19
|
|
@@ -46,9 +46,7 @@ module Regexp::Expression
|
|
46
46
|
|
47
47
|
def to_s(format = :full)
|
48
48
|
# Note: the format does not get passed down to subexpressions.
|
49
|
-
#
|
50
|
-
# in Expression::Sequence, causing infinite recursion. Clean-up needed.
|
51
|
-
"#{@text}#{expressions.join}#{quantifier_affix(format)}"
|
49
|
+
"#{expressions.join}#{quantifier_affix(format)}"
|
52
50
|
end
|
53
51
|
|
54
52
|
def to_h
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -22,6 +22,7 @@ class Regexp::Lexer
|
|
22
22
|
self.nesting = 0
|
23
23
|
self.set_nesting = 0
|
24
24
|
self.conditional_nesting = 0
|
25
|
+
self.shift = 0
|
25
26
|
|
26
27
|
last = nil
|
27
28
|
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
|
@@ -30,15 +31,13 @@ class Regexp::Lexer
|
|
30
31
|
|
31
32
|
ascend(type, token)
|
32
33
|
|
33
|
-
|
34
|
-
last
|
35
|
-
|
36
|
-
|
37
|
-
nesting, set_nesting, conditional_nesting)
|
34
|
+
if type == :quantifier and last
|
35
|
+
break_literal(last) if last.type == :literal
|
36
|
+
break_codepoint_list(last) if last.token == :codepoint_list
|
37
|
+
end
|
38
38
|
|
39
|
-
current =
|
40
|
-
|
41
|
-
last and last.type == :literal
|
39
|
+
current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
|
40
|
+
nesting, set_nesting, conditional_nesting)
|
42
41
|
|
43
42
|
current = merge_condition(current) if type == :conditional and
|
44
43
|
[:condition, :condition_close].include?(token)
|
@@ -65,7 +64,7 @@ class Regexp::Lexer
|
|
65
64
|
|
66
65
|
private
|
67
66
|
|
68
|
-
attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting
|
67
|
+
attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting, :shift
|
69
68
|
|
70
69
|
def ascend(type, token)
|
71
70
|
case type
|
@@ -92,44 +91,31 @@ class Regexp::Lexer
|
|
92
91
|
# called by scan to break a literal run that is longer than one character
|
93
92
|
# into two separate tokens when it is followed by a quantifier
|
94
93
|
def break_literal(token)
|
95
|
-
|
96
|
-
if
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
last_length = last.length
|
106
|
-
end
|
107
|
-
|
108
|
-
tokens.pop
|
109
|
-
tokens << Regexp::Token.new(:literal, :literal, lead, token.ts,
|
110
|
-
(token.te - last_length), nesting, set_nesting, conditional_nesting)
|
111
|
-
|
112
|
-
tokens << Regexp::Token.new(:literal, :literal, last,
|
113
|
-
(token.ts + lead_length),
|
114
|
-
token.te, nesting, set_nesting, conditional_nesting)
|
115
|
-
end
|
94
|
+
lead, last, _ = token.text.partition(/.\z/mu)
|
95
|
+
return if lead.empty?
|
96
|
+
|
97
|
+
tokens.pop
|
98
|
+
tokens << Regexp::Token.new(:literal, :literal, lead,
|
99
|
+
token.ts, (token.te - last.bytesize),
|
100
|
+
nesting, set_nesting, conditional_nesting)
|
101
|
+
tokens << Regexp::Token.new(:literal, :literal, last,
|
102
|
+
(token.ts + lead.bytesize), token.te,
|
103
|
+
nesting, set_nesting, conditional_nesting)
|
116
104
|
end
|
117
105
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
106
|
+
def break_codepoint_list(token)
|
107
|
+
lead, _, tail = token.text.rpartition(' ')
|
108
|
+
return if lead.empty?
|
109
|
+
|
110
|
+
tokens.pop
|
111
|
+
tokens << Regexp::Token.new(:escape, :codepoint_list, lead + '}',
|
112
|
+
token.ts, (token.te - tail.length),
|
113
|
+
nesting, set_nesting, conditional_nesting)
|
114
|
+
tokens << Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
|
115
|
+
(token.ts + lead.length + 1), (token.te + 3),
|
116
|
+
nesting, set_nesting, conditional_nesting)
|
122
117
|
|
123
|
-
|
124
|
-
:literal,
|
125
|
-
:literal,
|
126
|
-
last.text + current.text,
|
127
|
-
last.ts,
|
128
|
-
current.te,
|
129
|
-
nesting,
|
130
|
-
set_nesting,
|
131
|
-
conditional_nesting,
|
132
|
-
)
|
118
|
+
self.shift = shift + 3 # one space less, but extra \, u, {, and }
|
133
119
|
end
|
134
120
|
|
135
121
|
def merge_condition(current)
|