regexp_parser 2.4.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +98 -42
- data/README.md +46 -30
- data/lib/regexp_parser/expression/base.rb +17 -9
- data/lib/regexp_parser/expression/classes/backreference.rb +19 -2
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +8 -0
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +10 -0
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +3 -5
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +9 -5
- data/lib/regexp_parser/expression/methods/traverse.rb +6 -3
- data/lib/regexp_parser/expression/quantifier.rb +6 -5
- data/lib/regexp_parser/expression/sequence.rb +6 -21
- data/lib/regexp_parser/expression/shared.rb +20 -3
- data/lib/regexp_parser/expression/subexpression.rb +4 -1
- data/lib/regexp_parser/expression.rb +4 -2
- data/lib/regexp_parser/lexer.rb +61 -29
- data/lib/regexp_parser/parser.rb +36 -26
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +57 -42
- data/lib/regexp_parser/scanner.rb +873 -823
- data/lib/regexp_parser/syntax/token/escape.rb +1 -1
- data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
- data/lib/regexp_parser/syntax/versions.rb +2 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +7 -5
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation: token and type joined, underscores as spaces, e.g. "atomic group", "hex escape", "word type", ...
|
4
|
+
def human_name
|
5
|
+
[token, type].compact.join(' ').tr('_', ' ')
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
Alternation.class_eval { def human_name; 'alternation' end }
|
10
|
+
Alternative.class_eval { def human_name; 'alternative' end }
|
11
|
+
Anchor::BOL.class_eval { def human_name; 'beginning of line' end }
|
12
|
+
Anchor::BOS.class_eval { def human_name; 'beginning of string' end }
|
13
|
+
Anchor::EOL.class_eval { def human_name; 'end of line' end }
|
14
|
+
Anchor::EOS.class_eval { def human_name; 'end of string' end }
|
15
|
+
Anchor::EOSobEOL.class_eval { def human_name; 'newline-ready end of string' end }
|
16
|
+
Anchor::MatchStart.class_eval { def human_name; 'match start' end }
|
17
|
+
Anchor::NonWordBoundary.class_eval { def human_name; 'no word boundary' end }
|
18
|
+
Anchor::WordBoundary.class_eval { def human_name; 'word boundary' end }
|
19
|
+
Assertion::Lookahead.class_eval { def human_name; 'lookahead' end }
|
20
|
+
Assertion::Lookbehind.class_eval { def human_name; 'lookbehind' end }
|
21
|
+
Assertion::NegativeLookahead.class_eval { def human_name; 'negative lookahead' end }
|
22
|
+
Assertion::NegativeLookbehind.class_eval { def human_name; 'negative lookbehind' end }
|
23
|
+
Backreference::Name.class_eval { def human_name; 'backreference by name' end }
|
24
|
+
Backreference::NameCall.class_eval { def human_name; 'subexpression call by name' end }
|
25
|
+
Backreference::Number.class_eval { def human_name; 'backreference' end }
|
26
|
+
Backreference::NumberRelative.class_eval { def human_name; 'relative backreference' end }
|
27
|
+
Backreference::NumberCall.class_eval { def human_name; 'subexpression call' end }
|
28
|
+
Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
|
29
|
+
CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence' end }
|
30
|
+
CharacterSet::Intersection.class_eval { def human_name; 'intersection' end }
|
31
|
+
CharacterSet::Range.class_eval { def human_name; 'character range' end }
|
32
|
+
CharacterType::Any.class_eval { def human_name; 'match-all' end }
|
33
|
+
Comment.class_eval { def human_name; 'comment' end }
|
34
|
+
Conditional::Branch.class_eval { def human_name; 'conditional branch' end }
|
35
|
+
Conditional::Condition.class_eval { def human_name; 'condition' end }
|
36
|
+
Conditional::Expression.class_eval { def human_name; 'conditional' end }
|
37
|
+
Group::Capture.class_eval { def human_name; "capture group #{number}" end }
|
38
|
+
Group::Named.class_eval { def human_name; 'named capture group' end }
|
39
|
+
Keep::Mark.class_eval { def human_name; 'keep-mark lookbehind' end }
|
40
|
+
Literal.class_eval { def human_name; 'literal' end }
|
41
|
+
Root.class_eval { def human_name; 'root' end }
|
42
|
+
WhiteSpace.class_eval { def human_name; 'free space' end }
|
43
|
+
end
|
@@ -63,16 +63,20 @@ class Regexp::MatchLength
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def to_re
|
66
|
-
|
66
|
+
/(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
70
70
|
|
71
71
|
attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
if Regexp.method_defined?(:match?) # ruby >= 2.4
|
74
|
+
def test_regexp
|
75
|
+
@test_regexp ||= /^#{to_re}$/
|
76
|
+
end
|
77
|
+
else
|
78
|
+
def test_regexp
|
79
|
+
@test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
@@ -112,7 +116,7 @@ module Regexp::Expression
|
|
112
116
|
end
|
113
117
|
|
114
118
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.
|
119
|
+
dummy = Regexp::Expression::Root.construct
|
116
120
|
dummy.expressions = expressions.map(&:clone)
|
117
121
|
dummy.quantifier = quantifier && quantifier.clone
|
118
122
|
dummy.match_length
|
@@ -36,11 +36,14 @@ module Regexp::Expression
|
|
36
36
|
|
37
37
|
# Iterates over the expressions of this expression as an array, passing
|
38
38
|
# the expression and its index within its parent to the given block.
|
39
|
-
def each_expression(include_self = false)
|
39
|
+
def each_expression(include_self = false, &block)
|
40
40
|
return enum_for(__method__, include_self) unless block_given?
|
41
41
|
|
42
|
-
|
43
|
-
|
42
|
+
block.call(self, 0) if include_self
|
43
|
+
|
44
|
+
each_with_index do |exp, index|
|
45
|
+
block.call(exp, index)
|
46
|
+
exp.each_expression(&block) unless exp.terminal?
|
44
47
|
end
|
45
48
|
end
|
46
49
|
|
@@ -14,7 +14,7 @@ module Regexp::Expression
|
|
14
14
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
15
15
|
|
16
16
|
init_from_token_and_options(*args)
|
17
|
-
@mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
|
17
|
+
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
18
|
@min, @max = minmax
|
19
19
|
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
20
|
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
@@ -44,10 +44,11 @@ module Regexp::Expression
|
|
44
44
|
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
45
|
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
46
|
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
-
"Please pass a Regexp::Token instead, e.g. replace `
|
48
|
-
"with `::Regexp::Token.new(:quantifier,
|
49
|
-
"will be derived automatically
|
50
|
-
"
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically.\n"\
|
50
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
51
|
+
"This is consistent with how Expression::Base instances are created. "
|
51
52
|
@token = token
|
52
53
|
@text = text
|
53
54
|
@min = min
|
@@ -7,31 +7,16 @@ module Regexp::Expression
|
|
7
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
8
8
|
class Sequence < Regexp::Expression::Subexpression
|
9
9
|
class << self
|
10
|
-
def add_to(
|
11
|
-
sequence =
|
12
|
-
|
13
|
-
|
14
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
15
|
)
|
16
|
-
sequence.nesting_level = subexpression.nesting_level + 1
|
17
16
|
sequence.options = active_opts
|
18
|
-
|
17
|
+
exp.expressions << sequence
|
19
18
|
sequence
|
20
19
|
end
|
21
|
-
|
22
|
-
def at_levels(level, set_level, conditional_level)
|
23
|
-
token = Regexp::Token.new(
|
24
|
-
:expression,
|
25
|
-
:sequence,
|
26
|
-
'',
|
27
|
-
nil, # ts
|
28
|
-
nil, # te
|
29
|
-
level,
|
30
|
-
set_level,
|
31
|
-
conditional_level
|
32
|
-
)
|
33
|
-
new(token)
|
34
|
-
end
|
35
20
|
end
|
36
21
|
|
37
22
|
def starts_at
|
@@ -1,12 +1,16 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Shared
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
4
|
+
|
3
5
|
def self.included(mod)
|
4
6
|
mod.class_eval do
|
7
|
+
extend Shared::ClassMethods
|
8
|
+
|
5
9
|
attr_accessor :type, :token, :text, :ts, :te,
|
6
10
|
:level, :set_level, :conditional_level,
|
7
|
-
:options
|
11
|
+
:options
|
8
12
|
|
9
|
-
attr_reader :nesting_level
|
13
|
+
attr_reader :nesting_level, :quantifier
|
10
14
|
end
|
11
15
|
end
|
12
16
|
|
@@ -60,6 +64,10 @@ module Regexp::Expression
|
|
60
64
|
!quantifier.nil?
|
61
65
|
end
|
62
66
|
|
67
|
+
def optional?
|
68
|
+
quantified? && quantifier.min == 0
|
69
|
+
end
|
70
|
+
|
63
71
|
def offset
|
64
72
|
[starts_at, full_length]
|
65
73
|
end
|
@@ -69,7 +77,11 @@ module Regexp::Expression
|
|
69
77
|
end
|
70
78
|
|
71
79
|
def terminal?
|
72
|
-
|
80
|
+
true # overridden to be false in Expression::Subexpression
|
81
|
+
end
|
82
|
+
|
83
|
+
def referential?
|
84
|
+
false # overridden to be true e.g. in Expression::Backreference::Base
|
73
85
|
end
|
74
86
|
|
75
87
|
def nesting_level=(lvl)
|
@@ -77,5 +89,10 @@ module Regexp::Expression
|
|
77
89
|
quantifier && quantifier.nesting_level = lvl
|
78
90
|
terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
|
79
91
|
end
|
92
|
+
|
93
|
+
def quantifier=(qtf)
|
94
|
+
@quantifier = qtf
|
95
|
+
@repetitions = nil # clear memoized value
|
96
|
+
end
|
80
97
|
end
|
81
98
|
end
|
@@ -19,7 +19,6 @@ module Regexp::Expression
|
|
19
19
|
if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
|
20
20
|
last.merge(exp)
|
21
21
|
else
|
22
|
-
exp.nesting_level = nesting_level + 1
|
23
22
|
expressions << exp
|
24
23
|
end
|
25
24
|
end
|
@@ -53,6 +52,10 @@ module Regexp::Expression
|
|
53
52
|
)
|
54
53
|
end
|
55
54
|
|
55
|
+
def terminal?
|
56
|
+
false
|
57
|
+
end
|
58
|
+
|
56
59
|
private
|
57
60
|
|
58
61
|
def intersperse(expressions, separator)
|
@@ -13,6 +13,7 @@ require 'regexp_parser/expression/classes/backreference'
|
|
13
13
|
require 'regexp_parser/expression/classes/character_set'
|
14
14
|
require 'regexp_parser/expression/classes/character_set/intersection'
|
15
15
|
require 'regexp_parser/expression/classes/character_set/range'
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
16
17
|
require 'regexp_parser/expression/classes/conditional'
|
17
18
|
require 'regexp_parser/expression/classes/escape_sequence'
|
18
19
|
require 'regexp_parser/expression/classes/free_space'
|
@@ -20,10 +21,11 @@ require 'regexp_parser/expression/classes/group'
|
|
20
21
|
require 'regexp_parser/expression/classes/keep'
|
21
22
|
require 'regexp_parser/expression/classes/literal'
|
22
23
|
require 'regexp_parser/expression/classes/posix_class'
|
23
|
-
require 'regexp_parser/expression/classes/property'
|
24
24
|
require 'regexp_parser/expression/classes/root'
|
25
|
-
require 'regexp_parser/expression/classes/
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
26
26
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
28
|
+
require 'regexp_parser/expression/methods/human_name'
|
27
29
|
require 'regexp_parser/expression/methods/match'
|
28
30
|
require 'regexp_parser/expression/methods/match_length'
|
29
31
|
require 'regexp_parser/expression/methods/options'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -13,50 +13,68 @@ class Regexp::Lexer
|
|
13
13
|
|
14
14
|
CONDITION_TOKENS = %i[condition condition_close].freeze
|
15
15
|
|
16
|
-
def self.lex(input, syntax =
|
17
|
-
new.lex(input, syntax, options: options, &block)
|
16
|
+
def self.lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
|
17
|
+
new.lex(input, syntax, options: options, collect_tokens: collect_tokens, &block)
|
18
18
|
end
|
19
19
|
|
20
|
-
def lex(input, syntax =
|
21
|
-
syntax = Regexp::Syntax.for(syntax)
|
20
|
+
def lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
|
21
|
+
syntax = syntax ? Regexp::Syntax.for(syntax) : Regexp::Syntax::CURRENT
|
22
22
|
|
23
|
+
self.block = block
|
24
|
+
self.collect_tokens = collect_tokens
|
23
25
|
self.tokens = []
|
26
|
+
self.prev_token = nil
|
27
|
+
self.preprev_token = nil
|
24
28
|
self.nesting = 0
|
25
29
|
self.set_nesting = 0
|
26
30
|
self.conditional_nesting = 0
|
27
31
|
self.shift = 0
|
28
32
|
|
29
|
-
|
30
|
-
Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
|
33
|
+
Regexp::Scanner.scan(input, options: options, collect_tokens: false) do |type, token, text, ts, te|
|
31
34
|
type, token = *syntax.normalize(type, token)
|
32
35
|
syntax.check! type, token
|
33
36
|
|
34
37
|
ascend(type, token)
|
35
38
|
|
36
|
-
if
|
37
|
-
|
38
|
-
|
39
|
+
if (last = prev_token) &&
|
40
|
+
type == :quantifier &&
|
41
|
+
(
|
42
|
+
(last.type == :literal && (parts = break_literal(last))) ||
|
43
|
+
(last.token == :codepoint_list && (parts = break_codepoint_list(last)))
|
44
|
+
)
|
45
|
+
emit(parts[0])
|
46
|
+
last = parts[1]
|
39
47
|
end
|
40
48
|
|
41
49
|
current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
|
42
50
|
nesting, set_nesting, conditional_nesting)
|
43
51
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
52
|
+
if type == :conditional && CONDITION_TOKENS.include?(token)
|
53
|
+
current = merge_condition(current, last)
|
54
|
+
elsif last
|
55
|
+
last.next = current
|
56
|
+
current.previous = last
|
57
|
+
emit(last)
|
58
|
+
end
|
49
59
|
|
50
|
-
|
51
|
-
|
60
|
+
self.preprev_token = last
|
61
|
+
self.prev_token = current
|
52
62
|
|
53
63
|
descend(type, token)
|
54
64
|
end
|
55
65
|
|
56
|
-
if
|
57
|
-
|
66
|
+
emit(prev_token) if prev_token
|
67
|
+
|
68
|
+
collect_tokens ? tokens : nil
|
69
|
+
end
|
70
|
+
|
71
|
+
def emit(token)
|
72
|
+
if block
|
73
|
+
# TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect w/o block
|
74
|
+
res = block.call(token)
|
75
|
+
tokens << res if collect_tokens
|
58
76
|
else
|
59
|
-
tokens
|
77
|
+
tokens << token
|
60
78
|
end
|
61
79
|
end
|
62
80
|
|
@@ -66,7 +84,9 @@ class Regexp::Lexer
|
|
66
84
|
|
67
85
|
private
|
68
86
|
|
69
|
-
attr_accessor :
|
87
|
+
attr_accessor :block,
|
88
|
+
:collect_tokens, :tokens, :prev_token, :preprev_token,
|
89
|
+
:nesting, :set_nesting, :conditional_nesting, :shift
|
70
90
|
|
71
91
|
def ascend(type, token)
|
72
92
|
case type
|
@@ -96,34 +116,46 @@ class Regexp::Lexer
|
|
96
116
|
lead, last, _ = token.text.partition(/.\z/mu)
|
97
117
|
return if lead.empty?
|
98
118
|
|
99
|
-
|
100
|
-
tokens << Regexp::Token.new(:literal, :literal, lead,
|
119
|
+
token_1 = Regexp::Token.new(:literal, :literal, lead,
|
101
120
|
token.ts, (token.te - last.length),
|
102
121
|
nesting, set_nesting, conditional_nesting)
|
103
|
-
|
122
|
+
token_2 = Regexp::Token.new(:literal, :literal, last,
|
104
123
|
(token.ts + lead.length), token.te,
|
105
124
|
nesting, set_nesting, conditional_nesting)
|
125
|
+
|
126
|
+
token_1.previous = preprev_token
|
127
|
+
token_1.next = token_2
|
128
|
+
token_2.previous = token_1 # .next will be set by #lex
|
129
|
+
[token_1, token_2]
|
106
130
|
end
|
107
131
|
|
132
|
+
# if a codepoint list is followed by a quantifier, that quantifier applies
|
133
|
+
# to the last codepoint, e.g. /\u{61 62 63}{3}/ =~ 'abccc'
|
134
|
+
# cf. #break_literal.
|
108
135
|
def break_codepoint_list(token)
|
109
136
|
lead, _, tail = token.text.rpartition(' ')
|
110
137
|
return if lead.empty?
|
111
138
|
|
112
|
-
|
113
|
-
tokens << Regexp::Token.new(:escape, :codepoint_list, lead + '}',
|
139
|
+
token_1 = Regexp::Token.new(:escape, :codepoint_list, lead + '}',
|
114
140
|
token.ts, (token.te - tail.length),
|
115
141
|
nesting, set_nesting, conditional_nesting)
|
116
|
-
|
142
|
+
token_2 = Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
|
117
143
|
(token.ts + lead.length + 1), (token.te + 3),
|
118
144
|
nesting, set_nesting, conditional_nesting)
|
119
145
|
|
120
146
|
self.shift = shift + 3 # one space less, but extra \, u, {, and }
|
147
|
+
|
148
|
+
token_1.previous = preprev_token
|
149
|
+
token_1.next = token_2
|
150
|
+
token_2.previous = token_1 # .next will be set by #lex
|
151
|
+
[token_1, token_2]
|
121
152
|
end
|
122
153
|
|
123
|
-
def merge_condition(current)
|
124
|
-
|
125
|
-
Regexp::Token.new(:conditional, :condition, last.text + current.text,
|
154
|
+
def merge_condition(current, last)
|
155
|
+
token = Regexp::Token.new(:conditional, :condition, last.text + current.text,
|
126
156
|
last.ts, current.te, nesting, set_nesting, conditional_nesting)
|
157
|
+
token.previous = preprev_token # .next will be set by #lex
|
158
|
+
token
|
127
159
|
end
|
128
160
|
|
129
161
|
end # class Regexp::Lexer
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -18,12 +18,12 @@ class Regexp::Parser
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
def self.parse(input, syntax =
|
21
|
+
def self.parse(input, syntax = nil, options: nil, &block)
|
22
22
|
new.parse(input, syntax, options: options, &block)
|
23
23
|
end
|
24
24
|
|
25
|
-
def parse(input, syntax =
|
26
|
-
root = Root.
|
25
|
+
def parse(input, syntax = nil, options: nil, &block)
|
26
|
+
root = Root.construct(options: extract_options(input, options))
|
27
27
|
|
28
28
|
self.root = root
|
29
29
|
self.node = root
|
@@ -35,7 +35,7 @@ class Regexp::Parser
|
|
35
35
|
|
36
36
|
self.captured_group_counts = Hash.new(0)
|
37
37
|
|
38
|
-
Regexp::Lexer.scan(input, syntax, options: options) do |token|
|
38
|
+
Regexp::Lexer.scan(input, syntax, options: options, collect_tokens: false) do |token|
|
39
39
|
parse_token(token)
|
40
40
|
end
|
41
41
|
|
@@ -200,11 +200,11 @@ class Regexp::Parser
|
|
200
200
|
end
|
201
201
|
|
202
202
|
def captured_group_count_at_level
|
203
|
-
captured_group_counts[node
|
203
|
+
captured_group_counts[node]
|
204
204
|
end
|
205
205
|
|
206
206
|
def count_captured_group
|
207
|
-
captured_group_counts[node
|
207
|
+
captured_group_counts[node] += 1
|
208
208
|
end
|
209
209
|
|
210
210
|
def close_group
|
@@ -235,7 +235,15 @@ class Regexp::Parser
|
|
235
235
|
when :number, :number_ref
|
236
236
|
node << Backreference::Number.new(token, active_opts)
|
237
237
|
when :number_recursion_ref
|
238
|
-
node << Backreference::NumberRecursionLevel.new(token, active_opts)
|
238
|
+
node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
|
239
|
+
# TODO: should split off new token number_recursion_rel_ref and new
|
240
|
+
# class NumberRelativeRecursionLevel in v3.0.0 to get rid of this
|
241
|
+
if exp.text =~ /[<'][+-]/
|
242
|
+
assign_effective_number(exp)
|
243
|
+
else
|
244
|
+
exp.effective_number = exp.number
|
245
|
+
end
|
246
|
+
end
|
239
247
|
when :number_call
|
240
248
|
node << Backreference::NumberCall.new(token, active_opts)
|
241
249
|
when :number_rel_ref
|
@@ -254,6 +262,8 @@ class Regexp::Parser
|
|
254
262
|
def assign_effective_number(exp)
|
255
263
|
exp.effective_number =
|
256
264
|
exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
|
265
|
+
exp.effective_number > 0 ||
|
266
|
+
raise(ParserError, "Invalid reference: #{exp.reference}")
|
257
267
|
end
|
258
268
|
|
259
269
|
def conditional(token)
|
@@ -369,7 +379,7 @@ class Regexp::Parser
|
|
369
379
|
end
|
370
380
|
|
371
381
|
def sequence_operation(klass, token)
|
372
|
-
unless node.
|
382
|
+
unless node.instance_of?(klass)
|
373
383
|
operator = klass.new(token, active_opts)
|
374
384
|
sequence = operator.add_sequence(active_opts)
|
375
385
|
sequence.expressions = node.expressions
|
@@ -475,17 +485,14 @@ class Regexp::Parser
|
|
475
485
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
476
486
|
# rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
|
477
487
|
if target_node.quantified?
|
478
|
-
|
479
|
-
:
|
480
|
-
:
|
481
|
-
|
482
|
-
target_node.
|
483
|
-
|
484
|
-
|
485
|
-
target_node.set_level,
|
486
|
-
target_node.conditional_level
|
488
|
+
new_group = Group::Passive.construct(
|
489
|
+
token: :passive,
|
490
|
+
ts: target_node.ts,
|
491
|
+
level: target_node.level,
|
492
|
+
set_level: target_node.set_level,
|
493
|
+
conditional_level: target_node.conditional_level,
|
494
|
+
options: active_opts,
|
487
495
|
)
|
488
|
-
new_group = Group::Passive.new(new_token, active_opts)
|
489
496
|
new_group.implicit = true
|
490
497
|
new_group << target_node
|
491
498
|
increase_group_level(target_node)
|
@@ -534,7 +541,7 @@ class Regexp::Parser
|
|
534
541
|
|
535
542
|
def range(token)
|
536
543
|
exp = CharacterSet::Range.new(token, active_opts)
|
537
|
-
scope = node.last.
|
544
|
+
scope = node.last.instance_of?(CharacterSet::IntersectedSequence) ? node.last : node
|
538
545
|
exp << scope.expressions.pop
|
539
546
|
nest(exp)
|
540
547
|
end
|
@@ -561,7 +568,7 @@ class Regexp::Parser
|
|
561
568
|
end
|
562
569
|
|
563
570
|
def close_completed_character_set_range
|
564
|
-
decrease_nesting if node.
|
571
|
+
decrease_nesting if node.instance_of?(CharacterSet::Range) && node.complete?
|
565
572
|
end
|
566
573
|
|
567
574
|
def active_opts
|
@@ -572,15 +579,18 @@ class Regexp::Parser
|
|
572
579
|
# an instance of Backreference::Number, its #referenced_expression is set to
|
573
580
|
# the instance of Group::Capture that it refers to via its number.
|
574
581
|
def assign_referenced_expressions
|
575
|
-
|
576
|
-
|
582
|
+
# find all referenceable and referring expressions
|
583
|
+
targets = { 0 => root }
|
584
|
+
referrers = []
|
577
585
|
root.each_expression do |exp|
|
578
586
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
587
|
+
referrers << exp if exp.referential?
|
579
588
|
end
|
580
|
-
# assign
|
581
|
-
|
582
|
-
|
583
|
-
|
589
|
+
# assign referenced expression to referring expressions
|
590
|
+
# (in a second iteration because there might be forward references)
|
591
|
+
referrers.each do |exp|
|
592
|
+
exp.referenced_expression = targets[exp.reference] ||
|
593
|
+
raise(ParserError, "Invalid reference: #{exp.reference}")
|
584
594
|
end
|
585
595
|
end
|
586
596
|
end # class Regexp::Parser
|
@@ -17,7 +17,7 @@
|
|
17
17
|
text = copy(data, ts-1, te)
|
18
18
|
type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
|
19
19
|
|
20
|
-
name =
|
20
|
+
name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
|
21
21
|
|
22
22
|
token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
|
23
23
|
validation_error(:property, name) unless token
|