regexp_parser 1.7.0 → 2.8.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +303 -368
- data/lib/regexp_parser/scanner.rb +1423 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +19 -23
- metadata +52 -171
- data/CHANGELOG.md +0 -349
- data/README.md +0 -470
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,138 +1,7 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class Base
|
4
|
-
attr_accessor :type, :token
|
5
|
-
attr_accessor :text, :ts
|
6
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
7
|
-
|
8
|
-
attr_accessor :quantifier
|
9
|
-
attr_accessor :options
|
10
|
-
|
11
|
-
def initialize(token, options = {})
|
12
|
-
self.type = token.type
|
13
|
-
self.token = token.token
|
14
|
-
self.text = token.text
|
15
|
-
self.ts = token.ts
|
16
|
-
self.level = token.level
|
17
|
-
self.set_level = token.set_level
|
18
|
-
self.conditional_level = token.conditional_level
|
19
|
-
self.nesting_level = 0
|
20
|
-
self.quantifier = nil
|
21
|
-
self.options = options
|
22
|
-
end
|
23
|
-
|
24
|
-
def initialize_clone(orig)
|
25
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
26
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
27
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
28
|
-
super
|
29
|
-
end
|
30
|
-
|
31
|
-
def to_re(format = :full)
|
32
|
-
::Regexp.new(to_s(format))
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :starts_at :ts
|
36
|
-
|
37
|
-
def full_length
|
38
|
-
to_s.length
|
39
|
-
end
|
40
|
-
|
41
|
-
def offset
|
42
|
-
[starts_at, full_length]
|
43
|
-
end
|
44
|
-
|
45
|
-
def coded_offset
|
46
|
-
'@%d+%d' % offset
|
47
|
-
end
|
48
|
-
|
49
|
-
def to_s(format = :full)
|
50
|
-
"#{text}#{quantifier_affix(format)}"
|
51
|
-
end
|
52
|
-
|
53
|
-
def quantifier_affix(expression_format)
|
54
|
-
quantifier.to_s if quantified? && expression_format != :base
|
55
|
-
end
|
56
|
-
|
57
|
-
def terminal?
|
58
|
-
!respond_to?(:expressions)
|
59
|
-
end
|
60
|
-
|
61
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
62
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
63
|
-
end
|
64
|
-
|
65
|
-
def unquantified_clone
|
66
|
-
clone.tap { |exp| exp.quantifier = nil }
|
67
|
-
end
|
68
|
-
|
69
|
-
def quantified?
|
70
|
-
!quantifier.nil?
|
71
|
-
end
|
72
|
-
|
73
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
74
|
-
def quantity
|
75
|
-
return [nil,nil] unless quantified?
|
76
|
-
[quantifier.min, quantifier.max]
|
77
|
-
end
|
78
|
-
|
79
|
-
def repetitions
|
80
|
-
return 1..1 unless quantified?
|
81
|
-
min = quantifier.min
|
82
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
83
|
-
# fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
|
84
|
-
(min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
|
85
|
-
end
|
86
|
-
|
87
|
-
def greedy?
|
88
|
-
quantified? and quantifier.greedy?
|
89
|
-
end
|
90
|
-
|
91
|
-
def reluctant?
|
92
|
-
quantified? and quantifier.reluctant?
|
93
|
-
end
|
94
|
-
alias :lazy? :reluctant?
|
95
|
-
|
96
|
-
def possessive?
|
97
|
-
quantified? and quantifier.possessive?
|
98
|
-
end
|
99
|
-
|
100
|
-
def attributes
|
101
|
-
{
|
102
|
-
type: type,
|
103
|
-
token: token,
|
104
|
-
text: to_s(:base),
|
105
|
-
starts_at: ts,
|
106
|
-
length: full_length,
|
107
|
-
level: level,
|
108
|
-
set_level: set_level,
|
109
|
-
conditional_level: conditional_level,
|
110
|
-
options: options,
|
111
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
112
|
-
}
|
113
|
-
end
|
114
|
-
alias :to_h :attributes
|
115
|
-
end
|
116
|
-
|
117
|
-
def self.parsed(exp)
|
118
|
-
warn('WARNING: Regexp::Expression::Base.parsed is buggy and '\
|
119
|
-
'will be removed in 2.0.0. Use Regexp::Parser.parse instead.')
|
120
|
-
case exp
|
121
|
-
when String
|
122
|
-
Regexp::Parser.parse(exp)
|
123
|
-
when Regexp
|
124
|
-
Regexp::Parser.parse(exp.source) # <- causes loss of root options
|
125
|
-
when Regexp::Expression # <- never triggers
|
126
|
-
exp
|
127
|
-
else
|
128
|
-
raise ArgumentError, 'Expression.parsed accepts a String, Regexp, or '\
|
129
|
-
'a Regexp::Expression as a value for exp, but it '\
|
130
|
-
"was given #{exp.class.name}."
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
end # module Regexp::Expression
|
1
|
+
require 'regexp_parser/error'
|
135
2
|
|
3
|
+
require 'regexp_parser/expression/shared'
|
4
|
+
require 'regexp_parser/expression/base'
|
136
5
|
require 'regexp_parser/expression/quantifier'
|
137
6
|
require 'regexp_parser/expression/subexpression'
|
138
7
|
require 'regexp_parser/expression/sequence'
|
@@ -140,24 +9,28 @@ require 'regexp_parser/expression/sequence_operation'
|
|
140
9
|
|
141
10
|
require 'regexp_parser/expression/classes/alternation'
|
142
11
|
require 'regexp_parser/expression/classes/anchor'
|
143
|
-
require 'regexp_parser/expression/classes/backref'
|
12
|
+
require 'regexp_parser/expression/classes/backreference'
|
13
|
+
require 'regexp_parser/expression/classes/character_set'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
15
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
144
17
|
require 'regexp_parser/expression/classes/conditional'
|
145
|
-
require 'regexp_parser/expression/classes/escape'
|
18
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
146
19
|
require 'regexp_parser/expression/classes/free_space'
|
147
20
|
require 'regexp_parser/expression/classes/group'
|
148
21
|
require 'regexp_parser/expression/classes/keep'
|
149
22
|
require 'regexp_parser/expression/classes/literal'
|
150
23
|
require 'regexp_parser/expression/classes/posix_class'
|
151
|
-
require 'regexp_parser/expression/classes/property'
|
152
24
|
require 'regexp_parser/expression/classes/root'
|
153
|
-
require 'regexp_parser/expression/classes/set'
|
154
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
155
|
-
require 'regexp_parser/expression/classes/set/range'
|
156
|
-
require 'regexp_parser/expression/classes/type'
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
157
26
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
28
|
+
require 'regexp_parser/expression/methods/human_name'
|
158
29
|
require 'regexp_parser/expression/methods/match'
|
159
30
|
require 'regexp_parser/expression/methods/match_length'
|
160
31
|
require 'regexp_parser/expression/methods/options'
|
32
|
+
require 'regexp_parser/expression/methods/parts'
|
33
|
+
require 'regexp_parser/expression/methods/printing'
|
161
34
|
require 'regexp_parser/expression/methods/strfregexp'
|
162
35
|
require 'regexp_parser/expression/methods/tests'
|
163
36
|
require 'regexp_parser/expression/methods/traverse'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,57 +4,77 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
OPENING_TOKENS = [
|
8
|
-
|
9
|
-
|
7
|
+
OPENING_TOKENS = %i[
|
8
|
+
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
+
atomic options options_switch named absence open
|
10
10
|
].freeze
|
11
11
|
|
12
|
-
CLOSING_TOKENS = [
|
12
|
+
CLOSING_TOKENS = %i[close].freeze
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
CONDITION_TOKENS = %i[condition condition_close].freeze
|
15
|
+
|
16
|
+
def self.lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
|
17
|
+
new.lex(input, syntax, options: options, collect_tokens: collect_tokens, &block)
|
16
18
|
end
|
17
19
|
|
18
|
-
def lex(input, syntax =
|
19
|
-
syntax = Regexp::Syntax.
|
20
|
+
def lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
|
21
|
+
syntax = syntax ? Regexp::Syntax.for(syntax) : Regexp::Syntax::CURRENT
|
20
22
|
|
23
|
+
self.block = block
|
24
|
+
self.collect_tokens = collect_tokens
|
21
25
|
self.tokens = []
|
26
|
+
self.prev_token = nil
|
27
|
+
self.preprev_token = nil
|
22
28
|
self.nesting = 0
|
23
29
|
self.set_nesting = 0
|
24
30
|
self.conditional_nesting = 0
|
25
31
|
self.shift = 0
|
26
32
|
|
27
|
-
|
28
|
-
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
|
33
|
+
Regexp::Scanner.scan(input, options: options, collect_tokens: false) do |type, token, text, ts, te|
|
29
34
|
type, token = *syntax.normalize(type, token)
|
30
35
|
syntax.check! type, token
|
31
36
|
|
32
37
|
ascend(type, token)
|
33
38
|
|
34
|
-
if
|
35
|
-
|
36
|
-
|
39
|
+
if (last = prev_token) &&
|
40
|
+
type == :quantifier &&
|
41
|
+
(
|
42
|
+
(last.type == :literal && (parts = break_literal(last))) ||
|
43
|
+
(last.token == :codepoint_list && (parts = break_codepoint_list(last)))
|
44
|
+
)
|
45
|
+
emit(parts[0])
|
46
|
+
last = parts[1]
|
37
47
|
end
|
38
48
|
|
39
49
|
current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
|
40
50
|
nesting, set_nesting, conditional_nesting)
|
41
51
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
52
|
+
if type == :conditional && CONDITION_TOKENS.include?(token)
|
53
|
+
current = merge_condition(current, last)
|
54
|
+
elsif last
|
55
|
+
last.next = current
|
56
|
+
current.previous = last
|
57
|
+
emit(last)
|
58
|
+
end
|
47
59
|
|
48
|
-
|
49
|
-
|
60
|
+
self.preprev_token = last
|
61
|
+
self.prev_token = current
|
50
62
|
|
51
63
|
descend(type, token)
|
52
64
|
end
|
53
65
|
|
54
|
-
if
|
55
|
-
|
66
|
+
emit(prev_token) if prev_token
|
67
|
+
|
68
|
+
collect_tokens ? tokens : nil
|
69
|
+
end
|
70
|
+
|
71
|
+
def emit(token)
|
72
|
+
if block
|
73
|
+
# TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect w/o block
|
74
|
+
res = block.call(token)
|
75
|
+
tokens << res if collect_tokens
|
56
76
|
else
|
57
|
-
tokens
|
77
|
+
tokens << token
|
58
78
|
end
|
59
79
|
end
|
60
80
|
|
@@ -64,27 +84,37 @@ class Regexp::Lexer
|
|
64
84
|
|
65
85
|
private
|
66
86
|
|
67
|
-
attr_accessor :
|
87
|
+
attr_accessor :block,
|
88
|
+
:collect_tokens, :tokens, :prev_token, :preprev_token,
|
89
|
+
:nesting, :set_nesting, :conditional_nesting, :shift
|
68
90
|
|
69
91
|
def ascend(type, token)
|
92
|
+
return unless CLOSING_TOKENS.include?(token)
|
93
|
+
|
70
94
|
case type
|
71
95
|
when :group, :assertion
|
72
|
-
self.nesting = nesting - 1
|
96
|
+
self.nesting = nesting - 1
|
73
97
|
when :set
|
74
|
-
self.set_nesting = set_nesting - 1
|
98
|
+
self.set_nesting = set_nesting - 1
|
75
99
|
when :conditional
|
76
|
-
self.conditional_nesting = conditional_nesting - 1
|
100
|
+
self.conditional_nesting = conditional_nesting - 1
|
101
|
+
else
|
102
|
+
raise "unhandled nesting type #{type}"
|
77
103
|
end
|
78
104
|
end
|
79
105
|
|
80
106
|
def descend(type, token)
|
107
|
+
return unless OPENING_TOKENS.include?(token)
|
108
|
+
|
81
109
|
case type
|
82
110
|
when :group, :assertion
|
83
|
-
self.nesting = nesting + 1
|
111
|
+
self.nesting = nesting + 1
|
84
112
|
when :set
|
85
|
-
self.set_nesting = set_nesting + 1
|
113
|
+
self.set_nesting = set_nesting + 1
|
86
114
|
when :conditional
|
87
|
-
self.conditional_nesting = conditional_nesting + 1
|
115
|
+
self.conditional_nesting = conditional_nesting + 1
|
116
|
+
else
|
117
|
+
raise "unhandled nesting type #{type}"
|
88
118
|
end
|
89
119
|
end
|
90
120
|
|
@@ -94,34 +124,46 @@ class Regexp::Lexer
|
|
94
124
|
lead, last, _ = token.text.partition(/.\z/mu)
|
95
125
|
return if lead.empty?
|
96
126
|
|
97
|
-
|
98
|
-
|
99
|
-
token.ts, (token.te - last.bytesize),
|
127
|
+
token_1 = Regexp::Token.new(:literal, :literal, lead,
|
128
|
+
token.ts, (token.te - last.length),
|
100
129
|
nesting, set_nesting, conditional_nesting)
|
101
|
-
|
102
|
-
(token.ts + lead.
|
130
|
+
token_2 = Regexp::Token.new(:literal, :literal, last,
|
131
|
+
(token.ts + lead.length), token.te,
|
103
132
|
nesting, set_nesting, conditional_nesting)
|
133
|
+
|
134
|
+
token_1.previous = preprev_token
|
135
|
+
token_1.next = token_2
|
136
|
+
token_2.previous = token_1 # .next will be set by #lex
|
137
|
+
[token_1, token_2]
|
104
138
|
end
|
105
139
|
|
140
|
+
# if a codepoint list is followed by a quantifier, that quantifier applies
|
141
|
+
# to the last codepoint, e.g. /\u{61 62 63}{3}/ =~ 'abccc'
|
142
|
+
# c.f. #break_literal.
|
106
143
|
def break_codepoint_list(token)
|
107
144
|
lead, _, tail = token.text.rpartition(' ')
|
108
145
|
return if lead.empty?
|
109
146
|
|
110
|
-
|
111
|
-
tokens << Regexp::Token.new(:escape, :codepoint_list, lead + '}',
|
147
|
+
token_1 = Regexp::Token.new(:escape, :codepoint_list, lead + '}',
|
112
148
|
token.ts, (token.te - tail.length),
|
113
149
|
nesting, set_nesting, conditional_nesting)
|
114
|
-
|
150
|
+
token_2 = Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
|
115
151
|
(token.ts + lead.length + 1), (token.te + 3),
|
116
152
|
nesting, set_nesting, conditional_nesting)
|
117
153
|
|
118
154
|
self.shift = shift + 3 # one space less, but extra \, u, {, and }
|
155
|
+
|
156
|
+
token_1.previous = preprev_token
|
157
|
+
token_1.next = token_2
|
158
|
+
token_2.previous = token_1 # .next will be set by #lex
|
159
|
+
[token_1, token_2]
|
119
160
|
end
|
120
161
|
|
121
|
-
def merge_condition(current)
|
122
|
-
|
123
|
-
Regexp::Token.new(:conditional, :condition, last.text + current.text,
|
162
|
+
def merge_condition(current, last)
|
163
|
+
token = Regexp::Token.new(:conditional, :condition, last.text + current.text,
|
124
164
|
last.ts, current.te, nesting, set_nesting, conditional_nesting)
|
165
|
+
token.previous = preprev_token # .next will be set by #lex
|
166
|
+
token
|
125
167
|
end
|
126
168
|
|
127
169
|
end # module Regexp::Lexer
|