regexp_parser 1.5.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +59 -0
- data/Gemfile +3 -3
- data/README.md +14 -6
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +6 -43
- data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
- data/lib/regexp_parser/expression/sequence.rb +3 -2
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/lexer.rb +4 -25
- data/lib/regexp_parser/parser.rb +40 -33
- data/lib/regexp_parser/scanner.rb +1208 -1353
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +15 -1
- data/lib/regexp_parser/scanner/properties/short.yml +5 -0
- data/lib/regexp_parser/scanner/scanner.rl +116 -202
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/base_spec.rb +14 -0
- data/spec/expression/methods/match_length_spec.rb +20 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/tests_spec.rb +2 -0
- data/spec/expression/methods/traverse_spec.rb +21 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/lexer/conditionals_spec.rb +49 -119
- data/spec/lexer/delimiters_spec.rb +68 -0
- data/spec/lexer/escapes_spec.rb +8 -32
- data/spec/lexer/keep_spec.rb +5 -17
- data/spec/lexer/literals_spec.rb +73 -110
- data/spec/lexer/nesting_spec.rb +86 -117
- data/spec/lexer/refcalls_spec.rb +51 -50
- data/spec/parser/all_spec.rb +13 -1
- data/spec/parser/anchors_spec.rb +9 -23
- data/spec/parser/conditionals_spec.rb +9 -9
- data/spec/parser/errors_spec.rb +22 -43
- data/spec/parser/escapes_spec.rb +33 -44
- data/spec/parser/free_space_spec.rb +25 -4
- data/spec/parser/groups_spec.rb +98 -257
- data/spec/parser/keep_spec.rb +2 -15
- data/spec/parser/options_spec.rb +28 -0
- data/spec/parser/posix_classes_spec.rb +5 -24
- data/spec/parser/properties_spec.rb +42 -54
- data/spec/parser/quantifiers_spec.rb +42 -283
- data/spec/parser/refcalls_spec.rb +60 -185
- data/spec/parser/set/intersections_spec.rb +17 -17
- data/spec/parser/set/ranges_spec.rb +17 -17
- data/spec/parser/sets_spec.rb +5 -5
- data/spec/parser/types_spec.rb +11 -36
- data/spec/scanner/anchors_spec.rb +13 -28
- data/spec/scanner/conditionals_spec.rb +121 -173
- data/spec/scanner/delimiters_spec.rb +52 -0
- data/spec/scanner/errors_spec.rb +64 -87
- data/spec/scanner/escapes_spec.rb +53 -50
- data/spec/scanner/free_space_spec.rb +102 -165
- data/spec/scanner/groups_spec.rb +45 -64
- data/spec/scanner/keep_spec.rb +5 -28
- data/spec/scanner/literals_spec.rb +45 -81
- data/spec/scanner/meta_spec.rb +13 -33
- data/spec/scanner/options_spec.rb +36 -0
- data/spec/scanner/properties_spec.rb +43 -286
- data/spec/scanner/quantifiers_spec.rb +13 -28
- data/spec/scanner/refcalls_spec.rb +32 -48
- data/spec/scanner/sets_spec.rb +88 -102
- data/spec/scanner/types_spec.rb +10 -25
- data/spec/spec_helper.rb +1 -0
- data/spec/support/shared_examples.rb +77 -0
- data/spec/syntax/syntax_spec.rb +4 -0
- data/spec/syntax/versions/1.8.6_spec.rb +12 -33
- data/spec/syntax/versions/1.9.1_spec.rb +5 -18
- data/spec/syntax/versions/1.9.3_spec.rb +4 -17
- data/spec/syntax/versions/2.0.0_spec.rb +8 -23
- data/spec/syntax/versions/2.2.0_spec.rb +4 -17
- data/spec/syntax/versions/aliases_spec.rb +27 -109
- metadata +28 -10
- data/spec/scanner/scripts_spec.rb +0 -49
- data/spec/scanner/unicode_blocks_spec.rb +0 -28
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
# edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
|
4
4
|
|
5
|
-
RSpec.describe('
|
5
|
+
RSpec.describe('CharacterSet::Intersection parsing') do
|
6
6
|
specify('parse set intersection') do
|
7
7
|
root = RP.parse('[a&&z]')
|
8
8
|
set = root[0]
|
@@ -22,9 +22,9 @@ RSpec.describe('SetIntersection parsing') do
|
|
22
22
|
expect(seq2.first.to_s).to eq 'z'
|
23
23
|
expect(seq2.first).to be_instance_of(Literal)
|
24
24
|
|
25
|
-
expect(set.
|
26
|
-
expect(set.
|
27
|
-
expect(set.
|
25
|
+
expect(set).not_to match 'a'
|
26
|
+
expect(set).not_to match '&'
|
27
|
+
expect(set).not_to match 'z'
|
28
28
|
end
|
29
29
|
|
30
30
|
specify('parse set intersection range and subset') do
|
@@ -46,9 +46,9 @@ RSpec.describe('SetIntersection parsing') do
|
|
46
46
|
expect(seq2.first.to_s).to eq '[^a]'
|
47
47
|
expect(seq2.first).to be_instance_of(CharacterSet)
|
48
48
|
|
49
|
-
expect(set.
|
50
|
-
expect(set.
|
51
|
-
expect(set.
|
49
|
+
expect(set).not_to match 'a'
|
50
|
+
expect(set).not_to match '&'
|
51
|
+
expect(set).to match 'b'
|
52
52
|
end
|
53
53
|
|
54
54
|
specify('parse set intersection trailing range') do
|
@@ -70,9 +70,9 @@ RSpec.describe('SetIntersection parsing') do
|
|
70
70
|
expect(seq2.first.to_s).to eq 'a-z'
|
71
71
|
expect(seq2.first).to be_instance_of(CharacterSet::Range)
|
72
72
|
|
73
|
-
expect(set.
|
74
|
-
expect(set.
|
75
|
-
expect(set.
|
73
|
+
expect(set).to match 'a'
|
74
|
+
expect(set).not_to match '&'
|
75
|
+
expect(set).not_to match 'b'
|
76
76
|
end
|
77
77
|
|
78
78
|
specify('parse set intersection type') do
|
@@ -94,9 +94,9 @@ RSpec.describe('SetIntersection parsing') do
|
|
94
94
|
expect(seq2.first.to_s).to eq '\\w'
|
95
95
|
expect(seq2.first).to be_instance_of(CharacterType::Word)
|
96
96
|
|
97
|
-
expect(set.
|
98
|
-
expect(set.
|
99
|
-
expect(set.
|
97
|
+
expect(set).to match 'a'
|
98
|
+
expect(set).not_to match '&'
|
99
|
+
expect(set).not_to match 'b'
|
100
100
|
end
|
101
101
|
|
102
102
|
specify('parse set intersection multipart') do
|
@@ -119,9 +119,9 @@ RSpec.describe('SetIntersection parsing') do
|
|
119
119
|
expect(seq3.count).to eq 3
|
120
120
|
expect(seq3.to_s).to eq 'efg'
|
121
121
|
|
122
|
-
expect(set.
|
123
|
-
expect(set.
|
124
|
-
expect(set.
|
125
|
-
expect(set.
|
122
|
+
expect(set).to match 'e'
|
123
|
+
expect(set).to match 'f'
|
124
|
+
expect(set).not_to match 'a'
|
125
|
+
expect(set).not_to match 'g'
|
126
126
|
end
|
127
127
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec.describe('
|
3
|
+
RSpec.describe('CharacterSet::Range parsing') do
|
4
4
|
specify('parse set range') do
|
5
5
|
root = RP.parse('[a-z]')
|
6
6
|
set = root[0]
|
@@ -13,7 +13,7 @@ RSpec.describe('SetRang parsing') do
|
|
13
13
|
expect(range.first).to be_instance_of(Literal)
|
14
14
|
expect(range.last.to_s).to eq 'z'
|
15
15
|
expect(range.last).to be_instance_of(Literal)
|
16
|
-
expect(set
|
16
|
+
expect(set).to match 'm'
|
17
17
|
end
|
18
18
|
|
19
19
|
specify('parse set range hex') do
|
@@ -28,7 +28,7 @@ RSpec.describe('SetRang parsing') do
|
|
28
28
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
29
|
expect(range.last.to_s).to eq '\\x99'
|
30
30
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set.
|
31
|
+
expect(set).to match '\\x50'
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('parse set range unicode') do
|
@@ -43,7 +43,7 @@ RSpec.describe('SetRang parsing') do
|
|
43
43
|
expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
|
44
44
|
expect(range.last.to_s).to eq '\\u1234'
|
45
45
|
expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
|
46
|
-
expect(set.
|
46
|
+
expect(set).to match '\\u600'
|
47
47
|
end
|
48
48
|
|
49
49
|
specify('parse set range edge case leading dash') do
|
@@ -53,7 +53,7 @@ RSpec.describe('SetRang parsing') do
|
|
53
53
|
|
54
54
|
expect(set.count).to eq 1
|
55
55
|
expect(range.count).to eq 2
|
56
|
-
expect(set
|
56
|
+
expect(set).to match 'a'
|
57
57
|
end
|
58
58
|
|
59
59
|
specify('parse set range edge case trailing dash') do
|
@@ -63,7 +63,7 @@ RSpec.describe('SetRang parsing') do
|
|
63
63
|
|
64
64
|
expect(set.count).to eq 1
|
65
65
|
expect(range.count).to eq 2
|
66
|
-
expect(set
|
66
|
+
expect(set).to match '$'
|
67
67
|
end
|
68
68
|
|
69
69
|
specify('parse set range edge case leading negate') do
|
@@ -71,8 +71,8 @@ RSpec.describe('SetRang parsing') do
|
|
71
71
|
set = root[0]
|
72
72
|
|
73
73
|
expect(set.count).to eq 2
|
74
|
-
expect(set.
|
75
|
-
expect(set.
|
74
|
+
expect(set).to match 'a'
|
75
|
+
expect(set).not_to match 'z'
|
76
76
|
end
|
77
77
|
|
78
78
|
specify('parse set range edge case trailing negate') do
|
@@ -82,7 +82,7 @@ RSpec.describe('SetRang parsing') do
|
|
82
82
|
|
83
83
|
expect(set.count).to eq 1
|
84
84
|
expect(range.count).to eq 2
|
85
|
-
expect(set
|
85
|
+
expect(set).to match '$'
|
86
86
|
end
|
87
87
|
|
88
88
|
specify('parse set range edge case leading intersection') do
|
@@ -91,10 +91,10 @@ RSpec.describe('SetRang parsing') do
|
|
91
91
|
|
92
92
|
expect(set.count).to eq 1
|
93
93
|
expect(set.first.last.to_s).to eq '-bc'
|
94
|
-
expect(set.
|
95
|
-
expect(set.
|
96
|
-
expect(set.
|
97
|
-
expect(set.
|
94
|
+
expect(set).to match '-'
|
95
|
+
expect(set).to match 'b'
|
96
|
+
expect(set).not_to match 'a'
|
97
|
+
expect(set).not_to match 'c'
|
98
98
|
end
|
99
99
|
|
100
100
|
specify('parse set range edge case trailing intersection') do
|
@@ -103,9 +103,9 @@ RSpec.describe('SetRang parsing') do
|
|
103
103
|
|
104
104
|
expect(set.count).to eq 1
|
105
105
|
expect(set.first.first.to_s).to eq 'bc-'
|
106
|
-
expect(set.
|
107
|
-
expect(set.
|
108
|
-
expect(set.
|
109
|
-
expect(set.
|
106
|
+
expect(set).to match '-'
|
107
|
+
expect(set).to match 'b'
|
108
|
+
expect(set).not_to match 'a'
|
109
|
+
expect(set).not_to match 'c'
|
110
110
|
end
|
111
111
|
end
|
data/spec/parser/sets_spec.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec.describe('
|
3
|
+
RSpec.describe('CharacterSet parsing') do
|
4
4
|
specify('parse set basic') do
|
5
5
|
root = RP.parse('[ab]+')
|
6
6
|
exp = root[0]
|
@@ -39,10 +39,10 @@ RSpec.describe('Set parsing') do
|
|
39
39
|
expect(exp[1]).to be_instance_of(EscapeSequence::Backspace)
|
40
40
|
expect(exp[1].text).to eq '\\b'
|
41
41
|
|
42
|
-
expect(exp.
|
43
|
-
expect(exp.
|
44
|
-
expect(exp.
|
45
|
-
expect(exp.
|
42
|
+
expect(exp).to match 'a'
|
43
|
+
expect(exp).to match "\b"
|
44
|
+
expect(exp).not_to match 'b'
|
45
|
+
expect(exp).to match 'c'
|
46
46
|
end
|
47
47
|
|
48
48
|
specify('parse set escape sequence hex') do
|
data/spec/parser/types_spec.rb
CHANGED
@@ -1,43 +1,18 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec.describe('
|
4
|
-
|
5
|
-
|
6
|
-
/a\Dc/ => [1, :type, :nondigit, CharacterType::NonDigit],
|
3
|
+
RSpec.describe('CharacterType parsing') do
|
4
|
+
include_examples 'parse', /a\dc/, 1 => [:type, :digit, CharacterType::Digit]
|
5
|
+
include_examples 'parse', /a\Dc/, 1 => [:type, :nondigit, CharacterType::NonDigit]
|
7
6
|
|
8
|
-
|
9
|
-
|
7
|
+
include_examples 'parse', /a\sc/, 1 => [:type, :space, CharacterType::Space]
|
8
|
+
include_examples 'parse', /a\Sc/, 1 => [:type, :nonspace, CharacterType::NonSpace]
|
10
9
|
|
11
|
-
|
12
|
-
|
10
|
+
include_examples 'parse', /a\hc/, 1 => [:type, :hex, CharacterType::Hex]
|
11
|
+
include_examples 'parse', /a\Hc/, 1 => [:type, :nonhex, CharacterType::NonHex]
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
}
|
13
|
+
include_examples 'parse', /a\wc/, 1 => [:type, :word, CharacterType::Word]
|
14
|
+
include_examples 'parse', /a\Wc/, 1 => [:type, :nonword, CharacterType::NonWord]
|
17
15
|
|
18
|
-
|
19
|
-
|
20
|
-
root = RP.parse(pattern, 'ruby/1.9')
|
21
|
-
exp = root.expressions.at(index)
|
22
|
-
|
23
|
-
expect(exp).to be_a(klass)
|
24
|
-
|
25
|
-
expect(exp.type).to eq type
|
26
|
-
expect(exp.token).to eq token
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
tests_2_0 = { 'a\\Rc' => [1, :type, :linebreak, CharacterType::Linebreak], 'a\\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme] }
|
31
|
-
|
32
|
-
tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
|
33
|
-
specify("parse_type_#{token}_#{count}") do
|
34
|
-
root = RP.parse(pattern, 'ruby/2.0')
|
35
|
-
exp = root.expressions.at(index)
|
36
|
-
|
37
|
-
expect(exp).to be_a(klass)
|
38
|
-
|
39
|
-
expect(exp.type).to eq type
|
40
|
-
expect(exp.token).to eq token
|
41
|
-
end
|
42
|
-
end
|
16
|
+
include_examples 'parse', 'a\\Rc', 1 => [:type, :linebreak, CharacterType::Linebreak]
|
17
|
+
include_examples 'parse', 'a\\Xc', 1 => [:type, :xgrapheme, CharacterType::ExtendedGrapheme]
|
43
18
|
end
|
@@ -1,36 +1,21 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('Anchor scanning') do
|
4
|
-
|
5
|
-
|
6
|
-
'abc$' => [1, :anchor, :eol, '$', 3, 4],
|
4
|
+
include_examples 'scan', '^abc', 0 => [:anchor, :bol, '^', 0, 1]
|
5
|
+
include_examples 'scan', 'abc$', 1 => [:anchor, :eol, '$', 3, 4]
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
include_examples 'scan', '\Aabc', 0 => [:anchor, :bos, '\A', 0, 2]
|
8
|
+
include_examples 'scan', 'abc\z', 1 => [:anchor, :eos, '\z', 3, 5]
|
9
|
+
include_examples 'scan', 'abc\Z', 1 => [:anchor, :eos_ob_eol, '\Z', 3, 5]
|
11
10
|
|
12
|
-
|
13
|
-
|
11
|
+
include_examples 'scan', 'a\bc', 1 => [:anchor, :word_boundary, '\b', 1, 3]
|
12
|
+
include_examples 'scan', 'a\Bc', 1 => [:anchor, :nonword_boundary, '\B', 1, 3]
|
14
13
|
|
15
|
-
|
14
|
+
include_examples 'scan', 'a\Gc', 1 => [:anchor, :match_start, '\G', 1, 3]
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
}
|
23
|
-
|
24
|
-
tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
|
25
|
-
specify("scanner_#{type}_#{token}_#{count}") do
|
26
|
-
tokens = RS.scan(pattern)
|
27
|
-
result = tokens[index]
|
28
|
-
|
29
|
-
expect(result[0]).to eq type
|
30
|
-
expect(result[1]).to eq token
|
31
|
-
expect(result[2]).to eq text
|
32
|
-
expect(result[3]).to eq ts
|
33
|
-
expect(result[4]).to eq te
|
34
|
-
end
|
35
|
-
end
|
16
|
+
include_examples 'scan', "\\\\Ac", 0 => [:escape, :backslash, '\\\\', 0, 2]
|
17
|
+
include_examples 'scan', "a\\\\z", 1 => [:escape, :backslash, '\\\\', 1, 3]
|
18
|
+
include_examples 'scan', "a\\\\Z", 1 => [:escape, :backslash, '\\\\', 1, 3]
|
19
|
+
include_examples 'scan', "a\\\\bc", 1 => [:escape, :backslash, '\\\\', 1, 3]
|
20
|
+
include_examples 'scan', "a\\\\Bc", 1 => [:escape, :backslash, '\\\\', 1, 3]
|
36
21
|
end
|
@@ -1,180 +1,128 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('Conditional scanning') do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
"(?'N'A)(?('N')T|F)2" => [5, :conditional, :condition, "'N'", 10, 13]
|
18
|
-
}
|
4
|
+
include_examples 'scan', /(a)(?(1)T|F)1/, 3 => [:conditional, :open, '(?', 3, 5]
|
5
|
+
include_examples 'scan', /(a)(?(1)T|F)2/, 4 => [:conditional, :condition_open, '(', 5, 6]
|
6
|
+
include_examples 'scan', /(a)(?(1)T|F)3/, 5 => [:conditional, :condition, '1', 6, 7]
|
7
|
+
include_examples 'scan', /(a)(?(1)T|F)4/, 6 => [:conditional, :condition_close, ')', 7, 8]
|
8
|
+
include_examples 'scan', /(a)(?(1)T|F)5/, 7 => [:literal, :literal, 'T', 8, 9]
|
9
|
+
include_examples 'scan', /(a)(?(1)T|F)6/, 8 => [:conditional, :separator, '|', 9, 10]
|
10
|
+
include_examples 'scan', /(a)(?(1)T|F)7/, 9 => [:literal, :literal, 'F', 10, 11]
|
11
|
+
include_examples 'scan', /(a)(?(1)T|F)8/, 10 => [:conditional, :close, ')', 11, 12]
|
12
|
+
include_examples 'scan', /(a)(?(1)TRUE)9/, 8 => [:conditional, :close, ')', 12, 13]
|
13
|
+
include_examples 'scan', /(a)(?(1)TRUE|)10/, 8 => [:conditional, :separator, '|', 12, 13]
|
14
|
+
include_examples 'scan', /(a)(?(1)TRUE|)11/, 9 => [:conditional, :close, ')', 13, 14]
|
15
|
+
include_examples 'scan', /(?<N>A)(?(<N>)T|F)1/, 5 => [:conditional, :condition, '<N>', 10, 13]
|
16
|
+
include_examples 'scan', /(?'N'A)(?('N')T|F)2/, 5 => [:conditional, :condition, "'N'", 10, 13]
|
19
17
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
include_examples 'scan', /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/,
|
19
|
+
0 => [:group, :capture, '(', 0, 1],
|
20
|
+
1 => [:literal, :literal, 'a', 1, 2],
|
21
|
+
2 => [:group, :capture, '(', 2, 3],
|
22
|
+
3 => [:literal, :literal, 'b', 3, 4],
|
23
|
+
4 => [:group, :capture, '(', 4, 5],
|
24
|
+
5 => [:literal, :literal, 'c', 5, 6],
|
25
|
+
6 => [:group, :close, ')', 6, 7],
|
26
|
+
7 => [:group, :close, ')', 7, 8],
|
27
|
+
8 => [:group, :close, ')', 8, 9],
|
28
|
+
9 => [:conditional, :open, '(?', 9, 11],
|
29
|
+
10 => [:conditional, :condition_open, '(', 11, 12],
|
30
|
+
11 => [:conditional, :condition, '1', 12, 13],
|
31
|
+
12 => [:conditional, :condition_close, ')', 13, 14],
|
32
|
+
13 => [:conditional, :open, '(?', 14, 16],
|
33
|
+
14 => [:conditional, :condition_open, '(', 16, 17],
|
34
|
+
15 => [:conditional, :condition, '2', 17, 18],
|
35
|
+
16 => [:conditional, :condition_close, ')', 18, 19],
|
36
|
+
17 => [:literal, :literal, 'd', 19, 20],
|
37
|
+
18 => [:conditional, :separator, '|', 20, 21],
|
38
|
+
19 => [:conditional, :open, '(?', 21, 23],
|
39
|
+
20 => [:conditional, :condition_open, '(', 23, 24],
|
40
|
+
21 => [:conditional, :condition, '3', 24, 25],
|
41
|
+
22 => [:conditional, :condition_close, ')', 25, 26],
|
42
|
+
23 => [:literal, :literal, 'e', 26, 27],
|
43
|
+
24 => [:conditional, :separator, '|', 27, 28],
|
44
|
+
25 => [:literal, :literal, 'f', 28, 29],
|
45
|
+
26 => [:conditional, :close, ')', 29, 30],
|
46
|
+
27 => [:conditional, :close, ')', 30, 31],
|
47
|
+
28 => [:conditional, :separator, '|', 31, 32],
|
48
|
+
29 => [:conditional, :open, '(?', 32, 34],
|
49
|
+
30 => [:conditional, :condition_open, '(', 34, 35],
|
50
|
+
31 => [:conditional, :condition, '2', 35, 36],
|
51
|
+
32 => [:conditional, :condition_close, ')', 36, 37],
|
52
|
+
33 => [:conditional, :open, '(?', 37, 39],
|
53
|
+
34 => [:conditional, :condition_open, '(', 39, 40],
|
54
|
+
35 => [:conditional, :condition, '1', 40, 41],
|
55
|
+
36 => [:conditional, :condition_close, ')', 41, 42],
|
56
|
+
37 => [:literal, :literal, 'g', 42, 43],
|
57
|
+
38 => [:conditional, :separator, '|', 43, 44],
|
58
|
+
39 => [:literal, :literal, 'h', 44, 45],
|
59
|
+
40 => [:conditional, :close, ')', 45, 46],
|
60
|
+
41 => [:conditional, :close, ')', 46, 47],
|
61
|
+
42 => [:conditional, :close, ')', 47, 48]
|
24
62
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
63
|
+
include_examples 'scan', /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/,
|
64
|
+
0 => [:group, :capture, '(', 0, 1],
|
65
|
+
1 => [:group, :capture, '(', 1, 2],
|
66
|
+
2 => [:literal, :literal, 'a', 2, 3],
|
67
|
+
3 => [:group, :close, ')', 3, 4],
|
68
|
+
4 => [:meta, :alternation, '|', 4, 5],
|
69
|
+
5 => [:group, :capture, '(', 5, 6],
|
70
|
+
6 => [:literal, :literal, 'b', 6, 7],
|
71
|
+
7 => [:group, :close, ')', 7, 8],
|
72
|
+
8 => [:meta, :alternation, '|', 8, 9],
|
73
|
+
9 => [:group, :capture, '(', 9, 10],
|
74
|
+
10 => [:conditional, :open, '(?', 10, 12],
|
75
|
+
11 => [:conditional, :condition_open, '(', 12, 13],
|
76
|
+
12 => [:conditional, :condition, '2', 13, 14],
|
77
|
+
13 => [:conditional, :condition_close, ')', 14, 15],
|
78
|
+
14 => [:group, :capture, '(', 15, 16],
|
79
|
+
15 => [:literal, :literal, 'c', 16, 17],
|
80
|
+
16 => [:group, :capture, '(', 17, 18],
|
81
|
+
17 => [:literal, :literal, 'd', 18, 19],
|
82
|
+
18 => [:meta, :alternation, '|', 19, 20],
|
83
|
+
19 => [:literal, :literal, 'e', 20, 21],
|
84
|
+
20 => [:group, :close, ')', 21, 22],
|
85
|
+
21 => [:quantifier, :one_or_more, '+', 22, 23],
|
86
|
+
22 => [:group, :close, ')', 23, 24],
|
87
|
+
23 => [:quantifier, :zero_or_one, '?', 24, 25],
|
88
|
+
24 => [:conditional, :separator, '|', 25, 26],
|
89
|
+
25 => [:conditional, :open, '(?', 26, 28],
|
90
|
+
26 => [:conditional, :condition_open, '(', 28, 29],
|
91
|
+
27 => [:conditional, :condition, '3', 29, 30],
|
92
|
+
28 => [:conditional, :condition_close, ')', 30, 31],
|
93
|
+
29 => [:literal, :literal, 'f', 31, 32],
|
94
|
+
30 => [:conditional, :separator, '|', 32, 33],
|
95
|
+
31 => [:conditional, :open, '(?', 33, 35],
|
96
|
+
32 => [:conditional, :condition_open, '(', 35, 36],
|
97
|
+
33 => [:conditional, :condition, '4', 36, 37],
|
98
|
+
34 => [:conditional, :condition_close, ')', 37, 38],
|
99
|
+
35 => [:group, :capture, '(', 38, 39],
|
100
|
+
36 => [:literal, :literal, 'g', 39, 40],
|
101
|
+
37 => [:meta, :alternation, '|', 40, 41],
|
102
|
+
38 => [:group, :capture, '(', 41, 42],
|
103
|
+
39 => [:literal, :literal, 'h', 42, 43],
|
104
|
+
40 => [:group, :close, ')', 43, 44],
|
105
|
+
41 => [:group, :capture, '(', 44, 45],
|
106
|
+
42 => [:literal, :literal, 'i', 45, 46],
|
107
|
+
43 => [:group, :close, ')', 46, 47],
|
108
|
+
44 => [:group, :close, ')', 47, 48],
|
109
|
+
45 => [:conditional, :close, ')', 48, 49],
|
110
|
+
46 => [:conditional, :close, ')', 49, 50],
|
111
|
+
47 => [:conditional, :close, ')', 50, 51],
|
112
|
+
48 => [:group, :close, ')', 51, 52],
|
113
|
+
49 => [:group, :close, ')', 52, 53]
|
32
114
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
[
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
[ 8, :group, :close, ')', 8, 9],
|
47
|
-
[ 9, :conditional, :open, '(?', 9, 11],
|
48
|
-
[10, :conditional, :condition_open, '(', 11, 12],
|
49
|
-
[11, :conditional, :condition, '1', 12, 13],
|
50
|
-
[12, :conditional, :condition_close, ')', 13, 14],
|
51
|
-
[13, :conditional, :open, '(?', 14, 16],
|
52
|
-
[14, :conditional, :condition_open, '(', 16, 17],
|
53
|
-
[15, :conditional, :condition, '2', 17, 18],
|
54
|
-
[16, :conditional, :condition_close, ')', 18, 19],
|
55
|
-
[17, :literal, :literal, 'd', 19, 20],
|
56
|
-
[18, :conditional, :separator, '|', 20, 21],
|
57
|
-
[19, :conditional, :open, '(?', 21, 23],
|
58
|
-
[20, :conditional, :condition_open, '(', 23, 24],
|
59
|
-
[21, :conditional, :condition, '3', 24, 25],
|
60
|
-
[22, :conditional, :condition_close, ')', 25, 26],
|
61
|
-
[23, :literal, :literal, 'e', 26, 27],
|
62
|
-
[24, :conditional, :separator, '|', 27, 28],
|
63
|
-
[25, :literal, :literal, 'f', 28, 29],
|
64
|
-
[26, :conditional, :close, ')', 29, 30],
|
65
|
-
[27, :conditional, :close, ')', 30, 31],
|
66
|
-
[28, :conditional, :separator, '|', 31, 32],
|
67
|
-
[29, :conditional, :open, '(?', 32, 34],
|
68
|
-
[30, :conditional, :condition_open, '(', 34, 35],
|
69
|
-
[31, :conditional, :condition, '2', 35, 36],
|
70
|
-
[32, :conditional, :condition_close, ')', 36, 37],
|
71
|
-
[33, :conditional, :open, '(?', 37, 39],
|
72
|
-
[34, :conditional, :condition_open, '(', 39, 40],
|
73
|
-
[35, :conditional, :condition, '1', 40, 41],
|
74
|
-
[36, :conditional, :condition_close, ')', 41, 42],
|
75
|
-
[37, :literal, :literal, 'g', 42, 43],
|
76
|
-
[38, :conditional, :separator, '|', 43, 44],
|
77
|
-
[39, :literal, :literal, 'h', 44, 45],
|
78
|
-
[40, :conditional, :close, ')', 45, 46],
|
79
|
-
[41, :conditional, :close, ')', 46, 47],
|
80
|
-
[42, :conditional, :close, ')', 47, 48]
|
81
|
-
].each do |index, type, token, text, ts, te|
|
82
|
-
result = tokens[index]
|
83
|
-
|
84
|
-
expect(result[0]).to eq type
|
85
|
-
expect(result[1]).to eq token
|
86
|
-
expect(result[2]).to eq text
|
87
|
-
expect(result[3]).to eq ts
|
88
|
-
expect(result[4]).to eq te
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
specify('scan conditional nested groups') do
|
93
|
-
regexp = '((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))'
|
94
|
-
tokens = RS.scan(regexp)
|
95
|
-
|
96
|
-
[
|
97
|
-
[ 0, :group, :capture, '(', 0, 1],
|
98
|
-
[ 1, :group, :capture, '(', 1, 2],
|
99
|
-
[ 2, :literal, :literal, 'a', 2, 3],
|
100
|
-
[ 3, :group, :close, ')', 3, 4],
|
101
|
-
[ 4, :meta, :alternation, '|', 4, 5],
|
102
|
-
[ 5, :group, :capture, '(', 5, 6],
|
103
|
-
[ 6, :literal, :literal, 'b', 6, 7],
|
104
|
-
[ 7, :group, :close, ')', 7, 8],
|
105
|
-
[ 8, :meta, :alternation, '|', 8, 9],
|
106
|
-
[ 9, :group, :capture, '(', 9, 10],
|
107
|
-
[10, :conditional, :open, '(?', 10, 12],
|
108
|
-
[11, :conditional, :condition_open, '(', 12, 13],
|
109
|
-
[12, :conditional, :condition, '2', 13, 14],
|
110
|
-
[13, :conditional, :condition_close, ')', 14, 15],
|
111
|
-
[14, :group, :capture, '(', 15, 16],
|
112
|
-
[15, :literal, :literal, 'c', 16, 17],
|
113
|
-
[16, :group, :capture, '(', 17, 18],
|
114
|
-
[17, :literal, :literal, 'd', 18, 19],
|
115
|
-
[18, :meta, :alternation, '|', 19, 20],
|
116
|
-
[19, :literal, :literal, 'e', 20, 21],
|
117
|
-
[20, :group, :close, ')', 21, 22],
|
118
|
-
[21, :quantifier, :one_or_more, '+', 22, 23],
|
119
|
-
[22, :group, :close, ')', 23, 24],
|
120
|
-
[23, :quantifier, :zero_or_one, '?', 24, 25],
|
121
|
-
[24, :conditional, :separator, '|', 25, 26],
|
122
|
-
[25, :conditional, :open, '(?', 26, 28],
|
123
|
-
[26, :conditional, :condition_open, '(', 28, 29],
|
124
|
-
[27, :conditional, :condition, '3', 29, 30],
|
125
|
-
[28, :conditional, :condition_close, ')', 30, 31],
|
126
|
-
[29, :literal, :literal, 'f', 31, 32],
|
127
|
-
[30, :conditional, :separator, '|', 32, 33],
|
128
|
-
[31, :conditional, :open, '(?', 33, 35],
|
129
|
-
[32, :conditional, :condition_open, '(', 35, 36],
|
130
|
-
[33, :conditional, :condition, '4', 36, 37],
|
131
|
-
[34, :conditional, :condition_close, ')', 37, 38],
|
132
|
-
[35, :group, :capture, '(', 38, 39],
|
133
|
-
[36, :literal, :literal, 'g', 39, 40],
|
134
|
-
[37, :meta, :alternation, '|', 40, 41],
|
135
|
-
[38, :group, :capture, '(', 41, 42],
|
136
|
-
[39, :literal, :literal, 'h', 42, 43],
|
137
|
-
[40, :group, :close, ')', 43, 44],
|
138
|
-
[41, :group, :capture, '(', 44, 45],
|
139
|
-
[42, :literal, :literal, 'i', 45, 46],
|
140
|
-
[43, :group, :close, ')', 46, 47],
|
141
|
-
[44, :group, :close, ')', 47, 48],
|
142
|
-
[45, :conditional, :close, ')', 48, 49],
|
143
|
-
[46, :conditional, :close, ')', 49, 50],
|
144
|
-
[47, :conditional, :close, ')', 50, 51],
|
145
|
-
[48, :group, :close, ')', 51, 52],
|
146
|
-
[49, :group, :close, ')', 52, 53]
|
147
|
-
].each do |index, type, token, text, ts, te|
|
148
|
-
result = tokens[index]
|
149
|
-
|
150
|
-
expect(result[0]).to eq type
|
151
|
-
expect(result[1]).to eq token
|
152
|
-
expect(result[2]).to eq text
|
153
|
-
expect(result[3]).to eq ts
|
154
|
-
expect(result[4]).to eq te
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
specify('scan conditional nested alternation') do
|
159
|
-
regexp = '(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p'
|
160
|
-
tokens = RS.scan(regexp)
|
161
|
-
|
162
|
-
[9, 11, 17, 19, 32, 34, 40, 42, 46, 48].each do |index|
|
163
|
-
result = tokens[index]
|
164
|
-
|
165
|
-
expect(result[0]).to eq :meta
|
166
|
-
expect(result[1]).to eq :alternation
|
167
|
-
expect(result[2]).to eq '|'
|
168
|
-
expect((result[4] - result[3])).to eq 1
|
169
|
-
end
|
170
|
-
|
171
|
-
[14, 37].each do |index|
|
172
|
-
result = tokens[index]
|
173
|
-
|
174
|
-
expect(result[0]).to eq :conditional
|
175
|
-
expect(result[1]).to eq :separator
|
176
|
-
expect(result[2]).to eq '|'
|
177
|
-
expect((result[4] - result[3])).to eq 1
|
178
|
-
end
|
179
|
-
end
|
115
|
+
include_examples 'scan', /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/,
|
116
|
+
9 => [:meta, :alternation, '|', 10, 11],
|
117
|
+
11 => [:meta, :alternation, '|', 12, 13],
|
118
|
+
14 => [:conditional, :separator, '|', 15, 16],
|
119
|
+
17 => [:meta, :alternation, '|', 18, 19],
|
120
|
+
19 => [:meta, :alternation, '|', 20, 21],
|
121
|
+
32 => [:meta, :alternation, '|', 34, 35],
|
122
|
+
34 => [:meta, :alternation, '|', 36, 37],
|
123
|
+
37 => [:conditional, :separator, '|', 39, 40],
|
124
|
+
40 => [:meta, :alternation, '|', 42, 43],
|
125
|
+
42 => [:meta, :alternation, '|', 44, 45],
|
126
|
+
46 => [:meta, :alternation, '|', 48, 49],
|
127
|
+
48 => [:meta, :alternation, '|', 50, 51]
|
180
128
|
end
|