regexp_parser 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
data/test/helpers.rb
CHANGED
data/test/lexer/test_all.rb
CHANGED
@@ -43,15 +43,17 @@ class LexerConditionals < Test::Unit::TestCase
|
|
43
43
|
[11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
|
44
44
|
|
45
45
|
[12, :set, :open, '[', 30, 31, 3, 0, 2],
|
46
|
-
[13, :
|
47
|
-
[14, :set, :
|
46
|
+
[13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
|
47
|
+
[14, :set, :range, '-', 32, 33, 3, 1, 2],
|
48
|
+
[15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
|
49
|
+
[16, :set, :close, ']', 34, 35, 3, 0, 2],
|
48
50
|
|
49
|
-
[
|
50
|
-
[
|
51
|
-
[
|
51
|
+
[17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
|
52
|
+
[23, :conditional, :close, ')', 41, 42, 3, 0, 1],
|
53
|
+
[25, :conditional, :close, ')', 43, 44, 2, 0, 0],
|
52
54
|
|
53
|
-
[
|
54
|
-
[
|
55
|
+
[26, :group, :close, ')', 44, 45, 1, 0, 0],
|
56
|
+
[27, :group, :close, ')', 45, 46, 0, 0, 0]
|
55
57
|
].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
|
56
58
|
struct = tokens.at(index)
|
57
59
|
|
data/test/lexer/test_nesting.rb
CHANGED
@@ -62,38 +62,56 @@ class LexerNesting < Test::Unit::TestCase
|
|
62
62
|
|
63
63
|
'a[b-e]f' => {
|
64
64
|
1 => [:set, :open, '[', 1, 2, 0, 0, 0],
|
65
|
-
2 => [:
|
66
|
-
3 => [:set, :
|
65
|
+
2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
|
66
|
+
3 => [:set, :range, '-', 3, 4, 0, 1, 0],
|
67
|
+
4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
|
68
|
+
5 => [:set, :close, ']', 5, 6, 0, 0, 0],
|
67
69
|
},
|
68
70
|
|
69
|
-
'[
|
71
|
+
'[[:word:]&&[^c]z]' => {
|
70
72
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
|
74
|
+
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
75
|
+
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
76
|
+
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
77
|
+
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
78
|
+
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
79
|
+
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
80
|
+
8 => [:set, :close, ']', 16, 17, 0, 0, 0],
|
81
|
+
},
|
82
|
+
|
83
|
+
'[\p{word}&&[^c]z]' => {
|
84
|
+
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
85
|
+
1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
|
86
|
+
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
87
|
+
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
88
|
+
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
89
|
+
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
90
|
+
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
91
|
+
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
92
|
+
8 => [:set, :close, ']', 16, 17, 0, 0, 0],
|
77
93
|
},
|
78
94
|
|
79
95
|
'[a[b[c[d-g]]]]' => {
|
80
96
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
81
|
-
1 => [:
|
82
|
-
2 => [:
|
83
|
-
3 => [:
|
84
|
-
4 => [:
|
85
|
-
5 => [:
|
86
|
-
6 => [:
|
87
|
-
7 => [:
|
88
|
-
8 => [:
|
89
|
-
9 => [:
|
90
|
-
|
91
|
-
|
97
|
+
1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
|
98
|
+
2 => [:set, :open, '[', 2, 3, 0, 1, 0],
|
99
|
+
3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
|
100
|
+
4 => [:set, :open, '[', 4, 5, 0, 2, 0],
|
101
|
+
5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
|
102
|
+
6 => [:set, :open, '[', 6, 7, 0, 3, 0],
|
103
|
+
7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
|
104
|
+
8 => [:set, :range, '-', 8, 9, 0, 4, 0],
|
105
|
+
9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
|
106
|
+
10 => [:set, :close, ']', 10, 11, 0, 3, 0],
|
107
|
+
11 => [:set, :close, ']', 11, 12, 0, 2, 0],
|
108
|
+
12 => [:set, :close, ']', 12, 13, 0, 1, 0],
|
109
|
+
13 => [:set, :close, ']', 13, 14, 0, 0, 0],
|
92
110
|
},
|
93
111
|
}
|
94
112
|
|
95
113
|
tests.each_with_index do |(pattern, checks), count|
|
96
|
-
define_method "
|
114
|
+
define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
|
97
115
|
tokens = RL.lex(pattern, 'ruby/1.9')
|
98
116
|
|
99
117
|
checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
|
data/test/lexer/test_refcalls.rb
CHANGED
@@ -27,11 +27,11 @@ class LexerRefCalls < Test::Unit::TestCase
|
|
27
27
|
"(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0],
|
28
28
|
|
29
29
|
# Group back-references, with nesting level
|
30
|
-
'(?<X>abc)\k<X-0>' => [3, :backref, :
|
31
|
-
"(?<X>abc)\\k'X-0'" => [3, :backref, :
|
30
|
+
'(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0],
|
31
|
+
"(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
|
32
32
|
|
33
|
-
'(abc)\k<1-0>' => [3, :backref, :
|
34
|
-
"(abc)\\k'1-0'" => [3, :backref, :
|
33
|
+
'(abc)\k<1-0>' => [3, :backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0],
|
34
|
+
"(abc)\\k'1-0'" => [3, :backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0],
|
35
35
|
}
|
36
36
|
|
37
37
|
tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require File.expand_path('../../../helpers', __FILE__)
|
2
|
+
|
3
|
+
# edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
|
4
|
+
|
5
|
+
class ParserSetIntersections < Test::Unit::TestCase
|
6
|
+
def test_parse_set_intersection
|
7
|
+
root = RP.parse('[a&&z]')
|
8
|
+
set = root[0]
|
9
|
+
ints = set[0]
|
10
|
+
|
11
|
+
assert_equal 1, set.count
|
12
|
+
assert_equal CharacterSet::Intersection, ints.class
|
13
|
+
assert_equal 2, ints.count
|
14
|
+
|
15
|
+
seq1, seq2 = ints.expressions
|
16
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
17
|
+
assert_equal 1, seq1.count
|
18
|
+
assert_equal 'a', seq1.first.to_s
|
19
|
+
assert_equal Literal, seq1.first.class
|
20
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
21
|
+
assert_equal 1, seq2.count
|
22
|
+
assert_equal 'z', seq2.first.to_s
|
23
|
+
assert_equal Literal, seq2.first.class
|
24
|
+
|
25
|
+
refute set.matches?('a')
|
26
|
+
refute set.matches?('&')
|
27
|
+
refute set.matches?('z')
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_parse_set_intersection_range_and_subset
|
31
|
+
root = RP.parse('[a-z&&[^a]]')
|
32
|
+
set = root[0]
|
33
|
+
ints = set[0]
|
34
|
+
|
35
|
+
assert_equal 1, set.count
|
36
|
+
assert_equal CharacterSet::Intersection, ints.class
|
37
|
+
assert_equal 2, ints.count
|
38
|
+
|
39
|
+
seq1, seq2 = ints.expressions
|
40
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
41
|
+
assert_equal 1, seq1.count
|
42
|
+
assert_equal 'a-z', seq1.first.to_s
|
43
|
+
assert_equal CharacterSet::Range, seq1.first.class
|
44
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
45
|
+
assert_equal 1, seq2.count
|
46
|
+
assert_equal '[^a]', seq2.first.to_s
|
47
|
+
assert_equal CharacterSet, seq2.first.class
|
48
|
+
|
49
|
+
refute set.matches?('a')
|
50
|
+
refute set.matches?('&')
|
51
|
+
assert set.matches?('b')
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_parse_set_intersection_trailing_range
|
55
|
+
root = RP.parse('[a&&a-z]')
|
56
|
+
set = root[0]
|
57
|
+
ints = set[0]
|
58
|
+
|
59
|
+
assert_equal 1, set.count
|
60
|
+
assert_equal CharacterSet::Intersection, ints.class
|
61
|
+
assert_equal 2, ints.count
|
62
|
+
|
63
|
+
seq1, seq2 = ints.expressions
|
64
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
65
|
+
assert_equal 1, seq1.count
|
66
|
+
assert_equal 'a', seq1.first.to_s
|
67
|
+
assert_equal Literal, seq1.first.class
|
68
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
69
|
+
assert_equal 1, seq2.count
|
70
|
+
assert_equal 'a-z', seq2.first.to_s
|
71
|
+
assert_equal CharacterSet::Range, seq2.first.class
|
72
|
+
|
73
|
+
assert set.matches?('a')
|
74
|
+
refute set.matches?('&')
|
75
|
+
refute set.matches?('b')
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_parse_set_intersection_type
|
79
|
+
root = RP.parse('[a&&\w]')
|
80
|
+
set = root[0]
|
81
|
+
ints = set[0]
|
82
|
+
|
83
|
+
assert_equal 1, set.count
|
84
|
+
assert_equal CharacterSet::Intersection, ints.class
|
85
|
+
assert_equal 2, ints.count
|
86
|
+
|
87
|
+
seq1, seq2 = ints.expressions
|
88
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
89
|
+
assert_equal 1, seq1.count
|
90
|
+
assert_equal 'a', seq1.first.to_s
|
91
|
+
assert_equal Literal, seq1.first.class
|
92
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
93
|
+
assert_equal 1, seq2.count
|
94
|
+
assert_equal '\w', seq2.first.to_s
|
95
|
+
assert_equal CharacterType::Word, seq2.first.class
|
96
|
+
|
97
|
+
assert set.matches?('a')
|
98
|
+
refute set.matches?('&')
|
99
|
+
refute set.matches?('b')
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_parse_set_intersection_multipart
|
103
|
+
root = RP.parse('[\h&&\w&&efg]')
|
104
|
+
set = root[0]
|
105
|
+
ints = set[0]
|
106
|
+
|
107
|
+
assert_equal 1, set.count
|
108
|
+
assert_equal CharacterSet::Intersection, ints.class
|
109
|
+
assert_equal 3, ints.count
|
110
|
+
|
111
|
+
seq1, seq2, seq3 = ints.expressions
|
112
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
113
|
+
assert_equal 1, seq1.count
|
114
|
+
assert_equal '\h', seq1.first.to_s
|
115
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
116
|
+
assert_equal 1, seq2.count
|
117
|
+
assert_equal '\w', seq2.first.to_s
|
118
|
+
assert_equal CharacterSet::IntersectedSequence, seq3.class
|
119
|
+
assert_equal 3, seq3.count
|
120
|
+
assert_equal 'efg', seq3.to_s
|
121
|
+
|
122
|
+
assert set.matches?('e')
|
123
|
+
assert set.matches?('f')
|
124
|
+
refute set.matches?('a')
|
125
|
+
refute set.matches?('g')
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require File.expand_path('../../../helpers', __FILE__)
|
2
|
+
|
3
|
+
class ParserSetRangs < Test::Unit::TestCase
|
4
|
+
def test_parse_set_range
|
5
|
+
root = RP.parse('[a-z]')
|
6
|
+
set = root[0]
|
7
|
+
range = set[0]
|
8
|
+
|
9
|
+
assert_equal 1, set.count
|
10
|
+
assert_equal CharacterSet::Range, range.class
|
11
|
+
assert_equal 2, range.count
|
12
|
+
assert_equal 'a', range.first.to_s
|
13
|
+
assert_equal Literal, range.first.class
|
14
|
+
assert_equal 'z', range.last.to_s
|
15
|
+
assert_equal Literal, range.last.class
|
16
|
+
assert set.matches?('m')
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_parse_set_range_hex
|
20
|
+
root = RP.parse('[\x00-\x99]')
|
21
|
+
set = root[0]
|
22
|
+
range = set[0]
|
23
|
+
|
24
|
+
assert_equal 1, set.count
|
25
|
+
assert_equal CharacterSet::Range, range.class
|
26
|
+
assert_equal 2, range.count
|
27
|
+
assert_equal '\x00', range.first.to_s
|
28
|
+
assert_equal EscapeSequence::Hex, range.first.class
|
29
|
+
assert_equal '\x99', range.last.to_s
|
30
|
+
assert_equal EscapeSequence::Hex, range.last.class
|
31
|
+
assert set.matches?('\x50')
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_parse_set_range_unicode
|
35
|
+
root = RP.parse('[\u{40 42}-\u1234]')
|
36
|
+
set = root[0]
|
37
|
+
range = set[0]
|
38
|
+
|
39
|
+
assert_equal 1, set.count
|
40
|
+
assert_equal CharacterSet::Range, range.class
|
41
|
+
assert_equal 2, range.count
|
42
|
+
assert_equal '\u{40 42}', range.first.to_s
|
43
|
+
assert_equal EscapeSequence::CodepointList, range.first.class
|
44
|
+
assert_equal '\u1234', range.last.to_s
|
45
|
+
assert_equal EscapeSequence::Codepoint, range.last.class
|
46
|
+
assert set.matches?('\u600')
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_parse_set_range_edge_case_leading_dash
|
50
|
+
root = RP.parse('[--z]')
|
51
|
+
set = root[0]
|
52
|
+
range = set[0]
|
53
|
+
|
54
|
+
assert_equal 1, set.count
|
55
|
+
assert_equal 2, range.count
|
56
|
+
assert set.matches?('a')
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_parse_set_range_edge_case_trailing_dash
|
60
|
+
root = RP.parse('[!--]')
|
61
|
+
set = root[0]
|
62
|
+
range = set[0]
|
63
|
+
|
64
|
+
assert_equal 1, set.count
|
65
|
+
assert_equal 2, range.count
|
66
|
+
assert set.matches?('$')
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_parse_set_range_edge_case_leading_negate
|
70
|
+
root = RP.parse('[^-z]')
|
71
|
+
set = root[0]
|
72
|
+
|
73
|
+
assert_equal 2, set.count
|
74
|
+
assert set.matches?('a')
|
75
|
+
refute set.matches?('z')
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_parse_set_range_edge_case_trailing_negate
|
79
|
+
root = RP.parse('[!-^]')
|
80
|
+
set = root[0]
|
81
|
+
range = set[0]
|
82
|
+
|
83
|
+
assert_equal 1, set.count
|
84
|
+
assert_equal 2, range.count
|
85
|
+
assert set.matches?('$')
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_parse_set_range_edge_case_leading_intersection
|
89
|
+
root = RP.parse('[[\-ab]&&-bc]')
|
90
|
+
set = root[0]
|
91
|
+
|
92
|
+
assert_equal 1, set.count
|
93
|
+
assert_equal '-bc', set.first.last.to_s
|
94
|
+
assert set.matches?('-')
|
95
|
+
assert set.matches?('b')
|
96
|
+
refute set.matches?('a')
|
97
|
+
refute set.matches?('c')
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_parse_set_range_edge_case_trailing_intersection
|
101
|
+
root = RP.parse('[bc-&&[\-ab]]')
|
102
|
+
set = root[0]
|
103
|
+
|
104
|
+
assert_equal 1, set.count
|
105
|
+
assert_equal 'bc-', set.first.first.to_s
|
106
|
+
assert set.matches?('-')
|
107
|
+
assert set.matches?('b')
|
108
|
+
refute set.matches?('a')
|
109
|
+
refute set.matches?('c')
|
110
|
+
end
|
111
|
+
end
|
data/test/parser/test_all.rb
CHANGED
@@ -2,11 +2,14 @@ require File.expand_path("../../helpers", __FILE__)
|
|
2
2
|
|
3
3
|
%w{
|
4
4
|
alternation anchors errors escapes free_space groups
|
5
|
-
properties quantifiers refcalls sets types
|
5
|
+
posix_classes properties quantifiers refcalls sets types
|
6
6
|
}.each do|tc|
|
7
7
|
require File.expand_path("../test_#{tc}", __FILE__)
|
8
8
|
end
|
9
9
|
|
10
|
+
require File.expand_path('../set/test_ranges.rb', __FILE__)
|
11
|
+
require File.expand_path('../set/test_intersections.rb', __FILE__)
|
12
|
+
|
10
13
|
if RUBY_VERSION >= '2.0.0'
|
11
14
|
%w{conditionals keep}.each do|tc|
|
12
15
|
require File.expand_path("../test_#{tc}", __FILE__)
|
data/test/parser/test_escapes.rb
CHANGED
@@ -11,10 +11,6 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
11
11
|
/a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
|
12
12
|
/a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
|
13
13
|
|
14
|
-
# special cases
|
15
|
-
/a\bc/ => [1, :anchor, :word_boundary, Anchor::WordBoundary],
|
16
|
-
/a\sc/ => [1, :type, :space, CharacterType::Space],
|
17
|
-
|
18
14
|
# meta character escapes
|
19
15
|
/a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
|
20
16
|
/a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
|
@@ -27,14 +23,15 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
27
23
|
/a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
|
28
24
|
|
29
25
|
# unicode escapes
|
30
|
-
/a\u0640/ => [1, :escape, :codepoint, EscapeSequence::
|
31
|
-
/a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::
|
26
|
+
/a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
|
27
|
+
/a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
28
|
+
/a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
32
29
|
|
33
30
|
# hex escapes
|
34
|
-
/a\xFF/n => [1, :escape, :hex, EscapeSequence::
|
31
|
+
/a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
|
35
32
|
|
36
33
|
# octal escapes
|
37
|
-
/a\177/n => [1, :escape, :octal, EscapeSequence::
|
34
|
+
/a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
|
38
35
|
}
|
39
36
|
|
40
37
|
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
@@ -50,11 +47,35 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
50
47
|
end
|
51
48
|
end
|
52
49
|
|
50
|
+
def test_parse_chars_and_codepoints
|
51
|
+
root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
|
52
|
+
|
53
|
+
assert_equal "\n", root[0].char
|
54
|
+
assert_equal 10, root[0].codepoint
|
55
|
+
|
56
|
+
assert_equal "?", root[1].char
|
57
|
+
assert_equal 63, root[1].codepoint
|
58
|
+
|
59
|
+
assert_equal "A", root[2].char
|
60
|
+
assert_equal 65, root[2].codepoint
|
61
|
+
|
62
|
+
assert_equal "B", root[3].char
|
63
|
+
assert_equal 66, root[3].codepoint
|
64
|
+
|
65
|
+
assert_equal "C", root[4].char
|
66
|
+
assert_equal 67, root[4].codepoint
|
67
|
+
|
68
|
+
assert_equal ["D", "E"], root[5].chars
|
69
|
+
assert_equal [68, 69], root[5].codepoints
|
70
|
+
end
|
71
|
+
|
53
72
|
def test_parse_escape_control_sequence_lower
|
54
73
|
root = RP.parse(/a\\\c2b/)
|
55
74
|
|
56
75
|
assert_equal EscapeSequence::Control, root[2].class
|
57
76
|
assert_equal '\\c2', root[2].text
|
77
|
+
assert_equal "\u0012", root[2].char
|
78
|
+
assert_equal 18, root[2].codepoint
|
58
79
|
end
|
59
80
|
|
60
81
|
def test_parse_escape_control_sequence_upper
|
@@ -62,6 +83,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
62
83
|
|
63
84
|
assert_equal EscapeSequence::Control, root[2].class
|
64
85
|
assert_equal '\\C-C', root[2].text
|
86
|
+
assert_equal "\u0003", root[2].char
|
87
|
+
assert_equal 3, root[2].codepoint
|
65
88
|
end
|
66
89
|
|
67
90
|
def test_parse_escape_meta_sequence
|
@@ -69,6 +92,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
69
92
|
|
70
93
|
assert_equal EscapeSequence::Meta, root[2].class
|
71
94
|
assert_equal '\\M-Z', root[2].text
|
95
|
+
assert_equal "\u00DA", root[2].char
|
96
|
+
assert_equal 218, root[2].codepoint
|
72
97
|
end
|
73
98
|
|
74
99
|
def test_parse_escape_meta_control_sequence
|
@@ -76,6 +101,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
76
101
|
|
77
102
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
78
103
|
assert_equal '\\M-\\C-X', root[2].text
|
104
|
+
assert_equal "\u0098", root[2].char
|
105
|
+
assert_equal 152, root[2].codepoint
|
79
106
|
end
|
80
107
|
|
81
108
|
def test_parse_lower_c_meta_control_sequence
|
@@ -83,6 +110,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
83
110
|
|
84
111
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
85
112
|
assert_equal '\\M-\\cX', root[2].text
|
113
|
+
assert_equal "\u0098", root[2].char
|
114
|
+
assert_equal 152, root[2].codepoint
|
86
115
|
end
|
87
116
|
|
88
117
|
def test_parse_escape_reverse_meta_control_sequence
|
@@ -90,6 +119,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
90
119
|
|
91
120
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
92
121
|
assert_equal '\\C-\\M-X', root[2].text
|
122
|
+
assert_equal "\u0098", root[2].char
|
123
|
+
assert_equal 152, root[2].codepoint
|
93
124
|
end
|
94
125
|
|
95
126
|
def test_parse_escape_reverse_lower_c_meta_control_sequence
|
@@ -97,6 +128,7 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
97
128
|
|
98
129
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
99
130
|
assert_equal '\\c\\M-X', root[2].text
|
131
|
+
assert_equal "\u0098", root[2].char
|
132
|
+
assert_equal 152, root[2].codepoint
|
100
133
|
end
|
101
|
-
|
102
134
|
end
|