regexp_parser 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
data/test/helpers.rb
CHANGED
data/test/lexer/test_all.rb
CHANGED
@@ -43,15 +43,17 @@ class LexerConditionals < Test::Unit::TestCase
|
|
43
43
|
[11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
|
44
44
|
|
45
45
|
[12, :set, :open, '[', 30, 31, 3, 0, 2],
|
46
|
-
[13, :
|
47
|
-
[14, :set, :
|
46
|
+
[13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
|
47
|
+
[14, :set, :range, '-', 32, 33, 3, 1, 2],
|
48
|
+
[15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
|
49
|
+
[16, :set, :close, ']', 34, 35, 3, 0, 2],
|
48
50
|
|
49
|
-
[
|
50
|
-
[
|
51
|
-
[
|
51
|
+
[17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
|
52
|
+
[23, :conditional, :close, ')', 41, 42, 3, 0, 1],
|
53
|
+
[25, :conditional, :close, ')', 43, 44, 2, 0, 0],
|
52
54
|
|
53
|
-
[
|
54
|
-
[
|
55
|
+
[26, :group, :close, ')', 44, 45, 1, 0, 0],
|
56
|
+
[27, :group, :close, ')', 45, 46, 0, 0, 0]
|
55
57
|
].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
|
56
58
|
struct = tokens.at(index)
|
57
59
|
|
data/test/lexer/test_nesting.rb
CHANGED
@@ -62,38 +62,56 @@ class LexerNesting < Test::Unit::TestCase
|
|
62
62
|
|
63
63
|
'a[b-e]f' => {
|
64
64
|
1 => [:set, :open, '[', 1, 2, 0, 0, 0],
|
65
|
-
2 => [:
|
66
|
-
3 => [:set, :
|
65
|
+
2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
|
66
|
+
3 => [:set, :range, '-', 3, 4, 0, 1, 0],
|
67
|
+
4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
|
68
|
+
5 => [:set, :close, ']', 5, 6, 0, 0, 0],
|
67
69
|
},
|
68
70
|
|
69
|
-
'[
|
71
|
+
'[[:word:]&&[^c]z]' => {
|
70
72
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
|
74
|
+
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
75
|
+
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
76
|
+
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
77
|
+
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
78
|
+
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
79
|
+
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
80
|
+
8 => [:set, :close, ']', 16, 17, 0, 0, 0],
|
81
|
+
},
|
82
|
+
|
83
|
+
'[\p{word}&&[^c]z]' => {
|
84
|
+
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
85
|
+
1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
|
86
|
+
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
87
|
+
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
88
|
+
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
89
|
+
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
90
|
+
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
91
|
+
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
92
|
+
8 => [:set, :close, ']', 16, 17, 0, 0, 0],
|
77
93
|
},
|
78
94
|
|
79
95
|
'[a[b[c[d-g]]]]' => {
|
80
96
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
81
|
-
1 => [:
|
82
|
-
2 => [:
|
83
|
-
3 => [:
|
84
|
-
4 => [:
|
85
|
-
5 => [:
|
86
|
-
6 => [:
|
87
|
-
7 => [:
|
88
|
-
8 => [:
|
89
|
-
9 => [:
|
90
|
-
|
91
|
-
|
97
|
+
1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
|
98
|
+
2 => [:set, :open, '[', 2, 3, 0, 1, 0],
|
99
|
+
3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
|
100
|
+
4 => [:set, :open, '[', 4, 5, 0, 2, 0],
|
101
|
+
5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
|
102
|
+
6 => [:set, :open, '[', 6, 7, 0, 3, 0],
|
103
|
+
7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
|
104
|
+
8 => [:set, :range, '-', 8, 9, 0, 4, 0],
|
105
|
+
9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
|
106
|
+
10 => [:set, :close, ']', 10, 11, 0, 3, 0],
|
107
|
+
11 => [:set, :close, ']', 11, 12, 0, 2, 0],
|
108
|
+
12 => [:set, :close, ']', 12, 13, 0, 1, 0],
|
109
|
+
13 => [:set, :close, ']', 13, 14, 0, 0, 0],
|
92
110
|
},
|
93
111
|
}
|
94
112
|
|
95
113
|
tests.each_with_index do |(pattern, checks), count|
|
96
|
-
define_method "
|
114
|
+
define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
|
97
115
|
tokens = RL.lex(pattern, 'ruby/1.9')
|
98
116
|
|
99
117
|
checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
|
data/test/lexer/test_refcalls.rb
CHANGED
@@ -27,11 +27,11 @@ class LexerRefCalls < Test::Unit::TestCase
|
|
27
27
|
"(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0],
|
28
28
|
|
29
29
|
# Group back-references, with nesting level
|
30
|
-
'(?<X>abc)\k<X-0>' => [3, :backref, :
|
31
|
-
"(?<X>abc)\\k'X-0'" => [3, :backref, :
|
30
|
+
'(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0],
|
31
|
+
"(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
|
32
32
|
|
33
|
-
'(abc)\k<1-0>' => [3, :backref, :
|
34
|
-
"(abc)\\k'1-0'" => [3, :backref, :
|
33
|
+
'(abc)\k<1-0>' => [3, :backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0],
|
34
|
+
"(abc)\\k'1-0'" => [3, :backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0],
|
35
35
|
}
|
36
36
|
|
37
37
|
tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require File.expand_path('../../../helpers', __FILE__)
|
2
|
+
|
3
|
+
# edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
|
4
|
+
|
5
|
+
class ParserSetIntersections < Test::Unit::TestCase
|
6
|
+
def test_parse_set_intersection
|
7
|
+
root = RP.parse('[a&&z]')
|
8
|
+
set = root[0]
|
9
|
+
ints = set[0]
|
10
|
+
|
11
|
+
assert_equal 1, set.count
|
12
|
+
assert_equal CharacterSet::Intersection, ints.class
|
13
|
+
assert_equal 2, ints.count
|
14
|
+
|
15
|
+
seq1, seq2 = ints.expressions
|
16
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
17
|
+
assert_equal 1, seq1.count
|
18
|
+
assert_equal 'a', seq1.first.to_s
|
19
|
+
assert_equal Literal, seq1.first.class
|
20
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
21
|
+
assert_equal 1, seq2.count
|
22
|
+
assert_equal 'z', seq2.first.to_s
|
23
|
+
assert_equal Literal, seq2.first.class
|
24
|
+
|
25
|
+
refute set.matches?('a')
|
26
|
+
refute set.matches?('&')
|
27
|
+
refute set.matches?('z')
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_parse_set_intersection_range_and_subset
|
31
|
+
root = RP.parse('[a-z&&[^a]]')
|
32
|
+
set = root[0]
|
33
|
+
ints = set[0]
|
34
|
+
|
35
|
+
assert_equal 1, set.count
|
36
|
+
assert_equal CharacterSet::Intersection, ints.class
|
37
|
+
assert_equal 2, ints.count
|
38
|
+
|
39
|
+
seq1, seq2 = ints.expressions
|
40
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
41
|
+
assert_equal 1, seq1.count
|
42
|
+
assert_equal 'a-z', seq1.first.to_s
|
43
|
+
assert_equal CharacterSet::Range, seq1.first.class
|
44
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
45
|
+
assert_equal 1, seq2.count
|
46
|
+
assert_equal '[^a]', seq2.first.to_s
|
47
|
+
assert_equal CharacterSet, seq2.first.class
|
48
|
+
|
49
|
+
refute set.matches?('a')
|
50
|
+
refute set.matches?('&')
|
51
|
+
assert set.matches?('b')
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_parse_set_intersection_trailing_range
|
55
|
+
root = RP.parse('[a&&a-z]')
|
56
|
+
set = root[0]
|
57
|
+
ints = set[0]
|
58
|
+
|
59
|
+
assert_equal 1, set.count
|
60
|
+
assert_equal CharacterSet::Intersection, ints.class
|
61
|
+
assert_equal 2, ints.count
|
62
|
+
|
63
|
+
seq1, seq2 = ints.expressions
|
64
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
65
|
+
assert_equal 1, seq1.count
|
66
|
+
assert_equal 'a', seq1.first.to_s
|
67
|
+
assert_equal Literal, seq1.first.class
|
68
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
69
|
+
assert_equal 1, seq2.count
|
70
|
+
assert_equal 'a-z', seq2.first.to_s
|
71
|
+
assert_equal CharacterSet::Range, seq2.first.class
|
72
|
+
|
73
|
+
assert set.matches?('a')
|
74
|
+
refute set.matches?('&')
|
75
|
+
refute set.matches?('b')
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_parse_set_intersection_type
|
79
|
+
root = RP.parse('[a&&\w]')
|
80
|
+
set = root[0]
|
81
|
+
ints = set[0]
|
82
|
+
|
83
|
+
assert_equal 1, set.count
|
84
|
+
assert_equal CharacterSet::Intersection, ints.class
|
85
|
+
assert_equal 2, ints.count
|
86
|
+
|
87
|
+
seq1, seq2 = ints.expressions
|
88
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
89
|
+
assert_equal 1, seq1.count
|
90
|
+
assert_equal 'a', seq1.first.to_s
|
91
|
+
assert_equal Literal, seq1.first.class
|
92
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
93
|
+
assert_equal 1, seq2.count
|
94
|
+
assert_equal '\w', seq2.first.to_s
|
95
|
+
assert_equal CharacterType::Word, seq2.first.class
|
96
|
+
|
97
|
+
assert set.matches?('a')
|
98
|
+
refute set.matches?('&')
|
99
|
+
refute set.matches?('b')
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_parse_set_intersection_multipart
|
103
|
+
root = RP.parse('[\h&&\w&&efg]')
|
104
|
+
set = root[0]
|
105
|
+
ints = set[0]
|
106
|
+
|
107
|
+
assert_equal 1, set.count
|
108
|
+
assert_equal CharacterSet::Intersection, ints.class
|
109
|
+
assert_equal 3, ints.count
|
110
|
+
|
111
|
+
seq1, seq2, seq3 = ints.expressions
|
112
|
+
assert_equal CharacterSet::IntersectedSequence, seq1.class
|
113
|
+
assert_equal 1, seq1.count
|
114
|
+
assert_equal '\h', seq1.first.to_s
|
115
|
+
assert_equal CharacterSet::IntersectedSequence, seq2.class
|
116
|
+
assert_equal 1, seq2.count
|
117
|
+
assert_equal '\w', seq2.first.to_s
|
118
|
+
assert_equal CharacterSet::IntersectedSequence, seq3.class
|
119
|
+
assert_equal 3, seq3.count
|
120
|
+
assert_equal 'efg', seq3.to_s
|
121
|
+
|
122
|
+
assert set.matches?('e')
|
123
|
+
assert set.matches?('f')
|
124
|
+
refute set.matches?('a')
|
125
|
+
refute set.matches?('g')
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require File.expand_path('../../../helpers', __FILE__)
|
2
|
+
|
3
|
+
class ParserSetRangs < Test::Unit::TestCase
|
4
|
+
def test_parse_set_range
|
5
|
+
root = RP.parse('[a-z]')
|
6
|
+
set = root[0]
|
7
|
+
range = set[0]
|
8
|
+
|
9
|
+
assert_equal 1, set.count
|
10
|
+
assert_equal CharacterSet::Range, range.class
|
11
|
+
assert_equal 2, range.count
|
12
|
+
assert_equal 'a', range.first.to_s
|
13
|
+
assert_equal Literal, range.first.class
|
14
|
+
assert_equal 'z', range.last.to_s
|
15
|
+
assert_equal Literal, range.last.class
|
16
|
+
assert set.matches?('m')
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_parse_set_range_hex
|
20
|
+
root = RP.parse('[\x00-\x99]')
|
21
|
+
set = root[0]
|
22
|
+
range = set[0]
|
23
|
+
|
24
|
+
assert_equal 1, set.count
|
25
|
+
assert_equal CharacterSet::Range, range.class
|
26
|
+
assert_equal 2, range.count
|
27
|
+
assert_equal '\x00', range.first.to_s
|
28
|
+
assert_equal EscapeSequence::Hex, range.first.class
|
29
|
+
assert_equal '\x99', range.last.to_s
|
30
|
+
assert_equal EscapeSequence::Hex, range.last.class
|
31
|
+
assert set.matches?('\x50')
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_parse_set_range_unicode
|
35
|
+
root = RP.parse('[\u{40 42}-\u1234]')
|
36
|
+
set = root[0]
|
37
|
+
range = set[0]
|
38
|
+
|
39
|
+
assert_equal 1, set.count
|
40
|
+
assert_equal CharacterSet::Range, range.class
|
41
|
+
assert_equal 2, range.count
|
42
|
+
assert_equal '\u{40 42}', range.first.to_s
|
43
|
+
assert_equal EscapeSequence::CodepointList, range.first.class
|
44
|
+
assert_equal '\u1234', range.last.to_s
|
45
|
+
assert_equal EscapeSequence::Codepoint, range.last.class
|
46
|
+
assert set.matches?('\u600')
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_parse_set_range_edge_case_leading_dash
|
50
|
+
root = RP.parse('[--z]')
|
51
|
+
set = root[0]
|
52
|
+
range = set[0]
|
53
|
+
|
54
|
+
assert_equal 1, set.count
|
55
|
+
assert_equal 2, range.count
|
56
|
+
assert set.matches?('a')
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_parse_set_range_edge_case_trailing_dash
|
60
|
+
root = RP.parse('[!--]')
|
61
|
+
set = root[0]
|
62
|
+
range = set[0]
|
63
|
+
|
64
|
+
assert_equal 1, set.count
|
65
|
+
assert_equal 2, range.count
|
66
|
+
assert set.matches?('$')
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_parse_set_range_edge_case_leading_negate
|
70
|
+
root = RP.parse('[^-z]')
|
71
|
+
set = root[0]
|
72
|
+
|
73
|
+
assert_equal 2, set.count
|
74
|
+
assert set.matches?('a')
|
75
|
+
refute set.matches?('z')
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_parse_set_range_edge_case_trailing_negate
|
79
|
+
root = RP.parse('[!-^]')
|
80
|
+
set = root[0]
|
81
|
+
range = set[0]
|
82
|
+
|
83
|
+
assert_equal 1, set.count
|
84
|
+
assert_equal 2, range.count
|
85
|
+
assert set.matches?('$')
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_parse_set_range_edge_case_leading_intersection
|
89
|
+
root = RP.parse('[[\-ab]&&-bc]')
|
90
|
+
set = root[0]
|
91
|
+
|
92
|
+
assert_equal 1, set.count
|
93
|
+
assert_equal '-bc', set.first.last.to_s
|
94
|
+
assert set.matches?('-')
|
95
|
+
assert set.matches?('b')
|
96
|
+
refute set.matches?('a')
|
97
|
+
refute set.matches?('c')
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_parse_set_range_edge_case_trailing_intersection
|
101
|
+
root = RP.parse('[bc-&&[\-ab]]')
|
102
|
+
set = root[0]
|
103
|
+
|
104
|
+
assert_equal 1, set.count
|
105
|
+
assert_equal 'bc-', set.first.first.to_s
|
106
|
+
assert set.matches?('-')
|
107
|
+
assert set.matches?('b')
|
108
|
+
refute set.matches?('a')
|
109
|
+
refute set.matches?('c')
|
110
|
+
end
|
111
|
+
end
|
data/test/parser/test_all.rb
CHANGED
@@ -2,11 +2,14 @@ require File.expand_path("../../helpers", __FILE__)
|
|
2
2
|
|
3
3
|
%w{
|
4
4
|
alternation anchors errors escapes free_space groups
|
5
|
-
properties quantifiers refcalls sets types
|
5
|
+
posix_classes properties quantifiers refcalls sets types
|
6
6
|
}.each do|tc|
|
7
7
|
require File.expand_path("../test_#{tc}", __FILE__)
|
8
8
|
end
|
9
9
|
|
10
|
+
require File.expand_path('../set/test_ranges.rb', __FILE__)
|
11
|
+
require File.expand_path('../set/test_intersections.rb', __FILE__)
|
12
|
+
|
10
13
|
if RUBY_VERSION >= '2.0.0'
|
11
14
|
%w{conditionals keep}.each do|tc|
|
12
15
|
require File.expand_path("../test_#{tc}", __FILE__)
|
data/test/parser/test_escapes.rb
CHANGED
@@ -11,10 +11,6 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
11
11
|
/a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
|
12
12
|
/a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
|
13
13
|
|
14
|
-
# special cases
|
15
|
-
/a\bc/ => [1, :anchor, :word_boundary, Anchor::WordBoundary],
|
16
|
-
/a\sc/ => [1, :type, :space, CharacterType::Space],
|
17
|
-
|
18
14
|
# meta character escapes
|
19
15
|
/a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
|
20
16
|
/a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
|
@@ -27,14 +23,15 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
27
23
|
/a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
|
28
24
|
|
29
25
|
# unicode escapes
|
30
|
-
/a\u0640/ => [1, :escape, :codepoint, EscapeSequence::
|
31
|
-
/a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::
|
26
|
+
/a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
|
27
|
+
/a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
28
|
+
/a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
32
29
|
|
33
30
|
# hex escapes
|
34
|
-
/a\xFF/n => [1, :escape, :hex, EscapeSequence::
|
31
|
+
/a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
|
35
32
|
|
36
33
|
# octal escapes
|
37
|
-
/a\177/n => [1, :escape, :octal, EscapeSequence::
|
34
|
+
/a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
|
38
35
|
}
|
39
36
|
|
40
37
|
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
@@ -50,11 +47,35 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
50
47
|
end
|
51
48
|
end
|
52
49
|
|
50
|
+
def test_parse_chars_and_codepoints
|
51
|
+
root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
|
52
|
+
|
53
|
+
assert_equal "\n", root[0].char
|
54
|
+
assert_equal 10, root[0].codepoint
|
55
|
+
|
56
|
+
assert_equal "?", root[1].char
|
57
|
+
assert_equal 63, root[1].codepoint
|
58
|
+
|
59
|
+
assert_equal "A", root[2].char
|
60
|
+
assert_equal 65, root[2].codepoint
|
61
|
+
|
62
|
+
assert_equal "B", root[3].char
|
63
|
+
assert_equal 66, root[3].codepoint
|
64
|
+
|
65
|
+
assert_equal "C", root[4].char
|
66
|
+
assert_equal 67, root[4].codepoint
|
67
|
+
|
68
|
+
assert_equal ["D", "E"], root[5].chars
|
69
|
+
assert_equal [68, 69], root[5].codepoints
|
70
|
+
end
|
71
|
+
|
53
72
|
def test_parse_escape_control_sequence_lower
|
54
73
|
root = RP.parse(/a\\\c2b/)
|
55
74
|
|
56
75
|
assert_equal EscapeSequence::Control, root[2].class
|
57
76
|
assert_equal '\\c2', root[2].text
|
77
|
+
assert_equal "\u0012", root[2].char
|
78
|
+
assert_equal 18, root[2].codepoint
|
58
79
|
end
|
59
80
|
|
60
81
|
def test_parse_escape_control_sequence_upper
|
@@ -62,6 +83,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
62
83
|
|
63
84
|
assert_equal EscapeSequence::Control, root[2].class
|
64
85
|
assert_equal '\\C-C', root[2].text
|
86
|
+
assert_equal "\u0003", root[2].char
|
87
|
+
assert_equal 3, root[2].codepoint
|
65
88
|
end
|
66
89
|
|
67
90
|
def test_parse_escape_meta_sequence
|
@@ -69,6 +92,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
69
92
|
|
70
93
|
assert_equal EscapeSequence::Meta, root[2].class
|
71
94
|
assert_equal '\\M-Z', root[2].text
|
95
|
+
assert_equal "\u00DA", root[2].char
|
96
|
+
assert_equal 218, root[2].codepoint
|
72
97
|
end
|
73
98
|
|
74
99
|
def test_parse_escape_meta_control_sequence
|
@@ -76,6 +101,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
76
101
|
|
77
102
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
78
103
|
assert_equal '\\M-\\C-X', root[2].text
|
104
|
+
assert_equal "\u0098", root[2].char
|
105
|
+
assert_equal 152, root[2].codepoint
|
79
106
|
end
|
80
107
|
|
81
108
|
def test_parse_lower_c_meta_control_sequence
|
@@ -83,6 +110,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
83
110
|
|
84
111
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
85
112
|
assert_equal '\\M-\\cX', root[2].text
|
113
|
+
assert_equal "\u0098", root[2].char
|
114
|
+
assert_equal 152, root[2].codepoint
|
86
115
|
end
|
87
116
|
|
88
117
|
def test_parse_escape_reverse_meta_control_sequence
|
@@ -90,6 +119,8 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
90
119
|
|
91
120
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
92
121
|
assert_equal '\\C-\\M-X', root[2].text
|
122
|
+
assert_equal "\u0098", root[2].char
|
123
|
+
assert_equal 152, root[2].codepoint
|
93
124
|
end
|
94
125
|
|
95
126
|
def test_parse_escape_reverse_lower_c_meta_control_sequence
|
@@ -97,6 +128,7 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
97
128
|
|
98
129
|
assert_equal EscapeSequence::MetaControl, root[2].class
|
99
130
|
assert_equal '\\c\\M-X', root[2].text
|
131
|
+
assert_equal "\u0098", root[2].char
|
132
|
+
assert_equal 152, root[2].codepoint
|
100
133
|
end
|
101
|
-
|
102
134
|
end
|