regexp_parser 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
data/test/parser/test_sets.rb
CHANGED
@@ -1,176 +1,179 @@
|
|
1
1
|
require File.expand_path("../../helpers", __FILE__)
|
2
2
|
|
3
3
|
class TestParserSets < Test::Unit::TestCase
|
4
|
-
|
5
4
|
def test_parse_set_basic
|
6
|
-
root = RP.parse('[
|
7
|
-
exp = root
|
5
|
+
root = RP.parse('[ab]+')
|
6
|
+
exp = root[0]
|
7
|
+
|
8
|
+
assert_equal CharacterSet, exp.class
|
9
|
+
assert_equal 2, exp.count
|
8
10
|
|
9
|
-
assert_equal
|
10
|
-
assert_equal
|
11
|
+
assert_equal Literal, exp[0].class
|
12
|
+
assert_equal 'a', exp[0].text
|
13
|
+
assert_equal Literal, exp[1].class
|
14
|
+
assert_equal 'b', exp[1].text
|
11
15
|
|
12
|
-
|
13
|
-
assert_equal 1,
|
14
|
-
assert_equal(-1,
|
16
|
+
assert exp.quantified?
|
17
|
+
assert_equal 1, exp.quantifier.min
|
18
|
+
assert_equal(-1, exp.quantifier.max)
|
15
19
|
end
|
16
20
|
|
17
|
-
def
|
18
|
-
root = RP.parse('[
|
19
|
-
exp = root
|
21
|
+
def test_parse_set_char_type
|
22
|
+
root = RP.parse('[a\dc]')
|
23
|
+
exp = root[0]
|
20
24
|
|
21
|
-
assert_equal
|
25
|
+
assert_equal CharacterSet, exp.class
|
26
|
+
assert_equal 3, exp.count
|
22
27
|
|
23
|
-
assert_equal
|
24
|
-
assert_equal
|
28
|
+
assert_equal CharacterType::Digit, exp[1].class
|
29
|
+
assert_equal '\d', exp[1].text
|
30
|
+
end
|
25
31
|
|
26
|
-
|
32
|
+
def test_parse_set_escape_sequence_backspace
|
33
|
+
root = RP.parse('[a\bc]')
|
34
|
+
exp = root[0]
|
27
35
|
|
28
|
-
assert_equal
|
29
|
-
assert_equal
|
36
|
+
assert_equal CharacterSet, exp.class
|
37
|
+
assert_equal 3, exp.count
|
38
|
+
|
39
|
+
assert_equal EscapeSequence::Backspace, exp[1].class
|
40
|
+
assert_equal '\b', exp[1].text
|
41
|
+
|
42
|
+
assert exp.matches?('a')
|
43
|
+
assert exp.matches?("\b")
|
44
|
+
refute exp.matches?('b')
|
45
|
+
assert exp.matches?('c')
|
30
46
|
end
|
31
47
|
|
32
|
-
def
|
33
|
-
root = RP.parse('[
|
34
|
-
exp = root
|
48
|
+
def test_parse_set_escape_sequence_hex
|
49
|
+
root = RP.parse('[a\x20c]', :any)
|
50
|
+
exp = root[0]
|
51
|
+
|
52
|
+
assert_equal CharacterSet, exp.class
|
53
|
+
assert_equal 3, exp.count
|
35
54
|
|
36
|
-
assert_equal
|
37
|
-
assert_equal
|
38
|
-
assert_equal true, exp.include?('h')
|
39
|
-
assert_equal false, exp.include?(']')
|
55
|
+
assert_equal EscapeSequence::Hex, exp[1].class
|
56
|
+
assert_equal '\x20', exp[1].text
|
40
57
|
end
|
41
58
|
|
42
|
-
def
|
43
|
-
root = RP.parse('[\
|
44
|
-
exp = root
|
59
|
+
def test_parse_set_escape_sequence_codepoint
|
60
|
+
root = RP.parse('[a\u0640]')
|
61
|
+
exp = root[0]
|
45
62
|
|
46
|
-
assert_equal
|
47
|
-
assert_equal
|
48
|
-
|
49
|
-
assert_equal
|
63
|
+
assert_equal CharacterSet, exp.class
|
64
|
+
assert_equal 2, exp.count
|
65
|
+
|
66
|
+
assert_equal EscapeSequence::Codepoint, exp[1].class
|
67
|
+
assert_equal '\u0640', exp[1].text
|
50
68
|
end
|
51
69
|
|
52
|
-
def
|
53
|
-
root = RP.parse('[\
|
54
|
-
exp = root
|
70
|
+
def test_parse_set_escape_sequence_codepoint_list
|
71
|
+
root = RP.parse('[a\u{41 1F60D}]')
|
72
|
+
exp = root[0]
|
73
|
+
|
74
|
+
assert_equal CharacterSet, exp.class
|
75
|
+
assert_equal 2, exp.count
|
55
76
|
|
56
|
-
assert_equal
|
57
|
-
assert_equal
|
77
|
+
assert_equal EscapeSequence::CodepointList, exp[1].class
|
78
|
+
assert_equal '\u{41 1F60D}', exp[1].text
|
58
79
|
end
|
59
80
|
|
60
|
-
def
|
61
|
-
root = RP.parse('[
|
62
|
-
exp = root
|
81
|
+
def test_parse_set_posix_class
|
82
|
+
root = RP.parse('[[:digit:][:^lower:]]+')
|
83
|
+
exp = root[0]
|
84
|
+
|
85
|
+
assert_equal CharacterSet, exp.class
|
86
|
+
assert_equal 2, exp.count
|
63
87
|
|
64
|
-
assert_equal
|
65
|
-
assert_equal
|
88
|
+
assert_equal PosixClass, exp[0].class
|
89
|
+
assert_equal '[:digit:]', exp[0].text
|
90
|
+
assert_equal PosixClass, exp[1].class
|
91
|
+
assert_equal '[:^lower:]', exp[1].text
|
66
92
|
end
|
67
93
|
|
68
|
-
def
|
69
|
-
root = RP.parse('[a[
|
70
|
-
|
94
|
+
def test_parse_set_nesting
|
95
|
+
root = RP.parse('[a[b[c]d]e]')
|
96
|
+
|
97
|
+
exp = root[0]
|
98
|
+
assert_equal CharacterSet, exp.class
|
99
|
+
assert_equal 3, exp.count
|
100
|
+
assert_equal Literal, exp[0].class
|
101
|
+
assert_equal Literal, exp[2].class
|
102
|
+
|
103
|
+
subset1 = exp[1]
|
104
|
+
assert_equal CharacterSet, subset1.class
|
105
|
+
assert_equal 3, subset1.count
|
106
|
+
assert_equal Literal, subset1[0].class
|
107
|
+
assert_equal Literal, subset1[2].class
|
108
|
+
|
109
|
+
subset2 = subset1[1]
|
110
|
+
assert_equal CharacterSet, subset2.class
|
111
|
+
assert_equal 1, subset2.count
|
112
|
+
assert_equal Literal, subset2[0].class
|
113
|
+
end
|
71
114
|
|
72
|
-
|
73
|
-
|
115
|
+
def test_parse_set_nesting_negative
|
116
|
+
root = RP.parse('[a[^b[c]]]')
|
117
|
+
exp = root[0]
|
118
|
+
|
119
|
+
assert_equal CharacterSet, exp.class
|
120
|
+
assert_equal 2, exp.count
|
121
|
+
assert_equal Literal, exp[0].class
|
122
|
+
refute exp.negative?
|
123
|
+
|
124
|
+
subset1 = exp[1]
|
125
|
+
assert_equal CharacterSet, subset1.class
|
126
|
+
assert_equal 2, subset1.count
|
127
|
+
assert_equal Literal, subset1[0].class
|
128
|
+
assert subset1.negative?
|
129
|
+
|
130
|
+
subset2 = subset1[1]
|
131
|
+
assert_equal CharacterSet, subset2.class
|
132
|
+
assert_equal 1, subset2.count
|
133
|
+
assert_equal Literal, subset2[0].class
|
134
|
+
refute subset2.negative?
|
74
135
|
end
|
75
136
|
|
76
|
-
def
|
137
|
+
def test_parse_set_nesting_to_s
|
77
138
|
pattern = '[a[b[^c]]]'
|
78
|
-
root = RP.parse(pattern
|
139
|
+
root = RP.parse(pattern)
|
79
140
|
|
80
141
|
assert_equal pattern, root.to_s
|
81
142
|
end
|
82
143
|
|
83
|
-
def
|
84
|
-
root = RP.parse('
|
85
|
-
exp = root
|
144
|
+
def test_parse_set_literals_are_not_merged
|
145
|
+
root = RP.parse("[#{'a' * 10}]")
|
146
|
+
exp = root[0]
|
86
147
|
|
87
|
-
assert_equal
|
88
|
-
assert_equal true, exp.include?('a')
|
89
|
-
assert_equal true, exp.include?('b')
|
90
|
-
assert_equal true, exp.include?('c')
|
148
|
+
assert_equal 10, exp.count
|
91
149
|
end
|
92
150
|
|
93
|
-
def
|
94
|
-
root = RP.parse(
|
151
|
+
def test_parse_set_whitespace_is_not_merged
|
152
|
+
root = RP.parse("[#{' ' * 10}]")
|
153
|
+
exp = root[0]
|
95
154
|
|
96
|
-
|
97
|
-
assert_equal true, exp.is_a?(CharacterSet)
|
98
|
-
assert_equal true, exp.include?('a')
|
99
|
-
assert_equal true, exp.include?('b')
|
100
|
-
assert_equal false, exp.include?('b', true) # should not include b directly
|
101
|
-
|
102
|
-
sub = exp.members.at(1)
|
103
|
-
assert_equal false, sub.include?('a')
|
104
|
-
assert_equal true, sub.include?('b')
|
105
|
-
assert_equal true, sub.include?('b', true)
|
106
|
-
assert_equal false, sub.include?('c')
|
155
|
+
assert_equal 10, exp.count
|
107
156
|
end
|
108
157
|
|
109
|
-
def
|
110
|
-
root = RP.parse(
|
111
|
-
|
112
|
-
exp = root.expressions.at(0)
|
113
|
-
assert_equal true, exp.is_a?(CharacterSet)
|
114
|
-
assert_equal true, exp.include?('a')
|
115
|
-
assert_equal true, exp.include?('b')
|
116
|
-
assert_equal false, exp.include?('b', true) # should not include b directly
|
117
|
-
|
118
|
-
sub = exp.members.at(1)
|
119
|
-
assert_equal false, sub.include?('a')
|
120
|
-
assert_equal true, sub.include?('b')
|
121
|
-
assert_equal true, sub.include?('b', true)
|
122
|
-
assert_equal true, sub.include?('f', true)
|
123
|
-
assert_equal true, sub.include?('c')
|
124
|
-
assert_equal false, sub.include?('c', true)
|
125
|
-
|
126
|
-
sub2 = sub.members.at(1)
|
127
|
-
assert_equal false, sub2.include?('a')
|
128
|
-
assert_equal false, sub2.include?('b')
|
129
|
-
assert_equal true, sub2.include?('c')
|
130
|
-
assert_equal true, sub2.include?('c', true)
|
131
|
-
assert_equal true, sub2.include?('e', true)
|
132
|
-
assert_equal true, sub2.include?('d')
|
133
|
-
assert_equal false, sub2.include?('d', true)
|
134
|
-
|
135
|
-
sub3 = sub2.members.at(1)
|
136
|
-
assert_equal false, sub3.include?('a')
|
137
|
-
assert_equal false, sub3.include?('g')
|
138
|
-
assert_equal false, sub3.include?('b')
|
139
|
-
assert_equal false, sub3.include?('f')
|
140
|
-
assert_equal false, sub3.include?('c')
|
141
|
-
assert_equal false, sub3.include?('e')
|
142
|
-
assert_equal true, sub3.include?('d')
|
143
|
-
assert_equal true, sub3.include?('d', true)
|
144
|
-
end
|
145
|
-
|
146
|
-
# character subsets and negated posix classes are not available in ruby 1.8
|
147
|
-
if RUBY_VERSION >= '1.9'
|
148
|
-
def test_parse_set_nesting_matches
|
149
|
-
root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
|
150
|
-
exp = root.expressions.at(0)
|
151
|
-
|
152
|
-
assert_equal true, exp.matches?('b')
|
153
|
-
assert_equal false, exp.matches?('c')
|
154
|
-
end
|
155
|
-
|
156
|
-
def test_parse_set_nesting_not_matches
|
157
|
-
root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
|
158
|
-
exp = root.expressions.at(0)
|
158
|
+
def test_parse_set_whitespace_is_not_merged_in_x_mode
|
159
|
+
root = RP.parse("(?x)[#{' ' * 10}]")
|
160
|
+
exp = root[1]
|
159
161
|
|
160
|
-
|
161
|
-
|
162
|
+
assert_equal 10, exp.count
|
163
|
+
end
|
162
164
|
|
163
|
-
|
164
|
-
|
165
|
-
|
165
|
+
# TODO: Collations and equivalents need own exp class if they ever get enabled
|
166
|
+
def test_parse_set_collating_sequence
|
167
|
+
root = RP.parse('[a[.span-ll.]h]', :any)
|
168
|
+
exp = root[0]
|
166
169
|
|
167
|
-
|
170
|
+
assert_equal '[.span-ll.]', exp[1].to_s
|
171
|
+
end
|
168
172
|
|
169
|
-
|
170
|
-
|
173
|
+
def test_parse_set_character_equivalents
|
174
|
+
root = RP.parse('[a[=e=]h]', :any)
|
175
|
+
exp = root[0]
|
171
176
|
|
172
|
-
|
173
|
-
end
|
177
|
+
assert_equal '[=e=]', exp[1].to_s
|
174
178
|
end
|
175
|
-
|
176
179
|
end
|
data/test/scanner/test_all.rb
CHANGED
@@ -13,12 +13,6 @@ if RUBY_VERSION >= '2.0.0'
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
if RUBY_VERSION >= '2.5.0'
|
17
|
-
%w{emojis}.each do|tc|
|
18
|
-
require File.expand_path("../test_#{tc}", __FILE__)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
16
|
class TestRegexpScanner < Test::Unit::TestCase
|
23
17
|
|
24
18
|
def test_scanner_returns_an_array
|
@@ -38,7 +32,7 @@ class TestRegexpScanner < Test::Unit::TestCase
|
|
38
32
|
def test_scanner_token_count
|
39
33
|
re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
|
40
34
|
|
41
|
-
assert_equal
|
35
|
+
assert_equal 28, RS.scan(re).length
|
42
36
|
end
|
43
37
|
|
44
38
|
end
|
@@ -4,22 +4,22 @@ class ScannerConditionals < Test::Unit::TestCase
|
|
4
4
|
|
5
5
|
# Basic conditional scan token tests
|
6
6
|
tests = {
|
7
|
-
/(a)(?(1)T|F)/
|
8
|
-
/(a)(?(1)T|F)/
|
9
|
-
/(a)(?(1)T|F)/
|
10
|
-
/(a)(?(1)T|F)/
|
11
|
-
/(a)(?(1)T|F)/
|
12
|
-
/(a)(?(1)T|F)/
|
13
|
-
/(a)(?(1)T|F)/
|
14
|
-
/(a)(?(1)T|F)/
|
15
|
-
|
16
|
-
/(a)(?(1)TRUE)/
|
17
|
-
|
18
|
-
/(a)(?(1)TRUE|)/
|
19
|
-
/(a)(?(1)TRUE|)/
|
20
|
-
|
21
|
-
/(?<N>A)(?(<N>)T|F)/
|
22
|
-
/(?'N'A)(?('N')T|F)/
|
7
|
+
/(a)(?(1)T|F)1/ => [3, :conditional, :open, '(?', 3, 5],
|
8
|
+
/(a)(?(1)T|F)2/ => [4, :conditional, :condition_open, '(', 5, 6],
|
9
|
+
/(a)(?(1)T|F)3/ => [5, :conditional, :condition, '1', 6, 7],
|
10
|
+
/(a)(?(1)T|F)4/ => [6, :conditional, :condition_close, ')', 7, 8],
|
11
|
+
/(a)(?(1)T|F)5/ => [7, :literal, :literal, 'T', 8, 9],
|
12
|
+
/(a)(?(1)T|F)6/ => [8, :conditional, :separator, '|', 9, 10],
|
13
|
+
/(a)(?(1)T|F)7/ => [9, :literal, :literal, 'F', 10, 11],
|
14
|
+
/(a)(?(1)T|F)8/ => [10, :conditional, :close, ')', 11, 12],
|
15
|
+
|
16
|
+
/(a)(?(1)TRUE)9/ => [8, :conditional, :close, ')', 12, 13],
|
17
|
+
|
18
|
+
/(a)(?(1)TRUE|)10/ => [8, :conditional, :separator, '|', 12, 13],
|
19
|
+
/(a)(?(1)TRUE|)11/ => [9, :conditional, :close, ')', 13, 14],
|
20
|
+
|
21
|
+
/(?<N>A)(?(<N>)T|F)1/ => [5, :conditional, :condition, '<N>', 10, 13],
|
22
|
+
/(?'N'A)(?('N')T|F)2/ => [5, :conditional, :condition, "'N'", 10, 13],
|
23
23
|
}
|
24
24
|
|
25
25
|
tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
|
data/test/scanner/test_errors.rb
CHANGED
@@ -46,18 +46,6 @@ class ScannerErrors < Test::Unit::TestCase
|
|
46
46
|
assert_raise( RS::PrematureEndError ) { RS.scan('\x') }
|
47
47
|
end
|
48
48
|
|
49
|
-
def test_scanner_eof_in_wide_hex_escape
|
50
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{') }
|
51
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{0') }
|
52
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02') }
|
53
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{024') }
|
54
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{0246') }
|
55
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468') }
|
56
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468A') }
|
57
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468AC') }
|
58
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468ACE') }
|
59
|
-
end
|
60
|
-
|
61
49
|
def test_scanner_eof_in_codepoint_escape
|
62
50
|
assert_raise( RS::PrematureEndError ) { RS.scan('\u') }
|
63
51
|
assert_raise( RS::PrematureEndError ) { RS.scan('\u0') }
|
@@ -94,24 +82,6 @@ class ScannerErrors < Test::Unit::TestCase
|
|
94
82
|
assert_raise( RS::InvalidSequenceError ) { RS.scan('\xZ0') }
|
95
83
|
end
|
96
84
|
|
97
|
-
def test_scanner_invalid_wide_hex_escape
|
98
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{}') }
|
99
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ }') }
|
100
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ A }') }
|
101
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0-}') }
|
102
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{Z00}') }
|
103
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{000Z}') }
|
104
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00ZZ}') }
|
105
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ}') }
|
106
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0}') }
|
107
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0X}') }
|
108
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00X') }
|
109
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00XYZ') }
|
110
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000XYZ') }
|
111
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACED') }
|
112
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACE]') }
|
113
|
-
end
|
114
|
-
|
115
85
|
def test_scanner_invalid_named_group
|
116
86
|
assert_raise( RS::InvalidGroupError ) { RS.scan("(?'')") }
|
117
87
|
assert_raise( RS::InvalidGroupError ) { RS.scan("(?''empty-name)") }
|
@@ -22,10 +22,9 @@ class ScannerEscapes < Test::Unit::TestCase
|
|
22
22
|
'a\x24c' => [1, :escape, :hex, '\x24', 1, 5],
|
23
23
|
'a\x0640c' => [1, :escape, :hex, '\x06', 1, 5],
|
24
24
|
|
25
|
-
'a\x{0640}c' => [1, :escape, :hex_wide, '\x{0640}', 1, 9],
|
26
|
-
|
27
25
|
'a\u0640c' => [1, :escape, :codepoint, '\u0640', 1, 7],
|
28
26
|
'a\u{640 0641}c' => [1, :escape, :codepoint_list, '\u{640 0641}', 1, 13],
|
27
|
+
'a\u{10FFFF}c' => [1, :escape, :codepoint_list, '\u{10FFFF}', 1, 11],
|
29
28
|
|
30
29
|
/a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
|
31
30
|
/a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
|
@@ -159,34 +159,34 @@ class ScannerFreeSpace < Test::Unit::TestCase
|
|
159
159
|
regexp = /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/
|
160
160
|
tokens = RS.scan(regexp)
|
161
161
|
[
|
162
|
-
[ 0, :group, :capture,
|
163
|
-
[ 1, :literal, :literal,
|
164
|
-
[ 2, :group, :capture,
|
165
|
-
[ 3, :literal, :literal,
|
166
|
-
[ 4, :group, :capture,
|
167
|
-
[ 5, :group, :
|
168
|
-
[ 6, :group, :close,
|
169
|
-
[ 7, :free_space, :whitespace,
|
170
|
-
[ 8, :group, :capture,
|
171
|
-
[ 9, :literal, :literal,
|
172
|
-
[10, :free_space, :whitespace,
|
173
|
-
[11, :literal, :literal,
|
174
|
-
[12, :group, :close,
|
175
|
-
[13, :free_space, :whitespace,
|
176
|
-
[14, :group, :capture,
|
177
|
-
[15, :group, :
|
178
|
-
[16, :group, :close,
|
179
|
-
[17, :group, :capture,
|
180
|
-
[18, :literal, :literal,
|
181
|
-
[19, :group, :close,
|
182
|
-
[20, :literal, :literal,
|
183
|
-
[21, :group, :close,
|
184
|
-
[22, :literal, :literal,
|
185
|
-
[23, :group, :close,
|
186
|
-
[24, :literal, :literal,
|
187
|
-
[25, :group, :close,
|
188
|
-
[26, :literal, :literal,
|
189
|
-
[27, :group, :close,
|
162
|
+
[ 0, :group, :capture, '(', 0, 1],
|
163
|
+
[ 1, :literal, :literal, 'a ', 1, 3],
|
164
|
+
[ 2, :group, :capture, '(', 3, 4],
|
165
|
+
[ 3, :literal, :literal, 'b', 4, 5],
|
166
|
+
[ 4, :group, :capture, '(', 5, 6],
|
167
|
+
[ 5, :group, :options_switch, '(?x', 6, 9],
|
168
|
+
[ 6, :group, :close, ')', 9, 10],
|
169
|
+
[ 7, :free_space, :whitespace, ' ', 10, 11],
|
170
|
+
[ 8, :group, :capture, '(', 11, 12],
|
171
|
+
[ 9, :literal, :literal, 'c', 12, 13],
|
172
|
+
[10, :free_space, :whitespace, ' ', 13, 14],
|
173
|
+
[11, :literal, :literal, 'd', 14, 15],
|
174
|
+
[12, :group, :close, ')', 15, 16],
|
175
|
+
[13, :free_space, :whitespace, ' ', 16, 17],
|
176
|
+
[14, :group, :capture, '(', 17, 18],
|
177
|
+
[15, :group, :options_switch, '(?-x', 18, 22],
|
178
|
+
[16, :group, :close, ')', 22, 23],
|
179
|
+
[17, :group, :capture, '(', 23, 24],
|
180
|
+
[18, :literal, :literal, 'e f', 24, 27],
|
181
|
+
[19, :group, :close, ')', 27, 28],
|
182
|
+
[20, :literal, :literal, ' ', 28, 29],
|
183
|
+
[21, :group, :close, ')', 29, 30],
|
184
|
+
[22, :literal, :literal, 'g', 30, 31],
|
185
|
+
[23, :group, :close, ')', 31, 32],
|
186
|
+
[24, :literal, :literal, ' h', 32, 34],
|
187
|
+
[25, :group, :close, ')', 34, 35],
|
188
|
+
[26, :literal, :literal, 'i j', 35, 38],
|
189
|
+
[27, :group, :close, ')', 38, 39]
|
190
190
|
].each do |index, type, token, text, ts, te|
|
191
191
|
result = tokens[index]
|
192
192
|
|