regexp_parser 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
data/test/parser/test_sets.rb
CHANGED
@@ -1,176 +1,179 @@
|
|
1
1
|
require File.expand_path("../../helpers", __FILE__)
|
2
2
|
|
3
3
|
class TestParserSets < Test::Unit::TestCase
|
4
|
-
|
5
4
|
def test_parse_set_basic
|
6
|
-
root = RP.parse('[
|
7
|
-
exp = root
|
5
|
+
root = RP.parse('[ab]+')
|
6
|
+
exp = root[0]
|
7
|
+
|
8
|
+
assert_equal CharacterSet, exp.class
|
9
|
+
assert_equal 2, exp.count
|
8
10
|
|
9
|
-
assert_equal
|
10
|
-
assert_equal
|
11
|
+
assert_equal Literal, exp[0].class
|
12
|
+
assert_equal 'a', exp[0].text
|
13
|
+
assert_equal Literal, exp[1].class
|
14
|
+
assert_equal 'b', exp[1].text
|
11
15
|
|
12
|
-
|
13
|
-
assert_equal 1,
|
14
|
-
assert_equal(-1,
|
16
|
+
assert exp.quantified?
|
17
|
+
assert_equal 1, exp.quantifier.min
|
18
|
+
assert_equal(-1, exp.quantifier.max)
|
15
19
|
end
|
16
20
|
|
17
|
-
def
|
18
|
-
root = RP.parse('[
|
19
|
-
exp = root
|
21
|
+
def test_parse_set_char_type
|
22
|
+
root = RP.parse('[a\dc]')
|
23
|
+
exp = root[0]
|
20
24
|
|
21
|
-
assert_equal
|
25
|
+
assert_equal CharacterSet, exp.class
|
26
|
+
assert_equal 3, exp.count
|
22
27
|
|
23
|
-
assert_equal
|
24
|
-
assert_equal
|
28
|
+
assert_equal CharacterType::Digit, exp[1].class
|
29
|
+
assert_equal '\d', exp[1].text
|
30
|
+
end
|
25
31
|
|
26
|
-
|
32
|
+
def test_parse_set_escape_sequence_backspace
|
33
|
+
root = RP.parse('[a\bc]')
|
34
|
+
exp = root[0]
|
27
35
|
|
28
|
-
assert_equal
|
29
|
-
assert_equal
|
36
|
+
assert_equal CharacterSet, exp.class
|
37
|
+
assert_equal 3, exp.count
|
38
|
+
|
39
|
+
assert_equal EscapeSequence::Backspace, exp[1].class
|
40
|
+
assert_equal '\b', exp[1].text
|
41
|
+
|
42
|
+
assert exp.matches?('a')
|
43
|
+
assert exp.matches?("\b")
|
44
|
+
refute exp.matches?('b')
|
45
|
+
assert exp.matches?('c')
|
30
46
|
end
|
31
47
|
|
32
|
-
def
|
33
|
-
root = RP.parse('[
|
34
|
-
exp = root
|
48
|
+
def test_parse_set_escape_sequence_hex
|
49
|
+
root = RP.parse('[a\x20c]', :any)
|
50
|
+
exp = root[0]
|
51
|
+
|
52
|
+
assert_equal CharacterSet, exp.class
|
53
|
+
assert_equal 3, exp.count
|
35
54
|
|
36
|
-
assert_equal
|
37
|
-
assert_equal
|
38
|
-
assert_equal true, exp.include?('h')
|
39
|
-
assert_equal false, exp.include?(']')
|
55
|
+
assert_equal EscapeSequence::Hex, exp[1].class
|
56
|
+
assert_equal '\x20', exp[1].text
|
40
57
|
end
|
41
58
|
|
42
|
-
def
|
43
|
-
root = RP.parse('[\
|
44
|
-
exp = root
|
59
|
+
def test_parse_set_escape_sequence_codepoint
|
60
|
+
root = RP.parse('[a\u0640]')
|
61
|
+
exp = root[0]
|
45
62
|
|
46
|
-
assert_equal
|
47
|
-
assert_equal
|
48
|
-
|
49
|
-
assert_equal
|
63
|
+
assert_equal CharacterSet, exp.class
|
64
|
+
assert_equal 2, exp.count
|
65
|
+
|
66
|
+
assert_equal EscapeSequence::Codepoint, exp[1].class
|
67
|
+
assert_equal '\u0640', exp[1].text
|
50
68
|
end
|
51
69
|
|
52
|
-
def
|
53
|
-
root = RP.parse('[\
|
54
|
-
exp = root
|
70
|
+
def test_parse_set_escape_sequence_codepoint_list
|
71
|
+
root = RP.parse('[a\u{41 1F60D}]')
|
72
|
+
exp = root[0]
|
73
|
+
|
74
|
+
assert_equal CharacterSet, exp.class
|
75
|
+
assert_equal 2, exp.count
|
55
76
|
|
56
|
-
assert_equal
|
57
|
-
assert_equal
|
77
|
+
assert_equal EscapeSequence::CodepointList, exp[1].class
|
78
|
+
assert_equal '\u{41 1F60D}', exp[1].text
|
58
79
|
end
|
59
80
|
|
60
|
-
def
|
61
|
-
root = RP.parse('[
|
62
|
-
exp = root
|
81
|
+
def test_parse_set_posix_class
|
82
|
+
root = RP.parse('[[:digit:][:^lower:]]+')
|
83
|
+
exp = root[0]
|
84
|
+
|
85
|
+
assert_equal CharacterSet, exp.class
|
86
|
+
assert_equal 2, exp.count
|
63
87
|
|
64
|
-
assert_equal
|
65
|
-
assert_equal
|
88
|
+
assert_equal PosixClass, exp[0].class
|
89
|
+
assert_equal '[:digit:]', exp[0].text
|
90
|
+
assert_equal PosixClass, exp[1].class
|
91
|
+
assert_equal '[:^lower:]', exp[1].text
|
66
92
|
end
|
67
93
|
|
68
|
-
def
|
69
|
-
root = RP.parse('[a[
|
70
|
-
|
94
|
+
def test_parse_set_nesting
|
95
|
+
root = RP.parse('[a[b[c]d]e]')
|
96
|
+
|
97
|
+
exp = root[0]
|
98
|
+
assert_equal CharacterSet, exp.class
|
99
|
+
assert_equal 3, exp.count
|
100
|
+
assert_equal Literal, exp[0].class
|
101
|
+
assert_equal Literal, exp[2].class
|
102
|
+
|
103
|
+
subset1 = exp[1]
|
104
|
+
assert_equal CharacterSet, subset1.class
|
105
|
+
assert_equal 3, subset1.count
|
106
|
+
assert_equal Literal, subset1[0].class
|
107
|
+
assert_equal Literal, subset1[2].class
|
108
|
+
|
109
|
+
subset2 = subset1[1]
|
110
|
+
assert_equal CharacterSet, subset2.class
|
111
|
+
assert_equal 1, subset2.count
|
112
|
+
assert_equal Literal, subset2[0].class
|
113
|
+
end
|
71
114
|
|
72
|
-
|
73
|
-
|
115
|
+
def test_parse_set_nesting_negative
|
116
|
+
root = RP.parse('[a[^b[c]]]')
|
117
|
+
exp = root[0]
|
118
|
+
|
119
|
+
assert_equal CharacterSet, exp.class
|
120
|
+
assert_equal 2, exp.count
|
121
|
+
assert_equal Literal, exp[0].class
|
122
|
+
refute exp.negative?
|
123
|
+
|
124
|
+
subset1 = exp[1]
|
125
|
+
assert_equal CharacterSet, subset1.class
|
126
|
+
assert_equal 2, subset1.count
|
127
|
+
assert_equal Literal, subset1[0].class
|
128
|
+
assert subset1.negative?
|
129
|
+
|
130
|
+
subset2 = subset1[1]
|
131
|
+
assert_equal CharacterSet, subset2.class
|
132
|
+
assert_equal 1, subset2.count
|
133
|
+
assert_equal Literal, subset2[0].class
|
134
|
+
refute subset2.negative?
|
74
135
|
end
|
75
136
|
|
76
|
-
def
|
137
|
+
def test_parse_set_nesting_to_s
|
77
138
|
pattern = '[a[b[^c]]]'
|
78
|
-
root = RP.parse(pattern
|
139
|
+
root = RP.parse(pattern)
|
79
140
|
|
80
141
|
assert_equal pattern, root.to_s
|
81
142
|
end
|
82
143
|
|
83
|
-
def
|
84
|
-
root = RP.parse('
|
85
|
-
exp = root
|
144
|
+
def test_parse_set_literals_are_not_merged
|
145
|
+
root = RP.parse("[#{'a' * 10}]")
|
146
|
+
exp = root[0]
|
86
147
|
|
87
|
-
assert_equal
|
88
|
-
assert_equal true, exp.include?('a')
|
89
|
-
assert_equal true, exp.include?('b')
|
90
|
-
assert_equal true, exp.include?('c')
|
148
|
+
assert_equal 10, exp.count
|
91
149
|
end
|
92
150
|
|
93
|
-
def
|
94
|
-
root = RP.parse(
|
151
|
+
def test_parse_set_whitespace_is_not_merged
|
152
|
+
root = RP.parse("[#{' ' * 10}]")
|
153
|
+
exp = root[0]
|
95
154
|
|
96
|
-
|
97
|
-
assert_equal true, exp.is_a?(CharacterSet)
|
98
|
-
assert_equal true, exp.include?('a')
|
99
|
-
assert_equal true, exp.include?('b')
|
100
|
-
assert_equal false, exp.include?('b', true) # should not include b directly
|
101
|
-
|
102
|
-
sub = exp.members.at(1)
|
103
|
-
assert_equal false, sub.include?('a')
|
104
|
-
assert_equal true, sub.include?('b')
|
105
|
-
assert_equal true, sub.include?('b', true)
|
106
|
-
assert_equal false, sub.include?('c')
|
155
|
+
assert_equal 10, exp.count
|
107
156
|
end
|
108
157
|
|
109
|
-
def
|
110
|
-
root = RP.parse(
|
111
|
-
|
112
|
-
exp = root.expressions.at(0)
|
113
|
-
assert_equal true, exp.is_a?(CharacterSet)
|
114
|
-
assert_equal true, exp.include?('a')
|
115
|
-
assert_equal true, exp.include?('b')
|
116
|
-
assert_equal false, exp.include?('b', true) # should not include b directly
|
117
|
-
|
118
|
-
sub = exp.members.at(1)
|
119
|
-
assert_equal false, sub.include?('a')
|
120
|
-
assert_equal true, sub.include?('b')
|
121
|
-
assert_equal true, sub.include?('b', true)
|
122
|
-
assert_equal true, sub.include?('f', true)
|
123
|
-
assert_equal true, sub.include?('c')
|
124
|
-
assert_equal false, sub.include?('c', true)
|
125
|
-
|
126
|
-
sub2 = sub.members.at(1)
|
127
|
-
assert_equal false, sub2.include?('a')
|
128
|
-
assert_equal false, sub2.include?('b')
|
129
|
-
assert_equal true, sub2.include?('c')
|
130
|
-
assert_equal true, sub2.include?('c', true)
|
131
|
-
assert_equal true, sub2.include?('e', true)
|
132
|
-
assert_equal true, sub2.include?('d')
|
133
|
-
assert_equal false, sub2.include?('d', true)
|
134
|
-
|
135
|
-
sub3 = sub2.members.at(1)
|
136
|
-
assert_equal false, sub3.include?('a')
|
137
|
-
assert_equal false, sub3.include?('g')
|
138
|
-
assert_equal false, sub3.include?('b')
|
139
|
-
assert_equal false, sub3.include?('f')
|
140
|
-
assert_equal false, sub3.include?('c')
|
141
|
-
assert_equal false, sub3.include?('e')
|
142
|
-
assert_equal true, sub3.include?('d')
|
143
|
-
assert_equal true, sub3.include?('d', true)
|
144
|
-
end
|
145
|
-
|
146
|
-
# character subsets and negated posix classes are not available in ruby 1.8
|
147
|
-
if RUBY_VERSION >= '1.9'
|
148
|
-
def test_parse_set_nesting_matches
|
149
|
-
root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
|
150
|
-
exp = root.expressions.at(0)
|
151
|
-
|
152
|
-
assert_equal true, exp.matches?('b')
|
153
|
-
assert_equal false, exp.matches?('c')
|
154
|
-
end
|
155
|
-
|
156
|
-
def test_parse_set_nesting_not_matches
|
157
|
-
root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
|
158
|
-
exp = root.expressions.at(0)
|
158
|
+
def test_parse_set_whitespace_is_not_merged_in_x_mode
|
159
|
+
root = RP.parse("(?x)[#{' ' * 10}]")
|
160
|
+
exp = root[1]
|
159
161
|
|
160
|
-
|
161
|
-
|
162
|
+
assert_equal 10, exp.count
|
163
|
+
end
|
162
164
|
|
163
|
-
|
164
|
-
|
165
|
-
|
165
|
+
# TODO: Collations and equivalents need own exp class if they ever get enabled
|
166
|
+
def test_parse_set_collating_sequence
|
167
|
+
root = RP.parse('[a[.span-ll.]h]', :any)
|
168
|
+
exp = root[0]
|
166
169
|
|
167
|
-
|
170
|
+
assert_equal '[.span-ll.]', exp[1].to_s
|
171
|
+
end
|
168
172
|
|
169
|
-
|
170
|
-
|
173
|
+
def test_parse_set_character_equivalents
|
174
|
+
root = RP.parse('[a[=e=]h]', :any)
|
175
|
+
exp = root[0]
|
171
176
|
|
172
|
-
|
173
|
-
end
|
177
|
+
assert_equal '[=e=]', exp[1].to_s
|
174
178
|
end
|
175
|
-
|
176
179
|
end
|
data/test/scanner/test_all.rb
CHANGED
@@ -13,12 +13,6 @@ if RUBY_VERSION >= '2.0.0'
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
if RUBY_VERSION >= '2.5.0'
|
17
|
-
%w{emojis}.each do|tc|
|
18
|
-
require File.expand_path("../test_#{tc}", __FILE__)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
16
|
class TestRegexpScanner < Test::Unit::TestCase
|
23
17
|
|
24
18
|
def test_scanner_returns_an_array
|
@@ -38,7 +32,7 @@ class TestRegexpScanner < Test::Unit::TestCase
|
|
38
32
|
def test_scanner_token_count
|
39
33
|
re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
|
40
34
|
|
41
|
-
assert_equal
|
35
|
+
assert_equal 28, RS.scan(re).length
|
42
36
|
end
|
43
37
|
|
44
38
|
end
|
@@ -4,22 +4,22 @@ class ScannerConditionals < Test::Unit::TestCase
|
|
4
4
|
|
5
5
|
# Basic conditional scan token tests
|
6
6
|
tests = {
|
7
|
-
/(a)(?(1)T|F)/
|
8
|
-
/(a)(?(1)T|F)/
|
9
|
-
/(a)(?(1)T|F)/
|
10
|
-
/(a)(?(1)T|F)/
|
11
|
-
/(a)(?(1)T|F)/
|
12
|
-
/(a)(?(1)T|F)/
|
13
|
-
/(a)(?(1)T|F)/
|
14
|
-
/(a)(?(1)T|F)/
|
15
|
-
|
16
|
-
/(a)(?(1)TRUE)/
|
17
|
-
|
18
|
-
/(a)(?(1)TRUE|)/
|
19
|
-
/(a)(?(1)TRUE|)/
|
20
|
-
|
21
|
-
/(?<N>A)(?(<N>)T|F)/
|
22
|
-
/(?'N'A)(?('N')T|F)/
|
7
|
+
/(a)(?(1)T|F)1/ => [3, :conditional, :open, '(?', 3, 5],
|
8
|
+
/(a)(?(1)T|F)2/ => [4, :conditional, :condition_open, '(', 5, 6],
|
9
|
+
/(a)(?(1)T|F)3/ => [5, :conditional, :condition, '1', 6, 7],
|
10
|
+
/(a)(?(1)T|F)4/ => [6, :conditional, :condition_close, ')', 7, 8],
|
11
|
+
/(a)(?(1)T|F)5/ => [7, :literal, :literal, 'T', 8, 9],
|
12
|
+
/(a)(?(1)T|F)6/ => [8, :conditional, :separator, '|', 9, 10],
|
13
|
+
/(a)(?(1)T|F)7/ => [9, :literal, :literal, 'F', 10, 11],
|
14
|
+
/(a)(?(1)T|F)8/ => [10, :conditional, :close, ')', 11, 12],
|
15
|
+
|
16
|
+
/(a)(?(1)TRUE)9/ => [8, :conditional, :close, ')', 12, 13],
|
17
|
+
|
18
|
+
/(a)(?(1)TRUE|)10/ => [8, :conditional, :separator, '|', 12, 13],
|
19
|
+
/(a)(?(1)TRUE|)11/ => [9, :conditional, :close, ')', 13, 14],
|
20
|
+
|
21
|
+
/(?<N>A)(?(<N>)T|F)1/ => [5, :conditional, :condition, '<N>', 10, 13],
|
22
|
+
/(?'N'A)(?('N')T|F)2/ => [5, :conditional, :condition, "'N'", 10, 13],
|
23
23
|
}
|
24
24
|
|
25
25
|
tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
|
data/test/scanner/test_errors.rb
CHANGED
@@ -46,18 +46,6 @@ class ScannerErrors < Test::Unit::TestCase
|
|
46
46
|
assert_raise( RS::PrematureEndError ) { RS.scan('\x') }
|
47
47
|
end
|
48
48
|
|
49
|
-
def test_scanner_eof_in_wide_hex_escape
|
50
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{') }
|
51
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{0') }
|
52
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02') }
|
53
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{024') }
|
54
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{0246') }
|
55
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468') }
|
56
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468A') }
|
57
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468AC') }
|
58
|
-
assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468ACE') }
|
59
|
-
end
|
60
|
-
|
61
49
|
def test_scanner_eof_in_codepoint_escape
|
62
50
|
assert_raise( RS::PrematureEndError ) { RS.scan('\u') }
|
63
51
|
assert_raise( RS::PrematureEndError ) { RS.scan('\u0') }
|
@@ -94,24 +82,6 @@ class ScannerErrors < Test::Unit::TestCase
|
|
94
82
|
assert_raise( RS::InvalidSequenceError ) { RS.scan('\xZ0') }
|
95
83
|
end
|
96
84
|
|
97
|
-
def test_scanner_invalid_wide_hex_escape
|
98
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{}') }
|
99
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ }') }
|
100
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ A }') }
|
101
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0-}') }
|
102
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{Z00}') }
|
103
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{000Z}') }
|
104
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00ZZ}') }
|
105
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ}') }
|
106
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0}') }
|
107
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0X}') }
|
108
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00X') }
|
109
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00XYZ') }
|
110
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000XYZ') }
|
111
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACED') }
|
112
|
-
assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACE]') }
|
113
|
-
end
|
114
|
-
|
115
85
|
def test_scanner_invalid_named_group
|
116
86
|
assert_raise( RS::InvalidGroupError ) { RS.scan("(?'')") }
|
117
87
|
assert_raise( RS::InvalidGroupError ) { RS.scan("(?''empty-name)") }
|
@@ -22,10 +22,9 @@ class ScannerEscapes < Test::Unit::TestCase
|
|
22
22
|
'a\x24c' => [1, :escape, :hex, '\x24', 1, 5],
|
23
23
|
'a\x0640c' => [1, :escape, :hex, '\x06', 1, 5],
|
24
24
|
|
25
|
-
'a\x{0640}c' => [1, :escape, :hex_wide, '\x{0640}', 1, 9],
|
26
|
-
|
27
25
|
'a\u0640c' => [1, :escape, :codepoint, '\u0640', 1, 7],
|
28
26
|
'a\u{640 0641}c' => [1, :escape, :codepoint_list, '\u{640 0641}', 1, 13],
|
27
|
+
'a\u{10FFFF}c' => [1, :escape, :codepoint_list, '\u{10FFFF}', 1, 11],
|
29
28
|
|
30
29
|
/a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
|
31
30
|
/a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
|
@@ -159,34 +159,34 @@ class ScannerFreeSpace < Test::Unit::TestCase
|
|
159
159
|
regexp = /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/
|
160
160
|
tokens = RS.scan(regexp)
|
161
161
|
[
|
162
|
-
[ 0, :group, :capture,
|
163
|
-
[ 1, :literal, :literal,
|
164
|
-
[ 2, :group, :capture,
|
165
|
-
[ 3, :literal, :literal,
|
166
|
-
[ 4, :group, :capture,
|
167
|
-
[ 5, :group, :
|
168
|
-
[ 6, :group, :close,
|
169
|
-
[ 7, :free_space, :whitespace,
|
170
|
-
[ 8, :group, :capture,
|
171
|
-
[ 9, :literal, :literal,
|
172
|
-
[10, :free_space, :whitespace,
|
173
|
-
[11, :literal, :literal,
|
174
|
-
[12, :group, :close,
|
175
|
-
[13, :free_space, :whitespace,
|
176
|
-
[14, :group, :capture,
|
177
|
-
[15, :group, :
|
178
|
-
[16, :group, :close,
|
179
|
-
[17, :group, :capture,
|
180
|
-
[18, :literal, :literal,
|
181
|
-
[19, :group, :close,
|
182
|
-
[20, :literal, :literal,
|
183
|
-
[21, :group, :close,
|
184
|
-
[22, :literal, :literal,
|
185
|
-
[23, :group, :close,
|
186
|
-
[24, :literal, :literal,
|
187
|
-
[25, :group, :close,
|
188
|
-
[26, :literal, :literal,
|
189
|
-
[27, :group, :close,
|
162
|
+
[ 0, :group, :capture, '(', 0, 1],
|
163
|
+
[ 1, :literal, :literal, 'a ', 1, 3],
|
164
|
+
[ 2, :group, :capture, '(', 3, 4],
|
165
|
+
[ 3, :literal, :literal, 'b', 4, 5],
|
166
|
+
[ 4, :group, :capture, '(', 5, 6],
|
167
|
+
[ 5, :group, :options_switch, '(?x', 6, 9],
|
168
|
+
[ 6, :group, :close, ')', 9, 10],
|
169
|
+
[ 7, :free_space, :whitespace, ' ', 10, 11],
|
170
|
+
[ 8, :group, :capture, '(', 11, 12],
|
171
|
+
[ 9, :literal, :literal, 'c', 12, 13],
|
172
|
+
[10, :free_space, :whitespace, ' ', 13, 14],
|
173
|
+
[11, :literal, :literal, 'd', 14, 15],
|
174
|
+
[12, :group, :close, ')', 15, 16],
|
175
|
+
[13, :free_space, :whitespace, ' ', 16, 17],
|
176
|
+
[14, :group, :capture, '(', 17, 18],
|
177
|
+
[15, :group, :options_switch, '(?-x', 18, 22],
|
178
|
+
[16, :group, :close, ')', 22, 23],
|
179
|
+
[17, :group, :capture, '(', 23, 24],
|
180
|
+
[18, :literal, :literal, 'e f', 24, 27],
|
181
|
+
[19, :group, :close, ')', 27, 28],
|
182
|
+
[20, :literal, :literal, ' ', 28, 29],
|
183
|
+
[21, :group, :close, ')', 29, 30],
|
184
|
+
[22, :literal, :literal, 'g', 30, 31],
|
185
|
+
[23, :group, :close, ')', 31, 32],
|
186
|
+
[24, :literal, :literal, ' h', 32, 34],
|
187
|
+
[25, :group, :close, ')', 34, 35],
|
188
|
+
[26, :literal, :literal, 'i j', 35, 38],
|
189
|
+
[27, :group, :close, ')', 38, 39]
|
190
190
|
].each do |index, type, token, text, ts, te|
|
191
191
|
result = tokens[index]
|
192
192
|
|