regexp_parser 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -1,176 +1,179 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  class TestParserSets < Test::Unit::TestCase
4
-
5
4
  def test_parse_set_basic
6
- root = RP.parse('[a-c]+', :any)
7
- exp = root.expressions.at(0)
5
+ root = RP.parse('[ab]+')
6
+ exp = root[0]
7
+
8
+ assert_equal CharacterSet, exp.class
9
+ assert_equal 2, exp.count
8
10
 
9
- assert_equal true, exp.is_a?(CharacterSet)
10
- assert_equal true, exp.include?('a-c')
11
+ assert_equal Literal, exp[0].class
12
+ assert_equal 'a', exp[0].text
13
+ assert_equal Literal, exp[1].class
14
+ assert_equal 'b', exp[1].text
11
15
 
12
- assert_equal true, exp.quantified?
13
- assert_equal 1, exp.quantifier.min
14
- assert_equal(-1, exp.quantifier.max)
16
+ assert exp.quantified?
17
+ assert_equal 1, exp.quantifier.min
18
+ assert_equal(-1, exp.quantifier.max)
15
19
  end
16
20
 
17
- def test_parse_set_posix_class
18
- root = RP.parse('[[:digit:][:lower:]]+', 'ruby/1.9')
19
- exp = root.expressions.at(0)
21
+ def test_parse_set_char_type
22
+ root = RP.parse('[a\dc]')
23
+ exp = root[0]
20
24
 
21
- assert_equal true, exp.is_a?(CharacterSet)
25
+ assert_equal CharacterSet, exp.class
26
+ assert_equal 3, exp.count
22
27
 
23
- assert_equal true, exp.include?('[:digit:]')
24
- assert_equal true, exp.include?('[:lower:]')
28
+ assert_equal CharacterType::Digit, exp[1].class
29
+ assert_equal '\d', exp[1].text
30
+ end
25
31
 
26
- assert_equal true, exp.matches?("6")
32
+ def test_parse_set_escape_sequence_backspace
33
+ root = RP.parse('[a\bc]')
34
+ exp = root[0]
27
35
 
28
- assert_equal true, exp.matches?("v")
29
- assert_equal false, exp.matches?("\x48")
36
+ assert_equal CharacterSet, exp.class
37
+ assert_equal 3, exp.count
38
+
39
+ assert_equal EscapeSequence::Backspace, exp[1].class
40
+ assert_equal '\b', exp[1].text
41
+
42
+ assert exp.matches?('a')
43
+ assert exp.matches?("\b")
44
+ refute exp.matches?('b')
45
+ assert exp.matches?('c')
30
46
  end
31
47
 
32
- def test_parse_set_members
33
- root = RP.parse('[ac-eh]', :any)
34
- exp = root.expressions.at(0)
48
+ def test_parse_set_escape_sequence_hex
49
+ root = RP.parse('[a\x20c]', :any)
50
+ exp = root[0]
51
+
52
+ assert_equal CharacterSet, exp.class
53
+ assert_equal 3, exp.count
35
54
 
36
- assert_equal true, exp.include?('a')
37
- assert_equal true, exp.include?('c-e')
38
- assert_equal true, exp.include?('h')
39
- assert_equal false, exp.include?(']')
55
+ assert_equal EscapeSequence::Hex, exp[1].class
56
+ assert_equal '\x20', exp[1].text
40
57
  end
41
58
 
42
- def test_parse_hex_members
43
- root = RP.parse('[\x20\x24-\x26\x28]', :any)
44
- exp = root.expressions.at(0)
59
+ def test_parse_set_escape_sequence_codepoint
60
+ root = RP.parse('[a\u0640]')
61
+ exp = root[0]
45
62
 
46
- assert_equal true, exp.include?('\x20')
47
- assert_equal true, exp.include?('\x24-\x26')
48
- assert_equal true, exp.include?('\x28')
49
- assert_equal false, exp.include?(']')
63
+ assert_equal CharacterSet, exp.class
64
+ assert_equal 2, exp.count
65
+
66
+ assert_equal EscapeSequence::Codepoint, exp[1].class
67
+ assert_equal '\u0640', exp[1].text
50
68
  end
51
69
 
52
- def test_parse_chat_type_set_members
53
- root = RP.parse('[\da-z]', :any)
54
- exp = root.expressions.at(0)
70
+ def test_parse_set_escape_sequence_codepoint_list
71
+ root = RP.parse('[a\u{41 1F60D}]')
72
+ exp = root[0]
73
+
74
+ assert_equal CharacterSet, exp.class
75
+ assert_equal 2, exp.count
55
76
 
56
- assert_equal true, exp.include?('\d')
57
- assert_equal true, exp.include?('a-z')
77
+ assert_equal EscapeSequence::CodepointList, exp[1].class
78
+ assert_equal '\u{41 1F60D}', exp[1].text
58
79
  end
59
80
 
60
- def test_parse_set_collating_sequence
61
- root = RP.parse('[a[.span-ll.]h]', :any)
62
- exp = root.expressions.at(0)
81
+ def test_parse_set_posix_class
82
+ root = RP.parse('[[:digit:][:^lower:]]+')
83
+ exp = root[0]
84
+
85
+ assert_equal CharacterSet, exp.class
86
+ assert_equal 2, exp.count
63
87
 
64
- assert_equal true, exp.include?('[.span-ll.]')
65
- assert_equal false, exp.include?(']')
88
+ assert_equal PosixClass, exp[0].class
89
+ assert_equal '[:digit:]', exp[0].text
90
+ assert_equal PosixClass, exp[1].class
91
+ assert_equal '[:^lower:]', exp[1].text
66
92
  end
67
93
 
68
- def test_parse_set_character_equivalents
69
- root = RP.parse('[a[=e=]h]', :any)
70
- exp = root.expressions.at(0)
94
+ def test_parse_set_nesting
95
+ root = RP.parse('[a[b[c]d]e]')
96
+
97
+ exp = root[0]
98
+ assert_equal CharacterSet, exp.class
99
+ assert_equal 3, exp.count
100
+ assert_equal Literal, exp[0].class
101
+ assert_equal Literal, exp[2].class
102
+
103
+ subset1 = exp[1]
104
+ assert_equal CharacterSet, subset1.class
105
+ assert_equal 3, subset1.count
106
+ assert_equal Literal, subset1[0].class
107
+ assert_equal Literal, subset1[2].class
108
+
109
+ subset2 = subset1[1]
110
+ assert_equal CharacterSet, subset2.class
111
+ assert_equal 1, subset2.count
112
+ assert_equal Literal, subset2[0].class
113
+ end
71
114
 
72
- assert_equal true, exp.include?('[=e=]')
73
- assert_equal false, exp.include?(']')
115
+ def test_parse_set_nesting_negative
116
+ root = RP.parse('[a[^b[c]]]')
117
+ exp = root[0]
118
+
119
+ assert_equal CharacterSet, exp.class
120
+ assert_equal 2, exp.count
121
+ assert_equal Literal, exp[0].class
122
+ refute exp.negative?
123
+
124
+ subset1 = exp[1]
125
+ assert_equal CharacterSet, subset1.class
126
+ assert_equal 2, subset1.count
127
+ assert_equal Literal, subset1[0].class
128
+ assert subset1.negative?
129
+
130
+ subset2 = subset1[1]
131
+ assert_equal CharacterSet, subset2.class
132
+ assert_equal 1, subset2.count
133
+ assert_equal Literal, subset2[0].class
134
+ refute subset2.negative?
74
135
  end
75
136
 
76
- def test_parse_set_nesting_tos
137
+ def test_parse_set_nesting_to_s
77
138
  pattern = '[a[b[^c]]]'
78
- root = RP.parse(pattern, 'ruby/1.9')
139
+ root = RP.parse(pattern)
79
140
 
80
141
  assert_equal pattern, root.to_s
81
142
  end
82
143
 
83
- def test_parse_set_nesting_include
84
- root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
85
- exp = root.expressions.at(0)
144
+ def test_parse_set_literals_are_not_merged
145
+ root = RP.parse("[#{'a' * 10}]")
146
+ exp = root[0]
86
147
 
87
- assert_equal true, exp.is_a?(CharacterSet)
88
- assert_equal true, exp.include?('a')
89
- assert_equal true, exp.include?('b')
90
- assert_equal true, exp.include?('c')
148
+ assert_equal 10, exp.count
91
149
  end
92
150
 
93
- def test_parse_set_nesting_include_at_depth
94
- root = RP.parse('[a[b]c]', 'ruby/1.9')
151
+ def test_parse_set_whitespace_is_not_merged
152
+ root = RP.parse("[#{' ' * 10}]")
153
+ exp = root[0]
95
154
 
96
- exp = root.expressions.at(0)
97
- assert_equal true, exp.is_a?(CharacterSet)
98
- assert_equal true, exp.include?('a')
99
- assert_equal true, exp.include?('b')
100
- assert_equal false, exp.include?('b', true) # should not include b directly
101
-
102
- sub = exp.members.at(1)
103
- assert_equal false, sub.include?('a')
104
- assert_equal true, sub.include?('b')
105
- assert_equal true, sub.include?('b', true)
106
- assert_equal false, sub.include?('c')
155
+ assert_equal 10, exp.count
107
156
  end
108
157
 
109
- def test_parse_set_nesting_include_at_depth_2
110
- root = RP.parse('[a[b[c[d]e]f]g]', 'ruby/1.9')
111
-
112
- exp = root.expressions.at(0)
113
- assert_equal true, exp.is_a?(CharacterSet)
114
- assert_equal true, exp.include?('a')
115
- assert_equal true, exp.include?('b')
116
- assert_equal false, exp.include?('b', true) # should not include b directly
117
-
118
- sub = exp.members.at(1)
119
- assert_equal false, sub.include?('a')
120
- assert_equal true, sub.include?('b')
121
- assert_equal true, sub.include?('b', true)
122
- assert_equal true, sub.include?('f', true)
123
- assert_equal true, sub.include?('c')
124
- assert_equal false, sub.include?('c', true)
125
-
126
- sub2 = sub.members.at(1)
127
- assert_equal false, sub2.include?('a')
128
- assert_equal false, sub2.include?('b')
129
- assert_equal true, sub2.include?('c')
130
- assert_equal true, sub2.include?('c', true)
131
- assert_equal true, sub2.include?('e', true)
132
- assert_equal true, sub2.include?('d')
133
- assert_equal false, sub2.include?('d', true)
134
-
135
- sub3 = sub2.members.at(1)
136
- assert_equal false, sub3.include?('a')
137
- assert_equal false, sub3.include?('g')
138
- assert_equal false, sub3.include?('b')
139
- assert_equal false, sub3.include?('f')
140
- assert_equal false, sub3.include?('c')
141
- assert_equal false, sub3.include?('e')
142
- assert_equal true, sub3.include?('d')
143
- assert_equal true, sub3.include?('d', true)
144
- end
145
-
146
- # character subsets and negated posix classes are not available in ruby 1.8
147
- if RUBY_VERSION >= '1.9'
148
- def test_parse_set_nesting_matches
149
- root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
150
- exp = root.expressions.at(0)
151
-
152
- assert_equal true, exp.matches?('b')
153
- assert_equal false, exp.matches?('c')
154
- end
155
-
156
- def test_parse_set_nesting_not_matches
157
- root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
158
- exp = root.expressions.at(0)
158
+ def test_parse_set_whitespace_is_not_merged_in_x_mode
159
+ root = RP.parse("(?x)[#{' ' * 10}]")
160
+ exp = root[1]
159
161
 
160
- assert_equal false, exp.matches?('c')
161
- end
162
+ assert_equal 10, exp.count
163
+ end
162
164
 
163
- def test_parse_set_negated_posix_class
164
- root = RP.parse('[[:^xdigit:][:^lower:]]+', 'ruby/1.9')
165
- exp = root.expressions.at(0)
165
+ # TODO: Collations and equivalents need own exp class if they ever get enabled
166
+ def test_parse_set_collating_sequence
167
+ root = RP.parse('[a[.span-ll.]h]', :any)
168
+ exp = root[0]
166
169
 
167
- assert_equal true, exp.is_a?(CharacterSet)
170
+ assert_equal '[.span-ll.]', exp[1].to_s
171
+ end
168
172
 
169
- assert_equal true, exp.include?('[:^xdigit:]')
170
- assert_equal true, exp.include?('[:^lower:]')
173
+ def test_parse_set_character_equivalents
174
+ root = RP.parse('[a[=e=]h]', :any)
175
+ exp = root[0]
171
176
 
172
- assert_equal true, exp.matches?('GT')
173
- end
177
+ assert_equal '[=e=]', exp[1].to_s
174
178
  end
175
-
176
179
  end
@@ -13,12 +13,6 @@ if RUBY_VERSION >= '2.0.0'
13
13
  end
14
14
  end
15
15
 
16
- if RUBY_VERSION >= '2.5.0'
17
- %w{emojis}.each do|tc|
18
- require File.expand_path("../test_#{tc}", __FILE__)
19
- end
20
- end
21
-
22
16
  class TestRegexpScanner < Test::Unit::TestCase
23
17
 
24
18
  def test_scanner_returns_an_array
@@ -38,7 +32,7 @@ class TestRegexpScanner < Test::Unit::TestCase
38
32
  def test_scanner_token_count
39
33
  re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
40
34
 
41
- assert_equal 26, RS.scan(re).length
35
+ assert_equal 28, RS.scan(re).length
42
36
  end
43
37
 
44
38
  end
@@ -4,22 +4,22 @@ class ScannerConditionals < Test::Unit::TestCase
4
4
 
5
5
  # Basic conditional scan token tests
6
6
  tests = {
7
- /(a)(?(1)T|F)/ => [3, :conditional, :open, '(?', 3, 5],
8
- /(a)(?(1)T|F)/ => [4, :conditional, :condition_open, '(', 5, 6],
9
- /(a)(?(1)T|F)/ => [5, :conditional, :condition, '3', 6, 7],
10
- /(a)(?(1)T|F)/ => [6, :conditional, :condition_close, ')', 7, 8],
11
- /(a)(?(1)T|F)/ => [7, :literal, :literal, 'T', 8, 9],
12
- /(a)(?(1)T|F)/ => [8, :conditional, :separator, '|', 9, 10],
13
- /(a)(?(1)T|F)/ => [9, :literal, :literal, 'F', 10, 11],
14
- /(a)(?(1)T|F)/ => [10, :conditional, :close, ')', 11, 12],
15
-
16
- /(a)(?(1)TRUE)/ => [8, :conditional, :close, ')', 12, 13],
17
-
18
- /(a)(?(1)TRUE|)/ => [8, :conditional, :separator, '|', 12, 13],
19
- /(a)(?(1)TRUE|)/ => [9, :conditional, :close, ')', 13, 14],
20
-
21
- /(?<N>A)(?(<N>)T|F)/ => [5, :conditional, :condition, '<N>', 10, 13],
22
- /(?'N'A)(?('N')T|F)/ => [5, :conditional, :condition, "'N'", 10, 13],
7
+ /(a)(?(1)T|F)1/ => [3, :conditional, :open, '(?', 3, 5],
8
+ /(a)(?(1)T|F)2/ => [4, :conditional, :condition_open, '(', 5, 6],
9
+ /(a)(?(1)T|F)3/ => [5, :conditional, :condition, '1', 6, 7],
10
+ /(a)(?(1)T|F)4/ => [6, :conditional, :condition_close, ')', 7, 8],
11
+ /(a)(?(1)T|F)5/ => [7, :literal, :literal, 'T', 8, 9],
12
+ /(a)(?(1)T|F)6/ => [8, :conditional, :separator, '|', 9, 10],
13
+ /(a)(?(1)T|F)7/ => [9, :literal, :literal, 'F', 10, 11],
14
+ /(a)(?(1)T|F)8/ => [10, :conditional, :close, ')', 11, 12],
15
+
16
+ /(a)(?(1)TRUE)9/ => [8, :conditional, :close, ')', 12, 13],
17
+
18
+ /(a)(?(1)TRUE|)10/ => [8, :conditional, :separator, '|', 12, 13],
19
+ /(a)(?(1)TRUE|)11/ => [9, :conditional, :close, ')', 13, 14],
20
+
21
+ /(?<N>A)(?(<N>)T|F)1/ => [5, :conditional, :condition, '<N>', 10, 13],
22
+ /(?'N'A)(?('N')T|F)2/ => [5, :conditional, :condition, "'N'", 10, 13],
23
23
  }
24
24
 
25
25
  tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
@@ -46,18 +46,6 @@ class ScannerErrors < Test::Unit::TestCase
46
46
  assert_raise( RS::PrematureEndError ) { RS.scan('\x') }
47
47
  end
48
48
 
49
- def test_scanner_eof_in_wide_hex_escape
50
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{') }
51
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{0') }
52
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02') }
53
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{024') }
54
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{0246') }
55
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468') }
56
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468A') }
57
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468AC') }
58
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468ACE') }
59
- end
60
-
61
49
  def test_scanner_eof_in_codepoint_escape
62
50
  assert_raise( RS::PrematureEndError ) { RS.scan('\u') }
63
51
  assert_raise( RS::PrematureEndError ) { RS.scan('\u0') }
@@ -94,24 +82,6 @@ class ScannerErrors < Test::Unit::TestCase
94
82
  assert_raise( RS::InvalidSequenceError ) { RS.scan('\xZ0') }
95
83
  end
96
84
 
97
- def test_scanner_invalid_wide_hex_escape
98
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{}') }
99
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ }') }
100
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ A }') }
101
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0-}') }
102
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{Z00}') }
103
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{000Z}') }
104
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00ZZ}') }
105
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ}') }
106
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0}') }
107
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0X}') }
108
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00X') }
109
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00XYZ') }
110
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000XYZ') }
111
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACED') }
112
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACE]') }
113
- end
114
-
115
85
  def test_scanner_invalid_named_group
116
86
  assert_raise( RS::InvalidGroupError ) { RS.scan("(?'')") }
117
87
  assert_raise( RS::InvalidGroupError ) { RS.scan("(?''empty-name)") }
@@ -22,10 +22,9 @@ class ScannerEscapes < Test::Unit::TestCase
22
22
  'a\x24c' => [1, :escape, :hex, '\x24', 1, 5],
23
23
  'a\x0640c' => [1, :escape, :hex, '\x06', 1, 5],
24
24
 
25
- 'a\x{0640}c' => [1, :escape, :hex_wide, '\x{0640}', 1, 9],
26
-
27
25
  'a\u0640c' => [1, :escape, :codepoint, '\u0640', 1, 7],
28
26
  'a\u{640 0641}c' => [1, :escape, :codepoint_list, '\u{640 0641}', 1, 13],
27
+ 'a\u{10FFFF}c' => [1, :escape, :codepoint_list, '\u{10FFFF}', 1, 11],
29
28
 
30
29
  /a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
31
30
  /a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
@@ -159,34 +159,34 @@ class ScannerFreeSpace < Test::Unit::TestCase
159
159
  regexp = /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/
160
160
  tokens = RS.scan(regexp)
161
161
  [
162
- [ 0, :group, :capture, '(', 0, 1],
163
- [ 1, :literal, :literal, 'a ', 1, 3],
164
- [ 2, :group, :capture, '(', 3, 4],
165
- [ 3, :literal, :literal, 'b', 4, 5],
166
- [ 4, :group, :capture, '(', 5, 6],
167
- [ 5, :group, :options, '(?x', 6, 9],
168
- [ 6, :group, :close, ')', 9, 10],
169
- [ 7, :free_space, :whitespace, ' ', 10, 11],
170
- [ 8, :group, :capture, '(', 11, 12],
171
- [ 9, :literal, :literal, 'c', 12, 13],
172
- [10, :free_space, :whitespace, ' ', 13, 14],
173
- [11, :literal, :literal, 'd', 14, 15],
174
- [12, :group, :close, ')', 15, 16],
175
- [13, :free_space, :whitespace, ' ', 16, 17],
176
- [14, :group, :capture, '(', 17, 18],
177
- [15, :group, :options, '(?-x', 18, 22],
178
- [16, :group, :close, ')', 22, 23],
179
- [17, :group, :capture, '(', 23, 24],
180
- [18, :literal, :literal, 'e f', 24, 27],
181
- [19, :group, :close, ')', 27, 28],
182
- [20, :literal, :literal, ' ', 28, 29],
183
- [21, :group, :close, ')', 29, 30],
184
- [22, :literal, :literal, 'g', 30, 31],
185
- [23, :group, :close, ')', 31, 32],
186
- [24, :literal, :literal, ' h', 32, 34],
187
- [25, :group, :close, ')', 34, 35],
188
- [26, :literal, :literal, 'i j', 35, 38],
189
- [27, :group, :close, ')', 38, 39]
162
+ [ 0, :group, :capture, '(', 0, 1],
163
+ [ 1, :literal, :literal, 'a ', 1, 3],
164
+ [ 2, :group, :capture, '(', 3, 4],
165
+ [ 3, :literal, :literal, 'b', 4, 5],
166
+ [ 4, :group, :capture, '(', 5, 6],
167
+ [ 5, :group, :options_switch, '(?x', 6, 9],
168
+ [ 6, :group, :close, ')', 9, 10],
169
+ [ 7, :free_space, :whitespace, ' ', 10, 11],
170
+ [ 8, :group, :capture, '(', 11, 12],
171
+ [ 9, :literal, :literal, 'c', 12, 13],
172
+ [10, :free_space, :whitespace, ' ', 13, 14],
173
+ [11, :literal, :literal, 'd', 14, 15],
174
+ [12, :group, :close, ')', 15, 16],
175
+ [13, :free_space, :whitespace, ' ', 16, 17],
176
+ [14, :group, :capture, '(', 17, 18],
177
+ [15, :group, :options_switch, '(?-x', 18, 22],
178
+ [16, :group, :close, ')', 22, 23],
179
+ [17, :group, :capture, '(', 23, 24],
180
+ [18, :literal, :literal, 'e f', 24, 27],
181
+ [19, :group, :close, ')', 27, 28],
182
+ [20, :literal, :literal, ' ', 28, 29],
183
+ [21, :group, :close, ')', 29, 30],
184
+ [22, :literal, :literal, 'g', 30, 31],
185
+ [23, :group, :close, ')', 31, 32],
186
+ [24, :literal, :literal, ' h', 32, 34],
187
+ [25, :group, :close, ')', 34, 35],
188
+ [26, :literal, :literal, 'i j', 35, 38],
189
+ [27, :group, :close, ')', 38, 39]
190
190
  ].each do |index, type, token, text, ts, te|
191
191
  result = tokens[index]
192
192