regexp_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -1,176 +1,179 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  class TestParserSets < Test::Unit::TestCase
4
-
5
4
  def test_parse_set_basic
6
- root = RP.parse('[a-c]+', :any)
7
- exp = root.expressions.at(0)
5
+ root = RP.parse('[ab]+')
6
+ exp = root[0]
7
+
8
+ assert_equal CharacterSet, exp.class
9
+ assert_equal 2, exp.count
8
10
 
9
- assert_equal true, exp.is_a?(CharacterSet)
10
- assert_equal true, exp.include?('a-c')
11
+ assert_equal Literal, exp[0].class
12
+ assert_equal 'a', exp[0].text
13
+ assert_equal Literal, exp[1].class
14
+ assert_equal 'b', exp[1].text
11
15
 
12
- assert_equal true, exp.quantified?
13
- assert_equal 1, exp.quantifier.min
14
- assert_equal(-1, exp.quantifier.max)
16
+ assert exp.quantified?
17
+ assert_equal 1, exp.quantifier.min
18
+ assert_equal(-1, exp.quantifier.max)
15
19
  end
16
20
 
17
- def test_parse_set_posix_class
18
- root = RP.parse('[[:digit:][:lower:]]+', 'ruby/1.9')
19
- exp = root.expressions.at(0)
21
+ def test_parse_set_char_type
22
+ root = RP.parse('[a\dc]')
23
+ exp = root[0]
20
24
 
21
- assert_equal true, exp.is_a?(CharacterSet)
25
+ assert_equal CharacterSet, exp.class
26
+ assert_equal 3, exp.count
22
27
 
23
- assert_equal true, exp.include?('[:digit:]')
24
- assert_equal true, exp.include?('[:lower:]')
28
+ assert_equal CharacterType::Digit, exp[1].class
29
+ assert_equal '\d', exp[1].text
30
+ end
25
31
 
26
- assert_equal true, exp.matches?("6")
32
+ def test_parse_set_escape_sequence_backspace
33
+ root = RP.parse('[a\bc]')
34
+ exp = root[0]
27
35
 
28
- assert_equal true, exp.matches?("v")
29
- assert_equal false, exp.matches?("\x48")
36
+ assert_equal CharacterSet, exp.class
37
+ assert_equal 3, exp.count
38
+
39
+ assert_equal EscapeSequence::Backspace, exp[1].class
40
+ assert_equal '\b', exp[1].text
41
+
42
+ assert exp.matches?('a')
43
+ assert exp.matches?("\b")
44
+ refute exp.matches?('b')
45
+ assert exp.matches?('c')
30
46
  end
31
47
 
32
- def test_parse_set_members
33
- root = RP.parse('[ac-eh]', :any)
34
- exp = root.expressions.at(0)
48
+ def test_parse_set_escape_sequence_hex
49
+ root = RP.parse('[a\x20c]', :any)
50
+ exp = root[0]
51
+
52
+ assert_equal CharacterSet, exp.class
53
+ assert_equal 3, exp.count
35
54
 
36
- assert_equal true, exp.include?('a')
37
- assert_equal true, exp.include?('c-e')
38
- assert_equal true, exp.include?('h')
39
- assert_equal false, exp.include?(']')
55
+ assert_equal EscapeSequence::Hex, exp[1].class
56
+ assert_equal '\x20', exp[1].text
40
57
  end
41
58
 
42
- def test_parse_hex_members
43
- root = RP.parse('[\x20\x24-\x26\x28]', :any)
44
- exp = root.expressions.at(0)
59
+ def test_parse_set_escape_sequence_codepoint
60
+ root = RP.parse('[a\u0640]')
61
+ exp = root[0]
45
62
 
46
- assert_equal true, exp.include?('\x20')
47
- assert_equal true, exp.include?('\x24-\x26')
48
- assert_equal true, exp.include?('\x28')
49
- assert_equal false, exp.include?(']')
63
+ assert_equal CharacterSet, exp.class
64
+ assert_equal 2, exp.count
65
+
66
+ assert_equal EscapeSequence::Codepoint, exp[1].class
67
+ assert_equal '\u0640', exp[1].text
50
68
  end
51
69
 
52
- def test_parse_chat_type_set_members
53
- root = RP.parse('[\da-z]', :any)
54
- exp = root.expressions.at(0)
70
+ def test_parse_set_escape_sequence_codepoint_list
71
+ root = RP.parse('[a\u{41 1F60D}]')
72
+ exp = root[0]
73
+
74
+ assert_equal CharacterSet, exp.class
75
+ assert_equal 2, exp.count
55
76
 
56
- assert_equal true, exp.include?('\d')
57
- assert_equal true, exp.include?('a-z')
77
+ assert_equal EscapeSequence::CodepointList, exp[1].class
78
+ assert_equal '\u{41 1F60D}', exp[1].text
58
79
  end
59
80
 
60
- def test_parse_set_collating_sequence
61
- root = RP.parse('[a[.span-ll.]h]', :any)
62
- exp = root.expressions.at(0)
81
+ def test_parse_set_posix_class
82
+ root = RP.parse('[[:digit:][:^lower:]]+')
83
+ exp = root[0]
84
+
85
+ assert_equal CharacterSet, exp.class
86
+ assert_equal 2, exp.count
63
87
 
64
- assert_equal true, exp.include?('[.span-ll.]')
65
- assert_equal false, exp.include?(']')
88
+ assert_equal PosixClass, exp[0].class
89
+ assert_equal '[:digit:]', exp[0].text
90
+ assert_equal PosixClass, exp[1].class
91
+ assert_equal '[:^lower:]', exp[1].text
66
92
  end
67
93
 
68
- def test_parse_set_character_equivalents
69
- root = RP.parse('[a[=e=]h]', :any)
70
- exp = root.expressions.at(0)
94
+ def test_parse_set_nesting
95
+ root = RP.parse('[a[b[c]d]e]')
96
+
97
+ exp = root[0]
98
+ assert_equal CharacterSet, exp.class
99
+ assert_equal 3, exp.count
100
+ assert_equal Literal, exp[0].class
101
+ assert_equal Literal, exp[2].class
102
+
103
+ subset1 = exp[1]
104
+ assert_equal CharacterSet, subset1.class
105
+ assert_equal 3, subset1.count
106
+ assert_equal Literal, subset1[0].class
107
+ assert_equal Literal, subset1[2].class
108
+
109
+ subset2 = subset1[1]
110
+ assert_equal CharacterSet, subset2.class
111
+ assert_equal 1, subset2.count
112
+ assert_equal Literal, subset2[0].class
113
+ end
71
114
 
72
- assert_equal true, exp.include?('[=e=]')
73
- assert_equal false, exp.include?(']')
115
+ def test_parse_set_nesting_negative
116
+ root = RP.parse('[a[^b[c]]]')
117
+ exp = root[0]
118
+
119
+ assert_equal CharacterSet, exp.class
120
+ assert_equal 2, exp.count
121
+ assert_equal Literal, exp[0].class
122
+ refute exp.negative?
123
+
124
+ subset1 = exp[1]
125
+ assert_equal CharacterSet, subset1.class
126
+ assert_equal 2, subset1.count
127
+ assert_equal Literal, subset1[0].class
128
+ assert subset1.negative?
129
+
130
+ subset2 = subset1[1]
131
+ assert_equal CharacterSet, subset2.class
132
+ assert_equal 1, subset2.count
133
+ assert_equal Literal, subset2[0].class
134
+ refute subset2.negative?
74
135
  end
75
136
 
76
- def test_parse_set_nesting_tos
137
+ def test_parse_set_nesting_to_s
77
138
  pattern = '[a[b[^c]]]'
78
- root = RP.parse(pattern, 'ruby/1.9')
139
+ root = RP.parse(pattern)
79
140
 
80
141
  assert_equal pattern, root.to_s
81
142
  end
82
143
 
83
- def test_parse_set_nesting_include
84
- root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
85
- exp = root.expressions.at(0)
144
+ def test_parse_set_literals_are_not_merged
145
+ root = RP.parse("[#{'a' * 10}]")
146
+ exp = root[0]
86
147
 
87
- assert_equal true, exp.is_a?(CharacterSet)
88
- assert_equal true, exp.include?('a')
89
- assert_equal true, exp.include?('b')
90
- assert_equal true, exp.include?('c')
148
+ assert_equal 10, exp.count
91
149
  end
92
150
 
93
- def test_parse_set_nesting_include_at_depth
94
- root = RP.parse('[a[b]c]', 'ruby/1.9')
151
+ def test_parse_set_whitespace_is_not_merged
152
+ root = RP.parse("[#{' ' * 10}]")
153
+ exp = root[0]
95
154
 
96
- exp = root.expressions.at(0)
97
- assert_equal true, exp.is_a?(CharacterSet)
98
- assert_equal true, exp.include?('a')
99
- assert_equal true, exp.include?('b')
100
- assert_equal false, exp.include?('b', true) # should not include b directly
101
-
102
- sub = exp.members.at(1)
103
- assert_equal false, sub.include?('a')
104
- assert_equal true, sub.include?('b')
105
- assert_equal true, sub.include?('b', true)
106
- assert_equal false, sub.include?('c')
155
+ assert_equal 10, exp.count
107
156
  end
108
157
 
109
- def test_parse_set_nesting_include_at_depth_2
110
- root = RP.parse('[a[b[c[d]e]f]g]', 'ruby/1.9')
111
-
112
- exp = root.expressions.at(0)
113
- assert_equal true, exp.is_a?(CharacterSet)
114
- assert_equal true, exp.include?('a')
115
- assert_equal true, exp.include?('b')
116
- assert_equal false, exp.include?('b', true) # should not include b directly
117
-
118
- sub = exp.members.at(1)
119
- assert_equal false, sub.include?('a')
120
- assert_equal true, sub.include?('b')
121
- assert_equal true, sub.include?('b', true)
122
- assert_equal true, sub.include?('f', true)
123
- assert_equal true, sub.include?('c')
124
- assert_equal false, sub.include?('c', true)
125
-
126
- sub2 = sub.members.at(1)
127
- assert_equal false, sub2.include?('a')
128
- assert_equal false, sub2.include?('b')
129
- assert_equal true, sub2.include?('c')
130
- assert_equal true, sub2.include?('c', true)
131
- assert_equal true, sub2.include?('e', true)
132
- assert_equal true, sub2.include?('d')
133
- assert_equal false, sub2.include?('d', true)
134
-
135
- sub3 = sub2.members.at(1)
136
- assert_equal false, sub3.include?('a')
137
- assert_equal false, sub3.include?('g')
138
- assert_equal false, sub3.include?('b')
139
- assert_equal false, sub3.include?('f')
140
- assert_equal false, sub3.include?('c')
141
- assert_equal false, sub3.include?('e')
142
- assert_equal true, sub3.include?('d')
143
- assert_equal true, sub3.include?('d', true)
144
- end
145
-
146
- # character subsets and negated posix classes are not available in ruby 1.8
147
- if RUBY_VERSION >= '1.9'
148
- def test_parse_set_nesting_matches
149
- root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
150
- exp = root.expressions.at(0)
151
-
152
- assert_equal true, exp.matches?('b')
153
- assert_equal false, exp.matches?('c')
154
- end
155
-
156
- def test_parse_set_nesting_not_matches
157
- root = RP.parse('[a[b[^c]]]', 'ruby/1.9')
158
- exp = root.expressions.at(0)
158
+ def test_parse_set_whitespace_is_not_merged_in_x_mode
159
+ root = RP.parse("(?x)[#{' ' * 10}]")
160
+ exp = root[1]
159
161
 
160
- assert_equal false, exp.matches?('c')
161
- end
162
+ assert_equal 10, exp.count
163
+ end
162
164
 
163
- def test_parse_set_negated_posix_class
164
- root = RP.parse('[[:^xdigit:][:^lower:]]+', 'ruby/1.9')
165
- exp = root.expressions.at(0)
165
+ # TODO: Collations and equivalents need own exp class if they ever get enabled
166
+ def test_parse_set_collating_sequence
167
+ root = RP.parse('[a[.span-ll.]h]', :any)
168
+ exp = root[0]
166
169
 
167
- assert_equal true, exp.is_a?(CharacterSet)
170
+ assert_equal '[.span-ll.]', exp[1].to_s
171
+ end
168
172
 
169
- assert_equal true, exp.include?('[:^xdigit:]')
170
- assert_equal true, exp.include?('[:^lower:]')
173
+ def test_parse_set_character_equivalents
174
+ root = RP.parse('[a[=e=]h]', :any)
175
+ exp = root[0]
171
176
 
172
- assert_equal true, exp.matches?('GT')
173
- end
177
+ assert_equal '[=e=]', exp[1].to_s
174
178
  end
175
-
176
179
  end
@@ -13,12 +13,6 @@ if RUBY_VERSION >= '2.0.0'
13
13
  end
14
14
  end
15
15
 
16
- if RUBY_VERSION >= '2.5.0'
17
- %w{emojis}.each do|tc|
18
- require File.expand_path("../test_#{tc}", __FILE__)
19
- end
20
- end
21
-
22
16
  class TestRegexpScanner < Test::Unit::TestCase
23
17
 
24
18
  def test_scanner_returns_an_array
@@ -38,7 +32,7 @@ class TestRegexpScanner < Test::Unit::TestCase
38
32
  def test_scanner_token_count
39
33
  re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
40
34
 
41
- assert_equal 26, RS.scan(re).length
35
+ assert_equal 28, RS.scan(re).length
42
36
  end
43
37
 
44
38
  end
@@ -4,22 +4,22 @@ class ScannerConditionals < Test::Unit::TestCase
4
4
 
5
5
  # Basic conditional scan token tests
6
6
  tests = {
7
- /(a)(?(1)T|F)/ => [3, :conditional, :open, '(?', 3, 5],
8
- /(a)(?(1)T|F)/ => [4, :conditional, :condition_open, '(', 5, 6],
9
- /(a)(?(1)T|F)/ => [5, :conditional, :condition, '3', 6, 7],
10
- /(a)(?(1)T|F)/ => [6, :conditional, :condition_close, ')', 7, 8],
11
- /(a)(?(1)T|F)/ => [7, :literal, :literal, 'T', 8, 9],
12
- /(a)(?(1)T|F)/ => [8, :conditional, :separator, '|', 9, 10],
13
- /(a)(?(1)T|F)/ => [9, :literal, :literal, 'F', 10, 11],
14
- /(a)(?(1)T|F)/ => [10, :conditional, :close, ')', 11, 12],
15
-
16
- /(a)(?(1)TRUE)/ => [8, :conditional, :close, ')', 12, 13],
17
-
18
- /(a)(?(1)TRUE|)/ => [8, :conditional, :separator, '|', 12, 13],
19
- /(a)(?(1)TRUE|)/ => [9, :conditional, :close, ')', 13, 14],
20
-
21
- /(?<N>A)(?(<N>)T|F)/ => [5, :conditional, :condition, '<N>', 10, 13],
22
- /(?'N'A)(?('N')T|F)/ => [5, :conditional, :condition, "'N'", 10, 13],
7
+ /(a)(?(1)T|F)1/ => [3, :conditional, :open, '(?', 3, 5],
8
+ /(a)(?(1)T|F)2/ => [4, :conditional, :condition_open, '(', 5, 6],
9
+ /(a)(?(1)T|F)3/ => [5, :conditional, :condition, '1', 6, 7],
10
+ /(a)(?(1)T|F)4/ => [6, :conditional, :condition_close, ')', 7, 8],
11
+ /(a)(?(1)T|F)5/ => [7, :literal, :literal, 'T', 8, 9],
12
+ /(a)(?(1)T|F)6/ => [8, :conditional, :separator, '|', 9, 10],
13
+ /(a)(?(1)T|F)7/ => [9, :literal, :literal, 'F', 10, 11],
14
+ /(a)(?(1)T|F)8/ => [10, :conditional, :close, ')', 11, 12],
15
+
16
+ /(a)(?(1)TRUE)9/ => [8, :conditional, :close, ')', 12, 13],
17
+
18
+ /(a)(?(1)TRUE|)10/ => [8, :conditional, :separator, '|', 12, 13],
19
+ /(a)(?(1)TRUE|)11/ => [9, :conditional, :close, ')', 13, 14],
20
+
21
+ /(?<N>A)(?(<N>)T|F)1/ => [5, :conditional, :condition, '<N>', 10, 13],
22
+ /(?'N'A)(?('N')T|F)2/ => [5, :conditional, :condition, "'N'", 10, 13],
23
23
  }
24
24
 
25
25
  tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
@@ -46,18 +46,6 @@ class ScannerErrors < Test::Unit::TestCase
46
46
  assert_raise( RS::PrematureEndError ) { RS.scan('\x') }
47
47
  end
48
48
 
49
- def test_scanner_eof_in_wide_hex_escape
50
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{') }
51
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{0') }
52
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02') }
53
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{024') }
54
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{0246') }
55
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468') }
56
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468A') }
57
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468AC') }
58
- assert_raise( RS::PrematureEndError ) { RS.scan('\x{02468ACE') }
59
- end
60
-
61
49
  def test_scanner_eof_in_codepoint_escape
62
50
  assert_raise( RS::PrematureEndError ) { RS.scan('\u') }
63
51
  assert_raise( RS::PrematureEndError ) { RS.scan('\u0') }
@@ -94,24 +82,6 @@ class ScannerErrors < Test::Unit::TestCase
94
82
  assert_raise( RS::InvalidSequenceError ) { RS.scan('\xZ0') }
95
83
  end
96
84
 
97
- def test_scanner_invalid_wide_hex_escape
98
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{}') }
99
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ }') }
100
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{ A }') }
101
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0-}') }
102
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{Z00}') }
103
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{000Z}') }
104
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00ZZ}') }
105
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ}') }
106
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0}') }
107
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000ZZ0X}') }
108
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00X') }
109
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{00XYZ') }
110
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{0000XYZ') }
111
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACED') }
112
- assert_raise( RS::InvalidSequenceError ) { RS.scan('\x{02468ACE]') }
113
- end
114
-
115
85
  def test_scanner_invalid_named_group
116
86
  assert_raise( RS::InvalidGroupError ) { RS.scan("(?'')") }
117
87
  assert_raise( RS::InvalidGroupError ) { RS.scan("(?''empty-name)") }
@@ -22,10 +22,9 @@ class ScannerEscapes < Test::Unit::TestCase
22
22
  'a\x24c' => [1, :escape, :hex, '\x24', 1, 5],
23
23
  'a\x0640c' => [1, :escape, :hex, '\x06', 1, 5],
24
24
 
25
- 'a\x{0640}c' => [1, :escape, :hex_wide, '\x{0640}', 1, 9],
26
-
27
25
  'a\u0640c' => [1, :escape, :codepoint, '\u0640', 1, 7],
28
26
  'a\u{640 0641}c' => [1, :escape, :codepoint_list, '\u{640 0641}', 1, 13],
27
+ 'a\u{10FFFF}c' => [1, :escape, :codepoint_list, '\u{10FFFF}', 1, 11],
29
28
 
30
29
  /a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
31
30
  /a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
@@ -159,34 +159,34 @@ class ScannerFreeSpace < Test::Unit::TestCase
159
159
  regexp = /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/
160
160
  tokens = RS.scan(regexp)
161
161
  [
162
- [ 0, :group, :capture, '(', 0, 1],
163
- [ 1, :literal, :literal, 'a ', 1, 3],
164
- [ 2, :group, :capture, '(', 3, 4],
165
- [ 3, :literal, :literal, 'b', 4, 5],
166
- [ 4, :group, :capture, '(', 5, 6],
167
- [ 5, :group, :options, '(?x', 6, 9],
168
- [ 6, :group, :close, ')', 9, 10],
169
- [ 7, :free_space, :whitespace, ' ', 10, 11],
170
- [ 8, :group, :capture, '(', 11, 12],
171
- [ 9, :literal, :literal, 'c', 12, 13],
172
- [10, :free_space, :whitespace, ' ', 13, 14],
173
- [11, :literal, :literal, 'd', 14, 15],
174
- [12, :group, :close, ')', 15, 16],
175
- [13, :free_space, :whitespace, ' ', 16, 17],
176
- [14, :group, :capture, '(', 17, 18],
177
- [15, :group, :options, '(?-x', 18, 22],
178
- [16, :group, :close, ')', 22, 23],
179
- [17, :group, :capture, '(', 23, 24],
180
- [18, :literal, :literal, 'e f', 24, 27],
181
- [19, :group, :close, ')', 27, 28],
182
- [20, :literal, :literal, ' ', 28, 29],
183
- [21, :group, :close, ')', 29, 30],
184
- [22, :literal, :literal, 'g', 30, 31],
185
- [23, :group, :close, ')', 31, 32],
186
- [24, :literal, :literal, ' h', 32, 34],
187
- [25, :group, :close, ')', 34, 35],
188
- [26, :literal, :literal, 'i j', 35, 38],
189
- [27, :group, :close, ')', 38, 39]
162
+ [ 0, :group, :capture, '(', 0, 1],
163
+ [ 1, :literal, :literal, 'a ', 1, 3],
164
+ [ 2, :group, :capture, '(', 3, 4],
165
+ [ 3, :literal, :literal, 'b', 4, 5],
166
+ [ 4, :group, :capture, '(', 5, 6],
167
+ [ 5, :group, :options_switch, '(?x', 6, 9],
168
+ [ 6, :group, :close, ')', 9, 10],
169
+ [ 7, :free_space, :whitespace, ' ', 10, 11],
170
+ [ 8, :group, :capture, '(', 11, 12],
171
+ [ 9, :literal, :literal, 'c', 12, 13],
172
+ [10, :free_space, :whitespace, ' ', 13, 14],
173
+ [11, :literal, :literal, 'd', 14, 15],
174
+ [12, :group, :close, ')', 15, 16],
175
+ [13, :free_space, :whitespace, ' ', 16, 17],
176
+ [14, :group, :capture, '(', 17, 18],
177
+ [15, :group, :options_switch, '(?-x', 18, 22],
178
+ [16, :group, :close, ')', 22, 23],
179
+ [17, :group, :capture, '(', 23, 24],
180
+ [18, :literal, :literal, 'e f', 24, 27],
181
+ [19, :group, :close, ')', 27, 28],
182
+ [20, :literal, :literal, ' ', 28, 29],
183
+ [21, :group, :close, ')', 29, 30],
184
+ [22, :literal, :literal, 'g', 30, 31],
185
+ [23, :group, :close, ')', 31, 32],
186
+ [24, :literal, :literal, ' h', 32, 34],
187
+ [25, :group, :close, ')', 34, 35],
188
+ [26, :literal, :literal, 'i j', 35, 38],
189
+ [27, :group, :close, ')', 38, 39]
190
190
  ].each do |index, type, token, text, ts, te|
191
191
  result = tokens[index]
192
192