regexp_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
data/test/helpers.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "test/unit"
2
2
  require File.expand_path("../../lib/regexp_parser", __FILE__)
3
+ require 'regexp_property_values'
3
4
 
4
5
  RS = Regexp::Scanner
5
6
  RL = Regexp::Lexer
@@ -31,7 +31,7 @@ class TestRegexpLexer < Test::Unit::TestCase
31
31
  def test_lexer_token_count
32
32
  tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
33
33
 
34
- assert_equal 26, tokens.length
34
+ assert_equal 28, tokens.length
35
35
  end
36
36
 
37
37
  def test_lexer_scan_alias
@@ -43,15 +43,17 @@ class LexerConditionals < Test::Unit::TestCase
43
43
  [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
44
44
 
45
45
  [12, :set, :open, '[', 30, 31, 3, 0, 2],
46
- [13, :set, :range, 'e-g', 31, 34, 3, 1, 2],
47
- [14, :set, :close, ']', 34, 35, 3, 0, 2],
46
+ [13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
47
+ [14, :set, :range, '-', 32, 33, 3, 1, 2],
48
+ [15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
49
+ [16, :set, :close, ']', 34, 35, 3, 0, 2],
48
50
 
49
- [15, :conditional, :separator, '|', 35, 36, 3, 0, 2],
50
- [19, :conditional, :close, ')', 41, 42, 3, 0, 1],
51
- [21, :conditional, :close, ')', 43, 44, 2, 0, 0],
51
+ [17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
52
+ [23, :conditional, :close, ')', 41, 42, 3, 0, 1],
53
+ [25, :conditional, :close, ')', 43, 44, 2, 0, 0],
52
54
 
53
- [22, :group, :close, ')', 44, 45, 1, 0, 0],
54
- [23, :group, :close, ')', 45, 46, 0, 0, 0]
55
+ [26, :group, :close, ')', 44, 45, 1, 0, 0],
56
+ [27, :group, :close, ')', 45, 46, 0, 0, 0]
55
57
  ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
56
58
  struct = tokens.at(index)
57
59
 
@@ -62,38 +62,56 @@ class LexerNesting < Test::Unit::TestCase
62
62
 
63
63
  'a[b-e]f' => {
64
64
  1 => [:set, :open, '[', 1, 2, 0, 0, 0],
65
- 2 => [:set, :range, 'b-e', 2, 5, 0, 1, 0],
66
- 3 => [:set, :close, ']', 5, 6, 0, 0, 0],
65
+ 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
66
+ 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
67
+ 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
68
+ 5 => [:set, :close, ']', 5, 6, 0, 0, 0],
67
69
  },
68
70
 
69
- '[a-w&&[^c-g]z]' => {
71
+ '[[:word:]&&[^c]z]' => {
70
72
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
71
- 2 => [:set, :intersection, '&&', 4, 6, 0, 1, 0],
72
- 3 => [:subset, :open, '[', 6, 7, 0, 1, 0],
73
- 4 => [:subset, :negate, '^', 7, 8, 0, 2, 0],
74
- 5 => [:subset, :range, 'c-g', 8, 11, 0, 2, 0],
75
- 6 => [:subset, :close, ']', 11, 12, 0, 1, 0],
76
- 8 => [:set, :close, ']', 13, 14, 0, 0, 0],
73
+ 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
74
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
75
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
76
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
77
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
78
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
79
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
80
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
81
+ },
82
+
83
+ '[\p{word}&&[^c]z]' => {
84
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
85
+ 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
86
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
87
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
88
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
89
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
90
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
91
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
92
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
77
93
  },
78
94
 
79
95
  '[a[b[c[d-g]]]]' => {
80
96
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
81
- 1 => [:set, :member, 'a', 1, 2, 0, 1, 0],
82
- 2 => [:subset, :open, '[', 2, 3, 0, 1, 0],
83
- 3 => [:subset, :member, 'b', 3, 4, 0, 2, 0],
84
- 4 => [:subset, :open, '[', 4, 5, 0, 2, 0],
85
- 5 => [:subset, :member, 'c', 5, 6, 0, 3, 0],
86
- 6 => [:subset, :open, '[', 6, 7, 0, 3, 0],
87
- 7 => [:subset, :range, 'd-g', 7, 10, 0, 4, 0],
88
- 8 => [:subset, :close, ']', 10, 11, 0, 3, 0],
89
- 9 => [:subset, :close, ']', 11, 12, 0, 2, 0],
90
- 10 => [:subset, :close, ']', 12, 13, 0, 1, 0],
91
- 11 => [:set, :close, ']', 13, 14, 0, 0, 0],
97
+ 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
98
+ 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
99
+ 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
100
+ 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
101
+ 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
102
+ 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
103
+ 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
104
+ 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
105
+ 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
106
+ 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
107
+ 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
108
+ 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
109
+ 13 => [:set, :close, ']', 13, 14, 0, 0, 0],
92
110
  },
93
111
  }
94
112
 
95
113
  tests.each_with_index do |(pattern, checks), count|
96
- define_method "test_lex_nesting_#{count}" do
114
+ define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
97
115
  tokens = RL.lex(pattern, 'ruby/1.9')
98
116
 
99
117
  checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
@@ -27,11 +27,11 @@ class LexerRefCalls < Test::Unit::TestCase
27
27
  "(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0],
28
28
 
29
29
  # Group back-references, with nesting level
30
- '(?<X>abc)\k<X-0>' => [3, :backref, :name_nest_ref, '\k<X-0>', 9, 16, 0, 0, 0],
31
- "(?<X>abc)\\k'X-0'" => [3, :backref, :name_nest_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
30
+ '(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0],
31
+ "(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
32
32
 
33
- '(abc)\k<1-0>' => [3, :backref, :number_nest_ref, '\k<1-0>', 5, 12, 0, 0, 0],
34
- "(abc)\\k'1-0'" => [3, :backref, :number_nest_ref, "\\k'1-0'", 5, 12, 0, 0, 0],
33
+ '(abc)\k<1-0>' => [3, :backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0],
34
+ "(abc)\\k'1-0'" => [3, :backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0],
35
35
  }
36
36
 
37
37
  tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
@@ -0,0 +1,127 @@
1
+ require File.expand_path('../../../helpers', __FILE__)
2
+
3
+ # edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
4
+
5
+ class ParserSetIntersections < Test::Unit::TestCase
6
+ def test_parse_set_intersection
7
+ root = RP.parse('[a&&z]')
8
+ set = root[0]
9
+ ints = set[0]
10
+
11
+ assert_equal 1, set.count
12
+ assert_equal CharacterSet::Intersection, ints.class
13
+ assert_equal 2, ints.count
14
+
15
+ seq1, seq2 = ints.expressions
16
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
17
+ assert_equal 1, seq1.count
18
+ assert_equal 'a', seq1.first.to_s
19
+ assert_equal Literal, seq1.first.class
20
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
21
+ assert_equal 1, seq2.count
22
+ assert_equal 'z', seq2.first.to_s
23
+ assert_equal Literal, seq2.first.class
24
+
25
+ refute set.matches?('a')
26
+ refute set.matches?('&')
27
+ refute set.matches?('z')
28
+ end
29
+
30
+ def test_parse_set_intersection_range_and_subset
31
+ root = RP.parse('[a-z&&[^a]]')
32
+ set = root[0]
33
+ ints = set[0]
34
+
35
+ assert_equal 1, set.count
36
+ assert_equal CharacterSet::Intersection, ints.class
37
+ assert_equal 2, ints.count
38
+
39
+ seq1, seq2 = ints.expressions
40
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
41
+ assert_equal 1, seq1.count
42
+ assert_equal 'a-z', seq1.first.to_s
43
+ assert_equal CharacterSet::Range, seq1.first.class
44
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
45
+ assert_equal 1, seq2.count
46
+ assert_equal '[^a]', seq2.first.to_s
47
+ assert_equal CharacterSet, seq2.first.class
48
+
49
+ refute set.matches?('a')
50
+ refute set.matches?('&')
51
+ assert set.matches?('b')
52
+ end
53
+
54
+ def test_parse_set_intersection_trailing_range
55
+ root = RP.parse('[a&&a-z]')
56
+ set = root[0]
57
+ ints = set[0]
58
+
59
+ assert_equal 1, set.count
60
+ assert_equal CharacterSet::Intersection, ints.class
61
+ assert_equal 2, ints.count
62
+
63
+ seq1, seq2 = ints.expressions
64
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
65
+ assert_equal 1, seq1.count
66
+ assert_equal 'a', seq1.first.to_s
67
+ assert_equal Literal, seq1.first.class
68
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
69
+ assert_equal 1, seq2.count
70
+ assert_equal 'a-z', seq2.first.to_s
71
+ assert_equal CharacterSet::Range, seq2.first.class
72
+
73
+ assert set.matches?('a')
74
+ refute set.matches?('&')
75
+ refute set.matches?('b')
76
+ end
77
+
78
+ def test_parse_set_intersection_type
79
+ root = RP.parse('[a&&\w]')
80
+ set = root[0]
81
+ ints = set[0]
82
+
83
+ assert_equal 1, set.count
84
+ assert_equal CharacterSet::Intersection, ints.class
85
+ assert_equal 2, ints.count
86
+
87
+ seq1, seq2 = ints.expressions
88
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
89
+ assert_equal 1, seq1.count
90
+ assert_equal 'a', seq1.first.to_s
91
+ assert_equal Literal, seq1.first.class
92
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
93
+ assert_equal 1, seq2.count
94
+ assert_equal '\w', seq2.first.to_s
95
+ assert_equal CharacterType::Word, seq2.first.class
96
+
97
+ assert set.matches?('a')
98
+ refute set.matches?('&')
99
+ refute set.matches?('b')
100
+ end
101
+
102
+ def test_parse_set_intersection_multipart
103
+ root = RP.parse('[\h&&\w&&efg]')
104
+ set = root[0]
105
+ ints = set[0]
106
+
107
+ assert_equal 1, set.count
108
+ assert_equal CharacterSet::Intersection, ints.class
109
+ assert_equal 3, ints.count
110
+
111
+ seq1, seq2, seq3 = ints.expressions
112
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
113
+ assert_equal 1, seq1.count
114
+ assert_equal '\h', seq1.first.to_s
115
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
116
+ assert_equal 1, seq2.count
117
+ assert_equal '\w', seq2.first.to_s
118
+ assert_equal CharacterSet::IntersectedSequence, seq3.class
119
+ assert_equal 3, seq3.count
120
+ assert_equal 'efg', seq3.to_s
121
+
122
+ assert set.matches?('e')
123
+ assert set.matches?('f')
124
+ refute set.matches?('a')
125
+ refute set.matches?('g')
126
+ end
127
+ end
@@ -0,0 +1,111 @@
1
+ require File.expand_path('../../../helpers', __FILE__)
2
+
3
+ class ParserSetRangs < Test::Unit::TestCase
4
+ def test_parse_set_range
5
+ root = RP.parse('[a-z]')
6
+ set = root[0]
7
+ range = set[0]
8
+
9
+ assert_equal 1, set.count
10
+ assert_equal CharacterSet::Range, range.class
11
+ assert_equal 2, range.count
12
+ assert_equal 'a', range.first.to_s
13
+ assert_equal Literal, range.first.class
14
+ assert_equal 'z', range.last.to_s
15
+ assert_equal Literal, range.last.class
16
+ assert set.matches?('m')
17
+ end
18
+
19
+ def test_parse_set_range_hex
20
+ root = RP.parse('[\x00-\x99]')
21
+ set = root[0]
22
+ range = set[0]
23
+
24
+ assert_equal 1, set.count
25
+ assert_equal CharacterSet::Range, range.class
26
+ assert_equal 2, range.count
27
+ assert_equal '\x00', range.first.to_s
28
+ assert_equal EscapeSequence::Hex, range.first.class
29
+ assert_equal '\x99', range.last.to_s
30
+ assert_equal EscapeSequence::Hex, range.last.class
31
+ assert set.matches?('\x50')
32
+ end
33
+
34
+ def test_parse_set_range_unicode
35
+ root = RP.parse('[\u{40 42}-\u1234]')
36
+ set = root[0]
37
+ range = set[0]
38
+
39
+ assert_equal 1, set.count
40
+ assert_equal CharacterSet::Range, range.class
41
+ assert_equal 2, range.count
42
+ assert_equal '\u{40 42}', range.first.to_s
43
+ assert_equal EscapeSequence::CodepointList, range.first.class
44
+ assert_equal '\u1234', range.last.to_s
45
+ assert_equal EscapeSequence::Codepoint, range.last.class
46
+ assert set.matches?('\u600')
47
+ end
48
+
49
+ def test_parse_set_range_edge_case_leading_dash
50
+ root = RP.parse('[--z]')
51
+ set = root[0]
52
+ range = set[0]
53
+
54
+ assert_equal 1, set.count
55
+ assert_equal 2, range.count
56
+ assert set.matches?('a')
57
+ end
58
+
59
+ def test_parse_set_range_edge_case_trailing_dash
60
+ root = RP.parse('[!--]')
61
+ set = root[0]
62
+ range = set[0]
63
+
64
+ assert_equal 1, set.count
65
+ assert_equal 2, range.count
66
+ assert set.matches?('$')
67
+ end
68
+
69
+ def test_parse_set_range_edge_case_leading_negate
70
+ root = RP.parse('[^-z]')
71
+ set = root[0]
72
+
73
+ assert_equal 2, set.count
74
+ assert set.matches?('a')
75
+ refute set.matches?('z')
76
+ end
77
+
78
+ def test_parse_set_range_edge_case_trailing_negate
79
+ root = RP.parse('[!-^]')
80
+ set = root[0]
81
+ range = set[0]
82
+
83
+ assert_equal 1, set.count
84
+ assert_equal 2, range.count
85
+ assert set.matches?('$')
86
+ end
87
+
88
+ def test_parse_set_range_edge_case_leading_intersection
89
+ root = RP.parse('[[\-ab]&&-bc]')
90
+ set = root[0]
91
+
92
+ assert_equal 1, set.count
93
+ assert_equal '-bc', set.first.last.to_s
94
+ assert set.matches?('-')
95
+ assert set.matches?('b')
96
+ refute set.matches?('a')
97
+ refute set.matches?('c')
98
+ end
99
+
100
+ def test_parse_set_range_edge_case_trailing_intersection
101
+ root = RP.parse('[bc-&&[\-ab]]')
102
+ set = root[0]
103
+
104
+ assert_equal 1, set.count
105
+ assert_equal 'bc-', set.first.first.to_s
106
+ assert set.matches?('-')
107
+ assert set.matches?('b')
108
+ refute set.matches?('a')
109
+ refute set.matches?('c')
110
+ end
111
+ end
@@ -2,11 +2,14 @@ require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  %w{
4
4
  alternation anchors errors escapes free_space groups
5
- properties quantifiers refcalls sets types
5
+ posix_classes properties quantifiers refcalls sets types
6
6
  }.each do|tc|
7
7
  require File.expand_path("../test_#{tc}", __FILE__)
8
8
  end
9
9
 
10
+ require File.expand_path('../set/test_ranges.rb', __FILE__)
11
+ require File.expand_path('../set/test_intersections.rb', __FILE__)
12
+
10
13
  if RUBY_VERSION >= '2.0.0'
11
14
  %w{conditionals keep}.each do|tc|
12
15
  require File.expand_path("../test_#{tc}", __FILE__)
@@ -11,10 +11,6 @@ class TestParserEscapes < Test::Unit::TestCase
11
11
  /a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
12
12
  /a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
13
13
 
14
- # special cases
15
- /a\bc/ => [1, :anchor, :word_boundary, Anchor::WordBoundary],
16
- /a\sc/ => [1, :type, :space, CharacterType::Space],
17
-
18
14
  # meta character escapes
19
15
  /a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
20
16
  /a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
@@ -27,14 +23,15 @@ class TestParserEscapes < Test::Unit::TestCase
27
23
  /a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
28
24
 
29
25
  # unicode escapes
30
- /a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Literal],
31
- /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::Literal],
26
+ /a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
27
+ /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
28
+ /a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
32
29
 
33
30
  # hex escapes
34
- /a\xFF/n => [1, :escape, :hex, EscapeSequence::Literal],
31
+ /a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
35
32
 
36
33
  # octal escapes
37
- /a\177/n => [1, :escape, :octal, EscapeSequence::Literal],
34
+ /a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
38
35
  }
39
36
 
40
37
  tests.each_with_index do |(pattern, (index, type, token, klass)), count|
@@ -50,11 +47,35 @@ class TestParserEscapes < Test::Unit::TestCase
50
47
  end
51
48
  end
52
49
 
50
+ def test_parse_chars_and_codepoints
51
+ root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
52
+
53
+ assert_equal "\n", root[0].char
54
+ assert_equal 10, root[0].codepoint
55
+
56
+ assert_equal "?", root[1].char
57
+ assert_equal 63, root[1].codepoint
58
+
59
+ assert_equal "A", root[2].char
60
+ assert_equal 65, root[2].codepoint
61
+
62
+ assert_equal "B", root[3].char
63
+ assert_equal 66, root[3].codepoint
64
+
65
+ assert_equal "C", root[4].char
66
+ assert_equal 67, root[4].codepoint
67
+
68
+ assert_equal ["D", "E"], root[5].chars
69
+ assert_equal [68, 69], root[5].codepoints
70
+ end
71
+
53
72
  def test_parse_escape_control_sequence_lower
54
73
  root = RP.parse(/a\\\c2b/)
55
74
 
56
75
  assert_equal EscapeSequence::Control, root[2].class
57
76
  assert_equal '\\c2', root[2].text
77
+ assert_equal "\u0012", root[2].char
78
+ assert_equal 18, root[2].codepoint
58
79
  end
59
80
 
60
81
  def test_parse_escape_control_sequence_upper
@@ -62,6 +83,8 @@ class TestParserEscapes < Test::Unit::TestCase
62
83
 
63
84
  assert_equal EscapeSequence::Control, root[2].class
64
85
  assert_equal '\\C-C', root[2].text
86
+ assert_equal "\u0003", root[2].char
87
+ assert_equal 3, root[2].codepoint
65
88
  end
66
89
 
67
90
  def test_parse_escape_meta_sequence
@@ -69,6 +92,8 @@ class TestParserEscapes < Test::Unit::TestCase
69
92
 
70
93
  assert_equal EscapeSequence::Meta, root[2].class
71
94
  assert_equal '\\M-Z', root[2].text
95
+ assert_equal "\u00DA", root[2].char
96
+ assert_equal 218, root[2].codepoint
72
97
  end
73
98
 
74
99
  def test_parse_escape_meta_control_sequence
@@ -76,6 +101,8 @@ class TestParserEscapes < Test::Unit::TestCase
76
101
 
77
102
  assert_equal EscapeSequence::MetaControl, root[2].class
78
103
  assert_equal '\\M-\\C-X', root[2].text
104
+ assert_equal "\u0098", root[2].char
105
+ assert_equal 152, root[2].codepoint
79
106
  end
80
107
 
81
108
  def test_parse_lower_c_meta_control_sequence
@@ -83,6 +110,8 @@ class TestParserEscapes < Test::Unit::TestCase
83
110
 
84
111
  assert_equal EscapeSequence::MetaControl, root[2].class
85
112
  assert_equal '\\M-\\cX', root[2].text
113
+ assert_equal "\u0098", root[2].char
114
+ assert_equal 152, root[2].codepoint
86
115
  end
87
116
 
88
117
  def test_parse_escape_reverse_meta_control_sequence
@@ -90,6 +119,8 @@ class TestParserEscapes < Test::Unit::TestCase
90
119
 
91
120
  assert_equal EscapeSequence::MetaControl, root[2].class
92
121
  assert_equal '\\C-\\M-X', root[2].text
122
+ assert_equal "\u0098", root[2].char
123
+ assert_equal 152, root[2].codepoint
93
124
  end
94
125
 
95
126
  def test_parse_escape_reverse_lower_c_meta_control_sequence
@@ -97,6 +128,7 @@ class TestParserEscapes < Test::Unit::TestCase
97
128
 
98
129
  assert_equal EscapeSequence::MetaControl, root[2].class
99
130
  assert_equal '\\c\\M-X', root[2].text
131
+ assert_equal "\u0098", root[2].char
132
+ assert_equal 152, root[2].codepoint
100
133
  end
101
-
102
134
  end