regexp_parser 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
data/test/helpers.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "test/unit"
2
2
  require File.expand_path("../../lib/regexp_parser", __FILE__)
3
+ require 'regexp_property_values'
3
4
 
4
5
  RS = Regexp::Scanner
5
6
  RL = Regexp::Lexer
data/test/lexer/test_all.rb CHANGED
@@ -31,7 +31,7 @@ class TestRegexpLexer < Test::Unit::TestCase
31
31
  def test_lexer_token_count
32
32
  tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
33
33
 
34
- assert_equal 26, tokens.length
34
+ assert_equal 28, tokens.length
35
35
  end
36
36
 
37
37
  def test_lexer_scan_alias
data/test/lexer/test_conditionals.rb CHANGED
@@ -43,15 +43,17 @@ class LexerConditionals < Test::Unit::TestCase
43
43
  [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
44
44
 
45
45
  [12, :set, :open, '[', 30, 31, 3, 0, 2],
46
- [13, :set, :range, 'e-g', 31, 34, 3, 1, 2],
47
- [14, :set, :close, ']', 34, 35, 3, 0, 2],
46
+ [13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
47
+ [14, :set, :range, '-', 32, 33, 3, 1, 2],
48
+ [15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
49
+ [16, :set, :close, ']', 34, 35, 3, 0, 2],
48
50
 
49
- [15, :conditional, :separator, '|', 35, 36, 3, 0, 2],
50
- [19, :conditional, :close, ')', 41, 42, 3, 0, 1],
51
- [21, :conditional, :close, ')', 43, 44, 2, 0, 0],
51
+ [17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
52
+ [23, :conditional, :close, ')', 41, 42, 3, 0, 1],
53
+ [25, :conditional, :close, ')', 43, 44, 2, 0, 0],
52
54
 
53
- [22, :group, :close, ')', 44, 45, 1, 0, 0],
54
- [23, :group, :close, ')', 45, 46, 0, 0, 0]
55
+ [26, :group, :close, ')', 44, 45, 1, 0, 0],
56
+ [27, :group, :close, ')', 45, 46, 0, 0, 0]
55
57
  ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
56
58
  struct = tokens.at(index)
57
59
 
data/test/lexer/test_nesting.rb CHANGED
@@ -62,38 +62,56 @@ class LexerNesting < Test::Unit::TestCase
62
62
 
63
63
  'a[b-e]f' => {
64
64
  1 => [:set, :open, '[', 1, 2, 0, 0, 0],
65
- 2 => [:set, :range, 'b-e', 2, 5, 0, 1, 0],
66
- 3 => [:set, :close, ']', 5, 6, 0, 0, 0],
65
+ 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
66
+ 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
67
+ 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
68
+ 5 => [:set, :close, ']', 5, 6, 0, 0, 0],
67
69
  },
68
70
 
69
- '[a-w&&[^c-g]z]' => {
71
+ '[[:word:]&&[^c]z]' => {
70
72
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
71
- 2 => [:set, :intersection, '&&', 4, 6, 0, 1, 0],
72
- 3 => [:subset, :open, '[', 6, 7, 0, 1, 0],
73
- 4 => [:subset, :negate, '^', 7, 8, 0, 2, 0],
74
- 5 => [:subset, :range, 'c-g', 8, 11, 0, 2, 0],
75
- 6 => [:subset, :close, ']', 11, 12, 0, 1, 0],
76
- 8 => [:set, :close, ']', 13, 14, 0, 0, 0],
73
+ 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
74
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
75
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
76
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
77
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
78
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
79
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
80
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
81
+ },
82
+
83
+ '[\p{word}&&[^c]z]' => {
84
+ 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
85
+ 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
86
+ 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
87
+ 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
88
+ 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
89
+ 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
90
+ 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
91
+ 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
92
+ 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
77
93
  },
78
94
 
79
95
  '[a[b[c[d-g]]]]' => {
80
96
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
81
- 1 => [:set, :member, 'a', 1, 2, 0, 1, 0],
82
- 2 => [:subset, :open, '[', 2, 3, 0, 1, 0],
83
- 3 => [:subset, :member, 'b', 3, 4, 0, 2, 0],
84
- 4 => [:subset, :open, '[', 4, 5, 0, 2, 0],
85
- 5 => [:subset, :member, 'c', 5, 6, 0, 3, 0],
86
- 6 => [:subset, :open, '[', 6, 7, 0, 3, 0],
87
- 7 => [:subset, :range, 'd-g', 7, 10, 0, 4, 0],
88
- 8 => [:subset, :close, ']', 10, 11, 0, 3, 0],
89
- 9 => [:subset, :close, ']', 11, 12, 0, 2, 0],
90
- 10 => [:subset, :close, ']', 12, 13, 0, 1, 0],
91
- 11 => [:set, :close, ']', 13, 14, 0, 0, 0],
97
+ 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
98
+ 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
99
+ 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
100
+ 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
101
+ 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
102
+ 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
103
+ 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
104
+ 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
105
+ 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
106
+ 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
107
+ 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
108
+ 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
109
+ 13 => [:set, :close, ']', 13, 14, 0, 0, 0],
92
110
  },
93
111
  }
94
112
 
95
113
  tests.each_with_index do |(pattern, checks), count|
96
- define_method "test_lex_nesting_#{count}" do
114
+ define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
97
115
  tokens = RL.lex(pattern, 'ruby/1.9')
98
116
 
99
117
  checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
data/test/lexer/test_refcalls.rb CHANGED
@@ -27,11 +27,11 @@ class LexerRefCalls < Test::Unit::TestCase
27
27
  "(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0],
28
28
 
29
29
  # Group back-references, with nesting level
30
- '(?<X>abc)\k<X-0>' => [3, :backref, :name_nest_ref, '\k<X-0>', 9, 16, 0, 0, 0],
31
- "(?<X>abc)\\k'X-0'" => [3, :backref, :name_nest_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
30
+ '(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0],
31
+ "(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
32
32
 
33
- '(abc)\k<1-0>' => [3, :backref, :number_nest_ref, '\k<1-0>', 5, 12, 0, 0, 0],
34
- "(abc)\\k'1-0'" => [3, :backref, :number_nest_ref, "\\k'1-0'", 5, 12, 0, 0, 0],
33
+ '(abc)\k<1-0>' => [3, :backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0],
34
+ "(abc)\\k'1-0'" => [3, :backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0],
35
35
  }
36
36
 
37
37
  tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
data/test/parser/set/test_intersections.rb ADDED
@@ -0,0 +1,127 @@
1
+ require File.expand_path('../../../helpers', __FILE__)
2
+
3
+ # edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
4
+
5
+ class ParserSetIntersections < Test::Unit::TestCase
6
+ def test_parse_set_intersection
7
+ root = RP.parse('[a&&z]')
8
+ set = root[0]
9
+ ints = set[0]
10
+
11
+ assert_equal 1, set.count
12
+ assert_equal CharacterSet::Intersection, ints.class
13
+ assert_equal 2, ints.count
14
+
15
+ seq1, seq2 = ints.expressions
16
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
17
+ assert_equal 1, seq1.count
18
+ assert_equal 'a', seq1.first.to_s
19
+ assert_equal Literal, seq1.first.class
20
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
21
+ assert_equal 1, seq2.count
22
+ assert_equal 'z', seq2.first.to_s
23
+ assert_equal Literal, seq2.first.class
24
+
25
+ refute set.matches?('a')
26
+ refute set.matches?('&')
27
+ refute set.matches?('z')
28
+ end
29
+
30
+ def test_parse_set_intersection_range_and_subset
31
+ root = RP.parse('[a-z&&[^a]]')
32
+ set = root[0]
33
+ ints = set[0]
34
+
35
+ assert_equal 1, set.count
36
+ assert_equal CharacterSet::Intersection, ints.class
37
+ assert_equal 2, ints.count
38
+
39
+ seq1, seq2 = ints.expressions
40
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
41
+ assert_equal 1, seq1.count
42
+ assert_equal 'a-z', seq1.first.to_s
43
+ assert_equal CharacterSet::Range, seq1.first.class
44
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
45
+ assert_equal 1, seq2.count
46
+ assert_equal '[^a]', seq2.first.to_s
47
+ assert_equal CharacterSet, seq2.first.class
48
+
49
+ refute set.matches?('a')
50
+ refute set.matches?('&')
51
+ assert set.matches?('b')
52
+ end
53
+
54
+ def test_parse_set_intersection_trailing_range
55
+ root = RP.parse('[a&&a-z]')
56
+ set = root[0]
57
+ ints = set[0]
58
+
59
+ assert_equal 1, set.count
60
+ assert_equal CharacterSet::Intersection, ints.class
61
+ assert_equal 2, ints.count
62
+
63
+ seq1, seq2 = ints.expressions
64
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
65
+ assert_equal 1, seq1.count
66
+ assert_equal 'a', seq1.first.to_s
67
+ assert_equal Literal, seq1.first.class
68
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
69
+ assert_equal 1, seq2.count
70
+ assert_equal 'a-z', seq2.first.to_s
71
+ assert_equal CharacterSet::Range, seq2.first.class
72
+
73
+ assert set.matches?('a')
74
+ refute set.matches?('&')
75
+ refute set.matches?('b')
76
+ end
77
+
78
+ def test_parse_set_intersection_type
79
+ root = RP.parse('[a&&\w]')
80
+ set = root[0]
81
+ ints = set[0]
82
+
83
+ assert_equal 1, set.count
84
+ assert_equal CharacterSet::Intersection, ints.class
85
+ assert_equal 2, ints.count
86
+
87
+ seq1, seq2 = ints.expressions
88
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
89
+ assert_equal 1, seq1.count
90
+ assert_equal 'a', seq1.first.to_s
91
+ assert_equal Literal, seq1.first.class
92
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
93
+ assert_equal 1, seq2.count
94
+ assert_equal '\w', seq2.first.to_s
95
+ assert_equal CharacterType::Word, seq2.first.class
96
+
97
+ assert set.matches?('a')
98
+ refute set.matches?('&')
99
+ refute set.matches?('b')
100
+ end
101
+
102
+ def test_parse_set_intersection_multipart
103
+ root = RP.parse('[\h&&\w&&efg]')
104
+ set = root[0]
105
+ ints = set[0]
106
+
107
+ assert_equal 1, set.count
108
+ assert_equal CharacterSet::Intersection, ints.class
109
+ assert_equal 3, ints.count
110
+
111
+ seq1, seq2, seq3 = ints.expressions
112
+ assert_equal CharacterSet::IntersectedSequence, seq1.class
113
+ assert_equal 1, seq1.count
114
+ assert_equal '\h', seq1.first.to_s
115
+ assert_equal CharacterSet::IntersectedSequence, seq2.class
116
+ assert_equal 1, seq2.count
117
+ assert_equal '\w', seq2.first.to_s
118
+ assert_equal CharacterSet::IntersectedSequence, seq3.class
119
+ assert_equal 3, seq3.count
120
+ assert_equal 'efg', seq3.to_s
121
+
122
+ assert set.matches?('e')
123
+ assert set.matches?('f')
124
+ refute set.matches?('a')
125
+ refute set.matches?('g')
126
+ end
127
+ end
data/test/parser/set/test_ranges.rb ADDED
@@ -0,0 +1,111 @@
1
+ require File.expand_path('../../../helpers', __FILE__)
2
+
3
+ class ParserSetRangs < Test::Unit::TestCase
4
+ def test_parse_set_range
5
+ root = RP.parse('[a-z]')
6
+ set = root[0]
7
+ range = set[0]
8
+
9
+ assert_equal 1, set.count
10
+ assert_equal CharacterSet::Range, range.class
11
+ assert_equal 2, range.count
12
+ assert_equal 'a', range.first.to_s
13
+ assert_equal Literal, range.first.class
14
+ assert_equal 'z', range.last.to_s
15
+ assert_equal Literal, range.last.class
16
+ assert set.matches?('m')
17
+ end
18
+
19
+ def test_parse_set_range_hex
20
+ root = RP.parse('[\x00-\x99]')
21
+ set = root[0]
22
+ range = set[0]
23
+
24
+ assert_equal 1, set.count
25
+ assert_equal CharacterSet::Range, range.class
26
+ assert_equal 2, range.count
27
+ assert_equal '\x00', range.first.to_s
28
+ assert_equal EscapeSequence::Hex, range.first.class
29
+ assert_equal '\x99', range.last.to_s
30
+ assert_equal EscapeSequence::Hex, range.last.class
31
+ assert set.matches?('\x50')
32
+ end
33
+
34
+ def test_parse_set_range_unicode
35
+ root = RP.parse('[\u{40 42}-\u1234]')
36
+ set = root[0]
37
+ range = set[0]
38
+
39
+ assert_equal 1, set.count
40
+ assert_equal CharacterSet::Range, range.class
41
+ assert_equal 2, range.count
42
+ assert_equal '\u{40 42}', range.first.to_s
43
+ assert_equal EscapeSequence::CodepointList, range.first.class
44
+ assert_equal '\u1234', range.last.to_s
45
+ assert_equal EscapeSequence::Codepoint, range.last.class
46
+ assert set.matches?('\u600')
47
+ end
48
+
49
+ def test_parse_set_range_edge_case_leading_dash
50
+ root = RP.parse('[--z]')
51
+ set = root[0]
52
+ range = set[0]
53
+
54
+ assert_equal 1, set.count
55
+ assert_equal 2, range.count
56
+ assert set.matches?('a')
57
+ end
58
+
59
+ def test_parse_set_range_edge_case_trailing_dash
60
+ root = RP.parse('[!--]')
61
+ set = root[0]
62
+ range = set[0]
63
+
64
+ assert_equal 1, set.count
65
+ assert_equal 2, range.count
66
+ assert set.matches?('$')
67
+ end
68
+
69
+ def test_parse_set_range_edge_case_leading_negate
70
+ root = RP.parse('[^-z]')
71
+ set = root[0]
72
+
73
+ assert_equal 2, set.count
74
+ assert set.matches?('a')
75
+ refute set.matches?('z')
76
+ end
77
+
78
+ def test_parse_set_range_edge_case_trailing_negate
79
+ root = RP.parse('[!-^]')
80
+ set = root[0]
81
+ range = set[0]
82
+
83
+ assert_equal 1, set.count
84
+ assert_equal 2, range.count
85
+ assert set.matches?('$')
86
+ end
87
+
88
+ def test_parse_set_range_edge_case_leading_intersection
89
+ root = RP.parse('[[\-ab]&&-bc]')
90
+ set = root[0]
91
+
92
+ assert_equal 1, set.count
93
+ assert_equal '-bc', set.first.last.to_s
94
+ assert set.matches?('-')
95
+ assert set.matches?('b')
96
+ refute set.matches?('a')
97
+ refute set.matches?('c')
98
+ end
99
+
100
+ def test_parse_set_range_edge_case_trailing_intersection
101
+ root = RP.parse('[bc-&&[\-ab]]')
102
+ set = root[0]
103
+
104
+ assert_equal 1, set.count
105
+ assert_equal 'bc-', set.first.first.to_s
106
+ assert set.matches?('-')
107
+ assert set.matches?('b')
108
+ refute set.matches?('a')
109
+ refute set.matches?('c')
110
+ end
111
+ end
data/test/parser/test_all.rb CHANGED
@@ -2,11 +2,14 @@ require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  %w{
4
4
  alternation anchors errors escapes free_space groups
5
- properties quantifiers refcalls sets types
5
+ posix_classes properties quantifiers refcalls sets types
6
6
  }.each do|tc|
7
7
  require File.expand_path("../test_#{tc}", __FILE__)
8
8
  end
9
9
 
10
+ require File.expand_path('../set/test_ranges.rb', __FILE__)
11
+ require File.expand_path('../set/test_intersections.rb', __FILE__)
12
+
10
13
  if RUBY_VERSION >= '2.0.0'
11
14
  %w{conditionals keep}.each do|tc|
12
15
  require File.expand_path("../test_#{tc}", __FILE__)
data/test/parser/test_escapes.rb CHANGED
@@ -11,10 +11,6 @@ class TestParserEscapes < Test::Unit::TestCase
11
11
  /a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
12
12
  /a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
13
13
 
14
- # special cases
15
- /a\bc/ => [1, :anchor, :word_boundary, Anchor::WordBoundary],
16
- /a\sc/ => [1, :type, :space, CharacterType::Space],
17
-
18
14
  # meta character escapes
19
15
  /a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
20
16
  /a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
@@ -27,14 +23,15 @@ class TestParserEscapes < Test::Unit::TestCase
27
23
  /a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
28
24
 
29
25
  # unicode escapes
30
- /a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Literal],
31
- /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::Literal],
26
+ /a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
27
+ /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
28
+ /a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
32
29
 
33
30
  # hex escapes
34
- /a\xFF/n => [1, :escape, :hex, EscapeSequence::Literal],
31
+ /a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
35
32
 
36
33
  # octal escapes
37
- /a\177/n => [1, :escape, :octal, EscapeSequence::Literal],
34
+ /a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
38
35
  }
39
36
 
40
37
  tests.each_with_index do |(pattern, (index, type, token, klass)), count|
@@ -50,11 +47,35 @@ class TestParserEscapes < Test::Unit::TestCase
50
47
  end
51
48
  end
52
49
 
50
+ def test_parse_chars_and_codepoints
51
+ root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
52
+
53
+ assert_equal "\n", root[0].char
54
+ assert_equal 10, root[0].codepoint
55
+
56
+ assert_equal "?", root[1].char
57
+ assert_equal 63, root[1].codepoint
58
+
59
+ assert_equal "A", root[2].char
60
+ assert_equal 65, root[2].codepoint
61
+
62
+ assert_equal "B", root[3].char
63
+ assert_equal 66, root[3].codepoint
64
+
65
+ assert_equal "C", root[4].char
66
+ assert_equal 67, root[4].codepoint
67
+
68
+ assert_equal ["D", "E"], root[5].chars
69
+ assert_equal [68, 69], root[5].codepoints
70
+ end
71
+
53
72
  def test_parse_escape_control_sequence_lower
54
73
  root = RP.parse(/a\\\c2b/)
55
74
 
56
75
  assert_equal EscapeSequence::Control, root[2].class
57
76
  assert_equal '\\c2', root[2].text
77
+ assert_equal "\u0012", root[2].char
78
+ assert_equal 18, root[2].codepoint
58
79
  end
59
80
 
60
81
  def test_parse_escape_control_sequence_upper
@@ -62,6 +83,8 @@ class TestParserEscapes < Test::Unit::TestCase
62
83
 
63
84
  assert_equal EscapeSequence::Control, root[2].class
64
85
  assert_equal '\\C-C', root[2].text
86
+ assert_equal "\u0003", root[2].char
87
+ assert_equal 3, root[2].codepoint
65
88
  end
66
89
 
67
90
  def test_parse_escape_meta_sequence
@@ -69,6 +92,8 @@ class TestParserEscapes < Test::Unit::TestCase
69
92
 
70
93
  assert_equal EscapeSequence::Meta, root[2].class
71
94
  assert_equal '\\M-Z', root[2].text
95
+ assert_equal "\u00DA", root[2].char
96
+ assert_equal 218, root[2].codepoint
72
97
  end
73
98
 
74
99
  def test_parse_escape_meta_control_sequence
@@ -76,6 +101,8 @@ class TestParserEscapes < Test::Unit::TestCase
76
101
 
77
102
  assert_equal EscapeSequence::MetaControl, root[2].class
78
103
  assert_equal '\\M-\\C-X', root[2].text
104
+ assert_equal "\u0098", root[2].char
105
+ assert_equal 152, root[2].codepoint
79
106
  end
80
107
 
81
108
  def test_parse_lower_c_meta_control_sequence
@@ -83,6 +110,8 @@ class TestParserEscapes < Test::Unit::TestCase
83
110
 
84
111
  assert_equal EscapeSequence::MetaControl, root[2].class
85
112
  assert_equal '\\M-\\cX', root[2].text
113
+ assert_equal "\u0098", root[2].char
114
+ assert_equal 152, root[2].codepoint
86
115
  end
87
116
 
88
117
  def test_parse_escape_reverse_meta_control_sequence
@@ -90,6 +119,8 @@ class TestParserEscapes < Test::Unit::TestCase
90
119
 
91
120
  assert_equal EscapeSequence::MetaControl, root[2].class
92
121
  assert_equal '\\C-\\M-X', root[2].text
122
+ assert_equal "\u0098", root[2].char
123
+ assert_equal 152, root[2].codepoint
93
124
  end
94
125
 
95
126
  def test_parse_escape_reverse_lower_c_meta_control_sequence
@@ -97,6 +128,7 @@ class TestParserEscapes < Test::Unit::TestCase
97
128
 
98
129
  assert_equal EscapeSequence::MetaControl, root[2].class
99
130
  assert_equal '\\c\\M-X', root[2].text
131
+ assert_equal "\u0098", root[2].char
132
+ assert_equal 152, root[2].codepoint
100
133
  end
101
-
102
134
  end