regexp_parser 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -6,7 +6,7 @@ module Regexp::Syntax
6
6
  implements :anchor, Anchor::All
7
7
  implements :assertion, Assertion::Lookahead
8
8
  implements :backref, [:number]
9
-
9
+ implements :posixclass, PosixClass::Standard
10
10
  implements :escape,
11
11
  Escape::Basic + Escape::Backreference +
12
12
  Escape::ASCII + Escape::Meta + Escape::Control
@@ -19,9 +19,7 @@ module Regexp::Syntax
19
19
  Quantifier::Greedy + Quantifier::Reluctant +
20
20
  Quantifier::Interval + Quantifier::IntervalReluctant
21
21
 
22
- implements :set, CharacterSet::OpenClose +
23
- CharacterSet::Extended + CharacterSet::Types +
24
- CharacterSet::POSIX::Standard
22
+ implements :set, CharacterSet::OpenClose + CharacterSet::Extended
25
23
 
26
24
  implements :type,
27
25
  CharacterType::Extended
@@ -9,6 +9,10 @@ module Regexp::Syntax
9
9
  implements :backref, Backreference::All +
10
10
  SubexpressionCall::All
11
11
 
12
+ implements :posixclass, PosixClass::Extensions
13
+
14
+ implements :nonposixclass, PosixClass::All
15
+
12
16
  implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
13
17
 
14
18
  implements :type, CharacterType::Hex
@@ -21,16 +25,6 @@ module Regexp::Syntax
21
25
 
22
26
  implements :quantifier,
23
27
  Quantifier::Possessive + Quantifier::IntervalPossessive
24
-
25
- implements :set,
26
- CharacterSet::POSIX::StandardNegative +
27
- CharacterSet::POSIX::Extensions +
28
- CharacterSet::POSIX::ExtensionsNegative +
29
- UnicodeProperty::V1_9_0
30
-
31
- implements :subset, CharacterSet::OpenClose +
32
- CharacterSet::Extended + CharacterSet::Types +
33
- CharacterSet::POSIX::Standard
34
28
  end
35
29
  end
36
30
  end
@@ -10,8 +10,6 @@ module Regexp::Syntax
10
10
  implements :nonproperty, UnicodeProperty::V2_0_0
11
11
 
12
12
  implements :type, CharacterType::Clustered
13
- implements :set, CharacterSet::Clustered
14
- implements :subset, CharacterSet::Clustered
15
13
 
16
14
  excludes :property, :newline
17
15
  excludes :nonproperty, :newline
@@ -3,7 +3,7 @@ module Regexp::Syntax
3
3
  def initialize
4
4
  super
5
5
 
6
- implements :group, Group::Absence
6
+ implements :group, Group::V2_4_1
7
7
  end
8
8
  end
9
9
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '0.5.0'
3
+ VERSION = '1.0.0'
4
4
  end
5
5
  end
@@ -23,7 +23,8 @@ Gem::Specification.new do |gem|
23
23
 
24
24
  gem.files = Dir.glob('{lib,test}/**/*.rb') +
25
25
  Dir.glob('lib/**/*.rl') +
26
- %w(Gemfile Rakefile LICENSE README.md ChangeLog regexp_parser.gemspec)
26
+ Dir.glob('lib/**/*.yml') +
27
+ %w(Gemfile Rakefile LICENSE README.md CHANGELOG.md regexp_parser.gemspec)
27
28
 
28
29
  gem.test_files = Dir.glob('test/**/*.rb')
29
30
 
@@ -40,7 +40,8 @@ class ExpressionBase < Test::Unit::TestCase
40
40
  assert_equal true, root[0].terminal?
41
41
  assert_equal true, root[1].terminal?
42
42
  assert_equal false, root[2].terminal?
43
- assert_equal true, root[2][0].terminal?
43
+ assert_equal false, root[2][0].terminal?
44
+ assert_equal true, root[2][0][0].terminal?
44
45
  assert_equal true, root[3].terminal?
45
46
  assert_equal true, root[4].terminal?
46
47
  end
@@ -86,61 +86,4 @@ class ExpressionClone < Test::Unit::TestCase
86
86
  end
87
87
  end
88
88
 
89
- def test_expression_clone_set
90
- root = RP.parse(/^a(b([cde])f)g$/)
91
- copy = root.clone
92
-
93
- root_2_1 = root.expressions[2][1]
94
-
95
- copy.expressions[2][1].each_with_index do |exp, index|
96
- assert exp.respond_to?(:members)
97
-
98
- # The members arrays are not equal.
99
- refute_equal exp.members.object_id,
100
- root_2_1[index].members.object_id
101
-
102
- # The members in the arrays are not equal.
103
- exp.members.each_with_index do |member, member_index|
104
- refute_equal member.object_id,
105
- root_2_1[index].members[member_index].object_id
106
- end
107
- end
108
- end
109
-
110
- def test_expression_clone_subset
111
- # Explicitly set syntax to ruby 1.9 because 1.8 does not
112
- # implement subsets.
113
- root = RP.parse('^a(b([c[def]g])h)i$', 'ruby/1.9')
114
- copy = root.clone
115
-
116
- root_set = root.expressions[2][1][0]
117
- copy_set = copy.expressions[2][1][0]
118
-
119
- root_subset = root_set.members[1]
120
- copy_subset = copy_set.members[1]
121
-
122
- # Sanity checks
123
- assert root_set.respond_to?(:members)
124
- assert copy_set.respond_to?(:members)
125
-
126
- assert root_subset.respond_to?(:members)
127
- assert copy_subset.respond_to?(:members)
128
-
129
- # The sets are not equal
130
- refute_equal copy_set.object_id, root_set.object_id
131
-
132
- # The subsets are not equal
133
- refute_equal copy_subset.object_id, root_subset.object_id
134
-
135
- # The subsets' members arrays are not equal.
136
- refute_equal copy_subset.members.object_id,
137
- root_subset.members.object_id
138
-
139
- # The subsets' members are not equal
140
- copy_subset.members.each_with_index do |member, member_index|
141
- refute_equal member.object_id,
142
- root_subset.members[member_index].object_id
143
- end
144
- end
145
-
146
89
  end
@@ -2,60 +2,83 @@ require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  class ExpressionSet < Test::Unit::TestCase
4
4
 
5
- def test_expression_set_exapnd_members_digit
5
+ def test_expression_set_expand_members_digit
6
6
  set = RP.parse('[\d]').first
7
7
 
8
8
  assert_equal ['0-9'], set.expand_members
9
9
  assert_equal ['\p{Digit}'], set.expand_members(true)
10
10
  end
11
11
 
12
- def test_expression_set_exapnd_members_nondigit
12
+ def test_expression_set_expand_members_nondigit
13
13
  set = RP.parse('[\D]').first
14
14
 
15
15
  assert_equal ['^0-9'], set.expand_members
16
16
  assert_equal ['\P{Digit}'], set.expand_members(true)
17
17
  end
18
18
 
19
- def test_expression_set_exapnd_members_word
19
+ def test_expression_set_expand_members_word
20
20
  set = RP.parse('[\w]').first
21
21
 
22
22
  assert_equal ['A-Za-z0-9_'], set.expand_members
23
23
  assert_equal ['\p{Word}'], set.expand_members(true)
24
24
  end
25
25
 
26
- def test_expression_set_exapnd_members_nonword
26
+ def test_expression_set_expand_members_nonword
27
27
  set = RP.parse('[\W]').first
28
28
 
29
29
  assert_equal ['^A-Za-z0-9_'], set.expand_members
30
30
  assert_equal ['\P{Word}'], set.expand_members(true)
31
31
  end
32
32
 
33
- def test_expression_set_exapnd_members_space
33
+ def test_expression_set_expand_members_space
34
34
  set = RP.parse('[\s]').first
35
35
 
36
36
  assert_equal [' \t\f\v\n\r'], set.expand_members
37
37
  assert_equal ['\p{Space}'], set.expand_members(true)
38
38
  end
39
39
 
40
- def test_expression_set_exapnd_members_nonspace
40
+ def test_expression_set_expand_members_nonspace
41
41
  set = RP.parse('[\S]').first
42
42
 
43
43
  assert_equal ['^ \t\f\v\n\r'], set.expand_members
44
44
  assert_equal ['\P{Space}'], set.expand_members(true)
45
45
  end
46
46
 
47
- def test_expression_set_exapnd_members_xdigit
47
+ def test_expression_set_expand_members_xdigit
48
48
  set = RP.parse('[\h]').first
49
49
 
50
50
  assert_equal ['0-9A-Fa-f'], set.expand_members
51
51
  assert_equal ['\p{Xdigit}'], set.expand_members(true)
52
52
  end
53
53
 
54
- def test_expression_set_exapnd_members_nonxdigit
54
+ def test_expression_set_expand_members_nonxdigit
55
55
  set = RP.parse('[\H]').first
56
56
 
57
57
  assert_equal ['^0-9A-Fa-f'], set.expand_members
58
58
  assert_equal ['\P{Xdigit}'], set.expand_members(true)
59
59
  end
60
60
 
61
+ def test_expression_set_include
62
+ set = RP.parse('[ac-eh\s[:digit:]\x20[b]]').first
63
+
64
+ assert set.include?('a')
65
+ assert set.include?('a', true)
66
+ assert set.include?('c-e')
67
+ assert set.include?('h')
68
+ assert set.include?('\s')
69
+ assert set.include?('[:digit:]')
70
+ assert set.include?('\x20')
71
+
72
+ assert set.include?('b')
73
+ refute set.include?('b', true) # should not include b directly
74
+
75
+ refute set.include?(']')
76
+ refute set.include?('[')
77
+ refute set.include?('x')
78
+ refute set.include?('\S')
79
+
80
+ subset = set.last
81
+ assert subset.include?('b')
82
+ refute subset.include?('a')
83
+ end
61
84
  end
@@ -151,7 +151,7 @@ class Expressionstrfregexp < Test::Unit::TestCase
151
151
  set = seq_2.first
152
152
  assert_equal '[d-gk-p]', set.strfregexp('%t')
153
153
  assert_equal '[d-gk-p]+', set.strfregexp('%T')
154
- assert_equal '[d-gk-p]+', set.strfregexp('%~t')
154
+ assert_equal 'set:character', set.strfregexp('%~t')
155
155
  end
156
156
 
157
157
  def test_expression_strfregexp_combined
@@ -180,7 +180,10 @@ class Expressionstrfregexp < Test::Unit::TestCase
180
180
  assert_equal(
181
181
  "@0+15 expression:root\n" +
182
182
  " @0+1 a\n" +
183
- " @1+6 [b-d]*\n" +
183
+ " @1+6 set:character\n" +
184
+ " @2+3 set:range\n" +
185
+ " @2+1 b\n" +
186
+ " @4+1 d\n" +
184
187
  " @7+8 group:capture\n" +
185
188
  " @8+1 e\n" +
186
189
  " @9+4 group:capture\n" +
@@ -195,7 +198,10 @@ class Expressionstrfregexp < Test::Unit::TestCase
195
198
  assert_equal(
196
199
  "@0+15 expression:root-SEP-" +
197
200
  " @0+1 a-SEP-" +
198
- " @1+6 [b-d]*-SEP-" +
201
+ " @1+6 set:character-SEP-" +
202
+ " @2+3 set:range-SEP-" +
203
+ " @2+1 b-SEP-" +
204
+ " @4+1 d-SEP-" +
199
205
  " @7+8 group:capture-SEP-" +
200
206
  " @8+1 e-SEP-" +
201
207
  " @9+4 group:capture-SEP-" +
@@ -209,7 +215,10 @@ class Expressionstrfregexp < Test::Unit::TestCase
209
215
 
210
216
  assert_equal(
211
217
  "@0+1 a\n" +
212
- "@1+6 [b-d]*\n" +
218
+ "@1+6 set:character\n" +
219
+ " @2+3 set:range\n" +
220
+ " @2+1 b\n" +
221
+ " @4+1 d\n" +
213
222
  "@7+8 group:capture\n" +
214
223
  " @8+1 e\n" +
215
224
  " @9+4 group:capture\n" +
@@ -21,4 +21,29 @@ class ExpressionSubexpression < Test::Unit::TestCase
21
21
  end
22
22
  end
23
23
 
24
+ def test_subexpression_nesting_level
25
+ root = RP.parse(/a(b(c\d|[ef-g[h]]))/)
26
+
27
+ tests = {
28
+ 'a' => 1,
29
+ 'b' => 2,
30
+ '|' => 3,
31
+ 'c\d' => 4, # first alternative
32
+ 'c' => 5,
33
+ '\d' => 5,
34
+ '[ef-g[h]]' => 4, # second alternative
35
+ 'e' => 5,
36
+ '-' => 5,
37
+ 'f' => 6,
38
+ 'g' => 6,
39
+ 'h' => 6,
40
+ }
41
+
42
+ root.each_expression do |exp|
43
+ next unless (expected_nesting_level = tests.delete(exp.text))
44
+ assert_equal exp.nesting_level, expected_nesting_level
45
+ end
46
+
47
+ assert tests.empty?
48
+ end
24
49
  end
@@ -3,7 +3,7 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class SubexpressionTraverse < Test::Unit::TestCase
4
4
 
5
5
  def test_subexpression_traverse
6
- root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
6
+ root = RP.parse(/a(b(c(d)))|g[h-i]j|klmn/)
7
7
 
8
8
  enters = 0
9
9
  visits = 0
@@ -15,14 +15,14 @@ class SubexpressionTraverse < Test::Unit::TestCase
15
15
  exits += 1 if event == :exit
16
16
  }
17
17
 
18
- assert_equal 7, enters
18
+ assert_equal 9, enters
19
19
  assert_equal exits, enters
20
20
 
21
- assert_equal 8, visits
21
+ assert_equal 9, visits
22
22
  end
23
23
 
24
24
  def test_subexpression_traverse_include_self
25
- root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
25
+ root = RP.parse(/a(b(c(d)))|g[h-i]j|klmn/)
26
26
 
27
27
  enters = 0
28
28
  visits = 0
@@ -34,10 +34,10 @@ class SubexpressionTraverse < Test::Unit::TestCase
34
34
  exits += 1 if event == :exit
35
35
  }
36
36
 
37
- assert_equal 8, enters
37
+ assert_equal 10, enters
38
38
  assert_equal exits, enters
39
39
 
40
- assert_equal 8, visits
40
+ assert_equal 9, visits
41
41
  end
42
42
 
43
43
  def test_subexpression_walk_alias
@@ -54,18 +54,18 @@ class SubexpressionTraverse < Test::Unit::TestCase
54
54
  count += 1
55
55
  }
56
56
 
57
- assert_equal 10, count
57
+ assert_equal 13, count
58
58
  end
59
59
 
60
60
  def test_subexpression_each_expression_include_self
61
- root = RP.parse(/a(?x:b(c))|g[hi]/)
61
+ root = RP.parse(/a(?x:b(c))|g[h-k]/)
62
62
 
63
63
  count = 0
64
64
  root.each_expression(true) {|exp, index|
65
65
  count += 1
66
66
  }
67
67
 
68
- assert_equal 11, count
68
+ assert_equal 14, count
69
69
  end
70
70
 
71
71
  def test_subexpression_each_expression_indices
@@ -86,13 +86,13 @@ class SubexpressionTraverse < Test::Unit::TestCase
86
86
  assert_equal [0, 0, 1, 0, 2], indices
87
87
  end
88
88
 
89
- def test_subexpression_map_without_block
89
+ def test_subexpression_flat_map_without_block
90
90
  root = RP.parse(/a(b([c-e]+))?/)
91
91
 
92
- array = root.map
92
+ array = root.flat_map
93
93
 
94
94
  assert_equal Array, array.class
95
- assert_equal 5, array.length
95
+ assert_equal 8, array.length
96
96
 
97
97
  array.each do |item|
98
98
  assert_equal Array, item.class
@@ -102,35 +102,35 @@ class SubexpressionTraverse < Test::Unit::TestCase
102
102
  end
103
103
  end
104
104
 
105
- def test_subexpression_map_without_block_include_self
105
+ def test_subexpression_flat_map_without_block_include_self
106
106
  root = RP.parse(/a(b([c-e]+))?/)
107
107
 
108
- array = root.map(true)
108
+ array = root.flat_map(true)
109
109
 
110
110
  assert_equal Array, array.class
111
- assert_equal 6, array.length
111
+ assert_equal 9, array.length
112
112
  end
113
113
 
114
- def test_subexpression_map_indices
114
+ def test_subexpression_flat_map_indices
115
115
  root = RP.parse(/a(b([c-e]+))?f*g/)
116
116
 
117
- indices = root.map {|exp, index| index}
117
+ indices = root.flat_map {|exp, index| index}
118
118
 
119
- assert_equal [0, 1, 0, 1, 0, 2, 3], indices
119
+ assert_equal [0, 1, 0, 1, 0, 0, 0, 1, 2, 3], indices
120
120
  end
121
121
 
122
- def test_subexpression_map_indices_include_self
122
+ def test_subexpression_flat_map_indices_include_self
123
123
  root = RP.parse(/a(b([c-e]+))?f*g/)
124
124
 
125
- indices = root.map(true) {|exp, index| index}
125
+ indices = root.flat_map(true) {|exp, index| index}
126
126
 
127
- assert_equal [0, 0, 1, 0, 1, 0, 2, 3], indices
127
+ assert_equal [0, 0, 1, 0, 1, 0, 0, 0, 1, 2, 3], indices
128
128
  end
129
129
 
130
- def test_subexpression_map_expressions
130
+ def test_subexpression_flat_map_expressions
131
131
  root = RP.parse(/a(b(c(d)))/)
132
132
 
133
- levels = root.map {|exp, index|
133
+ levels = root.flat_map {|exp, index|
134
134
  [exp.level, exp.text] if exp.terminal?
135
135
  }.compact
136
136
 
@@ -139,10 +139,10 @@ class SubexpressionTraverse < Test::Unit::TestCase
139
139
  ], levels
140
140
  end
141
141
 
142
- def test_subexpression_map_expressions_include_self
142
+ def test_subexpression_flat_map_expressions_include_self
143
143
  root = RP.parse(/a(b(c(d)))/)
144
144
 
145
- levels = root.map(true) {|exp, index|
145
+ levels = root.flat_map(true) {|exp, index|
146
146
  [exp.level, exp.to_s]
147
147
  }.compact
148
148