regexp_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -6,7 +6,7 @@ module Regexp::Syntax
6
6
  implements :anchor, Anchor::All
7
7
  implements :assertion, Assertion::Lookahead
8
8
  implements :backref, [:number]
9
-
9
+ implements :posixclass, PosixClass::Standard
10
10
  implements :escape,
11
11
  Escape::Basic + Escape::Backreference +
12
12
  Escape::ASCII + Escape::Meta + Escape::Control
@@ -19,9 +19,7 @@ module Regexp::Syntax
19
19
  Quantifier::Greedy + Quantifier::Reluctant +
20
20
  Quantifier::Interval + Quantifier::IntervalReluctant
21
21
 
22
- implements :set, CharacterSet::OpenClose +
23
- CharacterSet::Extended + CharacterSet::Types +
24
- CharacterSet::POSIX::Standard
22
+ implements :set, CharacterSet::OpenClose + CharacterSet::Extended
25
23
 
26
24
  implements :type,
27
25
  CharacterType::Extended
@@ -9,6 +9,10 @@ module Regexp::Syntax
9
9
  implements :backref, Backreference::All +
10
10
  SubexpressionCall::All
11
11
 
12
+ implements :posixclass, PosixClass::Extensions
13
+
14
+ implements :nonposixclass, PosixClass::All
15
+
12
16
  implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
13
17
 
14
18
  implements :type, CharacterType::Hex
@@ -21,16 +25,6 @@ module Regexp::Syntax
21
25
 
22
26
  implements :quantifier,
23
27
  Quantifier::Possessive + Quantifier::IntervalPossessive
24
-
25
- implements :set,
26
- CharacterSet::POSIX::StandardNegative +
27
- CharacterSet::POSIX::Extensions +
28
- CharacterSet::POSIX::ExtensionsNegative +
29
- UnicodeProperty::V1_9_0
30
-
31
- implements :subset, CharacterSet::OpenClose +
32
- CharacterSet::Extended + CharacterSet::Types +
33
- CharacterSet::POSIX::Standard
34
28
  end
35
29
  end
36
30
  end
@@ -10,8 +10,6 @@ module Regexp::Syntax
10
10
  implements :nonproperty, UnicodeProperty::V2_0_0
11
11
 
12
12
  implements :type, CharacterType::Clustered
13
- implements :set, CharacterSet::Clustered
14
- implements :subset, CharacterSet::Clustered
15
13
 
16
14
  excludes :property, :newline
17
15
  excludes :nonproperty, :newline
@@ -3,7 +3,7 @@ module Regexp::Syntax
3
3
  def initialize
4
4
  super
5
5
 
6
- implements :group, Group::Absence
6
+ implements :group, Group::V2_4_1
7
7
  end
8
8
  end
9
9
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '0.5.0'
3
+ VERSION = '1.0.0'
4
4
  end
5
5
  end
@@ -23,7 +23,8 @@ Gem::Specification.new do |gem|
23
23
 
24
24
  gem.files = Dir.glob('{lib,test}/**/*.rb') +
25
25
  Dir.glob('lib/**/*.rl') +
26
- %w(Gemfile Rakefile LICENSE README.md ChangeLog regexp_parser.gemspec)
26
+ Dir.glob('lib/**/*.yml') +
27
+ %w(Gemfile Rakefile LICENSE README.md CHANGELOG.md regexp_parser.gemspec)
27
28
 
28
29
  gem.test_files = Dir.glob('test/**/*.rb')
29
30
 
@@ -40,7 +40,8 @@ class ExpressionBase < Test::Unit::TestCase
40
40
  assert_equal true, root[0].terminal?
41
41
  assert_equal true, root[1].terminal?
42
42
  assert_equal false, root[2].terminal?
43
- assert_equal true, root[2][0].terminal?
43
+ assert_equal false, root[2][0].terminal?
44
+ assert_equal true, root[2][0][0].terminal?
44
45
  assert_equal true, root[3].terminal?
45
46
  assert_equal true, root[4].terminal?
46
47
  end
@@ -86,61 +86,4 @@ class ExpressionClone < Test::Unit::TestCase
86
86
  end
87
87
  end
88
88
 
89
- def test_expression_clone_set
90
- root = RP.parse(/^a(b([cde])f)g$/)
91
- copy = root.clone
92
-
93
- root_2_1 = root.expressions[2][1]
94
-
95
- copy.expressions[2][1].each_with_index do |exp, index|
96
- assert exp.respond_to?(:members)
97
-
98
- # The members arrays are not equal.
99
- refute_equal exp.members.object_id,
100
- root_2_1[index].members.object_id
101
-
102
- # The members in the arrays are not equal.
103
- exp.members.each_with_index do |member, member_index|
104
- refute_equal member.object_id,
105
- root_2_1[index].members[member_index].object_id
106
- end
107
- end
108
- end
109
-
110
- def test_expression_clone_subset
111
- # Explicitly set syntax to ruby 1.9 because 1.8 does not
112
- # implement subsets.
113
- root = RP.parse('^a(b([c[def]g])h)i$', 'ruby/1.9')
114
- copy = root.clone
115
-
116
- root_set = root.expressions[2][1][0]
117
- copy_set = copy.expressions[2][1][0]
118
-
119
- root_subset = root_set.members[1]
120
- copy_subset = copy_set.members[1]
121
-
122
- # Sanity checks
123
- assert root_set.respond_to?(:members)
124
- assert copy_set.respond_to?(:members)
125
-
126
- assert root_subset.respond_to?(:members)
127
- assert copy_subset.respond_to?(:members)
128
-
129
- # The sets are not equal
130
- refute_equal copy_set.object_id, root_set.object_id
131
-
132
- # The subsets are not equal
133
- refute_equal copy_subset.object_id, root_subset.object_id
134
-
135
- # The subsets' members arrays are not equal.
136
- refute_equal copy_subset.members.object_id,
137
- root_subset.members.object_id
138
-
139
- # The subsets' members are not equal
140
- copy_subset.members.each_with_index do |member, member_index|
141
- refute_equal member.object_id,
142
- root_subset.members[member_index].object_id
143
- end
144
- end
145
-
146
89
  end
@@ -2,60 +2,83 @@ require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  class ExpressionSet < Test::Unit::TestCase
4
4
 
5
- def test_expression_set_exapnd_members_digit
5
+ def test_expression_set_expand_members_digit
6
6
  set = RP.parse('[\d]').first
7
7
 
8
8
  assert_equal ['0-9'], set.expand_members
9
9
  assert_equal ['\p{Digit}'], set.expand_members(true)
10
10
  end
11
11
 
12
- def test_expression_set_exapnd_members_nondigit
12
+ def test_expression_set_expand_members_nondigit
13
13
  set = RP.parse('[\D]').first
14
14
 
15
15
  assert_equal ['^0-9'], set.expand_members
16
16
  assert_equal ['\P{Digit}'], set.expand_members(true)
17
17
  end
18
18
 
19
- def test_expression_set_exapnd_members_word
19
+ def test_expression_set_expand_members_word
20
20
  set = RP.parse('[\w]').first
21
21
 
22
22
  assert_equal ['A-Za-z0-9_'], set.expand_members
23
23
  assert_equal ['\p{Word}'], set.expand_members(true)
24
24
  end
25
25
 
26
- def test_expression_set_exapnd_members_nonword
26
+ def test_expression_set_expand_members_nonword
27
27
  set = RP.parse('[\W]').first
28
28
 
29
29
  assert_equal ['^A-Za-z0-9_'], set.expand_members
30
30
  assert_equal ['\P{Word}'], set.expand_members(true)
31
31
  end
32
32
 
33
- def test_expression_set_exapnd_members_space
33
+ def test_expression_set_expand_members_space
34
34
  set = RP.parse('[\s]').first
35
35
 
36
36
  assert_equal [' \t\f\v\n\r'], set.expand_members
37
37
  assert_equal ['\p{Space}'], set.expand_members(true)
38
38
  end
39
39
 
40
- def test_expression_set_exapnd_members_nonspace
40
+ def test_expression_set_expand_members_nonspace
41
41
  set = RP.parse('[\S]').first
42
42
 
43
43
  assert_equal ['^ \t\f\v\n\r'], set.expand_members
44
44
  assert_equal ['\P{Space}'], set.expand_members(true)
45
45
  end
46
46
 
47
- def test_expression_set_exapnd_members_xdigit
47
+ def test_expression_set_expand_members_xdigit
48
48
  set = RP.parse('[\h]').first
49
49
 
50
50
  assert_equal ['0-9A-Fa-f'], set.expand_members
51
51
  assert_equal ['\p{Xdigit}'], set.expand_members(true)
52
52
  end
53
53
 
54
- def test_expression_set_exapnd_members_nonxdigit
54
+ def test_expression_set_expand_members_nonxdigit
55
55
  set = RP.parse('[\H]').first
56
56
 
57
57
  assert_equal ['^0-9A-Fa-f'], set.expand_members
58
58
  assert_equal ['\P{Xdigit}'], set.expand_members(true)
59
59
  end
60
60
 
61
+ def test_expression_set_include
62
+ set = RP.parse('[ac-eh\s[:digit:]\x20[b]]').first
63
+
64
+ assert set.include?('a')
65
+ assert set.include?('a', true)
66
+ assert set.include?('c-e')
67
+ assert set.include?('h')
68
+ assert set.include?('\s')
69
+ assert set.include?('[:digit:]')
70
+ assert set.include?('\x20')
71
+
72
+ assert set.include?('b')
73
+ refute set.include?('b', true) # should not include b directly
74
+
75
+ refute set.include?(']')
76
+ refute set.include?('[')
77
+ refute set.include?('x')
78
+ refute set.include?('\S')
79
+
80
+ subset = set.last
81
+ assert subset.include?('b')
82
+ refute subset.include?('a')
83
+ end
61
84
  end
@@ -151,7 +151,7 @@ class Expressionstrfregexp < Test::Unit::TestCase
151
151
  set = seq_2.first
152
152
  assert_equal '[d-gk-p]', set.strfregexp('%t')
153
153
  assert_equal '[d-gk-p]+', set.strfregexp('%T')
154
- assert_equal '[d-gk-p]+', set.strfregexp('%~t')
154
+ assert_equal 'set:character', set.strfregexp('%~t')
155
155
  end
156
156
 
157
157
  def test_expression_strfregexp_combined
@@ -180,7 +180,10 @@ class Expressionstrfregexp < Test::Unit::TestCase
180
180
  assert_equal(
181
181
  "@0+15 expression:root\n" +
182
182
  " @0+1 a\n" +
183
- " @1+6 [b-d]*\n" +
183
+ " @1+6 set:character\n" +
184
+ " @2+3 set:range\n" +
185
+ " @2+1 b\n" +
186
+ " @4+1 d\n" +
184
187
  " @7+8 group:capture\n" +
185
188
  " @8+1 e\n" +
186
189
  " @9+4 group:capture\n" +
@@ -195,7 +198,10 @@ class Expressionstrfregexp < Test::Unit::TestCase
195
198
  assert_equal(
196
199
  "@0+15 expression:root-SEP-" +
197
200
  " @0+1 a-SEP-" +
198
- " @1+6 [b-d]*-SEP-" +
201
+ " @1+6 set:character-SEP-" +
202
+ " @2+3 set:range-SEP-" +
203
+ " @2+1 b-SEP-" +
204
+ " @4+1 d-SEP-" +
199
205
  " @7+8 group:capture-SEP-" +
200
206
  " @8+1 e-SEP-" +
201
207
  " @9+4 group:capture-SEP-" +
@@ -209,7 +215,10 @@ class Expressionstrfregexp < Test::Unit::TestCase
209
215
 
210
216
  assert_equal(
211
217
  "@0+1 a\n" +
212
- "@1+6 [b-d]*\n" +
218
+ "@1+6 set:character\n" +
219
+ " @2+3 set:range\n" +
220
+ " @2+1 b\n" +
221
+ " @4+1 d\n" +
213
222
  "@7+8 group:capture\n" +
214
223
  " @8+1 e\n" +
215
224
  " @9+4 group:capture\n" +
@@ -21,4 +21,29 @@ class ExpressionSubexpression < Test::Unit::TestCase
21
21
  end
22
22
  end
23
23
 
24
+ def test_subexpression_nesting_level
25
+ root = RP.parse(/a(b(c\d|[ef-g[h]]))/)
26
+
27
+ tests = {
28
+ 'a' => 1,
29
+ 'b' => 2,
30
+ '|' => 3,
31
+ 'c\d' => 4, # first alternative
32
+ 'c' => 5,
33
+ '\d' => 5,
34
+ '[ef-g[h]]' => 4, # second alternative
35
+ 'e' => 5,
36
+ '-' => 5,
37
+ 'f' => 6,
38
+ 'g' => 6,
39
+ 'h' => 6,
40
+ }
41
+
42
+ root.each_expression do |exp|
43
+ next unless (expected_nesting_level = tests.delete(exp.text))
44
+ assert_equal exp.nesting_level, expected_nesting_level
45
+ end
46
+
47
+ assert tests.empty?
48
+ end
24
49
  end
@@ -3,7 +3,7 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class SubexpressionTraverse < Test::Unit::TestCase
4
4
 
5
5
  def test_subexpression_traverse
6
- root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
6
+ root = RP.parse(/a(b(c(d)))|g[h-i]j|klmn/)
7
7
 
8
8
  enters = 0
9
9
  visits = 0
@@ -15,14 +15,14 @@ class SubexpressionTraverse < Test::Unit::TestCase
15
15
  exits += 1 if event == :exit
16
16
  }
17
17
 
18
- assert_equal 7, enters
18
+ assert_equal 9, enters
19
19
  assert_equal exits, enters
20
20
 
21
- assert_equal 8, visits
21
+ assert_equal 9, visits
22
22
  end
23
23
 
24
24
  def test_subexpression_traverse_include_self
25
- root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
25
+ root = RP.parse(/a(b(c(d)))|g[h-i]j|klmn/)
26
26
 
27
27
  enters = 0
28
28
  visits = 0
@@ -34,10 +34,10 @@ class SubexpressionTraverse < Test::Unit::TestCase
34
34
  exits += 1 if event == :exit
35
35
  }
36
36
 
37
- assert_equal 8, enters
37
+ assert_equal 10, enters
38
38
  assert_equal exits, enters
39
39
 
40
- assert_equal 8, visits
40
+ assert_equal 9, visits
41
41
  end
42
42
 
43
43
  def test_subexpression_walk_alias
@@ -54,18 +54,18 @@ class SubexpressionTraverse < Test::Unit::TestCase
54
54
  count += 1
55
55
  }
56
56
 
57
- assert_equal 10, count
57
+ assert_equal 13, count
58
58
  end
59
59
 
60
60
  def test_subexpression_each_expression_include_self
61
- root = RP.parse(/a(?x:b(c))|g[hi]/)
61
+ root = RP.parse(/a(?x:b(c))|g[h-k]/)
62
62
 
63
63
  count = 0
64
64
  root.each_expression(true) {|exp, index|
65
65
  count += 1
66
66
  }
67
67
 
68
- assert_equal 11, count
68
+ assert_equal 14, count
69
69
  end
70
70
 
71
71
  def test_subexpression_each_expression_indices
@@ -86,13 +86,13 @@ class SubexpressionTraverse < Test::Unit::TestCase
86
86
  assert_equal [0, 0, 1, 0, 2], indices
87
87
  end
88
88
 
89
- def test_subexpression_map_without_block
89
+ def test_subexpression_flat_map_without_block
90
90
  root = RP.parse(/a(b([c-e]+))?/)
91
91
 
92
- array = root.map
92
+ array = root.flat_map
93
93
 
94
94
  assert_equal Array, array.class
95
- assert_equal 5, array.length
95
+ assert_equal 8, array.length
96
96
 
97
97
  array.each do |item|
98
98
  assert_equal Array, item.class
@@ -102,35 +102,35 @@ class SubexpressionTraverse < Test::Unit::TestCase
102
102
  end
103
103
  end
104
104
 
105
- def test_subexpression_map_without_block_include_self
105
+ def test_subexpression_flat_map_without_block_include_self
106
106
  root = RP.parse(/a(b([c-e]+))?/)
107
107
 
108
- array = root.map(true)
108
+ array = root.flat_map(true)
109
109
 
110
110
  assert_equal Array, array.class
111
- assert_equal 6, array.length
111
+ assert_equal 9, array.length
112
112
  end
113
113
 
114
- def test_subexpression_map_indices
114
+ def test_subexpression_flat_map_indices
115
115
  root = RP.parse(/a(b([c-e]+))?f*g/)
116
116
 
117
- indices = root.map {|exp, index| index}
117
+ indices = root.flat_map {|exp, index| index}
118
118
 
119
- assert_equal [0, 1, 0, 1, 0, 2, 3], indices
119
+ assert_equal [0, 1, 0, 1, 0, 0, 0, 1, 2, 3], indices
120
120
  end
121
121
 
122
- def test_subexpression_map_indices_include_self
122
+ def test_subexpression_flat_map_indices_include_self
123
123
  root = RP.parse(/a(b([c-e]+))?f*g/)
124
124
 
125
- indices = root.map(true) {|exp, index| index}
125
+ indices = root.flat_map(true) {|exp, index| index}
126
126
 
127
- assert_equal [0, 0, 1, 0, 1, 0, 2, 3], indices
127
+ assert_equal [0, 0, 1, 0, 1, 0, 0, 0, 1, 2, 3], indices
128
128
  end
129
129
 
130
- def test_subexpression_map_expressions
130
+ def test_subexpression_flat_map_expressions
131
131
  root = RP.parse(/a(b(c(d)))/)
132
132
 
133
- levels = root.map {|exp, index|
133
+ levels = root.flat_map {|exp, index|
134
134
  [exp.level, exp.text] if exp.terminal?
135
135
  }.compact
136
136
 
@@ -139,10 +139,10 @@ class SubexpressionTraverse < Test::Unit::TestCase
139
139
  ], levels
140
140
  end
141
141
 
142
- def test_subexpression_map_expressions_include_self
142
+ def test_subexpression_flat_map_expressions_include_self
143
143
  root = RP.parse(/a(b(c(d)))/)
144
144
 
145
- levels = root.map(true) {|exp, index|
145
+ levels = root.flat_map(true) {|exp, index|
146
146
  [exp.level, exp.to_s]
147
147
  }.compact
148
148