regexp_parser 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
@@ -0,0 +1,7 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ %w{
4
+ base to_s clone
5
+ }.each do|tc|
6
+ require File.expand_path("../test_#{tc}", __FILE__)
7
+ end
@@ -0,0 +1,72 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ExpressionBase < Test::Unit::TestCase
4
+
5
+ def test_expression_to_re
6
+ re_text = '^a*(b([cde]+))+f?$'
7
+
8
+ re = RP.parse(re_text).to_re
9
+
10
+ assert( re.is_a?(::Regexp),
11
+ 'Not a Regexp, but should be')
12
+
13
+ assert_equal( re.source, re_text )
14
+ end
15
+
16
+ def test_expression_terminal?
17
+ root = RP.parse('^a([b]+)c$')
18
+
19
+ assert_equal( false, root.terminal? )
20
+
21
+ assert_equal( true, root[0].terminal? )
22
+ assert_equal( true, root[1].terminal? )
23
+ assert_equal( false, root[2].terminal? )
24
+ assert_equal( true, root[2][0].terminal? )
25
+ assert_equal( true, root[3].terminal? )
26
+ assert_equal( true, root[4].terminal? )
27
+ end
28
+
29
+ def test_expression_alt_terminal?
30
+ root = RP.parse('^(ab|cd)$')
31
+
32
+ assert_equal( false, root.terminal? )
33
+
34
+ assert_equal( true, root[0].terminal? )
35
+ assert_equal( false, root[1].terminal? )
36
+ assert_equal( false, root[1][0].terminal? )
37
+ assert_equal( false, root[1][0][0].terminal? )
38
+ assert_equal( true, root[1][0][0][0].terminal? )
39
+ assert_equal( false, root[1][0][1].terminal? )
40
+ assert_equal( true, root[1][0][1][0].terminal? )
41
+ end
42
+
43
+ def test_expression_coded_offset
44
+ root = RP.parse('^a*(b+(c?))$')
45
+
46
+ assert_equal( '@0+12', root.coded_offset )
47
+
48
+ # All top level offsets
49
+ checks = [
50
+ [ '@0+1', '^' ],
51
+ [ '@1+2', 'a*' ],
52
+ [ '@3+8', '(b+(c?))' ],
53
+ ['@11+1', '$' ],
54
+ ].each_with_index do |check, i|
55
+ against = [ root[i].coded_offset, root[i].to_s ]
56
+ assert_equal( check, against )
57
+ end
58
+
59
+ # Nested expression
60
+ assert_equal(['@4+2', 'b+'],
61
+ [root[2][0].coded_offset, root[2][0].to_s])
62
+
63
+ # Nested subexpression
64
+ assert_equal(['@6+4', '(c?)'],
65
+ [root[2][1].coded_offset, root[2][1].to_s])
66
+
67
+ # Nested subexpression expression
68
+ assert_equal(['@7+2', 'c?'],
69
+ [root[2][1][0].coded_offset, root[2][1][0].to_s])
70
+ end
71
+
72
+ end
@@ -0,0 +1,144 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ExpressionClone < Test::Unit::TestCase
4
+
5
+ def test_expression_clone_base
6
+ root = RP.parse(/^(?i:a)b+$/i)
7
+ copy = root.clone
8
+
9
+ assert_not_equal(copy.object_id, root.object_id)
10
+
11
+ # The text content is equal but the objects are not.
12
+ assert_equal(copy.text, root.text)
13
+ assert_not_equal(copy.text.object_id, root.text.object_id)
14
+
15
+ root_1 = root.expressions[1]
16
+ copy_1 = copy.expressions[1]
17
+
18
+ # The options hash contents are equal but the objects are not.
19
+ assert_equal(copy_1.options, root_1.options)
20
+ assert_not_equal(copy_1.options.object_id,
21
+ root_1.options.object_id)
22
+
23
+ root_2 = root.expressions[2]
24
+ copy_2 = copy.expressions[2]
25
+
26
+ assert( root_2.quantified? )
27
+ assert( copy_2.quantified? )
28
+
29
+ # The quantifier contents are equal but the objects are not.
30
+ assert_equal(copy_2.quantifier.text, root_2.quantifier.text)
31
+
32
+ assert_not_equal(copy_2.quantifier.text.object_id,
33
+ root_2.quantifier.text.object_id)
34
+
35
+ assert_not_equal(copy_2.quantifier.object_id,
36
+ root_2.quantifier.object_id)
37
+ end
38
+
39
+ def test_expression_clone_subexpression
40
+ root = RP.parse(/^a(b([cde])f)g$/)
41
+ copy = root.clone
42
+
43
+ assert( root.respond_to?(:expressions) )
44
+ assert( copy.respond_to?(:expressions) )
45
+
46
+ # The expressions arrays are not equal.
47
+ assert_not_equal(copy.expressions.object_id,
48
+ root.expressions.object_id)
49
+
50
+ # The expressions in the arrays are not equal.
51
+ copy.expressions.each_with_index do |e, ei|
52
+ assert_not_equal(e.object_id,
53
+ root.expressions[ei].object_id)
54
+ end
55
+
56
+ # The expressions in nested expressions are not equal.
57
+ copy.expressions[2].each_with_index do |e, ei|
58
+ assert_not_equal(e.object_id,
59
+ root.expressions[2][ei].object_id)
60
+ end
61
+ end
62
+
63
+ # ruby 1.8 does not implement named groups
64
+ def test_expression_clone_named_group
65
+ root = RP.parse('^(?<somename>a)+bc$')
66
+ copy = root.clone
67
+
68
+ root_1 = root.expressions[1]
69
+ copy_1 = copy.expressions[1]
70
+
71
+ # The names are equal but their objects are not.
72
+ assert_equal(copy_1.name, root_1.name)
73
+ assert_not_equal(copy_1.name.object_id,
74
+ root_1.name.object_id)
75
+
76
+ # Verify super: text objects should be different.
77
+ assert_equal(copy_1.text, root_1.text)
78
+
79
+ # Verify super: expressions arrays are not equal.
80
+ assert_not_equal(copy_1.expressions.object_id,
81
+ root_1.expressions.object_id)
82
+
83
+ # Verify super: expressions in the arrays are not equal.
84
+ copy_1.expressions.each_with_index do |e, ei|
85
+ assert_not_equal(e.object_id,
86
+ root_1.expressions[ei].object_id)
87
+ end
88
+ end
89
+
90
+ def test_expression_clone_set
91
+ root = RP.parse(/^a(b([cde])f)g$/)
92
+ copy = root.clone
93
+
94
+ root_2_1 = root.expressions[2][1]
95
+
96
+ copy.expressions[2][1].each_with_index do |e, ei|
97
+ assert( e.respond_to?(:members) )
98
+
99
+ # The members arrays are not equal.
100
+ assert_not_equal( e.members.object_id, root_2_1[ei].members.object_id )
101
+
102
+ # The members in the arrays are not equal.
103
+ e.members.each_with_index do |m, mi|
104
+ assert_not_equal( m.object_id, root_2_1[ei].members[mi].object_id )
105
+ end
106
+ end
107
+ end
108
+
109
+ def test_expression_clone_subset
110
+ # Explicitly set syntax to ruby 1.9 because 1.8 does not
111
+ # implement subsets.
112
+ root = RP.parse('^a(b([c[def]g])h)i$', 'ruby/1.9')
113
+ copy = root.clone
114
+
115
+ root_set = root.expressions[2][1][0]
116
+ copy_set = copy.expressions[2][1][0]
117
+
118
+ root_subset = root_set.members[1]
119
+ copy_subset = copy_set.members[1]
120
+
121
+ # Sanity checks
122
+ assert( root_set.respond_to?(:members) )
123
+ assert( copy_set.respond_to?(:members) )
124
+
125
+ assert( root_subset.respond_to?(:members) )
126
+ assert( copy_subset.respond_to?(:members) )
127
+
128
+ # The sets are not equal
129
+ assert_not_equal(copy_set.object_id, root_set.object_id)
130
+
131
+ # The subsets are not equal
132
+ assert_not_equal(copy_subset.object_id, root_subset.object_id)
133
+
134
+ # The subsets' members arrays are not equal.
135
+ assert_not_equal( copy_subset.members.object_id,
136
+ root_subset.members.object_id )
137
+
138
+ # The subsets' members are not equal
139
+ copy_subset.members.each_with_index do |m, mi|
140
+ assert_not_equal(m.object_id, root_subset.members[mi].object_id)
141
+ end
142
+ end
143
+
144
+ end
@@ -1,48 +1,48 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
- class ParserExpression < Test::Unit::TestCase
3
+ class ExpressionToS < Test::Unit::TestCase
4
4
 
5
- def test_parse_expression_to_s_literal_alternation
5
+ def test_expression_to_s_literal_alternation
6
6
  pattern = 'abcd|ghij|klmn|pqur'
7
7
  assert_equal( pattern, RP.parse(pattern).to_s )
8
8
  end
9
9
 
10
- def test_parse_expression_to_s_quantified_alternations
10
+ def test_expression_to_s_quantified_alternations
11
11
  pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
12
12
  assert_equal( pattern, RP.parse(pattern).to_s )
13
13
  end
14
14
 
15
- def test_parse_expression_to_s_quantified_sets
15
+ def test_expression_to_s_quantified_sets
16
16
  pattern = '[abc]+|[^def]{3,6}'
17
17
  assert_equal( pattern, RP.parse(pattern).to_s )
18
18
  end
19
19
 
20
- def test_parse_expression_to_s_property_sets
20
+ def test_expression_to_s_property_sets
21
21
  pattern = '[\a\b\p{Lu}\P{Z}\c\d]+'
22
22
  assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
23
23
  end
24
24
 
25
- def test_parse_expression_to_s_groups
25
+ def test_expression_to_s_groups
26
26
  pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
27
27
  assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
28
28
  end
29
29
 
30
- def test_parse_expression_to_s_assertions
30
+ def test_expression_to_s_assertions
31
31
  pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
32
32
  assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
33
33
  end
34
34
 
35
- def test_parse_expression_to_s_comments
35
+ def test_expression_to_s_comments
36
36
  pattern = '(?#start)a(?#middle)b(?#end)'
37
37
  assert_equal( pattern, RP.parse(pattern).to_s )
38
38
  end
39
39
 
40
- def test_parse_expression_to_s_options
40
+ def test_expression_to_s_options
41
41
  pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
42
42
  assert_equal( pattern, RP.parse(pattern).to_s )
43
43
  end
44
44
 
45
- def test_parse_expression_to_s_url
45
+ def test_expression_to_s_url
46
46
  pattern = '(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*'+
47
47
  '\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)'
48
48
  assert_equal( pattern, RP.parse(pattern).to_s )
data/test/helpers.rb CHANGED
@@ -4,5 +4,6 @@ require File.expand_path("../../lib/regexp_parser", __FILE__)
4
4
  RS = Regexp::Scanner
5
5
  RL = Regexp::Lexer
6
6
  RP = Regexp::Parser
7
+ RE = Regexp::Expression
7
8
 
8
9
  include Regexp::Expression
@@ -1,7 +1,7 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  %w{
4
- alternation anchors errors escapes expression groups properties
4
+ alternation anchors errors escapes groups properties
5
5
  quantifiers refcalls sets
6
6
  }.each do|tc|
7
7
  require File.expand_path("../test_#{tc}", __FILE__)
@@ -43,4 +43,39 @@ class ParserAlternation < Test::Unit::TestCase
43
43
  assert_equal( 2, nested.expressions.length )
44
44
  end
45
45
 
46
+ def test_parse_alternation_nested_groups
47
+ root = RP.parse('(i|ey|([ougfd]+)|(ney))')
48
+
49
+ alts = root.expressions[0][0].alternatives
50
+ assert_equal( 4, alts.length )
51
+ end
52
+
53
+ def test_parse_alternation_grouped_alts
54
+ root = RP.parse('ca((n)|(t)|(ll)|(b))')
55
+
56
+ alts = root.expressions[1][0].alternatives
57
+
58
+ assert_equal( 4, alts.length )
59
+ assert_equal( true, alts[0].is_a?(Sequence) )
60
+ assert_equal( true, alts[1].is_a?(Sequence) )
61
+ assert_equal( true, alts[2].is_a?(Sequence) )
62
+ assert_equal( true, alts[3].is_a?(Sequence) )
63
+ end
64
+
65
+ def test_parse_alternation_nested_grouped_alts
66
+ root = RP.parse('ca((n|t)|(ll|b))')
67
+
68
+ alts = root.expressions[1][0].alternatives
69
+
70
+ assert_equal( 2, alts.length )
71
+ assert_equal( true, alts[0].is_a?(Sequence) )
72
+ assert_equal( true, alts[1].is_a?(Sequence) )
73
+
74
+ subalts = root.expressions[1][0][0][0][0].alternatives
75
+
76
+ assert_equal( 2, alts.length )
77
+ assert_equal( true, subalts[0].is_a?(Sequence) )
78
+ assert_equal( true, subalts[1].is_a?(Sequence) )
79
+ end
80
+
46
81
  end
@@ -3,8 +3,8 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class TestParserAnchors < Test::Unit::TestCase
4
4
 
5
5
  tests = {
6
- '^a' => [0, :anchor, :beginning_of_line, Anchor::BOL],
7
- 'a$' => [1, :anchor, :end_of_line, Anchor::EOL],
6
+ '^a' => [0, :anchor, :bol, Anchor::BOL],
7
+ 'a$' => [1, :anchor, :eol, Anchor::EOL],
8
8
 
9
9
  '\Aa' => [0, :anchor, :bos, Anchor::BOS],
10
10
  'a\z' => [1, :anchor, :eos, Anchor::EOS],
@@ -1,6 +1,6 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
- class TestParserGroups < Test::Unit::TestCase
3
+ class TestParserRefcalls < Test::Unit::TestCase
4
4
 
5
5
  def test_parse_backref_named_ab
6
6
  t = RP.parse('(?<X>abc)\k<X>', 'ruby/1.9')[1]
@@ -23,10 +23,8 @@ class TestParserSets < Test::Unit::TestCase
23
23
 
24
24
  assert_equal( true, exp.matches?("6") )
25
25
 
26
- # TODO: figure out why this generate wrong string, but only after
27
- # the assertion above (to_s "piles up")
28
- #assert_equal( true, exp.matches?("v") )
29
- #assert_equal( false, exp.matches?("\x48") )
26
+ assert_equal( true, exp.matches?("v") )
27
+ assert_equal( false, exp.matches?("\x48") )
30
28
  end
31
29
 
32
30
  def test_parse_set_members
@@ -67,16 +65,64 @@ class TestParserSets < Test::Unit::TestCase
67
65
  assert_equal( true, exp.include?('c') )
68
66
  end
69
67
 
68
+ def test_parse_set_nesting_include_at_depth
69
+ exp = RP.parse('[a[b]c]', 'ruby/1.9')[0]
70
+
71
+ assert_equal( true, exp.is_a?(CharacterSet) )
72
+ assert_equal( true, exp.include?('a') )
73
+ assert_equal( true, exp.include?('b') )
74
+ assert_equal( false, exp.include?('b', true) ) # should not include b directly
75
+
76
+ sub = exp.members[1]
77
+ assert_equal( false, sub.include?('a') )
78
+ assert_equal( true, sub.include?('b') )
79
+ assert_equal( true, sub.include?('b', true) )
80
+ assert_equal( false, sub.include?('c') )
81
+ end
82
+
83
+ def test_parse_set_nesting_include_at_depth_2
84
+ exp = RP.parse('[a[b[c[d]e]f]g]', 'ruby/1.9')[0]
85
+
86
+ assert_equal( true, exp.is_a?(CharacterSet) )
87
+ assert_equal( true, exp.include?('a') )
88
+ assert_equal( true, exp.include?('b') )
89
+ assert_equal( false, exp.include?('b', true) ) # should not include b directly
90
+
91
+ sub = exp.members[1]
92
+ assert_equal( false, sub.include?('a') )
93
+ assert_equal( true, sub.include?('b') )
94
+ assert_equal( true, sub.include?('b', true) )
95
+ assert_equal( true, sub.include?('f', true) )
96
+ assert_equal( true, sub.include?('c') )
97
+ assert_equal( false, sub.include?('c', true) )
98
+
99
+ sub2 = sub.members[1]
100
+ assert_equal( false, sub2.include?('a') )
101
+ assert_equal( false, sub2.include?('b') )
102
+ assert_equal( true, sub2.include?('c') )
103
+ assert_equal( true, sub2.include?('c', true) )
104
+ assert_equal( true, sub2.include?('e', true) )
105
+ assert_equal( true, sub2.include?('d') )
106
+ assert_equal( false, sub2.include?('d', true) )
107
+
108
+ sub3 = sub2.members[1]
109
+ assert_equal( false, sub3.include?('a') )
110
+ assert_equal( false, sub3.include?('g') )
111
+ assert_equal( false, sub3.include?('b') )
112
+ assert_equal( false, sub3.include?('f') )
113
+ assert_equal( false, sub3.include?('c') )
114
+ assert_equal( false, sub3.include?('e') )
115
+ assert_equal( true, sub3.include?('d') )
116
+ assert_equal( true, sub3.include?('d', true) )
117
+ end
118
+
70
119
  # character subsets and negated posix classes are not available in ruby 1.8
71
120
  if RUBY_VERSION >= '1.9'
72
121
  def test_parse_set_nesting_matches
73
122
  exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
74
123
 
75
124
  assert_equal( true, exp.matches?("b") )
76
-
77
- # TODO: figure out why this generate wrong string, but only after
78
- # the assertion above (to_s "piles up")
79
- #assert_equal( false, exp.matches?("c") )
125
+ assert_equal( false, exp.matches?("c") )
80
126
  end
81
127
 
82
128
  def test_parse_set_nesting_not_matches