regexp_parser 0.1.1 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61)
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
@@ -0,0 +1,7 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ %w{
4
+ base to_s clone
5
+ }.each do|tc|
6
+ require File.expand_path("../test_#{tc}", __FILE__)
7
+ end
@@ -0,0 +1,72 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ExpressionBase < Test::Unit::TestCase
4
+
5
+ def test_expression_to_re
6
+ re_text = '^a*(b([cde]+))+f?$'
7
+
8
+ re = RP.parse(re_text).to_re
9
+
10
+ assert( re.is_a?(::Regexp),
11
+ 'Not a Regexp, but should be')
12
+
13
+ assert_equal( re.source, re_text )
14
+ end
15
+
16
+ def test_expression_terminal?
17
+ root = RP.parse('^a([b]+)c$')
18
+
19
+ assert_equal( false, root.terminal? )
20
+
21
+ assert_equal( true, root[0].terminal? )
22
+ assert_equal( true, root[1].terminal? )
23
+ assert_equal( false, root[2].terminal? )
24
+ assert_equal( true, root[2][0].terminal? )
25
+ assert_equal( true, root[3].terminal? )
26
+ assert_equal( true, root[4].terminal? )
27
+ end
28
+
29
+ def test_expression_alt_terminal?
30
+ root = RP.parse('^(ab|cd)$')
31
+
32
+ assert_equal( false, root.terminal? )
33
+
34
+ assert_equal( true, root[0].terminal? )
35
+ assert_equal( false, root[1].terminal? )
36
+ assert_equal( false, root[1][0].terminal? )
37
+ assert_equal( false, root[1][0][0].terminal? )
38
+ assert_equal( true, root[1][0][0][0].terminal? )
39
+ assert_equal( false, root[1][0][1].terminal? )
40
+ assert_equal( true, root[1][0][1][0].terminal? )
41
+ end
42
+
43
+ def test_expression_coded_offset
44
+ root = RP.parse('^a*(b+(c?))$')
45
+
46
+ assert_equal( '@0+12', root.coded_offset )
47
+
48
+ # All top level offsets
49
+ checks = [
50
+ [ '@0+1', '^' ],
51
+ [ '@1+2', 'a*' ],
52
+ [ '@3+8', '(b+(c?))' ],
53
+ ['@11+1', '$' ],
54
+ ].each_with_index do |check, i|
55
+ against = [ root[i].coded_offset, root[i].to_s ]
56
+ assert_equal( check, against )
57
+ end
58
+
59
+ # Nested expression
60
+ assert_equal(['@4+2', 'b+'],
61
+ [root[2][0].coded_offset, root[2][0].to_s])
62
+
63
+ # Nested subexpression
64
+ assert_equal(['@6+4', '(c?)'],
65
+ [root[2][1].coded_offset, root[2][1].to_s])
66
+
67
+ # Nested subexpression expression
68
+ assert_equal(['@7+2', 'c?'],
69
+ [root[2][1][0].coded_offset, root[2][1][0].to_s])
70
+ end
71
+
72
+ end
@@ -0,0 +1,144 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ExpressionClone < Test::Unit::TestCase
4
+
5
+ def test_expression_clone_base
6
+ root = RP.parse(/^(?i:a)b+$/i)
7
+ copy = root.clone
8
+
9
+ assert_not_equal(copy.object_id, root.object_id)
10
+
11
+ # The text content is equal but the objects are not.
12
+ assert_equal(copy.text, root.text)
13
+ assert_not_equal(copy.text.object_id, root.text.object_id)
14
+
15
+ root_1 = root.expressions[1]
16
+ copy_1 = copy.expressions[1]
17
+
18
+ # The options hash contents are equal but the objects are not.
19
+ assert_equal(copy_1.options, root_1.options)
20
+ assert_not_equal(copy_1.options.object_id,
21
+ root_1.options.object_id)
22
+
23
+ root_2 = root.expressions[2]
24
+ copy_2 = copy.expressions[2]
25
+
26
+ assert( root_2.quantified? )
27
+ assert( copy_2.quantified? )
28
+
29
+ # The quantifier contents are equal but the objects are not.
30
+ assert_equal(copy_2.quantifier.text, root_2.quantifier.text)
31
+
32
+ assert_not_equal(copy_2.quantifier.text.object_id,
33
+ root_2.quantifier.text.object_id)
34
+
35
+ assert_not_equal(copy_2.quantifier.object_id,
36
+ root_2.quantifier.object_id)
37
+ end
38
+
39
+ def test_expression_clone_subexpression
40
+ root = RP.parse(/^a(b([cde])f)g$/)
41
+ copy = root.clone
42
+
43
+ assert( root.respond_to?(:expressions) )
44
+ assert( copy.respond_to?(:expressions) )
45
+
46
+ # The expressions arrays are not equal.
47
+ assert_not_equal(copy.expressions.object_id,
48
+ root.expressions.object_id)
49
+
50
+ # The expressions in the arrays are not equal.
51
+ copy.expressions.each_with_index do |e, ei|
52
+ assert_not_equal(e.object_id,
53
+ root.expressions[ei].object_id)
54
+ end
55
+
56
+ # The expressions in nested expressions are not equal.
57
+ copy.expressions[2].each_with_index do |e, ei|
58
+ assert_not_equal(e.object_id,
59
+ root.expressions[2][ei].object_id)
60
+ end
61
+ end
62
+
63
+ # ruby 1.8 does not implement named groups
64
+ def test_expression_clone_named_group
65
+ root = RP.parse('^(?<somename>a)+bc$')
66
+ copy = root.clone
67
+
68
+ root_1 = root.expressions[1]
69
+ copy_1 = copy.expressions[1]
70
+
71
+ # The names are equal but their objects are not.
72
+ assert_equal(copy_1.name, root_1.name)
73
+ assert_not_equal(copy_1.name.object_id,
74
+ root_1.name.object_id)
75
+
76
+ # Verify super: text objects should be different.
77
+ assert_equal(copy_1.text, root_1.text)
78
+
79
+ # Verify super: expressions arrays are not equal.
80
+ assert_not_equal(copy_1.expressions.object_id,
81
+ root_1.expressions.object_id)
82
+
83
+ # Verify super: expressions in the arrays are not equal.
84
+ copy_1.expressions.each_with_index do |e, ei|
85
+ assert_not_equal(e.object_id,
86
+ root_1.expressions[ei].object_id)
87
+ end
88
+ end
89
+
90
+ def test_expression_clone_set
91
+ root = RP.parse(/^a(b([cde])f)g$/)
92
+ copy = root.clone
93
+
94
+ root_2_1 = root.expressions[2][1]
95
+
96
+ copy.expressions[2][1].each_with_index do |e, ei|
97
+ assert( e.respond_to?(:members) )
98
+
99
+ # The members arrays are not equal.
100
+ assert_not_equal( e.members.object_id, root_2_1[ei].members.object_id )
101
+
102
+ # The members in the arrays are not equal.
103
+ e.members.each_with_index do |m, mi|
104
+ assert_not_equal( m.object_id, root_2_1[ei].members[mi].object_id )
105
+ end
106
+ end
107
+ end
108
+
109
+ def test_expression_clone_subset
110
+ # Explicitly set syntax to ruby 1.9 because 1.8 does not
111
+ # implement subsets.
112
+ root = RP.parse('^a(b([c[def]g])h)i$', 'ruby/1.9')
113
+ copy = root.clone
114
+
115
+ root_set = root.expressions[2][1][0]
116
+ copy_set = copy.expressions[2][1][0]
117
+
118
+ root_subset = root_set.members[1]
119
+ copy_subset = copy_set.members[1]
120
+
121
+ # Sanity checks
122
+ assert( root_set.respond_to?(:members) )
123
+ assert( copy_set.respond_to?(:members) )
124
+
125
+ assert( root_subset.respond_to?(:members) )
126
+ assert( copy_subset.respond_to?(:members) )
127
+
128
+ # The sets are not equal
129
+ assert_not_equal(copy_set.object_id, root_set.object_id)
130
+
131
+ # The subsets are not equal
132
+ assert_not_equal(copy_subset.object_id, root_subset.object_id)
133
+
134
+ # The subsets' members arrays are not equal.
135
+ assert_not_equal( copy_subset.members.object_id,
136
+ root_subset.members.object_id )
137
+
138
+ # The subsets' members are not equal
139
+ copy_subset.members.each_with_index do |m, mi|
140
+ assert_not_equal(m.object_id, root_subset.members[mi].object_id)
141
+ end
142
+ end
143
+
144
+ end
@@ -1,48 +1,48 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
- class ParserExpression < Test::Unit::TestCase
3
+ class ExpressionToS < Test::Unit::TestCase
4
4
 
5
- def test_parse_expression_to_s_literal_alternation
5
+ def test_expression_to_s_literal_alternation
6
6
  pattern = 'abcd|ghij|klmn|pqur'
7
7
  assert_equal( pattern, RP.parse(pattern).to_s )
8
8
  end
9
9
 
10
- def test_parse_expression_to_s_quantified_alternations
10
+ def test_expression_to_s_quantified_alternations
11
11
  pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
12
12
  assert_equal( pattern, RP.parse(pattern).to_s )
13
13
  end
14
14
 
15
- def test_parse_expression_to_s_quantified_sets
15
+ def test_expression_to_s_quantified_sets
16
16
  pattern = '[abc]+|[^def]{3,6}'
17
17
  assert_equal( pattern, RP.parse(pattern).to_s )
18
18
  end
19
19
 
20
- def test_parse_expression_to_s_property_sets
20
+ def test_expression_to_s_property_sets
21
21
  pattern = '[\a\b\p{Lu}\P{Z}\c\d]+'
22
22
  assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
23
23
  end
24
24
 
25
- def test_parse_expression_to_s_groups
25
+ def test_expression_to_s_groups
26
26
  pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
27
27
  assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
28
28
  end
29
29
 
30
- def test_parse_expression_to_s_assertions
30
+ def test_expression_to_s_assertions
31
31
  pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
32
32
  assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
33
33
  end
34
34
 
35
- def test_parse_expression_to_s_comments
35
+ def test_expression_to_s_comments
36
36
  pattern = '(?#start)a(?#middle)b(?#end)'
37
37
  assert_equal( pattern, RP.parse(pattern).to_s )
38
38
  end
39
39
 
40
- def test_parse_expression_to_s_options
40
+ def test_expression_to_s_options
41
41
  pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
42
42
  assert_equal( pattern, RP.parse(pattern).to_s )
43
43
  end
44
44
 
45
- def test_parse_expression_to_s_url
45
+ def test_expression_to_s_url
46
46
  pattern = '(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*'+
47
47
  '\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)'
48
48
  assert_equal( pattern, RP.parse(pattern).to_s )
data/test/helpers.rb CHANGED
@@ -4,5 +4,6 @@ require File.expand_path("../../lib/regexp_parser", __FILE__)
4
4
  RS = Regexp::Scanner
5
5
  RL = Regexp::Lexer
6
6
  RP = Regexp::Parser
7
+ RE = Regexp::Expression
7
8
 
8
9
  include Regexp::Expression
@@ -1,7 +1,7 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  %w{
4
- alternation anchors errors escapes expression groups properties
4
+ alternation anchors errors escapes groups properties
5
5
  quantifiers refcalls sets
6
6
  }.each do|tc|
7
7
  require File.expand_path("../test_#{tc}", __FILE__)
@@ -43,4 +43,39 @@ class ParserAlternation < Test::Unit::TestCase
43
43
  assert_equal( 2, nested.expressions.length )
44
44
  end
45
45
 
46
+ def test_parse_alternation_nested_groups
47
+ root = RP.parse('(i|ey|([ougfd]+)|(ney))')
48
+
49
+ alts = root.expressions[0][0].alternatives
50
+ assert_equal( 4, alts.length )
51
+ end
52
+
53
+ def test_parse_alternation_grouped_alts
54
+ root = RP.parse('ca((n)|(t)|(ll)|(b))')
55
+
56
+ alts = root.expressions[1][0].alternatives
57
+
58
+ assert_equal( 4, alts.length )
59
+ assert_equal( true, alts[0].is_a?(Sequence) )
60
+ assert_equal( true, alts[1].is_a?(Sequence) )
61
+ assert_equal( true, alts[2].is_a?(Sequence) )
62
+ assert_equal( true, alts[3].is_a?(Sequence) )
63
+ end
64
+
65
+ def test_parse_alternation_nested_grouped_alts
66
+ root = RP.parse('ca((n|t)|(ll|b))')
67
+
68
+ alts = root.expressions[1][0].alternatives
69
+
70
+ assert_equal( 2, alts.length )
71
+ assert_equal( true, alts[0].is_a?(Sequence) )
72
+ assert_equal( true, alts[1].is_a?(Sequence) )
73
+
74
+ subalts = root.expressions[1][0][0][0][0].alternatives
75
+
76
+ assert_equal( 2, alts.length )
77
+ assert_equal( true, subalts[0].is_a?(Sequence) )
78
+ assert_equal( true, subalts[1].is_a?(Sequence) )
79
+ end
80
+
46
81
  end
@@ -3,8 +3,8 @@ require File.expand_path("../../helpers", __FILE__)
3
3
  class TestParserAnchors < Test::Unit::TestCase
4
4
 
5
5
  tests = {
6
- '^a' => [0, :anchor, :beginning_of_line, Anchor::BOL],
7
- 'a$' => [1, :anchor, :end_of_line, Anchor::EOL],
6
+ '^a' => [0, :anchor, :bol, Anchor::BOL],
7
+ 'a$' => [1, :anchor, :eol, Anchor::EOL],
8
8
 
9
9
  '\Aa' => [0, :anchor, :bos, Anchor::BOS],
10
10
  'a\z' => [1, :anchor, :eos, Anchor::EOS],
@@ -1,6 +1,6 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
- class TestParserGroups < Test::Unit::TestCase
3
+ class TestParserRefcalls < Test::Unit::TestCase
4
4
 
5
5
  def test_parse_backref_named_ab
6
6
  t = RP.parse('(?<X>abc)\k<X>', 'ruby/1.9')[1]
@@ -23,10 +23,8 @@ class TestParserSets < Test::Unit::TestCase
23
23
 
24
24
  assert_equal( true, exp.matches?("6") )
25
25
 
26
- # TODO: figure out why this generate wrong string, but only after
27
- # the assertion above (to_s "piles up")
28
- #assert_equal( true, exp.matches?("v") )
29
- #assert_equal( false, exp.matches?("\x48") )
26
+ assert_equal( true, exp.matches?("v") )
27
+ assert_equal( false, exp.matches?("\x48") )
30
28
  end
31
29
 
32
30
  def test_parse_set_members
@@ -67,16 +65,64 @@ class TestParserSets < Test::Unit::TestCase
67
65
  assert_equal( true, exp.include?('c') )
68
66
  end
69
67
 
68
+ def test_parse_set_nesting_include_at_depth
69
+ exp = RP.parse('[a[b]c]', 'ruby/1.9')[0]
70
+
71
+ assert_equal( true, exp.is_a?(CharacterSet) )
72
+ assert_equal( true, exp.include?('a') )
73
+ assert_equal( true, exp.include?('b') )
74
+ assert_equal( false, exp.include?('b', true) ) # should not include b directly
75
+
76
+ sub = exp.members[1]
77
+ assert_equal( false, sub.include?('a') )
78
+ assert_equal( true, sub.include?('b') )
79
+ assert_equal( true, sub.include?('b', true) )
80
+ assert_equal( false, sub.include?('c') )
81
+ end
82
+
83
+ def test_parse_set_nesting_include_at_depth_2
84
+ exp = RP.parse('[a[b[c[d]e]f]g]', 'ruby/1.9')[0]
85
+
86
+ assert_equal( true, exp.is_a?(CharacterSet) )
87
+ assert_equal( true, exp.include?('a') )
88
+ assert_equal( true, exp.include?('b') )
89
+ assert_equal( false, exp.include?('b', true) ) # should not include b directly
90
+
91
+ sub = exp.members[1]
92
+ assert_equal( false, sub.include?('a') )
93
+ assert_equal( true, sub.include?('b') )
94
+ assert_equal( true, sub.include?('b', true) )
95
+ assert_equal( true, sub.include?('f', true) )
96
+ assert_equal( true, sub.include?('c') )
97
+ assert_equal( false, sub.include?('c', true) )
98
+
99
+ sub2 = sub.members[1]
100
+ assert_equal( false, sub2.include?('a') )
101
+ assert_equal( false, sub2.include?('b') )
102
+ assert_equal( true, sub2.include?('c') )
103
+ assert_equal( true, sub2.include?('c', true) )
104
+ assert_equal( true, sub2.include?('e', true) )
105
+ assert_equal( true, sub2.include?('d') )
106
+ assert_equal( false, sub2.include?('d', true) )
107
+
108
+ sub3 = sub2.members[1]
109
+ assert_equal( false, sub3.include?('a') )
110
+ assert_equal( false, sub3.include?('g') )
111
+ assert_equal( false, sub3.include?('b') )
112
+ assert_equal( false, sub3.include?('f') )
113
+ assert_equal( false, sub3.include?('c') )
114
+ assert_equal( false, sub3.include?('e') )
115
+ assert_equal( true, sub3.include?('d') )
116
+ assert_equal( true, sub3.include?('d', true) )
117
+ end
118
+
70
119
  # character subsets and negated posix classes are not available in ruby 1.8
71
120
  if RUBY_VERSION >= '1.9'
72
121
  def test_parse_set_nesting_matches
73
122
  exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
74
123
 
75
124
  assert_equal( true, exp.matches?("b") )
76
-
77
- # TODO: figure out why this generate wrong string, but only after
78
- # the assertion above (to_s "piles up")
79
- #assert_equal( false, exp.matches?("c") )
125
+ assert_equal( false, exp.matches?("c") )
80
126
  end
81
127
 
82
128
  def test_parse_set_nesting_not_matches