regexp_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,236 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class TestRegexpParserQuantifiers < Test::Unit::TestCase
4
+
5
+ # ?: zero-or-one
6
+ def test_parse_zero_or_one_greedy
7
+ t = RP.parse('a?bc')
8
+
9
+ assert_equal( true, t.expressions.first.quantified? )
10
+ assert_equal( :zero_or_one, t.expressions.first.quantifier.token )
11
+ assert_equal( 0, t.expressions.first.quantifier.min )
12
+ assert_equal( 1, t.expressions.first.quantifier.max )
13
+ assert_equal( :greedy, t.expressions.first.quantifier.mode )
14
+ end
15
+
16
+ def test_parse_zero_or_one_reluctant
17
+ t = RP.parse('a??bc')
18
+
19
+ assert_equal( true, t.expressions.first.quantified? )
20
+ assert_equal( :zero_or_one, t.expressions.first.quantifier.token )
21
+ assert_equal( 0, t.expressions.first.quantifier.min )
22
+ assert_equal( 1, t.expressions.first.quantifier.max )
23
+ assert_equal( :reluctant, t.expressions.first.quantifier.mode )
24
+ assert_equal( true, t.expressions.first.reluctant? )
25
+ end
26
+
27
+ def test_parse_zero_or_one_possessive
28
+ t = RP.parse('a?+bc')
29
+
30
+ assert_equal( true, t.expressions.first.quantified? )
31
+ assert_equal( :zero_or_one, t.expressions.first.quantifier.token )
32
+ assert_equal( 0, t.expressions.first.quantifier.min )
33
+ assert_equal( 1, t.expressions.first.quantifier.max )
34
+ assert_equal( :possessive, t.expressions.first.quantifier.mode )
35
+ assert_equal( true, t.expressions.first.possessive? )
36
+ end
37
+
38
+ # *: zero-or-more
39
+ def test_parse_zero_or_more_greedy
40
+ t = RP.parse('a*bc')
41
+
42
+ assert_equal( true, t.expressions.first.quantified? )
43
+ assert_equal( :zero_or_more, t.expressions.first.quantifier.token )
44
+ assert_equal( 0, t.expressions.first.quantifier.min )
45
+ assert_equal( -1, t.expressions.first.quantifier.max )
46
+ assert_equal( :greedy, t.expressions.first.quantifier.mode )
47
+ end
48
+
49
+ def test_parse_zero_or_more_reluctant
50
+ t = RP.parse('a*?bc')
51
+
52
+ assert_equal( true, t.expressions.first.quantified? )
53
+ assert_equal( :zero_or_more, t.expressions.first.quantifier.token )
54
+ assert_equal( 0, t.expressions.first.quantifier.min )
55
+ assert_equal( -1, t.expressions.first.quantifier.max )
56
+ assert_equal( :reluctant, t.expressions.first.quantifier.mode )
57
+ assert_equal( true, t.expressions.first.reluctant? )
58
+ end
59
+
60
+ def test_parse_zero_or_more_possessive
61
+ t = RP.parse('a*+bc')
62
+
63
+ assert_equal( true, t.expressions.first.quantified? )
64
+ assert_equal( :zero_or_more, t.expressions.first.quantifier.token )
65
+ assert_equal( 0, t.expressions.first.quantifier.min )
66
+ assert_equal( -1, t.expressions.first.quantifier.max )
67
+ assert_equal( :possessive, t.expressions.first.quantifier.mode )
68
+ assert_equal( true, t.expressions.first.possessive? )
69
+ end
70
+
71
+ # +: one-or-more
72
+ def test_parse_one_or_more_greedy
73
+ t = RP.parse('a+bc')
74
+
75
+ assert_equal( true, t.expressions.first.quantified? )
76
+ assert_equal( :one_or_more, t.expressions.first.quantifier.token )
77
+ assert_equal( 1, t.expressions.first.quantifier.min )
78
+ assert_equal( -1, t.expressions.first.quantifier.max )
79
+ assert_equal( :greedy, t.expressions.first.quantifier.mode )
80
+ end
81
+
82
+ def test_parse_one_or_more_reluctant
83
+ t = RP.parse('a+?bc')
84
+
85
+ assert_equal( true, t.expressions.first.quantified? )
86
+ assert_equal( :one_or_more, t.expressions.first.quantifier.token )
87
+ assert_equal( 1, t.expressions.first.quantifier.min )
88
+ assert_equal( -1, t.expressions.first.quantifier.max )
89
+ assert_equal( :reluctant, t.expressions.first.quantifier.mode )
90
+ assert_equal( true, t.expressions.first.reluctant? )
91
+ end
92
+
93
+ def test_parse_one_or_more_possessive
94
+ t = RP.parse('a++bc')
95
+
96
+ assert_equal( true, t.expressions.first.quantified? )
97
+ assert_equal( :one_or_more, t.expressions.first.quantifier.token )
98
+ assert_equal( 1, t.expressions.first.quantifier.min )
99
+ assert_equal( -1, t.expressions.first.quantifier.max )
100
+ assert_equal( :possessive, t.expressions.first.quantifier.mode )
101
+ assert_equal( true, t.expressions.first.possessive? )
102
+ end
103
+
104
+ # interval: min and max
105
+ def test_parse_intervals_min_max_greedy
106
+ t = RP.parse('a{2,4}bc')
107
+
108
+ assert_equal( true, t.expressions.first.quantified? )
109
+ assert_equal( :interval, t.expressions.first.quantifier.token )
110
+ assert_equal( 2, t.expressions.first.quantifier.min)
111
+ assert_equal( 4, t.expressions.first.quantifier.max)
112
+ assert_equal( :greedy, t.expressions.first.quantifier.mode )
113
+ end
114
+
115
+ def test_parse_intervals_min_max_reluctant
116
+ t = RP.parse('a{3,5}?bc')
117
+
118
+ assert_equal( true, t.expressions.first.quantified? )
119
+ assert_equal( :interval, t.expressions.first.quantifier.token )
120
+ assert_equal( 3, t.expressions.first.quantifier.min)
121
+ assert_equal( 5, t.expressions.first.quantifier.max)
122
+ assert_equal( :reluctant, t.expressions.first.quantifier.mode )
123
+ assert_equal( true, t.expressions.first.reluctant? )
124
+ end
125
+
126
+ def test_parse_intervals_min_max_possessive
127
+ t = RP.parse('a{2,4}+bc')
128
+
129
+ assert_equal( true, t.expressions.first.quantified? )
130
+ assert_equal( :interval, t.expressions.first.quantifier.token )
131
+ assert_equal( 2, t.expressions.first.quantifier.min)
132
+ assert_equal( 4, t.expressions.first.quantifier.max)
133
+ assert_equal( :possessive, t.expressions.first.quantifier.mode )
134
+ assert_equal( true, t.expressions.first.possessive? )
135
+ end
136
+
137
+ # interval: min only
138
+ def test_parse_intervals_min_only_greedy
139
+ t = RP.parse('a{2,}bc')
140
+
141
+ assert_equal( true, t.expressions.first.quantified? )
142
+ assert_equal( :interval, t.expressions.first.quantifier.token )
143
+ assert_equal( 2, t.expressions.first.quantifier.min)
144
+ assert_equal( -1, t.expressions.first.quantifier.max)
145
+ assert_equal( :greedy, t.expressions.first.quantifier.mode )
146
+ end
147
+
148
+ def test_parse_intervals_min_only_reluctant
149
+ t = RP.parse('a{2,}?bc')
150
+
151
+ assert_equal( true, t.expressions.first.quantified? )
152
+ assert_equal( :interval, t.expressions.first.quantifier.token )
153
+ assert_equal( 2, t.expressions.first.quantifier.min)
154
+ assert_equal( -1, t.expressions.first.quantifier.max)
155
+ assert_equal( :reluctant, t.expressions.first.quantifier.mode )
156
+ assert_equal( true, t.expressions.first.reluctant? )
157
+ end
158
+
159
+ def test_parse_intervals_min_only_possessive
160
+ t = RP.parse('a{3,}+bc')
161
+
162
+ assert_equal( true, t.expressions.first.quantified? )
163
+ assert_equal( :interval, t.expressions.first.quantifier.token )
164
+ assert_equal( 3, t.expressions.first.quantifier.min)
165
+ assert_equal( -1, t.expressions.first.quantifier.max)
166
+ assert_equal( :possessive, t.expressions.first.quantifier.mode )
167
+ assert_equal( true, t.expressions.first.possessive? )
168
+ end
169
+
170
+ # interval: max only
171
+ def test_parse_intervals_max_only_greedy
172
+ t = RP.parse('a{,2}bc')
173
+
174
+ assert_equal( true, t.expressions.first.quantified? )
175
+ assert_equal( :interval, t.expressions.first.quantifier.token )
176
+ assert_equal( 0, t.expressions.first.quantifier.min)
177
+ assert_equal( 2, t.expressions.first.quantifier.max)
178
+ assert_equal( :greedy, t.expressions.first.quantifier.mode )
179
+ end
180
+
181
+ def test_parse_intervals_max_only_reluctant
182
+ t = RP.parse('a{,4}?bc')
183
+
184
+ assert_equal( true, t.expressions.first.quantified? )
185
+ assert_equal( :interval, t.expressions.first.quantifier.token )
186
+ assert_equal( 0, t.expressions.first.quantifier.min)
187
+ assert_equal( 4, t.expressions.first.quantifier.max)
188
+ assert_equal( :reluctant, t.expressions.first.quantifier.mode )
189
+ assert_equal( true, t.expressions.first.reluctant? )
190
+ end
191
+
192
+ def test_parse_intervals_max_only_possessive
193
+ t = RP.parse('a{,3}+bc')
194
+
195
+ assert_equal( true, t.expressions.first.quantified? )
196
+ assert_equal( :interval, t.expressions.first.quantifier.token )
197
+ assert_equal( 0, t.expressions.first.quantifier.min)
198
+ assert_equal( 3, t.expressions.first.quantifier.max)
199
+ assert_equal( :possessive, t.expressions.first.quantifier.mode )
200
+ assert_equal( true, t.expressions.first.possessive? )
201
+ end
202
+
203
+ # interval: exact
204
+ def test_parse_intervals_exact_greedy
205
+ t = RP.parse('a{2}bc')
206
+
207
+ assert_equal( true, t.expressions.first.quantified? )
208
+ assert_equal( :interval, t.expressions.first.quantifier.token )
209
+ assert_equal( 2, t.expressions.first.quantifier.min)
210
+ assert_equal( 2, t.expressions.first.quantifier.max)
211
+ assert_equal( :greedy, t.expressions.first.quantifier.mode )
212
+ end
213
+
214
+ def test_parse_intervals_exact_reluctant
215
+ t = RP.parse('a{3}?bc')
216
+
217
+ assert_equal( true, t.expressions.first.quantified? )
218
+ assert_equal( :interval, t.expressions.first.quantifier.token )
219
+ assert_equal( 3, t.expressions.first.quantifier.min)
220
+ assert_equal( 3, t.expressions.first.quantifier.max)
221
+ assert_equal( :reluctant, t.expressions.first.quantifier.mode )
222
+ assert_equal( true, t.expressions.first.reluctant? )
223
+ end
224
+
225
+ def test_parse_intervals_exact_possessive
226
+ t = RP.parse('a{3}+bc')
227
+
228
+ assert_equal( true, t.expressions.first.quantified? )
229
+ assert_equal( :interval, t.expressions.first.quantifier.token )
230
+ assert_equal( 3, t.expressions.first.quantifier.min)
231
+ assert_equal( 3, t.expressions.first.quantifier.max)
232
+ assert_equal( :possessive, t.expressions.first.quantifier.mode )
233
+ assert_equal( true, t.expressions.first.possessive? )
234
+ end
235
+
236
+ end
@@ -0,0 +1,101 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class TestParserGroups < Test::Unit::TestCase
4
+
5
+ def test_parse_backref_named_ab
6
+ t = RP.parse('(?<X>abc)\k<X>')[1]
7
+
8
+ assert_equal( true, t.is_a?(Backreference::Name) )
9
+ end
10
+
11
+ def test_parse_backref_named_sq
12
+ t = RP.parse("(?<X>abc)\\k'X'")[1]
13
+
14
+ assert_equal( true, t.is_a?(Backreference::Name) )
15
+ end
16
+
17
+ def test_parse_backref_number_ab
18
+ t = RP.parse('(abc)\k<1>')[1]
19
+
20
+ assert_equal( true, t.is_a?(Backreference::Number) )
21
+ end
22
+
23
+ def test_parse_backref_number_sq
24
+ t = RP.parse("(abc)\\k'1'")[1]
25
+
26
+ assert_equal( true, t.is_a?(Backreference::Number) )
27
+ end
28
+
29
+ def test_parse_backref_number_relative_ab
30
+ t = RP.parse('(abc)\k<-1>')[1]
31
+
32
+ assert_equal( true, t.is_a?(Backreference::NumberRelative) )
33
+ end
34
+
35
+ def test_parse_backref_number_relative_sq
36
+ t = RP.parse("(abc)\\k'-1'")[1]
37
+
38
+ assert_equal( true, t.is_a?(Backreference::NumberRelative) )
39
+ end
40
+
41
+ def test_parse_backref_name_call_ab
42
+ t = RP.parse('(?<X>abc)\g<X>')[1]
43
+
44
+ assert_equal( true, t.is_a?(Backreference::NameCall) )
45
+ end
46
+
47
+ def test_parse_backref_name_call_sq
48
+ t = RP.parse("(?<X>abc)\\g'X'")[1]
49
+
50
+ assert_equal( true, t.is_a?(Backreference::NameCall) )
51
+ end
52
+
53
+ def test_parse_backref_number_call_ab
54
+ t = RP.parse('(abc)\g<1>')[1]
55
+
56
+ assert_equal( true, t.is_a?(Backreference::NumberCall) )
57
+ end
58
+
59
+ def test_parse_backref_number_call_sq
60
+ t = RP.parse("(abc)\\g'1'")[1]
61
+
62
+ assert_equal( true, t.is_a?(Backreference::NumberCall) )
63
+ end
64
+
65
+ def test_parse_backref_number_relative_call_ab
66
+ t = RP.parse('(abc)\g<-1>')[1]
67
+
68
+ assert_equal( true, t.is_a?(Backreference::NumberCallRelative) )
69
+ end
70
+
71
+ def test_parse_backref_number_relative_call_sq
72
+ t = RP.parse("(abc)\\g'-1'")[1]
73
+
74
+ assert_equal( true, t.is_a?(Backreference::NumberCallRelative) )
75
+ end
76
+
77
+ def test_parse_backref_name_nest_level_ab
78
+ t = RP.parse('(?<X>abc)\k<X-0>')[1]
79
+
80
+ assert_equal( true, t.is_a?(Backreference::NameNestLevel) )
81
+ end
82
+
83
+ def test_parse_backref_name_nest_level_sq
84
+ t = RP.parse("(?<X>abc)\\k'X-0'")[1]
85
+
86
+ assert_equal( true, t.is_a?(Backreference::NameNestLevel) )
87
+ end
88
+
89
+ def test_parse_backref_number_nest_level_ab
90
+ t = RP.parse('(abc)\k<1-0>')[1]
91
+
92
+ assert_equal( true, t.is_a?(Backreference::NumberNestLevel) )
93
+ end
94
+
95
+ def test_parse_backref_number_nest_level_sq
96
+ t = RP.parse("(abc)\\k'1-0'")[1]
97
+
98
+ assert_equal( true, t.is_a?(Backreference::NumberNestLevel) )
99
+ end
100
+
101
+ end
@@ -0,0 +1,99 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class TestParserSets < Test::Unit::TestCase
4
+
5
+ def test_parse_set_basic
6
+ exp = RP.parse('[a-c]+', :any).expressions[0]
7
+
8
+ assert_equal( true, exp.is_a?(CharacterSet) )
9
+ assert_equal( true, exp.include?('a-c') )
10
+
11
+ assert_equal( true, exp.quantified? )
12
+ assert_equal( 1, exp.quantifier.min )
13
+ assert_equal( -1, exp.quantifier.max )
14
+ end
15
+
16
+ def test_parse_set_posix_class
17
+ exp = RP.parse('[[:digit:][:lower:]]+', 'ruby/1.9').expressions[0]
18
+
19
+ assert_equal( true, exp.is_a?(CharacterSet) )
20
+
21
+ assert_equal( true, exp.include?('[:digit:]') )
22
+ assert_equal( true, exp.include?('[:lower:]') )
23
+
24
+ assert_equal( true, exp.matches?("6") )
25
+
26
+ # TODO: figure out why this generates the wrong string, but only after
27
+ # the assertion above (to_s "piles up")
28
+ #assert_equal( true, exp.matches?("v") )
29
+ #assert_equal( false, exp.matches?("\x48") )
30
+ end
31
+
32
+ def test_parse_set_members
33
+ exp = RP.parse('[ac-eh]', :any)[0]
34
+
35
+ assert_equal( true, exp.include?('a') )
36
+ assert_equal( true, exp.include?('c-e') )
37
+ assert_equal( true, exp.include?('h') )
38
+ assert_equal( false, exp.include?(']') )
39
+ end
40
+
41
+ def test_parse_set_collating_sequence
42
+ exp = RP.parse('[a[.span-ll.]h]', :any)[0]
43
+
44
+ assert_equal( true, exp.include?('[.span-ll.]') )
45
+ assert_equal( false, exp.include?(']') )
46
+ end
47
+
48
+ def test_parse_set_character_equivalents
49
+ exp = RP.parse('[a[=e=]h]', :any)[0]
50
+
51
+ assert_equal( true, exp.include?('[=e=]') )
52
+ assert_equal( false, exp.include?(']') )
53
+ end
54
+
55
+ def test_parse_set_nesting_tos
56
+ pattern = '[a[b[^c]]]'
57
+
58
+ assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
59
+ end
60
+
61
+ def test_parse_set_nesting_include
62
+ exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
63
+
64
+ assert_equal( true, exp.is_a?(CharacterSet) )
65
+ assert_equal( true, exp.include?('a') )
66
+ assert_equal( true, exp.include?('b') )
67
+ assert_equal( true, exp.include?('c') )
68
+ end
69
+
70
+ # character subsets and negated posix classes are not available in ruby 1.8
71
+ if RUBY_VERSION >= '1.9'
72
+ def test_parse_set_nesting_matches
73
+ exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
74
+
75
+ assert_equal( true, exp.matches?("b") )
76
+
77
+ # TODO: figure out why this generates the wrong string, but only after
78
+ # the assertion above (to_s "piles up")
79
+ #assert_equal( false, exp.matches?("c") )
80
+ end
81
+
82
+ def test_parse_set_nesting_not_matches
83
+ exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
84
+ assert_equal( false, exp.matches?("c") )
85
+ end
86
+
87
+ def test_parse_set_negated_posix_class
88
+ exp = RP.parse('[[:^xdigit:][:^lower:]]+', 'ruby/1.9').expressions[0]
89
+
90
+ assert_equal( true, exp.is_a?(CharacterSet) )
91
+
92
+ assert_equal( true, exp.include?('[:^xdigit:]') )
93
+ assert_equal( true, exp.include?('[:^lower:]') )
94
+
95
+ assert_equal( true, exp.matches?("GT") )
96
+ end
97
+ end
98
+
99
+ end
@@ -0,0 +1,30 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ %w{
4
+ anchors errors escapes groups literals meta properties
5
+ quantifiers scripts sets types
6
+ }.each do|tc|
7
+ require File.expand_path("../test_#{tc}", __FILE__)
8
+ end
9
+
10
+ class TestRegexpScanner < Test::Unit::TestCase
11
+
12
+ def test_scanner_returns_an_array
13
+ assert_instance_of( Array, RS.scan('abc') )
14
+ end
15
+
16
+ def test_scanner_returns_tokens_as_arrays
17
+ tokens = RS.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
18
+
19
+ assert( tokens.all?{|token|
20
+ token.kind_of?(Array) and token.length == 5
21
+ }, "Not all tokens are arrays of 5 elements")
22
+ end
23
+
24
+ def test_scanner_token_count
25
+ re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
26
+
27
+ assert_equal(26, RS.scan(re).length )
28
+ end
29
+
30
+ end