regexp_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,236 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Parser tests for quantifiers: each test parses a pattern whose first
# expression ('a') is quantified and checks the quantifier's token, bounds and
# mode (greedy, reluctant or possessive).
class TestRegexpParserQuantifiers < Test::Unit::TestCase

  # ?: zero-or-one
  def test_parse_zero_or_one_greedy
    assert_quantifier( 'a?bc', :zero_or_one, 0, 1, :greedy )
  end

  def test_parse_zero_or_one_reluctant
    assert_quantifier( 'a??bc', :zero_or_one, 0, 1, :reluctant )
  end

  def test_parse_zero_or_one_possessive
    assert_quantifier( 'a?+bc', :zero_or_one, 0, 1, :possessive )
  end

  # *: zero-or-more
  def test_parse_zero_or_more_greedy
    assert_quantifier( 'a*bc', :zero_or_more, 0, -1, :greedy )
  end

  def test_parse_zero_or_more_reluctant
    assert_quantifier( 'a*?bc', :zero_or_more, 0, -1, :reluctant )
  end

  def test_parse_zero_or_more_possessive
    assert_quantifier( 'a*+bc', :zero_or_more, 0, -1, :possessive )
  end

  # +: one-or-more
  def test_parse_one_or_more_greedy
    assert_quantifier( 'a+bc', :one_or_more, 1, -1, :greedy )
  end

  def test_parse_one_or_more_reluctant
    assert_quantifier( 'a+?bc', :one_or_more, 1, -1, :reluctant )
  end

  def test_parse_one_or_more_possessive
    assert_quantifier( 'a++bc', :one_or_more, 1, -1, :possessive )
  end

  # interval: min and max
  def test_parse_intervals_min_max_greedy
    assert_quantifier( 'a{2,4}bc', :interval, 2, 4, :greedy )
  end

  def test_parse_intervals_min_max_reluctant
    assert_quantifier( 'a{3,5}?bc', :interval, 3, 5, :reluctant )
  end

  def test_parse_intervals_min_max_possessive
    assert_quantifier( 'a{2,4}+bc', :interval, 2, 4, :possessive )
  end

  # interval: min only
  def test_parse_intervals_min_only_greedy
    assert_quantifier( 'a{2,}bc', :interval, 2, -1, :greedy )
  end

  def test_parse_intervals_min_only_reluctant
    assert_quantifier( 'a{2,}?bc', :interval, 2, -1, :reluctant )
  end

  def test_parse_intervals_min_only_possessive
    assert_quantifier( 'a{3,}+bc', :interval, 3, -1, :possessive )
  end

  # interval: max only
  def test_parse_intervals_max_only_greedy
    assert_quantifier( 'a{,2}bc', :interval, 0, 2, :greedy )
  end

  def test_parse_intervals_max_only_reluctant
    assert_quantifier( 'a{,4}?bc', :interval, 0, 4, :reluctant )
  end

  def test_parse_intervals_max_only_possessive
    assert_quantifier( 'a{,3}+bc', :interval, 0, 3, :possessive )
  end

  # interval: exact
  def test_parse_intervals_exact_greedy
    assert_quantifier( 'a{2}bc', :interval, 2, 2, :greedy )
  end

  def test_parse_intervals_exact_reluctant
    assert_quantifier( 'a{3}?bc', :interval, 3, 3, :reluctant )
  end

  def test_parse_intervals_exact_possessive
    assert_quantifier( 'a{3}+bc', :interval, 3, 3, :possessive )
  end

  private

  # Parses +pattern+ and checks the quantifier attached to its first
  # expression.
  #
  # pattern - regular expression source, passed to RP.parse
  # token   - expected quantifier token (:zero_or_one, :zero_or_more,
  #           :one_or_more or :interval)
  # min     - expected lower bound
  # max     - expected upper bound; these tests expect -1 for the quantifiers
  #           written without an upper bound (*, + and {n,})
  # mode    - expected mode; for :reluctant and :possessive the matching
  #           predicate on the expression is checked as well
  def assert_quantifier(pattern, token, min, max, mode)
    first = RP.parse(pattern).expressions.first

    assert_equal( true, first.quantified? )

    quantifier = first.quantifier
    assert_equal( token, quantifier.token )
    assert_equal( min,   quantifier.min )
    assert_equal( max,   quantifier.max )
    assert_equal( mode,  quantifier.mode )

    # Greedy tests make no predicate assertion.
    case mode
    when :reluctant  then assert_equal( true, first.reluctant? )
    when :possessive then assert_equal( true, first.possessive? )
    end
  end

end
@@ -0,0 +1,101 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Parser tests for back-references and subexpression calls: each pattern is a
# group followed by a \k or \g reference, written with either angle-bracket
# ("ab") or single-quote ("sq") delimiters, and the test checks the class of
# the node parsed for the reference.
#
# NOTE(review): the class is named TestParserGroups, but this hunk appears to
# belong to test/parser/test_refcalls.rb, and the package also ships
# test/parser/test_groups.rb. If that file declares the same class name the two
# are merged when both are loaded - confirm this is intended.
class TestParserGroups < Test::Unit::TestCase

  def test_parse_backref_named_ab
    assert_refcall_type( '(?<X>abc)\k<X>', Backreference::Name )
  end

  def test_parse_backref_named_sq
    assert_refcall_type( "(?<X>abc)\\k'X'", Backreference::Name )
  end

  def test_parse_backref_number_ab
    assert_refcall_type( '(abc)\k<1>', Backreference::Number )
  end

  def test_parse_backref_number_sq
    assert_refcall_type( "(abc)\\k'1'", Backreference::Number )
  end

  def test_parse_backref_number_relative_ab
    assert_refcall_type( '(abc)\k<-1>', Backreference::NumberRelative )
  end

  def test_parse_backref_number_relative_sq
    assert_refcall_type( "(abc)\\k'-1'", Backreference::NumberRelative )
  end

  def test_parse_backref_name_call_ab
    assert_refcall_type( '(?<X>abc)\g<X>', Backreference::NameCall )
  end

  def test_parse_backref_name_call_sq
    assert_refcall_type( "(?<X>abc)\\g'X'", Backreference::NameCall )
  end

  def test_parse_backref_number_call_ab
    assert_refcall_type( '(abc)\g<1>', Backreference::NumberCall )
  end

  def test_parse_backref_number_call_sq
    assert_refcall_type( "(abc)\\g'1'", Backreference::NumberCall )
  end

  def test_parse_backref_number_relative_call_ab
    assert_refcall_type( '(abc)\g<-1>', Backreference::NumberCallRelative )
  end

  def test_parse_backref_number_relative_call_sq
    assert_refcall_type( "(abc)\\g'-1'", Backreference::NumberCallRelative )
  end

  def test_parse_backref_name_nest_level_ab
    assert_refcall_type( '(?<X>abc)\k<X-0>', Backreference::NameNestLevel )
  end

  def test_parse_backref_name_nest_level_sq
    assert_refcall_type( "(?<X>abc)\\k'X-0'", Backreference::NameNestLevel )
  end

  def test_parse_backref_number_nest_level_ab
    assert_refcall_type( '(abc)\k<1-0>', Backreference::NumberNestLevel )
  end

  def test_parse_backref_number_nest_level_sq
    assert_refcall_type( "(abc)\\k'1-0'", Backreference::NumberNestLevel )
  end

  private

  # Parses +pattern+ and checks that the element at index 1 of the result
  # (presumably the node that follows the leading group) is a +klass+.
  def assert_refcall_type(pattern, klass)
    node = RP.parse(pattern)[1]

    assert_equal( true, node.is_a?(klass) )
  end

end
@@ -0,0 +1,99 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Parser tests for character sets: membership (include?), matching (matches?),
# quantification, nesting, and POSIX class / collating / equivalence members.
class TestParserSets < Test::Unit::TestCase

  def test_parse_set_basic
    tree = RP.parse('[a-c]+', :any)
    set  = tree.expressions[0]

    assert_equal( true, set.is_a?(CharacterSet) )
    assert_equal( true, set.include?('a-c') )

    # The trailing '+' quantifies the set: at least once, with -1 as the max.
    assert_equal( true, set.quantified? )
    assert_equal( 1,  set.quantifier.min )
    assert_equal( -1, set.quantifier.max )
  end

  def test_parse_set_posix_class
    tree = RP.parse('[[:digit:][:lower:]]+', 'ruby/1.9')
    set  = tree.expressions[0]

    assert_equal( true, set.is_a?(CharacterSet) )

    ['[:digit:]', '[:lower:]'].each do |member|
      assert_equal( true, set.include?(member) )
    end

    assert_equal( true, set.matches?("6") )

    # TODO: figure out why this generates the wrong string, but only after
    # the assertion above (to_s "piles up")
    #assert_equal( true, exp.matches?("v") )
    #assert_equal( false, exp.matches?("\x48") )
  end

  def test_parse_set_members
    set = RP.parse('[ac-eh]', :any)[0]

    ['a', 'c-e', 'h'].each do |member|
      assert_equal( true, set.include?(member) )
    end

    # The closing bracket must not be reported as a member.
    assert_equal( false, set.include?(']') )
  end

  def test_parse_set_collating_sequence
    set = RP.parse('[a[.span-ll.]h]', :any)[0]

    assert_equal( true,  set.include?('[.span-ll.]') )
    assert_equal( false, set.include?(']') )
  end

  def test_parse_set_character_equivalents
    set = RP.parse('[a[=e=]h]', :any)[0]

    assert_equal( true,  set.include?('[=e=]') )
    assert_equal( false, set.include?(']') )
  end

  # A nested set must round-trip through to_s unchanged.
  def test_parse_set_nesting_tos
    pattern = '[a[b[^c]]]'
    tree    = RP.parse(pattern, 'ruby/1.9')

    assert_equal( pattern, tree.to_s )
  end

  def test_parse_set_nesting_include
    set = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]

    assert_equal( true, set.is_a?(CharacterSet) )

    ['a', 'b', 'c'].each do |member|
      assert_equal( true, set.include?(member) )
    end
  end

  # character subsets and negated posix classes are not available in ruby 1.8
  if RUBY_VERSION >= '1.9'
    def test_parse_set_nesting_matches
      set = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]

      assert_equal( true, set.matches?("b") )

      # TODO: figure out why this generates the wrong string, but only after
      # the assertion above (to_s "piles up")
      #assert_equal( false, exp.matches?("c") )
    end

    def test_parse_set_nesting_not_matches
      set = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]

      assert_equal( false, set.matches?("c") )
    end

    def test_parse_set_negated_posix_class
      tree = RP.parse('[[:^xdigit:][:^lower:]]+', 'ruby/1.9')
      set  = tree.expressions[0]

      assert_equal( true, set.is_a?(CharacterSet) )

      ['[:^xdigit:]', '[:^lower:]'].each do |member|
        assert_equal( true, set.include?(member) )
      end

      assert_equal( true, set.matches?("GT") )
    end
  end

end
@@ -0,0 +1,30 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Load every scanner test-case file that sits next to this one, so that
# running this file runs the whole scanner suite.
#
# `refcalls` was missing from the list even though test_refcalls.rb ships with
# the other scanner tests, so those tests were never run through this entry
# point.
%w{
  anchors errors escapes groups literals meta properties
  quantifiers refcalls scripts sets types
}.each do |tc|
  require File.expand_path("../test_#{tc}", __FILE__)
end
9
+
10
# Top-level sanity checks on the shape of the scanner's output.
class TestRegexpScanner < Test::Unit::TestCase

  # RS.scan returns its tokens in an Array.
  def test_scanner_returns_an_array
    result = RS.scan('abc')

    assert_instance_of( Array, result )
  end

  # Every token is itself an Array with exactly five elements.
  def test_scanner_returns_tokens_as_arrays
    tokens = RS.scan('^abc+[^one]{2,3}\b\d\\\C-C$')

    well_formed = tokens.all? do |token|
      token.kind_of?(Array) && token.length == 5
    end

    assert( well_formed, "Not all tokens are arrays of 5 elements" )
  end

  # A Regexp object (not only a String) can be scanned; this one is expected
  # to yield 26 tokens.
  def test_scanner_token_count
    re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
    token_count = RS.scan(re).length

    assert_equal( 26, token_count )
  end

end
+ end