regexp_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for quantifier tokens: the greedy, reluctant and possessive
# forms of ?, * and +, plus the {m}, {m,}, {,n} and {m,n} interval forms.
class ScannerQuantifiers < Test::Unit::TestCase

  # pattern => [type, token, text] expected for the LAST token the scanner
  # emits for that pattern (the quantifier always ends the pattern).
  tests = {
    'a?'      => [:quantifier,  :zero_or_one,             '?'],
    'a??'     => [:quantifier,  :zero_or_one_reluctant,   '??'],
    'a?+'     => [:quantifier,  :zero_or_one_possessive,  '?+'],

    'a*'      => [:quantifier,  :zero_or_more,            '*'],
    'a*?'     => [:quantifier,  :zero_or_more_reluctant,  '*?'],
    'a*+'     => [:quantifier,  :zero_or_more_possessive, '*+'],

    'a+'      => [:quantifier,  :one_or_more,             '+'],
    'a+?'     => [:quantifier,  :one_or_more_reluctant,   '+?'],
    'a++'     => [:quantifier,  :one_or_more_possessive,  '++'],

    'a{2}'    => [:quantifier,  :interval,                '{2}'],
    'a{2,}'   => [:quantifier,  :interval,                '{2,}'],
    'a{,2}'   => [:quantifier,  :interval,                '{,2}'],
    'a{2,4}'  => [:quantifier,  :interval,                '{2,4}'],
  }

  # All interval patterns share the :interval token name, so they get a
  # running number to keep the generated method names unique.
  interval_count = 0

  tests.each do |pattern, expected|
    label =
      if expected[1] == :interval
        interval_count += 1
        "interval_#{interval_count}"
      else
        expected[1]
      end

    # One test method per token member, so a failure names the member
    # (type, token or text) that did not match.
    [:type, :token, :text].each_with_index do |field, index|
      define_method "test_scan_#{expected[0]}_#{label}_#{field}" do
        scanned = RS.scan(pattern).last
        assert_equal(expected[index], scanned[index])
      end
    end
  end

end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for group back-references (\k) and sub-expression calls (\g)
# in their named, numbered, relative and nesting-level forms, written with
# both angle-bracket and single-quote delimiters.
class ScannerRefCalls < Test::Unit::TestCase

  # pattern => [token index, type, token, text, start offset, end offset]
  #
  # The first element selects which scanned token to inspect; the remaining
  # five are compared against that token as a whole.
  tests = {
    # Group back-references, named, numbered, and relative
    '(?<X>abc)\k<X>'      => [3, :backref, :name_ref_ab,         '\k<X>',     9, 14],
    "(?<X>abc)\\k'X'"     => [3, :backref, :name_ref_sq,         "\\k'X'",    9, 14],

    '(abc)\k<1>'          => [3, :backref, :number_ref_ab,       '\k<1>',     5, 10],
    "(abc)\\k'1'"         => [3, :backref, :number_ref_sq,       "\\k'1'",    5, 10],

    '(abc)\k<-1>'         => [3, :backref, :number_rel_ref_ab,   '\k<-1>',    5, 11],
    "(abc)\\k'-1'"        => [3, :backref, :number_rel_ref_sq,   "\\k'-1'",   5, 11],

    # Sub-expression invocation, named, numbered, and relative
    '(?<X>abc)\g<X>'      => [3, :backref, :name_call_ab,        '\g<X>',     9, 14],
    "(?<X>abc)\\g'X'"     => [3, :backref, :name_call_sq,        "\\g'X'",    9, 14],

    '(abc)\g<1>'          => [3, :backref, :number_call_ab,      '\g<1>',     5, 10],
    "(abc)\\g'1'"         => [3, :backref, :number_call_sq,      "\\g'1'",    5, 10],

    '(abc)\g<-1>'         => [3, :backref, :number_rel_call_ab,  '\g<-1>',    5, 11],
    "(abc)\\g'-1'"        => [3, :backref, :number_rel_call_sq,  "\\g'-1'",   5, 11],

    # Group back-references, with nesting level
    '(?<X>abc)\k<X-0>'    => [3, :backref, :name_nest_ref_ab,    '\k<X-0>',   9, 16],
    "(?<X>abc)\\k'X-0'"   => [3, :backref, :name_nest_ref_sq,    "\\k'X-0'",  9, 16],

    '(abc)\k<1-0>'        => [3, :backref, :number_nest_ref_ab,  '\k<1-0>',   5, 12],
    "(abc)\\k'1-0'"       => [3, :backref, :number_nest_ref_sq,  "\\k'1-0'",  5, 12],
  }

  # The running counter keeps the generated method names unique.
  count = 0
  tests.each do |pattern, test|
    define_method "test_scan_#{test[1]}_#{test[2]}_#{count+=1}" do

      tokens = RS.scan(pattern)
      token  = tokens[test[0]]

      # Whole-token comparison: type, token, text, start and end offsets.
      assert_equal( test[1,5], token )

      # The last two token members are start and END offsets, not start and
      # length, so the text must be sliced out with an exclusive range.
      # String#[start, length] only appeared to work because each reference
      # is the final token of its pattern and the length was clamped.
      assert_equal( test[3], pattern[token[3]...token[4]] )

    end
  end

end
|
@@ -0,0 +1,314 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for Unicode script properties. Every script name (short
# code, long name and any alias) is scanned both as \p{...} and as \P{...}.
class ScannerUnicodeScripts < Test::Unit::TestCase

  # script name as written inside the braces => expected token name.
  # Entry order matters: the generated method names are numbered in
  # iteration order.
  tests = {
    'Arab'                    => :script_arabic,
    'Arabic'                  => :script_arabic,

    'Armi'                    => :script_imperial_aramaic,
    'Imperial Aramaic'        => :script_imperial_aramaic,

    'Armn'                    => :script_armenian,
    'Armenian'                => :script_armenian,

    'Avst'                    => :script_avestan,
    'Avestan'                 => :script_avestan,

    'Bali'                    => :script_balinese,
    'Balinese'                => :script_balinese,

    'Bamu'                    => :script_bamum,
    'Bamum'                   => :script_bamum,

    'Batk'                    => :script_batak,
    'Batak'                   => :script_batak,

    'Beng'                    => :script_bengali,
    'Bengali'                 => :script_bengali,

    'Bopo'                    => :script_bopomofo,
    'Bopomofo'                => :script_bopomofo,

    'Brah'                    => :script_brahmi,
    'Brahmi'                  => :script_brahmi,

    'Brai'                    => :script_braille,
    'Braille'                 => :script_braille,

    'Bugi'                    => :script_buginese,
    'Buginese'                => :script_buginese,

    'Buhd'                    => :script_buhid,
    'Buhid'                   => :script_buhid,

    'Cans'                    => :script_canadian_aboriginal,
    'Canadian Aboriginal'     => :script_canadian_aboriginal,

    'Cari'                    => :script_carian,
    'Carian'                  => :script_carian,

    'Cham'                    => :script_cham,

    'Cher'                    => :script_cherokee,
    'Cherokee'                => :script_cherokee,

    'Copt'                    => :script_coptic,
    'Coptic'                  => :script_coptic,
    'Qaac'                    => :script_coptic,

    'Cprt'                    => :script_cypriot,
    'Cypriot'                 => :script_cypriot,

    'Cyrl'                    => :script_cyrillic,
    'Cyrillic'                => :script_cyrillic,

    'Deva'                    => :script_devanagari,
    'Devanagari'              => :script_devanagari,

    'Dsrt'                    => :script_deseret,
    'Deseret'                 => :script_deseret,

    'Egyp'                    => :script_egyptian_hieroglyphs,
    'Egyptian Hieroglyphs'    => :script_egyptian_hieroglyphs,

    'Ethi'                    => :script_ethiopic,
    'Ethiopic'                => :script_ethiopic,

    'Geor'                    => :script_georgian,
    'Georgian'                => :script_georgian,

    'Glag'                    => :script_glagolitic,
    'Glagolitic'              => :script_glagolitic,

    'Goth'                    => :script_gothic,
    'Gothic'                  => :script_gothic,

    'Grek'                    => :script_greek,
    'Greek'                   => :script_greek,

    'Gujr'                    => :script_gujarati,
    'Gujarati'                => :script_gujarati,

    'Guru'                    => :script_gurmukhi,
    'Gurmukhi'                => :script_gurmukhi,

    'Hang'                    => :script_hangul,
    'Hangul'                  => :script_hangul,

    'Hani'                    => :script_han,
    'Han'                     => :script_han,

    'Hano'                    => :script_hanunoo,
    'Hanunoo'                 => :script_hanunoo,

    'Hebr'                    => :script_hebrew,
    'Hebrew'                  => :script_hebrew,

    'Hira'                    => :script_hiragana,
    'Hiragana'                => :script_hiragana,

    'Hrkt'                    => :script_katakana_or_hiragana,
    'Katakana or Hiragana'    => :script_katakana_or_hiragana,

    'Ital'                    => :script_old_italic,
    'Old Italic'              => :script_old_italic,

    'Java'                    => :script_javanese,
    'Javanese'                => :script_javanese,

    'Kali'                    => :script_kayah_li,
    'Kayah Li'                => :script_kayah_li,

    'Kana'                    => :script_katakana,
    'Katakana'                => :script_katakana,

    'Khar'                    => :script_kharoshthi,
    'Kharoshthi'              => :script_kharoshthi,

    'Khmr'                    => :script_khmer,
    'Khmer'                   => :script_khmer,

    'Knda'                    => :script_kannada,
    'Kannada'                 => :script_kannada,

    'Kthi'                    => :script_kaithi,
    'Kaithi'                  => :script_kaithi,

    'Lana'                    => :script_tai_tham,
    'Tai Tham'                => :script_tai_tham,

    'Laoo'                    => :script_lao,
    'Lao'                     => :script_lao,

    'Latn'                    => :script_latin,
    'Latin'                   => :script_latin,

    'Lepc'                    => :script_lepcha,
    'Lepcha'                  => :script_lepcha,

    'Limb'                    => :script_limbu,
    'Limbu'                   => :script_limbu,

    'Linb'                    => :script_linear_b,
    'Linear B'                => :script_linear_b,

    'Lisu'                    => :script_lisu,

    'Lyci'                    => :script_lycian,
    'Lycian'                  => :script_lycian,

    'Lydi'                    => :script_lydian,
    'Lydian'                  => :script_lydian,

    'Mand'                    => :script_mandaic,
    'Mandaic'                 => :script_mandaic,

    'Mlym'                    => :script_malayalam,
    'Malayalam'               => :script_malayalam,

    'Mong'                    => :script_mongolian,
    'Mongolian'               => :script_mongolian,

    'Mtei'                    => :script_meetei_mayek,
    'Meetei Mayek'            => :script_meetei_mayek,

    'Mymr'                    => :script_myanmar,
    'Myanmar'                 => :script_myanmar,

    'Nkoo'                    => :script_nko,
    'Nko'                     => :script_nko,

    'Ogam'                    => :script_ogham,
    'Ogham'                   => :script_ogham,

    'Olck'                    => :script_ol_chiki,
    'Ol Chiki'                => :script_ol_chiki,

    'Orkh'                    => :script_old_turkic,
    'Old Turkic'              => :script_old_turkic,

    'Orya'                    => :script_oriya,
    'Oriya'                   => :script_oriya,

    'Osma'                    => :script_osmanya,
    'Osmanya'                 => :script_osmanya,

    'Phag'                    => :script_phags_pa,
    'Phags Pa'                => :script_phags_pa,

    'Phli'                    => :script_inscriptional_pahlavi,
    'Inscriptional Pahlavi'   => :script_inscriptional_pahlavi,

    'Phnx'                    => :script_phoenician,
    'Phoenician'              => :script_phoenician,

    'Prti'                    => :script_inscriptional_parthian,
    'Inscriptional Parthian'  => :script_inscriptional_parthian,

    'Rjng'                    => :script_rejang,
    'Rejang'                  => :script_rejang,

    'Runr'                    => :script_runic,
    'Runic'                   => :script_runic,

    'Samr'                    => :script_samaritan,
    'Samaritan'               => :script_samaritan,

    'Sarb'                    => :script_old_south_arabian,
    'Old South Arabian'       => :script_old_south_arabian,

    'Saur'                    => :script_saurashtra,
    'Saurashtra'              => :script_saurashtra,

    'Shaw'                    => :script_shavian,
    'Shavian'                 => :script_shavian,

    'Sinh'                    => :script_sinhala,
    'Sinhala'                 => :script_sinhala,

    'Sund'                    => :script_sundanese,
    'Sundanese'               => :script_sundanese,

    'Sylo'                    => :script_syloti_nagri,
    'Syloti Nagri'            => :script_syloti_nagri,

    'Syrc'                    => :script_syriac,
    'Syriac'                  => :script_syriac,

    'Tagb'                    => :script_tagbanwa,
    'Tagbanwa'                => :script_tagbanwa,

    'Tale'                    => :script_tai_le,
    'Tai Le'                  => :script_tai_le,

    'Talu'                    => :script_new_tai_lue,
    'New Tai Lue'             => :script_new_tai_lue,

    'Taml'                    => :script_tamil,
    'Tamil'                   => :script_tamil,

    'Tavt'                    => :script_tai_viet,
    'Tai Viet'                => :script_tai_viet,

    'Telu'                    => :script_telugu,
    'Telugu'                  => :script_telugu,

    'Tfng'                    => :script_tifinagh,
    'Tifinagh'                => :script_tifinagh,

    'Tglg'                    => :script_tagalog,
    'Tagalog'                 => :script_tagalog,

    'Thaa'                    => :script_thaana,
    'Thaana'                  => :script_thaana,

    'Thai'                    => :script_thai,

    'Tibt'                    => :script_tibetan,
    'Tibetan'                 => :script_tibetan,

    'Ugar'                    => :script_ugaritic,
    'Ugaritic'                => :script_ugaritic,

    'Vaii'                    => :script_vai,
    'Vai'                     => :script_vai,

    'Xpeo'                    => :script_old_persian,
    'Old Persian'             => :script_old_persian,

    'Xsux'                    => :script_cuneiform,
    'Cuneiform'               => :script_cuneiform,

    'Yiii'                    => :script_yi,
    'Yi'                      => :script_yi,

    'Zinh'                    => :script_inherited,
    'Inherited'               => :script_inherited,
    'Qaai'                    => :script_inherited,

    'Zyyy'                    => :script_common,
    'Common'                  => :script_common,

    'Zzzz'                    => :script_unknown,
    'Unknown'                 => :script_unknown,
  }

  # Each script name yields two test methods: the positive (\p) form first,
  # then the negated (\P) form. The shared running number keeps method names
  # unique when several names map to the same token.
  sequence = 0
  tests.each do |script_name, expected_token|
    [['p', :property], ['P', :nonproperty]].each do |escape, expected_type|
      sequence += 1

      define_method "test_scan_#{expected_type}_#{expected_token}_#{sequence}" do
        # The property sits between two literals, so it is the token at
        # index 1 of the scan result.
        scanned = RS.scan("a\\#{escape}{#{script_name}}c")[1]

        assert_equal(expected_type,  scanned[0])
        assert_equal(expected_token, scanned[1])
      end
    end
  end

end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for character sets: open/close/negate markers, plain and
# escaped members, type shortcuts, ranges, POSIX classes, collation and
# equivalence sequences, intersections, properties and nested subsets.
class ScannerSets < Test::Unit::TestCase

  # pattern => [token index, type, token, text, start offset, end offset]
  #
  # The first element selects which scanned token to inspect; the remaining
  # five are compared against that token as a whole.
  tests = {
    '[a]'                   => [0, :set,    :open,            '[',          0, 1],
    '[b]'                   => [2, :set,    :close,           ']',          2, 3],
    '[^n]'                  => [1, :set,    :negate,          '^',          1, 2],

    '[c]'                   => [1, :set,    :member,          'c',          1, 2],
    '[\b]'                  => [1, :set,    :backspace,       '\b',         1, 3],

    '[.]'                   => [1, :set,    :member,          '.',          1, 2],
    '[?]'                   => [1, :set,    :member,          '?',          1, 2],
    '[*]'                   => [1, :set,    :member,          '*',          1, 2],
    '[+]'                   => [1, :set,    :member,          '+',          1, 2],
    '[{]'                   => [1, :set,    :member,          '{',          1, 2],
    '[}]'                   => [1, :set,    :member,          '}',          1, 2],
    '[<]'                   => [1, :set,    :member,          '<',          1, 2],
    '[>]'                   => [1, :set,    :member,          '>',          1, 2],

    '[\.]'                  => [1, :set,    :escape,          '\.',         1, 3],
    '[\!]'                  => [1, :set,    :escape,          '\!',         1, 3],
    '[\#]'                  => [1, :set,    :escape,          '\#',         1, 3],
    '[\]]'                  => [1, :set,    :escape,          '\]',         1, 3],
    '[\\\]'                 => [1, :set,    :escape,          '\\\\',       1, 3],
    '[a\-c]'                => [2, :set,    :escape,          '\-',         2, 4],

    '[\d]'                  => [1, :set,    :type_digit,      '\d',         1, 3],
    '[\D]'                  => [1, :set,    :type_nondigit,   '\D',         1, 3],

    '[\h]'                  => [1, :set,    :type_hex,        '\h',         1, 3],
    '[\H]'                  => [1, :set,    :type_nonhex,     '\H',         1, 3],

    '[\s]'                  => [1, :set,    :type_space,      '\s',         1, 3],
    '[\S]'                  => [1, :set,    :type_nonspace,   '\S',         1, 3],

    '[\w]'                  => [1, :set,    :type_word,       '\w',         1, 3],
    '[\W]'                  => [1, :set,    :type_nonword,    '\W',         1, 3],

    '[a-c]'                 => [1, :set,    :range,           'a-c',        1, 4],
    '[a-c-]'                => [2, :set,    :member,          '-',          4, 6],
    '[a-c^]'                => [2, :set,    :member,          '^',          4, 5],
    '[a-cd-f]'              => [2, :set,    :range,           'd-f',        4, 7],

    '[a[:digit:]c]'         => [2, :set,    :class_digit,     '[:digit:]',  2, 11],
    '[[:digit:][:space:]]'  => [2, :set,    :class_space,     '[:space:]', 10, 19],
    '[[:^digit:]]'          => [1, :set,    :class_nondigit,  '[:^digit:]', 1, 11],

    '[a[.a-b.]c]'           => [2, :set,    :collation,       '[.a-b.]',    2, 9],
    '[a[=e=]c]'             => [2, :set,    :equivalent,      '[=e=]',      2, 7],

    '[a-d&&g-h]'            => [2, :set,    :intersection,    '&&',         4, 6],

    '[\\x20-\\x28]'         => [1, :set,    :range_hex,       '\x20-\x28',  1, 10],

    '[a\p{digit}c]'         => [2, :set,    :digit,           '\p{digit}',  2, 11],
    '[a\P{digit}c]'         => [2, :set,    :digit,           '\P{digit}',  2, 11],

    '[a\p{ALPHA}c]'         => [2, :set,    :alpha,           '\p{ALPHA}',  2, 11],
    '[a\p{P}c]'             => [2, :set,    :punct_any,       '\p{P}',      2, 7],
    '[a\p{P}\P{Z}c]'        => [3, :set,    :separator_any,   '\P{Z}',      7, 12],

    '[a-w&&[^c-g]z]'        => [3, :subset, :open,            '[',          6, 7],
    '[a-w&&[^c-h]z]'        => [4, :subset, :negate,          '^',          7, 8],
    '[a-w&&[^c-i]z]'        => [5, :subset, :range,           'c-i',        8, 11],
    '[a-w&&[^c-j]z]'        => [6, :subset, :close,           ']',         11, 12],
  }

  # Many rows share the same type/token pair (e.g. :set/:member), so a
  # running number is appended to keep the generated method names unique.
  sequence = 0
  tests.each do |pattern, expectation|
    position = expectation[0]
    expected = expectation.slice(1, 5)
    sequence += 1

    define_method "test_scan_#{expectation[1]}_#{expectation[2]}_#{sequence}" do
      assert_equal(expected, RS.scan(pattern)[position])
    end
  end

end
|