ruby-boost-regex 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +4 -0
- data/LICENSE +20 -0
- data/README.markdown +105 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/benchmark/benchmark.rb +71 -0
- data/benchmark/fasta.input +2500 -0
- data/ext/ruby-boost-regex/extconf.rb +5 -0
- data/ext/ruby-boost-regex/regexp.cpp +340 -0
- data/lib/ruby-boost-regex.rb +3 -0
- data/lib/ruby-boost-regex/ruby-parts.rb +25 -0
- data/spec/ruby-boost-regex_spec.rb +299 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +22 -0
- metadata +105 -0
@@ -0,0 +1,340 @@
|
|
1
|
+
#include <boost/regex.hpp>
|
2
|
+
#include <iostream>
|
3
|
+
#include <string>
|
4
|
+
#include <exception>
|
5
|
+
#include "ruby.h"
|
6
|
+
#include "re.h"
|
7
|
+
|
8
|
+
static VALUE rb_mBoost;
|
9
|
+
static VALUE rb_cBoostRegexp;
|
10
|
+
|
11
|
+
static VALUE rb_kRegexpMultiline;
|
12
|
+
static VALUE rb_kRegexpIgnorecase;
|
13
|
+
static VALUE rb_kRegexpExtended;
|
14
|
+
|
15
|
+
///////// imported from re.c
|
16
|
+
|
17
|
+
#define RE_TALLOC(n,t) ((t*)alloca((n)*sizeof(t)))
|
18
|
+
#define TMALLOC(n,t) ((t*)xmalloc((n)*sizeof(t)))
|
19
|
+
#define TREALLOC(s,n,t) (s=((t*)xrealloc(s,(n)*sizeof(t))))
|
20
|
+
#define MATCH_BUSY FL_USER2
|
21
|
+
|
22
|
+
/**
 * Allocates a new T_MATCH object of class `klass`.
 *
 * The object starts with no string attached and a freshly allocated,
 * zero-filled register block.
 */
static VALUE match_alloc(VALUE klass)
{
    NEWOBJ(match, struct RMatch);
    OBJSETUP(match, klass, T_MATCH);

    // Both fields are cleared before the register block is allocated.
    match->str = 0;
    match->regs = 0;

    struct re_registers *fresh_regs = ALLOC(struct re_registers);
    MEMZERO(fresh_regs, struct re_registers, 1);
    match->regs = fresh_regs;

    return (VALUE)match;
}
|
34
|
+
|
35
|
+
static void
|
36
|
+
init_regs(struct re_registers *regs, unsigned int num_regs)
|
37
|
+
{
|
38
|
+
int i;
|
39
|
+
|
40
|
+
regs->num_regs = num_regs;
|
41
|
+
if (num_regs < RE_NREGS)
|
42
|
+
num_regs = RE_NREGS;
|
43
|
+
|
44
|
+
if (regs->allocated == 0) {
|
45
|
+
regs->beg = TMALLOC(num_regs, int);
|
46
|
+
regs->end = TMALLOC(num_regs, int);
|
47
|
+
regs->allocated = num_regs;
|
48
|
+
}
|
49
|
+
else if (regs->allocated < num_regs) {
|
50
|
+
TREALLOC(regs->beg, num_regs, int);
|
51
|
+
TREALLOC(regs->end, num_regs, int);
|
52
|
+
regs->allocated = num_regs;
|
53
|
+
}
|
54
|
+
for (i=0; i<num_regs; i++) {
|
55
|
+
regs->beg[i] = regs->end[i] = -1;
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
void
|
60
|
+
re_copy_registers(struct re_registers *regs1, struct re_registers *regs2)
|
61
|
+
{
|
62
|
+
int i;
|
63
|
+
|
64
|
+
if (regs1 == regs2) return;
|
65
|
+
if (regs1->allocated == 0) {
|
66
|
+
regs1->beg = TMALLOC(regs2->num_regs, int);
|
67
|
+
regs1->end = TMALLOC(regs2->num_regs, int);
|
68
|
+
regs1->allocated = regs2->num_regs;
|
69
|
+
}
|
70
|
+
else if (regs1->allocated < regs2->num_regs) {
|
71
|
+
TREALLOC(regs1->beg, regs2->num_regs, int);
|
72
|
+
TREALLOC(regs1->end, regs2->num_regs, int);
|
73
|
+
regs1->allocated = regs2->num_regs;
|
74
|
+
}
|
75
|
+
for (i=0; i<regs2->num_regs; i++) {
|
76
|
+
regs1->beg[i] = regs2->beg[i];
|
77
|
+
regs1->end[i] = regs2->end[i];
|
78
|
+
}
|
79
|
+
regs1->num_regs = regs2->num_regs;
|
80
|
+
}
|
81
|
+
|
82
|
+
/////////////////////////////
|
83
|
+
|
84
|
+
// extracts the boost regex using Data_Get_Struct
|
85
|
+
boost::regex *get_br_from_value(VALUE self) {
|
86
|
+
boost::regex *p;
|
87
|
+
Data_Get_Struct(self, boost::regex, p);
|
88
|
+
return p;
|
89
|
+
}
|
90
|
+
|
91
|
+
/**
|
92
|
+
* Frees the boost regex.
|
93
|
+
*/
|
94
|
+
static void br_free(void *p) {
|
95
|
+
delete (boost::regex *)p;
|
96
|
+
}
|
97
|
+
|
98
|
+
/**
|
99
|
+
* Allocates a new Boost::Regexp by allocating heap space via new for boost::regex.
|
100
|
+
* Does not initialize the regexp.
|
101
|
+
*/
|
102
|
+
static VALUE br_alloc(VALUE klass) {
|
103
|
+
boost::regex *reg;
|
104
|
+
VALUE object;
|
105
|
+
|
106
|
+
reg = new boost::regex;
|
107
|
+
object = Data_Wrap_Struct(klass, NULL, br_free, reg);
|
108
|
+
return object;
|
109
|
+
}
|
110
|
+
|
111
|
+
/**
|
112
|
+
* Initializes a regexp with a pattern and options.
|
113
|
+
* The boost regex is already allocated so we just call the constructor here.
|
114
|
+
*/
|
115
|
+
VALUE br_init(int argc, VALUE *argv, VALUE self) {
|
116
|
+
VALUE reg_to_convert, flags;
|
117
|
+
boost::regex *reg;
|
118
|
+
VALUE str;
|
119
|
+
int newflags, oldflagsint;
|
120
|
+
rb_scan_args(argc, argv, "11", ®_to_convert, &flags);
|
121
|
+
|
122
|
+
reg = get_br_from_value(self);
|
123
|
+
if (NIL_P(flags)) {
|
124
|
+
flags = UINT2NUM(boost::regex_constants::normal);
|
125
|
+
}
|
126
|
+
try {
|
127
|
+
if (TYPE(reg_to_convert) == T_REGEXP) {
|
128
|
+
str = rb_funcall(reg_to_convert, rb_intern("source"), 0);
|
129
|
+
// calculate the flags to use
|
130
|
+
newflags = NUM2UINT(flags);
|
131
|
+
VALUE oldflags = rb_funcall(reg_to_convert, rb_intern("options"), 0);
|
132
|
+
int oldflagsint = FIX2INT(oldflags);
|
133
|
+
// convert ruby regexp flags to boost regex flags
|
134
|
+
if (oldflagsint & FIX2INT(rb_kRegexpIgnorecase))
|
135
|
+
newflags |= boost::regex_constants::icase;
|
136
|
+
if (oldflagsint & FIX2INT(rb_kRegexpMultiline))
|
137
|
+
newflags |= boost::regex_constants::mod_s;
|
138
|
+
if (oldflagsint & FIX2INT(rb_kRegexpExtended))
|
139
|
+
newflags |= boost::regex_constants::mod_x;
|
140
|
+
// convert back to fixnum
|
141
|
+
flags = UINT2NUM(newflags);
|
142
|
+
} else {
|
143
|
+
str = rb_convert_type(reg_to_convert, T_STRING, "String", "to_s");
|
144
|
+
}
|
145
|
+
*reg = boost::regex(RSTRING_PTR(str), FIX2UINT(flags));
|
146
|
+
} catch (boost::regex_error& exc) {
|
147
|
+
// C++ exceptions have to be re-raised as ruby
|
148
|
+
rb_raise(rb_eArgError, "Invalid regular expression");
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
/**
 * Returns a MatchData object that may be overwritten with a new match.
 *
 * A new object is allocated when there is no current backref or the
 * current one is flagged MATCH_BUSY. Otherwise the current backref is
 * reused, with its taint flag set at safe level >= 3 and cleared below it.
 */
static VALUE get_backref_for_modification() {
    VALUE current = rb_backref_get();

    if (NIL_P(current) || FL_TEST(current, MATCH_BUSY)) {
        return match_alloc(rb_cMatch);
    }

    if (rb_safe_level() >= 3) {
        OBJ_TAINT(current);
    } else {
        FL_UNSET(current, FL_TAINT);
    }
    return current;
}
|
166
|
+
|
167
|
+
static void
|
168
|
+
fill_regs_from_smatch(std::string::const_iterator first, std::string::const_iterator last, struct re_registers *regs, boost::smatch matches) {
|
169
|
+
init_regs(regs, matches.size());
|
170
|
+
regs->beg[0] = matches[0].first - first;
|
171
|
+
regs->end[0] = matches[0].second - first;
|
172
|
+
|
173
|
+
for (int idx = 1; idx <= matches.size(); idx++) {
|
174
|
+
if (!matches[idx].matched) {
|
175
|
+
regs->beg[idx] = regs->end[idx] = -1;
|
176
|
+
} else {
|
177
|
+
regs->beg[idx] = matches[idx].first - first;
|
178
|
+
regs->end[idx] = matches[idx].second - first;
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
/**
|
184
|
+
* General matcher method that re-raises exception as a Ruby exception. Gotta use this. sorry.
|
185
|
+
*/
|
186
|
+
static bool
|
187
|
+
br_reg_match_iters(VALUE str, std::string::const_iterator start, std::string::const_iterator stop, boost::smatch& matches, boost::regex reg)
|
188
|
+
{
|
189
|
+
static struct re_registers regs;
|
190
|
+
try {
|
191
|
+
if (boost::regex_search(start, stop, matches, reg)) {
|
192
|
+
VALUE match = get_backref_for_modification();
|
193
|
+
RMATCH(match)->str = rb_str_dup(str);
|
194
|
+
fill_regs_from_smatch(start, stop, ®s, matches);
|
195
|
+
re_copy_registers(RMATCH(match)->regs, ®s);
|
196
|
+
rb_backref_set(match);
|
197
|
+
return true;
|
198
|
+
} else {
|
199
|
+
rb_backref_set(Qnil);
|
200
|
+
return false;
|
201
|
+
}
|
202
|
+
} catch (std::runtime_error& err) {
|
203
|
+
rb_raise(rb_eRegexpError, "Ran out of resources while matching a Boost Regexp.");
|
204
|
+
}
|
205
|
+
}
|
206
|
+
|
207
|
+
static int
|
208
|
+
br_reg_search(VALUE self, VALUE str) {
|
209
|
+
boost::regex reg = *get_br_from_value(self);
|
210
|
+
std::string input = StringValuePtr(str);
|
211
|
+
|
212
|
+
std::string::const_iterator start, end;
|
213
|
+
start = input.begin();
|
214
|
+
end = input.end();
|
215
|
+
|
216
|
+
boost::smatch matches;
|
217
|
+
if (br_reg_match_iters(str, start, end, matches, reg)) {
|
218
|
+
return matches[0].first - start;
|
219
|
+
} else {
|
220
|
+
return -1;
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
/**
 * Boost::Regexp#match(str)
 *
 * Searches `str` with the regex wrapped by `self`.
 *
 * Returns the current backref (the MatchData just stored) on a match, or
 * nil when nothing matched.
 * Raises TypeError (via StringValue) when `str` cannot be converted to a
 * String.
 */
static VALUE
br_reg_do_match(VALUE self, VALUE str) {
    boost::regex reg = *get_br_from_value(self);

    // Copy by explicit length. Building the std::string from a bare char*
    // would cut the subject off at the first NUL byte.
    StringValue(str);
    std::string input(RSTRING_PTR(str), RSTRING_LEN(str));

    std::string::const_iterator start = input.begin();
    std::string::const_iterator end = input.end();

    boost::smatch matches;
    if (br_reg_match_iters(str, start, end, matches, reg)) {
        return rb_backref_get();
    } else {
        return Qnil;
    }
}
|
240
|
+
|
241
|
+
/**
 * Boost::Regexp#== / #eql?
 *
 * Returns true only when `other` is a Boost::Regexp whose wrapped
 * boost::regex compares equal to this one's; false otherwise.
 */
static VALUE br_eql(VALUE self, VALUE other) {
    if (rb_obj_is_kind_of(other, rb_cBoostRegexp)) {
        boost::regex lhs = *get_br_from_value(self);
        boost::regex rhs = *get_br_from_value(other);
        if (lhs == rhs) {
            return Qtrue;
        }
    }
    return Qfalse;
}
|
250
|
+
|
251
|
+
/**
 * Boost::Regexp#=~
 *
 * Returns the match offset as a Fixnum, or nil when `str` does not match.
 */
static VALUE br_match_operator(VALUE self, VALUE str) {
    const int pos = br_reg_search(self, str);
    return (pos < 0) ? Qnil : INT2FIX(pos);
}
|
258
|
+
|
259
|
+
/**
 * Boost::Regexp#===
 *
 * Returns true when `str` matches and false otherwise.
 */
static VALUE br_match_eqq_operator(VALUE self, VALUE str) {
    const bool missed = br_reg_search(self, str) < 0;
    return missed ? Qfalse : Qtrue;
}
|
267
|
+
|
268
|
+
/**
 * Boost::Regexp#options
 *
 * Returns the flags of the wrapped boost::regex as a Ruby Integer.
 */
static VALUE br_options(VALUE self) {
    const boost::regex *wrapped = get_br_from_value(self);
    return UINT2NUM(wrapped->flags());
}
|
272
|
+
|
273
|
+
/**
 * Boost::Regexp#source
 *
 * Returns the pattern text of the wrapped boost::regex as a new Ruby String.
 */
static VALUE br_source(VALUE self) {
    const std::string pattern = get_br_from_value(self)->str();
    return rb_str_new(pattern.c_str(), pattern.size());
}
|
278
|
+
|
279
|
+
/**
 * Boost::Regexp#~
 *
 * Matches the regex against the value of $_ and returns what =~ returns.
 */
static VALUE br_tilde_operator(VALUE self) {
    return br_match_operator(self, rb_gv_get("$_"));
}
|
283
|
+
|
284
|
+
extern "C" {
|
285
|
+
VALUE Init_BoostRegexHook()
|
286
|
+
{
|
287
|
+
rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
|
288
|
+
|
289
|
+
rb_cMatch = rb_const_get(rb_cObject, rb_intern("MatchData"));
|
290
|
+
rb_kRegexpMultiline = rb_const_get(rb_cRegexp, rb_intern("MULTILINE"));
|
291
|
+
rb_kRegexpIgnorecase = rb_const_get(rb_cRegexp, rb_intern("IGNORECASE"));
|
292
|
+
rb_kRegexpExtended = rb_const_get(rb_cRegexp, rb_intern("EXTENDED"));
|
293
|
+
|
294
|
+
rb_mBoost = rb_define_module("Boost");
|
295
|
+
rb_cBoostRegexp = rb_define_class_under(rb_mBoost, "Regexp", rb_cObject);
|
296
|
+
|
297
|
+
rb_define_alloc_func(rb_cBoostRegexp, br_alloc);
|
298
|
+
rb_define_method(rb_cBoostRegexp, "initialize", RUBY_METHOD_FUNC(br_init), -1);
|
299
|
+
rb_define_method(rb_cBoostRegexp, "=~", RUBY_METHOD_FUNC(br_match_operator), 1);
|
300
|
+
rb_define_method(rb_cBoostRegexp, "===", RUBY_METHOD_FUNC(br_match_eqq_operator), 1);
|
301
|
+
rb_define_method(rb_cBoostRegexp, "source", RUBY_METHOD_FUNC(br_source), 0);
|
302
|
+
rb_define_method(rb_cBoostRegexp, "match", RUBY_METHOD_FUNC(br_reg_do_match), 1);
|
303
|
+
rb_define_method(rb_cBoostRegexp, "options", RUBY_METHOD_FUNC(br_options), 0);
|
304
|
+
rb_define_method(rb_cBoostRegexp, "~", RUBY_METHOD_FUNC(br_tilde_operator), 0);
|
305
|
+
rb_define_method(rb_cBoostRegexp, "==", RUBY_METHOD_FUNC(br_eql), 1);
|
306
|
+
rb_define_method(rb_cBoostRegexp, "eql?", RUBY_METHOD_FUNC(br_eql), 1);
|
307
|
+
|
308
|
+
rb_define_const(rb_cBoostRegexp, "NORMAL", UINT2NUM(boost::regex_constants::normal));
|
309
|
+
rb_define_const(rb_cBoostRegexp, "ECMASCRIPT", UINT2NUM(boost::regex_constants::ECMAScript));
|
310
|
+
rb_define_const(rb_cBoostRegexp, "JAVASCRIPT", UINT2NUM(boost::regex_constants::JavaScript));
|
311
|
+
rb_define_const(rb_cBoostRegexp, "JSCRIPT", UINT2NUM(boost::regex_constants::JScript));
|
312
|
+
rb_define_const(rb_cBoostRegexp, "PERL", UINT2NUM(boost::regex_constants::perl));
|
313
|
+
rb_define_const(rb_cBoostRegexp, "BASIC", UINT2NUM(boost::regex_constants::basic));
|
314
|
+
rb_define_const(rb_cBoostRegexp, "SED", UINT2NUM(boost::regex_constants::sed));
|
315
|
+
rb_define_const(rb_cBoostRegexp, "EXTENDED", UINT2NUM(boost::regex_constants::extended));
|
316
|
+
rb_define_const(rb_cBoostRegexp, "AWK", UINT2NUM(boost::regex_constants::awk));
|
317
|
+
rb_define_const(rb_cBoostRegexp, "GREP", UINT2NUM(boost::regex_constants::grep));
|
318
|
+
rb_define_const(rb_cBoostRegexp, "EGREP", UINT2NUM(boost::regex_constants::egrep));
|
319
|
+
rb_define_const(rb_cBoostRegexp, "IGNORECASE", UINT2NUM(boost::regex_constants::icase));
|
320
|
+
rb_define_const(rb_cBoostRegexp, "NO_SUBS", UINT2NUM(boost::regex_constants::nosubs));
|
321
|
+
rb_define_const(rb_cBoostRegexp, "OPTIMIZE", UINT2NUM(boost::regex_constants::optimize));
|
322
|
+
rb_define_const(rb_cBoostRegexp, "COLLATE", UINT2NUM(boost::regex_constants::collate));
|
323
|
+
|
324
|
+
rb_define_const(rb_cBoostRegexp, "NO_EXCEPTIONS", UINT2NUM(boost::regex_constants::no_except));
|
325
|
+
rb_define_const(rb_cBoostRegexp, "SAVE_SUBEXPRESSION_LOCS", UINT2NUM(boost::regex_constants::save_subexpression_location));
|
326
|
+
|
327
|
+
rb_define_const(rb_cBoostRegexp, "NO_MOD_M", UINT2NUM(boost::regex_constants::no_mod_m));
|
328
|
+
rb_define_const(rb_cBoostRegexp, "DOTS_NEVER_NEWLINES", UINT2NUM(boost::regex_constants::no_mod_s));
|
329
|
+
rb_define_const(rb_cBoostRegexp, "DOTS_MATCH_NEWLINES", UINT2NUM(boost::regex_constants::mod_s));
|
330
|
+
rb_define_const(rb_cBoostRegexp, "IGNORE_WHITESPACE", UINT2NUM(boost::regex_constants::mod_x));
|
331
|
+
rb_define_const(rb_cBoostRegexp, "NO_EMPTY_EXPRESSIONS", UINT2NUM(boost::regex_constants::no_empty_expressions));
|
332
|
+
|
333
|
+
rb_define_const(rb_cBoostRegexp, "NO_CHAR_CLASSES", UINT2NUM(boost::regex_constants::no_char_classes));
|
334
|
+
rb_define_const(rb_cBoostRegexp, "NO_ESCAPE_IN_LISTS", UINT2NUM(boost::regex_constants::no_escape_in_lists));
|
335
|
+
rb_define_const(rb_cBoostRegexp, "NO_INTERVALS", UINT2NUM(boost::regex_constants::no_intervals));
|
336
|
+
rb_define_const(rb_cBoostRegexp, "BK_PLUS_QM", UINT2NUM(boost::regex_constants::bk_plus_qm));
|
337
|
+
rb_define_const(rb_cBoostRegexp, "BK_VBAR", UINT2NUM(boost::regex_constants::bk_vbar));
|
338
|
+
rb_define_const(rb_cBoostRegexp, "LITERAL", UINT2NUM(boost::regex_constants::literal));
|
339
|
+
}
|
340
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Boost
  class Regexp

    class << self
      # Adds a #boost! method to Ruby's built-in Regexp class.
      #
      # Regexp#boost!(flags = Boost::Regexp::NORMAL) returns a Boost::Regexp
      # built from the receiver. The receiver's own i/m/x options are kept:
      # the Regexp itself is handed to the constructor, which translates them
      # to the matching Boost flags and combines them with +flags+.
      def enable_monkey_patch!
        ::Regexp.__send__(:define_method, :boost!) do |*args|
          flags = args.any? ? args.first : Boost::Regexp::NORMAL
          # Pass self rather than self.source; the source string alone would
          # silently drop the receiver's options (e.g. /abc/i).
          Boost::Regexp.new(self, flags)
        end
      end
    end

    # Returns true when the IGNORECASE flag is set on this regexp.
    def casefold?
      (self.options & IGNORECASE) > 0
    end

    # Returns a /source/flags style string, appending "m", "i" and "x" for
    # DOTS_MATCH_NEWLINES, IGNORECASE and IGNORE_WHITESPACE respectively.
    def inspect
      base = "/#{source}/"
      base << "m" if (options & Boost::Regexp::DOTS_MATCH_NEWLINES) > 0
      base << "i" if (options & Boost::Regexp::IGNORECASE) > 0
      base << "x" if (options & Boost::Regexp::IGNORE_WHITESPACE) > 0
      base
    end
  end
end
|
@@ -0,0 +1,299 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe Boost::Regexp do
|
4
|
+
it "creates wrapped regexes" do
|
5
|
+
reg = Boost::Regexp.new("\\d{3}")
|
6
|
+
reg.should be_a(Boost::Regexp)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "can be initialized with a string" do
|
10
|
+
reg = Boost::Regexp.new("abc")
|
11
|
+
reg.should match("jklabcdef")
|
12
|
+
end
|
13
|
+
|
14
|
+
it "can be initialized with a regexp" do
|
15
|
+
reg = Boost::Regexp.new(/abc/)
|
16
|
+
reg.should match("jklabcdef")
|
17
|
+
end
|
18
|
+
|
19
|
+
it "can be initialized with a regexp with the ignorecase flag" do
|
20
|
+
reg = Boost::Regexp.new(/abc/i)
|
21
|
+
reg.options.should have_flag(Boost::Regexp::IGNORECASE)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "can be initialized with a regexp with the ignore whitespace flag" do
|
25
|
+
reg = Boost::Regexp.new(/abc/x)
|
26
|
+
reg.options.should have_flag(Boost::Regexp::IGNORE_WHITESPACE)
|
27
|
+
end
|
28
|
+
|
29
|
+
it "can be initialized with a regexp with the dots-match-newlines flag" do
|
30
|
+
reg = Boost::Regexp.new(/abc/m)
|
31
|
+
reg.options.should have_flag(Boost::Regexp::DOTS_MATCH_NEWLINES)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "will not initialize without unrequested flags" do
|
35
|
+
reg = Boost::Regexp.new(/abc/x)
|
36
|
+
reg.options.should_not have_flag(Boost::Regexp::DOTS_MATCH_NEWLINES)
|
37
|
+
reg.options.should_not have_flag(Boost::Regexp::IGNORECASE)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "has a source method" do
|
41
|
+
Boost::Regexp.new("abc").should respond_to(:source)
|
42
|
+
end
|
43
|
+
|
44
|
+
it "has an options method" do
|
45
|
+
Boost::Regexp.new("abc").should respond_to(:options)
|
46
|
+
end
|
47
|
+
|
48
|
+
it "has a match method" do
|
49
|
+
Boost::Regexp.new("abc").should respond_to(:match)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "has a eql? method" do
|
53
|
+
Boost::Regexp.new("abc").should respond_to(:eql?)
|
54
|
+
end
|
55
|
+
|
56
|
+
it "has a =~ method" do
|
57
|
+
Boost::Regexp.new("abc").should respond_to(:=~)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "has a ~ method" do
|
61
|
+
Boost::Regexp.new("abc").should respond_to(:~)
|
62
|
+
end
|
63
|
+
|
64
|
+
it "has a === method" do
|
65
|
+
Boost::Regexp.new("abc").should respond_to(:===)
|
66
|
+
end
|
67
|
+
|
68
|
+
it "has a == method" do
|
69
|
+
Boost::Regexp.new("abc").should respond_to(:==)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
describe Boost::Regexp, "#inspect" do
|
74
|
+
it "extracts the source for normal regexes" do
|
75
|
+
Boost::Regexp.new("\\d{3}").inspect.should == "/\\d{3}/"
|
76
|
+
end
|
77
|
+
|
78
|
+
it "shows the ignore-case flag when enabled" do
|
79
|
+
Boost::Regexp.new("\\d{3}", Boost::Regexp::IGNORECASE).inspect.should == "/\\d{3}/i"
|
80
|
+
end
|
81
|
+
|
82
|
+
it "shows the multiline flag when enabled" do
|
83
|
+
Boost::Regexp.new("\\d{3}", Boost::Regexp::DOTS_MATCH_NEWLINES).inspect.should == "/\\d{3}/m"
|
84
|
+
end
|
85
|
+
|
86
|
+
it "shows the ignore-whitespace flag when enabled" do
|
87
|
+
Boost::Regexp.new("\\d{3}", Boost::Regexp::IGNORE_WHITESPACE).inspect.should == "/\\d{3}/x"
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
describe Boost::Regexp, "#source" do
|
92
|
+
it "returns the source string of the regexp" do
|
93
|
+
Boost::Regexp.new('(\d{3})-(\d{2})-(\d{4})').source.should == '(\d{3})-(\d{2})-(\d{4})'
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe Boost::Regexp, "#options" do
|
98
|
+
it "returns the flags used by the regexp" do
|
99
|
+
reg = Boost::Regexp.new("ab cd", Boost::Regexp::IGNORE_WHITESPACE)
|
100
|
+
reg.options.should have_flag(Boost::Regexp::IGNORE_WHITESPACE)
|
101
|
+
reg.options.should_not have_flag(Boost::Regexp::LITERAL)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
describe Boost::Regexp, "#==" do
|
106
|
+
it "correctly identifies two equal regexps" do
|
107
|
+
a = Boost::Regexp.new("abcd[efg]")
|
108
|
+
b = Boost::Regexp.new("abcd[efg]")
|
109
|
+
a.should == b
|
110
|
+
end
|
111
|
+
|
112
|
+
it "correctly identifies two different regexps" do
|
113
|
+
a = Boost::Regexp.new("abcd[efg]")
|
114
|
+
b = Boost::Regexp.new("abcd [efg]")
|
115
|
+
a.should_not == b
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
describe Boost::Regexp, "#eql?" do
|
120
|
+
it "correctly identifies two equal regexps using eql?" do
|
121
|
+
a = Boost::Regexp.new("abcd[efg]")
|
122
|
+
b = Boost::Regexp.new("abcd[efg]")
|
123
|
+
a.should eql(b)
|
124
|
+
end
|
125
|
+
|
126
|
+
it "correctly identifies two different regexps" do
|
127
|
+
a = Boost::Regexp.new("abcd[efg]")
|
128
|
+
b = Boost::Regexp.new("abcd [efg]")
|
129
|
+
a.should_not eql(b)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
describe Boost::Regexp, "#=~" do
|
134
|
+
it "returns the position of the match" do
|
135
|
+
result = Boost::Regexp.new("abcd") =~ "zxabcdefg"
|
136
|
+
result.should == 2
|
137
|
+
end
|
138
|
+
|
139
|
+
it "returns nil on no match" do
|
140
|
+
result = Boost::Regexp.new("abcd") =~ "aj3ioqh"
|
141
|
+
result.should be_nil
|
142
|
+
end
|
143
|
+
|
144
|
+
it "sets the special match variables on a match" do
|
145
|
+
Boost::Regexp.new("abcd") =~ "xyzabcdef"
|
146
|
+
$`.should == "xyz"
|
147
|
+
end
|
148
|
+
|
149
|
+
it "sets the special match variables to nil when a match fails" do
|
150
|
+
Boost::Regexp.new("abcd") =~ "uqioer"
|
151
|
+
$'.should be_nil
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
describe Boost::Regexp, "#===" do
|
156
|
+
it "returns true when a match succeeds" do
|
157
|
+
result = Boost::Regexp.new("abcd") === "uioabcdefg"
|
158
|
+
result.should be_true
|
159
|
+
end
|
160
|
+
|
161
|
+
it "returns false when a match fails" do
|
162
|
+
result = Boost::Regexp.new("abcd") === "uiot"
|
163
|
+
result.should be_false
|
164
|
+
end
|
165
|
+
|
166
|
+
it "sets the special match variables when a match succeeds" do
|
167
|
+
Boost::Regexp.new("abcd") === "xyzabcdef"
|
168
|
+
$`.should == "xyz"
|
169
|
+
end
|
170
|
+
|
171
|
+
it "sets the special match variables to nil when a match fails" do
|
172
|
+
Boost::Regexp.new("abcd") === "uqioer"
|
173
|
+
$'.should be_nil
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
describe Boost::Regexp, "#~" do
|
178
|
+
it "matches the regex against $_" do
|
179
|
+
$_ = "123abcdefg"
|
180
|
+
result = ~Boost::Regexp.new("abcd")
|
181
|
+
result.should == 3
|
182
|
+
end
|
183
|
+
|
184
|
+
it "negatively matches against $_" do
|
185
|
+
$_ = "12345"
|
186
|
+
result = ~Boost::Regexp.new("efgh")
|
187
|
+
result.should be_nil
|
188
|
+
end
|
189
|
+
|
190
|
+
it "sets the special match variables when a match succeeds" do
|
191
|
+
$_ = "xyzabcdef"
|
192
|
+
~Boost::Regexp.new("abcd")
|
193
|
+
$`.should == "xyz"
|
194
|
+
end
|
195
|
+
|
196
|
+
it "sets the special match variables to nil when a match fails" do
|
197
|
+
$_ = "uqioer"
|
198
|
+
~Boost::Regexp.new("abcd")
|
199
|
+
$'.should be_nil
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
describe Boost::Regexp, "#match" do
|
204
|
+
it "returns a MatchData object" do
|
205
|
+
Boost::Regexp.new('\d{3}').match("abc1234def").should be_a(MatchData)
|
206
|
+
end
|
207
|
+
|
208
|
+
it "returns $~" do
|
209
|
+
result = Boost::Regexp.new('\d{3}').match("abc1234def")
|
210
|
+
result.should == $~
|
211
|
+
end
|
212
|
+
|
213
|
+
it "provides captured subexpressions" do
|
214
|
+
result = Boost::Regexp.new('(\d{3})-(\d{3})-(\d{4})').match("abc 555-123-4567 def")
|
215
|
+
result[0].should == "555-123-4567"
|
216
|
+
result[1].should == "555"
|
217
|
+
result[2].should == "123"
|
218
|
+
result[3].should == "4567"
|
219
|
+
end
|
220
|
+
|
221
|
+
it "sets the value of Regexp#last_match" do
|
222
|
+
result = Boost::Regexp.new('\d{3}').match("abc123def")
|
223
|
+
result.should == Regexp.last_match
|
224
|
+
end
|
225
|
+
|
226
|
+
it "sets the special match variables when a match succeeds" do
|
227
|
+
Boost::Regexp.new("abcd").match "xyzabcdef"
|
228
|
+
$`.should == "xyz"
|
229
|
+
end
|
230
|
+
|
231
|
+
it "sets the special match variables to nil when a match fails" do
|
232
|
+
Boost::Regexp.new("abcd").match "uqioer"
|
233
|
+
$'.should be_nil
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
describe Boost::Regexp, "flags" do
|
238
|
+
it "matches with a case-insensitivity flag" do
|
239
|
+
Boost::Regexp.new('abc', Boost::Regexp::IGNORECASE).should match "DEFABCJKL"
|
240
|
+
end
|
241
|
+
|
242
|
+
it "responds correctly to casefold?" do
|
243
|
+
Boost::Regexp.new('abc', Boost::Regexp::IGNORECASE).casefold?.should be_true
|
244
|
+
Boost::Regexp.new('abc').casefold?.should be_false
|
245
|
+
end
|
246
|
+
|
247
|
+
it "ignores subexpressions when NO_SUBS is on" do
|
248
|
+
result = Boost::Regexp.new('abc(def)', Boost::Regexp::NO_SUBS).match("abcdef")
|
249
|
+
result[1].should be_nil
|
250
|
+
end
|
251
|
+
|
252
|
+
it "allows subexpressions when NO_SUBS is off" do
|
253
|
+
result = Boost::Regexp.new('abc(def)').match("abcdef")
|
254
|
+
result[1].should == "def"
|
255
|
+
end
|
256
|
+
|
257
|
+
it "raises an exception when given an invalid regexp and NO_EXCEPTIONS is off" do
|
258
|
+
lambda { Boost::Regexp.new("[[:alnum]]")}.should raise_exception(ArgumentError)
|
259
|
+
end
|
260
|
+
|
261
|
+
it "does not raise an exception when given an invalid regexp and NO_EXCEPTIONS is on" do
|
262
|
+
lambda { Boost::Regexp.new("[[:alnum]]", Boost::Regexp::NO_EXCEPTIONS)}.should_not raise_exception(ArgumentError)
|
263
|
+
end
|
264
|
+
|
265
|
+
it "ignores whitespace when IGNORE_WHITESPACE is set" do
|
266
|
+
Boost::Regexp.new("ab cd", Boost::Regexp::IGNORE_WHITESPACE).should match("abcd")
|
267
|
+
end
|
268
|
+
|
269
|
+
it "doesn't ignore whitespace when IGNORE_WHITESPACE is off" do
|
270
|
+
Boost::Regexp.new("ab cd").should_not match "abcd"
|
271
|
+
end
|
272
|
+
|
273
|
+
it "allows dots to match newlines when DOTS_MATCH_NEWLINES is on" do
|
274
|
+
Boost::Regexp.new("abc.def", Boost::Regexp::DOTS_MATCH_NEWLINES).should match "abc\ndef"
|
275
|
+
end
|
276
|
+
|
277
|
+
it "doesn't allow dots to match newlines when DOTS_NEVER_NEWLINES is on" do
|
278
|
+
Boost::Regexp.new("abc.def", Boost::Regexp::DOTS_NEVER_NEWLINES).should_not match "abc\ndef"
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
describe Boost::Regexp, "#enable_monkey_patch!" do
|
283
|
+
it "adds a boost! method to normal Regexps" do
|
284
|
+
Boost::Regexp.enable_monkey_patch!
|
285
|
+
/hello/.should respond_to(:boost!)
|
286
|
+
end
|
287
|
+
|
288
|
+
it "adds a boost! method that converts Regexps to Boost::Regexps" do
|
289
|
+
Boost::Regexp.enable_monkey_patch!
|
290
|
+
reg = /hello/.boost!
|
291
|
+
reg.should be_a(Boost::Regexp)
|
292
|
+
end
|
293
|
+
|
294
|
+
it "allows flags to be passed into the added boost! method" do
|
295
|
+
Boost::Regexp.enable_monkey_patch!
|
296
|
+
reg = /hello/.boost!(Boost::Regexp::IGNORECASE)
|
297
|
+
reg.casefold?.should be_true
|
298
|
+
end
|
299
|
+
end
|