ruby-boost-regex 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +4 -0
- data/LICENSE +20 -0
- data/README.markdown +105 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/benchmark/benchmark.rb +71 -0
- data/benchmark/fasta.input +2500 -0
- data/ext/ruby-boost-regex/extconf.rb +5 -0
- data/ext/ruby-boost-regex/regexp.cpp +340 -0
- data/lib/ruby-boost-regex.rb +3 -0
- data/lib/ruby-boost-regex/ruby-parts.rb +25 -0
- data/spec/ruby-boost-regex_spec.rb +299 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +22 -0
- metadata +105 -0
@@ -0,0 +1,340 @@
|
|
1
|
+
#include <boost/regex.hpp>
|
2
|
+
#include <iostream>
|
3
|
+
#include <string>
|
4
|
+
#include <exception>
|
5
|
+
#include "ruby.h"
|
6
|
+
#include "re.h"
|
7
|
+
|
8
|
+
static VALUE rb_mBoost;
|
9
|
+
static VALUE rb_cBoostRegexp;
|
10
|
+
|
11
|
+
static VALUE rb_kRegexpMultiline;
|
12
|
+
static VALUE rb_kRegexpIgnorecase;
|
13
|
+
static VALUE rb_kRegexpExtended;
|
14
|
+
|
15
|
+
///////// imported from re.c
|
16
|
+
|
17
|
+
#define RE_TALLOC(n,t) ((t*)alloca((n)*sizeof(t)))
|
18
|
+
#define TMALLOC(n,t) ((t*)xmalloc((n)*sizeof(t)))
|
19
|
+
#define TREALLOC(s,n,t) (s=((t*)xrealloc(s,(n)*sizeof(t))))
|
20
|
+
#define MATCH_BUSY FL_USER2
|
21
|
+
|
22
|
+
/*
 * Allocates a bare match object of class `klass` (imported from re.c).
 *
 * The new object has no subject string and owns a zero-filled
 * `struct re_registers`. Returns the object as a VALUE.
 */
static VALUE match_alloc(VALUE klass)
{
    NEWOBJ(match, struct RMatch);
    OBJSETUP(match, klass, T_MATCH);

    match->str = 0;
    // NOTE(review): presumably the null store keeps the object in a sane state
    // in case ALLOC below triggers a GC run - confirm before removing it.
    match->regs = 0;
    match->regs = ALLOC(struct re_registers);
    MEMZERO(match->regs, struct re_registers, 1);

    return (VALUE)match;
}
|
34
|
+
|
35
|
+
static void
|
36
|
+
init_regs(struct re_registers *regs, unsigned int num_regs)
|
37
|
+
{
|
38
|
+
int i;
|
39
|
+
|
40
|
+
regs->num_regs = num_regs;
|
41
|
+
if (num_regs < RE_NREGS)
|
42
|
+
num_regs = RE_NREGS;
|
43
|
+
|
44
|
+
if (regs->allocated == 0) {
|
45
|
+
regs->beg = TMALLOC(num_regs, int);
|
46
|
+
regs->end = TMALLOC(num_regs, int);
|
47
|
+
regs->allocated = num_regs;
|
48
|
+
}
|
49
|
+
else if (regs->allocated < num_regs) {
|
50
|
+
TREALLOC(regs->beg, num_regs, int);
|
51
|
+
TREALLOC(regs->end, num_regs, int);
|
52
|
+
regs->allocated = num_regs;
|
53
|
+
}
|
54
|
+
for (i=0; i<num_regs; i++) {
|
55
|
+
regs->beg[i] = regs->end[i] = -1;
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
void
|
60
|
+
re_copy_registers(struct re_registers *regs1, struct re_registers *regs2)
|
61
|
+
{
|
62
|
+
int i;
|
63
|
+
|
64
|
+
if (regs1 == regs2) return;
|
65
|
+
if (regs1->allocated == 0) {
|
66
|
+
regs1->beg = TMALLOC(regs2->num_regs, int);
|
67
|
+
regs1->end = TMALLOC(regs2->num_regs, int);
|
68
|
+
regs1->allocated = regs2->num_regs;
|
69
|
+
}
|
70
|
+
else if (regs1->allocated < regs2->num_regs) {
|
71
|
+
TREALLOC(regs1->beg, regs2->num_regs, int);
|
72
|
+
TREALLOC(regs1->end, regs2->num_regs, int);
|
73
|
+
regs1->allocated = regs2->num_regs;
|
74
|
+
}
|
75
|
+
for (i=0; i<regs2->num_regs; i++) {
|
76
|
+
regs1->beg[i] = regs2->beg[i];
|
77
|
+
regs1->end[i] = regs2->end[i];
|
78
|
+
}
|
79
|
+
regs1->num_regs = regs2->num_regs;
|
80
|
+
}
|
81
|
+
|
82
|
+
/////////////////////////////
|
83
|
+
|
84
|
+
// extracts the boost regex using Data_Get_Struct
|
85
|
+
boost::regex *get_br_from_value(VALUE self) {
|
86
|
+
boost::regex *p;
|
87
|
+
Data_Get_Struct(self, boost::regex, p);
|
88
|
+
return p;
|
89
|
+
}
|
90
|
+
|
91
|
+
/**
|
92
|
+
* Frees the boost regex.
|
93
|
+
*/
|
94
|
+
static void br_free(void *p) {
|
95
|
+
delete (boost::regex *)p;
|
96
|
+
}
|
97
|
+
|
98
|
+
/**
|
99
|
+
* Allocates a new Boost::Regexp by allocating heap space via new for boost::regex.
|
100
|
+
* Does not initialize the regexp.
|
101
|
+
*/
|
102
|
+
static VALUE br_alloc(VALUE klass) {
|
103
|
+
boost::regex *reg;
|
104
|
+
VALUE object;
|
105
|
+
|
106
|
+
reg = new boost::regex;
|
107
|
+
object = Data_Wrap_Struct(klass, NULL, br_free, reg);
|
108
|
+
return object;
|
109
|
+
}
|
110
|
+
|
111
|
+
/**
|
112
|
+
* Initializes a regexp with a pattern and options.
|
113
|
+
* The boost regex is already allocated so we just call the constructor here.
|
114
|
+
*/
|
115
|
+
VALUE br_init(int argc, VALUE *argv, VALUE self) {
|
116
|
+
VALUE reg_to_convert, flags;
|
117
|
+
boost::regex *reg;
|
118
|
+
VALUE str;
|
119
|
+
int newflags, oldflagsint;
|
120
|
+
rb_scan_args(argc, argv, "11", ®_to_convert, &flags);
|
121
|
+
|
122
|
+
reg = get_br_from_value(self);
|
123
|
+
if (NIL_P(flags)) {
|
124
|
+
flags = UINT2NUM(boost::regex_constants::normal);
|
125
|
+
}
|
126
|
+
try {
|
127
|
+
if (TYPE(reg_to_convert) == T_REGEXP) {
|
128
|
+
str = rb_funcall(reg_to_convert, rb_intern("source"), 0);
|
129
|
+
// calculate the flags to use
|
130
|
+
newflags = NUM2UINT(flags);
|
131
|
+
VALUE oldflags = rb_funcall(reg_to_convert, rb_intern("options"), 0);
|
132
|
+
int oldflagsint = FIX2INT(oldflags);
|
133
|
+
// convert ruby regexp flags to boost regex flags
|
134
|
+
if (oldflagsint & FIX2INT(rb_kRegexpIgnorecase))
|
135
|
+
newflags |= boost::regex_constants::icase;
|
136
|
+
if (oldflagsint & FIX2INT(rb_kRegexpMultiline))
|
137
|
+
newflags |= boost::regex_constants::mod_s;
|
138
|
+
if (oldflagsint & FIX2INT(rb_kRegexpExtended))
|
139
|
+
newflags |= boost::regex_constants::mod_x;
|
140
|
+
// convert back to fixnum
|
141
|
+
flags = UINT2NUM(newflags);
|
142
|
+
} else {
|
143
|
+
str = rb_convert_type(reg_to_convert, T_STRING, "String", "to_s");
|
144
|
+
}
|
145
|
+
*reg = boost::regex(RSTRING_PTR(str), FIX2UINT(flags));
|
146
|
+
} catch (boost::regex_error& exc) {
|
147
|
+
// C++ exceptions have to be re-raised as ruby
|
148
|
+
rb_raise(rb_eArgError, "Invalid regular expression");
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
/**
 * Returns a match object that may be overwritten with new match data.
 *
 * If there is no current backref, or the current one is flagged MATCH_BUSY,
 * a fresh match object is allocated. Otherwise the current backref is reused
 * after its taint flag is set (safe level >= 3) or cleared (lower levels).
 */
static VALUE get_backref_for_modification() {
    VALUE current = rb_backref_get();

    if (NIL_P(current) || FL_TEST(current, MATCH_BUSY)) {
        return match_alloc(rb_cMatch);
    }

    if (rb_safe_level() < 3) {
        FL_UNSET(current, FL_TAINT);
    } else {
        OBJ_TAINT(current);
    }
    return current;
}
|
166
|
+
|
167
|
+
static void
|
168
|
+
fill_regs_from_smatch(std::string::const_iterator first, std::string::const_iterator last, struct re_registers *regs, boost::smatch matches) {
|
169
|
+
init_regs(regs, matches.size());
|
170
|
+
regs->beg[0] = matches[0].first - first;
|
171
|
+
regs->end[0] = matches[0].second - first;
|
172
|
+
|
173
|
+
for (int idx = 1; idx <= matches.size(); idx++) {
|
174
|
+
if (!matches[idx].matched) {
|
175
|
+
regs->beg[idx] = regs->end[idx] = -1;
|
176
|
+
} else {
|
177
|
+
regs->beg[idx] = matches[idx].first - first;
|
178
|
+
regs->end[idx] = matches[idx].second - first;
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
/**
|
184
|
+
* General matcher method that re-raises exception as a Ruby exception. Gotta use this. sorry.
|
185
|
+
*/
|
186
|
+
static bool
|
187
|
+
br_reg_match_iters(VALUE str, std::string::const_iterator start, std::string::const_iterator stop, boost::smatch& matches, boost::regex reg)
|
188
|
+
{
|
189
|
+
static struct re_registers regs;
|
190
|
+
try {
|
191
|
+
if (boost::regex_search(start, stop, matches, reg)) {
|
192
|
+
VALUE match = get_backref_for_modification();
|
193
|
+
RMATCH(match)->str = rb_str_dup(str);
|
194
|
+
fill_regs_from_smatch(start, stop, ®s, matches);
|
195
|
+
re_copy_registers(RMATCH(match)->regs, ®s);
|
196
|
+
rb_backref_set(match);
|
197
|
+
return true;
|
198
|
+
} else {
|
199
|
+
rb_backref_set(Qnil);
|
200
|
+
return false;
|
201
|
+
}
|
202
|
+
} catch (std::runtime_error& err) {
|
203
|
+
rb_raise(rb_eRegexpError, "Ran out of resources while matching a Boost Regexp.");
|
204
|
+
}
|
205
|
+
}
|
206
|
+
|
207
|
+
static int
|
208
|
+
br_reg_search(VALUE self, VALUE str) {
|
209
|
+
boost::regex reg = *get_br_from_value(self);
|
210
|
+
std::string input = StringValuePtr(str);
|
211
|
+
|
212
|
+
std::string::const_iterator start, end;
|
213
|
+
start = input.begin();
|
214
|
+
end = input.end();
|
215
|
+
|
216
|
+
boost::smatch matches;
|
217
|
+
if (br_reg_match_iters(str, start, end, matches, reg)) {
|
218
|
+
return matches[0].first - start;
|
219
|
+
} else {
|
220
|
+
return -1;
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
/**
 * Implements Boost::Regexp#match.
 *
 * Searches `str` and returns the current backref (set by br_reg_match_iters)
 * on success, or nil when nothing matched.
 */
static VALUE
br_reg_do_match(VALUE self, VALUE str) {
    // Borrow the wrapped regex instead of copying it for every match.
    const boost::regex &reg = *get_br_from_value(self);

    // Build the subject from pointer + length so that text following an
    // embedded NUL byte is still searched.
    StringValue(str);
    std::string input(RSTRING_PTR(str), RSTRING_LEN(str));

    std::string::const_iterator start = input.begin();
    std::string::const_iterator end = input.end();

    boost::smatch matches;
    if (!br_reg_match_iters(str, start, end, matches, reg)) {
        return Qnil;
    }
    return rb_backref_get();
}
|
240
|
+
|
241
|
+
/**
 * Implements Boost::Regexp#== and #eql?.
 *
 * Returns false for anything that is not a Boost::Regexp; otherwise compares
 * the two wrapped boost::regex objects with operator==.
 */
static VALUE br_eql(VALUE self, VALUE other) {
    if (rb_obj_is_kind_of(other, rb_cBoostRegexp)) {
        boost::regex lhs = *get_br_from_value(self);
        boost::regex rhs = *get_br_from_value(other);
        if (lhs == rhs) {
            return Qtrue;
        }
    }
    return Qfalse;
}
|
250
|
+
|
251
|
+
/**
 * Implements Boost::Regexp#=~.
 * Returns the match offset as a Fixnum, or nil when there is no match.
 */
static VALUE br_match_operator(VALUE self, VALUE str) {
    const int offset = br_reg_search(self, str);
    if (offset >= 0) {
        return INT2FIX(offset);
    }
    return Qnil;
}
|
258
|
+
|
259
|
+
/**
 * Implements Boost::Regexp#===.
 * Returns true when `str` matches, false otherwise.
 */
static VALUE br_match_eqq_operator(VALUE self, VALUE str) {
    const int offset = br_reg_search(self, str);
    if (offset >= 0) {
        return Qtrue;
    }
    return Qfalse;
}
|
267
|
+
|
268
|
+
/**
 * Implements Boost::Regexp#options.
 * Returns the Boost flags of the wrapped regex as a Ruby integer.
 */
static VALUE br_options(VALUE self) {
    boost::regex wrapped = *get_br_from_value(self);
    const unsigned int bits = wrapped.flags();
    return UINT2NUM(bits);
}
|
272
|
+
|
273
|
+
/**
 * Implements Boost::Regexp#source.
 * Returns the pattern text of the wrapped regex as a new Ruby String.
 */
static VALUE br_source(VALUE self) {
    boost::regex wrapped = *get_br_from_value(self);
    const std::string pattern = wrapped.str();
    return rb_str_new(pattern.c_str(), pattern.size());
}
|
278
|
+
|
279
|
+
/**
 * Implements the unary ~ operator: matches the regexp against the global $_
 * and returns whatever =~ returns for it.
 */
static VALUE br_tilde_operator(VALUE self) {
    return br_match_operator(self, rb_gv_get("$_"));
}
|
283
|
+
|
284
|
+
extern "C" {
|
285
|
+
VALUE Init_BoostRegexHook()
|
286
|
+
{
|
287
|
+
rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
|
288
|
+
|
289
|
+
rb_cMatch = rb_const_get(rb_cObject, rb_intern("MatchData"));
|
290
|
+
rb_kRegexpMultiline = rb_const_get(rb_cRegexp, rb_intern("MULTILINE"));
|
291
|
+
rb_kRegexpIgnorecase = rb_const_get(rb_cRegexp, rb_intern("IGNORECASE"));
|
292
|
+
rb_kRegexpExtended = rb_const_get(rb_cRegexp, rb_intern("EXTENDED"));
|
293
|
+
|
294
|
+
rb_mBoost = rb_define_module("Boost");
|
295
|
+
rb_cBoostRegexp = rb_define_class_under(rb_mBoost, "Regexp", rb_cObject);
|
296
|
+
|
297
|
+
rb_define_alloc_func(rb_cBoostRegexp, br_alloc);
|
298
|
+
rb_define_method(rb_cBoostRegexp, "initialize", RUBY_METHOD_FUNC(br_init), -1);
|
299
|
+
rb_define_method(rb_cBoostRegexp, "=~", RUBY_METHOD_FUNC(br_match_operator), 1);
|
300
|
+
rb_define_method(rb_cBoostRegexp, "===", RUBY_METHOD_FUNC(br_match_eqq_operator), 1);
|
301
|
+
rb_define_method(rb_cBoostRegexp, "source", RUBY_METHOD_FUNC(br_source), 0);
|
302
|
+
rb_define_method(rb_cBoostRegexp, "match", RUBY_METHOD_FUNC(br_reg_do_match), 1);
|
303
|
+
rb_define_method(rb_cBoostRegexp, "options", RUBY_METHOD_FUNC(br_options), 0);
|
304
|
+
rb_define_method(rb_cBoostRegexp, "~", RUBY_METHOD_FUNC(br_tilde_operator), 0);
|
305
|
+
rb_define_method(rb_cBoostRegexp, "==", RUBY_METHOD_FUNC(br_eql), 1);
|
306
|
+
rb_define_method(rb_cBoostRegexp, "eql?", RUBY_METHOD_FUNC(br_eql), 1);
|
307
|
+
|
308
|
+
rb_define_const(rb_cBoostRegexp, "NORMAL", UINT2NUM(boost::regex_constants::normal));
|
309
|
+
rb_define_const(rb_cBoostRegexp, "ECMASCRIPT", UINT2NUM(boost::regex_constants::ECMAScript));
|
310
|
+
rb_define_const(rb_cBoostRegexp, "JAVASCRIPT", UINT2NUM(boost::regex_constants::JavaScript));
|
311
|
+
rb_define_const(rb_cBoostRegexp, "JSCRIPT", UINT2NUM(boost::regex_constants::JScript));
|
312
|
+
rb_define_const(rb_cBoostRegexp, "PERL", UINT2NUM(boost::regex_constants::perl));
|
313
|
+
rb_define_const(rb_cBoostRegexp, "BASIC", UINT2NUM(boost::regex_constants::basic));
|
314
|
+
rb_define_const(rb_cBoostRegexp, "SED", UINT2NUM(boost::regex_constants::sed));
|
315
|
+
rb_define_const(rb_cBoostRegexp, "EXTENDED", UINT2NUM(boost::regex_constants::extended));
|
316
|
+
rb_define_const(rb_cBoostRegexp, "AWK", UINT2NUM(boost::regex_constants::awk));
|
317
|
+
rb_define_const(rb_cBoostRegexp, "GREP", UINT2NUM(boost::regex_constants::grep));
|
318
|
+
rb_define_const(rb_cBoostRegexp, "EGREP", UINT2NUM(boost::regex_constants::egrep));
|
319
|
+
rb_define_const(rb_cBoostRegexp, "IGNORECASE", UINT2NUM(boost::regex_constants::icase));
|
320
|
+
rb_define_const(rb_cBoostRegexp, "NO_SUBS", UINT2NUM(boost::regex_constants::nosubs));
|
321
|
+
rb_define_const(rb_cBoostRegexp, "OPTIMIZE", UINT2NUM(boost::regex_constants::optimize));
|
322
|
+
rb_define_const(rb_cBoostRegexp, "COLLATE", UINT2NUM(boost::regex_constants::collate));
|
323
|
+
|
324
|
+
rb_define_const(rb_cBoostRegexp, "NO_EXCEPTIONS", UINT2NUM(boost::regex_constants::no_except));
|
325
|
+
rb_define_const(rb_cBoostRegexp, "SAVE_SUBEXPRESSION_LOCS", UINT2NUM(boost::regex_constants::save_subexpression_location));
|
326
|
+
|
327
|
+
rb_define_const(rb_cBoostRegexp, "NO_MOD_M", UINT2NUM(boost::regex_constants::no_mod_m));
|
328
|
+
rb_define_const(rb_cBoostRegexp, "DOTS_NEVER_NEWLINES", UINT2NUM(boost::regex_constants::no_mod_s));
|
329
|
+
rb_define_const(rb_cBoostRegexp, "DOTS_MATCH_NEWLINES", UINT2NUM(boost::regex_constants::mod_s));
|
330
|
+
rb_define_const(rb_cBoostRegexp, "IGNORE_WHITESPACE", UINT2NUM(boost::regex_constants::mod_x));
|
331
|
+
rb_define_const(rb_cBoostRegexp, "NO_EMPTY_EXPRESSIONS", UINT2NUM(boost::regex_constants::no_empty_expressions));
|
332
|
+
|
333
|
+
rb_define_const(rb_cBoostRegexp, "NO_CHAR_CLASSES", UINT2NUM(boost::regex_constants::no_char_classes));
|
334
|
+
rb_define_const(rb_cBoostRegexp, "NO_ESCAPE_IN_LISTS", UINT2NUM(boost::regex_constants::no_escape_in_lists));
|
335
|
+
rb_define_const(rb_cBoostRegexp, "NO_INTERVALS", UINT2NUM(boost::regex_constants::no_intervals));
|
336
|
+
rb_define_const(rb_cBoostRegexp, "BK_PLUS_QM", UINT2NUM(boost::regex_constants::bk_plus_qm));
|
337
|
+
rb_define_const(rb_cBoostRegexp, "BK_VBAR", UINT2NUM(boost::regex_constants::bk_vbar));
|
338
|
+
rb_define_const(rb_cBoostRegexp, "LITERAL", UINT2NUM(boost::regex_constants::literal));
|
339
|
+
}
|
340
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Boost
  # Pure-Ruby additions to Boost::Regexp.
  class Regexp

    # Opt-in core extension: adds ::Regexp#boost!, which builds a
    # Boost::Regexp from the receiver's source. An optional first argument
    # supplies the Boost flags; without a truthy argument NORMAL is used.
    # NOTE(review): only the source string is forwarded, so the receiver's own
    # Ruby options (e.g. /i) look like they are not carried over - confirm
    # whether that is intended.
    def self.enable_monkey_patch!
      ::Regexp.class_eval do
        define_method(:boost!) do |*args|
          flags = Boost::Regexp::NORMAL
          flags = args.first if args.any?
          Boost::Regexp.new(source, flags)
        end
      end
    end

    # True when the IGNORECASE flag is set in #options.
    def casefold?
      (options & IGNORECASE) > 0
    end

    # Renders the regexp as /source/ followed by the letters m, i and x for
    # the DOTS_MATCH_NEWLINES, IGNORECASE and IGNORE_WHITESPACE flags, in
    # that order.
    def inspect
      letters = [
        [Boost::Regexp::DOTS_MATCH_NEWLINES, "m"],
        [Boost::Regexp::IGNORECASE,          "i"],
        [Boost::Regexp::IGNORE_WHITESPACE,   "x"]
      ]
      rendered = "/#{source}/"
      letters.each do |flag, letter|
        rendered << letter if (options & flag) > 0
      end
      rendered
    end
  end
end
|
@@ -0,0 +1,299 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Construction and public-interface checks for Boost::Regexp.
describe Boost::Regexp do
  it "creates wrapped regexes" do
    reg = Boost::Regexp.new("\\d{3}")
    reg.should be_a(Boost::Regexp)
  end

  it "can be initialized with a string" do
    reg = Boost::Regexp.new("abc")
    reg.should match("jklabcdef")
  end

  it "can be initialized with a regexp" do
    reg = Boost::Regexp.new(/abc/)
    reg.should match("jklabcdef")
  end

  # The next four examples check how options of a Ruby Regexp literal show up
  # in Boost::Regexp#options.
  # NOTE(review): have_flag is not defined in this file - presumably a custom
  # matcher from spec_helper.
  it "can be initialized with a regexp with the ignorecase flag" do
    reg = Boost::Regexp.new(/abc/i)
    reg.options.should have_flag(Boost::Regexp::IGNORECASE)
  end

  it "can be initialized with a regexp with the ignore whitespace flag" do
    reg = Boost::Regexp.new(/abc/x)
    reg.options.should have_flag(Boost::Regexp::IGNORE_WHITESPACE)
  end

  it "can be initialized with a regexp with the dots-match-newlines flag" do
    reg = Boost::Regexp.new(/abc/m)
    reg.options.should have_flag(Boost::Regexp::DOTS_MATCH_NEWLINES)
  end

  it "will not initialize without unrequested flags" do
    reg = Boost::Regexp.new(/abc/x)
    reg.options.should_not have_flag(Boost::Regexp::DOTS_MATCH_NEWLINES)
    reg.options.should_not have_flag(Boost::Regexp::IGNORECASE)
  end

  # The remaining examples only assert that each public method exists.
  it "has a source method" do
    Boost::Regexp.new("abc").should respond_to(:source)
  end

  it "has an options method" do
    Boost::Regexp.new("abc").should respond_to(:options)
  end

  it "has a match method" do
    Boost::Regexp.new("abc").should respond_to(:match)
  end

  it "has a eql? method" do
    Boost::Regexp.new("abc").should respond_to(:eql?)
  end

  it "has a =~ method" do
    Boost::Regexp.new("abc").should respond_to(:=~)
  end

  it "has a ~ method" do
    Boost::Regexp.new("abc").should respond_to(:~)
  end

  it "has a === method" do
    Boost::Regexp.new("abc").should respond_to(:===)
  end

  it "has a == method" do
    Boost::Regexp.new("abc").should respond_to(:==)
  end
end
|
72
|
+
|
73
|
+
# #inspect renders /source/ plus one letter per enabled flag (i, m, x).
describe Boost::Regexp, "#inspect" do
  it "extracts the source for normal regexes" do
    Boost::Regexp.new("\\d{3}").inspect.should == "/\\d{3}/"
  end

  it "shows the ignore-case flag when enabled" do
    Boost::Regexp.new("\\d{3}", Boost::Regexp::IGNORECASE).inspect.should == "/\\d{3}/i"
  end

  it "shows the multiline flag when enabled" do
    Boost::Regexp.new("\\d{3}", Boost::Regexp::DOTS_MATCH_NEWLINES).inspect.should == "/\\d{3}/m"
  end

  it "shows the ignore-whitespace flag when enabled" do
    Boost::Regexp.new("\\d{3}", Boost::Regexp::IGNORE_WHITESPACE).inspect.should == "/\\d{3}/x"
  end
end
|
90
|
+
|
91
|
+
# #source must round-trip the pattern text given to the constructor.
describe Boost::Regexp, "#source" do
  it "returns the source string of the regexp" do
    Boost::Regexp.new('(\d{3})-(\d{2})-(\d{4})').source.should == '(\d{3})-(\d{2})-(\d{4})'
  end
end
|
96
|
+
|
97
|
+
# #options exposes the flags passed at construction and nothing extra.
describe Boost::Regexp, "#options" do
  it "returns the flags used by the regexp" do
    reg = Boost::Regexp.new("ab cd", Boost::Regexp::IGNORE_WHITESPACE)
    reg.options.should have_flag(Boost::Regexp::IGNORE_WHITESPACE)
    reg.options.should_not have_flag(Boost::Regexp::LITERAL)
  end
end
|
104
|
+
|
105
|
+
# Equality via ==: same pattern is equal, a pattern differing by one space is not.
describe Boost::Regexp, "#==" do
  it "correctly identifies two equal regexps" do
    a = Boost::Regexp.new("abcd[efg]")
    b = Boost::Regexp.new("abcd[efg]")
    a.should == b
  end

  it "correctly identifies two different regexps" do
    a = Boost::Regexp.new("abcd[efg]")
    b = Boost::Regexp.new("abcd [efg]")
    a.should_not == b
  end
end
|
118
|
+
|
119
|
+
# Equality via eql?: mirrors the == examples.
describe Boost::Regexp, "#eql?" do
  it "correctly identifies two equal regexps using eql?" do
    a = Boost::Regexp.new("abcd[efg]")
    b = Boost::Regexp.new("abcd[efg]")
    a.should eql(b)
  end

  it "correctly identifies two different regexps" do
    a = Boost::Regexp.new("abcd[efg]")
    b = Boost::Regexp.new("abcd [efg]")
    a.should_not eql(b)
  end
end
|
132
|
+
|
133
|
+
# =~ returns the match offset (or nil) and updates the special match variables.
describe Boost::Regexp, "#=~" do
  it "returns the position of the match" do
    result = Boost::Regexp.new("abcd") =~ "zxabcdefg"
    result.should == 2
  end

  it "returns nil on no match" do
    result = Boost::Regexp.new("abcd") =~ "aj3ioqh"
    result.should be_nil
  end

  it "sets the special match variables on a match" do
    Boost::Regexp.new("abcd") =~ "xyzabcdef"
    # $` is the text preceding the match
    $`.should == "xyz"
  end

  it "sets the special match variables to nil when a match fails" do
    Boost::Regexp.new("abcd") =~ "uqioer"
    # $' is the text following the match
    $'.should be_nil
  end
end
|
154
|
+
|
155
|
+
# === returns a boolean and updates the special match variables.
describe Boost::Regexp, "#===" do
  it "returns true when a match succeeds" do
    result = Boost::Regexp.new("abcd") === "uioabcdefg"
    result.should be_true
  end

  it "returns false when a match fails" do
    result = Boost::Regexp.new("abcd") === "uiot"
    result.should be_false
  end

  it "sets the special match variables when a match succeeds" do
    Boost::Regexp.new("abcd") === "xyzabcdef"
    # $` is the text preceding the match
    $`.should == "xyz"
  end

  it "sets the special match variables to nil when a match fails" do
    Boost::Regexp.new("abcd") === "uqioer"
    # $' is the text following the match
    $'.should be_nil
  end
end
|
176
|
+
|
177
|
+
# Unary ~ matches against the global $_ and behaves like =~ otherwise.
describe Boost::Regexp, "#~" do
  it "matches the regex against $_" do
    $_ = "123abcdefg"
    result = ~Boost::Regexp.new("abcd")
    result.should == 3
  end

  it "negatively matches against $_" do
    $_ = "12345"
    result = ~Boost::Regexp.new("efgh")
    result.should be_nil
  end

  it "sets the special match variables when a match succeeds" do
    $_ = "xyzabcdef"
    ~Boost::Regexp.new("abcd")
    # $` is the text preceding the match
    $`.should == "xyz"
  end

  it "sets the special match variables to nil when a match fails" do
    $_ = "uqioer"
    ~Boost::Regexp.new("abcd")
    # $' is the text following the match
    $'.should be_nil
  end
end
|
202
|
+
|
203
|
+
# #match returns a MatchData (the same object as $~) exposing the captures.
describe Boost::Regexp, "#match" do
  it "returns a MatchData object" do
    Boost::Regexp.new('\d{3}').match("abc1234def").should be_a(MatchData)
  end

  it "returns $~" do
    result = Boost::Regexp.new('\d{3}').match("abc1234def")
    result.should == $~
  end

  it "provides captured subexpressions" do
    result = Boost::Regexp.new('(\d{3})-(\d{3})-(\d{4})').match("abc 555-123-4567 def")
    # index 0 is the whole match, 1..3 are the capture groups
    result[0].should == "555-123-4567"
    result[1].should == "555"
    result[2].should == "123"
    result[3].should == "4567"
  end

  it "sets the value of Regexp#last_match" do
    result = Boost::Regexp.new('\d{3}').match("abc123def")
    result.should == Regexp.last_match
  end

  it "sets the special match variables when a match succeeds" do
    Boost::Regexp.new("abcd").match "xyzabcdef"
    # $` is the text preceding the match
    $`.should == "xyz"
  end

  it "sets the special match variables to nil when a match fails" do
    Boost::Regexp.new("abcd").match "uqioer"
    # $' is the text following the match
    $'.should be_nil
  end
end
|
236
|
+
|
237
|
+
# Behavioural checks for individual Boost flags passed to the constructor.
describe Boost::Regexp, "flags" do
  it "matches with a case-insensitivity flag" do
    Boost::Regexp.new('abc', Boost::Regexp::IGNORECASE).should match "DEFABCJKL"
  end

  it "responds correctly to casefold?" do
    Boost::Regexp.new('abc', Boost::Regexp::IGNORECASE).casefold?.should be_true
    Boost::Regexp.new('abc').casefold?.should be_false
  end

  it "ignores subexpressions when NO_SUBS is on" do
    result = Boost::Regexp.new('abc(def)', Boost::Regexp::NO_SUBS).match("abcdef")
    result[1].should be_nil
  end

  it "allows subexpressions when NO_SUBS is off" do
    result = Boost::Regexp.new('abc(def)').match("abcdef")
    result[1].should == "def"
  end

  # "[[:alnum]]" is the deliberately malformed pattern used by both
  # exception examples.
  it "raises an exception when given an invalid regexp and NO_EXCEPTIONS is off" do
    lambda { Boost::Regexp.new("[[:alnum]]")}.should raise_exception(ArgumentError)
  end

  it "does not raise an exception when given an invalid regexp and NO_EXCEPTIONS is on" do
    lambda { Boost::Regexp.new("[[:alnum]]", Boost::Regexp::NO_EXCEPTIONS)}.should_not raise_exception(ArgumentError)
  end

  it "ignores whitespace when IGNORE_WHITESPACE is set" do
    Boost::Regexp.new("ab cd", Boost::Regexp::IGNORE_WHITESPACE).should match("abcd")
  end

  it "doesn't ignore whitespace when IGNORE_WHITESPACE is off" do
    Boost::Regexp.new("ab cd").should_not match "abcd"
  end

  it "allows dots to match newlines when DOTS_MATCH_NEWLINES is on" do
    Boost::Regexp.new("abc.def", Boost::Regexp::DOTS_MATCH_NEWLINES).should match "abc\ndef"
  end

  it "doesn't allow dots to match newlines when DOTS_NEVER_NEWLINES is on" do
    Boost::Regexp.new("abc.def", Boost::Regexp::DOTS_NEVER_NEWLINES).should_not match "abc\ndef"
  end
end
|
281
|
+
|
282
|
+
# The opt-in core extension that adds Regexp#boost!.
# Each example enables the patch itself before using it.
describe Boost::Regexp, "#enable_monkey_patch!" do
  it "adds a boost! method to normal Regexps" do
    Boost::Regexp.enable_monkey_patch!
    /hello/.should respond_to(:boost!)
  end

  it "adds a boost! method that converts Regexps to Boost::Regexps" do
    Boost::Regexp.enable_monkey_patch!
    reg = /hello/.boost!
    reg.should be_a(Boost::Regexp)
  end

  it "allows flags to be passed into the added boost! method" do
    Boost::Regexp.enable_monkey_patch!
    reg = /hello/.boost!(Boost::Regexp::IGNORECASE)
    reg.casefold?.should be_true
  end
end
|