re2 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +71 -27
- data/Rakefile +6 -14
- data/ext/re2/extconf.rb +1 -1
- data/ext/re2/re2.cc +69 -51
- data/lib/re2/string.rb +100 -0
- data/spec/kernel_spec.rb +15 -0
- data/spec/re2/match_data_spec.rb +141 -0
- data/spec/re2/regexp_spec.rb +394 -0
- data/spec/re2/string_spec.rb +47 -0
- data/spec/re2_spec.rb +84 -0
- data/spec/spec_helper.rb +3 -0
- metadata +28 -6
- data/test/re2_test.rb +0 -265
data/README.md
CHANGED
@@ -30,33 +30,77 @@ Usage
|
|
30
30
|
|
31
31
|
You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and [MatchData][] classes:
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
33
|
+
```console
|
34
|
+
$ irb -rubygems
|
35
|
+
> require 're2'
|
36
|
+
> r = RE2::Regexp.new('w(\d)(\d+)')
|
37
|
+
=> #<RE2::Regexp /w(\d)(\d+)/>
|
38
|
+
> m = r.match("w1234")
|
39
|
+
=> #<RE2::MatchData "w1234" 1:"1" 2:"234">
|
40
|
+
> m[1]
|
41
|
+
=> "1"
|
42
|
+
> m.string
|
43
|
+
=> "w1234"
|
44
|
+
> r =~ "w1234"
|
45
|
+
=> true
|
46
|
+
> r !~ "bob"
|
47
|
+
=> true
|
48
|
+
> r.match("bob")
|
49
|
+
=> nil
|
50
|
+
```
|
51
|
+
|
52
|
+
As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper method has been defined against `Kernel` so you can use a shorter version to create regular expressions:
|
53
|
+
|
54
|
+
```console
|
55
|
+
> RE2('(\d+)')
|
56
|
+
=> #<RE2::Regexp /(\d+)/>
|
57
|
+
```
|
58
|
+
|
59
|
+
Note the use of *single quotes*: inside double quotes, Ruby will interpret `\d` as `d`, as in the following example:
|
60
|
+
|
61
|
+
```console
|
62
|
+
> RE2("(\d+)")
|
63
|
+
=> #<RE2::Regexp /(d+)/>
|
64
|
+
```
|
49
65
|
|
50
66
|
As of 0.3.0, you can use named groups:
|
51
67
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
68
|
+
```console
|
69
|
+
> r = RE2::Regexp.new('(?P<name>\w+) (?P<age>\d+)')
|
70
|
+
=> #<RE2::Regexp /(?P<name>\w+) (?P<age>\d+)/>
|
71
|
+
> m = r.match("Bob 40")
|
72
|
+
=> #<RE2::MatchData "Bob 40" 1:"Bob" 2:"40">
|
73
|
+
> m[:name]
|
74
|
+
=> "Bob"
|
75
|
+
> m["age"]
|
76
|
+
=> "40"
|
77
|
+
```
|
78
|
+
|
79
|
+
As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from the opposite direction:
|
80
|
+
|
81
|
+
```console
|
82
|
+
> require "re2/string"
|
83
|
+
> string = "My name is Robert Paulson"
|
84
|
+
=> "My name is Robert Paulson"
|
85
|
+
> string.extend(RE2::String)
|
86
|
+
=> "My name is Robert Paulson"
|
87
|
+
> string.re2_sub("Robert", "Dave")
|
88
|
+
=> "My name is Dave Paulson"
|
89
|
+
> string.re2_gsub("a", "e")
|
90
|
+
=> "My neme is Deve Peulson"
|
91
|
+
> string.re2_match('D(\S+)')
|
92
|
+
=> #<RE2::MatchData "Deve" 1:"eve">
|
93
|
+
> string.re2_escape
|
94
|
+
=> "My\\ neme\\ is\\ Deve\\ Peulson"
|
95
|
+
```
|
96
|
+
|
97
|
+
If you want these available to all strings, you can reopen `String` like so:
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
class String
|
101
|
+
include RE2::String
|
102
|
+
end
|
103
|
+
```
|
60
104
|
|
61
105
|
Features
|
62
106
|
--------
|
@@ -75,11 +119,11 @@ Features
|
|
75
119
|
|
76
120
|
* Checking the options for an expression with `re2.options` or individually with `re2.case_sensitive?`
|
77
121
|
|
78
|
-
* Performing in-place replacement with [`RE2
|
122
|
+
* Performing in-place replacement with [`RE2.Replace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
|
79
123
|
|
80
|
-
* Performing in-place global replacement with [`RE2
|
124
|
+
* Performing in-place global replacement with [`RE2.GlobalReplace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
|
81
125
|
|
82
|
-
* Escaping regular expressions with [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2::Regexp.quote(unquoted)` or `RE2
|
126
|
+
* Escaping regular expressions with [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2::Regexp.quote(unquoted)` or `RE2.QuoteMeta(unquoted)`
|
83
127
|
|
84
128
|
Contact
|
85
129
|
-------
|
data/Rakefile
CHANGED
@@ -1,25 +1,17 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'rake/testtask'
|
4
|
-
rescue LoadError
|
5
|
-
require 'rubygems'
|
6
|
-
require 'rake/extensiontask'
|
7
|
-
require 'rake/testtask'
|
8
|
-
end
|
1
|
+
require 'rake/extensiontask'
|
2
|
+
require 'rake/testtask'
|
9
3
|
|
10
4
|
Rake::ExtensionTask.new('re2') do |e|
|
11
5
|
# e.config_options << "--with-re2-dir=/opt/local/re2"
|
12
6
|
end
|
13
7
|
|
14
8
|
Rake::TestTask.new do |t|
|
15
|
-
t.
|
9
|
+
t.libs << "spec"
|
10
|
+
t.test_files = FileList["spec/**/*_spec.rb"]
|
16
11
|
t.verbose = true
|
17
12
|
end
|
18
13
|
|
19
|
-
task :
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
task :test => :compile
|
14
|
+
task :test => :compile
|
15
|
+
task :spec => :test
|
24
16
|
task :default => :test
|
25
17
|
|
data/ext/re2/extconf.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# re2 (http://github.com/mudge/re2)
|
2
2
|
# Ruby bindings to re2, an "efficient, principled regular expression library"
|
3
3
|
#
|
4
|
-
# Copyright (c) 2010, Paul Mucur (http://
|
4
|
+
# Copyright (c) 2010-2012, Paul Mucur (http://mudge.name)
|
5
5
|
# Released under the BSD Licence, please see LICENSE.txt
|
6
6
|
|
7
7
|
require 'mkmf'
|
data/ext/re2/re2.cc
CHANGED
@@ -2,12 +2,13 @@
|
|
2
2
|
* re2 (http://github.com/mudge/re2)
|
3
3
|
* Ruby bindings to re2, an "efficient, principled regular expression library"
|
4
4
|
*
|
5
|
-
* Copyright (c) 2010, Paul Mucur (http://
|
5
|
+
* Copyright (c) 2010-2012, Paul Mucur (http://mudge.name)
|
6
6
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
7
|
*/
|
8
8
|
|
9
9
|
#include <re2/re2.h>
|
10
10
|
#include <string>
|
11
|
+
#include <sstream>
|
11
12
|
using namespace std;
|
12
13
|
|
13
14
|
extern "C" {
|
@@ -17,12 +18,20 @@ extern "C" {
|
|
17
18
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
18
19
|
#define UNUSED(x) ((void)x)
|
19
20
|
|
20
|
-
#
|
21
|
-
|
21
|
+
#ifndef RSTRING_LEN
|
22
|
+
#define RSTRING_LEN(x) (RSTRING(x)->len)
|
22
23
|
#endif
|
23
24
|
|
24
|
-
#
|
25
|
-
|
25
|
+
#ifndef RSTRING_PTR
|
26
|
+
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
30
|
+
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
31
|
+
(pattern->Match(text, startpos, endpos, anchor, match, nmatch))
|
32
|
+
#else
|
33
|
+
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
34
|
+
(pattern->Match(text, startpos, anchor, match, nmatch))
|
26
35
|
#endif
|
27
36
|
|
28
37
|
typedef struct {
|
@@ -77,7 +86,7 @@ extern "C" {
|
|
77
86
|
*
|
78
87
|
* @return [String] a frozen copy of the passed string.
|
79
88
|
* @example
|
80
|
-
* m = RE2('(\d+)').match("bob 123")
|
89
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
81
90
|
* m.string #=> "bob 123"
|
82
91
|
*/
|
83
92
|
static VALUE
|
@@ -94,7 +103,7 @@ extern "C" {
|
|
94
103
|
*
|
95
104
|
* @return [Fixnum] the number of elements
|
96
105
|
* @example
|
97
|
-
* m = RE2('(\d+)').match("bob 123")
|
106
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
98
107
|
* m.size #=> 2
|
99
108
|
* m.length #=> 2
|
100
109
|
*/
|
@@ -112,7 +121,7 @@ extern "C" {
|
|
112
121
|
*
|
113
122
|
* @return [RE2::Regexp] the regexp used in the match
|
114
123
|
* @example
|
115
|
-
* m = RE2('(\d+)').match("bob 123")
|
124
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
116
125
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
117
126
|
*/
|
118
127
|
static VALUE
|
@@ -135,7 +144,7 @@ extern "C" {
|
|
135
144
|
*
|
136
145
|
* @return [Array<String, nil>] the array of matches
|
137
146
|
* @example
|
138
|
-
* m = RE2('(\d+)').match("bob 123")
|
147
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
139
148
|
* m.to_a #=> ["123", "123"]
|
140
149
|
*/
|
141
150
|
static VALUE
|
@@ -144,9 +153,11 @@ extern "C" {
|
|
144
153
|
int i;
|
145
154
|
re2_matchdata *m;
|
146
155
|
re2::StringPiece match;
|
156
|
+
VALUE array;
|
147
157
|
|
148
158
|
Data_Get_Struct(self, re2_matchdata, m);
|
149
|
-
|
159
|
+
|
160
|
+
array = rb_ary_new2(m->number_of_matches);
|
150
161
|
for (i = 0; i < m->number_of_matches; i++) {
|
151
162
|
if (m->matches[i].empty()) {
|
152
163
|
rb_ary_push(array, Qnil);
|
@@ -155,6 +166,7 @@ extern "C" {
|
|
155
166
|
rb_ary_push(array, rb_str_new(match.data(), match.size()));
|
156
167
|
}
|
157
168
|
}
|
169
|
+
|
158
170
|
return array;
|
159
171
|
}
|
160
172
|
|
@@ -207,7 +219,7 @@ extern "C" {
|
|
207
219
|
* @param [Fixnum] index the index of the match to fetch
|
208
220
|
* @return [String, nil] the specified match
|
209
221
|
* @example
|
210
|
-
* m = RE2('(\d+)').match("bob 123")
|
222
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
211
223
|
* m[0] #=> "123"
|
212
224
|
*
|
213
225
|
* @overload [](start, length)
|
@@ -217,7 +229,7 @@ extern "C" {
|
|
217
229
|
* @param [Fixnum] length the number of elements to fetch
|
218
230
|
* @return [Array<String, nil>] the specified matches
|
219
231
|
* @example
|
220
|
-
* m = RE2('(\d+)').match("bob 123")
|
232
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
221
233
|
* m[0, 1] #=> ["123"]
|
222
234
|
*
|
223
235
|
* @overload [](range)
|
@@ -226,7 +238,7 @@ extern "C" {
|
|
226
238
|
* @param [Range] range the range of match indexes to fetch
|
227
239
|
* @return [Array<String, nil>] the specified matches
|
228
240
|
* @example
|
229
|
-
* m = RE2('(\d+)').match("bob 123")
|
241
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
230
242
|
* m[0..1] #=> ["123", "123"]
|
231
243
|
*
|
232
244
|
* @overload [](name)
|
@@ -235,7 +247,7 @@ extern "C" {
|
|
235
247
|
* @param [String, Symbol] name the name of the match to fetch
|
236
248
|
* @return [String, nil] the specific match
|
237
249
|
* @example
|
238
|
-
* m = RE2('(?P<number>\d+)').match("bob 123")
|
250
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
|
239
251
|
* m["number"] #=> "123"
|
240
252
|
* m[:number] #=> "123"
|
241
253
|
*/
|
@@ -272,7 +284,7 @@ extern "C" {
|
|
272
284
|
*
|
273
285
|
* @return [String] a printable version of the match
|
274
286
|
* @example
|
275
|
-
* m = RE2('(\d+)').match("bob 123")
|
287
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
276
288
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
277
289
|
*/
|
278
290
|
static VALUE
|
@@ -281,32 +293,33 @@ extern "C" {
|
|
281
293
|
int i;
|
282
294
|
re2_matchdata *m;
|
283
295
|
VALUE match, result;
|
296
|
+
ostringstream output;
|
284
297
|
|
285
298
|
Data_Get_Struct(self, re2_matchdata, m);
|
286
299
|
|
287
300
|
result = rb_str_new("#<RE2::MatchData", 16);
|
288
301
|
|
302
|
+
output << "#<RE2::MatchData";
|
303
|
+
|
289
304
|
for (i = 0; i < m->number_of_matches; i++) {
|
290
|
-
|
305
|
+
output << " ";
|
291
306
|
|
292
307
|
if (i > 0) {
|
293
|
-
|
294
|
-
snprintf(buf, sizeof(buf), "%d", i);
|
295
|
-
rb_str_cat2(result, buf);
|
296
|
-
rb_str_cat(result, ":", 1);
|
308
|
+
output << i << ":";
|
297
309
|
}
|
298
310
|
|
299
311
|
match = re2_matchdata_nth_match(i, self);
|
300
312
|
|
301
313
|
if (match == Qnil) {
|
302
|
-
|
314
|
+
output << "nil";
|
303
315
|
} else {
|
304
|
-
|
305
|
-
rb_str_cat(result, RSTRING_PTR(match), RSTRING_LEN(match));
|
306
|
-
rb_str_cat(result, "\"", 1);
|
316
|
+
output << "\"" << StringValuePtr(match) << "\"";
|
307
317
|
}
|
308
318
|
}
|
309
|
-
|
319
|
+
|
320
|
+
output << ">";
|
321
|
+
|
322
|
+
result = rb_str_new(output.str().data(), output.str().length());
|
310
323
|
|
311
324
|
return result;
|
312
325
|
}
|
@@ -459,11 +472,14 @@ extern "C" {
|
|
459
472
|
re2_regexp_inspect(VALUE self)
|
460
473
|
{
|
461
474
|
re2_pattern *p;
|
462
|
-
VALUE result
|
475
|
+
VALUE result;
|
476
|
+
ostringstream output;
|
463
477
|
|
464
478
|
Data_Get_Struct(self, re2_pattern, p);
|
465
|
-
|
466
|
-
|
479
|
+
|
480
|
+
output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
|
481
|
+
|
482
|
+
result = rb_str_new(output.str().data(), output.str().length());
|
467
483
|
|
468
484
|
return result;
|
469
485
|
}
|
@@ -706,30 +722,38 @@ extern "C" {
|
|
706
722
|
|
707
723
|
/*
|
708
724
|
* If the RE2 could not be created properly, returns an
|
709
|
-
* error string.
|
725
|
+
* error string; otherwise returns nil.
|
710
726
|
*
|
711
|
-
* @return [String] the error string
|
727
|
+
* @return [String, nil] the error string or nil
|
712
728
|
*/
|
713
729
|
static VALUE
|
714
730
|
re2_regexp_error(VALUE self)
|
715
731
|
{
|
716
732
|
re2_pattern *p;
|
717
733
|
Data_Get_Struct(self, re2_pattern, p);
|
718
|
-
|
734
|
+
if (p->pattern->ok()) {
|
735
|
+
return Qnil;
|
736
|
+
} else {
|
737
|
+
return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
|
738
|
+
}
|
719
739
|
}
|
720
740
|
|
721
741
|
/*
|
722
742
|
* If the RE2 could not be created properly, returns
|
723
|
-
* the offending portion of the regexp.
|
743
|
+
* the offending portion of the regexp; otherwise returns nil.
|
724
744
|
*
|
725
|
-
* @return [String] the offending portion of the regexp
|
745
|
+
* @return [String, nil] the offending portion of the regexp or nil
|
726
746
|
*/
|
727
747
|
static VALUE
|
728
748
|
re2_regexp_error_arg(VALUE self)
|
729
749
|
{
|
730
750
|
re2_pattern *p;
|
731
751
|
Data_Get_Struct(self, re2_pattern, p);
|
732
|
-
|
752
|
+
if (p->pattern->ok()) {
|
753
|
+
return Qnil;
|
754
|
+
} else {
|
755
|
+
return rb_str_new(p->pattern->error_arg().data(), p->pattern->error_arg().size());
|
756
|
+
}
|
733
757
|
}
|
734
758
|
|
735
759
|
/*
|
@@ -860,7 +884,7 @@ extern "C" {
|
|
860
884
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
861
885
|
* @example
|
862
886
|
* r = RE2::Regexp.new('w(o)(o)')
|
863
|
-
* r.match('woo) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
887
|
+
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
864
888
|
*
|
865
889
|
* @overload match(text, 0)
|
866
890
|
* Returns either true or false indicating whether a
|
@@ -907,11 +931,7 @@ extern "C" {
|
|
907
931
|
}
|
908
932
|
|
909
933
|
if (n == 0) {
|
910
|
-
|
911
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
912
|
-
#else
|
913
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, 0, 0);
|
914
|
-
#endif
|
934
|
+
matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
915
935
|
return BOOL2RUBY(matched);
|
916
936
|
} else {
|
917
937
|
|
@@ -931,11 +951,7 @@ extern "C" {
|
|
931
951
|
|
932
952
|
m->number_of_matches = n;
|
933
953
|
|
934
|
-
|
935
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
|
936
|
-
#else
|
937
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, m->matches, n);
|
938
|
-
#endif
|
954
|
+
matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
|
939
955
|
|
940
956
|
if (matched) {
|
941
957
|
return matchdata;
|
@@ -968,12 +984,13 @@ extern "C" {
|
|
968
984
|
* @param [String] str the string to modify
|
969
985
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
970
986
|
* @param [String] rewrite the string to replace with
|
987
|
+
* @return [String] the resulting string
|
971
988
|
* @example
|
972
|
-
* RE2
|
989
|
+
* RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
|
973
990
|
* re2 = RE2::Regexp.new("hel+o")
|
974
|
-
* RE2
|
991
|
+
* RE2.Replace("hello there", re2, "yo") #=> "yo there"
|
975
992
|
* text = "Good morning"
|
976
|
-
* RE2
|
993
|
+
* RE2.Replace(text, "morn", "even") #=> "Good evening"
|
977
994
|
* text #=> "Good evening"
|
978
995
|
*/
|
979
996
|
static VALUE
|
@@ -1017,12 +1034,13 @@ extern "C" {
|
|
1017
1034
|
* @param [String] str the string to modify
|
1018
1035
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1019
1036
|
* @param [String] rewrite the string to replace with
|
1037
|
+
* @return [String] the resulting string
|
1020
1038
|
* @example
|
1021
|
-
* RE2
|
1039
|
+
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1022
1040
|
* re2 = RE2::Regexp.new("oo?")
|
1023
|
-
* RE2
|
1041
|
+
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1024
1042
|
* text = "Good morning"
|
1025
|
-
* RE2
|
1043
|
+
* RE2.GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
|
1026
1044
|
* text #=> "Geeeed meerning"
|
1027
1045
|
*/
|
1028
1046
|
static VALUE
|