re2 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +71 -27
- data/Rakefile +6 -14
- data/ext/re2/extconf.rb +1 -1
- data/ext/re2/re2.cc +69 -51
- data/lib/re2/string.rb +100 -0
- data/spec/kernel_spec.rb +15 -0
- data/spec/re2/match_data_spec.rb +141 -0
- data/spec/re2/regexp_spec.rb +394 -0
- data/spec/re2/string_spec.rb +47 -0
- data/spec/re2_spec.rb +84 -0
- data/spec/spec_helper.rb +3 -0
- metadata +28 -6
- data/test/re2_test.rb +0 -265
data/README.md
CHANGED
@@ -30,33 +30,77 @@ Usage
|
|
30
30
|
|
31
31
|
You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and [MatchData][] classes:
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
33
|
+
```console
|
34
|
+
$ irb -rubygems
|
35
|
+
> require 're2'
|
36
|
+
> r = RE2::Regexp.new('w(\d)(\d+)')
|
37
|
+
=> #<RE2::Regexp /w(\d)(\d+)/>
|
38
|
+
> m = r.match("w1234")
|
39
|
+
=> #<RE2::MatchData "w1234" 1:"1" 2:"234">
|
40
|
+
> m[1]
|
41
|
+
=> "1"
|
42
|
+
> m.string
|
43
|
+
=> "w1234"
|
44
|
+
> r =~ "w1234"
|
45
|
+
=> true
|
46
|
+
> r !~ "bob"
|
47
|
+
=> true
|
48
|
+
> r.match("bob")
|
49
|
+
=> nil
|
50
|
+
```
|
51
|
+
|
52
|
+
As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper method has been defined against `Kernel` so you can use a shorter version to create regular expressions:
|
53
|
+
|
54
|
+
```console
|
55
|
+
> RE2('(\d+)')
|
56
|
+
=> #<RE2::Regexp /(\d+)/>
|
57
|
+
```
|
58
|
+
|
59
|
+
Note the use of *single quotes*: inside double quotes, Ruby interprets `\d` as a plain `d`, as in the following example:
|
60
|
+
|
61
|
+
```console
|
62
|
+
> RE2("(\d+)")
|
63
|
+
=> #<RE2::Regexp /(d+)/>
|
64
|
+
```
|
49
65
|
|
50
66
|
As of 0.3.0, you can use named groups:
|
51
67
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
68
|
+
```console
|
69
|
+
> r = RE2::Regexp.new('(?P<name>\w+) (?P<age>\d+)')
|
70
|
+
=> #<RE2::Regexp /(?P<name>\w+) (?P<age>\d+)/>
|
71
|
+
> m = r.match("Bob 40")
|
72
|
+
=> #<RE2::MatchData "Bob 40" 1:"Bob" 2:"40">
|
73
|
+
> m[:name]
|
74
|
+
=> "Bob"
|
75
|
+
> m["age"]
|
76
|
+
=> "40"
|
77
|
+
```
|
78
|
+
|
79
|
+
As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from the opposite direction:
|
80
|
+
|
81
|
+
```console
|
82
|
+
> require "re2/string"
|
83
|
+
> string = "My name is Robert Paulson"
|
84
|
+
=> "My name is Robert Paulson"
|
85
|
+
> string.extend(RE2::String)
|
86
|
+
=> "My name is Robert Paulson"
|
87
|
+
> string.re2_sub("Robert", "Dave")
|
88
|
+
=> "My name is Dave Paulson"
|
89
|
+
> string.re2_gsub("a", "e")
|
90
|
+
=> "My neme is Deve Peulson"
|
91
|
+
> string.re2_match('D(\S+)')
|
92
|
+
=> #<RE2::MatchData "Deve" 1:"eve">
|
93
|
+
> string.re2_escape
|
94
|
+
=> "My\\ neme\\ is\\ Deve\\ Peulson"
|
95
|
+
```
|
96
|
+
|
97
|
+
If you want these available to all strings, you can reopen `String` like so:
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
class String
|
101
|
+
include RE2::String
|
102
|
+
end
|
103
|
+
```
|
60
104
|
|
61
105
|
Features
|
62
106
|
--------
|
@@ -75,11 +119,11 @@ Features
|
|
75
119
|
|
76
120
|
* Checking the options for an expression with `re2.options` or individually with `re2.case_sensitive?`
|
77
121
|
|
78
|
-
* Performing in-place replacement with [`RE2
|
122
|
+
* Performing in-place replacement with [`RE2.Replace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
|
79
123
|
|
80
|
-
* Performing in-place global replacement with [`RE2
|
124
|
+
* Performing in-place global replacement with [`RE2.GlobalReplace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
|
81
125
|
|
82
|
-
* Escaping regular expressions with [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2::Regexp.quote(unquoted)` or `RE2
|
126
|
+
* Escaping regular expressions with [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2::Regexp.quote(unquoted)` or `RE2.QuoteMeta(unquoted)`
|
83
127
|
|
84
128
|
Contact
|
85
129
|
-------
|
data/Rakefile
CHANGED
@@ -1,25 +1,17 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'rake/testtask'
|
4
|
-
rescue LoadError
|
5
|
-
require 'rubygems'
|
6
|
-
require 'rake/extensiontask'
|
7
|
-
require 'rake/testtask'
|
8
|
-
end
|
1
|
+
require 'rake/extensiontask'
|
2
|
+
require 'rake/testtask'
|
9
3
|
|
10
4
|
Rake::ExtensionTask.new('re2') do |e|
|
11
5
|
# e.config_options << "--with-re2-dir=/opt/local/re2"
|
12
6
|
end
|
13
7
|
|
14
8
|
Rake::TestTask.new do |t|
|
15
|
-
t.
|
9
|
+
t.libs << "spec"
|
10
|
+
t.test_files = FileList["spec/**/*_spec.rb"]
|
16
11
|
t.verbose = true
|
17
12
|
end
|
18
13
|
|
19
|
-
task :
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
task :test => :compile
|
14
|
+
task :test => :compile
|
15
|
+
task :spec => :test
|
24
16
|
task :default => :test
|
25
17
|
|
data/ext/re2/extconf.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# re2 (http://github.com/mudge/re2)
|
2
2
|
# Ruby bindings to re2, an "efficient, principled regular expression library"
|
3
3
|
#
|
4
|
-
# Copyright (c) 2010, Paul Mucur (http://
|
4
|
+
# Copyright (c) 2010-2012, Paul Mucur (http://mudge.name)
|
5
5
|
# Released under the BSD Licence, please see LICENSE.txt
|
6
6
|
|
7
7
|
require 'mkmf'
|
data/ext/re2/re2.cc
CHANGED
@@ -2,12 +2,13 @@
|
|
2
2
|
* re2 (http://github.com/mudge/re2)
|
3
3
|
* Ruby bindings to re2, an "efficient, principled regular expression library"
|
4
4
|
*
|
5
|
-
* Copyright (c) 2010, Paul Mucur (http://
|
5
|
+
* Copyright (c) 2010-2012, Paul Mucur (http://mudge.name)
|
6
6
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
7
|
*/
|
8
8
|
|
9
9
|
#include <re2/re2.h>
|
10
10
|
#include <string>
|
11
|
+
#include <sstream>
|
11
12
|
using namespace std;
|
12
13
|
|
13
14
|
extern "C" {
|
@@ -17,12 +18,20 @@ extern "C" {
|
|
17
18
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
18
19
|
#define UNUSED(x) ((void)x)
|
19
20
|
|
20
|
-
#
|
21
|
-
|
21
|
+
#ifndef RSTRING_LEN
|
22
|
+
#define RSTRING_LEN(x) (RSTRING(x)->len)
|
22
23
|
#endif
|
23
24
|
|
24
|
-
#
|
25
|
-
|
25
|
+
#ifndef RSTRING_PTR
|
26
|
+
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
30
|
+
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
31
|
+
(pattern->Match(text, startpos, endpos, anchor, match, nmatch))
|
32
|
+
#else
|
33
|
+
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
34
|
+
(pattern->Match(text, startpos, anchor, match, nmatch))
|
26
35
|
#endif
|
27
36
|
|
28
37
|
typedef struct {
|
@@ -77,7 +86,7 @@ extern "C" {
|
|
77
86
|
*
|
78
87
|
* @return [String] a frozen copy of the passed string.
|
79
88
|
* @example
|
80
|
-
* m = RE2('(\d+)').match("bob 123")
|
89
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
81
90
|
* m.string #=> "bob 123"
|
82
91
|
*/
|
83
92
|
static VALUE
|
@@ -94,7 +103,7 @@ extern "C" {
|
|
94
103
|
*
|
95
104
|
* @return [Fixnum] the number of elements
|
96
105
|
* @example
|
97
|
-
* m = RE2('(\d+)').match("bob 123")
|
106
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
98
107
|
* m.size #=> 2
|
99
108
|
* m.length #=> 2
|
100
109
|
*/
|
@@ -112,7 +121,7 @@ extern "C" {
|
|
112
121
|
*
|
113
122
|
* @return [RE2::Regexp] the regexp used in the match
|
114
123
|
* @example
|
115
|
-
* m = RE2('(\d+)').match("bob 123")
|
124
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
116
125
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
117
126
|
*/
|
118
127
|
static VALUE
|
@@ -135,7 +144,7 @@ extern "C" {
|
|
135
144
|
*
|
136
145
|
* @return [Array<String, nil>] the array of matches
|
137
146
|
* @example
|
138
|
-
* m = RE2('(\d+)').match("bob 123")
|
147
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
139
148
|
* m.to_a #=> ["123", "123"]
|
140
149
|
*/
|
141
150
|
static VALUE
|
@@ -144,9 +153,11 @@ extern "C" {
|
|
144
153
|
int i;
|
145
154
|
re2_matchdata *m;
|
146
155
|
re2::StringPiece match;
|
156
|
+
VALUE array;
|
147
157
|
|
148
158
|
Data_Get_Struct(self, re2_matchdata, m);
|
149
|
-
|
159
|
+
|
160
|
+
array = rb_ary_new2(m->number_of_matches);
|
150
161
|
for (i = 0; i < m->number_of_matches; i++) {
|
151
162
|
if (m->matches[i].empty()) {
|
152
163
|
rb_ary_push(array, Qnil);
|
@@ -155,6 +166,7 @@ extern "C" {
|
|
155
166
|
rb_ary_push(array, rb_str_new(match.data(), match.size()));
|
156
167
|
}
|
157
168
|
}
|
169
|
+
|
158
170
|
return array;
|
159
171
|
}
|
160
172
|
|
@@ -207,7 +219,7 @@ extern "C" {
|
|
207
219
|
* @param [Fixnum] index the index of the match to fetch
|
208
220
|
* @return [String, nil] the specified match
|
209
221
|
* @example
|
210
|
-
* m = RE2('(\d+)').match("bob 123")
|
222
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
211
223
|
* m[0] #=> "123"
|
212
224
|
*
|
213
225
|
* @overload [](start, length)
|
@@ -217,7 +229,7 @@ extern "C" {
|
|
217
229
|
* @param [Fixnum] length the number of elements to fetch
|
218
230
|
* @return [Array<String, nil>] the specified matches
|
219
231
|
* @example
|
220
|
-
* m = RE2('(\d+)').match("bob 123")
|
232
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
221
233
|
* m[0, 1] #=> ["123"]
|
222
234
|
*
|
223
235
|
* @overload [](range)
|
@@ -226,7 +238,7 @@ extern "C" {
|
|
226
238
|
* @param [Range] range the range of match indexes to fetch
|
227
239
|
* @return [Array<String, nil>] the specified matches
|
228
240
|
* @example
|
229
|
-
* m = RE2('(\d+)').match("bob 123")
|
241
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
230
242
|
* m[0..1] #=> ["123", "123"]
|
231
243
|
*
|
232
244
|
* @overload [](name)
|
@@ -235,7 +247,7 @@ extern "C" {
|
|
235
247
|
* @param [String, Symbol] name the name of the match to fetch
|
236
248
|
* @return [String, nil] the specific match
|
237
249
|
* @example
|
238
|
-
* m = RE2('(?P<number>\d+)').match("bob 123")
|
250
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
|
239
251
|
* m["number"] #=> "123"
|
240
252
|
* m[:number] #=> "123"
|
241
253
|
*/
|
@@ -272,7 +284,7 @@ extern "C" {
|
|
272
284
|
*
|
273
285
|
* @return [String] a printable version of the match
|
274
286
|
* @example
|
275
|
-
* m = RE2('(\d+)').match("bob 123")
|
287
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
276
288
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
277
289
|
*/
|
278
290
|
static VALUE
|
@@ -281,32 +293,33 @@ extern "C" {
|
|
281
293
|
int i;
|
282
294
|
re2_matchdata *m;
|
283
295
|
VALUE match, result;
|
296
|
+
ostringstream output;
|
284
297
|
|
285
298
|
Data_Get_Struct(self, re2_matchdata, m);
|
286
299
|
|
287
300
|
result = rb_str_new("#<RE2::MatchData", 16);
|
288
301
|
|
302
|
+
output << "#<RE2::MatchData";
|
303
|
+
|
289
304
|
for (i = 0; i < m->number_of_matches; i++) {
|
290
|
-
|
305
|
+
output << " ";
|
291
306
|
|
292
307
|
if (i > 0) {
|
293
|
-
|
294
|
-
snprintf(buf, sizeof(buf), "%d", i);
|
295
|
-
rb_str_cat2(result, buf);
|
296
|
-
rb_str_cat(result, ":", 1);
|
308
|
+
output << i << ":";
|
297
309
|
}
|
298
310
|
|
299
311
|
match = re2_matchdata_nth_match(i, self);
|
300
312
|
|
301
313
|
if (match == Qnil) {
|
302
|
-
|
314
|
+
output << "nil";
|
303
315
|
} else {
|
304
|
-
|
305
|
-
rb_str_cat(result, RSTRING_PTR(match), RSTRING_LEN(match));
|
306
|
-
rb_str_cat(result, "\"", 1);
|
316
|
+
output << "\"" << StringValuePtr(match) << "\"";
|
307
317
|
}
|
308
318
|
}
|
309
|
-
|
319
|
+
|
320
|
+
output << ">";
|
321
|
+
|
322
|
+
result = rb_str_new(output.str().data(), output.str().length());
|
310
323
|
|
311
324
|
return result;
|
312
325
|
}
|
@@ -459,11 +472,14 @@ extern "C" {
|
|
459
472
|
re2_regexp_inspect(VALUE self)
|
460
473
|
{
|
461
474
|
re2_pattern *p;
|
462
|
-
VALUE result
|
475
|
+
VALUE result;
|
476
|
+
ostringstream output;
|
463
477
|
|
464
478
|
Data_Get_Struct(self, re2_pattern, p);
|
465
|
-
|
466
|
-
|
479
|
+
|
480
|
+
output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
|
481
|
+
|
482
|
+
result = rb_str_new(output.str().data(), output.str().length());
|
467
483
|
|
468
484
|
return result;
|
469
485
|
}
|
@@ -706,30 +722,38 @@ extern "C" {
|
|
706
722
|
|
707
723
|
/*
|
708
724
|
* If the RE2 could not be created properly, returns an
|
709
|
-
* error string.
|
725
|
+
* error string; otherwise returns nil.
|
710
726
|
*
|
711
|
-
* @return [String] the error string
|
727
|
+
* @return [String, nil] the error string or nil
|
712
728
|
*/
|
713
729
|
static VALUE
|
714
730
|
re2_regexp_error(VALUE self)
|
715
731
|
{
|
716
732
|
re2_pattern *p;
|
717
733
|
Data_Get_Struct(self, re2_pattern, p);
|
718
|
-
|
734
|
+
if (p->pattern->ok()) {
|
735
|
+
return Qnil;
|
736
|
+
} else {
|
737
|
+
return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
|
738
|
+
}
|
719
739
|
}
|
720
740
|
|
721
741
|
/*
|
722
742
|
* If the RE2 could not be created properly, returns
|
723
|
-
* the offending portion of the regexp.
|
743
|
+
* the offending portion of the regexp; otherwise returns nil.
|
724
744
|
*
|
725
|
-
* @return [String] the offending portion of the regexp
|
745
|
+
* @return [String, nil] the offending portion of the regexp or nil
|
726
746
|
*/
|
727
747
|
static VALUE
|
728
748
|
re2_regexp_error_arg(VALUE self)
|
729
749
|
{
|
730
750
|
re2_pattern *p;
|
731
751
|
Data_Get_Struct(self, re2_pattern, p);
|
732
|
-
|
752
|
+
if (p->pattern->ok()) {
|
753
|
+
return Qnil;
|
754
|
+
} else {
|
755
|
+
return rb_str_new(p->pattern->error_arg().data(), p->pattern->error_arg().size());
|
756
|
+
}
|
733
757
|
}
|
734
758
|
|
735
759
|
/*
|
@@ -860,7 +884,7 @@ extern "C" {
|
|
860
884
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
861
885
|
* @example
|
862
886
|
* r = RE2::Regexp.new('w(o)(o)')
|
863
|
-
* r.match('woo) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
887
|
+
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
864
888
|
*
|
865
889
|
* @overload match(text, 0)
|
866
890
|
* Returns either true or false indicating whether a
|
@@ -907,11 +931,7 @@ extern "C" {
|
|
907
931
|
}
|
908
932
|
|
909
933
|
if (n == 0) {
|
910
|
-
|
911
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
912
|
-
#else
|
913
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, 0, 0);
|
914
|
-
#endif
|
934
|
+
matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
915
935
|
return BOOL2RUBY(matched);
|
916
936
|
} else {
|
917
937
|
|
@@ -931,11 +951,7 @@ extern "C" {
|
|
931
951
|
|
932
952
|
m->number_of_matches = n;
|
933
953
|
|
934
|
-
|
935
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
|
936
|
-
#else
|
937
|
-
matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, m->matches, n);
|
938
|
-
#endif
|
954
|
+
matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
|
939
955
|
|
940
956
|
if (matched) {
|
941
957
|
return matchdata;
|
@@ -968,12 +984,13 @@ extern "C" {
|
|
968
984
|
* @param [String] str the string to modify
|
969
985
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
970
986
|
* @param [String] rewrite the string to replace with
|
987
|
+
* @return [String] the resulting string
|
971
988
|
* @example
|
972
|
-
* RE2
|
989
|
+
* RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
|
973
990
|
* re2 = RE2.new("hel+o")
|
974
|
-
* RE2
|
991
|
+
* RE2.Replace("hello there", re2, "yo") #=> "yo there"
|
975
992
|
* text = "Good morning"
|
976
|
-
* RE2
|
993
|
+
* RE2.Replace(text, "morn", "even") #=> "Good evening"
|
977
994
|
* text #=> "Good evening"
|
978
995
|
*/
|
979
996
|
static VALUE
|
@@ -1017,12 +1034,13 @@ extern "C" {
|
|
1017
1034
|
* @param [String] str the string to modify
|
1018
1035
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1019
1036
|
* @param [String] rewrite the string to replace with
|
1037
|
+
* @return [String] the resulting string
|
1020
1038
|
* @example
|
1021
|
-
* RE2
|
1039
|
+
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1022
1040
|
* re2 = RE2.new("oo?")
|
1023
|
-
* RE2
|
1041
|
+
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1024
1042
|
* text = "Good morning"
|
1025
|
-
* RE2
|
1043
|
+
* RE2.GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
|
1026
1044
|
* text #=> "Geeeed meerning"
|
1027
1045
|
*/
|
1028
1046
|
static VALUE
|