re2 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -30,33 +30,77 @@ Usage
30
30
 
31
31
  You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and [MatchData][] classes:
32
32
 
33
- $ irb -rubygems
34
- > require 're2'
35
- > r = RE2::Regexp.compile('w(\d)(\d+)')
36
- => #<RE2::Regexp /w(\d)(\d+)/>
37
- > m = r.match("w1234")
38
- => #<RE2::MatchData "w1234" 1:"1" 2:"234">
39
- > m[1]
40
- => "1"
41
- > m.string
42
- => "w1234"
43
- > r =~ "w1234"
44
- => true
45
- > r !~ "bob"
46
- => true
47
- > r.match("bob")
48
- => nil
33
+ ```console
34
+ $ irb -rubygems
35
+ > require 're2'
36
+ > r = RE2::Regexp.new('w(\d)(\d+)')
37
+ => #<RE2::Regexp /w(\d)(\d+)/>
38
+ > m = r.match("w1234")
39
+ => #<RE2::MatchData "w1234" 1:"1" 2:"234">
40
+ > m[1]
41
+ => "1"
42
+ > m.string
43
+ => "w1234"
44
+ > r =~ "w1234"
45
+ => true
46
+ > r !~ "bob"
47
+ => true
48
+ > r.match("bob")
49
+ => nil
50
+ ```
51
+
52
+ As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper method has been defined against `Kernel` so you can use a shorter version to create regular expressions:
53
+
54
+ ```console
55
+ > RE2('(\d+)')
56
+ => #<RE2::Regexp /(\d+)/>
57
+ ```
58
+
59
+ Note the use of *single quotes*: inside double quotes, Ruby interprets `\d` as `d`, as in the following example:
60
+
61
+ ```console
62
+ > RE2("(\d+)")
63
+ => #<RE2::Regexp /(d+)/>
64
+ ```
49
65
 
50
66
  As of 0.3.0, you can use named groups:
51
67
 
52
- > r = RE2::Regexp.compile('(?P<name>\w+) (?P<age>\d+)')
53
- => #<RE2::Regexp /(?P<name>\w+) (?P<age>\d+)/>
54
- > m = r.match("Bob 40")
55
- => #<RE2::MatchData "Bob 40" 1:"Bob" 2:"40">
56
- > m[:name]
57
- => "Bob"
58
- > m["age"]
59
- => "40"
68
+ ```console
69
+ > r = RE2::Regexp.new('(?P<name>\w+) (?P<age>\d+)')
70
+ => #<RE2::Regexp /(?P<name>\w+) (?P<age>\d+)/>
71
+ > m = r.match("Bob 40")
72
+ => #<RE2::MatchData "Bob 40" 1:"Bob" 2:"40">
73
+ > m[:name]
74
+ => "Bob"
75
+ > m["age"]
76
+ => "40"
77
+ ```
78
+
79
+ As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from the opposite direction:
80
+
81
+ ```console
82
+ > require "re2/string"
83
+ > string = "My name is Robert Paulson"
84
+ => "My name is Robert Paulson"
85
+ > string.extend(RE2::String)
86
+ => "My name is Robert Paulson"
87
+ > string.re2_sub("Robert", "Dave")
88
+ => "My name is Dave Paulson"
89
+ > string.re2_gsub("a", "e")
90
+ => "My neme is Deve Peulson"
91
+ > string.re2_match('D(\S+)')
92
+ => #<RE2::MatchData "Deve" 1:"eve">
93
+ > string.re2_escape
94
+ => "My\\ neme\\ is\\ Deve\\ Peulson"
95
+ ```
96
+
97
+ If you want these available to all strings, you can reopen `String` like so:
98
+
99
+ ```ruby
100
+ class String
101
+ include RE2::String
102
+ end
103
+ ```
60
104
 
61
105
  Features
62
106
  --------
@@ -75,11 +119,11 @@ Features
75
119
 
76
120
  * Checking the options for an expression with `re2.options` or individually with `re2.case_sensitive?`
77
121
 
78
- * Performing in-place replacement with [`RE2::Replace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
122
+ * Performing in-place replacement with [`RE2.Replace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
79
123
 
80
- * Performing in-place global replacement with [`RE2::GlobalReplace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
124
+ * Performing in-place global replacement with [`RE2.GlobalReplace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
81
125
 
82
- * Escaping regular expressions with [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2::Regexp.quote(unquoted)` or `RE2::QuoteMeta(unquoted)`
126
+ * Escaping regular expressions with [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2::Regexp.quote(unquoted)` or `RE2.QuoteMeta(unquoted)`
83
127
 
84
128
  Contact
85
129
  -------
data/Rakefile CHANGED
@@ -1,25 +1,17 @@
1
- begin
2
- require 'rake/extensiontask'
3
- require 'rake/testtask'
4
- rescue LoadError
5
- require 'rubygems'
6
- require 'rake/extensiontask'
7
- require 'rake/testtask'
8
- end
1
+ require 'rake/extensiontask'
2
+ require 'rake/testtask'
9
3
 
10
4
  Rake::ExtensionTask.new('re2') do |e|
11
5
  # e.config_options << "--with-re2-dir=/opt/local/re2"
12
6
  end
13
7
 
14
8
  Rake::TestTask.new do |t|
15
- t.test_files = FileList["test/*_test.rb"]
9
+ t.libs << "spec"
10
+ t.test_files = FileList["spec/**/*_spec.rb"]
16
11
  t.verbose = true
17
12
  end
18
13
 
19
- task :valgrind do
20
- system "valgrind --tool=memcheck --leak-check=full --show-reachable=no --num-callers=15 --track-fds=yes --workaround-gcc296-bugs=yes --max-stackframe=7304328 --dsymutil=yes --track-origins=yes --log-file=report.txt ruby test/leak.rb"
21
- end
22
-
23
- task :test => :compile
14
+ task :test => :compile
15
+ task :spec => :test
24
16
  task :default => :test
25
17
 
@@ -1,7 +1,7 @@
1
1
  # re2 (http://github.com/mudge/re2)
2
2
  # Ruby bindings to re2, an "efficient, principled regular expression library"
3
3
  #
4
- # Copyright (c) 2010, Paul Mucur (http://mucur.name)
4
+ # Copyright (c) 2010-2012, Paul Mucur (http://mudge.name)
5
5
  # Released under the BSD Licence, please see LICENSE.txt
6
6
 
7
7
  require 'mkmf'
@@ -2,12 +2,13 @@
2
2
  * re2 (http://github.com/mudge/re2)
3
3
  * Ruby bindings to re2, an "efficient, principled regular expression library"
4
4
  *
5
- * Copyright (c) 2010, Paul Mucur (http://mucur.name)
5
+ * Copyright (c) 2010-2012, Paul Mucur (http://mudge.name)
6
6
  * Released under the BSD Licence, please see LICENSE.txt
7
7
  */
8
8
 
9
9
  #include <re2/re2.h>
10
10
  #include <string>
11
+ #include <sstream>
11
12
  using namespace std;
12
13
 
13
14
  extern "C" {
@@ -17,12 +18,20 @@ extern "C" {
17
18
  #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
18
19
  #define UNUSED(x) ((void)x)
19
20
 
20
- #if !defined(RSTRING_LEN)
21
- # define RSTRING_LEN(x) (RSTRING(x)->len)
21
+ #ifndef RSTRING_LEN
22
+ #define RSTRING_LEN(x) (RSTRING(x)->len)
22
23
  #endif
23
24
 
24
- #if !defined(RSTRING_PTR)
25
- # define RSTRING_PTR(x) (RSTRING(x)->ptr)
25
+ #ifndef RSTRING_PTR
26
+ #define RSTRING_PTR(x) (RSTRING(x)->ptr)
27
+ #endif
28
+
29
+ #ifdef HAVE_ENDPOS_ARGUMENT
30
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
31
+ (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
32
+ #else
33
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
34
+ (pattern->Match(text, startpos, anchor, match, nmatch))
26
35
  #endif
27
36
 
28
37
  typedef struct {
@@ -77,7 +86,7 @@ extern "C" {
77
86
  *
78
87
  * @return [String] a frozen copy of the passed string.
79
88
  * @example
80
- * m = RE2('(\d+)').match("bob 123")
89
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
81
90
  * m.string #=> "bob 123"
82
91
  */
83
92
  static VALUE
@@ -94,7 +103,7 @@ extern "C" {
94
103
  *
95
104
  * @return [Fixnum] the number of elements
96
105
  * @example
97
- * m = RE2('(\d+)').match("bob 123")
106
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
98
107
  * m.size #=> 2
99
108
  * m.length #=> 2
100
109
  */
@@ -112,7 +121,7 @@ extern "C" {
112
121
  *
113
122
  * @return [RE2::Regexp] the regexp used in the match
114
123
  * @example
115
- * m = RE2('(\d+)').match("bob 123")
124
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
116
125
  * m.regexp #=> #<RE2::Regexp /(\d+)/>
117
126
  */
118
127
  static VALUE
@@ -135,7 +144,7 @@ extern "C" {
135
144
  *
136
145
  * @return [Array<String, nil>] the array of matches
137
146
  * @example
138
- * m = RE2('(\d+)').match("bob 123")
147
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
139
148
  * m.to_a #=> ["123", "123"]
140
149
  */
141
150
  static VALUE
@@ -144,9 +153,11 @@ extern "C" {
144
153
  int i;
145
154
  re2_matchdata *m;
146
155
  re2::StringPiece match;
156
+ VALUE array;
147
157
 
148
158
  Data_Get_Struct(self, re2_matchdata, m);
149
- VALUE array = rb_ary_new2(m->number_of_matches);
159
+
160
+ array = rb_ary_new2(m->number_of_matches);
150
161
  for (i = 0; i < m->number_of_matches; i++) {
151
162
  if (m->matches[i].empty()) {
152
163
  rb_ary_push(array, Qnil);
@@ -155,6 +166,7 @@ extern "C" {
155
166
  rb_ary_push(array, rb_str_new(match.data(), match.size()));
156
167
  }
157
168
  }
169
+
158
170
  return array;
159
171
  }
160
172
 
@@ -207,7 +219,7 @@ extern "C" {
207
219
  * @param [Fixnum] index the index of the match to fetch
208
220
  * @return [String, nil] the specified match
209
221
  * @example
210
- * m = RE2('(\d+)').match("bob 123")
222
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
211
223
  * m[0] #=> "123"
212
224
  *
213
225
  * @overload [](start, length)
@@ -217,7 +229,7 @@ extern "C" {
217
229
  * @param [Fixnum] length the number of elements to fetch
218
230
  * @return [Array<String, nil>] the specified matches
219
231
  * @example
220
- * m = RE2('(\d+)').match("bob 123")
232
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
221
233
  * m[0, 1] #=> ["123"]
222
234
  *
223
235
  * @overload [](range)
@@ -226,7 +238,7 @@ extern "C" {
226
238
  * @param [Range] range the range of match indexes to fetch
227
239
  * @return [Array<String, nil>] the specified matches
228
240
  * @example
229
- * m = RE2('(\d+)').match("bob 123")
241
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
230
242
  * m[0..1] #=> ["123", "123"]
231
243
  *
232
244
  * @overload [](name)
@@ -235,7 +247,7 @@ extern "C" {
235
247
  * @param [String, Symbol] name the name of the match to fetch
236
248
  * @return [String, nil] the specific match
237
249
  * @example
238
- * m = RE2('(?P<number>\d+)').match("bob 123")
250
+ * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
239
251
  * m["number"] #=> "123"
240
252
  * m[:number] #=> "123"
241
253
  */
@@ -272,7 +284,7 @@ extern "C" {
272
284
  *
273
285
  * @return [String] a printable version of the match
274
286
  * @example
275
- * m = RE2('(\d+)').match("bob 123")
287
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
276
288
  * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
277
289
  */
278
290
  static VALUE
@@ -281,32 +293,33 @@ extern "C" {
281
293
  int i;
282
294
  re2_matchdata *m;
283
295
  VALUE match, result;
296
+ ostringstream output;
284
297
 
285
298
  Data_Get_Struct(self, re2_matchdata, m);
286
299
 
287
300
  result = rb_str_new("#<RE2::MatchData", 16);
288
301
 
302
+ output << "#<RE2::MatchData";
303
+
289
304
  for (i = 0; i < m->number_of_matches; i++) {
290
- rb_str_cat(result, " ", 1);
305
+ output << " ";
291
306
 
292
307
  if (i > 0) {
293
- char buf[sizeof(i)*3+1];
294
- snprintf(buf, sizeof(buf), "%d", i);
295
- rb_str_cat2(result, buf);
296
- rb_str_cat(result, ":", 1);
308
+ output << i << ":";
297
309
  }
298
310
 
299
311
  match = re2_matchdata_nth_match(i, self);
300
312
 
301
313
  if (match == Qnil) {
302
- rb_str_cat(result, "nil", 3);
314
+ output << "nil";
303
315
  } else {
304
- rb_str_cat(result, "\"", 1);
305
- rb_str_cat(result, RSTRING_PTR(match), RSTRING_LEN(match));
306
- rb_str_cat(result, "\"", 1);
316
+ output << "\"" << StringValuePtr(match) << "\"";
307
317
  }
308
318
  }
309
- rb_str_cat(result, ">", 1);
319
+
320
+ output << ">";
321
+
322
+ result = rb_str_new(output.str().data(), output.str().length());
310
323
 
311
324
  return result;
312
325
  }
@@ -459,11 +472,14 @@ extern "C" {
459
472
  re2_regexp_inspect(VALUE self)
460
473
  {
461
474
  re2_pattern *p;
462
- VALUE result = rb_str_new("#<RE2::Regexp /", 15);
475
+ VALUE result;
476
+ ostringstream output;
463
477
 
464
478
  Data_Get_Struct(self, re2_pattern, p);
465
- rb_str_cat(result, p->pattern->pattern().data(), p->pattern->pattern().size());
466
- rb_str_cat(result, "/>", 2);
479
+
480
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
481
+
482
+ result = rb_str_new(output.str().data(), output.str().length());
467
483
 
468
484
  return result;
469
485
  }
@@ -706,30 +722,38 @@ extern "C" {
706
722
 
707
723
  /*
708
724
  * If the RE2 could not be created properly, returns an
709
- * error string.
725
+ * error string; otherwise returns nil.
710
726
  *
711
- * @return [String] the error string
727
+ * @return [String, nil] the error string or nil
712
728
  */
713
729
  static VALUE
714
730
  re2_regexp_error(VALUE self)
715
731
  {
716
732
  re2_pattern *p;
717
733
  Data_Get_Struct(self, re2_pattern, p);
718
- return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
734
+ if (p->pattern->ok()) {
735
+ return Qnil;
736
+ } else {
737
+ return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
738
+ }
719
739
  }
720
740
 
721
741
  /*
722
742
  * If the RE2 could not be created properly, returns
723
- * the offending portion of the regexp.
743
+ * the offending portion of the regexp; otherwise returns nil.
724
744
  *
725
- * @return [String] the offending portion of the regexp
745
+ * @return [String, nil] the offending portion of the regexp or nil
726
746
  */
727
747
  static VALUE
728
748
  re2_regexp_error_arg(VALUE self)
729
749
  {
730
750
  re2_pattern *p;
731
751
  Data_Get_Struct(self, re2_pattern, p);
732
- return rb_str_new(p->pattern->error_arg().data(), p->pattern->error_arg().size());
752
+ if (p->pattern->ok()) {
753
+ return Qnil;
754
+ } else {
755
+ return rb_str_new(p->pattern->error_arg().data(), p->pattern->error_arg().size());
756
+ }
733
757
  }
734
758
 
735
759
  /*
@@ -860,7 +884,7 @@ extern "C" {
860
884
  * @raise [NoMemoryError] if there was not enough memory to allocate the matches
861
885
  * @example
862
886
  * r = RE2::Regexp.new('w(o)(o)')
863
- * r.match('woo) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
887
+ * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
864
888
  *
865
889
  * @overload match(text, 0)
866
890
  * Returns either true or false indicating whether a
@@ -907,11 +931,7 @@ extern "C" {
907
931
  }
908
932
 
909
933
  if (n == 0) {
910
- #if defined(HAVE_ENDPOS_ARGUMENT)
911
- matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
912
- #else
913
- matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, 0, 0);
914
- #endif
934
+ matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
915
935
  return BOOL2RUBY(matched);
916
936
  } else {
917
937
 
@@ -931,11 +951,7 @@ extern "C" {
931
951
 
932
952
  m->number_of_matches = n;
933
953
 
934
- #if defined(HAVE_ENDPOS_ARGUMENT)
935
- matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
936
- #else
937
- matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, m->matches, n);
938
- #endif
954
+ matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
939
955
 
940
956
  if (matched) {
941
957
  return matchdata;
@@ -968,12 +984,13 @@ extern "C" {
968
984
  * @param [String] str the string to modify
969
985
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
970
986
  * @param [String] rewrite the string to replace with
987
+ * @return [String] the resulting string
971
988
  * @example
972
- * RE2::Replace("hello there", "hello", "howdy") #=> "howdy there"
989
+ * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
973
990
  * re2 = RE2.new("hel+o")
974
- * RE2::Replace("hello there", re2, "yo") #=> "yo there"
991
+ * RE2.Replace("hello there", re2, "yo") #=> "yo there"
975
992
  * text = "Good morning"
976
- * RE2::Replace(text, "morn", "even") #=> "Good evening"
993
+ * RE2.Replace(text, "morn", "even") #=> "Good evening"
977
994
  * text #=> "Good evening"
978
995
  */
979
996
  static VALUE
@@ -1017,12 +1034,13 @@ extern "C" {
1017
1034
  * @param [String] str the string to modify
1018
1035
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1019
1036
  * @param [String] rewrite the string to replace with
1037
+ * @return [String] the resulting string
1020
1038
  * @example
1021
- * RE2::GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1039
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1022
1040
  * re2 = RE2.new("oo?")
1023
- * RE2::GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1041
+ * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1024
1042
  * text = "Good morning"
1025
- * RE2::GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
1043
+ * RE2.GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
1026
1044
  * text #=> "Geeeed meerning"
1027
1045
  */
1028
1046
  static VALUE