re2 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 44eacddc1b64782c61eff633b15f40db1ba3b9d9
4
- data.tar.gz: 377ae25429bb8804c03a951d0150097d11cb808e
3
+ metadata.gz: 32a2347d40362a3014dfd03f948405964468a3c2
4
+ data.tar.gz: ea15968dae15568249200c6ba91bad66d0e28f6f
5
5
  SHA512:
6
- metadata.gz: 1f09c136bb17bfabfe739882cc61c7f0bf50dfba140aec66db0a6e2962de0d25e844804812137b5c3e798a61bb2b8002d83fc4d7766bc7fc52ce159535634b4e
7
- data.tar.gz: f9742540b13859de929a299ac28a646d305e4a72bcffdd0588dad268f53cb0135f440a1f914cd7504a3b01eb5bf9c569c54c83cfaa4ba0b7f131326ef01a0212
6
+ metadata.gz: 00588950e6c82fb5720043c04d0f833d819f3107a3c953f2284d9eaede74210c0c803f34b3e9fd89413c886fae1e4b853b5a85b18b3c2d788ff0a7e8b46036cf
7
+ data.tar.gz: af2d23503d089a15ea6dd32e00bc80622cd7b0b1773ed471edcc982ac6791a6d1bf0bf5332d2440446796b5a4da15a169fd7396e3cf729af8a300e52a5985bad
data/README.md CHANGED
@@ -4,7 +4,7 @@ re2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
- **Current version:** 0.6.1
7
+ **Current version:** 0.7.0
8
8
  **Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, Rubinius 2.2
9
9
 
10
10
  Installation
@@ -33,7 +33,7 @@ Documentation
33
33
  -------------
34
34
 
35
35
  Full documentation automatically generated from the latest version is
36
- available at <http://rubydoc.info/github/mudge/re2>.
36
+ available at <http://mudge.name/re2/>.
37
37
 
38
38
  Bear in mind that re2's regular expression syntax differs from PCRE, see the
39
39
  [official syntax page][] for more details.
@@ -55,6 +55,10 @@ $ irb -rubygems
55
55
  => "1"
56
56
  > m.string
57
57
  => "w1234"
58
+ > m.begin(1)
59
+ => 1
60
+ > m.end(1)
61
+ => 2
58
62
  > r =~ "w1234"
59
63
  => true
60
64
  > r !~ "bob"
@@ -151,6 +155,12 @@ Features
151
155
  [`RE2.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377) and
152
156
  `RE2.quote(unquoted)`
153
157
 
158
+ Contributions
159
+ -------------
160
+
161
+ Thanks to [Jason Woods](https://github.com/driskell) who contributed the
162
+ original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`.
163
+
154
164
  Contact
155
165
  -------
156
166
 
data/Rakefile CHANGED
@@ -9,10 +9,6 @@ Rake::TestTask.new do |t|
9
9
  t.verbose = true
10
10
  end
11
11
 
12
- task :valgrind do
13
- system "valgrind --tool=memcheck --leak-check=full --show-reachable=no --num-callers=15 --track-fds=yes --workaround-gcc296-bugs=yes --max-stackframe=7304328 --dsymutil=yes --track-origins=yes --log-file=report.txt ruby spec/leak.rb"
14
- end
15
-
16
12
  task :test => :compile
17
13
  task :spec => :test
18
14
  task :default => :test
@@ -11,6 +11,9 @@ incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib")
11
11
  $CFLAGS << " -Wall -Wextra -funroll-loops"
12
12
 
13
13
  have_library("stdc++")
14
+ have_header("stdint.h")
15
+ have_func("rb_str_sublen")
16
+
14
17
  if have_library("re2")
15
18
 
16
19
  # Determine which version of re2 the user has installed.
@@ -8,6 +8,7 @@
8
8
 
9
9
  #include <re2/re2.h>
10
10
  #include <ruby.h>
11
+ #include <stdint.h>
11
12
  #include <string>
12
13
  #include <sstream>
13
14
  #include <vector>
@@ -17,6 +18,17 @@ using std::nothrow;
17
18
  using std::map;
18
19
  using std::vector;
19
20
 
21
+ #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
22
+ #define UNUSED(x) ((void)x)
23
+
24
+ #ifndef RSTRING_LEN
25
+ #define RSTRING_LEN(x) (RSTRING(x)->len)
26
+ #endif
27
+
28
+ #ifndef RSTRING_PTR
29
+ #define RSTRING_PTR(x) (RSTRING(x)->ptr)
30
+ #endif
31
+
20
32
  #ifdef HAVE_RUBY_ENCODING_H
21
33
  #include <ruby/encoding.h>
22
34
  #define ENCODED_STR_NEW(str, length, encoding) \
@@ -40,15 +52,20 @@ using std::vector;
40
52
  rb_str_new((const char *)str, (long)length)
41
53
  #endif
42
54
 
43
- #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
44
- #define UNUSED(x) ((void)x)
45
-
46
- #ifndef RSTRING_LEN
47
- #define RSTRING_LEN(x) (RSTRING(x)->len)
48
- #endif
49
-
50
- #ifndef RSTRING_PTR
51
- #define RSTRING_PTR(x) (RSTRING(x)->ptr)
55
+ #ifdef HAVE_RB_STR_SUBLEN
56
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
57
+ LONG2NUM(rb_str_sublen(str, offset))
58
+ #else
59
+ #ifdef HAVE_RUBY_ENCODING_H
60
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
61
+ ({ \
62
+ VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
63
+ rb_str_length(_string); \
64
+ })
65
+ #else
66
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
67
+ LONG2NUM(offset)
68
+ #endif
52
69
  #endif
53
70
 
54
71
  #ifdef HAVE_ENDPOS_ARGUMENT
@@ -223,6 +240,49 @@ static VALUE re2_scanner_scan(VALUE self) {
223
240
  return result;
224
241
  }
225
242
 
243
+ /*
244
+ * Retrieve a matchdata by index or name.
245
+ */
246
+ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
247
+ int id;
248
+ re2_matchdata *m;
249
+ re2_pattern *p;
250
+ map<string, int> groups;
251
+ string name;
252
+ re2::StringPiece *match;
253
+
254
+ Data_Get_Struct(self, re2_matchdata, m);
255
+ Data_Get_Struct(m->regexp, re2_pattern, p);
256
+
257
+ if (FIXNUM_P(idx)) {
258
+ id = FIX2INT(idx);
259
+ } else {
260
+ if (SYMBOL_P(idx)) {
261
+ name = rb_id2name(SYM2ID(idx));
262
+ } else {
263
+ name = StringValuePtr(idx);
264
+ }
265
+
266
+ groups = p->pattern->NamedCapturingGroups();
267
+
268
+ if (groups.count(name) == 1) {
269
+ id = groups[name];
270
+ } else {
271
+ return NULL;
272
+ }
273
+ }
274
+
275
+ if (id >= 0 && id < m->number_of_matches) {
276
+ match = &m->matches[id];
277
+
278
+ if (!match->empty()) {
279
+ return match;
280
+ }
281
+ }
282
+
283
+ return NULL;
284
+ }
285
+
226
286
  /*
227
287
  * Returns the number of elements in the match array (including nils).
228
288
  *
@@ -239,6 +299,67 @@ static VALUE re2_matchdata_size(VALUE self) {
239
299
  return INT2FIX(m->number_of_matches);
240
300
  }
241
301
 
302
+ /*
303
+ * Returns the offset of the start of the nth element of the matchdata.
304
+ *
305
+ * @param [Fixnum, String, Symbol] n the name or number of the match
306
+ * @return [Fixnum] the offset of the start of the match
307
+ * @example
308
+ * m = RE2::Regexp.new('ob (\d+)').match("bob 123")
309
+ * m.begin(0) #=> 1
310
+ * m.begin(1) #=> 4
311
+ */
312
+ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
313
+ re2_matchdata *m;
314
+ re2_pattern *p;
315
+ re2::StringPiece *match;
316
+ long offset;
317
+
318
+ Data_Get_Struct(self, re2_matchdata, m);
319
+ Data_Get_Struct(m->regexp, re2_pattern, p);
320
+
321
+ match = re2_matchdata_find_match(n, self);
322
+ if (match == NULL) {
323
+ return Qnil;
324
+ } else {
325
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
326
+
327
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
328
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
329
+ }
330
+ }
331
+
332
+ /*
333
+ * Returns the offset of the character following the end of the nth element of the matchdata.
334
+ *
335
+ * @param [Fixnum, String, Symbol] n the name or number of the match
336
+ * @return [Fixnum] the offset of the character following the end of the match
337
+ * @example
338
+ * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
339
+ * m.end(0) #=> 9
340
+ * m.end(1) #=> 7
341
+ */
342
+ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
343
+ re2_matchdata *m;
344
+ re2_pattern *p;
345
+ re2::StringPiece *match;
346
+ long offset;
347
+
348
+ Data_Get_Struct(self, re2_matchdata, m);
349
+ Data_Get_Struct(m->regexp, re2_pattern, p);
350
+
351
+ match = re2_matchdata_find_match(n, self);
352
+
353
+ if (match == NULL) {
354
+ return Qnil;
355
+ } else {
356
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
357
+
358
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
359
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
360
+ }
361
+ }
362
+
242
363
  /*
243
364
  * Returns the {RE2::Regexp} used in the match.
244
365
  *
@@ -1050,8 +1171,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1050
1171
 
1051
1172
  m->number_of_matches = n;
1052
1173
 
1053
- matched = match(p->pattern, StringValuePtr(text), 0,
1054
- static_cast<int>(RSTRING_LEN(text)),
1174
+ matched = match(p->pattern, StringValuePtr(m->text), 0,
1175
+ static_cast<int>(RSTRING_LEN(m->text)),
1055
1176
  RE2::UNANCHORED, m->matches, n);
1056
1177
 
1057
1178
  if (matched) {
@@ -1216,6 +1337,10 @@ void Init_re2(void) {
1216
1337
  RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1217
1338
  rb_define_method(re2_cMatchData, "length",
1218
1339
  RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1340
+ rb_define_method(re2_cMatchData, "begin",
1341
+ RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
1342
+ rb_define_method(re2_cMatchData, "end",
1343
+ RUBY_METHOD_FUNC(re2_matchdata_end), 1);
1219
1344
  rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1220
1345
  -1); rb_define_method(re2_cMatchData, "to_s",
1221
1346
  RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require "spec_helper"
2
4
 
3
5
  describe RE2::MatchData do
@@ -69,6 +71,11 @@ describe RE2::MatchData do
69
71
  md[:missing].must_be_nil
70
72
  end
71
73
 
74
+ it "raises an error if given an inappropriate index" do
75
+ md = RE2::Regexp.new('(\d+)').match("bob 123")
76
+ lambda { md[nil] }.must_raise(TypeError)
77
+ end
78
+
72
79
  if String.method_defined?(:encoding)
73
80
  it "returns UTF-8 encoded strings by default" do
74
81
  md = RE2::Regexp.new('(?P<name>\S+)').match("bob")
@@ -154,4 +161,48 @@ describe RE2::MatchData do
154
161
  m3.must_equal("56")
155
162
  end
156
163
  end
164
+
165
+ describe "#begin" do
166
+ it "returns the offset of the start of a match by index" do
167
+ md = RE2::Regexp.new('(wo{2})').match('a woohoo')
168
+ md.string[md.begin(0)..-1].must_equal('woohoo')
169
+ end
170
+
171
+ it "returns the offset of the start of a match by string name" do
172
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
173
+ md.string[md.begin('foo')..-1].must_equal('foobar')
174
+ end
175
+
176
+ it "returns the offset of the start of a match by symbol name" do
177
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
178
+ md.string[md.begin(:foo)..-1].must_equal('foobar')
179
+ end
180
+
181
+ it "returns the offset despite multibyte characters" do
182
+ md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
183
+ md.string[md.begin(0)..-1].must_equal('Ruby')
184
+ end
185
+ end
186
+
187
+ describe "#end" do
188
+ it "returns the offset of the character following the end of a match" do
189
+ md = RE2::Regexp.new('(wo{2})').match('a woohoo')
190
+ md.string[0...md.end(0)].must_equal('a woo')
191
+ end
192
+
193
+ it "returns the offset of a match by string name" do
194
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
195
+ md.string[0...md.end('foo')].must_equal('a foo')
196
+ end
197
+
198
+ it "returns the offset of a match by symbol name" do
199
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
200
+ md.string[0...md.end(:foo)].must_equal('a foo')
201
+ end
202
+
203
+ it "returns the offset despite multibyte characters" do
204
+ md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
205
+ md.string[0...md.end(0)].must_equal('I ♥ Ruby')
206
+ end
207
+ end
157
208
  end
@@ -11,6 +11,10 @@ describe RE2::Regexp do
11
11
  re = RE2::Regexp.new('woo', :case_sensitive => false)
12
12
  re.must_be_instance_of(RE2::Regexp)
13
13
  end
14
+
15
+ it "raises an error if given an inappropriate type" do
16
+ lambda { RE2::Regexp.new(nil) }.must_raise(TypeError)
17
+ end
14
18
  end
15
19
 
16
20
  describe "#compile" do
@@ -268,6 +272,10 @@ describe RE2::Regexp do
268
272
  lambda { re.match(nil) }.must_raise(TypeError)
269
273
  end
270
274
 
275
+ it "raises an exception when given an inappropriate number of matches" do
276
+ lambda { re.match("My name is Robert Paulson", {}) }.must_raise(TypeError)
277
+ end
278
+
271
279
  describe "with a specific number of matches under the total in the pattern" do
272
280
  subject { re.match("My name is Robert Paulson", 1) }
273
281
 
@@ -77,4 +77,3 @@ describe RE2 do
77
77
  end
78
78
  end
79
79
  end
80
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-25 00:00:00.000000000 Z
11
+ date: 2015-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler