re2 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 44eacddc1b64782c61eff633b15f40db1ba3b9d9
4
- data.tar.gz: 377ae25429bb8804c03a951d0150097d11cb808e
3
+ metadata.gz: 32a2347d40362a3014dfd03f948405964468a3c2
4
+ data.tar.gz: ea15968dae15568249200c6ba91bad66d0e28f6f
5
5
  SHA512:
6
- metadata.gz: 1f09c136bb17bfabfe739882cc61c7f0bf50dfba140aec66db0a6e2962de0d25e844804812137b5c3e798a61bb2b8002d83fc4d7766bc7fc52ce159535634b4e
7
- data.tar.gz: f9742540b13859de929a299ac28a646d305e4a72bcffdd0588dad268f53cb0135f440a1f914cd7504a3b01eb5bf9c569c54c83cfaa4ba0b7f131326ef01a0212
6
+ metadata.gz: 00588950e6c82fb5720043c04d0f833d819f3107a3c953f2284d9eaede74210c0c803f34b3e9fd89413c886fae1e4b853b5a85b18b3c2d788ff0a7e8b46036cf
7
+ data.tar.gz: af2d23503d089a15ea6dd32e00bc80622cd7b0b1773ed471edcc982ac6791a6d1bf0bf5332d2440446796b5a4da15a169fd7396e3cf729af8a300e52a5985bad
data/README.md CHANGED
@@ -4,7 +4,7 @@ re2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
- **Current version:** 0.6.1
7
+ **Current version:** 0.7.0
8
8
  **Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, Rubinius 2.2
9
9
 
10
10
  Installation
@@ -33,7 +33,7 @@ Documentation
33
33
  -------------
34
34
 
35
35
  Full documentation automatically generated from the latest version is
36
- available at <http://rubydoc.info/github/mudge/re2>.
36
+ available at <http://mudge.name/re2/>.
37
37
 
38
38
  Bear in mind that re2's regular expression syntax differs from PCRE, see the
39
39
  [official syntax page][] for more details.
@@ -55,6 +55,10 @@ $ irb -rubygems
55
55
  => "1"
56
56
  > m.string
57
57
  => "w1234"
58
+ > m.begin(1)
59
+ => 1
60
+ > m.end(1)
61
+ => 2
58
62
  > r =~ "w1234"
59
63
  => true
60
64
  > r !~ "bob"
@@ -151,6 +155,12 @@ Features
151
155
  [`RE2.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377) and
152
156
  `RE2.quote(unquoted)`
153
157
 
158
+ Contributions
159
+ -------------
160
+
161
+ Thanks to [Jason Woods](https://github.com/driskell) who contributed the
162
+ original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`.
163
+
154
164
  Contact
155
165
  -------
156
166
 
data/Rakefile CHANGED
@@ -9,10 +9,6 @@ Rake::TestTask.new do |t|
9
9
  t.verbose = true
10
10
  end
11
11
 
12
- task :valgrind do
13
- system "valgrind --tool=memcheck --leak-check=full --show-reachable=no --num-callers=15 --track-fds=yes --workaround-gcc296-bugs=yes --max-stackframe=7304328 --dsymutil=yes --track-origins=yes --log-file=report.txt ruby spec/leak.rb"
14
- end
15
-
16
12
  task :test => :compile
17
13
  task :spec => :test
18
14
  task :default => :test
@@ -11,6 +11,9 @@ incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib")
11
11
  $CFLAGS << " -Wall -Wextra -funroll-loops"
12
12
 
13
13
  have_library("stdc++")
14
+ have_header("stdint.h")
15
+ have_func("rb_str_sublen")
16
+
14
17
  if have_library("re2")
15
18
 
16
19
  # Determine which version of re2 the user has installed.
@@ -8,6 +8,7 @@
8
8
 
9
9
  #include <re2/re2.h>
10
10
  #include <ruby.h>
11
+ #include <stdint.h>
11
12
  #include <string>
12
13
  #include <sstream>
13
14
  #include <vector>
@@ -17,6 +18,17 @@ using std::nothrow;
17
18
  using std::map;
18
19
  using std::vector;
19
20
 
21
+ #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
22
+ #define UNUSED(x) ((void)x)
23
+
24
+ #ifndef RSTRING_LEN
25
+ #define RSTRING_LEN(x) (RSTRING(x)->len)
26
+ #endif
27
+
28
+ #ifndef RSTRING_PTR
29
+ #define RSTRING_PTR(x) (RSTRING(x)->ptr)
30
+ #endif
31
+
20
32
  #ifdef HAVE_RUBY_ENCODING_H
21
33
  #include <ruby/encoding.h>
22
34
  #define ENCODED_STR_NEW(str, length, encoding) \
@@ -40,15 +52,20 @@ using std::vector;
40
52
  rb_str_new((const char *)str, (long)length)
41
53
  #endif
42
54
 
43
- #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
44
- #define UNUSED(x) ((void)x)
45
-
46
- #ifndef RSTRING_LEN
47
- #define RSTRING_LEN(x) (RSTRING(x)->len)
48
- #endif
49
-
50
- #ifndef RSTRING_PTR
51
- #define RSTRING_PTR(x) (RSTRING(x)->ptr)
55
+ #ifdef HAVE_RB_STR_SUBLEN
56
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
57
+ LONG2NUM(rb_str_sublen(str, offset))
58
+ #else
59
+ #ifdef HAVE_RUBY_ENCODING_H
60
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
61
+ ({ \
62
+ VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
63
+ rb_str_length(_string); \
64
+ })
65
+ #else
66
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
67
+ LONG2NUM(offset)
68
+ #endif
52
69
  #endif
53
70
 
54
71
  #ifdef HAVE_ENDPOS_ARGUMENT
@@ -223,6 +240,49 @@ static VALUE re2_scanner_scan(VALUE self) {
223
240
  return result;
224
241
  }
225
242
 
243
+ /*
244
+ * Retrieve a matchdata by index or name.
245
+ */
246
+ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
247
+ int id;
248
+ re2_matchdata *m;
249
+ re2_pattern *p;
250
+ map<string, int> groups;
251
+ string name;
252
+ re2::StringPiece *match;
253
+
254
+ Data_Get_Struct(self, re2_matchdata, m);
255
+ Data_Get_Struct(m->regexp, re2_pattern, p);
256
+
257
+ if (FIXNUM_P(idx)) {
258
+ id = FIX2INT(idx);
259
+ } else {
260
+ if (SYMBOL_P(idx)) {
261
+ name = rb_id2name(SYM2ID(idx));
262
+ } else {
263
+ name = StringValuePtr(idx);
264
+ }
265
+
266
+ groups = p->pattern->NamedCapturingGroups();
267
+
268
+ if (groups.count(name) == 1) {
269
+ id = groups[name];
270
+ } else {
271
+ return NULL;
272
+ }
273
+ }
274
+
275
+ if (id >= 0 && id < m->number_of_matches) {
276
+ match = &m->matches[id];
277
+
278
+ if (!match->empty()) {
279
+ return match;
280
+ }
281
+ }
282
+
283
+ return NULL;
284
+ }
285
+
226
286
  /*
227
287
  * Returns the number of elements in the match array (including nils).
228
288
  *
@@ -239,6 +299,67 @@ static VALUE re2_matchdata_size(VALUE self) {
239
299
  return INT2FIX(m->number_of_matches);
240
300
  }
241
301
 
302
+ /*
303
+ * Returns the offset of the start of the nth element of the matchdata.
304
+ *
305
+ * @param [Fixnum, String, Symbol] n the name or number of the match
306
+ * @return [Fixnum] the offset of the start of the match
307
+ * @example
308
+ * m = RE2::Regexp.new('ob (\d+)').match("bob 123")
309
+ * m.begin(0) #=> 1
310
+ * m.begin(1) #=> 4
311
+ */
312
+ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
313
+ re2_matchdata *m;
314
+ re2_pattern *p;
315
+ re2::StringPiece *match;
316
+ long offset;
317
+
318
+ Data_Get_Struct(self, re2_matchdata, m);
319
+ Data_Get_Struct(m->regexp, re2_pattern, p);
320
+
321
+ match = re2_matchdata_find_match(n, self);
322
+ if (match == NULL) {
323
+ return Qnil;
324
+ } else {
325
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
326
+
327
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
328
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
329
+ }
330
+ }
331
+
332
+ /*
333
+ * Returns the offset of the character following the end of the nth element of the matchdata.
334
+ *
335
+ * @param [Fixnum, String, Symbol] n the name or number of the match
336
+ * @return [Fixnum] the offset of the character following the end of the match
337
+ * @example
338
+ * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
339
+ * m.end(0) #=> 9
340
+ * m.end(1) #=> 7
341
+ */
342
+ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
343
+ re2_matchdata *m;
344
+ re2_pattern *p;
345
+ re2::StringPiece *match;
346
+ long offset;
347
+
348
+ Data_Get_Struct(self, re2_matchdata, m);
349
+ Data_Get_Struct(m->regexp, re2_pattern, p);
350
+
351
+ match = re2_matchdata_find_match(n, self);
352
+
353
+ if (match == NULL) {
354
+ return Qnil;
355
+ } else {
356
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
357
+
358
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
359
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
360
+ }
361
+ }
362
+
242
363
  /*
243
364
  * Returns the {RE2::Regexp} used in the match.
244
365
  *
@@ -1050,8 +1171,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1050
1171
 
1051
1172
  m->number_of_matches = n;
1052
1173
 
1053
- matched = match(p->pattern, StringValuePtr(text), 0,
1054
- static_cast<int>(RSTRING_LEN(text)),
1174
+ matched = match(p->pattern, StringValuePtr(m->text), 0,
1175
+ static_cast<int>(RSTRING_LEN(m->text)),
1055
1176
  RE2::UNANCHORED, m->matches, n);
1056
1177
 
1057
1178
  if (matched) {
@@ -1216,6 +1337,10 @@ void Init_re2(void) {
1216
1337
  RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1217
1338
  rb_define_method(re2_cMatchData, "length",
1218
1339
  RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1340
+ rb_define_method(re2_cMatchData, "begin",
1341
+ RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
1342
+ rb_define_method(re2_cMatchData, "end",
1343
+ RUBY_METHOD_FUNC(re2_matchdata_end), 1);
1219
1344
  rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1220
1345
  -1); rb_define_method(re2_cMatchData, "to_s",
1221
1346
  RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require "spec_helper"
2
4
 
3
5
  describe RE2::MatchData do
@@ -69,6 +71,11 @@ describe RE2::MatchData do
69
71
  md[:missing].must_be_nil
70
72
  end
71
73
 
74
+ it "raises an error if given an inappropriate index" do
75
+ md = RE2::Regexp.new('(\d+)').match("bob 123")
76
+ lambda { md[nil] }.must_raise(TypeError)
77
+ end
78
+
72
79
  if String.method_defined?(:encoding)
73
80
  it "returns UTF-8 encoded strings by default" do
74
81
  md = RE2::Regexp.new('(?P<name>\S+)').match("bob")
@@ -154,4 +161,48 @@ describe RE2::MatchData do
154
161
  m3.must_equal("56")
155
162
  end
156
163
  end
164
+
165
+ describe "#begin" do
166
+ it "returns the offset of the start of a match by index" do
167
+ md = RE2::Regexp.new('(wo{2})').match('a woohoo')
168
+ md.string[md.begin(0)..-1].must_equal('woohoo')
169
+ end
170
+
171
+ it "returns the offset of the start of a match by string name" do
172
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
173
+ md.string[md.begin('foo')..-1].must_equal('foobar')
174
+ end
175
+
176
+ it "returns the offset of the start of a match by symbol name" do
177
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
178
+ md.string[md.begin(:foo)..-1].must_equal('foobar')
179
+ end
180
+
181
+ it "returns the offset despite multibyte characters" do
182
+ md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
183
+ md.string[md.begin(0)..-1].must_equal('Ruby')
184
+ end
185
+ end
186
+
187
+ describe "#end" do
188
+ it "returns the offset of the character following the end of a match" do
189
+ md = RE2::Regexp.new('(wo{2})').match('a woohoo')
190
+ md.string[0...md.end(0)].must_equal('a woo')
191
+ end
192
+
193
+ it "returns the offset of a match by string name" do
194
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
195
+ md.string[0...md.end('foo')].must_equal('a foo')
196
+ end
197
+
198
+ it "returns the offset of a match by symbol name" do
199
+ md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
200
+ md.string[0...md.end(:foo)].must_equal('a foo')
201
+ end
202
+
203
+ it "returns the offset despite multibyte characters" do
204
+ md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
205
+ md.string[0...md.end(0)].must_equal('I ♥ Ruby')
206
+ end
207
+ end
157
208
  end
@@ -11,6 +11,10 @@ describe RE2::Regexp do
11
11
  re = RE2::Regexp.new('woo', :case_sensitive => false)
12
12
  re.must_be_instance_of(RE2::Regexp)
13
13
  end
14
+
15
+ it "raises an error if given an inappropriate type" do
16
+ lambda { RE2::Regexp.new(nil) }.must_raise(TypeError)
17
+ end
14
18
  end
15
19
 
16
20
  describe "#compile" do
@@ -268,6 +272,10 @@ describe RE2::Regexp do
268
272
  lambda { re.match(nil) }.must_raise(TypeError)
269
273
  end
270
274
 
275
+ it "raises an exception when given an inappropriate number of matches" do
276
+ lambda { re.match("My name is Robert Paulson", {}) }.must_raise(TypeError)
277
+ end
278
+
271
279
  describe "with a specific number of matches under the total in the pattern" do
272
280
  subject { re.match("My name is Robert Paulson", 1) }
273
281
 
@@ -77,4 +77,3 @@ describe RE2 do
77
77
  end
78
78
  end
79
79
  end
80
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-25 00:00:00.000000000 Z
11
+ date: 2015-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler