re2 0.6.1 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -2
- data/Rakefile +0 -4
- data/ext/re2/extconf.rb +3 -0
- data/ext/re2/re2.cc +136 -11
- data/spec/re2/match_data_spec.rb +51 -0
- data/spec/re2/regexp_spec.rb +8 -0
- data/spec/re2_spec.rb +0 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32a2347d40362a3014dfd03f948405964468a3c2
|
4
|
+
data.tar.gz: ea15968dae15568249200c6ba91bad66d0e28f6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00588950e6c82fb5720043c04d0f833d819f3107a3c953f2284d9eaede74210c0c803f34b3e9fd89413c886fae1e4b853b5a85b18b3c2d788ff0a7e8b46036cf
|
7
|
+
data.tar.gz: af2d23503d089a15ea6dd32e00bc80622cd7b0b1773ed471edcc982ac6791a6d1bf0bf5332d2440446796b5a4da15a169fd7396e3cf729af8a300e52a5985bad
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ re2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://
|
|
4
4
|
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
5
|
library".
|
6
6
|
|
7
|
-
**Current version:** 0.6.1
|
7
|
+
**Current version:** 0.7.0
|
8
8
|
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, Rubinius 2.2
|
9
9
|
|
10
10
|
Installation
|
@@ -33,7 +33,7 @@ Documentation
|
|
33
33
|
-------------
|
34
34
|
|
35
35
|
Full documentation automatically generated from the latest version is
|
36
|
-
available at <http://
|
36
|
+
available at <http://mudge.name/re2/>.
|
37
37
|
|
38
38
|
Bear in mind that re2's regular expression syntax differs from PCRE, see the
|
39
39
|
[official syntax page][] for more details.
|
@@ -55,6 +55,10 @@ $ irb -rubygems
|
|
55
55
|
=> "1"
|
56
56
|
> m.string
|
57
57
|
=> "w1234"
|
58
|
+
> m.begin(1)
|
59
|
+
=> 1
|
60
|
+
> m.end(1)
|
61
|
+
=> 2
|
58
62
|
> r =~ "w1234"
|
59
63
|
=> true
|
60
64
|
> r !~ "bob"
|
@@ -151,6 +155,12 @@ Features
|
|
151
155
|
[`RE2.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377) and
|
152
156
|
`RE2.quote(unquoted)`
|
153
157
|
|
158
|
+
Contributions
|
159
|
+
-------------
|
160
|
+
|
161
|
+
Thanks to [Jason Woods](https://github.com/driskell) who contributed the
|
162
|
+
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`.
|
163
|
+
|
154
164
|
Contact
|
155
165
|
-------
|
156
166
|
|
data/Rakefile
CHANGED
@@ -9,10 +9,6 @@ Rake::TestTask.new do |t|
|
|
9
9
|
t.verbose = true
|
10
10
|
end
|
11
11
|
|
12
|
-
task :valgrind do
|
13
|
-
system "valgrind --tool=memcheck --leak-check=full --show-reachable=no --num-callers=15 --track-fds=yes --workaround-gcc296-bugs=yes --max-stackframe=7304328 --dsymutil=yes --track-origins=yes --log-file=report.txt ruby spec/leak.rb"
|
14
|
-
end
|
15
|
-
|
16
12
|
task :test => :compile
|
17
13
|
task :spec => :test
|
18
14
|
task :default => :test
|
data/ext/re2/extconf.rb
CHANGED
@@ -11,6 +11,9 @@ incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib")
|
|
11
11
|
$CFLAGS << " -Wall -Wextra -funroll-loops"
|
12
12
|
|
13
13
|
have_library("stdc++")
|
14
|
+
have_header("stdint.h")
|
15
|
+
have_func("rb_str_sublen")
|
16
|
+
|
14
17
|
if have_library("re2")
|
15
18
|
|
16
19
|
# Determine which version of re2 the user has installed.
|
data/ext/re2/re2.cc
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
#include <re2/re2.h>
|
10
10
|
#include <ruby.h>
|
11
|
+
#include <stdint.h>
|
11
12
|
#include <string>
|
12
13
|
#include <sstream>
|
13
14
|
#include <vector>
|
@@ -17,6 +18,17 @@ using std::nothrow;
|
|
17
18
|
using std::map;
|
18
19
|
using std::vector;
|
19
20
|
|
21
|
+
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
22
|
+
#define UNUSED(x) ((void)x)
|
23
|
+
|
24
|
+
#ifndef RSTRING_LEN
|
25
|
+
#define RSTRING_LEN(x) (RSTRING(x)->len)
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#ifndef RSTRING_PTR
|
29
|
+
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
30
|
+
#endif
|
31
|
+
|
20
32
|
#ifdef HAVE_RUBY_ENCODING_H
|
21
33
|
#include <ruby/encoding.h>
|
22
34
|
#define ENCODED_STR_NEW(str, length, encoding) \
|
@@ -40,15 +52,20 @@ using std::vector;
|
|
40
52
|
rb_str_new((const char *)str, (long)length)
|
41
53
|
#endif
|
42
54
|
|
43
|
-
#
|
44
|
-
#define
|
45
|
-
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
|
50
|
-
|
51
|
-
|
55
|
+
#ifdef HAVE_RB_STR_SUBLEN
|
56
|
+
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
57
|
+
LONG2NUM(rb_str_sublen(str, offset))
|
58
|
+
#else
|
59
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
60
|
+
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
61
|
+
({ \
|
62
|
+
VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
|
63
|
+
rb_str_length(_string); \
|
64
|
+
})
|
65
|
+
#else
|
66
|
+
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
67
|
+
LONG2NUM(offset)
|
68
|
+
#endif
|
52
69
|
#endif
|
53
70
|
|
54
71
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
@@ -223,6 +240,49 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
223
240
|
return result;
|
224
241
|
}
|
225
242
|
|
243
|
+
/*
|
244
|
+
* Retrieve a matchdata by index or name.
|
245
|
+
*/
|
246
|
+
re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
247
|
+
int id;
|
248
|
+
re2_matchdata *m;
|
249
|
+
re2_pattern *p;
|
250
|
+
map<string, int> groups;
|
251
|
+
string name;
|
252
|
+
re2::StringPiece *match;
|
253
|
+
|
254
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
255
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
256
|
+
|
257
|
+
if (FIXNUM_P(idx)) {
|
258
|
+
id = FIX2INT(idx);
|
259
|
+
} else {
|
260
|
+
if (SYMBOL_P(idx)) {
|
261
|
+
name = rb_id2name(SYM2ID(idx));
|
262
|
+
} else {
|
263
|
+
name = StringValuePtr(idx);
|
264
|
+
}
|
265
|
+
|
266
|
+
groups = p->pattern->NamedCapturingGroups();
|
267
|
+
|
268
|
+
if (groups.count(name) == 1) {
|
269
|
+
id = groups[name];
|
270
|
+
} else {
|
271
|
+
return NULL;
|
272
|
+
}
|
273
|
+
}
|
274
|
+
|
275
|
+
if (id >= 0 && id < m->number_of_matches) {
|
276
|
+
match = &m->matches[id];
|
277
|
+
|
278
|
+
if (!match->empty()) {
|
279
|
+
return match;
|
280
|
+
}
|
281
|
+
}
|
282
|
+
|
283
|
+
return NULL;
|
284
|
+
}
|
285
|
+
|
226
286
|
/*
|
227
287
|
* Returns the number of elements in the match array (including nils).
|
228
288
|
*
|
@@ -239,6 +299,67 @@ static VALUE re2_matchdata_size(VALUE self) {
|
|
239
299
|
return INT2FIX(m->number_of_matches);
|
240
300
|
}
|
241
301
|
|
302
|
+
/*
|
303
|
+
* Returns the offset of the start of the nth element of the matchdata.
|
304
|
+
*
|
305
|
+
* @param [Fixnum, String, Symbol] n the name or number of the match
|
306
|
+
* @return [Fixnum] the offset of the start of the match
|
307
|
+
* @example
|
308
|
+
* m = RE2::Regexp.new('ob (\d+)').match("bob 123")
|
309
|
+
* m.begin(0) #=> 1
|
310
|
+
* m.begin(1) #=> 4
|
311
|
+
*/
|
312
|
+
static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
313
|
+
re2_matchdata *m;
|
314
|
+
re2_pattern *p;
|
315
|
+
re2::StringPiece *match;
|
316
|
+
long offset;
|
317
|
+
|
318
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
319
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
320
|
+
|
321
|
+
match = re2_matchdata_find_match(n, self);
|
322
|
+
if (match == NULL) {
|
323
|
+
return Qnil;
|
324
|
+
} else {
|
325
|
+
offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
|
326
|
+
|
327
|
+
return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
|
328
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
329
|
+
}
|
330
|
+
}
|
331
|
+
|
332
|
+
/*
|
333
|
+
* Returns the offset of the character following the end of the nth element of the matchdata.
|
334
|
+
*
|
335
|
+
* @param [Fixnum, String, Symbol] n the name or number of the match
|
336
|
+
* @return [Fixnum] the offset of the character following the end of the match
|
337
|
+
* @example
|
338
|
+
* m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
|
339
|
+
* m.end(0) #=> 9
|
340
|
+
* m.end(1) #=> 7
|
341
|
+
*/
|
342
|
+
static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
343
|
+
re2_matchdata *m;
|
344
|
+
re2_pattern *p;
|
345
|
+
re2::StringPiece *match;
|
346
|
+
long offset;
|
347
|
+
|
348
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
349
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
350
|
+
|
351
|
+
match = re2_matchdata_find_match(n, self);
|
352
|
+
|
353
|
+
if (match == NULL) {
|
354
|
+
return Qnil;
|
355
|
+
} else {
|
356
|
+
offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
|
357
|
+
|
358
|
+
return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
|
359
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
360
|
+
}
|
361
|
+
}
|
362
|
+
|
242
363
|
/*
|
243
364
|
* Returns the {RE2::Regexp} used in the match.
|
244
365
|
*
|
@@ -1050,8 +1171,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1050
1171
|
|
1051
1172
|
m->number_of_matches = n;
|
1052
1173
|
|
1053
|
-
matched = match(p->pattern, StringValuePtr(text), 0,
|
1054
|
-
static_cast<int>(RSTRING_LEN(text)),
|
1174
|
+
matched = match(p->pattern, StringValuePtr(m->text), 0,
|
1175
|
+
static_cast<int>(RSTRING_LEN(m->text)),
|
1055
1176
|
RE2::UNANCHORED, m->matches, n);
|
1056
1177
|
|
1057
1178
|
if (matched) {
|
@@ -1216,6 +1337,10 @@ void Init_re2(void) {
|
|
1216
1337
|
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1217
1338
|
rb_define_method(re2_cMatchData, "length",
|
1218
1339
|
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1340
|
+
rb_define_method(re2_cMatchData, "begin",
|
1341
|
+
RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
|
1342
|
+
rb_define_method(re2_cMatchData, "end",
|
1343
|
+
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
1219
1344
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
1220
1345
|
-1); rb_define_method(re2_cMatchData, "to_s",
|
1221
1346
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
require "spec_helper"
|
2
4
|
|
3
5
|
describe RE2::MatchData do
|
@@ -69,6 +71,11 @@ describe RE2::MatchData do
|
|
69
71
|
md[:missing].must_be_nil
|
70
72
|
end
|
71
73
|
|
74
|
+
it "raises an error if given an inappropriate index" do
|
75
|
+
md = RE2::Regexp.new('(\d+)').match("bob 123")
|
76
|
+
lambda { md[nil] }.must_raise(TypeError)
|
77
|
+
end
|
78
|
+
|
72
79
|
if String.method_defined?(:encoding)
|
73
80
|
it "returns UTF-8 encoded strings by default" do
|
74
81
|
md = RE2::Regexp.new('(?P<name>\S+)').match("bob")
|
@@ -154,4 +161,48 @@ describe RE2::MatchData do
|
|
154
161
|
m3.must_equal("56")
|
155
162
|
end
|
156
163
|
end
|
164
|
+
|
165
|
+
describe "#begin" do
|
166
|
+
it "returns the offset of the start of a match by index" do
|
167
|
+
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
168
|
+
md.string[md.begin(0)..-1].must_equal('woohoo')
|
169
|
+
end
|
170
|
+
|
171
|
+
it "returns the offset of the start of a match by string name" do
|
172
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
173
|
+
md.string[md.begin('foo')..-1].must_equal('foobar')
|
174
|
+
end
|
175
|
+
|
176
|
+
it "returns the offset of the start of a match by symbol name" do
|
177
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
178
|
+
md.string[md.begin(:foo)..-1].must_equal('foobar')
|
179
|
+
end
|
180
|
+
|
181
|
+
it "returns the offset despite multibyte characters" do
|
182
|
+
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
183
|
+
md.string[md.begin(0)..-1].must_equal('Ruby')
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
describe "#end" do
|
188
|
+
it "returns the offset of the character following the end of a match" do
|
189
|
+
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
190
|
+
md.string[0...md.end(0)].must_equal('a woo')
|
191
|
+
end
|
192
|
+
|
193
|
+
it "returns the offset of a match by string name" do
|
194
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
195
|
+
md.string[0...md.end('foo')].must_equal('a foo')
|
196
|
+
end
|
197
|
+
|
198
|
+
it "returns the offset of a match by symbol name" do
|
199
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
200
|
+
md.string[0...md.end(:foo)].must_equal('a foo')
|
201
|
+
end
|
202
|
+
|
203
|
+
it "returns the offset despite multibyte characters" do
|
204
|
+
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
205
|
+
md.string[0...md.end(0)].must_equal('I ♥ Ruby')
|
206
|
+
end
|
207
|
+
end
|
157
208
|
end
|
data/spec/re2/regexp_spec.rb
CHANGED
@@ -11,6 +11,10 @@ describe RE2::Regexp do
|
|
11
11
|
re = RE2::Regexp.new('woo', :case_sensitive => false)
|
12
12
|
re.must_be_instance_of(RE2::Regexp)
|
13
13
|
end
|
14
|
+
|
15
|
+
it "raises an error if given an inappropriate type" do
|
16
|
+
lambda { RE2::Regexp.new(nil) }.must_raise(TypeError)
|
17
|
+
end
|
14
18
|
end
|
15
19
|
|
16
20
|
describe "#compile" do
|
@@ -268,6 +272,10 @@ describe RE2::Regexp do
|
|
268
272
|
lambda { re.match(nil) }.must_raise(TypeError)
|
269
273
|
end
|
270
274
|
|
275
|
+
it "raises an exception when given an inappropriate number of matches" do
|
276
|
+
lambda { re.match("My name is Robert Paulson", {}) }.must_raise(TypeError)
|
277
|
+
end
|
278
|
+
|
271
279
|
describe "with a specific number of matches under the total in the pattern" do
|
272
280
|
subject { re.match("My name is Robert Paulson", 1) }
|
273
281
|
|
data/spec/re2_spec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|