re2 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -2
- data/Rakefile +0 -4
- data/ext/re2/extconf.rb +3 -0
- data/ext/re2/re2.cc +136 -11
- data/spec/re2/match_data_spec.rb +51 -0
- data/spec/re2/regexp_spec.rb +8 -0
- data/spec/re2_spec.rb +0 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32a2347d40362a3014dfd03f948405964468a3c2
|
4
|
+
data.tar.gz: ea15968dae15568249200c6ba91bad66d0e28f6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00588950e6c82fb5720043c04d0f833d819f3107a3c953f2284d9eaede74210c0c803f34b3e9fd89413c886fae1e4b853b5a85b18b3c2d788ff0a7e8b46036cf
|
7
|
+
data.tar.gz: af2d23503d089a15ea6dd32e00bc80622cd7b0b1773ed471edcc982ac6791a6d1bf0bf5332d2440446796b5a4da15a169fd7396e3cf729af8a300e52a5985bad
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ re2 [](http://
|
|
4
4
|
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
5
|
library".
|
6
6
|
|
7
|
-
**Current version:** 0.6.1
|
7
|
+
**Current version:** 0.7.0
|
8
8
|
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, Rubinius 2.2
|
9
9
|
|
10
10
|
Installation
|
@@ -33,7 +33,7 @@ Documentation
|
|
33
33
|
-------------
|
34
34
|
|
35
35
|
Full documentation automatically generated from the latest version is
|
36
|
-
available at <http://
|
36
|
+
available at <http://mudge.name/re2/>.
|
37
37
|
|
38
38
|
Bear in mind that re2's regular expression syntax differs from PCRE, see the
|
39
39
|
[official syntax page][] for more details.
|
@@ -55,6 +55,10 @@ $ irb -rubygems
|
|
55
55
|
=> "1"
|
56
56
|
> m.string
|
57
57
|
=> "w1234"
|
58
|
+
> m.begin(1)
|
59
|
+
=> 1
|
60
|
+
> m.end(1)
|
61
|
+
=> 2
|
58
62
|
> r =~ "w1234"
|
59
63
|
=> true
|
60
64
|
> r !~ "bob"
|
@@ -151,6 +155,12 @@ Features
|
|
151
155
|
[`RE2.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377) and
|
152
156
|
`RE2.quote(unquoted)`
|
153
157
|
|
158
|
+
Contributions
|
159
|
+
-------------
|
160
|
+
|
161
|
+
Thanks to [Jason Woods](https://github.com/driskell) who contributed the
|
162
|
+
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`.
|
163
|
+
|
154
164
|
Contact
|
155
165
|
-------
|
156
166
|
|
data/Rakefile
CHANGED
@@ -9,10 +9,6 @@ Rake::TestTask.new do |t|
|
|
9
9
|
t.verbose = true
|
10
10
|
end
|
11
11
|
|
12
|
-
task :valgrind do
|
13
|
-
system "valgrind --tool=memcheck --leak-check=full --show-reachable=no --num-callers=15 --track-fds=yes --workaround-gcc296-bugs=yes --max-stackframe=7304328 --dsymutil=yes --track-origins=yes --log-file=report.txt ruby spec/leak.rb"
|
14
|
-
end
|
15
|
-
|
16
12
|
task :test => :compile
|
17
13
|
task :spec => :test
|
18
14
|
task :default => :test
|
data/ext/re2/extconf.rb
CHANGED
@@ -11,6 +11,9 @@ incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib")
|
|
11
11
|
$CFLAGS << " -Wall -Wextra -funroll-loops"
|
12
12
|
|
13
13
|
have_library("stdc++")
|
14
|
+
have_header("stdint.h")
|
15
|
+
have_func("rb_str_sublen")
|
16
|
+
|
14
17
|
if have_library("re2")
|
15
18
|
|
16
19
|
# Determine which version of re2 the user has installed.
|
data/ext/re2/re2.cc
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
#include <re2/re2.h>
|
10
10
|
#include <ruby.h>
|
11
|
+
#include <stdint.h>
|
11
12
|
#include <string>
|
12
13
|
#include <sstream>
|
13
14
|
#include <vector>
|
@@ -17,6 +18,17 @@ using std::nothrow;
|
|
17
18
|
using std::map;
|
18
19
|
using std::vector;
|
19
20
|
|
21
|
+
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
22
|
+
#define UNUSED(x) ((void)x)
|
23
|
+
|
24
|
+
#ifndef RSTRING_LEN
|
25
|
+
#define RSTRING_LEN(x) (RSTRING(x)->len)
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#ifndef RSTRING_PTR
|
29
|
+
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
30
|
+
#endif
|
31
|
+
|
20
32
|
#ifdef HAVE_RUBY_ENCODING_H
|
21
33
|
#include <ruby/encoding.h>
|
22
34
|
#define ENCODED_STR_NEW(str, length, encoding) \
|
@@ -40,15 +52,20 @@ using std::vector;
|
|
40
52
|
rb_str_new((const char *)str, (long)length)
|
41
53
|
#endif
|
42
54
|
|
43
|
-
#
|
44
|
-
#define
|
45
|
-
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
|
50
|
-
|
51
|
-
|
55
|
+
#ifdef HAVE_RB_STR_SUBLEN
|
56
|
+
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
57
|
+
LONG2NUM(rb_str_sublen(str, offset))
|
58
|
+
#else
|
59
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
60
|
+
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
61
|
+
({ \
|
62
|
+
VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
|
63
|
+
rb_str_length(_string); \
|
64
|
+
})
|
65
|
+
#else
|
66
|
+
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
67
|
+
LONG2NUM(offset)
|
68
|
+
#endif
|
52
69
|
#endif
|
53
70
|
|
54
71
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
@@ -223,6 +240,49 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
223
240
|
return result;
|
224
241
|
}
|
225
242
|
|
243
|
+
/*
|
244
|
+
* Retrieve a matchdata by index or name.
|
245
|
+
*/
|
246
|
+
re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
247
|
+
int id;
|
248
|
+
re2_matchdata *m;
|
249
|
+
re2_pattern *p;
|
250
|
+
map<string, int> groups;
|
251
|
+
string name;
|
252
|
+
re2::StringPiece *match;
|
253
|
+
|
254
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
255
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
256
|
+
|
257
|
+
if (FIXNUM_P(idx)) {
|
258
|
+
id = FIX2INT(idx);
|
259
|
+
} else {
|
260
|
+
if (SYMBOL_P(idx)) {
|
261
|
+
name = rb_id2name(SYM2ID(idx));
|
262
|
+
} else {
|
263
|
+
name = StringValuePtr(idx);
|
264
|
+
}
|
265
|
+
|
266
|
+
groups = p->pattern->NamedCapturingGroups();
|
267
|
+
|
268
|
+
if (groups.count(name) == 1) {
|
269
|
+
id = groups[name];
|
270
|
+
} else {
|
271
|
+
return NULL;
|
272
|
+
}
|
273
|
+
}
|
274
|
+
|
275
|
+
if (id >= 0 && id < m->number_of_matches) {
|
276
|
+
match = &m->matches[id];
|
277
|
+
|
278
|
+
if (!match->empty()) {
|
279
|
+
return match;
|
280
|
+
}
|
281
|
+
}
|
282
|
+
|
283
|
+
return NULL;
|
284
|
+
}
|
285
|
+
|
226
286
|
/*
|
227
287
|
* Returns the number of elements in the match array (including nils).
|
228
288
|
*
|
@@ -239,6 +299,67 @@ static VALUE re2_matchdata_size(VALUE self) {
|
|
239
299
|
return INT2FIX(m->number_of_matches);
|
240
300
|
}
|
241
301
|
|
302
|
+
/*
|
303
|
+
* Returns the offset of the start of the nth element of the matchdata.
|
304
|
+
*
|
305
|
+
* @param [Fixnum, String, Symbol] n the name or number of the match
|
306
|
+
* @return [Fixnum] the offset of the start of the match
|
307
|
+
* @example
|
308
|
+
* m = RE2::Regexp.new('ob (\d+)').match("bob 123")
|
309
|
+
* m.begin(0) #=> 1
|
310
|
+
* m.begin(1) #=> 4
|
311
|
+
*/
|
312
|
+
static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
313
|
+
re2_matchdata *m;
|
314
|
+
re2_pattern *p;
|
315
|
+
re2::StringPiece *match;
|
316
|
+
long offset;
|
317
|
+
|
318
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
319
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
320
|
+
|
321
|
+
match = re2_matchdata_find_match(n, self);
|
322
|
+
if (match == NULL) {
|
323
|
+
return Qnil;
|
324
|
+
} else {
|
325
|
+
offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
|
326
|
+
|
327
|
+
return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
|
328
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
329
|
+
}
|
330
|
+
}
|
331
|
+
|
332
|
+
/*
|
333
|
+
* Returns the offset of the character following the end of the nth element of the matchdata.
|
334
|
+
*
|
335
|
+
* @param [Fixnum, String, Symbol] n the name or number of the match
|
336
|
+
* @return [Fixnum] the offset of the character following the end of the match
|
337
|
+
* @example
|
338
|
+
* m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
|
339
|
+
* m.end(0) #=> 9
|
340
|
+
* m.end(1) #=> 7
|
341
|
+
*/
|
342
|
+
static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
343
|
+
re2_matchdata *m;
|
344
|
+
re2_pattern *p;
|
345
|
+
re2::StringPiece *match;
|
346
|
+
long offset;
|
347
|
+
|
348
|
+
Data_Get_Struct(self, re2_matchdata, m);
|
349
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
350
|
+
|
351
|
+
match = re2_matchdata_find_match(n, self);
|
352
|
+
|
353
|
+
if (match == NULL) {
|
354
|
+
return Qnil;
|
355
|
+
} else {
|
356
|
+
offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
|
357
|
+
|
358
|
+
return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
|
359
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
360
|
+
}
|
361
|
+
}
|
362
|
+
|
242
363
|
/*
|
243
364
|
* Returns the {RE2::Regexp} used in the match.
|
244
365
|
*
|
@@ -1050,8 +1171,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1050
1171
|
|
1051
1172
|
m->number_of_matches = n;
|
1052
1173
|
|
1053
|
-
matched = match(p->pattern, StringValuePtr(text), 0,
|
1054
|
-
static_cast<int>(RSTRING_LEN(text)),
|
1174
|
+
matched = match(p->pattern, StringValuePtr(m->text), 0,
|
1175
|
+
static_cast<int>(RSTRING_LEN(m->text)),
|
1055
1176
|
RE2::UNANCHORED, m->matches, n);
|
1056
1177
|
|
1057
1178
|
if (matched) {
|
@@ -1216,6 +1337,10 @@ void Init_re2(void) {
|
|
1216
1337
|
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1217
1338
|
rb_define_method(re2_cMatchData, "length",
|
1218
1339
|
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1340
|
+
rb_define_method(re2_cMatchData, "begin",
|
1341
|
+
RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
|
1342
|
+
rb_define_method(re2_cMatchData, "end",
|
1343
|
+
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
1219
1344
|
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
1220
1345
|
-1); rb_define_method(re2_cMatchData, "to_s",
|
1221
1346
|
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
require "spec_helper"
|
2
4
|
|
3
5
|
describe RE2::MatchData do
|
@@ -69,6 +71,11 @@ describe RE2::MatchData do
|
|
69
71
|
md[:missing].must_be_nil
|
70
72
|
end
|
71
73
|
|
74
|
+
it "raises an error if given an inappropriate index" do
|
75
|
+
md = RE2::Regexp.new('(\d+)').match("bob 123")
|
76
|
+
lambda { md[nil] }.must_raise(TypeError)
|
77
|
+
end
|
78
|
+
|
72
79
|
if String.method_defined?(:encoding)
|
73
80
|
it "returns UTF-8 encoded strings by default" do
|
74
81
|
md = RE2::Regexp.new('(?P<name>\S+)').match("bob")
|
@@ -154,4 +161,48 @@ describe RE2::MatchData do
|
|
154
161
|
m3.must_equal("56")
|
155
162
|
end
|
156
163
|
end
|
164
|
+
|
165
|
+
describe "#begin" do
|
166
|
+
it "returns the offset of the start of a match by index" do
|
167
|
+
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
168
|
+
md.string[md.begin(0)..-1].must_equal('woohoo')
|
169
|
+
end
|
170
|
+
|
171
|
+
it "returns the offset of the start of a match by string name" do
|
172
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
173
|
+
md.string[md.begin('foo')..-1].must_equal('foobar')
|
174
|
+
end
|
175
|
+
|
176
|
+
it "returns the offset of the start of a match by symbol name" do
|
177
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
178
|
+
md.string[md.begin(:foo)..-1].must_equal('foobar')
|
179
|
+
end
|
180
|
+
|
181
|
+
it "returns the offset despite multibyte characters" do
|
182
|
+
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
183
|
+
md.string[md.begin(0)..-1].must_equal('Ruby')
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
describe "#end" do
|
188
|
+
it "returns the offset of the character following the end of a match" do
|
189
|
+
md = RE2::Regexp.new('(wo{2})').match('a woohoo')
|
190
|
+
md.string[0...md.end(0)].must_equal('a woo')
|
191
|
+
end
|
192
|
+
|
193
|
+
it "returns the offset of a match by string name" do
|
194
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
195
|
+
md.string[0...md.end('foo')].must_equal('a foo')
|
196
|
+
end
|
197
|
+
|
198
|
+
it "returns the offset of a match by symbol name" do
|
199
|
+
md = RE2::Regexp.new('(?P<foo>fo{2})').match('a foobar')
|
200
|
+
md.string[0...md.end(:foo)].must_equal('a foo')
|
201
|
+
end
|
202
|
+
|
203
|
+
it "returns the offset despite multibyte characters" do
|
204
|
+
md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby')
|
205
|
+
md.string[0...md.end(0)].must_equal('I ♥ Ruby')
|
206
|
+
end
|
207
|
+
end
|
157
208
|
end
|
data/spec/re2/regexp_spec.rb
CHANGED
@@ -11,6 +11,10 @@ describe RE2::Regexp do
|
|
11
11
|
re = RE2::Regexp.new('woo', :case_sensitive => false)
|
12
12
|
re.must_be_instance_of(RE2::Regexp)
|
13
13
|
end
|
14
|
+
|
15
|
+
it "raises an error if given an inappropriate type" do
|
16
|
+
lambda { RE2::Regexp.new(nil) }.must_raise(TypeError)
|
17
|
+
end
|
14
18
|
end
|
15
19
|
|
16
20
|
describe "#compile" do
|
@@ -268,6 +272,10 @@ describe RE2::Regexp do
|
|
268
272
|
lambda { re.match(nil) }.must_raise(TypeError)
|
269
273
|
end
|
270
274
|
|
275
|
+
it "raises an exception when given an inappropriate number of matches" do
|
276
|
+
lambda { re.match("My name is Robert Paulson", {}) }.must_raise(TypeError)
|
277
|
+
end
|
278
|
+
|
271
279
|
describe "with a specific number of matches under the total in the pattern" do
|
272
280
|
subject { re.match("My name is Robert Paulson", 1) }
|
273
281
|
|
data/spec/re2_spec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|