re2 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/ext/re2/extconf.rb +3 -0
- data/ext/re2/re2.cc +39 -0
- data/spec/re2/scanner_spec.rb +110 -0
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfd64dee3272c8a58e0c930a40b2e00be07cbc63
|
4
|
+
data.tar.gz: a8f226f7e6d0ba8bab49110ce8e932880d0895c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8b1a351577da23b21d137ea2df06932ec338be3647bbb28ea81b39fe5dfb8181049545084338a59aa22a92a1cb366b2b925eef54a3151c132333493ea2105f8
|
7
|
+
data.tar.gz: d611759608d9755d751c41ed446bf32eecb6ae653200ee44f78dce1d23d969c53b1a9db1ede456343121a22f98928adaea13d07d242d571e78db8a84fca1348a
|
data/README.md
CHANGED
@@ -4,8 +4,8 @@ re2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://
|
|
4
4
|
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
5
|
library".
|
6
6
|
|
7
|
-
**Current version:** 1.0.0
|
8
|
-
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, 2.2, 2.3, Rubinius
|
7
|
+
**Current version:** 1.1.0
|
8
|
+
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, 2.2, 2.3, Rubinius 3.8
|
9
9
|
|
10
10
|
Installation
|
11
11
|
------------
|
@@ -167,7 +167,8 @@ Contributions
|
|
167
167
|
|
168
168
|
* Thanks to [Jason Woods](https://github.com/driskell) who contributed the
|
169
169
|
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`;
|
170
|
-
* Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support
|
170
|
+
* Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support;
|
171
|
+
* Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty patterns and `RE2::Regexp#scan`.
|
171
172
|
|
172
173
|
Contact
|
173
174
|
-------
|
data/ext/re2/extconf.rb
CHANGED
@@ -6,6 +6,9 @@
|
|
6
6
|
|
7
7
|
require 'mkmf'
|
8
8
|
|
9
|
+
RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"] if ENV["CC"]
|
10
|
+
RbConfig::MAKEFILE_CONFIG["CXX"] = ENV["CXX"] if ENV["CXX"]
|
11
|
+
|
9
12
|
incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib")
|
10
13
|
|
11
14
|
$CFLAGS << " -Wall -Wextra -funroll-loops"
|
data/ext/re2/re2.cc
CHANGED
@@ -89,6 +89,7 @@ typedef struct {
|
|
89
89
|
typedef struct {
|
90
90
|
re2::StringPiece *input;
|
91
91
|
int number_of_capturing_groups;
|
92
|
+
bool eof;
|
92
93
|
VALUE regexp, text;
|
93
94
|
} re2_scanner;
|
94
95
|
|
@@ -172,6 +173,21 @@ static VALUE re2_scanner_string(VALUE self) {
|
|
172
173
|
return c->text;
|
173
174
|
}
|
174
175
|
|
176
|
+
/*
|
177
|
+
* Returns whether the scanner has consumed all input or not.
|
178
|
+
*
|
179
|
+
* @return [Boolean] whether the scanner has consumed all input or not
|
180
|
+
* @example
|
181
|
+
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
182
|
+
* c.eof? #=> false
|
183
|
+
*/
|
184
|
+
static VALUE re2_scanner_eof(VALUE self) {
|
185
|
+
re2_scanner *c;
|
186
|
+
Data_Get_Struct(self, re2_scanner, c);
|
187
|
+
|
188
|
+
return BOOL2RUBY(c->eof);
|
189
|
+
}
|
190
|
+
|
175
191
|
/*
|
176
192
|
* Rewind the scanner to the start of the string.
|
177
193
|
*
|
@@ -188,6 +204,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
188
204
|
Data_Get_Struct(self, re2_scanner, c);
|
189
205
|
|
190
206
|
c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
|
207
|
+
c->eof = false;
|
191
208
|
|
192
209
|
return self;
|
193
210
|
}
|
@@ -204,6 +221,8 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
204
221
|
*/
|
205
222
|
static VALUE re2_scanner_scan(VALUE self) {
|
206
223
|
int i;
|
224
|
+
size_t original_input_size, new_input_size;
|
225
|
+
bool input_advanced;
|
207
226
|
re2_pattern *p;
|
208
227
|
re2_scanner *c;
|
209
228
|
VALUE result;
|
@@ -215,6 +234,12 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
215
234
|
vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
216
235
|
vector<string> matches(c->number_of_capturing_groups);
|
217
236
|
|
237
|
+
if (c->eof) {
|
238
|
+
return Qnil;
|
239
|
+
}
|
240
|
+
|
241
|
+
original_input_size = c->input->size();
|
242
|
+
|
218
243
|
for (i = 0; i < c->number_of_capturing_groups; i++) {
|
219
244
|
matches[i] = "";
|
220
245
|
argv[i] = &matches[i];
|
@@ -224,6 +249,9 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
224
249
|
if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
|
225
250
|
c->number_of_capturing_groups)) {
|
226
251
|
result = rb_ary_new2(c->number_of_capturing_groups);
|
252
|
+
new_input_size = c->input->size();
|
253
|
+
input_advanced = new_input_size < original_input_size;
|
254
|
+
|
227
255
|
for (i = 0; i < c->number_of_capturing_groups; i++) {
|
228
256
|
if (matches[i].empty()) {
|
229
257
|
rb_ary_push(result, Qnil);
|
@@ -233,6 +261,14 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
233
261
|
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
|
234
262
|
}
|
235
263
|
}
|
264
|
+
|
265
|
+
/* Check whether we've exhausted the input yet. */
|
266
|
+
c->eof = new_input_size == 0;
|
267
|
+
|
268
|
+
/* If the match didn't advance the input, we need to do this ourselves. */
|
269
|
+
if (!input_advanced && new_input_size > 0) {
|
270
|
+
c->input->remove_prefix(1);
|
271
|
+
}
|
236
272
|
} else {
|
237
273
|
result = Qnil;
|
238
274
|
}
|
@@ -1216,6 +1252,7 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1216
1252
|
c->regexp = self;
|
1217
1253
|
c->text = text;
|
1218
1254
|
c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
|
1255
|
+
c->eof = false;
|
1219
1256
|
|
1220
1257
|
return scanner;
|
1221
1258
|
}
|
@@ -1349,6 +1386,8 @@ void Init_re2(void) {
|
|
1349
1386
|
|
1350
1387
|
rb_define_method(re2_cScanner, "string",
|
1351
1388
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
1389
|
+
rb_define_method(re2_cScanner, "eof?",
|
1390
|
+
RUBY_METHOD_FUNC(re2_scanner_eof), 0);
|
1352
1391
|
rb_define_method(re2_cScanner, "regexp",
|
1353
1392
|
RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
|
1354
1393
|
rb_define_method(re2_cScanner, "scan",
|
data/spec/re2/scanner_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
RSpec.describe RE2::Scanner do
|
2
4
|
describe "#regexp" do
|
3
5
|
it "returns the original pattern for the scanner" do
|
@@ -42,6 +44,57 @@ RSpec.describe RE2::Scanner do
|
|
42
44
|
scanner = r.scan("Foo bar")
|
43
45
|
expect(scanner.scan).to be_nil
|
44
46
|
end
|
47
|
+
|
48
|
+
it "returns an empty array if the input is empty" do
|
49
|
+
r = RE2::Regexp.new("")
|
50
|
+
scanner = r.scan("")
|
51
|
+
expect(scanner.scan).to eq([])
|
52
|
+
expect(scanner.scan).to be_nil
|
53
|
+
end
|
54
|
+
|
55
|
+
it "returns an array of nil with an empty input and capture" do
|
56
|
+
r = RE2::Regexp.new("()")
|
57
|
+
scanner = r.scan("")
|
58
|
+
expect(scanner.scan).to eq([nil])
|
59
|
+
expect(scanner.scan).to be_nil
|
60
|
+
end
|
61
|
+
|
62
|
+
it "returns an empty array for every match if the pattern is empty" do
|
63
|
+
r = RE2::Regexp.new("")
|
64
|
+
scanner = r.scan("Foo")
|
65
|
+
expect(scanner.scan).to eq([])
|
66
|
+
expect(scanner.scan).to eq([])
|
67
|
+
expect(scanner.scan).to eq([])
|
68
|
+
expect(scanner.scan).to eq([])
|
69
|
+
expect(scanner.scan).to be_nil
|
70
|
+
end
|
71
|
+
|
72
|
+
it "returns an array of nil if the pattern is an empty capturing group" do
|
73
|
+
r = RE2::Regexp.new("()")
|
74
|
+
scanner = r.scan("Foo")
|
75
|
+
expect(scanner.scan).to eq([nil])
|
76
|
+
expect(scanner.scan).to eq([nil])
|
77
|
+
expect(scanner.scan).to eq([nil])
|
78
|
+
expect(scanner.scan).to eq([nil])
|
79
|
+
expect(scanner.scan).to be_nil
|
80
|
+
end
|
81
|
+
|
82
|
+
it "returns array of nils with multiple empty capturing groups" do
|
83
|
+
r = RE2::Regexp.new("()()()")
|
84
|
+
scanner = r.scan("Foo")
|
85
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
86
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
87
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
88
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
89
|
+
expect(scanner.scan).to be_nil
|
90
|
+
end
|
91
|
+
|
92
|
+
it "supports empty groups with multibyte characters" do
|
93
|
+
r = RE2::Regexp.new("()€")
|
94
|
+
scanner = r.scan("€")
|
95
|
+
expect(scanner.scan).to eq([nil])
|
96
|
+
expect(scanner.scan).to be_nil
|
97
|
+
end
|
45
98
|
end
|
46
99
|
|
47
100
|
it "is enumerable" do
|
@@ -84,5 +137,62 @@ RSpec.describe RE2::Scanner do
|
|
84
137
|
scanner.rewind
|
85
138
|
expect(scanner.to_enum.first).to eq(["1"])
|
86
139
|
end
|
140
|
+
|
141
|
+
it "resets the eof? check" do
|
142
|
+
r = RE2::Regexp.new('(\d)')
|
143
|
+
scanner = r.scan("1")
|
144
|
+
scanner.scan
|
145
|
+
expect(scanner.eof?).to be_truthy
|
146
|
+
scanner.rewind
|
147
|
+
expect(scanner.eof?).to be_falsey
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
describe "#eof?" do
|
152
|
+
it "returns false if the input has not been consumed" do
|
153
|
+
r = RE2::Regexp.new('(\d)')
|
154
|
+
scanner = r.scan("1 2 3")
|
155
|
+
|
156
|
+
expect(scanner.eof?).to be_falsey
|
157
|
+
end
|
158
|
+
|
159
|
+
it "returns true if the input has been consumed" do
|
160
|
+
r = RE2::Regexp.new('(\d)')
|
161
|
+
scanner = r.scan("1")
|
162
|
+
scanner.scan
|
163
|
+
|
164
|
+
expect(scanner.eof?).to be_truthy
|
165
|
+
end
|
166
|
+
|
167
|
+
it "returns false if no match is made" do
|
168
|
+
r = RE2::Regexp.new('(\d)')
|
169
|
+
scanner = r.scan("a")
|
170
|
+
scanner.scan
|
171
|
+
|
172
|
+
expect(scanner.eof?).to be_falsey
|
173
|
+
end
|
174
|
+
|
175
|
+
it "returns false with an empty input that has not been scanned" do
|
176
|
+
r = RE2::Regexp.new("")
|
177
|
+
scanner = r.scan("")
|
178
|
+
|
179
|
+
expect(scanner.eof?).to be_falsey
|
180
|
+
end
|
181
|
+
|
182
|
+
it "returns false with an empty input that has not been matched" do
|
183
|
+
r = RE2::Regexp.new('(\d)')
|
184
|
+
scanner = r.scan("")
|
185
|
+
scanner.scan
|
186
|
+
|
187
|
+
expect(scanner.eof?).to be_falsey
|
188
|
+
end
|
189
|
+
|
190
|
+
it "returns true with an empty input that has been matched" do
|
191
|
+
r = RE2::Regexp.new("")
|
192
|
+
scanner = r.scan("")
|
193
|
+
scanner.scan
|
194
|
+
|
195
|
+
expect(scanner.eof?).to be_truthy
|
196
|
+
end
|
87
197
|
end
|
88
198
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -80,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
80
80
|
version: '0'
|
81
81
|
requirements: []
|
82
82
|
rubyforge_project:
|
83
|
-
rubygems_version: 2.
|
83
|
+
rubygems_version: 2.6.11
|
84
84
|
signing_key:
|
85
85
|
specification_version: 4
|
86
86
|
summary: Ruby bindings to re2.
|
@@ -92,4 +92,3 @@ test_files:
|
|
92
92
|
- spec/re2/match_data_spec.rb
|
93
93
|
- spec/re2/string_spec.rb
|
94
94
|
- spec/re2/scanner_spec.rb
|
95
|
-
has_rdoc:
|