re2 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/ext/re2/extconf.rb +3 -0
- data/ext/re2/re2.cc +39 -0
- data/spec/re2/scanner_spec.rb +110 -0
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfd64dee3272c8a58e0c930a40b2e00be07cbc63
|
4
|
+
data.tar.gz: a8f226f7e6d0ba8bab49110ce8e932880d0895c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8b1a351577da23b21d137ea2df06932ec338be3647bbb28ea81b39fe5dfb8181049545084338a59aa22a92a1cb366b2b925eef54a3151c132333493ea2105f8
|
7
|
+
data.tar.gz: d611759608d9755d751c41ed446bf32eecb6ae653200ee44f78dce1d23d969c53b1a9db1ede456343121a22f98928adaea13d07d242d571e78db8a84fca1348a
|
data/README.md
CHANGED
@@ -4,8 +4,8 @@ re2 [](http://
|
|
4
4
|
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
5
|
library".
|
6
6
|
|
7
|
-
**Current version:** 1.0.0
|
8
|
-
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, 2.2, 2.3, Rubinius
|
7
|
+
**Current version:** 1.1.0
|
8
|
+
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, 2.2, 2.3, Rubinius 3.8
|
9
9
|
|
10
10
|
Installation
|
11
11
|
------------
|
@@ -167,7 +167,8 @@ Contributions
|
|
167
167
|
|
168
168
|
* Thanks to [Jason Woods](https://github.com/driskell) who contributed the
|
169
169
|
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`;
|
170
|
-
* Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support
|
170
|
+
* Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support;
|
171
|
+
* Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty patterns and `RE2::Regexp#scan`.
|
171
172
|
|
172
173
|
Contact
|
173
174
|
-------
|
data/ext/re2/extconf.rb
CHANGED
@@ -6,6 +6,9 @@
|
|
6
6
|
|
7
7
|
require 'mkmf'
|
8
8
|
|
9
|
+
RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"] if ENV["CC"]
|
10
|
+
RbConfig::MAKEFILE_CONFIG["CXX"] = ENV["CXX"] if ENV["CXX"]
|
11
|
+
|
9
12
|
incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib")
|
10
13
|
|
11
14
|
$CFLAGS << " -Wall -Wextra -funroll-loops"
|
data/ext/re2/re2.cc
CHANGED
@@ -89,6 +89,7 @@ typedef struct {
|
|
89
89
|
typedef struct {
|
90
90
|
re2::StringPiece *input;
|
91
91
|
int number_of_capturing_groups;
|
92
|
+
bool eof;
|
92
93
|
VALUE regexp, text;
|
93
94
|
} re2_scanner;
|
94
95
|
|
@@ -172,6 +173,21 @@ static VALUE re2_scanner_string(VALUE self) {
|
|
172
173
|
return c->text;
|
173
174
|
}
|
174
175
|
|
176
|
+
/*
|
177
|
+
* Returns whether the scanner has consumed all input or not.
|
178
|
+
*
|
179
|
+
* @return [Boolean] whether the scanner has consumed all input or not
|
180
|
+
* @example
|
181
|
+
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
182
|
+
* c.eof? #=> true
|
183
|
+
*/
|
184
|
+
static VALUE re2_scanner_eof(VALUE self) {
|
185
|
+
re2_scanner *c;
|
186
|
+
Data_Get_Struct(self, re2_scanner, c);
|
187
|
+
|
188
|
+
return BOOL2RUBY(c->eof);
|
189
|
+
}
|
190
|
+
|
175
191
|
/*
|
176
192
|
* Rewind the scanner to the start of the string.
|
177
193
|
*
|
@@ -188,6 +204,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
188
204
|
Data_Get_Struct(self, re2_scanner, c);
|
189
205
|
|
190
206
|
c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
|
207
|
+
c->eof = false;
|
191
208
|
|
192
209
|
return self;
|
193
210
|
}
|
@@ -204,6 +221,8 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
204
221
|
*/
|
205
222
|
static VALUE re2_scanner_scan(VALUE self) {
|
206
223
|
int i;
|
224
|
+
size_t original_input_size, new_input_size;
|
225
|
+
bool input_advanced;
|
207
226
|
re2_pattern *p;
|
208
227
|
re2_scanner *c;
|
209
228
|
VALUE result;
|
@@ -215,6 +234,12 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
215
234
|
vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
216
235
|
vector<string> matches(c->number_of_capturing_groups);
|
217
236
|
|
237
|
+
if (c->eof) {
|
238
|
+
return Qnil;
|
239
|
+
}
|
240
|
+
|
241
|
+
original_input_size = c->input->size();
|
242
|
+
|
218
243
|
for (i = 0; i < c->number_of_capturing_groups; i++) {
|
219
244
|
matches[i] = "";
|
220
245
|
argv[i] = &matches[i];
|
@@ -224,6 +249,9 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
224
249
|
if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
|
225
250
|
c->number_of_capturing_groups)) {
|
226
251
|
result = rb_ary_new2(c->number_of_capturing_groups);
|
252
|
+
new_input_size = c->input->size();
|
253
|
+
input_advanced = new_input_size < original_input_size;
|
254
|
+
|
227
255
|
for (i = 0; i < c->number_of_capturing_groups; i++) {
|
228
256
|
if (matches[i].empty()) {
|
229
257
|
rb_ary_push(result, Qnil);
|
@@ -233,6 +261,14 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
233
261
|
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
|
234
262
|
}
|
235
263
|
}
|
264
|
+
|
265
|
+
/* Check whether we've exhausted the input yet. */
|
266
|
+
c->eof = new_input_size == 0;
|
267
|
+
|
268
|
+
/* If the match didn't advance the input, we need to do this ourselves. */
|
269
|
+
if (!input_advanced && new_input_size > 0) {
|
270
|
+
c->input->remove_prefix(1);
|
271
|
+
}
|
236
272
|
} else {
|
237
273
|
result = Qnil;
|
238
274
|
}
|
@@ -1216,6 +1252,7 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1216
1252
|
c->regexp = self;
|
1217
1253
|
c->text = text;
|
1218
1254
|
c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
|
1255
|
+
c->eof = false;
|
1219
1256
|
|
1220
1257
|
return scanner;
|
1221
1258
|
}
|
@@ -1349,6 +1386,8 @@ void Init_re2(void) {
|
|
1349
1386
|
|
1350
1387
|
rb_define_method(re2_cScanner, "string",
|
1351
1388
|
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
1389
|
+
rb_define_method(re2_cScanner, "eof?",
|
1390
|
+
RUBY_METHOD_FUNC(re2_scanner_eof), 0);
|
1352
1391
|
rb_define_method(re2_cScanner, "regexp",
|
1353
1392
|
RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
|
1354
1393
|
rb_define_method(re2_cScanner, "scan",
|
data/spec/re2/scanner_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
RSpec.describe RE2::Scanner do
|
2
4
|
describe "#regexp" do
|
3
5
|
it "returns the original pattern for the scanner" do
|
@@ -42,6 +44,57 @@ RSpec.describe RE2::Scanner do
|
|
42
44
|
scanner = r.scan("Foo bar")
|
43
45
|
expect(scanner.scan).to be_nil
|
44
46
|
end
|
47
|
+
|
48
|
+
it "returns an empty array if the input is empty" do
|
49
|
+
r = RE2::Regexp.new("")
|
50
|
+
scanner = r.scan("")
|
51
|
+
expect(scanner.scan).to eq([])
|
52
|
+
expect(scanner.scan).to be_nil
|
53
|
+
end
|
54
|
+
|
55
|
+
it "returns an array of nil with an empty input and capture" do
|
56
|
+
r = RE2::Regexp.new("()")
|
57
|
+
scanner = r.scan("")
|
58
|
+
expect(scanner.scan).to eq([nil])
|
59
|
+
expect(scanner.scan).to be_nil
|
60
|
+
end
|
61
|
+
|
62
|
+
it "returns an empty array for every match if the pattern is empty" do
|
63
|
+
r = RE2::Regexp.new("")
|
64
|
+
scanner = r.scan("Foo")
|
65
|
+
expect(scanner.scan).to eq([])
|
66
|
+
expect(scanner.scan).to eq([])
|
67
|
+
expect(scanner.scan).to eq([])
|
68
|
+
expect(scanner.scan).to eq([])
|
69
|
+
expect(scanner.scan).to be_nil
|
70
|
+
end
|
71
|
+
|
72
|
+
it "returns an array of nil if the pattern is an empty capturing group" do
|
73
|
+
r = RE2::Regexp.new("()")
|
74
|
+
scanner = r.scan("Foo")
|
75
|
+
expect(scanner.scan).to eq([nil])
|
76
|
+
expect(scanner.scan).to eq([nil])
|
77
|
+
expect(scanner.scan).to eq([nil])
|
78
|
+
expect(scanner.scan).to eq([nil])
|
79
|
+
expect(scanner.scan).to be_nil
|
80
|
+
end
|
81
|
+
|
82
|
+
it "returns array of nils with multiple empty capturing groups" do
|
83
|
+
r = RE2::Regexp.new("()()()")
|
84
|
+
scanner = r.scan("Foo")
|
85
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
86
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
87
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
88
|
+
expect(scanner.scan).to eq([nil, nil, nil])
|
89
|
+
expect(scanner.scan).to be_nil
|
90
|
+
end
|
91
|
+
|
92
|
+
it "supports empty groups with multibyte characters" do
|
93
|
+
r = RE2::Regexp.new("()€")
|
94
|
+
scanner = r.scan("€")
|
95
|
+
expect(scanner.scan).to eq([nil])
|
96
|
+
expect(scanner.scan).to be_nil
|
97
|
+
end
|
45
98
|
end
|
46
99
|
|
47
100
|
it "is enumerable" do
|
@@ -84,5 +137,62 @@ RSpec.describe RE2::Scanner do
|
|
84
137
|
scanner.rewind
|
85
138
|
expect(scanner.to_enum.first).to eq(["1"])
|
86
139
|
end
|
140
|
+
|
141
|
+
it "resets the eof? check" do
|
142
|
+
r = RE2::Regexp.new('(\d)')
|
143
|
+
scanner = r.scan("1")
|
144
|
+
scanner.scan
|
145
|
+
expect(scanner.eof?).to be_truthy
|
146
|
+
scanner.rewind
|
147
|
+
expect(scanner.eof?).to be_falsey
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
describe "#eof?" do
|
152
|
+
it "returns false if the input has not been consumed" do
|
153
|
+
r = RE2::Regexp.new('(\d)')
|
154
|
+
scanner = r.scan("1 2 3")
|
155
|
+
|
156
|
+
expect(scanner.eof?).to be_falsey
|
157
|
+
end
|
158
|
+
|
159
|
+
it "returns true if the input has been consumed" do
|
160
|
+
r = RE2::Regexp.new('(\d)')
|
161
|
+
scanner = r.scan("1")
|
162
|
+
scanner.scan
|
163
|
+
|
164
|
+
expect(scanner.eof?).to be_truthy
|
165
|
+
end
|
166
|
+
|
167
|
+
it "returns false if no match is made" do
|
168
|
+
r = RE2::Regexp.new('(\d)')
|
169
|
+
scanner = r.scan("a")
|
170
|
+
scanner.scan
|
171
|
+
|
172
|
+
expect(scanner.eof?).to be_falsey
|
173
|
+
end
|
174
|
+
|
175
|
+
it "returns false with an empty input that has not been scanned" do
|
176
|
+
r = RE2::Regexp.new("")
|
177
|
+
scanner = r.scan("")
|
178
|
+
|
179
|
+
expect(scanner.eof?).to be_falsey
|
180
|
+
end
|
181
|
+
|
182
|
+
it "returns false with an empty input that has not been matched" do
|
183
|
+
r = RE2::Regexp.new('(\d)')
|
184
|
+
scanner = r.scan("")
|
185
|
+
scanner.scan
|
186
|
+
|
187
|
+
expect(scanner.eof?).to be_falsey
|
188
|
+
end
|
189
|
+
|
190
|
+
it "returns true with an empty input that has been matched" do
|
191
|
+
r = RE2::Regexp.new("")
|
192
|
+
scanner = r.scan("")
|
193
|
+
scanner.scan
|
194
|
+
|
195
|
+
expect(scanner.eof?).to be_truthy
|
196
|
+
end
|
87
197
|
end
|
88
198
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Mucur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -80,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
80
80
|
version: '0'
|
81
81
|
requirements: []
|
82
82
|
rubyforge_project:
|
83
|
-
rubygems_version: 2.
|
83
|
+
rubygems_version: 2.6.11
|
84
84
|
signing_key:
|
85
85
|
specification_version: 4
|
86
86
|
summary: Ruby bindings to re2.
|
@@ -92,4 +92,3 @@ test_files:
|
|
92
92
|
- spec/re2/match_data_spec.rb
|
93
93
|
- spec/re2/string_spec.rb
|
94
94
|
- spec/re2/scanner_spec.rb
|
95
|
-
has_rdoc:
|