re2 2.0.0.beta1-aarch64-linux → 2.1.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -9
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +1 -1
- data/README.md +73 -18
- data/Rakefile +1 -24
- data/dependencies.yml +3 -3
- data/ext/re2/extconf.rb +63 -9
- data/ext/re2/re2.cc +74 -23
- data/lib/2.6/re2.so +0 -0
- data/lib/2.7/re2.so +0 -0
- data/lib/3.0/re2.so +0 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/re2/version.rb +1 -1
- data/re2.gemspec +7 -6
- data/spec/kernel_spec.rb +3 -3
- data/spec/re2/match_data_spec.rb +24 -0
- data/spec/re2/regexp_spec.rb +6 -0
- data/spec/re2/scanner_spec.rb +76 -22
- data/spec/re2/set_spec.rb +41 -1
- data/spec/re2/string_spec.rb +7 -3
- data/spec/re2_spec.rb +104 -10
- data/spec/spec_helper.rb +10 -0
- metadata +11 -7
data/ext/re2/re2.cc
CHANGED
@@ -39,18 +39,9 @@ using std::vector;
|
|
39
39
|
rb_enc_associate_index(_string, _enc); \
|
40
40
|
_string; \
|
41
41
|
})
|
42
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
43
|
-
({ \
|
44
|
-
VALUE _string = rb_str_new(str, length); \
|
45
|
-
int _enc = rb_enc_get_index(str2); \
|
46
|
-
rb_enc_associate_index(_string, _enc); \
|
47
|
-
_string; \
|
48
|
-
})
|
49
42
|
#else
|
50
43
|
#define ENCODED_STR_NEW(str, length, encoding) \
|
51
44
|
rb_str_new((const char *)str, (long)length)
|
52
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
53
|
-
rb_str_new((const char *)str, (long)length)
|
54
45
|
#endif
|
55
46
|
|
56
47
|
#ifdef HAVE_RB_STR_SUBLEN
|
@@ -284,6 +275,10 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
284
275
|
* Scan the given text incrementally for matches, returning an array of
|
285
276
|
* matches on each subsequent call. Returns nil if no matches are found.
|
286
277
|
*
|
278
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
279
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
280
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
281
|
+
*
|
287
282
|
* @return [Array<String>] the matches.
|
288
283
|
* @example
|
289
284
|
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
@@ -312,7 +307,6 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
312
307
|
original_input_size = c->input->size();
|
313
308
|
|
314
309
|
for (i = 0; i < c->number_of_capturing_groups; i++) {
|
315
|
-
matches[i] = "";
|
316
310
|
argv[i] = &matches[i];
|
317
311
|
args[i] = &argv[i];
|
318
312
|
}
|
@@ -504,6 +498,10 @@ static VALUE re2_regexp_allocate(VALUE klass) {
|
|
504
498
|
/*
|
505
499
|
* Returns the array of matches.
|
506
500
|
*
|
501
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
502
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
503
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
504
|
+
*
|
507
505
|
* @return [Array<String, nil>] the array of matches
|
508
506
|
* @example
|
509
507
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -579,6 +577,10 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
579
577
|
/*
|
580
578
|
* Retrieve zero, one or more matches by index or name.
|
581
579
|
*
|
580
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
581
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
582
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
583
|
+
*
|
582
584
|
* @return [Array<String, nil>, String, Boolean]
|
583
585
|
*
|
584
586
|
* @overload [](index)
|
@@ -646,6 +648,10 @@ static VALUE re2_matchdata_to_s(VALUE self) {
|
|
646
648
|
/*
|
647
649
|
* Returns a printable version of the match.
|
648
650
|
*
|
651
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
652
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
653
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
654
|
+
*
|
649
655
|
* @return [String] a printable version of the match
|
650
656
|
* @example
|
651
657
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -690,6 +696,10 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
690
696
|
/*
|
691
697
|
* Returns the array of submatches for pattern matching.
|
692
698
|
*
|
699
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
700
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
701
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
702
|
+
*
|
693
703
|
* @return [Array<String, nil>] the array of submatches
|
694
704
|
* @example
|
695
705
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -735,6 +745,10 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
|
|
735
745
|
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
746
|
* order but an invalid name will cause the hash to be immediately returned.
|
737
747
|
*
|
748
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
749
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
750
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
751
|
+
*
|
738
752
|
* @return [Hash] a hash of capturing group names to submatches
|
739
753
|
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
754
|
* @example
|
@@ -866,6 +880,10 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
866
880
|
/*
|
867
881
|
* Returns a printable version of the regular expression +re2+.
|
868
882
|
*
|
883
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
884
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
885
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
886
|
+
*
|
869
887
|
* @return [String] a printable version of the regular expression
|
870
888
|
* @example
|
871
889
|
* re2 = RE2::Regexp.new("woo?")
|
@@ -889,6 +907,10 @@ static VALUE re2_regexp_inspect(VALUE self) {
|
|
889
907
|
/*
|
890
908
|
* Returns a string version of the regular expression +re2+.
|
891
909
|
*
|
910
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
911
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
912
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
913
|
+
*
|
892
914
|
* @return [String] a string version of the regular expression
|
893
915
|
* @example
|
894
916
|
* re2 = RE2::Regexp.new("woo?")
|
@@ -1116,6 +1138,10 @@ static VALUE re2_regexp_error(VALUE self) {
|
|
1116
1138
|
* If the RE2 could not be created properly, returns
|
1117
1139
|
* the offending portion of the regexp otherwise returns nil.
|
1118
1140
|
*
|
1141
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1142
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1143
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1144
|
+
*
|
1119
1145
|
* @return [String, nil] the offending portion of the regexp or nil
|
1120
1146
|
*/
|
1121
1147
|
static VALUE re2_regexp_error_arg(VALUE self) {
|
@@ -1212,6 +1238,10 @@ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
|
|
1212
1238
|
/*
|
1213
1239
|
* Returns a hash of names to capturing indices of groups.
|
1214
1240
|
*
|
1241
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1242
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1243
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1244
|
+
*
|
1215
1245
|
* @return [Hash] a hash of names to capturing indices
|
1216
1246
|
*/
|
1217
1247
|
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
@@ -1242,16 +1272,23 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1242
1272
|
* @return [Boolean, RE2::MatchData]
|
1243
1273
|
*
|
1244
1274
|
* @overload match(text)
|
1245
|
-
* Returns an {RE2::MatchData} containing the matching
|
1246
|
-
*
|
1247
|
-
*
|
1275
|
+
* Returns an {RE2::MatchData} containing the matching pattern and all
|
1276
|
+
* subpatterns resulting from looking for the regexp in +text+ if the pattern
|
1277
|
+
* contains capturing groups.
|
1278
|
+
*
|
1279
|
+
* Returns either true or false indicating whether a successful match was
|
1280
|
+
* made if the pattern contains no capturing groups.
|
1248
1281
|
*
|
1249
1282
|
* @param [String] text the text to search
|
1250
|
-
* @return [RE2::MatchData] the
|
1283
|
+
* @return [RE2::MatchData] if the pattern contains capturing groups
|
1284
|
+
* @return [Boolean] if the pattern does not contain capturing groups
|
1251
1285
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1252
|
-
* @example
|
1286
|
+
* @example Matching with capturing groups
|
1253
1287
|
* r = RE2::Regexp.new('w(o)(o)')
|
1254
1288
|
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1289
|
+
* @example Matching without capturing groups
|
1290
|
+
* r = RE2::Regexp.new('woo')
|
1291
|
+
* r.match('woo') #=> true
|
1255
1292
|
*
|
1256
1293
|
* @overload match(text, 0)
|
1257
1294
|
* Returns either true or false indicating whether a
|
@@ -1390,6 +1427,10 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1390
1427
|
* Returns a copy of +str+ with the first occurrence +pattern+
|
1391
1428
|
* replaced with +rewrite+.
|
1392
1429
|
*
|
1430
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1431
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1432
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1433
|
+
*
|
1393
1434
|
* @param [String] str the string to modify
|
1394
1435
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1395
1436
|
* @param [String] rewrite the string to replace with
|
@@ -1404,7 +1445,9 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
|
1404
1445
|
UNUSED(self);
|
1405
1446
|
re2_pattern *p;
|
1406
1447
|
|
1407
|
-
/*
|
1448
|
+
/* Take a copy of str so it can be modified in-place by
|
1449
|
+
* RE2::Replace.
|
1450
|
+
*/
|
1408
1451
|
string str_as_string(StringValuePtr(str));
|
1409
1452
|
|
1410
1453
|
/* Do the replacement. */
|
@@ -1418,8 +1461,8 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
|
1418
1461
|
RE2::Replace(&str_as_string, StringValuePtr(pattern),
|
1419
1462
|
StringValuePtr(rewrite));
|
1420
1463
|
|
1421
|
-
return
|
1422
|
-
|
1464
|
+
return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
|
1465
|
+
"UTF-8");
|
1423
1466
|
}
|
1424
1467
|
|
1425
1468
|
}
|
@@ -1427,6 +1470,10 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
|
1427
1470
|
/*
|
1428
1471
|
* Return a copy of +str+ with +pattern+ replaced by +rewrite+.
|
1429
1472
|
*
|
1473
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1474
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1475
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1476
|
+
*
|
1430
1477
|
* @param [String] str the string to modify
|
1431
1478
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1432
1479
|
* @param [String] rewrite the string to replace with
|
@@ -1440,7 +1487,9 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
|
1440
1487
|
VALUE rewrite) {
|
1441
1488
|
UNUSED(self);
|
1442
1489
|
|
1443
|
-
/*
|
1490
|
+
/* Take a copy of str so it can be modified in-place by
|
1491
|
+
* RE2::GlobalReplace.
|
1492
|
+
*/
|
1444
1493
|
re2_pattern *p;
|
1445
1494
|
string str_as_string(StringValuePtr(str));
|
1446
1495
|
|
@@ -1455,8 +1504,8 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
|
1455
1504
|
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
|
1456
1505
|
StringValuePtr(rewrite));
|
1457
1506
|
|
1458
|
-
return
|
1459
|
-
|
1507
|
+
return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
|
1508
|
+
"UTF-8");
|
1460
1509
|
}
|
1461
1510
|
}
|
1462
1511
|
|
@@ -1579,11 +1628,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1579
1628
|
* set.add("def") #=> 1
|
1580
1629
|
*/
|
1581
1630
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1582
|
-
|
1631
|
+
StringValue(pattern);
|
1583
1632
|
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
1584
1633
|
std::string err;
|
1585
1634
|
re2_set *s;
|
1586
1635
|
Data_Get_Struct(self, re2_set, s);
|
1636
|
+
|
1587
1637
|
int index = s->set->Add(regex, &err);
|
1588
1638
|
if (index < 0) {
|
1589
1639
|
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
|
@@ -1669,7 +1719,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1669
1719
|
VALUE str, options, exception_option;
|
1670
1720
|
bool raise_exception = true;
|
1671
1721
|
rb_scan_args(argc, argv, "11", &str, &options);
|
1672
|
-
|
1722
|
+
|
1723
|
+
StringValue(str);
|
1673
1724
|
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
|
1674
1725
|
std::vector<int> v;
|
1675
1726
|
re2_set *s;
|
data/lib/2.6/re2.so
ADDED
Binary file
|
data/lib/2.7/re2.so
CHANGED
Binary file
|
data/lib/3.0/re2.so
CHANGED
Binary file
|
data/lib/3.1/re2.so
CHANGED
Binary file
|
data/lib/3.2/re2.so
CHANGED
Binary file
|
data/lib/re2/version.rb
CHANGED
data/re2.gemspec
CHANGED
@@ -2,14 +2,14 @@ require_relative 'lib/re2/version'
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "re2"
|
5
|
-
s.summary = "Ruby bindings to
|
6
|
-
s.description = 'Ruby bindings to
|
5
|
+
s.summary = "Ruby bindings to RE2."
|
6
|
+
s.description = 'Ruby bindings to RE2, "a fast, safe, thread-friendly alternative to backtracking regular expression engines like those used in PCRE, Perl, and Python".'
|
7
7
|
s.version = RE2::VERSION
|
8
|
-
s.authors = ["Paul Mucur"]
|
8
|
+
s.authors = ["Paul Mucur", "Stan Hu"]
|
9
9
|
s.homepage = "https://github.com/mudge/re2"
|
10
10
|
s.extensions = ["ext/re2/extconf.rb"]
|
11
11
|
s.license = "BSD-3-Clause"
|
12
|
-
s.required_ruby_version = ">= 2.
|
12
|
+
s.required_ruby_version = ">= 2.6.0"
|
13
13
|
s.files = [
|
14
14
|
".rspec",
|
15
15
|
"dependencies.yml",
|
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
"lib/re2/string.rb",
|
23
23
|
"lib/re2/version.rb",
|
24
24
|
"LICENSE.txt",
|
25
|
+
"LICENSE-DEPENDENCIES.txt",
|
25
26
|
"README.md",
|
26
27
|
"Rakefile",
|
27
28
|
"re2.gemspec"
|
@@ -36,8 +37,8 @@ Gem::Specification.new do |s|
|
|
36
37
|
"spec/re2/set_spec.rb",
|
37
38
|
"spec/re2/scanner_spec.rb"
|
38
39
|
]
|
39
|
-
s.add_development_dependency
|
40
|
-
s.add_development_dependency
|
40
|
+
s.add_development_dependency("rake-compiler", "~> 1.2.1")
|
41
|
+
s.add_development_dependency("rake-compiler-dock", "~> 1.3.0")
|
41
42
|
s.add_development_dependency("rspec", "~> 3.2")
|
42
43
|
s.add_runtime_dependency("mini_portile2", "~> 2.8.4") # keep version in sync with extconf.rb
|
43
44
|
end
|
data/spec/kernel_spec.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
RSpec.describe Kernel do
|
2
|
-
describe "
|
2
|
+
describe ".RE2" do
|
3
3
|
it "returns an RE2::Regexp instance given a pattern" do
|
4
4
|
expect(RE2('w(o)(o)')).to be_a(RE2::Regexp)
|
5
5
|
end
|
6
6
|
|
7
7
|
it "returns an RE2::Regexp instance given a pattern and options" do
|
8
8
|
re = RE2('w(o)(o)', :case_sensitive => false)
|
9
|
-
|
10
|
-
expect(re).
|
9
|
+
|
10
|
+
expect(re).not_to be_case_sensitive
|
11
11
|
end
|
12
12
|
end
|
13
13
|
end
|
data/spec/re2/match_data_spec.rb
CHANGED
@@ -10,6 +10,18 @@ RSpec.describe RE2::MatchData do
|
|
10
10
|
a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a
|
11
11
|
expect(a).to eq(["ab", nil, "a", "b"])
|
12
12
|
end
|
13
|
+
|
14
|
+
it "returns UTF-8 strings if the pattern is UTF-8" do
|
15
|
+
a = RE2::Regexp.new('w(o)(o)').match('woo').to_a
|
16
|
+
|
17
|
+
expect(a.map(&:encoding)).to all eq(Encoding::UTF_8)
|
18
|
+
end
|
19
|
+
|
20
|
+
it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
|
21
|
+
a = RE2::Regexp.new('w(o)(o)', :utf8 => false).match('woo').to_a
|
22
|
+
|
23
|
+
expect(a.map(&:encoding)).to all eq(Encoding::ISO_8859_1)
|
24
|
+
end
|
13
25
|
end
|
14
26
|
|
15
27
|
describe "#[]" do
|
@@ -19,6 +31,18 @@ RSpec.describe RE2::MatchData do
|
|
19
31
|
expect(md[2]).to eq("23")
|
20
32
|
end
|
21
33
|
|
34
|
+
it "returns a UTF-8 string by numerical index if the pattern is UTF-8" do
|
35
|
+
md = RE2::Regexp.new('(\d)(\d{2})').match("123")
|
36
|
+
|
37
|
+
expect(md[1].encoding).to eq(Encoding::UTF_8)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
|
41
|
+
md = RE2::Regexp.new('(\d)(\d{2})', :utf8 => false).match("123")
|
42
|
+
|
43
|
+
expect(md[1].encoding).to eq(Encoding::ISO_8859_1)
|
44
|
+
end
|
45
|
+
|
22
46
|
it "has the whole match as the 0th item" do
|
23
47
|
md = RE2::Regexp.new('(\d)(\d{2})').match("123")
|
24
48
|
expect(md[0]).to eq("123")
|
data/spec/re2/regexp_spec.rb
CHANGED
@@ -287,6 +287,12 @@ RSpec.describe RE2::Regexp do
|
|
287
287
|
expect(re.match("My age is 99", 0)).to eq(false)
|
288
288
|
end
|
289
289
|
|
290
|
+
it "returns only true or false if the pattern has no capturing groups" do
|
291
|
+
re = RE2::Regexp.new('My name is')
|
292
|
+
|
293
|
+
expect(re.match('My name is Robert Paulson')).to eq(true)
|
294
|
+
end
|
295
|
+
|
290
296
|
it "raises an exception when given nil" do
|
291
297
|
expect { re.match(nil) }.to raise_error(TypeError)
|
292
298
|
end
|
data/spec/re2/scanner_spec.rb
CHANGED
@@ -21,9 +21,10 @@ RSpec.describe RE2::Scanner do
|
|
21
21
|
end
|
22
22
|
|
23
23
|
describe "#scan" do
|
24
|
-
it "returns the next array of matches" do
|
24
|
+
it "returns the next array of matches", :aggregate_failures do
|
25
25
|
r = RE2::Regexp.new('(\w+)')
|
26
26
|
scanner = r.scan("It is a truth universally acknowledged")
|
27
|
+
|
27
28
|
expect(scanner.scan).to eq(["It"])
|
28
29
|
expect(scanner.scan).to eq(["is"])
|
29
30
|
expect(scanner.scan).to eq(["a"])
|
@@ -33,41 +34,73 @@ RSpec.describe RE2::Scanner do
|
|
33
34
|
expect(scanner.scan).to be_nil
|
34
35
|
end
|
35
36
|
|
37
|
+
it "returns UTF-8 matches if the pattern is UTF-8" do
|
38
|
+
r = RE2::Regexp.new('(\w+)')
|
39
|
+
scanner = r.scan("It")
|
40
|
+
matches = scanner.scan
|
41
|
+
|
42
|
+
expect(matches.first.encoding).to eq(Encoding::UTF_8)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "returns ISO-8859-1 matches if the pattern is not UTF-8" do
|
46
|
+
r = RE2::Regexp.new('(\w+)', :utf8 => false)
|
47
|
+
scanner = r.scan("It")
|
48
|
+
matches = scanner.scan
|
49
|
+
|
50
|
+
expect(matches.first.encoding).to eq(Encoding::ISO_8859_1)
|
51
|
+
end
|
52
|
+
|
53
|
+
it "returns multiple capturing groups at a time", :aggregate_failures do
|
54
|
+
r = RE2::Regexp.new('(\w+) (\w+)')
|
55
|
+
scanner = r.scan("It is a truth universally acknowledged")
|
56
|
+
|
57
|
+
expect(scanner.scan).to eq(["It", "is"])
|
58
|
+
expect(scanner.scan).to eq(["a", "truth"])
|
59
|
+
expect(scanner.scan).to eq(["universally", "acknowledged"])
|
60
|
+
expect(scanner.scan).to be_nil
|
61
|
+
end
|
62
|
+
|
36
63
|
it "returns an empty array if there are no capturing groups" do
|
37
64
|
r = RE2::Regexp.new('\w+')
|
38
65
|
scanner = r.scan("Foo bar")
|
66
|
+
|
39
67
|
expect(scanner.scan).to eq([])
|
40
68
|
end
|
41
69
|
|
42
70
|
it "returns nil if there is no match" do
|
43
71
|
r = RE2::Regexp.new('\d+')
|
44
72
|
scanner = r.scan("Foo bar")
|
73
|
+
|
45
74
|
expect(scanner.scan).to be_nil
|
46
75
|
end
|
47
76
|
|
48
77
|
it "returns nil if the regexp is invalid" do
|
49
78
|
r = RE2::Regexp.new('???', :log_errors => false)
|
50
79
|
scanner = r.scan("Foo bar")
|
80
|
+
|
51
81
|
expect(scanner.scan).to be_nil
|
52
82
|
end
|
53
83
|
|
54
|
-
it "returns an empty array if the input is empty" do
|
84
|
+
it "returns an empty array if the input is empty", :aggregate_failures do
|
55
85
|
r = RE2::Regexp.new("")
|
56
86
|
scanner = r.scan("")
|
87
|
+
|
57
88
|
expect(scanner.scan).to eq([])
|
58
89
|
expect(scanner.scan).to be_nil
|
59
90
|
end
|
60
91
|
|
61
|
-
it "returns an array of nil with an empty input and capture" do
|
92
|
+
it "returns an array of nil with an empty input and capture", :aggregate_failures do
|
62
93
|
r = RE2::Regexp.new("()")
|
63
94
|
scanner = r.scan("")
|
95
|
+
|
64
96
|
expect(scanner.scan).to eq([nil])
|
65
97
|
expect(scanner.scan).to be_nil
|
66
98
|
end
|
67
99
|
|
68
|
-
it "returns an empty array for every match if the pattern is empty" do
|
100
|
+
it "returns an empty array for every match if the pattern is empty", :aggregate_failures do
|
69
101
|
r = RE2::Regexp.new("")
|
70
102
|
scanner = r.scan("Foo")
|
103
|
+
|
71
104
|
expect(scanner.scan).to eq([])
|
72
105
|
expect(scanner.scan).to eq([])
|
73
106
|
expect(scanner.scan).to eq([])
|
@@ -75,9 +108,10 @@ RSpec.describe RE2::Scanner do
|
|
75
108
|
expect(scanner.scan).to be_nil
|
76
109
|
end
|
77
110
|
|
78
|
-
it "returns an array of nil if the pattern is an empty capturing group" do
|
111
|
+
it "returns an array of nil if the pattern is an empty capturing group", :aggregate_failures do
|
79
112
|
r = RE2::Regexp.new("()")
|
80
113
|
scanner = r.scan("Foo")
|
114
|
+
|
81
115
|
expect(scanner.scan).to eq([nil])
|
82
116
|
expect(scanner.scan).to eq([nil])
|
83
117
|
expect(scanner.scan).to eq([nil])
|
@@ -85,9 +119,10 @@ RSpec.describe RE2::Scanner do
|
|
85
119
|
expect(scanner.scan).to be_nil
|
86
120
|
end
|
87
121
|
|
88
|
-
it "returns array of nils with multiple empty capturing groups" do
|
122
|
+
it "returns array of nils with multiple empty capturing groups", :aggregate_failures do
|
89
123
|
r = RE2::Regexp.new("()()()")
|
90
124
|
scanner = r.scan("Foo")
|
125
|
+
|
91
126
|
expect(scanner.scan).to eq([nil, nil, nil])
|
92
127
|
expect(scanner.scan).to eq([nil, nil, nil])
|
93
128
|
expect(scanner.scan).to eq([nil, nil, nil])
|
@@ -95,17 +130,34 @@ RSpec.describe RE2::Scanner do
|
|
95
130
|
expect(scanner.scan).to be_nil
|
96
131
|
end
|
97
132
|
|
98
|
-
it "supports empty groups with multibyte characters" do
|
133
|
+
it "supports empty groups with multibyte characters", :aggregate_failures do
|
99
134
|
r = RE2::Regexp.new("()€")
|
100
135
|
scanner = r.scan("€")
|
136
|
+
|
101
137
|
expect(scanner.scan).to eq([nil])
|
102
138
|
expect(scanner.scan).to be_nil
|
103
139
|
end
|
140
|
+
|
141
|
+
it "raises a Type Error if given input that can't be coerced to a String" do
|
142
|
+
r = RE2::Regexp.new('(\w+)')
|
143
|
+
|
144
|
+
expect { r.scan(0) }.to raise_error(TypeError)
|
145
|
+
end
|
146
|
+
|
147
|
+
it "accepts input that can be coerced to a String", :aggregate_failures do
|
148
|
+
r = RE2::Regexp.new('(\w+)')
|
149
|
+
scanner = r.scan(StringLike.new("Hello world"))
|
150
|
+
|
151
|
+
expect(scanner.scan).to eq(["Hello"])
|
152
|
+
expect(scanner.scan).to eq(["world"])
|
153
|
+
expect(scanner.scan).to be_nil
|
154
|
+
end
|
104
155
|
end
|
105
156
|
|
106
157
|
it "is enumerable" do
|
107
158
|
r = RE2::Regexp.new('(\d)')
|
108
159
|
scanner = r.scan("There are 1 some 2 numbers 3")
|
160
|
+
|
109
161
|
expect(scanner).to be_a(Enumerable)
|
110
162
|
end
|
111
163
|
|
@@ -113,12 +165,8 @@ RSpec.describe RE2::Scanner do
|
|
113
165
|
it "yields each match" do
|
114
166
|
r = RE2::Regexp.new('(\d)')
|
115
167
|
scanner = r.scan("There are 1 some 2 numbers 3")
|
116
|
-
matches = []
|
117
|
-
scanner.each do |match|
|
118
|
-
matches << match
|
119
|
-
end
|
120
168
|
|
121
|
-
expect(
|
169
|
+
expect { |b| scanner.each(&b) }.to yield_successive_args(["1"], ["2"], ["3"])
|
122
170
|
end
|
123
171
|
|
124
172
|
it "returns an enumerator when not given a block" do
|
@@ -135,22 +183,28 @@ RSpec.describe RE2::Scanner do
|
|
135
183
|
end
|
136
184
|
|
137
185
|
describe "#rewind" do
|
138
|
-
it "resets any consumption" do
|
186
|
+
it "resets any consumption", :aggregate_failures do
|
139
187
|
r = RE2::Regexp.new('(\d)')
|
140
188
|
scanner = r.scan("There are 1 some 2 numbers 3")
|
189
|
+
|
141
190
|
expect(scanner.to_enum.first).to eq(["1"])
|
142
191
|
expect(scanner.to_enum.first).to eq(["2"])
|
192
|
+
|
143
193
|
scanner.rewind
|
194
|
+
|
144
195
|
expect(scanner.to_enum.first).to eq(["1"])
|
145
196
|
end
|
146
197
|
|
147
|
-
it "resets the eof? check" do
|
198
|
+
it "resets the eof? check", :aggregate_failures do
|
148
199
|
r = RE2::Regexp.new('(\d)')
|
149
200
|
scanner = r.scan("1")
|
150
201
|
scanner.scan
|
151
|
-
|
202
|
+
|
203
|
+
expect(scanner).to be_eof
|
204
|
+
|
152
205
|
scanner.rewind
|
153
|
-
|
206
|
+
|
207
|
+
expect(scanner).not_to be_eof
|
154
208
|
end
|
155
209
|
end
|
156
210
|
|
@@ -159,7 +213,7 @@ RSpec.describe RE2::Scanner do
|
|
159
213
|
r = RE2::Regexp.new('(\d)')
|
160
214
|
scanner = r.scan("1 2 3")
|
161
215
|
|
162
|
-
expect(scanner
|
216
|
+
expect(scanner).not_to be_eof
|
163
217
|
end
|
164
218
|
|
165
219
|
it "returns true if the input has been consumed" do
|
@@ -167,7 +221,7 @@ RSpec.describe RE2::Scanner do
|
|
167
221
|
scanner = r.scan("1")
|
168
222
|
scanner.scan
|
169
223
|
|
170
|
-
expect(scanner
|
224
|
+
expect(scanner).to be_eof
|
171
225
|
end
|
172
226
|
|
173
227
|
it "returns false if no match is made" do
|
@@ -175,14 +229,14 @@ RSpec.describe RE2::Scanner do
|
|
175
229
|
scanner = r.scan("a")
|
176
230
|
scanner.scan
|
177
231
|
|
178
|
-
expect(scanner
|
232
|
+
expect(scanner).not_to be_eof
|
179
233
|
end
|
180
234
|
|
181
235
|
it "returns false with an empty input that has not been scanned" do
|
182
236
|
r = RE2::Regexp.new("")
|
183
237
|
scanner = r.scan("")
|
184
238
|
|
185
|
-
expect(scanner
|
239
|
+
expect(scanner).not_to be_eof
|
186
240
|
end
|
187
241
|
|
188
242
|
it "returns false with an empty input that has not been matched" do
|
@@ -190,7 +244,7 @@ RSpec.describe RE2::Scanner do
|
|
190
244
|
scanner = r.scan("")
|
191
245
|
scanner.scan
|
192
246
|
|
193
|
-
expect(scanner
|
247
|
+
expect(scanner).not_to be_eof
|
194
248
|
end
|
195
249
|
|
196
250
|
it "returns true with an empty input that has been matched" do
|
@@ -198,7 +252,7 @@ RSpec.describe RE2::Scanner do
|
|
198
252
|
scanner = r.scan("")
|
199
253
|
scanner.scan
|
200
254
|
|
201
|
-
expect(scanner
|
255
|
+
expect(scanner).to be_eof
|
202
256
|
end
|
203
257
|
end
|
204
258
|
end
|