re2 2.4.2-x64-mingw-ucrt → 2.5.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +236 -192
- data/ext/re2/extconf.rb +6 -70
- data/ext/re2/re2.cc +483 -288
- data/ext/re2/recipes.rb +8 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/re2/regexp.rb +69 -0
- data/lib/re2/scanner.rb +8 -0
- data/lib/re2/string.rb +9 -59
- data/lib/re2/version.rb +9 -1
- data/lib/re2.rb +7 -3
- data/re2.gemspec +1 -0
- data/spec/kernel_spec.rb +2 -2
- data/spec/re2/match_data_spec.rb +64 -25
- data/spec/re2/regexp_spec.rb +492 -113
- data/spec/re2/scanner_spec.rb +3 -8
- data/spec/re2/set_spec.rb +18 -18
- data/spec/re2_spec.rb +4 -4
- metadata +3 -2
data/ext/re2/re2.cc
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
/*
|
2
|
-
* re2 (
|
3
|
-
* Ruby bindings to
|
2
|
+
* re2 (https://github.com/mudge/re2)
|
3
|
+
* Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to
|
4
|
+
* backtracking regular expression engines like those used in PCRE, Perl, and
|
5
|
+
* Python".
|
4
6
|
*
|
5
|
-
* Copyright (c) 2010
|
7
|
+
* Copyright (c) 2010, Paul Mucur (https://mudge.name)
|
6
8
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
9
|
*/
|
8
10
|
|
@@ -42,13 +44,14 @@ typedef struct {
|
|
42
44
|
} re2_set;
|
43
45
|
|
44
46
|
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
|
45
|
-
re2_eSetMatchError, re2_eSetUnsupportedError;
|
47
|
+
re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
|
46
48
|
|
47
49
|
/* Symbols used in RE2 options. */
|
48
50
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
49
51
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
50
|
-
id_perl_classes, id_word_boundary, id_one_line,
|
51
|
-
|
52
|
+
id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
|
53
|
+
id_anchor, id_anchor_start, id_anchor_both, id_exception,
|
54
|
+
id_submatches, id_startpos, id_endpos;
|
52
55
|
|
53
56
|
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
54
57
|
if (encoding == RE2::Options::EncodingUTF8) {
|
@@ -122,9 +125,9 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
|
122
125
|
}
|
123
126
|
}
|
124
127
|
|
125
|
-
/* For compatibility with
|
128
|
+
/* For compatibility with Ruby < 2.7 */
|
126
129
|
#ifdef HAVE_RB_GC_MARK_MOVABLE
|
127
|
-
#define re2_compact_callback(x)
|
130
|
+
#define re2_compact_callback(x) (x),
|
128
131
|
#else
|
129
132
|
#define rb_gc_mark_movable(x) rb_gc_mark(x)
|
130
133
|
#define re2_compact_callback(x)
|
@@ -163,16 +166,18 @@ static size_t re2_matchdata_memsize(const void *ptr) {
|
|
163
166
|
}
|
164
167
|
|
165
168
|
static const rb_data_type_t re2_matchdata_data_type = {
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
169
|
+
"RE2::MatchData",
|
170
|
+
{
|
171
|
+
re2_matchdata_mark,
|
172
|
+
re2_matchdata_free,
|
173
|
+
re2_matchdata_memsize,
|
171
174
|
re2_compact_callback(re2_matchdata_compact)
|
172
175
|
},
|
176
|
+
0,
|
177
|
+
0,
|
173
178
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
174
179
|
// macro to update VALUE references, as to trigger write barriers.
|
175
|
-
|
180
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
176
181
|
};
|
177
182
|
|
178
183
|
static void re2_scanner_mark(void *ptr) {
|
@@ -208,16 +213,18 @@ static size_t re2_scanner_memsize(const void *ptr) {
|
|
208
213
|
}
|
209
214
|
|
210
215
|
static const rb_data_type_t re2_scanner_data_type = {
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
+
"RE2::Scanner",
|
217
|
+
{
|
218
|
+
re2_scanner_mark,
|
219
|
+
re2_scanner_free,
|
220
|
+
re2_scanner_memsize,
|
216
221
|
re2_compact_callback(re2_scanner_compact)
|
217
222
|
},
|
223
|
+
0,
|
224
|
+
0,
|
218
225
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
219
226
|
// macro to update VALUE references, as to trigger write barriers.
|
220
|
-
|
227
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
221
228
|
};
|
222
229
|
|
223
230
|
static void re2_regexp_free(void *ptr) {
|
@@ -239,15 +246,17 @@ static size_t re2_regexp_memsize(const void *ptr) {
|
|
239
246
|
}
|
240
247
|
|
241
248
|
static const rb_data_type_t re2_regexp_data_type = {
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
249
|
+
"RE2::Regexp",
|
250
|
+
{
|
251
|
+
0,
|
252
|
+
re2_regexp_free,
|
253
|
+
re2_regexp_memsize,
|
247
254
|
},
|
255
|
+
0,
|
256
|
+
0,
|
248
257
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
249
258
|
// macro to update VALUE references, as to trigger write barriers.
|
250
|
-
|
259
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
251
260
|
};
|
252
261
|
|
253
262
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
@@ -264,12 +273,14 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
264
273
|
}
|
265
274
|
|
266
275
|
/*
|
267
|
-
* Returns a frozen copy of the
|
276
|
+
* Returns a frozen copy of the text supplied when matching.
|
268
277
|
*
|
269
|
-
*
|
278
|
+
* If the text was already a frozen string, returns the original.
|
279
|
+
*
|
280
|
+
* @return [String] a frozen string with the text supplied when matching
|
270
281
|
* @example
|
271
|
-
* m = RE2::Regexp.new('(\d+)').
|
272
|
-
* m.string
|
282
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
283
|
+
* m.string #=> "bob 123"
|
273
284
|
*/
|
274
285
|
static VALUE re2_matchdata_string(const VALUE self) {
|
275
286
|
re2_matchdata *m;
|
@@ -279,9 +290,10 @@ static VALUE re2_matchdata_string(const VALUE self) {
|
|
279
290
|
}
|
280
291
|
|
281
292
|
/*
|
282
|
-
* Returns the
|
293
|
+
* Returns the text supplied when incrementally matching with
|
294
|
+
* {RE2::Regexp#scan}.
|
283
295
|
*
|
284
|
-
* @return [String] the original string
|
296
|
+
* @return [String] the original string passed to {RE2::Regexp#scan}
|
285
297
|
* @example
|
286
298
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
287
299
|
* c.string #=> "foo"
|
@@ -294,9 +306,9 @@ static VALUE re2_scanner_string(const VALUE self) {
|
|
294
306
|
}
|
295
307
|
|
296
308
|
/*
|
297
|
-
* Returns whether the
|
309
|
+
* Returns whether the {RE2::Scanner} has consumed all input or not.
|
298
310
|
*
|
299
|
-
* @return [Boolean] whether the
|
311
|
+
* @return [Boolean] whether the {RE2::Scanner} has consumed all input or not
|
300
312
|
* @example
|
301
313
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
302
314
|
* c.eof? #=> true
|
@@ -309,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
|
|
309
321
|
}
|
310
322
|
|
311
323
|
/*
|
312
|
-
* Rewind the
|
324
|
+
* Rewind the {RE2::Scanner} to the start of the string.
|
313
325
|
*
|
314
326
|
* @example
|
315
327
|
* s = RE2::Regexp.new('(\d+)').scan("1 2 3")
|
@@ -331,14 +343,19 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
331
343
|
}
|
332
344
|
|
333
345
|
/*
|
334
|
-
* Scan the given text incrementally for matches
|
335
|
-
*
|
346
|
+
* Scan the given text incrementally for matches using
|
347
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
348
|
+
* `FindAndConsume`}, returning an array of submatches on each subsequent
|
349
|
+
* call. Returns `nil` if no matches are found or an empty array for every
|
350
|
+
* match if the pattern has no capturing groups.
|
336
351
|
*
|
337
352
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
338
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
339
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
353
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
354
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
340
355
|
*
|
341
|
-
* @return [Array<String>] the
|
356
|
+
* @return [Array<String>] if the pattern has capturing groups
|
357
|
+
* @return [[]] if the pattern does not have capturing groups
|
358
|
+
* @return [nil] if no matches are found
|
342
359
|
* @example
|
343
360
|
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
344
361
|
* s.scan #=> ["Foo"]
|
@@ -353,7 +370,7 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
353
370
|
|
354
371
|
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
355
372
|
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
356
|
-
std::vector<
|
373
|
+
std::vector<re2::StringPiece> matches(c->number_of_capturing_groups);
|
357
374
|
|
358
375
|
if (c->eof) {
|
359
376
|
return Qnil;
|
@@ -397,9 +414,6 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
397
414
|
}
|
398
415
|
}
|
399
416
|
|
400
|
-
/*
|
401
|
-
* Retrieve a matchdata by index or name.
|
402
|
-
*/
|
403
417
|
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
404
418
|
re2_matchdata *m;
|
405
419
|
re2_pattern *p;
|
@@ -435,13 +449,14 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
|
435
449
|
}
|
436
450
|
|
437
451
|
/*
|
438
|
-
* Returns the number of elements in the
|
452
|
+
* Returns the number of elements in the {RE2::MatchData} (including the
|
453
|
+
* overall match, submatches and any `nil`s).
|
439
454
|
*
|
440
455
|
* @return [Integer] the number of elements
|
441
456
|
* @example
|
442
457
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
443
|
-
* m.size
|
444
|
-
* m.length
|
458
|
+
* m.size #=> 2
|
459
|
+
* m.length #=> 2
|
445
460
|
*/
|
446
461
|
static VALUE re2_matchdata_size(const VALUE self) {
|
447
462
|
re2_matchdata *m;
|
@@ -452,14 +467,15 @@ static VALUE re2_matchdata_size(const VALUE self) {
|
|
452
467
|
}
|
453
468
|
|
454
469
|
/*
|
455
|
-
* Returns the offset of the start of the nth element of the
|
470
|
+
* Returns the offset of the start of the nth element of the {RE2::MatchData}.
|
456
471
|
*
|
457
|
-
* @param [Integer, String, Symbol] n the name or number of the
|
458
|
-
* @return [Integer] the offset of the start of the match
|
472
|
+
* @param [Integer, String, Symbol] n the name or number of the submatch
|
473
|
+
* @return [Integer, nil] the offset of the start of the match or `nil` if
|
474
|
+
* there is no such submatch
|
459
475
|
* @example
|
460
476
|
* m = RE2::Regexp.new('ob (\d+)').match("bob 123")
|
461
|
-
* m.begin(0)
|
462
|
-
* m.begin(1)
|
477
|
+
* m.begin(0) #=> 1
|
478
|
+
* m.begin(1) #=> 4
|
463
479
|
*/
|
464
480
|
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
465
481
|
re2_matchdata *m;
|
@@ -477,14 +493,16 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
|
477
493
|
}
|
478
494
|
|
479
495
|
/*
|
480
|
-
* Returns the offset of the character following the end of the nth element of
|
496
|
+
* Returns the offset of the character following the end of the nth element of
|
497
|
+
* the {RE2::MatchData}.
|
481
498
|
*
|
482
499
|
* @param [Integer, String, Symbol] n the name or number of the match
|
483
|
-
* @return [Integer] the offset of the character following the end of the
|
500
|
+
* @return [Integer, nil] the offset of the character following the end of the
|
501
|
+
* match or `nil` if there is no such match
|
484
502
|
* @example
|
485
503
|
* m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
|
486
|
-
* m.end(0)
|
487
|
-
* m.end(1)
|
504
|
+
* m.end(0) #=> 9
|
505
|
+
* m.end(1) #=> 7
|
488
506
|
*/
|
489
507
|
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
490
508
|
re2_matchdata *m;
|
@@ -504,10 +522,10 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
|
504
522
|
/*
|
505
523
|
* Returns the {RE2::Regexp} used in the match.
|
506
524
|
*
|
507
|
-
* @return [RE2::Regexp] the
|
525
|
+
* @return [RE2::Regexp] the regular expression used in the match
|
508
526
|
* @example
|
509
527
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
510
|
-
* m.regexp
|
528
|
+
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
511
529
|
*/
|
512
530
|
static VALUE re2_matchdata_regexp(const VALUE self) {
|
513
531
|
re2_matchdata *m;
|
@@ -517,12 +535,12 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
|
|
517
535
|
}
|
518
536
|
|
519
537
|
/*
|
520
|
-
* Returns the {RE2::Regexp} used in the
|
538
|
+
* Returns the {RE2::Regexp} used in the {RE2::Scanner}.
|
521
539
|
*
|
522
|
-
* @return [RE2::Regexp] the
|
540
|
+
* @return [RE2::Regexp] the regular expression used in the {RE2::Scanner}
|
523
541
|
* @example
|
524
542
|
* c = RE2::Regexp.new('(\d+)').scan("bob 123")
|
525
|
-
* c.regexp
|
543
|
+
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
526
544
|
*/
|
527
545
|
static VALUE re2_scanner_regexp(const VALUE self) {
|
528
546
|
re2_scanner *c;
|
@@ -538,16 +556,17 @@ static VALUE re2_regexp_allocate(VALUE klass) {
|
|
538
556
|
}
|
539
557
|
|
540
558
|
/*
|
541
|
-
* Returns the array of matches
|
559
|
+
* Returns the array of matches including the overall match, submatches and any
|
560
|
+
* `nil`s.
|
542
561
|
*
|
543
562
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
544
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
545
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
563
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
564
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
546
565
|
*
|
547
566
|
* @return [Array<String, nil>] the array of matches
|
548
567
|
* @example
|
549
568
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
550
|
-
* m.to_a
|
569
|
+
* m.to_a #=> ["123", "123"]
|
551
570
|
*/
|
552
571
|
static VALUE re2_matchdata_to_a(const VALUE self) {
|
553
572
|
re2_matchdata *m;
|
@@ -613,19 +632,17 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
613
632
|
* Retrieve zero, one or more matches by index or name.
|
614
633
|
*
|
615
634
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
616
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
617
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
618
|
-
*
|
619
|
-
* @return [Array<String, nil>, String, Boolean]
|
635
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
636
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
620
637
|
*
|
621
638
|
* @overload [](index)
|
622
639
|
* Access a particular match by index.
|
623
640
|
*
|
624
641
|
* @param [Integer] index the index of the match to fetch
|
625
|
-
* @return [String, nil] the specified match
|
642
|
+
* @return [String, nil] the specified match or `nil` if it isn't present
|
626
643
|
* @example
|
627
644
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
628
|
-
* m[0]
|
645
|
+
* m[0] #=> "123"
|
629
646
|
*
|
630
647
|
* @overload [](start, length)
|
631
648
|
* Access a range of matches by starting index and length.
|
@@ -635,7 +652,7 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
635
652
|
* @return [Array<String, nil>] the specified matches
|
636
653
|
* @example
|
637
654
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
638
|
-
* m[0, 1]
|
655
|
+
* m[0, 1] #=> ["123"]
|
639
656
|
*
|
640
657
|
* @overload [](range)
|
641
658
|
* Access a range of matches by index.
|
@@ -644,13 +661,13 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
|
644
661
|
* @return [Array<String, nil>] the specified matches
|
645
662
|
* @example
|
646
663
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
647
|
-
* m[0..1]
|
664
|
+
* m[0..1] #=> ["123", "123"]
|
648
665
|
*
|
649
666
|
* @overload [](name)
|
650
667
|
* Access a particular match by name.
|
651
668
|
*
|
652
669
|
* @param [String, Symbol] name the name of the match to fetch
|
653
|
-
* @return [String, nil] the specific match
|
670
|
+
* @return [String, nil] the specific match or `nil` if it isn't present
|
654
671
|
* @example
|
655
672
|
* m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
|
656
673
|
* m["number"] #=> "123"
|
@@ -684,13 +701,13 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
|
|
684
701
|
* Returns a printable version of the match.
|
685
702
|
*
|
686
703
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
687
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
688
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
704
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
705
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
689
706
|
*
|
690
707
|
* @return [String] a printable version of the match
|
691
708
|
* @example
|
692
709
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
693
|
-
* m.inspect
|
710
|
+
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
694
711
|
*/
|
695
712
|
static VALUE re2_matchdata_inspect(const VALUE self) {
|
696
713
|
re2_matchdata *m;
|
@@ -728,13 +745,14 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
|
|
728
745
|
* Returns the array of submatches for pattern matching.
|
729
746
|
*
|
730
747
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
731
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
732
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is
|
748
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
749
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
750
|
+
* undefined).
|
733
751
|
*
|
734
752
|
* @return [Array<String, nil>] the array of submatches
|
735
753
|
* @example
|
736
754
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
737
|
-
* m.deconstruct
|
755
|
+
* m.deconstruct #=> ["123"]
|
738
756
|
*
|
739
757
|
* @example pattern matching
|
740
758
|
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
@@ -774,17 +792,18 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
|
774
792
|
* order but an invalid name will cause the hash to be immediately returned.
|
775
793
|
*
|
776
794
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
777
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
778
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
795
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
796
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
779
797
|
*
|
780
798
|
* @return [Hash] a hash of capturing group names to submatches
|
781
|
-
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names
|
799
|
+
* @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
|
800
|
+
* or `nil` to return all names
|
782
801
|
* @example
|
783
802
|
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
784
|
-
* m.deconstruct_keys(nil)
|
785
|
-
* m.deconstruct_keys([:numbers])
|
786
|
-
* m.deconstruct_keys([:fruit])
|
787
|
-
* m.deconstruct_keys([:letters, :fruit])
|
803
|
+
* m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
|
804
|
+
* m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
|
805
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
806
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
|
788
807
|
*
|
789
808
|
* @example pattern matching
|
790
809
|
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
@@ -833,11 +852,9 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
|
|
833
852
|
}
|
834
853
|
|
835
854
|
/*
|
836
|
-
*
|
837
|
-
* +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
|
855
|
+
* Shorthand to compile a new {RE2::Regexp}.
|
838
856
|
*
|
839
857
|
* @see RE2::Regexp#initialize
|
840
|
-
*
|
841
858
|
*/
|
842
859
|
static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
843
860
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
@@ -845,22 +862,21 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
|
845
862
|
|
846
863
|
/*
|
847
864
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
848
|
-
*
|
849
|
-
*
|
850
|
-
* @return [RE2::Regexp]
|
865
|
+
* `pattern` stored inside.
|
851
866
|
*
|
852
867
|
* @overload initialize(pattern)
|
853
868
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
854
|
-
*
|
869
|
+
* `pattern` stored inside with the default options.
|
855
870
|
*
|
856
871
|
* @param [String] pattern the pattern to compile
|
857
|
-
* @return [RE2::Regexp]
|
872
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern
|
873
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
858
874
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
859
|
-
*
|
875
|
+
* pattern
|
860
876
|
*
|
861
877
|
* @overload initialize(pattern, options)
|
862
878
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
863
|
-
*
|
879
|
+
* `pattern` stored inside with the specified options.
|
864
880
|
*
|
865
881
|
* @param [String] pattern the pattern to compile
|
866
882
|
* @param [Hash] options the options with which to compile the pattern
|
@@ -870,12 +886,13 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
|
870
886
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
871
887
|
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
872
888
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
873
|
-
* @option options [Boolean] :never_nl (false) never match
|
874
|
-
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
875
|
-
* @option options [Boolean] :perl_classes (false) allow Perl's
|
876
|
-
* @option options [Boolean] :word_boundary (false) allow
|
877
|
-
* @option options [Boolean] :one_line (false)
|
878
|
-
* @return [RE2::Regexp]
|
889
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
890
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
891
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
892
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
893
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
894
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern and options
|
895
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
879
896
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
880
897
|
*/
|
881
898
|
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
@@ -906,16 +923,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
906
923
|
}
|
907
924
|
|
908
925
|
/*
|
909
|
-
* Returns a printable version of the regular expression
|
926
|
+
* Returns a printable version of the regular expression.
|
910
927
|
*
|
911
928
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
912
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
913
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is
|
929
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
930
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
931
|
+
* undefined).
|
914
932
|
*
|
915
933
|
* @return [String] a printable version of the regular expression
|
916
934
|
* @example
|
917
935
|
* re2 = RE2::Regexp.new("woo?")
|
918
|
-
* re2.inspect
|
936
|
+
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
919
937
|
*/
|
920
938
|
static VALUE re2_regexp_inspect(const VALUE self) {
|
921
939
|
re2_pattern *p;
|
@@ -931,16 +949,16 @@ static VALUE re2_regexp_inspect(const VALUE self) {
|
|
931
949
|
}
|
932
950
|
|
933
951
|
/*
|
934
|
-
* Returns a string version of the regular expression
|
952
|
+
* Returns a string version of the regular expression.
|
935
953
|
*
|
936
954
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
937
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
938
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
955
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
956
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
939
957
|
*
|
940
958
|
* @return [String] a string version of the regular expression
|
941
959
|
* @example
|
942
960
|
* re2 = RE2::Regexp.new("woo?")
|
943
|
-
* re2.to_s
|
961
|
+
* re2.to_s #=> "woo?"
|
944
962
|
*/
|
945
963
|
static VALUE re2_regexp_to_s(const VALUE self) {
|
946
964
|
re2_pattern *p;
|
@@ -952,13 +970,12 @@ static VALUE re2_regexp_to_s(const VALUE self) {
|
|
952
970
|
}
|
953
971
|
|
954
972
|
/*
|
955
|
-
* Returns whether or not the regular expression
|
956
|
-
* was compiled successfully or not.
|
973
|
+
* Returns whether or not the regular expression was compiled successfully.
|
957
974
|
*
|
958
975
|
* @return [Boolean] whether or not compilation was successful
|
959
976
|
* @example
|
960
977
|
* re2 = RE2::Regexp.new("woo?")
|
961
|
-
* re2.ok?
|
978
|
+
* re2.ok? #=> true
|
962
979
|
*/
|
963
980
|
static VALUE re2_regexp_ok(const VALUE self) {
|
964
981
|
re2_pattern *p;
|
@@ -968,13 +985,13 @@ static VALUE re2_regexp_ok(const VALUE self) {
|
|
968
985
|
}
|
969
986
|
|
970
987
|
/*
|
971
|
-
* Returns whether or not the regular expression
|
972
|
-
*
|
988
|
+
* Returns whether or not the regular expression was compiled with the `utf8`
|
989
|
+
* option set to `true`.
|
973
990
|
*
|
974
|
-
* @return [Boolean] the utf8 option
|
991
|
+
* @return [Boolean] the `utf8` option
|
975
992
|
* @example
|
976
|
-
* re2 = RE2::Regexp.new("woo?", :
|
977
|
-
* re2.utf8?
|
993
|
+
* re2 = RE2::Regexp.new("woo?", utf8: true)
|
994
|
+
* re2.utf8? #=> true
|
978
995
|
*/
|
979
996
|
static VALUE re2_regexp_utf8(const VALUE self) {
|
980
997
|
re2_pattern *p;
|
@@ -984,13 +1001,13 @@ static VALUE re2_regexp_utf8(const VALUE self) {
|
|
984
1001
|
}
|
985
1002
|
|
986
1003
|
/*
|
987
|
-
* Returns whether or not the regular expression
|
988
|
-
*
|
1004
|
+
* Returns whether or not the regular expression was compiled with the
|
1005
|
+
* `posix_syntax` option set to `true`.
|
989
1006
|
*
|
990
|
-
* @return [Boolean] the posix_syntax option
|
1007
|
+
* @return [Boolean] the `posix_syntax` option
|
991
1008
|
* @example
|
992
|
-
* re2 = RE2::Regexp.new("woo?", :
|
993
|
-
* re2.posix_syntax?
|
1009
|
+
* re2 = RE2::Regexp.new("woo?", posix_syntax: true)
|
1010
|
+
* re2.posix_syntax? #=> true
|
994
1011
|
*/
|
995
1012
|
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
996
1013
|
re2_pattern *p;
|
@@ -1000,13 +1017,13 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
|
1000
1017
|
}
|
1001
1018
|
|
1002
1019
|
/*
|
1003
|
-
* Returns whether or not the regular expression
|
1004
|
-
*
|
1020
|
+
* Returns whether or not the regular expression was compiled with the
|
1021
|
+
* `longest_match` option set to `true`.
|
1005
1022
|
*
|
1006
|
-
* @return [Boolean] the longest_match option
|
1023
|
+
* @return [Boolean] the `longest_match` option
|
1007
1024
|
* @example
|
1008
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1009
|
-
* re2.longest_match?
|
1025
|
+
* re2 = RE2::Regexp.new("woo?", longest_match: true)
|
1026
|
+
* re2.longest_match? #=> true
|
1010
1027
|
*/
|
1011
1028
|
static VALUE re2_regexp_longest_match(const VALUE self) {
|
1012
1029
|
re2_pattern *p;
|
@@ -1016,13 +1033,13 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
|
|
1016
1033
|
}
|
1017
1034
|
|
1018
1035
|
/*
|
1019
|
-
* Returns whether or not the regular expression
|
1020
|
-
*
|
1036
|
+
* Returns whether or not the regular expression was compiled with the
|
1037
|
+
* `log_errors` option set to `true`.
|
1021
1038
|
*
|
1022
|
-
* @return [Boolean] the log_errors option
|
1039
|
+
* @return [Boolean] the `log_errors` option
|
1023
1040
|
* @example
|
1024
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1025
|
-
* re2.log_errors?
|
1041
|
+
* re2 = RE2::Regexp.new("woo?", log_errors: true)
|
1042
|
+
* re2.log_errors? #=> true
|
1026
1043
|
*/
|
1027
1044
|
static VALUE re2_regexp_log_errors(const VALUE self) {
|
1028
1045
|
re2_pattern *p;
|
@@ -1032,13 +1049,12 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
|
|
1032
1049
|
}
|
1033
1050
|
|
1034
1051
|
/*
|
1035
|
-
* Returns the max_mem setting for the regular expression
|
1036
|
-
* +re2+.
|
1052
|
+
* Returns the `max_mem` setting for the regular expression.
|
1037
1053
|
*
|
1038
|
-
* @return [Integer] the max_mem option
|
1054
|
+
* @return [Integer] the `max_mem` option
|
1039
1055
|
* @example
|
1040
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1041
|
-
* re2.max_mem
|
1056
|
+
* re2 = RE2::Regexp.new("woo?", max_mem: 1024)
|
1057
|
+
* re2.max_mem #=> 1024
|
1042
1058
|
*/
|
1043
1059
|
static VALUE re2_regexp_max_mem(const VALUE self) {
|
1044
1060
|
re2_pattern *p;
|
@@ -1048,13 +1064,13 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
|
|
1048
1064
|
}
|
1049
1065
|
|
1050
1066
|
/*
|
1051
|
-
* Returns whether or not the regular expression
|
1052
|
-
*
|
1067
|
+
* Returns whether or not the regular expression was compiled with the
|
1068
|
+
* `literal` option set to `true`.
|
1053
1069
|
*
|
1054
|
-
* @return [Boolean] the literal option
|
1070
|
+
* @return [Boolean] the `literal` option
|
1055
1071
|
* @example
|
1056
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1057
|
-
* re2.literal?
|
1072
|
+
* re2 = RE2::Regexp.new("woo?", literal: true)
|
1073
|
+
* re2.literal? #=> true
|
1058
1074
|
*/
|
1059
1075
|
static VALUE re2_regexp_literal(const VALUE self) {
|
1060
1076
|
re2_pattern *p;
|
@@ -1064,13 +1080,13 @@ static VALUE re2_regexp_literal(const VALUE self) {
|
|
1064
1080
|
}
|
1065
1081
|
|
1066
1082
|
/*
|
1067
|
-
* Returns whether or not the regular expression
|
1068
|
-
*
|
1083
|
+
* Returns whether or not the regular expression was compiled with the
|
1084
|
+
* `never_nl` option set to `true`.
|
1069
1085
|
*
|
1070
|
-
* @return [Boolean] the never_nl option
|
1086
|
+
* @return [Boolean] the `never_nl` option
|
1071
1087
|
* @example
|
1072
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1073
|
-
* re2.never_nl?
|
1088
|
+
* re2 = RE2::Regexp.new("woo?", never_nl: true)
|
1089
|
+
* re2.never_nl? #=> true
|
1074
1090
|
*/
|
1075
1091
|
static VALUE re2_regexp_never_nl(const VALUE self) {
|
1076
1092
|
re2_pattern *p;
|
@@ -1080,13 +1096,13 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
|
|
1080
1096
|
}
|
1081
1097
|
|
1082
1098
|
/*
|
1083
|
-
* Returns whether or not the regular expression
|
1084
|
-
*
|
1099
|
+
* Returns whether or not the regular expression was compiled with the
|
1100
|
+
* `case_sensitive` option set to `true`.
|
1085
1101
|
*
|
1086
|
-
* @return [Boolean] the case_sensitive option
|
1102
|
+
* @return [Boolean] the `case_sensitive` option
|
1087
1103
|
* @example
|
1088
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1089
|
-
* re2.case_sensitive?
|
1104
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1105
|
+
* re2.case_sensitive? #=> true
|
1090
1106
|
*/
|
1091
1107
|
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
1092
1108
|
re2_pattern *p;
|
@@ -1096,27 +1112,27 @@ static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
|
1096
1112
|
}
|
1097
1113
|
|
1098
1114
|
/*
|
1099
|
-
* Returns whether or not the regular expression
|
1100
|
-
*
|
1115
|
+
* Returns whether or not the regular expression was compiled with the
|
1116
|
+
* `case_sensitive` option set to `false`.
|
1101
1117
|
*
|
1102
|
-
* @return [Boolean] the inverse of the case_sensitive option
|
1118
|
+
* @return [Boolean] the inverse of the `case_sensitive` option
|
1103
1119
|
* @example
|
1104
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1105
|
-
* re2.case_insensitive?
|
1106
|
-
* re2.casefold?
|
1120
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1121
|
+
* re2.case_insensitive? #=> false
|
1122
|
+
* re2.casefold? #=> false
|
1107
1123
|
*/
|
1108
1124
|
static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
1109
1125
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
1110
1126
|
}
|
1111
1127
|
|
1112
1128
|
/*
|
1113
|
-
* Returns whether or not the regular expression
|
1114
|
-
*
|
1129
|
+
* Returns whether or not the regular expression was compiled with the
|
1130
|
+
* `perl_classes` option set to `true`.
|
1115
1131
|
*
|
1116
|
-
* @return [Boolean] the perl_classes option
|
1132
|
+
* @return [Boolean] the `perl_classes` option
|
1117
1133
|
* @example
|
1118
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1119
|
-
* re2.perl_classes?
|
1134
|
+
* re2 = RE2::Regexp.new("woo?", perl_classes: true)
|
1135
|
+
* re2.perl_classes? #=> true
|
1120
1136
|
*/
|
1121
1137
|
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1122
1138
|
re2_pattern *p;
|
@@ -1126,13 +1142,13 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
|
|
1126
1142
|
}
|
1127
1143
|
|
1128
1144
|
/*
|
1129
|
-
* Returns whether or not the regular expression
|
1130
|
-
*
|
1145
|
+
* Returns whether or not the regular expression was compiled with the
|
1146
|
+
* `word_boundary` option set to `true`.
|
1131
1147
|
*
|
1132
|
-
* @return [Boolean] the word_boundary option
|
1148
|
+
* @return [Boolean] the `word_boundary` option
|
1133
1149
|
* @example
|
1134
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1135
|
-
* re2.word_boundary?
|
1150
|
+
* re2 = RE2::Regexp.new("woo?", word_boundary: true)
|
1151
|
+
* re2.word_boundary? #=> true
|
1136
1152
|
*/
|
1137
1153
|
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1138
1154
|
re2_pattern *p;
|
@@ -1142,13 +1158,13 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
|
|
1142
1158
|
}
|
1143
1159
|
|
1144
1160
|
/*
|
1145
|
-
* Returns whether or not the regular expression
|
1146
|
-
*
|
1161
|
+
* Returns whether or not the regular expression was compiled with the
|
1162
|
+
* `one_line` option set to `true`.
|
1147
1163
|
*
|
1148
|
-
* @return [Boolean] the one_line option
|
1164
|
+
* @return [Boolean] the `one_line` option
|
1149
1165
|
* @example
|
1150
|
-
* re2 = RE2::Regexp.new("woo?", :
|
1151
|
-
* re2.one_line?
|
1166
|
+
* re2 = RE2::Regexp.new("woo?", one_line: true)
|
1167
|
+
* re2.one_line? #=> true
|
1152
1168
|
*/
|
1153
1169
|
static VALUE re2_regexp_one_line(const VALUE self) {
|
1154
1170
|
re2_pattern *p;
|
@@ -1158,10 +1174,10 @@ static VALUE re2_regexp_one_line(const VALUE self) {
|
|
1158
1174
|
}
|
1159
1175
|
|
1160
1176
|
/*
|
1161
|
-
* If the RE2 could not be created properly, returns an
|
1162
|
-
*
|
1177
|
+
* If the {RE2::Regexp} could not be created properly, returns an error string,
|
1178
|
+
* otherwise returns `nil`.
|
1163
1179
|
*
|
1164
|
-
* @return [String, nil] the error string or nil
|
1180
|
+
* @return [String, nil] the error string or `nil`
|
1165
1181
|
*/
|
1166
1182
|
static VALUE re2_regexp_error(const VALUE self) {
|
1167
1183
|
re2_pattern *p;
|
@@ -1175,14 +1191,14 @@ static VALUE re2_regexp_error(const VALUE self) {
|
|
1175
1191
|
}
|
1176
1192
|
|
1177
1193
|
/*
|
1178
|
-
* If the RE2 could not be created properly, returns
|
1179
|
-
* the offending portion of the regexp otherwise returns nil
|
1194
|
+
* If the {RE2::Regexp} could not be created properly, returns
|
1195
|
+
* the offending portion of the regexp, otherwise returns `nil`.
|
1180
1196
|
*
|
1181
1197
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1182
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1183
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1198
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1199
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1184
1200
|
*
|
1185
|
-
* @return [String, nil] the offending portion of the regexp or nil
|
1201
|
+
* @return [String, nil] the offending portion of the regexp or `nil`
|
1186
1202
|
*/
|
1187
1203
|
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1188
1204
|
re2_pattern *p;
|
@@ -1212,8 +1228,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
|
|
1212
1228
|
}
|
1213
1229
|
|
1214
1230
|
/*
|
1215
|
-
* Returns a hash of the options currently set for
|
1216
|
-
* +re2+.
|
1231
|
+
* Returns a hash of the options currently set for the {RE2::Regexp}.
|
1217
1232
|
*
|
1218
1233
|
* @return [Hash] the options
|
1219
1234
|
*/
|
@@ -1264,8 +1279,8 @@ static VALUE re2_regexp_options(const VALUE self) {
|
|
1264
1279
|
|
1265
1280
|
/*
|
1266
1281
|
* Returns the number of capturing subpatterns, or -1 if the regexp
|
1267
|
-
* wasn't valid on construction. The overall match (
|
1268
|
-
* count: if the regexp is "(a)(b)"
|
1282
|
+
* wasn't valid on construction. The overall match (`$0`) does not
|
1283
|
+
* count: if the regexp is `"(a)(b)"`, returns 2.
|
1269
1284
|
*
|
1270
1285
|
* @return [Integer] the number of capturing subpatterns
|
1271
1286
|
*/
|
@@ -1280,8 +1295,8 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
|
1280
1295
|
* Returns a hash of names to capturing indices of groups.
|
1281
1296
|
*
|
1282
1297
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1283
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1284
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1298
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1299
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1285
1300
|
*
|
1286
1301
|
* @return [Hash] a hash of names to capturing indices
|
1287
1302
|
*/
|
@@ -1303,63 +1318,93 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
|
1303
1318
|
}
|
1304
1319
|
|
1305
1320
|
/*
|
1306
|
-
*
|
1307
|
-
*
|
1308
|
-
* instance
|
1321
|
+
* General matching: match the pattern against the given `text` using
|
1322
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1323
|
+
* `Match`} and return a {RE2::MatchData} instance with the specified number of
|
1324
|
+
* submatches (defaults to the total number of capturing groups) or a boolean
|
1325
|
+
* (if no submatches are required).
|
1309
1326
|
*
|
1310
|
-
*
|
1327
|
+
* The number of submatches has a significant impact on performance: requesting
|
1328
|
+
* one submatch is much faster than requesting more than one and requesting
|
1329
|
+
* zero submatches is faster still.
|
1311
1330
|
*
|
1312
1331
|
* @overload match(text)
|
1313
|
-
* Returns
|
1314
|
-
*
|
1332
|
+
* Returns a {RE2::MatchData} containing the matching pattern and all
|
1333
|
+
* submatches resulting from looking for the regexp in `text` if the pattern
|
1315
1334
|
* contains capturing groups.
|
1316
1335
|
*
|
1317
|
-
* Returns either true or false indicating whether a successful match was
|
1336
|
+
* Returns either `true` or `false` indicating whether a successful match was
|
1318
1337
|
* made if the pattern contains no capturing groups.
|
1319
1338
|
*
|
1320
1339
|
* @param [String] text the text to search
|
1321
|
-
* @return [RE2::MatchData] if the pattern contains capturing groups
|
1340
|
+
* @return [RE2::MatchData, nil] if the pattern contains capturing groups
|
1322
1341
|
* @return [Boolean] if the pattern does not contain capturing groups
|
1323
|
-
* @raise [NoMemoryError] if there was not enough memory to allocate the
|
1342
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1343
|
+
* @raise [TypeError] if given text that cannot be coerced to a `String`
|
1324
1344
|
* @example Matching with capturing groups
|
1325
1345
|
* r = RE2::Regexp.new('w(o)(o)')
|
1326
|
-
* r.match('woo')
|
1346
|
+
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1327
1347
|
* @example Matching without capturing groups
|
1328
1348
|
* r = RE2::Regexp.new('woo')
|
1329
|
-
* r.match('woo')
|
1349
|
+
* r.match('woo') #=> true
|
1330
1350
|
*
|
1331
|
-
* @overload match(text,
|
1332
|
-
*
|
1333
|
-
*
|
1351
|
+
* @overload match(text, options)
|
1352
|
+
* See `match(text)` but with customisable offsets for starting and ending
|
1353
|
+
* matches, optional anchoring to the start or both ends of the text and a
|
1354
|
+
* specific number of submatches to extract (padded with `nil`s if
|
1355
|
+
* necessary).
|
1334
1356
|
*
|
1335
1357
|
* @param [String] text the text to search
|
1336
|
-
* @
|
1358
|
+
* @param [Hash] options the options with which to perform the match
|
1359
|
+
* @option options [Integer] :startpos (0) offset at which to start matching
|
1360
|
+
* @option options [Integer] :endpos offset at which to stop matching, defaults to the text length
|
1361
|
+
* @option options [Symbol] :anchor (:unanchored) one of :unanchored, :anchor_start, :anchor_both to anchor the match
|
1362
|
+
* @option options [Integer] :submatches how many submatches to extract (0 is
|
1363
|
+
* fastest), defaults to the number of capturing groups
|
1364
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1365
|
+
* @return [Boolean] if not extracting any submatches
|
1366
|
+
* @raise [ArgumentError] if given a negative number of submatches, invalid
|
1367
|
+
* anchor or invalid startpos, endpos pair
|
1337
1368
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1338
|
-
* @
|
1369
|
+
* @raise [TypeError] if given non-String text, non-numeric number of
|
1370
|
+
* submatches, non-symbol anchor or non-hash options
|
1371
|
+
* @raise [RE2::Regexp::UnsupportedError] if given an endpos argument on a
|
1372
|
+
* version of RE2 that does not support it
|
1373
|
+
* @example Matching with capturing groups
|
1339
1374
|
* r = RE2::Regexp.new('w(o)(o)')
|
1340
|
-
* r.match('woo',
|
1341
|
-
* r.match('
|
1375
|
+
* r.match('woo', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1376
|
+
* r.match('woo', submatches: 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
1377
|
+
* r.match('woot', anchor: :anchor_both, submatches: 0)
|
1378
|
+
* #=> false
|
1379
|
+
* r.match('woot', anchor: :anchor_start, submatches: 0)
|
1380
|
+
* #=> true
|
1381
|
+
* @example Matching without capturing groups
|
1382
|
+
* r = RE2::Regexp.new('wo+')
|
1383
|
+
* r.match('woot', anchor: :anchor_both) #=> false
|
1384
|
+
* r.match('woot', anchor: :anchor_start) #=> true
|
1342
1385
|
*
|
1343
|
-
* @overload match(text,
|
1344
|
-
*
|
1345
|
-
*
|
1386
|
+
* @overload match(text, submatches)
|
1387
|
+
* @deprecated Legacy syntax for matching against `text` with a specific
|
1388
|
+
* number of submatches to extract. Use `match(text, submatches: n)` instead.
|
1346
1389
|
*
|
1347
1390
|
* @param [String] text the text to search
|
1348
|
-
* @param [Integer]
|
1349
|
-
* @return [RE2::MatchData]
|
1350
|
-
* @
|
1351
|
-
* @raise [NoMemoryError] if there was not enough memory to allocate the
|
1391
|
+
* @param [Integer] submatches the number of submatches to extract
|
1392
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1393
|
+
* @return [Boolean] if not extracting any submatches
|
1394
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1395
|
+
* @raise [TypeError] if given non-numeric number of submatches
|
1352
1396
|
* @example
|
1353
1397
|
* r = RE2::Regexp.new('w(o)(o)')
|
1398
|
+
* r.match('woo', 0) #=> true
|
1354
1399
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1355
|
-
* r.match('woo',
|
1400
|
+
* r.match('woo', 2) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1356
1401
|
*/
|
1357
1402
|
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
1358
1403
|
re2_pattern *p;
|
1359
1404
|
re2_matchdata *m;
|
1360
|
-
VALUE text,
|
1405
|
+
VALUE text, options;
|
1361
1406
|
|
1362
|
-
rb_scan_args(argc, argv, "11", &text, &
|
1407
|
+
rb_scan_args(argc, argv, "11", &text, &options);
|
1363
1408
|
|
1364
1409
|
/* Ensure text is a string. */
|
1365
1410
|
StringValue(text);
|
@@ -1367,12 +1412,80 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1367
1412
|
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1368
1413
|
|
1369
1414
|
int n;
|
1415
|
+
int startpos = 0;
|
1416
|
+
int endpos = RSTRING_LEN(text);
|
1417
|
+
RE2::Anchor anchor = RE2::UNANCHORED;
|
1370
1418
|
|
1371
|
-
if (RTEST(
|
1372
|
-
|
1419
|
+
if (RTEST(options)) {
|
1420
|
+
if (FIXNUM_P(options)) {
|
1421
|
+
n = NUM2INT(options);
|
1422
|
+
|
1423
|
+
if (n < 0) {
|
1424
|
+
rb_raise(rb_eArgError, "number of matches should be >= 0");
|
1425
|
+
}
|
1426
|
+
} else {
|
1427
|
+
if (TYPE(options) != T_HASH) {
|
1428
|
+
options = rb_Hash(options);
|
1429
|
+
}
|
1430
|
+
|
1431
|
+
VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
|
1432
|
+
if (!NIL_P(endpos_option)) {
|
1433
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1434
|
+
Check_Type(endpos_option, T_FIXNUM);
|
1435
|
+
|
1436
|
+
endpos = NUM2INT(endpos_option);
|
1437
|
+
|
1438
|
+
if (endpos < 0) {
|
1439
|
+
rb_raise(rb_eArgError, "endpos should be >= 0");
|
1440
|
+
}
|
1441
|
+
#else
|
1442
|
+
rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
|
1443
|
+
#endif
|
1444
|
+
}
|
1445
|
+
|
1446
|
+
VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
|
1447
|
+
if (!NIL_P(anchor_option)) {
|
1448
|
+
Check_Type(anchor_option, T_SYMBOL);
|
1449
|
+
|
1450
|
+
ID id_anchor_option = SYM2ID(anchor_option);
|
1451
|
+
if (id_anchor_option == id_unanchored) {
|
1452
|
+
anchor = RE2::UNANCHORED;
|
1453
|
+
} else if (id_anchor_option == id_anchor_start) {
|
1454
|
+
anchor = RE2::ANCHOR_START;
|
1455
|
+
} else if (id_anchor_option == id_anchor_both) {
|
1456
|
+
anchor = RE2::ANCHOR_BOTH;
|
1457
|
+
} else {
|
1458
|
+
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
1459
|
+
}
|
1460
|
+
}
|
1461
|
+
|
1462
|
+
VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
|
1463
|
+
if (!NIL_P(submatches_option)) {
|
1464
|
+
Check_Type(submatches_option, T_FIXNUM);
|
1465
|
+
|
1466
|
+
n = NUM2INT(submatches_option);
|
1467
|
+
|
1468
|
+
if (n < 0) {
|
1469
|
+
rb_raise(rb_eArgError, "number of matches should be >= 0");
|
1470
|
+
}
|
1471
|
+
} else {
|
1472
|
+
if (!p->pattern->ok()) {
|
1473
|
+
return Qnil;
|
1474
|
+
}
|
1373
1475
|
|
1374
|
-
|
1375
|
-
|
1476
|
+
n = p->pattern->NumberOfCapturingGroups();
|
1477
|
+
}
|
1478
|
+
|
1479
|
+
VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
|
1480
|
+
if (!NIL_P(startpos_option)) {
|
1481
|
+
Check_Type(startpos_option, T_FIXNUM);
|
1482
|
+
|
1483
|
+
startpos = NUM2INT(startpos_option);
|
1484
|
+
|
1485
|
+
if (startpos < 0) {
|
1486
|
+
rb_raise(rb_eArgError, "startpos should be >= 0");
|
1487
|
+
}
|
1488
|
+
}
|
1376
1489
|
}
|
1377
1490
|
} else {
|
1378
1491
|
if (!p->pattern->ok()) {
|
@@ -1382,12 +1495,16 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1382
1495
|
n = p->pattern->NumberOfCapturingGroups();
|
1383
1496
|
}
|
1384
1497
|
|
1498
|
+
if (startpos > endpos) {
|
1499
|
+
rb_raise(rb_eArgError, "startpos should be <= endpos");
|
1500
|
+
}
|
1501
|
+
|
1385
1502
|
if (n == 0) {
|
1386
1503
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
1387
|
-
bool matched = p->pattern->Match(RSTRING_PTR(text),
|
1388
|
-
|
1504
|
+
bool matched = p->pattern->Match(RSTRING_PTR(text), startpos,
|
1505
|
+
endpos, anchor, 0, 0);
|
1389
1506
|
#else
|
1390
|
-
bool matched = p->pattern->Match(RSTRING_PTR(text),
|
1507
|
+
bool matched = p->pattern->Match(RSTRING_PTR(text), startpos, anchor,
|
1391
1508
|
0, 0);
|
1392
1509
|
#endif
|
1393
1510
|
return BOOL2RUBY(matched);
|
@@ -1412,11 +1529,11 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1412
1529
|
m->number_of_matches = n;
|
1413
1530
|
|
1414
1531
|
#ifdef HAVE_ENDPOS_ARGUMENT
|
1415
|
-
bool matched = p->pattern->Match(RSTRING_PTR(m->text),
|
1416
|
-
|
1532
|
+
bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
|
1533
|
+
endpos, anchor, m->matches, n);
|
1417
1534
|
#else
|
1418
|
-
bool matched = p->pattern->Match(RSTRING_PTR(m->text),
|
1419
|
-
|
1535
|
+
bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
|
1536
|
+
anchor, m->matches, n);
|
1420
1537
|
#endif
|
1421
1538
|
if (matched) {
|
1422
1539
|
return matchdata;
|
@@ -1427,22 +1544,54 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
|
1427
1544
|
}
|
1428
1545
|
|
1429
1546
|
/*
|
1430
|
-
* Returns true
|
1431
|
-
*
|
1547
|
+
* Returns true if the pattern matches any substring of the given text using
|
1548
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
|
1549
|
+
* `PartialMatch`}.
|
1432
1550
|
*
|
1433
1551
|
* @return [Boolean] whether the match was successful
|
1552
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1434
1553
|
*/
|
1435
1554
|
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
1436
|
-
|
1555
|
+
re2_pattern *p;
|
1556
|
+
|
1557
|
+
/* Ensure text is a string. */
|
1558
|
+
StringValue(text);
|
1559
|
+
|
1560
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1561
|
+
|
1562
|
+
return BOOL2RUBY(RE2::PartialMatch(RSTRING_PTR(text), *p->pattern));
|
1563
|
+
}
|
1564
|
+
|
1565
|
+
/*
|
1566
|
+
* Returns true if the pattern matches the given text using
|
1567
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
|
1568
|
+
* `FullMatch`}.
|
1569
|
+
*
|
1570
|
+
* @return [Boolean] whether the match was successful
|
1571
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1572
|
+
*/
|
1573
|
+
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
|
1574
|
+
re2_pattern *p;
|
1575
|
+
|
1576
|
+
/* Ensure text is a string. */
|
1577
|
+
StringValue(text);
|
1437
1578
|
|
1438
|
-
|
1579
|
+
TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
|
1580
|
+
|
1581
|
+
return BOOL2RUBY(RE2::FullMatch(RSTRING_PTR(text), *p->pattern));
|
1439
1582
|
}
|
1440
1583
|
|
1441
1584
|
/*
|
1442
|
-
* Returns a {RE2::Scanner} for scanning the given text incrementally
|
1585
|
+
* Returns a {RE2::Scanner} for scanning the given text incrementally with
|
1586
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
1587
|
+
* `FindAndConsume`}.
|
1443
1588
|
*
|
1589
|
+
* @param [String] text the text to scan incrementally
|
1590
|
+
* @return [RE2::Scanner] an `Enumerable` {RE2::Scanner} object
|
1591
|
+
* @raise [TypeError] if `text` cannot be coerced to a `String`
|
1444
1592
|
* @example
|
1445
1593
|
* c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
1594
|
+
* #=> #<RE2::Scanner:0x0000000000000001>
|
1446
1595
|
*/
|
1447
1596
|
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
1448
1597
|
/* Ensure text is a string. */
|
@@ -1471,17 +1620,40 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
|
1471
1620
|
}
|
1472
1621
|
|
1473
1622
|
/*
|
1474
|
-
* Returns
|
1475
|
-
*
|
1623
|
+
* Returns whether the underlying RE2 version supports passing an `endpos`
|
1624
|
+
* argument to
|
1625
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1626
|
+
* `Match`}. If not, {RE2::Regexp#match} will raise an error if attempting to
|
1627
|
+
* pass an `endpos`.
|
1628
|
+
*
|
1629
|
+
* @return [Boolean] whether the underlying
|
1630
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1631
|
+
* `Match`} has an endpos argument
|
1632
|
+
*/
|
1633
|
+
static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
|
1634
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1635
|
+
return Qtrue;
|
1636
|
+
#else
|
1637
|
+
return Qfalse;
|
1638
|
+
#endif
|
1639
|
+
}
|
1640
|
+
|
1641
|
+
/*
|
1642
|
+
* Returns a copy of `str` with the first occurrence of `pattern` replaced with
|
1643
|
+
* `rewrite` using
|
1644
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L465-L480
|
1645
|
+
* `Replace`}.
|
1476
1646
|
*
|
1477
1647
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1478
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1479
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1648
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1649
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1480
1650
|
*
|
1481
1651
|
* @param [String] str the string to modify
|
1482
1652
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1483
1653
|
* @param [String] rewrite the string to replace with
|
1484
1654
|
* @return [String] the resulting string
|
1655
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1656
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1485
1657
|
* @example
|
1486
1658
|
* RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
|
1487
1659
|
* re2 = RE2::Regexp.new("hel+o")
|
@@ -1517,20 +1689,24 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
|
1517
1689
|
}
|
1518
1690
|
|
1519
1691
|
/*
|
1520
|
-
* Return a copy of
|
1692
|
+
* Return a copy of `str` with `pattern` replaced by `rewrite` using
|
1693
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L482-L497
|
1694
|
+
* `GlobalReplace`}.
|
1521
1695
|
*
|
1522
1696
|
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1523
|
-
* returned in UTF-8 by default or ISO-8859-1 if the
|
1524
|
-
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1697
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1698
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1525
1699
|
*
|
1526
1700
|
* @param [String] str the string to modify
|
1527
1701
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1528
1702
|
* @param [String] rewrite the string to replace with
|
1703
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1704
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1529
1705
|
* @return [String] the resulting string
|
1530
1706
|
* @example
|
1531
1707
|
* re2 = RE2::Regexp.new("oo?")
|
1532
|
-
* RE2.GlobalReplace("whoops-doops", re2, "e")
|
1533
|
-
* RE2.GlobalReplace("hello there", "e", "i")
|
1708
|
+
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1709
|
+
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1534
1710
|
*/
|
1535
1711
|
static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
1536
1712
|
VALUE rewrite) {
|
@@ -1562,14 +1738,17 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
|
1562
1738
|
}
|
1563
1739
|
|
1564
1740
|
/*
|
1565
|
-
* Returns a version of str with all potentially meaningful regexp
|
1566
|
-
*
|
1567
|
-
*
|
1741
|
+
* Returns a version of `str` with all potentially meaningful regexp characters
|
1742
|
+
* escaped using
|
1743
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L512-L518
|
1744
|
+
* `QuoteMeta`}. The returned string, used as a regular expression, will
|
1745
|
+
* exactly match the original string.
|
1568
1746
|
*
|
1569
1747
|
* @param [String] unquoted the unquoted string
|
1748
|
+
* @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
|
1570
1749
|
* @return [String] the escaped string
|
1571
1750
|
* @example
|
1572
|
-
* RE2::Regexp.escape("1.5-2.0?")
|
1751
|
+
* RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
1573
1752
|
*/
|
1574
1753
|
static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
|
1575
1754
|
StringValue(unquoted);
|
@@ -1598,15 +1777,17 @@ static size_t re2_set_memsize(const void *ptr) {
|
|
1598
1777
|
}
|
1599
1778
|
|
1600
1779
|
static const rb_data_type_t re2_set_data_type = {
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1780
|
+
"RE2::Set",
|
1781
|
+
{
|
1782
|
+
0,
|
1783
|
+
re2_set_free,
|
1784
|
+
re2_set_memsize,
|
1606
1785
|
},
|
1786
|
+
0,
|
1787
|
+
0,
|
1607
1788
|
// IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
|
1608
1789
|
// macro to update VALUE references, as to trigger write barriers.
|
1609
|
-
|
1790
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
1610
1791
|
};
|
1611
1792
|
|
1612
1793
|
static VALUE re2_set_allocate(VALUE klass) {
|
@@ -1633,14 +1814,14 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1633
1814
|
* Returns a new {RE2::Set} object for the specified anchor with the default
|
1634
1815
|
* options.
|
1635
1816
|
*
|
1636
|
-
* @param [Symbol] anchor
|
1637
|
-
* @raise [ArgumentError] if anchor is not
|
1817
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1818
|
+
* @raise [ArgumentError] if anchor is not `:unanchored`, `:anchor_start` or `:anchor_both`
|
1638
1819
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1639
1820
|
*
|
1640
1821
|
* @overload initialize(anchor, options)
|
1641
1822
|
* Returns a new {RE2::Set} object with the specified options.
|
1642
1823
|
*
|
1643
|
-
* @param [Symbol] anchor
|
1824
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1644
1825
|
* @param [Hash] options the options with which to compile the pattern
|
1645
1826
|
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
1646
1827
|
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
@@ -1648,13 +1829,13 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1648
1829
|
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1649
1830
|
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
1650
1831
|
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1651
|
-
* @option options [Boolean] :never_nl (false) never match
|
1652
|
-
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
|
1653
|
-
* @option options [Boolean] :perl_classes (false) allow Perl's
|
1654
|
-
* @option options [Boolean] :word_boundary (false) allow
|
1655
|
-
* @option options [Boolean] :one_line (false)
|
1656
|
-
* @return [RE2::Set]
|
1657
|
-
* @raise [ArgumentError] if anchor is not one of the accepted choices
|
1832
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
1833
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
1834
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
1835
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
1836
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
1837
|
+
* @return [RE2::Set] a {RE2::Set} with the specified anchor and options
|
1838
|
+
* @raise [ArgumentError] if `anchor` is not one of the accepted choices
|
1658
1839
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1659
1840
|
*/
|
1660
1841
|
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
@@ -1668,12 +1849,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1668
1849
|
|
1669
1850
|
if (!NIL_P(anchor)) {
|
1670
1851
|
Check_Type(anchor, T_SYMBOL);
|
1671
|
-
ID
|
1672
|
-
if (
|
1852
|
+
ID id_anchor_arg = SYM2ID(anchor);
|
1853
|
+
if (id_anchor_arg == id_unanchored) {
|
1673
1854
|
re2_anchor = RE2::UNANCHORED;
|
1674
|
-
} else if (
|
1855
|
+
} else if (id_anchor_arg == id_anchor_start) {
|
1675
1856
|
re2_anchor = RE2::ANCHOR_START;
|
1676
|
-
} else if (
|
1857
|
+
} else if (id_anchor_arg == id_anchor_both) {
|
1677
1858
|
re2_anchor = RE2::ANCHOR_BOTH;
|
1678
1859
|
} else {
|
1679
1860
|
rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
|
@@ -1696,15 +1877,16 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1696
1877
|
|
1697
1878
|
/*
|
1698
1879
|
* Adds a pattern to the set. Returns the index that will identify the pattern
|
1699
|
-
* in the output of #match. Cannot be called after #compile
|
1880
|
+
* in the output of {RE2::Set#match}. Cannot be called after {RE2::Set#compile}
|
1881
|
+
* has been called.
|
1700
1882
|
*
|
1701
1883
|
* @param [String] pattern the regex pattern
|
1702
1884
|
* @return [Integer] the index of the pattern in the set
|
1703
1885
|
* @raise [ArgumentError] if called after compile or the pattern is rejected
|
1704
1886
|
* @example
|
1705
1887
|
* set = RE2::Set.new
|
1706
|
-
* set.add("abc")
|
1707
|
-
* set.add("def")
|
1888
|
+
* set.add("abc") #=> 0
|
1889
|
+
* set.add("def") #=> 1
|
1708
1890
|
*/
|
1709
1891
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1710
1892
|
StringValue(pattern);
|
@@ -1732,14 +1914,14 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
|
1732
1914
|
}
|
1733
1915
|
|
1734
1916
|
/*
|
1735
|
-
* Compiles a Set so it can be used to match against. Must be called
|
1736
|
-
* and before #match.
|
1917
|
+
* Compiles a {RE2::Set} so it can be used to match against. Must be called
|
1918
|
+
* after {RE2::Set#add} and before {RE2::Set#match}.
|
1737
1919
|
*
|
1738
|
-
* @return [
|
1920
|
+
* @return [Boolean] whether compilation was a success
|
1739
1921
|
* @example
|
1740
1922
|
* set = RE2::Set.new
|
1741
1923
|
* set.add("abc")
|
1742
|
-
* set.compile
|
1924
|
+
* set.compile #=> true
|
1743
1925
|
*/
|
1744
1926
|
static VALUE re2_set_compile(VALUE self) {
|
1745
1927
|
re2_set *s;
|
@@ -1749,11 +1931,12 @@ static VALUE re2_set_compile(VALUE self) {
|
|
1749
1931
|
}
|
1750
1932
|
|
1751
1933
|
/*
|
1752
|
-
* Returns whether the underlying
|
1753
|
-
*
|
1754
|
-
*
|
1934
|
+
* Returns whether the underlying RE2 version outputs error information from
|
1935
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/set.h#L62-L65
|
1936
|
+
* `RE2::Set::Match`}. If not, {RE2::Set#match} will raise an error if attempting to set
|
1937
|
+
* its `:exception` option to `true`.
|
1755
1938
|
*
|
1756
|
-
* @return [
|
1939
|
+
* @return [Boolean] whether the underlying RE2 outputs error information from {RE2::Set} matches
|
1757
1940
|
*/
|
1758
1941
|
static VALUE re2_set_match_raises_errors_p(VALUE) {
|
1759
1942
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
@@ -1777,31 +1960,31 @@ static VALUE re2_set_match_raises_errors_p(VALUE) {
|
|
1777
1960
|
* @param [String] str the text to match against
|
1778
1961
|
* @return [Array<Integer>] the indices of matching regexps
|
1779
1962
|
* @raise [MatchError] if an error occurs while matching
|
1780
|
-
* @raise [UnsupportedError] if the underlying version of
|
1963
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
1781
1964
|
* @example
|
1782
1965
|
* set = RE2::Set.new
|
1783
1966
|
* set.add("abc")
|
1784
1967
|
* set.add("def")
|
1785
1968
|
* set.compile
|
1786
|
-
* set.match("abcdef")
|
1969
|
+
* set.match("abcdef") #=> [0, 1]
|
1787
1970
|
*
|
1788
1971
|
* @overload match(str, options)
|
1789
1972
|
* Returns an array of integer indices of patterns matching the given string
|
1790
1973
|
* (if any). Raises exceptions if there are any errors while matching and the
|
1791
|
-
*
|
1974
|
+
* `:exception` option is set to true.
|
1792
1975
|
*
|
1793
1976
|
* @param [String] str the text to match against
|
1794
1977
|
* @param [Hash] options the options with which to match
|
1795
|
-
* @option options [Boolean] :exception (true) whether to raise exceptions with
|
1978
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)
|
1796
1979
|
* @return [Array<Integer>] the indices of matching regexps
|
1797
1980
|
* @raise [MatchError] if an error occurs while matching
|
1798
|
-
* @raise [UnsupportedError] if the underlying version of
|
1981
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
1799
1982
|
* @example
|
1800
1983
|
* set = RE2::Set.new
|
1801
1984
|
* set.add("abc")
|
1802
1985
|
* set.add("def")
|
1803
1986
|
* set.compile
|
1804
|
-
* set.match("abcdef", :
|
1987
|
+
* set.match("abcdef", exception: true) #=> [0, 1]
|
1805
1988
|
*/
|
1806
1989
|
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
1807
1990
|
VALUE str, options;
|
@@ -1869,6 +2052,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
|
1869
2052
|
extern "C" void Init_re2(void) {
|
1870
2053
|
re2_mRE2 = rb_define_module("RE2");
|
1871
2054
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
2055
|
+
re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
|
2056
|
+
"UnsupportedError", rb_const_get(rb_cObject, rb_intern("StandardError")));
|
1872
2057
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1873
2058
|
re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
|
1874
2059
|
re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
|
@@ -1922,6 +2107,8 @@ extern "C" void Init_re2(void) {
|
|
1922
2107
|
rb_define_method(re2_cScanner, "rewind",
|
1923
2108
|
RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
|
1924
2109
|
|
2110
|
+
rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
|
2111
|
+
RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
|
1925
2112
|
rb_define_method(re2_cRegexp, "initialize",
|
1926
2113
|
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
1927
2114
|
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
@@ -1939,12 +2126,14 @@ extern "C" void Init_re2(void) {
|
|
1939
2126
|
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
1940
2127
|
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1941
2128
|
-1);
|
1942
|
-
rb_define_method(re2_cRegexp, "match?",
|
1943
|
-
|
1944
|
-
rb_define_method(re2_cRegexp, "
|
1945
|
-
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
1946
|
-
rb_define_method(re2_cRegexp, "===",
|
2129
|
+
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
|
2130
|
+
1);
|
2131
|
+
rb_define_method(re2_cRegexp, "partial_match?",
|
1947
2132
|
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2133
|
+
rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2134
|
+
rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2135
|
+
rb_define_method(re2_cRegexp, "full_match?",
|
2136
|
+
RUBY_METHOD_FUNC(re2_regexp_full_match_p), 1);
|
1948
2137
|
rb_define_method(re2_cRegexp, "scan",
|
1949
2138
|
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
1950
2139
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
@@ -2001,6 +2190,8 @@ extern "C" void Init_re2(void) {
|
|
2001
2190
|
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2002
2191
|
rb_define_singleton_method(re2_cRegexp, "quote",
|
2003
2192
|
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2193
|
+
|
2194
|
+
// (see RE2::Regexp#initialize)
|
2004
2195
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
2005
2196
|
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
2006
2197
|
|
@@ -2019,7 +2210,11 @@ extern "C" void Init_re2(void) {
|
|
2019
2210
|
id_word_boundary = rb_intern("word_boundary");
|
2020
2211
|
id_one_line = rb_intern("one_line");
|
2021
2212
|
id_unanchored = rb_intern("unanchored");
|
2213
|
+
id_anchor = rb_intern("anchor");
|
2022
2214
|
id_anchor_start = rb_intern("anchor_start");
|
2023
2215
|
id_anchor_both = rb_intern("anchor_both");
|
2024
2216
|
id_exception = rb_intern("exception");
|
2217
|
+
id_submatches = rb_intern("submatches");
|
2218
|
+
id_startpos = rb_intern("startpos");
|
2219
|
+
id_endpos = rb_intern("endpos");
|
2025
2220
|
}
|