re2 2.4.3-aarch64-linux → 2.5.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/re2/re2.cc CHANGED
@@ -1,8 +1,10 @@
1
1
  /*
2
- * re2 (http://github.com/mudge/re2)
3
- * Ruby bindings to re2, an "efficient, principled regular expression library"
2
+ * re2 (https://github.com/mudge/re2)
3
+ * Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to
4
+ * backtracking regular expression engines like those used in PCRE, Perl, and
5
+ * Python".
4
6
  *
5
- * Copyright (c) 2010-2014, Paul Mucur (http://mudge.name)
7
+ * Copyright (c) 2010, Paul Mucur (https://mudge.name)
6
8
  * Released under the BSD Licence, please see LICENSE.txt
7
9
  */
8
10
 
@@ -42,13 +44,14 @@ typedef struct {
42
44
  } re2_set;
43
45
 
44
46
  VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
45
- re2_eSetMatchError, re2_eSetUnsupportedError;
47
+ re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
46
48
 
47
49
  /* Symbols used in RE2 options. */
48
50
  static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
49
51
  id_max_mem, id_literal, id_never_nl, id_case_sensitive,
50
- id_perl_classes, id_word_boundary, id_one_line,
51
- id_unanchored, id_anchor_start, id_anchor_both, id_exception;
52
+ id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
53
+ id_anchor, id_anchor_start, id_anchor_both, id_exception,
54
+ id_submatches, id_startpos, id_endpos;
52
55
 
53
56
  inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
54
57
  if (encoding == RE2::Options::EncodingUTF8) {
@@ -122,7 +125,7 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
122
125
  }
123
126
  }
124
127
 
125
- /* For compatibility with ruby < 2.7 */
128
+ /* For compatibility with Ruby < 2.7 */
126
129
  #ifdef HAVE_RB_GC_MARK_MOVABLE
127
130
  #define re2_compact_callback(x) (x),
128
131
  #else
@@ -270,12 +273,14 @@ static VALUE re2_scanner_allocate(VALUE klass) {
270
273
  }
271
274
 
272
275
  /*
273
- * Returns a frozen copy of the string passed into +match+.
276
+ * Returns a frozen copy of the text supplied when matching.
274
277
  *
275
- * @return [String] a frozen copy of the passed string.
278
+ * If the text was already a frozen string, returns the original.
279
+ *
280
+ * @return [String] a frozen string with the text supplied when matching
276
281
  * @example
277
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
278
- * m.string #=> "bob 123"
282
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
283
+ * m.string #=> "bob 123"
279
284
  */
280
285
  static VALUE re2_matchdata_string(const VALUE self) {
281
286
  re2_matchdata *m;
@@ -285,9 +290,10 @@ static VALUE re2_matchdata_string(const VALUE self) {
285
290
  }
286
291
 
287
292
  /*
288
- * Returns the string passed into the scanner.
293
+ * Returns the text supplied when incrementally matching with
294
+ * {RE2::Regexp#scan}.
289
295
  *
290
- * @return [String] the original string.
296
+ * @return [String] the original string passed to {RE2::Regexp#scan}
291
297
  * @example
292
298
  * c = RE2::Regexp.new('(\d+)').scan("foo")
293
299
  * c.string #=> "foo"
@@ -300,9 +306,9 @@ static VALUE re2_scanner_string(const VALUE self) {
300
306
  }
301
307
 
302
308
  /*
303
- * Returns whether the scanner has consumed all input or not.
309
+ * Returns whether the {RE2::Scanner} has consumed all input or not.
304
310
  *
305
- * @return [Boolean] whether the scanner has consumed all input or not
311
+ * @return [Boolean] whether the {RE2::Scanner} has consumed all input or not
306
312
  * @example
307
313
  * c = RE2::Regexp.new('(\d+)').scan("foo")
308
314
  * c.eof? #=> true
@@ -315,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
315
321
  }
316
322
 
317
323
  /*
318
- * Rewind the scanner to the start of the string.
324
+ * Rewind the {RE2::Scanner} to the start of the string.
319
325
  *
320
326
  * @example
321
327
  * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
@@ -337,14 +343,19 @@ static VALUE re2_scanner_rewind(VALUE self) {
337
343
  }
338
344
 
339
345
  /*
340
- * Scan the given text incrementally for matches, returning an array of
341
- * matches on each subsequent call. Returns nil if no matches are found.
346
+ * Scan the given text incrementally for matches using
347
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
348
+ * `FindAndConsume`}, returning an array of submatches on each subsequent
349
+ * call. Returns `nil` if no matches are found or an empty array for every
350
+ * match if the pattern has no capturing groups.
342
351
  *
343
352
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
344
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
345
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
353
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
354
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
346
355
  *
347
- * @return [Array<String>] the matches.
356
+ * @return [Array<String>] if the pattern has capturing groups
357
+ * @return [[]] if the pattern does not have capturing groups
358
+ * @return [nil] if no matches are found
348
359
  * @example
349
360
  * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
350
361
  * s.scan #=> ["Foo"]
@@ -359,7 +370,7 @@ static VALUE re2_scanner_scan(VALUE self) {
359
370
 
360
371
  std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
361
372
  std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
362
- std::vector<std::string> matches(c->number_of_capturing_groups);
373
+ std::vector<re2::StringPiece> matches(c->number_of_capturing_groups);
363
374
 
364
375
  if (c->eof) {
365
376
  return Qnil;
@@ -403,9 +414,6 @@ static VALUE re2_scanner_scan(VALUE self) {
403
414
  }
404
415
  }
405
416
 
406
- /*
407
- * Retrieve a matchdata by index or name.
408
- */
409
417
  static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
410
418
  re2_matchdata *m;
411
419
  re2_pattern *p;
@@ -441,13 +449,14 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
441
449
  }
442
450
 
443
451
  /*
444
- * Returns the number of elements in the match array (including nils).
452
+ * Returns the number of elements in the {RE2::MatchData} (including the
453
+ * overall match, submatches and any `nil`s).
445
454
  *
446
455
  * @return [Integer] the number of elements
447
456
  * @example
448
457
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
449
- * m.size #=> 2
450
- * m.length #=> 2
458
+ * m.size #=> 2
459
+ * m.length #=> 2
451
460
  */
452
461
  static VALUE re2_matchdata_size(const VALUE self) {
453
462
  re2_matchdata *m;
@@ -458,14 +467,15 @@ static VALUE re2_matchdata_size(const VALUE self) {
458
467
  }
459
468
 
460
469
  /*
461
- * Returns the offset of the start of the nth element of the matchdata.
470
+ * Returns the offset of the start of the nth element of the {RE2::MatchData}.
462
471
  *
463
- * @param [Integer, String, Symbol] n the name or number of the match
464
- * @return [Integer] the offset of the start of the match
472
+ * @param [Integer, String, Symbol] n the name or number of the submatch
473
+ * @return [Integer, nil] the offset of the start of the match or `nil` if
474
+ * there is no such submatch
465
475
  * @example
466
476
  * m = RE2::Regexp.new('ob (\d+)').match("bob 123")
467
- * m.begin(0) #=> 1
468
- * m.begin(1) #=> 4
477
+ * m.begin(0) #=> 1
478
+ * m.begin(1) #=> 4
469
479
  */
470
480
  static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
471
481
  re2_matchdata *m;
@@ -483,14 +493,16 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
483
493
  }
484
494
 
485
495
  /*
486
- * Returns the offset of the character following the end of the nth element of the matchdata.
496
+ * Returns the offset of the character following the end of the nth element of
497
+ * the {RE2::MatchData}.
487
498
  *
488
499
  * @param [Integer, String, Symbol] n the name or number of the match
489
- * @return [Integer] the offset of the character following the end of the match
500
+ * @return [Integer, nil] the offset of the character following the end of the
501
+ * match or `nil` if there is no such match
490
502
  * @example
491
503
  * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
492
- * m.end(0) #=> 9
493
- * m.end(1) #=> 7
504
+ * m.end(0) #=> 9
505
+ * m.end(1) #=> 7
494
506
  */
495
507
  static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
496
508
  re2_matchdata *m;
@@ -510,10 +522,10 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
510
522
  /*
511
523
  * Returns the {RE2::Regexp} used in the match.
512
524
  *
513
- * @return [RE2::Regexp] the regexp used in the match
525
+ * @return [RE2::Regexp] the regular expression used in the match
514
526
  * @example
515
527
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
516
- * m.regexp #=> #<RE2::Regexp /(\d+)/>
528
+ * m.regexp #=> #<RE2::Regexp /(\d+)/>
517
529
  */
518
530
  static VALUE re2_matchdata_regexp(const VALUE self) {
519
531
  re2_matchdata *m;
@@ -523,12 +535,12 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
523
535
  }
524
536
 
525
537
  /*
526
- * Returns the {RE2::Regexp} used in the scanner.
538
+ * Returns the {RE2::Regexp} used in the {RE2::Scanner}.
527
539
  *
528
- * @return [RE2::Regexp] the regexp used in the scanner
540
+ * @return [RE2::Regexp] the regular expression used in the {RE2::Scanner}
529
541
  * @example
530
542
  * c = RE2::Regexp.new('(\d+)').scan("bob 123")
531
- * c.regexp #=> #<RE2::Regexp /(\d+)/>
543
+ * c.regexp #=> #<RE2::Regexp /(\d+)/>
532
544
  */
533
545
  static VALUE re2_scanner_regexp(const VALUE self) {
534
546
  re2_scanner *c;
@@ -544,16 +556,17 @@ static VALUE re2_regexp_allocate(VALUE klass) {
544
556
  }
545
557
 
546
558
  /*
547
- * Returns the array of matches.
559
+ * Returns the array of matches including the overall match, submatches and any
560
+ * `nil`s.
548
561
  *
549
562
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
550
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
551
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
563
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
564
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
552
565
  *
553
566
  * @return [Array<String, nil>] the array of matches
554
567
  * @example
555
568
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
556
- * m.to_a #=> ["123", "123"]
569
+ * m.to_a #=> ["123", "123"]
557
570
  */
558
571
  static VALUE re2_matchdata_to_a(const VALUE self) {
559
572
  re2_matchdata *m;
@@ -619,19 +632,17 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
619
632
  * Retrieve zero, one or more matches by index or name.
620
633
  *
621
634
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
622
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
623
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
624
- *
625
- * @return [Array<String, nil>, String, Boolean]
635
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
636
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
626
637
  *
627
638
  * @overload [](index)
628
639
  * Access a particular match by index.
629
640
  *
630
641
  * @param [Integer] index the index of the match to fetch
631
- * @return [String, nil] the specified match
642
+ * @return [String, nil] the specified match or `nil` if it isn't present
632
643
  * @example
633
644
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
634
- * m[0] #=> "123"
645
+ * m[0] #=> "123"
635
646
  *
636
647
  * @overload [](start, length)
637
648
  * Access a range of matches by starting index and length.
@@ -641,7 +652,7 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
641
652
  * @return [Array<String, nil>] the specified matches
642
653
  * @example
643
654
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
644
- * m[0, 1] #=> ["123"]
655
+ * m[0, 1] #=> ["123"]
645
656
  *
646
657
  * @overload [](range)
647
658
  * Access a range of matches by index.
@@ -650,13 +661,13 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
650
661
  * @return [Array<String, nil>] the specified matches
651
662
  * @example
652
663
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
653
- * m[0..1] #=> "[123", "123"]
664
+ * m[0..1] #=> ["123", "123"]
654
665
  *
655
666
  * @overload [](name)
656
667
  * Access a particular match by name.
657
668
  *
658
669
  * @param [String, Symbol] name the name of the match to fetch
659
- * @return [String, nil] the specific match
670
+ * @return [String, nil] the specific match or `nil` if it isn't present
660
671
  * @example
661
672
  * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
662
673
  * m["number"] #=> "123"
@@ -690,13 +701,13 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
690
701
  * Returns a printable version of the match.
691
702
  *
692
703
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
693
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
694
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
704
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
705
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
695
706
  *
696
707
  * @return [String] a printable version of the match
697
708
  * @example
698
709
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
699
- * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
710
+ * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
700
711
  */
701
712
  static VALUE re2_matchdata_inspect(const VALUE self) {
702
713
  re2_matchdata *m;
@@ -734,13 +745,14 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
734
745
  * Returns the array of submatches for pattern matching.
735
746
  *
736
747
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
737
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
738
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
748
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
749
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is
750
+ * undefined).
739
751
  *
740
752
  * @return [Array<String, nil>] the array of submatches
741
753
  * @example
742
754
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
743
- * m.deconstruct #=> ["123"]
755
+ * m.deconstruct #=> ["123"]
744
756
  *
745
757
  * @example pattern matching
746
758
  * case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
@@ -780,17 +792,18 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
780
792
  * order but an invalid name will cause the hash to be immediately returned.
781
793
  *
782
794
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
783
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
784
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
795
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
796
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
785
797
  *
786
798
  * @return [Hash] a hash of capturing group names to submatches
787
- * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
799
+ * @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
800
+ * or `nil` to return all names
788
801
  * @example
789
802
  * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
790
- * m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
791
- * m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
792
- * m.deconstruct_keys([:fruit]) #=> {}
793
- * m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
803
+ * m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
804
+ * m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
805
+ * m.deconstruct_keys([:fruit]) #=> {}
806
+ * m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
794
807
  *
795
808
  * @example pattern matching
796
809
  * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
@@ -839,11 +852,9 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
839
852
  }
840
853
 
841
854
  /*
842
- * Returns a new RE2 object with a compiled version of
843
- * +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
855
+ * Shorthand to compile a new {RE2::Regexp}.
844
856
  *
845
857
  * @see RE2::Regexp#initialize
846
- *
847
858
  */
848
859
  static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
849
860
  return rb_class_new_instance(argc, argv, re2_cRegexp);
@@ -851,22 +862,21 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
851
862
 
852
863
  /*
853
864
  * Returns a new {RE2::Regexp} object with a compiled version of
854
- * +pattern+ stored inside.
855
- *
856
- * @return [RE2::Regexp]
865
+ * `pattern` stored inside.
857
866
  *
858
867
  * @overload initialize(pattern)
859
868
  * Returns a new {RE2::Regexp} object with a compiled version of
860
- * +pattern+ stored inside with the default options.
869
+ * `pattern` stored inside with the default options.
861
870
  *
862
871
  * @param [String] pattern the pattern to compile
863
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
872
+ * @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern
873
+ * @raise [TypeError] if the given pattern can't be coerced to a `String`
864
874
  * @raise [NoMemoryError] if memory could not be allocated for the compiled
865
- * pattern
875
+ * pattern
866
876
  *
867
877
  * @overload initialize(pattern, options)
868
878
  * Returns a new {RE2::Regexp} object with a compiled version of
869
- * +pattern+ stored inside with the specified options.
879
+ * `pattern` stored inside with the specified options.
870
880
  *
871
881
  * @param [String] pattern the pattern to compile
872
882
  * @param [Hash] options the options with which to compile the pattern
@@ -876,12 +886,13 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
876
886
  * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
877
887
  * @option options [Integer] :max_mem approx. max memory footprint of RE2
878
888
  * @option options [Boolean] :literal (false) interpret string as literal, not regexp
879
- * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
880
- * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
881
- * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
882
- * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
883
- * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
884
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
889
+ * @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
890
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
891
+ * @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
892
+ * @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
893
+ * @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
894
+ * @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern and options
895
+ * @raise [TypeError] if the given pattern can't be coerced to a `String`
885
896
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
886
897
  */
887
898
  static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
@@ -912,16 +923,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
912
923
  }
913
924
 
914
925
  /*
915
- * Returns a printable version of the regular expression +re2+.
926
+ * Returns a printable version of the regular expression.
916
927
  *
917
928
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
918
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
919
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
929
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
930
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is
931
+ * undefined).
920
932
  *
921
933
  * @return [String] a printable version of the regular expression
922
934
  * @example
923
935
  * re2 = RE2::Regexp.new("woo?")
924
- * re2.inspect #=> "#<RE2::Regexp /woo?/>"
936
+ * re2.inspect #=> "#<RE2::Regexp /woo?/>"
925
937
  */
926
938
  static VALUE re2_regexp_inspect(const VALUE self) {
927
939
  re2_pattern *p;
@@ -937,16 +949,16 @@ static VALUE re2_regexp_inspect(const VALUE self) {
937
949
  }
938
950
 
939
951
  /*
940
- * Returns a string version of the regular expression +re2+.
952
+ * Returns a string version of the regular expression.
941
953
  *
942
954
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
943
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
944
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
955
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
956
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
945
957
  *
946
958
  * @return [String] a string version of the regular expression
947
959
  * @example
948
960
  * re2 = RE2::Regexp.new("woo?")
949
- * re2.to_s #=> "woo?"
961
+ * re2.to_s #=> "woo?"
950
962
  */
951
963
  static VALUE re2_regexp_to_s(const VALUE self) {
952
964
  re2_pattern *p;
@@ -958,13 +970,12 @@ static VALUE re2_regexp_to_s(const VALUE self) {
958
970
  }
959
971
 
960
972
  /*
961
- * Returns whether or not the regular expression +re2+
962
- * was compiled successfully or not.
973
+ * Returns whether or not the regular expression was compiled successfully.
963
974
  *
964
975
  * @return [Boolean] whether or not compilation was successful
965
976
  * @example
966
977
  * re2 = RE2::Regexp.new("woo?")
967
- * re2.ok? #=> true
978
+ * re2.ok? #=> true
968
979
  */
969
980
  static VALUE re2_regexp_ok(const VALUE self) {
970
981
  re2_pattern *p;
@@ -974,13 +985,13 @@ static VALUE re2_regexp_ok(const VALUE self) {
974
985
  }
975
986
 
976
987
  /*
977
- * Returns whether or not the regular expression +re2+
978
- * was compiled with the utf8 option set to true.
988
+ * Returns whether or not the regular expression was compiled with the `utf8`
989
+ * option set to `true`.
979
990
  *
980
- * @return [Boolean] the utf8 option
991
+ * @return [Boolean] the `utf8` option
981
992
  * @example
982
- * re2 = RE2::Regexp.new("woo?", :utf8 => true)
983
- * re2.utf8? #=> true
993
+ * re2 = RE2::Regexp.new("woo?", utf8: true)
994
+ * re2.utf8? #=> true
984
995
  */
985
996
  static VALUE re2_regexp_utf8(const VALUE self) {
986
997
  re2_pattern *p;
@@ -990,13 +1001,13 @@ static VALUE re2_regexp_utf8(const VALUE self) {
990
1001
  }
991
1002
 
992
1003
  /*
993
- * Returns whether or not the regular expression +re2+
994
- * was compiled with the posix_syntax option set to true.
1004
+ * Returns whether or not the regular expression was compiled with the
1005
+ * `posix_syntax` option set to `true`.
995
1006
  *
996
- * @return [Boolean] the posix_syntax option
1007
+ * @return [Boolean] the `posix_syntax` option
997
1008
  * @example
998
- * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
999
- * re2.posix_syntax? #=> true
1009
+ * re2 = RE2::Regexp.new("woo?", posix_syntax: true)
1010
+ * re2.posix_syntax? #=> true
1000
1011
  */
1001
1012
  static VALUE re2_regexp_posix_syntax(const VALUE self) {
1002
1013
  re2_pattern *p;
@@ -1006,13 +1017,13 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
1006
1017
  }
1007
1018
 
1008
1019
  /*
1009
- * Returns whether or not the regular expression +re2+
1010
- * was compiled with the longest_match option set to true.
1020
+ * Returns whether or not the regular expression was compiled with the
1021
+ * `longest_match` option set to `true`.
1011
1022
  *
1012
- * @return [Boolean] the longest_match option
1023
+ * @return [Boolean] the `longest_match` option
1013
1024
  * @example
1014
- * re2 = RE2::Regexp.new("woo?", :longest_match => true)
1015
- * re2.longest_match? #=> true
1025
+ * re2 = RE2::Regexp.new("woo?", longest_match: true)
1026
+ * re2.longest_match? #=> true
1016
1027
  */
1017
1028
  static VALUE re2_regexp_longest_match(const VALUE self) {
1018
1029
  re2_pattern *p;
@@ -1022,13 +1033,13 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
1022
1033
  }
1023
1034
 
1024
1035
  /*
1025
- * Returns whether or not the regular expression +re2+
1026
- * was compiled with the log_errors option set to true.
1036
+ * Returns whether or not the regular expression was compiled with the
1037
+ * `log_errors` option set to `true`.
1027
1038
  *
1028
- * @return [Boolean] the log_errors option
1039
+ * @return [Boolean] the `log_errors` option
1029
1040
  * @example
1030
- * re2 = RE2::Regexp.new("woo?", :log_errors => true)
1031
- * re2.log_errors? #=> true
1041
+ * re2 = RE2::Regexp.new("woo?", log_errors: true)
1042
+ * re2.log_errors? #=> true
1032
1043
  */
1033
1044
  static VALUE re2_regexp_log_errors(const VALUE self) {
1034
1045
  re2_pattern *p;
@@ -1038,13 +1049,12 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
1038
1049
  }
1039
1050
 
1040
1051
  /*
1041
- * Returns the max_mem setting for the regular expression
1042
- * +re2+.
1052
+ * Returns the `max_mem` setting for the regular expression.
1043
1053
  *
1044
- * @return [Integer] the max_mem option
1054
+ * @return [Integer] the `max_mem` option
1045
1055
  * @example
1046
- * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
1047
- * re2.max_mem #=> 1024
1056
+ * re2 = RE2::Regexp.new("woo?", max_mem: 1024)
1057
+ * re2.max_mem #=> 1024
1048
1058
  */
1049
1059
  static VALUE re2_regexp_max_mem(const VALUE self) {
1050
1060
  re2_pattern *p;
@@ -1054,13 +1064,13 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
1054
1064
  }
1055
1065
 
1056
1066
  /*
1057
- * Returns whether or not the regular expression +re2+
1058
- * was compiled with the literal option set to true.
1067
+ * Returns whether or not the regular expression was compiled with the
1068
+ * `literal` option set to `true`.
1059
1069
  *
1060
- * @return [Boolean] the literal option
1070
+ * @return [Boolean] the `literal` option
1061
1071
  * @example
1062
- * re2 = RE2::Regexp.new("woo?", :literal => true)
1063
- * re2.literal? #=> true
1072
+ * re2 = RE2::Regexp.new("woo?", literal: true)
1073
+ * re2.literal? #=> true
1064
1074
  */
1065
1075
  static VALUE re2_regexp_literal(const VALUE self) {
1066
1076
  re2_pattern *p;
@@ -1070,13 +1080,13 @@ static VALUE re2_regexp_literal(const VALUE self) {
1070
1080
  }
1071
1081
 
1072
1082
  /*
1073
- * Returns whether or not the regular expression +re2+
1074
- * was compiled with the never_nl option set to true.
1083
+ * Returns whether or not the regular expression was compiled with the
1084
+ * `never_nl` option set to `true`.
1075
1085
  *
1076
- * @return [Boolean] the never_nl option
1086
+ * @return [Boolean] the `never_nl` option
1077
1087
  * @example
1078
- * re2 = RE2::Regexp.new("woo?", :never_nl => true)
1079
- * re2.never_nl? #=> true
1088
+ * re2 = RE2::Regexp.new("woo?", never_nl: true)
1089
+ * re2.never_nl? #=> true
1080
1090
  */
1081
1091
  static VALUE re2_regexp_never_nl(const VALUE self) {
1082
1092
  re2_pattern *p;
@@ -1086,13 +1096,13 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
1086
1096
  }
1087
1097
 
1088
1098
  /*
1089
- * Returns whether or not the regular expression +re2+
1090
- * was compiled with the case_sensitive option set to true.
1099
+ * Returns whether or not the regular expression was compiled with the
1100
+ * `case_sensitive` option set to `true`.
1091
1101
  *
1092
- * @return [Boolean] the case_sensitive option
1102
+ * @return [Boolean] the `case_sensitive` option
1093
1103
  * @example
1094
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1095
- * re2.case_sensitive? #=> true
1104
+ * re2 = RE2::Regexp.new("woo?", case_sensitive: true)
1105
+ * re2.case_sensitive? #=> true
1096
1106
  */
1097
1107
  static VALUE re2_regexp_case_sensitive(const VALUE self) {
1098
1108
  re2_pattern *p;
@@ -1102,27 +1112,27 @@ static VALUE re2_regexp_case_sensitive(const VALUE self) {
1102
1112
  }
1103
1113
 
1104
1114
  /*
1105
- * Returns whether or not the regular expression +re2+
1106
- * was compiled with the case_sensitive option set to false.
1115
+ * Returns whether or not the regular expression was compiled with the
1116
+ * `case_sensitive` option set to `false`.
1107
1117
  *
1108
- * @return [Boolean] the inverse of the case_sensitive option
1118
+ * @return [Boolean] the inverse of the `case_sensitive` option
1109
1119
  * @example
1110
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1111
- * re2.case_insensitive? #=> false
1112
- * re2.casefold? #=> false
1120
+ * re2 = RE2::Regexp.new("woo?", case_sensitive: true)
1121
+ * re2.case_insensitive? #=> false
1122
+ * re2.casefold? #=> false
1113
1123
  */
1114
1124
  static VALUE re2_regexp_case_insensitive(const VALUE self) {
1115
1125
  return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
1116
1126
  }
1117
1127
 
1118
1128
  /*
1119
- * Returns whether or not the regular expression +re2+
1120
- * was compiled with the perl_classes option set to true.
1129
+ * Returns whether or not the regular expression was compiled with the
1130
+ * `perl_classes` option set to `true`.
1121
1131
  *
1122
- * @return [Boolean] the perl_classes option
1132
+ * @return [Boolean] the `perl_classes` option
1123
1133
  * @example
1124
- * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
1125
- * re2.perl_classes? #=> true
1134
+ * re2 = RE2::Regexp.new("woo?", perl_classes: true)
1135
+ * re2.perl_classes? #=> true
1126
1136
  */
1127
1137
  static VALUE re2_regexp_perl_classes(const VALUE self) {
1128
1138
  re2_pattern *p;
@@ -1132,13 +1142,13 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
1132
1142
  }
1133
1143
 
1134
1144
  /*
1135
- * Returns whether or not the regular expression +re2+
1136
- * was compiled with the word_boundary option set to true.
1145
+ * Returns whether or not the regular expression was compiled with the
1146
+ * `word_boundary` option set to `true`.
1137
1147
  *
1138
- * @return [Boolean] the word_boundary option
1148
+ * @return [Boolean] the `word_boundary` option
1139
1149
  * @example
1140
- * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
1141
- * re2.word_boundary? #=> true
1150
+ * re2 = RE2::Regexp.new("woo?", word_boundary: true)
1151
+ * re2.word_boundary? #=> true
1142
1152
  */
1143
1153
  static VALUE re2_regexp_word_boundary(const VALUE self) {
1144
1154
  re2_pattern *p;
@@ -1148,13 +1158,13 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
1148
1158
  }
1149
1159
 
1150
1160
  /*
1151
- * Returns whether or not the regular expression +re2+
1152
- * was compiled with the one_line option set to true.
1161
+ * Returns whether or not the regular expression was compiled with the
1162
+ * `one_line` option set to `true`.
1153
1163
  *
1154
- * @return [Boolean] the one_line option
1164
+ * @return [Boolean] the `one_line` option
1155
1165
  * @example
1156
- * re2 = RE2::Regexp.new("woo?", :one_line => true)
1157
- * re2.one_line? #=> true
1166
+ * re2 = RE2::Regexp.new("woo?", one_line: true)
1167
+ * re2.one_line? #=> true
1158
1168
  */
1159
1169
  static VALUE re2_regexp_one_line(const VALUE self) {
1160
1170
  re2_pattern *p;
@@ -1164,10 +1174,10 @@ static VALUE re2_regexp_one_line(const VALUE self) {
1164
1174
  }
1165
1175
 
1166
1176
  /*
1167
- * If the RE2 could not be created properly, returns an
1168
- * error string otherwise returns nil.
1177
+ * If the {RE2::Regexp} could not be created properly, returns an error string
1178
+ * otherwise returns `nil`.
1169
1179
  *
1170
- * @return [String, nil] the error string or nil
1180
+ * @return [String, nil] the error string or `nil`
1171
1181
  */
1172
1182
  static VALUE re2_regexp_error(const VALUE self) {
1173
1183
  re2_pattern *p;
@@ -1181,14 +1191,14 @@ static VALUE re2_regexp_error(const VALUE self) {
1181
1191
  }
1182
1192
 
1183
1193
  /*
1184
- * If the RE2 could not be created properly, returns
1185
- * the offending portion of the regexp otherwise returns nil.
1194
+ * If the {RE2::Regexp} could not be created properly, returns
1195
+ * the offending portion of the regexp otherwise returns `nil`.
1186
1196
  *
1187
1197
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1188
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1189
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1198
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1199
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1190
1200
  *
1191
- * @return [String, nil] the offending portion of the regexp or nil
1201
+ * @return [String, nil] the offending portion of the regexp or `nil`
1192
1202
  */
1193
1203
  static VALUE re2_regexp_error_arg(const VALUE self) {
1194
1204
  re2_pattern *p;
@@ -1218,8 +1228,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
1218
1228
  }
1219
1229
 
1220
1230
  /*
1221
- * Returns a hash of the options currently set for
1222
- * +re2+.
1231
+ * Returns a hash of the options currently set for the {RE2::Regexp}.
1223
1232
  *
1224
1233
  * @return [Hash] the options
1225
1234
  */
@@ -1270,8 +1279,8 @@ static VALUE re2_regexp_options(const VALUE self) {
1270
1279
 
1271
1280
  /*
1272
1281
  * Returns the number of capturing subpatterns, or -1 if the regexp
1273
- * wasn't valid on construction. The overall match ($0) does not
1274
- * count: if the regexp is "(a)(b)", returns 2.
1282
+ * wasn't valid on construction. The overall match (`$0`) does not
1283
+ * count: if the regexp is `"(a)(b)"`, returns 2.
1275
1284
  *
1276
1285
  * @return [Integer] the number of capturing subpatterns
1277
1286
  */
@@ -1286,8 +1295,8 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1286
1295
  * Returns a hash of names to capturing indices of groups.
1287
1296
  *
1288
1297
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1289
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1290
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1298
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1299
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1291
1300
  *
1292
1301
  * @return [Hash] a hash of names to capturing indices
1293
1302
  */
@@ -1309,63 +1318,93 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1309
1318
  }
1310
1319
 
1311
1320
  /*
1312
- * Match the pattern against the given +text+ and return either
1313
- * a boolean (if no submatches are required) or a {RE2::MatchData}
1314
- * instance.
1321
+ * General matching: match the pattern against the given `text` using
1322
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
1323
+ * `Match`} and return a {RE2::MatchData} instance with the specified number of
1324
+ * submatches (defaults to the total number of capturing groups) or a boolean
1325
+ * (if no submatches are required).
1315
1326
  *
1316
- * @return [Boolean, RE2::MatchData]
1327
+ * The number of submatches has a significant impact on performance: requesting
1328
+ * one submatch is much faster than requesting more than one and requesting
1329
+ * zero submatches is faster still.
1317
1330
  *
1318
1331
  * @overload match(text)
1319
- * Returns an {RE2::MatchData} containing the matching pattern and all
1320
- * subpatterns resulting from looking for the regexp in +text+ if the pattern
1332
+ * Returns a {RE2::MatchData} containing the matching pattern and all
1333
+ * submatches resulting from looking for the regexp in `text` if the pattern
1321
1334
  * contains capturing groups.
1322
1335
  *
1323
- * Returns either true or false indicating whether a successful match was
1336
+ * Returns either `true` or `false` indicating whether a successful match was
1324
1337
  * made if the pattern contains no capturing groups.
1325
1338
  *
1326
1339
  * @param [String] text the text to search
1327
- * @return [RE2::MatchData] if the pattern contains capturing groups
1340
+ * @return [RE2::MatchData, nil] if the pattern contains capturing groups
1328
1341
  * @return [Boolean] if the pattern does not contain capturing groups
1329
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1342
+ * @raise [NoMemoryError] if there was not enough memory to allocate the submatches
1343
+ * @raise [TypeError] if given text that cannot be coerced to a `String`
1330
1344
  * @example Matching with capturing groups
1331
1345
  * r = RE2::Regexp.new('w(o)(o)')
1332
- * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1346
+ * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1333
1347
  * @example Matching without capturing groups
1334
1348
  * r = RE2::Regexp.new('woo')
1335
- * r.match('woo') #=> true
1349
+ * r.match('woo') #=> true
1336
1350
  *
1337
- * @overload match(text, 0)
1338
- * Returns either true or false indicating whether a
1339
- * successful match was made.
1351
+ * @overload match(text, options)
1352
+ * See `match(text)` but with customisable offsets for starting and ending
1353
+ * matches, optional anchoring to the start or both ends of the text and a
1354
+ * specific number of submatches to extract (padded with `nil`s if
1355
+ * necessary).
1340
1356
  *
1341
1357
  * @param [String] text the text to search
1342
- * @return [Boolean] whether the match was successful
1358
+ * @param [Hash] options the options with which to perform the match
1359
+ * @option options [Integer] :startpos (0) offset at which to start matching
1360
+ * @option options [Integer] :endpos offset at which to stop matching, defaults to the text length
1361
+ * @option options [Symbol] :anchor (:unanchored) one of :unanchored, :anchor_start, :anchor_both to anchor the match
1362
+ * @option options [Integer] :submatches how many submatches to extract (0 is
1363
+ * fastest), defaults to the number of capturing groups
1364
+ * @return [RE2::MatchData, nil] if extracting any submatches
1365
+ * @return [Boolean] if not extracting any submatches
1366
+ * @raise [ArgumentError] if given a negative number of submatches, invalid
1367
+ * anchor or invalid startpos, endpos pair
1343
1368
  * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1344
- * @example
1369
+ * @raise [TypeError] if given non-String text, non-numeric number of
1370
+ * submatches, non-symbol anchor or non-hash options
1371
+ * @raise [RE2::Regexp::UnsupportedError] if given an endpos argument on a
1372
+ * version of RE2 that does not support it
1373
+ * @example Matching with capturing groups
1345
1374
  * r = RE2::Regexp.new('w(o)(o)')
1346
- * r.match('woo', 0) #=> true
1347
- * r.match('bob', 0) #=> false
1375
+ * r.match('woo', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
1376
+ * r.match('woo', submatches: 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1377
+ * r.match('woot', anchor: :anchor_both, submatches: 0)
1378
+ * #=> false
1379
+ * r.match('woot', anchor: :anchor_start, submatches: 0)
1380
+ * #=> true
1381
+ * @example Matching without capturing groups
1382
+ * r = RE2::Regexp.new('wo+')
1383
+ * r.match('woot', anchor: :anchor_both) #=> false
1384
+ * r.match('woot', anchor: :anchor_start) #=> true
1348
1385
  *
1349
- * @overload match(text, number_of_matches)
1350
- * See +match(text)+ but with a specific number of
1351
- * matches returned (padded with nils if necessary).
1386
+ * @overload match(text, submatches)
1387
+ * @deprecated Legacy syntax for matching against `text` with a specific
1388
+ * number of submatches to extract. Use `match(text, submatches: n)` instead.
1352
1389
  *
1353
1390
  * @param [String] text the text to search
1354
- * @param [Integer] number_of_matches the number of matches to return
1355
- * @return [RE2::MatchData] the matches
1356
- * @raise [ArgumentError] if given a negative number of matches
1357
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1391
+ * @param [Integer] submatches the number of submatches to extract
1392
+ * @return [RE2::MatchData, nil] if extracting any submatches
1393
+ * @return [Boolean] if not extracting any submatches
1394
+ * @raise [NoMemoryError] if there was not enough memory to allocate the submatches
1395
+ * @raise [TypeError] if given non-numeric number of submatches
1358
1396
  * @example
1359
1397
  * r = RE2::Regexp.new('w(o)(o)')
1398
+ * r.match('woo', 0) #=> true
1360
1399
  * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1361
- * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1400
+ * r.match('woo', 2) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1362
1401
  */
1363
1402
  static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1364
1403
  re2_pattern *p;
1365
1404
  re2_matchdata *m;
1366
- VALUE text, number_of_matches;
1405
+ VALUE text, options;
1367
1406
 
1368
- rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1407
+ rb_scan_args(argc, argv, "11", &text, &options);
1369
1408
 
1370
1409
  /* Ensure text is a string. */
1371
1410
  StringValue(text);
@@ -1373,12 +1412,80 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1373
1412
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1374
1413
 
1375
1414
  int n;
1415
+ int startpos = 0;
1416
+ int endpos = RSTRING_LEN(text);
1417
+ RE2::Anchor anchor = RE2::UNANCHORED;
1376
1418
 
1377
- if (RTEST(number_of_matches)) {
1378
- n = NUM2INT(number_of_matches);
1419
+ if (RTEST(options)) {
1420
+ if (FIXNUM_P(options)) {
1421
+ n = NUM2INT(options);
1422
+
1423
+ if (n < 0) {
1424
+ rb_raise(rb_eArgError, "number of matches should be >= 0");
1425
+ }
1426
+ } else {
1427
+ if (TYPE(options) != T_HASH) {
1428
+ options = rb_Hash(options);
1429
+ }
1379
1430
 
1380
- if (n < 0) {
1381
- rb_raise(rb_eArgError, "number of matches should be >= 0");
1431
+ VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
1432
+ if (!NIL_P(endpos_option)) {
1433
+ #ifdef HAVE_ENDPOS_ARGUMENT
1434
+ Check_Type(endpos_option, T_FIXNUM);
1435
+
1436
+ endpos = NUM2INT(endpos_option);
1437
+
1438
+ if (endpos < 0) {
1439
+ rb_raise(rb_eArgError, "endpos should be >= 0");
1440
+ }
1441
+ #else
1442
+ rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
1443
+ #endif
1444
+ }
1445
+
1446
+ VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
1447
+ if (!NIL_P(anchor_option)) {
1448
+ Check_Type(anchor_option, T_SYMBOL);
1449
+
1450
+ ID id_anchor_option = SYM2ID(anchor_option);
1451
+ if (id_anchor_option == id_unanchored) {
1452
+ anchor = RE2::UNANCHORED;
1453
+ } else if (id_anchor_option == id_anchor_start) {
1454
+ anchor = RE2::ANCHOR_START;
1455
+ } else if (id_anchor_option == id_anchor_both) {
1456
+ anchor = RE2::ANCHOR_BOTH;
1457
+ } else {
1458
+ rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
1459
+ }
1460
+ }
1461
+
1462
+ VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
1463
+ if (!NIL_P(submatches_option)) {
1464
+ Check_Type(submatches_option, T_FIXNUM);
1465
+
1466
+ n = NUM2INT(submatches_option);
1467
+
1468
+ if (n < 0) {
1469
+ rb_raise(rb_eArgError, "number of matches should be >= 0");
1470
+ }
1471
+ } else {
1472
+ if (!p->pattern->ok()) {
1473
+ return Qnil;
1474
+ }
1475
+
1476
+ n = p->pattern->NumberOfCapturingGroups();
1477
+ }
1478
+
1479
+ VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
1480
+ if (!NIL_P(startpos_option)) {
1481
+ Check_Type(startpos_option, T_FIXNUM);
1482
+
1483
+ startpos = NUM2INT(startpos_option);
1484
+
1485
+ if (startpos < 0) {
1486
+ rb_raise(rb_eArgError, "startpos should be >= 0");
1487
+ }
1488
+ }
1382
1489
  }
1383
1490
  } else {
1384
1491
  if (!p->pattern->ok()) {
@@ -1388,12 +1495,16 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1388
1495
  n = p->pattern->NumberOfCapturingGroups();
1389
1496
  }
1390
1497
 
1498
+ if (startpos > endpos) {
1499
+ rb_raise(rb_eArgError, "startpos should be <= endpos");
1500
+ }
1501
+
1391
1502
  if (n == 0) {
1392
1503
  #ifdef HAVE_ENDPOS_ARGUMENT
1393
- bool matched = p->pattern->Match(RSTRING_PTR(text), 0,
1394
- RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
1504
+ bool matched = p->pattern->Match(RSTRING_PTR(text), startpos,
1505
+ endpos, anchor, 0, 0);
1395
1506
  #else
1396
- bool matched = p->pattern->Match(RSTRING_PTR(text), 0, RE2::UNANCHORED,
1507
+ bool matched = p->pattern->Match(RSTRING_PTR(text), startpos, anchor,
1397
1508
  0, 0);
1398
1509
  #endif
1399
1510
  return BOOL2RUBY(matched);
@@ -1418,11 +1529,11 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1418
1529
  m->number_of_matches = n;
1419
1530
 
1420
1531
  #ifdef HAVE_ENDPOS_ARGUMENT
1421
- bool matched = p->pattern->Match(RSTRING_PTR(m->text), 0,
1422
- RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
1532
+ bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
1533
+ endpos, anchor, m->matches, n);
1423
1534
  #else
1424
- bool matched = p->pattern->Match(RSTRING_PTR(m->text), 0,
1425
- RE2::UNANCHORED, m->matches, n);
1535
+ bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
1536
+ anchor, m->matches, n);
1426
1537
  #endif
1427
1538
  if (matched) {
1428
1539
  return matchdata;
@@ -1433,22 +1544,54 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1433
1544
  }
1434
1545
 
1435
1546
  /*
1436
- * Returns true or false to indicate a successful match.
1437
- * Equivalent to +re2.match(text, 0)+.
1547
+ * Returns true if the pattern matches any substring of the given text using
1548
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
1549
+ * `PartialMatch`}.
1438
1550
  *
1439
1551
  * @return [Boolean] whether the match was successful
1552
+ * @raise [TypeError] if text cannot be coerced to a `String`
1440
1553
  */
1441
1554
  static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1442
- VALUE argv[2] = { text, INT2FIX(0) };
1555
+ re2_pattern *p;
1556
+
1557
+ /* Ensure text is a string. */
1558
+ StringValue(text);
1559
+
1560
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1561
+
1562
+ return BOOL2RUBY(RE2::PartialMatch(RSTRING_PTR(text), *p->pattern));
1563
+ }
1564
+
1565
+ /*
1566
+ * Returns true if the pattern matches the given text using
1567
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
1568
+ * `FullMatch`}.
1569
+ *
1570
+ * @return [Boolean] whether the match was successful
1571
+ * @raise [TypeError] if text cannot be coerced to a `String`
1572
+ */
1573
+ static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
1574
+ re2_pattern *p;
1575
+
1576
+ /* Ensure text is a string. */
1577
+ StringValue(text);
1443
1578
 
1444
- return re2_regexp_match(2, argv, self);
1579
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1580
+
1581
+ return BOOL2RUBY(RE2::FullMatch(RSTRING_PTR(text), *p->pattern));
1445
1582
  }
1446
1583
 
1447
1584
  /*
1448
- * Returns a {RE2::Scanner} for scanning the given text incrementally.
1585
+ * Returns a {RE2::Scanner} for scanning the given text incrementally with
1586
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
1587
+ * `FindAndConsume`}.
1449
1588
  *
1589
+ * @param [String] text the text to scan incrementally
1590
+ * @return [RE2::Scanner] an `Enumerable` {RE2::Scanner} object
1591
+ * @raise [TypeError] if `text` cannot be coerced to a `String`
1450
1592
  * @example
1451
1593
  * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1594
+ * #=> #<RE2::Scanner:0x0000000000000001>
1452
1595
  */
1453
1596
  static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1454
1597
  /* Ensure text is a string. */
@@ -1477,17 +1620,40 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1477
1620
  }
1478
1621
 
1479
1622
  /*
1480
- * Returns a copy of +str+ with the first occurrence +pattern+
1481
- * replaced with +rewrite+.
1623
+ * Returns whether the underlying RE2 version supports passing an `endpos`
1624
+ * argument to
1625
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
1626
+ * Match}. If not, {RE2::Regexp#match} will raise an error if attempting to
1627
+ * pass an `endpos`.
1628
+ *
1629
+ * @return [Boolean] whether the underlying
1630
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
1631
+ * Match} has an endpos argument
1632
+ */
1633
+ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
1634
+ #ifdef HAVE_ENDPOS_ARGUMENT
1635
+ return Qtrue;
1636
+ #else
1637
+ return Qfalse;
1638
+ #endif
1639
+ }
1640
+
1641
+ /*
1642
+ * Returns a copy of `str` with the first occurrence of `pattern` replaced with
1643
+ * `rewrite` using
1644
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L465-L480
1645
+ * `Replace`}.
1482
1646
  *
1483
1647
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1484
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1485
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1648
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1649
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1486
1650
  *
1487
1651
  * @param [String] str the string to modify
1488
1652
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1489
1653
  * @param [String] rewrite the string to replace with
1490
1654
  * @return [String] the resulting string
1655
+ * @raise [TypeError] if the given rewrite or pattern (if not provided as a
1656
+ * {RE2::Regexp}) cannot be coerced to `String`s
1491
1657
  * @example
1492
1658
  * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1493
1659
  * re2 = RE2::Regexp.new("hel+o")
@@ -1523,20 +1689,24 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
1523
1689
  }
1524
1690
 
1525
1691
  /*
1526
- * Return a copy of +str+ with +pattern+ replaced by +rewrite+.
1692
+ * Return a copy of `str` with `pattern` replaced by `rewrite` using
1693
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L482-L497
1694
+ * `GlobalReplace`}.
1527
1695
  *
1528
1696
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1529
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1530
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1697
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1698
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1531
1699
  *
1532
1700
  * @param [String] str the string to modify
1533
1701
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1534
1702
  * @param [String] rewrite the string to replace with
1703
+ * @raise [TypeError] if the given rewrite or pattern (if not provided as a
1704
+ * {RE2::Regexp}) cannot be coerced to `String`s
1535
1705
  * @return [String] the resulting string
1536
1706
  * @example
1537
1707
  * re2 = RE2::Regexp.new("oo?")
1538
- * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1539
- * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1708
+ * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1709
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1540
1710
  */
1541
1711
  static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1542
1712
  VALUE rewrite) {
@@ -1568,14 +1738,17 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1568
1738
  }
1569
1739
 
1570
1740
  /*
1571
- * Returns a version of str with all potentially meaningful regexp
1572
- * characters escaped. The returned string, used as a regular
1573
- * expression, will exactly match the original string.
1741
+ * Returns a version of `str` with all potentially meaningful regexp characters
1742
+ * escaped using
1743
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L512-L518
1744
+ * `QuoteMeta`}. The returned string, used as a regular expression, will
1745
+ * exactly match the original string.
1574
1746
  *
1575
1747
  * @param [String] unquoted the unquoted string
1748
+ * @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
1576
1749
  * @return [String] the escaped string
1577
1750
  * @example
1578
- * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1751
+ * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1579
1752
  */
1580
1753
  static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
1581
1754
  StringValue(unquoted);
@@ -1641,14 +1814,14 @@ static VALUE re2_set_allocate(VALUE klass) {
1641
1814
  * Returns a new {RE2::Set} object for the specified anchor with the default
1642
1815
  * options.
1643
1816
  *
1644
- * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1645
- * @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
1817
+ * @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
1818
+ * @raise [ArgumentError] if anchor is not `:unanchored`, `:anchor_start` or `:anchor_both`
1646
1819
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1647
1820
  *
1648
1821
  * @overload initialize(anchor, options)
1649
1822
  * Returns a new {RE2::Set} object with the specified options.
1650
1823
  *
1651
- * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1824
+ * @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
1652
1825
  * @param [Hash] options the options with which to compile the pattern
1653
1826
  * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
1654
1827
  * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
@@ -1656,13 +1829,13 @@ static VALUE re2_set_allocate(VALUE klass) {
1656
1829
  * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
1657
1830
  * @option options [Integer] :max_mem approx. max memory footprint of RE2
1658
1831
  * @option options [Boolean] :literal (false) interpret string as literal, not regexp
1659
- * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
1660
- * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
1661
- * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
1662
- * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
1663
- * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
1664
- * @return [RE2::Set] an RE2::Set with the specified anchor and options
1665
- * @raise [ArgumentError] if anchor is not one of the accepted choices
1832
+ * @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
1833
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
1834
+ * @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
1835
+ * @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
1836
+ * @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
1837
+ * @return [RE2::Set] a {RE2::Set} with the specified anchor and options
1838
+ * @raise [ArgumentError] if `anchor` is not one of the accepted choices
1666
1839
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1667
1840
  */
1668
1841
  static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
@@ -1676,12 +1849,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1676
1849
 
1677
1850
  if (!NIL_P(anchor)) {
1678
1851
  Check_Type(anchor, T_SYMBOL);
1679
- ID id_anchor = SYM2ID(anchor);
1680
- if (id_anchor == id_unanchored) {
1852
+ ID id_anchor_arg = SYM2ID(anchor);
1853
+ if (id_anchor_arg == id_unanchored) {
1681
1854
  re2_anchor = RE2::UNANCHORED;
1682
- } else if (id_anchor == id_anchor_start) {
1855
+ } else if (id_anchor_arg == id_anchor_start) {
1683
1856
  re2_anchor = RE2::ANCHOR_START;
1684
- } else if (id_anchor == id_anchor_both) {
1857
+ } else if (id_anchor_arg == id_anchor_both) {
1685
1858
  re2_anchor = RE2::ANCHOR_BOTH;
1686
1859
  } else {
1687
1860
  rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
@@ -1704,15 +1877,16 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1704
1877
 
1705
1878
  /*
1706
1879
  * Adds a pattern to the set. Returns the index that will identify the pattern
1707
- * in the output of #match. Cannot be called after #compile has been called.
1880
+ * in the output of {RE2::Set#match}. Cannot be called after {RE2::Set#compile}
1881
+ * has been called.
1708
1882
  *
1709
1883
  * @param [String] pattern the regex pattern
1710
1884
  * @return [Integer] the index of the pattern in the set
1711
1885
  * @raise [ArgumentError] if called after compile or the pattern is rejected
1712
1886
  * @example
1713
1887
  * set = RE2::Set.new
1714
- * set.add("abc") #=> 0
1715
- * set.add("def") #=> 1
1888
+ * set.add("abc") #=> 0
1889
+ * set.add("def") #=> 1
1716
1890
  */
1717
1891
  static VALUE re2_set_add(VALUE self, VALUE pattern) {
1718
1892
  StringValue(pattern);
@@ -1740,14 +1914,14 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1740
1914
  }
1741
1915
 
1742
1916
  /*
1743
- * Compiles a Set so it can be used to match against. Must be called after #add
1744
- * and before #match.
1917
+ * Compiles a {RE2::Set} so it can be used to match against. Must be called
1918
+ * after {RE2::Set#add} and before {RE2::Set#match}.
1745
1919
  *
1746
- * @return [Bool] whether compilation was a success
1920
+ * @return [Boolean] whether compilation was a success
1747
1921
  * @example
1748
1922
  * set = RE2::Set.new
1749
1923
  * set.add("abc")
1750
- * set.compile # => true
1924
+ * set.compile #=> true
1751
1925
  */
1752
1926
  static VALUE re2_set_compile(VALUE self) {
1753
1927
  re2_set *s;
@@ -1757,11 +1931,12 @@ static VALUE re2_set_compile(VALUE self) {
1757
1931
  }
1758
1932
 
1759
1933
  /*
1760
- * Returns whether the underlying re2 version outputs error information from
1761
- * RE2::Set::Match. If not, #match will raise an error if attempting to set its
1762
- * :exception option to true.
1934
+ * Returns whether the underlying RE2 version outputs error information from
1935
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/set.h#L62-L65
1936
+ * `RE2::Set::Match`}. If not, {RE2::Set#match} will raise an error if attempting to set
1937
+ * its `:exception` option to `true`.
1763
1938
  *
1764
- * @return [Bool] whether the underlying re2 outputs error information from Set matches
1939
+ * @return [Boolean] whether the underlying RE2 outputs error information from {RE2::Set} matches
1765
1940
  */
1766
1941
  static VALUE re2_set_match_raises_errors_p(VALUE) {
1767
1942
  #ifdef HAVE_ERROR_INFO_ARGUMENT
@@ -1785,31 +1960,31 @@ static VALUE re2_set_match_raises_errors_p(VALUE) {
1785
1960
  * @param [String] str the text to match against
1786
1961
  * @return [Array<Integer>] the indices of matching regexps
1787
1962
  * @raise [MatchError] if an error occurs while matching
1788
- * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1963
+ * @raise [UnsupportedError] if the underlying version of RE2 does not output error information
1789
1964
  * @example
1790
1965
  * set = RE2::Set.new
1791
1966
  * set.add("abc")
1792
1967
  * set.add("def")
1793
1968
  * set.compile
1794
- * set.match("abcdef") # => [0, 1]
1969
+ * set.match("abcdef") #=> [0, 1]
1795
1970
  *
1796
1971
  * @overload match(str, options)
1797
1972
  * Returns an array of integer indices of patterns matching the given string
1798
1973
  * (if any). Raises exceptions if there are any errors while matching and the
1799
- * :exception option is set to true.
1974
+ * `:exception` option is set to true.
1800
1975
  *
1801
1976
  * @param [String] str the text to match against
1802
1977
  * @param [Hash] options the options with which to match
1803
- * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1978
+ * @option options [Boolean] :exception (true) whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)
1804
1979
  * @return [Array<Integer>] the indices of matching regexps
1805
1980
  * @raise [MatchError] if an error occurs while matching
1806
- * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1981
+ * @raise [UnsupportedError] if the underlying version of RE2 does not output error information
1807
1982
  * @example
1808
1983
  * set = RE2::Set.new
1809
1984
  * set.add("abc")
1810
1985
  * set.add("def")
1811
1986
  * set.compile
1812
- * set.match("abcdef", :exception => true) # => [0, 1]
1987
+ * set.match("abcdef", exception: true) #=> [0, 1]
1813
1988
  */
1814
1989
  static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1815
1990
  VALUE str, options;
@@ -1877,6 +2052,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1877
2052
  extern "C" void Init_re2(void) {
1878
2053
  re2_mRE2 = rb_define_module("RE2");
1879
2054
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
2055
+ re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
2056
+ "UnsupportedError", rb_const_get(rb_cObject, rb_intern("StandardError")));
1880
2057
  re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1881
2058
  re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1882
2059
  re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
@@ -1930,6 +2107,8 @@ extern "C" void Init_re2(void) {
1930
2107
  rb_define_method(re2_cScanner, "rewind",
1931
2108
  RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1932
2109
 
2110
+ rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
2111
+ RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
1933
2112
  rb_define_method(re2_cRegexp, "initialize",
1934
2113
  RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1935
2114
  rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
@@ -1947,12 +2126,14 @@ extern "C" void Init_re2(void) {
1947
2126
  RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1948
2127
  rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1949
2128
  -1);
1950
- rb_define_method(re2_cRegexp, "match?",
1951
- RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1952
- rb_define_method(re2_cRegexp, "=~",
1953
- RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1954
- rb_define_method(re2_cRegexp, "===",
2129
+ rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
2130
+ 1);
2131
+ rb_define_method(re2_cRegexp, "partial_match?",
1955
2132
  RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
2133
+ rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
2134
+ rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
2135
+ rb_define_method(re2_cRegexp, "full_match?",
2136
+ RUBY_METHOD_FUNC(re2_regexp_full_match_p), 1);
1956
2137
  rb_define_method(re2_cRegexp, "scan",
1957
2138
  RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1958
2139
  rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
@@ -2009,6 +2190,8 @@ extern "C" void Init_re2(void) {
2009
2190
  RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
2010
2191
  rb_define_singleton_method(re2_cRegexp, "quote",
2011
2192
  RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
2193
+
2194
+ // (see RE2::Regexp#initialize)
2012
2195
  rb_define_singleton_method(re2_cRegexp, "compile",
2013
2196
  RUBY_METHOD_FUNC(rb_class_new_instance), -1);
2014
2197
 
@@ -2027,7 +2210,11 @@ extern "C" void Init_re2(void) {
2027
2210
  id_word_boundary = rb_intern("word_boundary");
2028
2211
  id_one_line = rb_intern("one_line");
2029
2212
  id_unanchored = rb_intern("unanchored");
2213
+ id_anchor = rb_intern("anchor");
2030
2214
  id_anchor_start = rb_intern("anchor_start");
2031
2215
  id_anchor_both = rb_intern("anchor_both");
2032
2216
  id_exception = rb_intern("exception");
2217
+ id_submatches = rb_intern("submatches");
2218
+ id_startpos = rb_intern("startpos");
2219
+ id_endpos = rb_intern("endpos");
2033
2220
  }