re2 2.4.2-x86_64-darwin → 2.5.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/re2/re2.cc CHANGED
@@ -1,8 +1,10 @@
1
1
  /*
2
- * re2 (http://github.com/mudge/re2)
3
- * Ruby bindings to re2, an "efficient, principled regular expression library"
2
+ * re2 (https://github.com/mudge/re2)
3
+ * Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to
4
+ * backtracking regular expression engines like those used in PCRE, Perl, and
5
+ * Python".
4
6
  *
5
- * Copyright (c) 2010-2014, Paul Mucur (http://mudge.name)
7
+ * Copyright (c) 2010, Paul Mucur (https://mudge.name)
6
8
  * Released under the BSD Licence, please see LICENSE.txt
7
9
  */
8
10
 
@@ -42,13 +44,14 @@ typedef struct {
42
44
  } re2_set;
43
45
 
44
46
  VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
45
- re2_eSetMatchError, re2_eSetUnsupportedError;
47
+ re2_eSetMatchError, re2_eSetUnsupportedError, re2_eRegexpUnsupportedError;
46
48
 
47
49
  /* Symbols used in RE2 options. */
48
50
  static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
49
51
  id_max_mem, id_literal, id_never_nl, id_case_sensitive,
50
- id_perl_classes, id_word_boundary, id_one_line,
51
- id_unanchored, id_anchor_start, id_anchor_both, id_exception;
52
+ id_perl_classes, id_word_boundary, id_one_line, id_unanchored,
53
+ id_anchor, id_anchor_start, id_anchor_both, id_exception,
54
+ id_submatches, id_startpos, id_endpos;
52
55
 
53
56
  inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
54
57
  if (encoding == RE2::Options::EncodingUTF8) {
@@ -122,9 +125,9 @@ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
122
125
  }
123
126
  }
124
127
 
125
- /* For compatibility with ruby < 2.7 */
128
+ /* For compatibility with Ruby < 2.7 */
126
129
  #ifdef HAVE_RB_GC_MARK_MOVABLE
127
- #define re2_compact_callback(x) .dcompact = (x),
130
+ #define re2_compact_callback(x) (x),
128
131
  #else
129
132
  #define rb_gc_mark_movable(x) rb_gc_mark(x)
130
133
  #define re2_compact_callback(x)
@@ -163,16 +166,18 @@ static size_t re2_matchdata_memsize(const void *ptr) {
163
166
  }
164
167
 
165
168
  static const rb_data_type_t re2_matchdata_data_type = {
166
- .wrap_struct_name = "RE2::MatchData",
167
- .function = {
168
- .dmark = re2_matchdata_mark,
169
- .dfree = re2_matchdata_free,
170
- .dsize = re2_matchdata_memsize,
169
+ "RE2::MatchData",
170
+ {
171
+ re2_matchdata_mark,
172
+ re2_matchdata_free,
173
+ re2_matchdata_memsize,
171
174
  re2_compact_callback(re2_matchdata_compact)
172
175
  },
176
+ 0,
177
+ 0,
173
178
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
174
179
  // macro to update VALUE references, as to trigger write barriers.
175
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
180
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
176
181
  };
177
182
 
178
183
  static void re2_scanner_mark(void *ptr) {
@@ -208,16 +213,18 @@ static size_t re2_scanner_memsize(const void *ptr) {
208
213
  }
209
214
 
210
215
  static const rb_data_type_t re2_scanner_data_type = {
211
- .wrap_struct_name = "RE2::Scanner",
212
- .function = {
213
- .dmark = re2_scanner_mark,
214
- .dfree = re2_scanner_free,
215
- .dsize = re2_scanner_memsize,
216
+ "RE2::Scanner",
217
+ {
218
+ re2_scanner_mark,
219
+ re2_scanner_free,
220
+ re2_scanner_memsize,
216
221
  re2_compact_callback(re2_scanner_compact)
217
222
  },
223
+ 0,
224
+ 0,
218
225
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
219
226
  // macro to update VALUE references, as to trigger write barriers.
220
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
227
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
221
228
  };
222
229
 
223
230
  static void re2_regexp_free(void *ptr) {
@@ -239,15 +246,17 @@ static size_t re2_regexp_memsize(const void *ptr) {
239
246
  }
240
247
 
241
248
  static const rb_data_type_t re2_regexp_data_type = {
242
- .wrap_struct_name = "RE2::Regexp",
243
- .function = {
244
- .dmark = NULL,
245
- .dfree = re2_regexp_free,
246
- .dsize = re2_regexp_memsize,
249
+ "RE2::Regexp",
250
+ {
251
+ 0,
252
+ re2_regexp_free,
253
+ re2_regexp_memsize,
247
254
  },
255
+ 0,
256
+ 0,
248
257
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
249
258
  // macro to update VALUE references, as to trigger write barriers.
250
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
259
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
251
260
  };
252
261
 
253
262
  static VALUE re2_matchdata_allocate(VALUE klass) {
@@ -264,12 +273,14 @@ static VALUE re2_scanner_allocate(VALUE klass) {
264
273
  }
265
274
 
266
275
  /*
267
- * Returns a frozen copy of the string passed into +match+.
276
+ * Returns a frozen copy of the text supplied when matching.
268
277
  *
269
- * @return [String] a frozen copy of the passed string.
278
+ * If the text was already a frozen string, returns the original.
279
+ *
280
+ * @return [String] a frozen string with the text supplied when matching
270
281
  * @example
271
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
272
- * m.string #=> "bob 123"
282
+ * m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
283
+ * m.string #=> "bob 123"
273
284
  */
274
285
  static VALUE re2_matchdata_string(const VALUE self) {
275
286
  re2_matchdata *m;
@@ -279,9 +290,10 @@ static VALUE re2_matchdata_string(const VALUE self) {
279
290
  }
280
291
 
281
292
  /*
282
- * Returns the string passed into the scanner.
293
+ * Returns the text supplied when incrementally matching with
294
+ * {RE2::Regexp#scan}.
283
295
  *
284
- * @return [String] the original string.
296
+ * @return [String] the original string passed to {RE2::Regexp#scan}
285
297
  * @example
286
298
  * c = RE2::Regexp.new('(\d+)').scan("foo")
287
299
  * c.string #=> "foo"
@@ -294,9 +306,9 @@ static VALUE re2_scanner_string(const VALUE self) {
294
306
  }
295
307
 
296
308
  /*
297
- * Returns whether the scanner has consumed all input or not.
309
+ * Returns whether the {RE2::Scanner} has consumed all input or not.
298
310
  *
299
- * @return [Boolean] whether the scanner has consumed all input or not
311
+ * @return [Boolean] whether the {RE2::Scanner} has consumed all input or not
300
312
  * @example
301
313
  * c = RE2::Regexp.new('(\d+)').scan("foo")
302
314
  * c.eof? #=> true
@@ -309,7 +321,7 @@ static VALUE re2_scanner_eof(const VALUE self) {
309
321
  }
310
322
 
311
323
  /*
312
- * Rewind the scanner to the start of the string.
324
+ * Rewind the {RE2::Scanner} to the start of the string.
313
325
  *
314
326
  * @example
315
327
  * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
@@ -331,14 +343,19 @@ static VALUE re2_scanner_rewind(VALUE self) {
331
343
  }
332
344
 
333
345
  /*
334
- * Scan the given text incrementally for matches, returning an array of
335
- * matches on each subsequent call. Returns nil if no matches are found.
346
+ * Scan the given text incrementally for matches using
347
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
348
+ * `FindAndConsume`}, returning an array of submatches on each subsequent
349
+ * call. Returns `nil` if no matches are found or an empty array for every
350
+ * match if the pattern has no capturing groups.
336
351
  *
337
352
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
338
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
339
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
353
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
354
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
340
355
  *
341
- * @return [Array<String>] the matches.
356
+ * @return [Array<String>] if the pattern has capturing groups
357
+ * @return [[]] if the pattern does not have capturing groups
358
+ * @return [nil] if no matches are found
342
359
  * @example
343
360
  * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
344
361
  * s.scan #=> ["Foo"]
@@ -353,7 +370,7 @@ static VALUE re2_scanner_scan(VALUE self) {
353
370
 
354
371
  std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
355
372
  std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
356
- std::vector<std::string> matches(c->number_of_capturing_groups);
373
+ std::vector<re2::StringPiece> matches(c->number_of_capturing_groups);
357
374
 
358
375
  if (c->eof) {
359
376
  return Qnil;
@@ -397,9 +414,6 @@ static VALUE re2_scanner_scan(VALUE self) {
397
414
  }
398
415
  }
399
416
 
400
- /*
401
- * Retrieve a matchdata by index or name.
402
- */
403
417
  static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
404
418
  re2_matchdata *m;
405
419
  re2_pattern *p;
@@ -435,13 +449,14 @@ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
435
449
  }
436
450
 
437
451
  /*
438
- * Returns the number of elements in the match array (including nils).
452
+ * Returns the number of elements in the {RE2::MatchData} (including the
453
+ * overall match, submatches and any `nil`s).
439
454
  *
440
455
  * @return [Integer] the number of elements
441
456
  * @example
442
457
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
443
- * m.size #=> 2
444
- * m.length #=> 2
458
+ * m.size #=> 2
459
+ * m.length #=> 2
445
460
  */
446
461
  static VALUE re2_matchdata_size(const VALUE self) {
447
462
  re2_matchdata *m;
@@ -452,14 +467,15 @@ static VALUE re2_matchdata_size(const VALUE self) {
452
467
  }
453
468
 
454
469
  /*
455
- * Returns the offset of the start of the nth element of the matchdata.
470
+ * Returns the offset of the start of the nth element of the {RE2::MatchData}.
456
471
  *
457
- * @param [Integer, String, Symbol] n the name or number of the match
458
- * @return [Integer] the offset of the start of the match
472
+ * @param [Integer, String, Symbol] n the name or number of the submatch
473
+ * @return [Integer, nil] the offset of the start of the match or `nil` if
474
+ * there is no such submatch
459
475
  * @example
460
476
  * m = RE2::Regexp.new('ob (\d+)').match("bob 123")
461
- * m.begin(0) #=> 1
462
- * m.begin(1) #=> 4
477
+ * m.begin(0) #=> 1
478
+ * m.begin(1) #=> 4
463
479
  */
464
480
  static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
465
481
  re2_matchdata *m;
@@ -477,14 +493,16 @@ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
477
493
  }
478
494
 
479
495
  /*
480
- * Returns the offset of the character following the end of the nth element of the matchdata.
496
+ * Returns the offset of the character following the end of the nth element of
497
+ * the {RE2::MatchData}.
481
498
  *
482
499
  * @param [Integer, String, Symbol] n the name or number of the match
483
- * @return [Integer] the offset of the character following the end of the match
500
+ * @return [Integer, nil] the offset of the character following the end of the
501
+ * match or `nil` if there is no such match
484
502
  * @example
485
503
  * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
486
- * m.end(0) #=> 9
487
- * m.end(1) #=> 7
504
+ * m.end(0) #=> 9
505
+ * m.end(1) #=> 7
488
506
  */
489
507
  static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
490
508
  re2_matchdata *m;
@@ -504,10 +522,10 @@ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
504
522
  /*
505
523
  * Returns the {RE2::Regexp} used in the match.
506
524
  *
507
- * @return [RE2::Regexp] the regexp used in the match
525
+ * @return [RE2::Regexp] the regular expression used in the match
508
526
  * @example
509
527
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
510
- * m.regexp #=> #<RE2::Regexp /(\d+)/>
528
+ * m.regexp #=> #<RE2::Regexp /(\d+)/>
511
529
  */
512
530
  static VALUE re2_matchdata_regexp(const VALUE self) {
513
531
  re2_matchdata *m;
@@ -517,12 +535,12 @@ static VALUE re2_matchdata_regexp(const VALUE self) {
517
535
  }
518
536
 
519
537
  /*
520
- * Returns the {RE2::Regexp} used in the scanner.
538
+ * Returns the {RE2::Regexp} used in the {RE2::Scanner}.
521
539
  *
522
- * @return [RE2::Regexp] the regexp used in the scanner
540
+ * @return [RE2::Regexp] the regular expression used in the {RE2::Scanner}
523
541
  * @example
524
542
  * c = RE2::Regexp.new('(\d+)').scan("bob 123")
525
- * c.regexp #=> #<RE2::Regexp /(\d+)/>
543
+ * c.regexp #=> #<RE2::Regexp /(\d+)/>
526
544
  */
527
545
  static VALUE re2_scanner_regexp(const VALUE self) {
528
546
  re2_scanner *c;
@@ -538,16 +556,17 @@ static VALUE re2_regexp_allocate(VALUE klass) {
538
556
  }
539
557
 
540
558
  /*
541
- * Returns the array of matches.
559
+ * Returns the array of matches including the overall match, submatches and any
560
+ * `nil`s.
542
561
  *
543
562
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
544
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
545
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
563
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
564
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
546
565
  *
547
566
  * @return [Array<String, nil>] the array of matches
548
567
  * @example
549
568
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
550
- * m.to_a #=> ["123", "123"]
569
+ * m.to_a #=> ["123", "123"]
551
570
  */
552
571
  static VALUE re2_matchdata_to_a(const VALUE self) {
553
572
  re2_matchdata *m;
@@ -613,19 +632,17 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
613
632
  * Retrieve zero, one or more matches by index or name.
614
633
  *
615
634
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
616
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
617
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
618
- *
619
- * @return [Array<String, nil>, String, Boolean]
635
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
636
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
620
637
  *
621
638
  * @overload [](index)
622
639
  * Access a particular match by index.
623
640
  *
624
641
  * @param [Integer] index the index of the match to fetch
625
- * @return [String, nil] the specified match
642
+ * @return [String, nil] the specified match or `nil` if it isn't present
626
643
  * @example
627
644
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
628
- * m[0] #=> "123"
645
+ * m[0] #=> "123"
629
646
  *
630
647
  * @overload [](start, length)
631
648
  * Access a range of matches by starting index and length.
@@ -635,7 +652,7 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
635
652
  * @return [Array<String, nil>] the specified matches
636
653
  * @example
637
654
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
638
- * m[0, 1] #=> ["123"]
655
+ * m[0, 1] #=> ["123"]
639
656
  *
640
657
  * @overload [](range)
641
658
  * Access a range of matches by index.
@@ -644,13 +661,13 @@ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
644
661
  * @return [Array<String, nil>] the specified matches
645
662
  * @example
646
663
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
647
- * m[0..1] #=> "[123", "123"]
664
+ * m[0..1] #=> ["123", "123"]
648
665
  *
649
666
  * @overload [](name)
650
667
  * Access a particular match by name.
651
668
  *
652
669
  * @param [String, Symbol] name the name of the match to fetch
653
- * @return [String, nil] the specific match
670
+ * @return [String, nil] the specific match or `nil` if it isn't present
654
671
  * @example
655
672
  * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
656
673
  * m["number"] #=> "123"
@@ -684,13 +701,13 @@ static VALUE re2_matchdata_to_s(const VALUE self) {
684
701
  * Returns a printable version of the match.
685
702
  *
686
703
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
687
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
688
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
704
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
705
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
689
706
  *
690
707
  * @return [String] a printable version of the match
691
708
  * @example
692
709
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
693
- * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
710
+ * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
694
711
  */
695
712
  static VALUE re2_matchdata_inspect(const VALUE self) {
696
713
  re2_matchdata *m;
@@ -728,13 +745,14 @@ static VALUE re2_matchdata_inspect(const VALUE self) {
728
745
  * Returns the array of submatches for pattern matching.
729
746
  *
730
747
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
731
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
732
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
748
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
749
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is
750
+ * undefined).
733
751
  *
734
752
  * @return [Array<String, nil>] the array of submatches
735
753
  * @example
736
754
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
737
- * m.deconstruct #=> ["123"]
755
+ * m.deconstruct #=> ["123"]
738
756
  *
739
757
  * @example pattern matching
740
758
  * case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
@@ -774,17 +792,18 @@ static VALUE re2_matchdata_deconstruct(const VALUE self) {
774
792
  * order but an invalid name will cause the hash to be immediately returned.
775
793
  *
776
794
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
777
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
778
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
795
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
796
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
779
797
  *
780
798
  * @return [Hash] a hash of capturing group names to submatches
781
- * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
799
+ * @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
800
+ * or `nil` to return all names
782
801
  * @example
783
802
  * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
784
- * m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
785
- * m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
786
- * m.deconstruct_keys([:fruit]) #=> {}
787
- * m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
803
+ * m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
804
+ * m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
805
+ * m.deconstruct_keys([:fruit]) #=> {}
806
+ * m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
788
807
  *
789
808
  * @example pattern matching
790
809
  * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
@@ -833,11 +852,9 @@ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys)
833
852
  }
834
853
 
835
854
  /*
836
- * Returns a new RE2 object with a compiled version of
837
- * +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
855
+ * Shorthand to compile a new {RE2::Regexp}.
838
856
  *
839
857
  * @see RE2::Regexp#initialize
840
- *
841
858
  */
842
859
  static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
843
860
  return rb_class_new_instance(argc, argv, re2_cRegexp);
@@ -845,22 +862,21 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
845
862
 
846
863
  /*
847
864
  * Returns a new {RE2::Regexp} object with a compiled version of
848
- * +pattern+ stored inside.
849
- *
850
- * @return [RE2::Regexp]
865
+ * `pattern` stored inside.
851
866
  *
852
867
  * @overload initialize(pattern)
853
868
  * Returns a new {RE2::Regexp} object with a compiled version of
854
- * +pattern+ stored inside with the default options.
869
+ * `pattern` stored inside with the default options.
855
870
  *
856
871
  * @param [String] pattern the pattern to compile
857
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
872
+ * @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern
873
+ * @raise [TypeError] if the given pattern can't be coerced to a `String`
858
874
  * @raise [NoMemoryError] if memory could not be allocated for the compiled
859
- * pattern
875
+ * pattern
860
876
  *
861
877
  * @overload initialize(pattern, options)
862
878
  * Returns a new {RE2::Regexp} object with a compiled version of
863
- * +pattern+ stored inside with the specified options.
879
+ * `pattern` stored inside with the specified options.
864
880
  *
865
881
  * @param [String] pattern the pattern to compile
866
882
  * @param [Hash] options the options with which to compile the pattern
@@ -870,12 +886,13 @@ static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
870
886
  * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
871
887
  * @option options [Integer] :max_mem approx. max memory footprint of RE2
872
888
  * @option options [Boolean] :literal (false) interpret string as literal, not regexp
873
- * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
874
- * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
875
- * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
876
- * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
877
- * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
878
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
889
+ * @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
890
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
891
+ * @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
892
+ * @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
893
+ * @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
894
+ * @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern and options
895
+ * @raise [TypeError] if the given pattern can't be coerced to a `String`
879
896
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
880
897
  */
881
898
  static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
@@ -906,16 +923,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
906
923
  }
907
924
 
908
925
  /*
909
- * Returns a printable version of the regular expression +re2+.
926
+ * Returns a printable version of the regular expression.
910
927
  *
911
928
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
912
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
913
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
929
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
930
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is
931
+ * undefined).
914
932
  *
915
933
  * @return [String] a printable version of the regular expression
916
934
  * @example
917
935
  * re2 = RE2::Regexp.new("woo?")
918
- * re2.inspect #=> "#<RE2::Regexp /woo?/>"
936
+ * re2.inspect #=> "#<RE2::Regexp /woo?/>"
919
937
  */
920
938
  static VALUE re2_regexp_inspect(const VALUE self) {
921
939
  re2_pattern *p;
@@ -931,16 +949,16 @@ static VALUE re2_regexp_inspect(const VALUE self) {
931
949
  }
932
950
 
933
951
  /*
934
- * Returns a string version of the regular expression +re2+.
952
+ * Returns a string version of the regular expression.
935
953
  *
936
954
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
937
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
938
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
955
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
956
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
939
957
  *
940
958
  * @return [String] a string version of the regular expression
941
959
  * @example
942
960
  * re2 = RE2::Regexp.new("woo?")
943
- * re2.to_s #=> "woo?"
961
+ * re2.to_s #=> "woo?"
944
962
  */
945
963
  static VALUE re2_regexp_to_s(const VALUE self) {
946
964
  re2_pattern *p;
@@ -952,13 +970,12 @@ static VALUE re2_regexp_to_s(const VALUE self) {
952
970
  }
953
971
 
954
972
  /*
955
- * Returns whether or not the regular expression +re2+
956
- * was compiled successfully or not.
973
+ * Returns whether or not the regular expression was compiled successfully.
957
974
  *
958
975
  * @return [Boolean] whether or not compilation was successful
959
976
  * @example
960
977
  * re2 = RE2::Regexp.new("woo?")
961
- * re2.ok? #=> true
978
+ * re2.ok? #=> true
962
979
  */
963
980
  static VALUE re2_regexp_ok(const VALUE self) {
964
981
  re2_pattern *p;
@@ -968,13 +985,13 @@ static VALUE re2_regexp_ok(const VALUE self) {
968
985
  }
969
986
 
970
987
  /*
971
- * Returns whether or not the regular expression +re2+
972
- * was compiled with the utf8 option set to true.
988
+ * Returns whether or not the regular expression was compiled with the `utf8`
989
+ * option set to `true`.
973
990
  *
974
- * @return [Boolean] the utf8 option
991
+ * @return [Boolean] the `utf8` option
975
992
  * @example
976
- * re2 = RE2::Regexp.new("woo?", :utf8 => true)
977
- * re2.utf8? #=> true
993
+ * re2 = RE2::Regexp.new("woo?", utf8: true)
994
+ * re2.utf8? #=> true
978
995
  */
979
996
  static VALUE re2_regexp_utf8(const VALUE self) {
980
997
  re2_pattern *p;
@@ -984,13 +1001,13 @@ static VALUE re2_regexp_utf8(const VALUE self) {
984
1001
  }
985
1002
 
986
1003
  /*
987
- * Returns whether or not the regular expression +re2+
988
- * was compiled with the posix_syntax option set to true.
1004
+ * Returns whether or not the regular expression was compiled with the
1005
+ * `posix_syntax` option set to `true`.
989
1006
  *
990
- * @return [Boolean] the posix_syntax option
1007
+ * @return [Boolean] the `posix_syntax` option
991
1008
  * @example
992
- * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
993
- * re2.posix_syntax? #=> true
1009
+ * re2 = RE2::Regexp.new("woo?", posix_syntax: true)
1010
+ * re2.posix_syntax? #=> true
994
1011
  */
995
1012
  static VALUE re2_regexp_posix_syntax(const VALUE self) {
996
1013
  re2_pattern *p;
@@ -1000,13 +1017,13 @@ static VALUE re2_regexp_posix_syntax(const VALUE self) {
1000
1017
  }
1001
1018
 
1002
1019
  /*
1003
- * Returns whether or not the regular expression +re2+
1004
- * was compiled with the longest_match option set to true.
1020
+ * Returns whether or not the regular expression was compiled with the
1021
+ * `longest_match` option set to `true`.
1005
1022
  *
1006
- * @return [Boolean] the longest_match option
1023
+ * @return [Boolean] the `longest_match` option
1007
1024
  * @example
1008
- * re2 = RE2::Regexp.new("woo?", :longest_match => true)
1009
- * re2.longest_match? #=> true
1025
+ * re2 = RE2::Regexp.new("woo?", longest_match: true)
1026
+ * re2.longest_match? #=> true
1010
1027
  */
1011
1028
  static VALUE re2_regexp_longest_match(const VALUE self) {
1012
1029
  re2_pattern *p;
@@ -1016,13 +1033,13 @@ static VALUE re2_regexp_longest_match(const VALUE self) {
1016
1033
  }
1017
1034
 
1018
1035
  /*
1019
- * Returns whether or not the regular expression +re2+
1020
- * was compiled with the log_errors option set to true.
1036
+ * Returns whether or not the regular expression was compiled with the
1037
+ * `log_errors` option set to `true`.
1021
1038
  *
1022
- * @return [Boolean] the log_errors option
1039
+ * @return [Boolean] the `log_errors` option
1023
1040
  * @example
1024
- * re2 = RE2::Regexp.new("woo?", :log_errors => true)
1025
- * re2.log_errors? #=> true
1041
+ * re2 = RE2::Regexp.new("woo?", log_errors: true)
1042
+ * re2.log_errors? #=> true
1026
1043
  */
1027
1044
  static VALUE re2_regexp_log_errors(const VALUE self) {
1028
1045
  re2_pattern *p;
@@ -1032,13 +1049,12 @@ static VALUE re2_regexp_log_errors(const VALUE self) {
1032
1049
  }
1033
1050
 
1034
1051
  /*
1035
- * Returns the max_mem setting for the regular expression
1036
- * +re2+.
1052
+ * Returns the `max_mem` setting for the regular expression.
1037
1053
  *
1038
- * @return [Integer] the max_mem option
1054
+ * @return [Integer] the `max_mem` option
1039
1055
  * @example
1040
- * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
1041
- * re2.max_mem #=> 1024
1056
+ * re2 = RE2::Regexp.new("woo?", max_mem: 1024)
1057
+ * re2.max_mem #=> 1024
1042
1058
  */
1043
1059
  static VALUE re2_regexp_max_mem(const VALUE self) {
1044
1060
  re2_pattern *p;
@@ -1048,13 +1064,13 @@ static VALUE re2_regexp_max_mem(const VALUE self) {
1048
1064
  }
1049
1065
 
1050
1066
  /*
1051
- * Returns whether or not the regular expression +re2+
1052
- * was compiled with the literal option set to true.
1067
+ * Returns whether or not the regular expression was compiled with the
1068
+ * `literal` option set to `true`.
1053
1069
  *
1054
- * @return [Boolean] the literal option
1070
+ * @return [Boolean] the `literal` option
1055
1071
  * @example
1056
- * re2 = RE2::Regexp.new("woo?", :literal => true)
1057
- * re2.literal? #=> true
1072
+ * re2 = RE2::Regexp.new("woo?", literal: true)
1073
+ * re2.literal? #=> true
1058
1074
  */
1059
1075
  static VALUE re2_regexp_literal(const VALUE self) {
1060
1076
  re2_pattern *p;
@@ -1064,13 +1080,13 @@ static VALUE re2_regexp_literal(const VALUE self) {
1064
1080
  }
1065
1081
 
1066
1082
  /*
1067
- * Returns whether or not the regular expression +re2+
1068
- * was compiled with the never_nl option set to true.
1083
+ * Returns whether or not the regular expression was compiled with the
1084
+ * `never_nl` option set to `true`.
1069
1085
  *
1070
- * @return [Boolean] the never_nl option
1086
+ * @return [Boolean] the `never_nl` option
1071
1087
  * @example
1072
- * re2 = RE2::Regexp.new("woo?", :never_nl => true)
1073
- * re2.never_nl? #=> true
1088
+ * re2 = RE2::Regexp.new("woo?", never_nl: true)
1089
+ * re2.never_nl? #=> true
1074
1090
  */
1075
1091
  static VALUE re2_regexp_never_nl(const VALUE self) {
1076
1092
  re2_pattern *p;
@@ -1080,13 +1096,13 @@ static VALUE re2_regexp_never_nl(const VALUE self) {
1080
1096
  }
1081
1097
 
1082
1098
  /*
1083
- * Returns whether or not the regular expression +re2+
1084
- * was compiled with the case_sensitive option set to true.
1099
+ * Returns whether or not the regular expression was compiled with the
1100
+ * `case_sensitive` option set to `true`.
1085
1101
  *
1086
- * @return [Boolean] the case_sensitive option
1102
+ * @return [Boolean] the `case_sensitive` option
1087
1103
  * @example
1088
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1089
- * re2.case_sensitive? #=> true
1104
+ * re2 = RE2::Regexp.new("woo?", case_sensitive: true)
1105
+ * re2.case_sensitive? #=> true
1090
1106
  */
1091
1107
  static VALUE re2_regexp_case_sensitive(const VALUE self) {
1092
1108
  re2_pattern *p;
@@ -1096,27 +1112,27 @@ static VALUE re2_regexp_case_sensitive(const VALUE self) {
1096
1112
  }
1097
1113
 
1098
1114
  /*
1099
- * Returns whether or not the regular expression +re2+
1100
- * was compiled with the case_sensitive option set to false.
1115
+ * Returns whether or not the regular expression was compiled with the
1116
+ * `case_sensitive` option set to `false`.
1101
1117
  *
1102
- * @return [Boolean] the inverse of the case_sensitive option
1118
+ * @return [Boolean] the inverse of the `case_sensitive` option
1103
1119
  * @example
1104
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1105
- * re2.case_insensitive? #=> false
1106
- * re2.casefold? #=> false
1120
+ * re2 = RE2::Regexp.new("woo?", case_sensitive: true)
1121
+ * re2.case_insensitive? #=> false
1122
+ * re2.casefold? #=> false
1107
1123
  */
1108
1124
  static VALUE re2_regexp_case_insensitive(const VALUE self) {
1109
1125
  return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
1110
1126
  }
1111
1127
 
1112
1128
  /*
1113
- * Returns whether or not the regular expression +re2+
1114
- * was compiled with the perl_classes option set to true.
1129
+ * Returns whether or not the regular expression was compiled with the
1130
+ * `perl_classes` option set to `true`.
1115
1131
  *
1116
- * @return [Boolean] the perl_classes option
1132
+ * @return [Boolean] the `perl_classes` option
1117
1133
  * @example
1118
- * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
1119
- * re2.perl_classes? #=> true
1134
+ * re2 = RE2::Regexp.new("woo?", perl_classes: true)
1135
+ * re2.perl_classes? #=> true
1120
1136
  */
1121
1137
  static VALUE re2_regexp_perl_classes(const VALUE self) {
1122
1138
  re2_pattern *p;
@@ -1126,13 +1142,13 @@ static VALUE re2_regexp_perl_classes(const VALUE self) {
1126
1142
  }
1127
1143
 
1128
1144
  /*
1129
- * Returns whether or not the regular expression +re2+
1130
- * was compiled with the word_boundary option set to true.
1145
+ * Returns whether or not the regular expression was compiled with the
1146
+ * `word_boundary` option set to `true`.
1131
1147
  *
1132
- * @return [Boolean] the word_boundary option
1148
+ * @return [Boolean] the `word_boundary` option
1133
1149
  * @example
1134
- * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
1135
- * re2.word_boundary? #=> true
1150
+ * re2 = RE2::Regexp.new("woo?", word_boundary: true)
1151
+ * re2.word_boundary? #=> true
1136
1152
  */
1137
1153
  static VALUE re2_regexp_word_boundary(const VALUE self) {
1138
1154
  re2_pattern *p;
@@ -1142,13 +1158,13 @@ static VALUE re2_regexp_word_boundary(const VALUE self) {
1142
1158
  }
1143
1159
 
1144
1160
  /*
1145
- * Returns whether or not the regular expression +re2+
1146
- * was compiled with the one_line option set to true.
1161
+ * Returns whether or not the regular expression was compiled with the
1162
+ * `one_line` option set to `true`.
1147
1163
  *
1148
- * @return [Boolean] the one_line option
1164
+ * @return [Boolean] the `one_line` option
1149
1165
  * @example
1150
- * re2 = RE2::Regexp.new("woo?", :one_line => true)
1151
- * re2.one_line? #=> true
1166
+ * re2 = RE2::Regexp.new("woo?", one_line: true)
1167
+ * re2.one_line? #=> true
1152
1168
  */
1153
1169
  static VALUE re2_regexp_one_line(const VALUE self) {
1154
1170
  re2_pattern *p;
@@ -1158,10 +1174,10 @@ static VALUE re2_regexp_one_line(const VALUE self) {
1158
1174
  }
1159
1175
 
1160
1176
  /*
1161
- * If the RE2 could not be created properly, returns an
1162
- * error string otherwise returns nil.
1177
+ * If the {RE2::Regexp} could not be created properly, returns an error string
1178
+ * otherwise returns `nil`.
1163
1179
  *
1164
- * @return [String, nil] the error string or nil
1180
+ * @return [String, nil] the error string or `nil`
1165
1181
  */
1166
1182
  static VALUE re2_regexp_error(const VALUE self) {
1167
1183
  re2_pattern *p;
@@ -1175,14 +1191,14 @@ static VALUE re2_regexp_error(const VALUE self) {
1175
1191
  }
1176
1192
 
1177
1193
  /*
1178
- * If the RE2 could not be created properly, returns
1179
- * the offending portion of the regexp otherwise returns nil.
1194
+ * If the {RE2::Regexp} could not be created properly, returns
1195
+ * the offending portion of the regexp otherwise returns `nil`.
1180
1196
  *
1181
1197
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1182
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1183
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1198
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1199
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1184
1200
  *
1185
- * @return [String, nil] the offending portion of the regexp or nil
1201
+ * @return [String, nil] the offending portion of the regexp or `nil`
1186
1202
  */
1187
1203
  static VALUE re2_regexp_error_arg(const VALUE self) {
1188
1204
  re2_pattern *p;
@@ -1212,8 +1228,7 @@ static VALUE re2_regexp_program_size(const VALUE self) {
1212
1228
  }
1213
1229
 
1214
1230
  /*
1215
- * Returns a hash of the options currently set for
1216
- * +re2+.
1231
+ * Returns a hash of the options currently set for the {RE2::Regexp}.
1217
1232
  *
1218
1233
  * @return [Hash] the options
1219
1234
  */
@@ -1264,8 +1279,8 @@ static VALUE re2_regexp_options(const VALUE self) {
1264
1279
 
1265
1280
  /*
1266
1281
  * Returns the number of capturing subpatterns, or -1 if the regexp
1267
- * wasn't valid on construction. The overall match ($0) does not
1268
- * count: if the regexp is "(a)(b)", returns 2.
1282
+ * wasn't valid on construction. The overall match (`$0`) does not
1283
+ * count: if the regexp is `"(a)(b)"`, returns 2.
1269
1284
  *
1270
1285
  * @return [Integer] the number of capturing subpatterns
1271
1286
  */
@@ -1280,8 +1295,8 @@ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1280
1295
  * Returns a hash of names to capturing indices of groups.
1281
1296
  *
1282
1297
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1283
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1284
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1298
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1299
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1285
1300
  *
1286
1301
  * @return [Hash] a hash of names to capturing indices
1287
1302
  */
@@ -1303,63 +1318,93 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1303
1318
  }
1304
1319
 
1305
1320
  /*
1306
- * Match the pattern against the given +text+ and return either
1307
- * a boolean (if no submatches are required) or a {RE2::MatchData}
1308
- * instance.
1321
+ * General matching: match the pattern against the given `text` using
1322
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
1323
+ * `Match`} and return a {RE2::MatchData} instance with the specified number of
1324
+ * submatches (defaults to the total number of capturing groups) or a boolean
1325
+ * (if no submatches are required).
1309
1326
  *
1310
- * @return [Boolean, RE2::MatchData]
1327
+ * The number of submatches has a significant impact on performance: requesting
1328
+ * one submatch is much faster than requesting more than one and requesting
1329
+ * zero submatches is faster still.
1311
1330
  *
1312
1331
  * @overload match(text)
1313
- * Returns an {RE2::MatchData} containing the matching pattern and all
1314
- * subpatterns resulting from looking for the regexp in +text+ if the pattern
1332
+ * Returns a {RE2::MatchData} containing the matching pattern and all
1333
+ * submatches resulting from looking for the regexp in `text` if the pattern
1315
1334
  * contains capturing groups.
1316
1335
  *
1317
- * Returns either true or false indicating whether a successful match was
1336
+ * Returns either `true` or `false` indicating whether a successful match was
1318
1337
  * made if the pattern contains no capturing groups.
1319
1338
  *
1320
1339
  * @param [String] text the text to search
1321
- * @return [RE2::MatchData] if the pattern contains capturing groups
1340
+ * @return [RE2::MatchData, nil] if the pattern contains capturing groups
1322
1341
  * @return [Boolean] if the pattern does not contain capturing groups
1323
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1342
+ * @raise [NoMemoryError] if there was not enough memory to allocate the submatches
1343
+ * @raise [TypeError] if given text that cannot be coerced to a `String`
1324
1344
  * @example Matching with capturing groups
1325
1345
  * r = RE2::Regexp.new('w(o)(o)')
1326
- * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1346
+ * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1327
1347
  * @example Matching without capturing groups
1328
1348
  * r = RE2::Regexp.new('woo')
1329
- * r.match('woo') #=> true
1349
+ * r.match('woo') #=> true
1330
1350
  *
1331
- * @overload match(text, 0)
1332
- * Returns either true or false indicating whether a
1333
- * successful match was made.
1351
+ * @overload match(text, options)
1352
+ * See `match(text)` but with customisable offsets for starting and ending
1353
+ * matches, optional anchoring to the start or both ends of the text and a
1354
+ * specific number of submatches to extract (padded with `nil`s if
1355
+ * necessary).
1334
1356
  *
1335
1357
  * @param [String] text the text to search
1336
- * @return [Boolean] whether the match was successful
1358
+ * @param [Hash] options the options with which to perform the match
1359
+ * @option options [Integer] :startpos (0) offset at which to start matching
1360
+ * @option options [Integer] :endpos offset at which to stop matching, defaults to the text length
1361
+ * @option options [Symbol] :anchor (:unanchored) one of :unanchored, :anchor_start, :anchor_both to anchor the match
1362
+ * @option options [Integer] :submatches how many submatches to extract (0 is
1363
+ * fastest), defaults to the number of capturing groups
1364
+ * @return [RE2::MatchData, nil] if extracting any submatches
1365
+ * @return [Boolean] if not extracting any submatches
1366
+ * @raise [ArgumentError] if given a negative number of submatches, invalid
1367
+ * anchor or invalid startpos, endpos pair
1337
1368
  * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1338
- * @example
1369
+ * @raise [TypeError] if given non-String text, non-numeric number of
1370
+ * submatches, non-symbol anchor or non-hash options
1371
+ * @raise [RE2::Regexp::UnsupportedError] if given an endpos argument on a
1372
+ * version of RE2 that does not support it
1373
+ * @example Matching with capturing groups
1339
1374
  * r = RE2::Regexp.new('w(o)(o)')
1340
- * r.match('woo', 0) #=> true
1341
- * r.match('bob', 0) #=> false
1375
+ * r.match('woo', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
1376
+ * r.match('woo', submatches: 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1377
+ * r.match('woot', anchor: :anchor_both, submatches: 0)
1378
+ * #=> false
1379
+ * r.match('woot', anchor: :anchor_start, submatches: 0)
1380
+ * #=> true
1381
+ * @example Matching without capturing groups
1382
+ * r = RE2::Regexp.new('wo+')
1383
+ * r.match('woot', anchor: :anchor_both) #=> false
1384
+ * r.match('woot', anchor: :anchor_start) #=> true
1342
1385
  *
1343
- * @overload match(text, number_of_matches)
1344
- * See +match(text)+ but with a specific number of
1345
- * matches returned (padded with nils if necessary).
1386
+ * @overload match(text, submatches)
1387
+ * @deprecated Legacy syntax for matching against `text` with a specific
1388
+ * number of submatches to extract. Use `match(text, submatches: n)` instead.
1346
1389
  *
1347
1390
  * @param [String] text the text to search
1348
- * @param [Integer] number_of_matches the number of matches to return
1349
- * @return [RE2::MatchData] the matches
1350
- * @raise [ArgumentError] if given a negative number of matches
1351
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1391
+ * @param [Integer] submatches the number of submatches to extract
1392
+ * @return [RE2::MatchData, nil] if extracting any submatches
1393
+ * @return [Boolean] if not extracting any submatches
1394
+ * @raise [NoMemoryError] if there was not enough memory to allocate the submatches
1395
+ * @raise [TypeError] if given non-numeric number of submatches
1352
1396
  * @example
1353
1397
  * r = RE2::Regexp.new('w(o)(o)')
1398
+ * r.match('woo', 0) #=> true
1354
1399
  * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1355
- * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1400
+ * r.match('woo', 2) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1356
1401
  */
1357
1402
  static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1358
1403
  re2_pattern *p;
1359
1404
  re2_matchdata *m;
1360
- VALUE text, number_of_matches;
1405
+ VALUE text, options;
1361
1406
 
1362
- rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1407
+ rb_scan_args(argc, argv, "11", &text, &options);
1363
1408
 
1364
1409
  /* Ensure text is a string. */
1365
1410
  StringValue(text);
@@ -1367,12 +1412,80 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1367
1412
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1368
1413
 
1369
1414
  int n;
1415
+ int startpos = 0;
1416
+ int endpos = RSTRING_LEN(text);
1417
+ RE2::Anchor anchor = RE2::UNANCHORED;
1370
1418
 
1371
- if (RTEST(number_of_matches)) {
1372
- n = NUM2INT(number_of_matches);
1419
+ if (RTEST(options)) {
1420
+ if (FIXNUM_P(options)) {
1421
+ n = NUM2INT(options);
1422
+
1423
+ if (n < 0) {
1424
+ rb_raise(rb_eArgError, "number of matches should be >= 0");
1425
+ }
1426
+ } else {
1427
+ if (TYPE(options) != T_HASH) {
1428
+ options = rb_Hash(options);
1429
+ }
1430
+
1431
+ VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
1432
+ if (!NIL_P(endpos_option)) {
1433
+ #ifdef HAVE_ENDPOS_ARGUMENT
1434
+ Check_Type(endpos_option, T_FIXNUM);
1435
+
1436
+ endpos = NUM2INT(endpos_option);
1437
+
1438
+ if (endpos < 0) {
1439
+ rb_raise(rb_eArgError, "endpos should be >= 0");
1440
+ }
1441
+ #else
1442
+ rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
1443
+ #endif
1444
+ }
1445
+
1446
+ VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
1447
+ if (!NIL_P(anchor_option)) {
1448
+ Check_Type(anchor_option, T_SYMBOL);
1449
+
1450
+ ID id_anchor_option = SYM2ID(anchor_option);
1451
+ if (id_anchor_option == id_unanchored) {
1452
+ anchor = RE2::UNANCHORED;
1453
+ } else if (id_anchor_option == id_anchor_start) {
1454
+ anchor = RE2::ANCHOR_START;
1455
+ } else if (id_anchor_option == id_anchor_both) {
1456
+ anchor = RE2::ANCHOR_BOTH;
1457
+ } else {
1458
+ rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
1459
+ }
1460
+ }
1461
+
1462
+ VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
1463
+ if (!NIL_P(submatches_option)) {
1464
+ Check_Type(submatches_option, T_FIXNUM);
1465
+
1466
+ n = NUM2INT(submatches_option);
1467
+
1468
+ if (n < 0) {
1469
+ rb_raise(rb_eArgError, "number of matches should be >= 0");
1470
+ }
1471
+ } else {
1472
+ if (!p->pattern->ok()) {
1473
+ return Qnil;
1474
+ }
1373
1475
 
1374
- if (n < 0) {
1375
- rb_raise(rb_eArgError, "number of matches should be >= 0");
1476
+ n = p->pattern->NumberOfCapturingGroups();
1477
+ }
1478
+
1479
+ VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
1480
+ if (!NIL_P(startpos_option)) {
1481
+ Check_Type(startpos_option, T_FIXNUM);
1482
+
1483
+ startpos = NUM2INT(startpos_option);
1484
+
1485
+ if (startpos < 0) {
1486
+ rb_raise(rb_eArgError, "startpos should be >= 0");
1487
+ }
1488
+ }
1376
1489
  }
1377
1490
  } else {
1378
1491
  if (!p->pattern->ok()) {
@@ -1382,12 +1495,16 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1382
1495
  n = p->pattern->NumberOfCapturingGroups();
1383
1496
  }
1384
1497
 
1498
+ if (startpos > endpos) {
1499
+ rb_raise(rb_eArgError, "startpos should be <= endpos");
1500
+ }
1501
+
1385
1502
  if (n == 0) {
1386
1503
  #ifdef HAVE_ENDPOS_ARGUMENT
1387
- bool matched = p->pattern->Match(RSTRING_PTR(text), 0,
1388
- RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
1504
+ bool matched = p->pattern->Match(RSTRING_PTR(text), startpos,
1505
+ endpos, anchor, 0, 0);
1389
1506
  #else
1390
- bool matched = p->pattern->Match(RSTRING_PTR(text), 0, RE2::UNANCHORED,
1507
+ bool matched = p->pattern->Match(RSTRING_PTR(text), startpos, anchor,
1391
1508
  0, 0);
1392
1509
  #endif
1393
1510
  return BOOL2RUBY(matched);
@@ -1412,11 +1529,11 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1412
1529
  m->number_of_matches = n;
1413
1530
 
1414
1531
  #ifdef HAVE_ENDPOS_ARGUMENT
1415
- bool matched = p->pattern->Match(RSTRING_PTR(m->text), 0,
1416
- RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
1532
+ bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
1533
+ endpos, anchor, m->matches, n);
1417
1534
  #else
1418
- bool matched = p->pattern->Match(RSTRING_PTR(m->text), 0,
1419
- RE2::UNANCHORED, m->matches, n);
1535
+ bool matched = p->pattern->Match(RSTRING_PTR(m->text), startpos,
1536
+ anchor, m->matches, n);
1420
1537
  #endif
1421
1538
  if (matched) {
1422
1539
  return matchdata;
@@ -1427,22 +1544,54 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1427
1544
  }
1428
1545
 
1429
1546
  /*
1430
- * Returns true or false to indicate a successful match.
1431
- * Equivalent to +re2.match(text, 0)+.
1547
+ * Returns true if the pattern matches any substring of the given text using
1548
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
1549
+ * `PartialMatch`}.
1432
1550
  *
1433
1551
  * @return [Boolean] whether the match was successful
1552
+ * @raise [TypeError] if text cannot be coerced to a `String`
1434
1553
  */
1435
1554
  static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1436
- VALUE argv[2] = { text, INT2FIX(0) };
1555
+ re2_pattern *p;
1556
+
1557
+ /* Ensure text is a string. */
1558
+ StringValue(text);
1559
+
1560
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1561
+
1562
+ return BOOL2RUBY(RE2::PartialMatch(RSTRING_PTR(text), *p->pattern));
1563
+ }
1564
+
1565
+ /*
1566
+ * Returns true if the pattern matches the given text using
1567
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
1568
+ * `FullMatch`}.
1569
+ *
1570
+ * @return [Boolean] whether the match was successful
1571
+ * @raise [TypeError] if text cannot be coerced to a `String`
1572
+ */
1573
+ static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
1574
+ re2_pattern *p;
1575
+
1576
+ /* Ensure text is a string. */
1577
+ StringValue(text);
1437
1578
 
1438
- return re2_regexp_match(2, argv, self);
1579
+ TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
1580
+
1581
+ return BOOL2RUBY(RE2::FullMatch(RSTRING_PTR(text), *p->pattern));
1439
1582
  }
1440
1583
 
1441
1584
  /*
1442
- * Returns a {RE2::Scanner} for scanning the given text incrementally.
1585
+ * Returns a {RE2::Scanner} for scanning the given text incrementally with
1586
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
1587
+ * `FindAndConsume`}.
1443
1588
  *
1589
+ * @param [String] text the text to scan incrementally
1590
+ * @return [RE2::Scanner] an `Enumerable` {RE2::Scanner} object
1591
+ * @raise [TypeError] if `text` cannot be coerced to a `String`
1444
1592
  * @example
1445
1593
  * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1594
+ * #=> #<RE2::Scanner:0x0000000000000001>
1446
1595
  */
1447
1596
  static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1448
1597
  /* Ensure text is a string. */
@@ -1471,17 +1620,40 @@ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1471
1620
  }
1472
1621
 
1473
1622
  /*
1474
- * Returns a copy of +str+ with the first occurrence +pattern+
1475
- * replaced with +rewrite+.
1623
+ * Returns whether the underlying RE2 version supports passing an `endpos`
1624
+ * argument to
1625
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
1626
+ * Match}. If not, {RE2::Regexp#match} will raise an error if attempting to
1627
+ * pass an `endpos`.
1628
+ *
1629
+ * @return [Boolean] whether the underlying
1630
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
1631
+ * Match} has an endpos argument
1632
+ */
1633
+ static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
1634
+ #ifdef HAVE_ENDPOS_ARGUMENT
1635
+ return Qtrue;
1636
+ #else
1637
+ return Qfalse;
1638
+ #endif
1639
+ }
1640
+
1641
+ /*
1642
+ * Returns a copy of `str` with the first occurrence `pattern` replaced with
1643
+ * `rewrite` using
1644
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L465-L480
1645
+ * `Replace`}.
1476
1646
  *
1477
1647
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1478
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1479
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1648
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1649
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1480
1650
  *
1481
1651
  * @param [String] str the string to modify
1482
1652
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1483
1653
  * @param [String] rewrite the string to replace with
1484
1654
  * @return [String] the resulting string
1655
+ * @raise [TypeError] if the given rewrite or pattern (if not provided as a
1656
+ * {RE2::Regexp}) cannot be coerced to `String`s
1485
1657
  * @example
1486
1658
  * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1487
1659
  * re2 = RE2::Regexp.new("hel+o")
@@ -1517,20 +1689,24 @@ static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
1517
1689
  }
1518
1690
 
1519
1691
  /*
1520
- * Return a copy of +str+ with +pattern+ replaced by +rewrite+.
1692
+ * Return a copy of `str` with `pattern` replaced by `rewrite` using
1693
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L482-L497
1694
+ * `GlobalReplace`}.
1521
1695
  *
1522
1696
  * Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
1523
- * returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
1524
- * RE2::Regexp is set to false (any other encoding's behaviour is undefined).
1697
+ * returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
1698
+ * {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
1525
1699
  *
1526
1700
  * @param [String] str the string to modify
1527
1701
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1528
1702
  * @param [String] rewrite the string to replace with
1703
+ * @raise [TypeError] if the given rewrite or pattern (if not provided as a
1704
+ * {RE2::Regexp}) cannot be coerced to `String`s
1529
1705
  * @return [String] the resulting string
1530
1706
  * @example
1531
1707
  * re2 = RE2::Regexp.new("oo?")
1532
- * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1533
- * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1708
+ * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1709
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1534
1710
  */
1535
1711
  static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1536
1712
  VALUE rewrite) {
@@ -1562,14 +1738,17 @@ static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
1562
1738
  }
1563
1739
 
1564
1740
  /*
1565
- * Returns a version of str with all potentially meaningful regexp
1566
- * characters escaped. The returned string, used as a regular
1567
- * expression, will exactly match the original string.
1741
+ * Returns a version of `str` with all potentially meaningful regexp characters
1742
+ * escaped using
1743
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L512-L518
1744
+ * `QuoteMeta`}. The returned string, used as a regular expression, will
1745
+ * exactly match the original string.
1568
1746
  *
1569
1747
  * @param [String] unquoted the unquoted string
1748
+ * @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
1570
1749
  * @return [String] the escaped string
1571
1750
  * @example
1572
- * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1751
+ * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1573
1752
  */
1574
1753
  static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
1575
1754
  StringValue(unquoted);
@@ -1598,15 +1777,17 @@ static size_t re2_set_memsize(const void *ptr) {
1598
1777
  }
1599
1778
 
1600
1779
  static const rb_data_type_t re2_set_data_type = {
1601
- .wrap_struct_name = "RE2::Set",
1602
- .function = {
1603
- .dmark = NULL,
1604
- .dfree = re2_set_free,
1605
- .dsize = re2_set_memsize,
1780
+ "RE2::Set",
1781
+ {
1782
+ 0,
1783
+ re2_set_free,
1784
+ re2_set_memsize,
1606
1785
  },
1786
+ 0,
1787
+ 0,
1607
1788
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
1608
1789
  // macro to update VALUE references, as to trigger write barriers.
1609
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
1790
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
1610
1791
  };
1611
1792
 
1612
1793
  static VALUE re2_set_allocate(VALUE klass) {
@@ -1633,14 +1814,14 @@ static VALUE re2_set_allocate(VALUE klass) {
1633
1814
  * Returns a new {RE2::Set} object for the specified anchor with the default
1634
1815
  * options.
1635
1816
  *
1636
- * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1637
- * @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
1817
+ * @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
1818
+ * @raise [ArgumentError] if anchor is not `:unanchored`, `:anchor_start` or `:anchor_both`
1638
1819
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1639
1820
  *
1640
1821
  * @overload initialize(anchor, options)
1641
1822
  * Returns a new {RE2::Set} object with the specified options.
1642
1823
  *
1643
- * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1824
+ * @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
1644
1825
  * @param [Hash] options the options with which to compile the pattern
1645
1826
  * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
1646
1827
  * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
@@ -1648,13 +1829,13 @@ static VALUE re2_set_allocate(VALUE klass) {
1648
1829
  * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
1649
1830
  * @option options [Integer] :max_mem approx. max memory footprint of RE2
1650
1831
  * @option options [Boolean] :literal (false) interpret string as literal, not regexp
1651
- * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
1652
- * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
1653
- * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
1654
- * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
1655
- * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
1656
- * @return [RE2::Set] an RE2::Set with the specified anchor and options
1657
- * @raise [ArgumentError] if anchor is not one of the accepted choices
1832
+ * @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
1833
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
1834
+ * @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
1835
+ * @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
1836
+ * @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
1837
+ * @return [RE2::Set] a {RE2::Set} with the specified anchor and options
1838
+ * @raise [ArgumentError] if `anchor` is not one of the accepted choices
1658
1839
  * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1659
1840
  */
1660
1841
  static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
@@ -1668,12 +1849,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1668
1849
 
1669
1850
  if (!NIL_P(anchor)) {
1670
1851
  Check_Type(anchor, T_SYMBOL);
1671
- ID id_anchor = SYM2ID(anchor);
1672
- if (id_anchor == id_unanchored) {
1852
+ ID id_anchor_arg = SYM2ID(anchor);
1853
+ if (id_anchor_arg == id_unanchored) {
1673
1854
  re2_anchor = RE2::UNANCHORED;
1674
- } else if (id_anchor == id_anchor_start) {
1855
+ } else if (id_anchor_arg == id_anchor_start) {
1675
1856
  re2_anchor = RE2::ANCHOR_START;
1676
- } else if (id_anchor == id_anchor_both) {
1857
+ } else if (id_anchor_arg == id_anchor_both) {
1677
1858
  re2_anchor = RE2::ANCHOR_BOTH;
1678
1859
  } else {
1679
1860
  rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
@@ -1696,15 +1877,16 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1696
1877
 
1697
1878
  /*
1698
1879
  * Adds a pattern to the set. Returns the index that will identify the pattern
1699
- * in the output of #match. Cannot be called after #compile has been called.
1880
+ * in the output of {RE2::Set#match}. Cannot be called after {RE2::Set#compile}
1881
+ * has been called.
1700
1882
  *
1701
1883
  * @param [String] pattern the regex pattern
1702
1884
  * @return [Integer] the index of the pattern in the set
1703
1885
  * @raise [ArgumentError] if called after compile or the pattern is rejected
1704
1886
  * @example
1705
1887
  * set = RE2::Set.new
1706
- * set.add("abc") #=> 0
1707
- * set.add("def") #=> 1
1888
+ * set.add("abc") #=> 0
1889
+ * set.add("def") #=> 1
1708
1890
  */
1709
1891
  static VALUE re2_set_add(VALUE self, VALUE pattern) {
1710
1892
  StringValue(pattern);
@@ -1732,14 +1914,14 @@ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1732
1914
  }
1733
1915
 
1734
1916
  /*
1735
- * Compiles a Set so it can be used to match against. Must be called after #add
1736
- * and before #match.
1917
+ * Compiles a {RE2::Set} so it can be used to match against. Must be called
1918
+ * after {RE2::Set#add} and before {RE2::Set#match}.
1737
1919
  *
1738
- * @return [Bool] whether compilation was a success
1920
+ * @return [Boolean] whether compilation was a success
1739
1921
  * @example
1740
1922
  * set = RE2::Set.new
1741
1923
  * set.add("abc")
1742
- * set.compile # => true
1924
+ * set.compile #=> true
1743
1925
  */
1744
1926
  static VALUE re2_set_compile(VALUE self) {
1745
1927
  re2_set *s;
@@ -1749,11 +1931,12 @@ static VALUE re2_set_compile(VALUE self) {
1749
1931
  }
1750
1932
 
1751
1933
  /*
1752
- * Returns whether the underlying re2 version outputs error information from
1753
- * RE2::Set::Match. If not, #match will raise an error if attempting to set its
1754
- * :exception option to true.
1934
+ * Returns whether the underlying RE2 version outputs error information from
1935
+ * {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/set.h#L62-L65
1936
+ * `RE2::Set::Match`}. If not, {RE2::Set#match} will raise an error if attempting to set
1937
+ * its `:exception` option to `true`.
1755
1938
  *
1756
- * @return [Bool] whether the underlying re2 outputs error information from Set matches
1939
+ * @return [Boolean] whether the underlying RE2 outputs error information from {RE2::Set} matches
1757
1940
  */
1758
1941
  static VALUE re2_set_match_raises_errors_p(VALUE) {
1759
1942
  #ifdef HAVE_ERROR_INFO_ARGUMENT
@@ -1777,31 +1960,31 @@ static VALUE re2_set_match_raises_errors_p(VALUE) {
1777
1960
  * @param [String] str the text to match against
1778
1961
  * @return [Array<Integer>] the indices of matching regexps
1779
1962
  * @raise [MatchError] if an error occurs while matching
1780
- * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1963
+ * @raise [UnsupportedError] if the underlying version of RE2 does not output error information
1781
1964
  * @example
1782
1965
  * set = RE2::Set.new
1783
1966
  * set.add("abc")
1784
1967
  * set.add("def")
1785
1968
  * set.compile
1786
- * set.match("abcdef") # => [0, 1]
1969
+ * set.match("abcdef") #=> [0, 1]
1787
1970
  *
1788
1971
  * @overload match(str, options)
1789
1972
  * Returns an array of integer indices of patterns matching the given string
1790
1973
  * (if any). Raises exceptions if there are any errors while matching and the
1791
- * :exception option is set to true.
1974
+ * `:exception` option is set to true.
1792
1975
  *
1793
1976
  * @param [String] str the text to match against
1794
1977
  * @param [Hash] options the options with which to match
1795
- * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1978
+ * @option options [Boolean] :exception (true) whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)
1796
1979
  * @return [Array<Integer>] the indices of matching regexps
1797
1980
  * @raise [MatchError] if an error occurs while matching
1798
- * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1981
+ * @raise [UnsupportedError] if the underlying version of RE2 does not output error information
1799
1982
  * @example
1800
1983
  * set = RE2::Set.new
1801
1984
  * set.add("abc")
1802
1985
  * set.add("def")
1803
1986
  * set.compile
1804
- * set.match("abcdef", :exception => true) # => [0, 1]
1987
+ * set.match("abcdef", exception: true) #=> [0, 1]
1805
1988
  */
1806
1989
  static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1807
1990
  VALUE str, options;
@@ -1869,6 +2052,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1869
2052
  extern "C" void Init_re2(void) {
1870
2053
  re2_mRE2 = rb_define_module("RE2");
1871
2054
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
2055
+ re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
2056
+ "UnsupportedError", rb_const_get(rb_cObject, rb_intern("StandardError")));
1872
2057
  re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1873
2058
  re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1874
2059
  re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
@@ -1922,6 +2107,8 @@ extern "C" void Init_re2(void) {
1922
2107
  rb_define_method(re2_cScanner, "rewind",
1923
2108
  RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1924
2109
 
2110
+ rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
2111
+ RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
1925
2112
  rb_define_method(re2_cRegexp, "initialize",
1926
2113
  RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1927
2114
  rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
@@ -1939,12 +2126,14 @@ extern "C" void Init_re2(void) {
1939
2126
  RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1940
2127
  rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1941
2128
  -1);
1942
- rb_define_method(re2_cRegexp, "match?",
1943
- RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1944
- rb_define_method(re2_cRegexp, "=~",
1945
- RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1946
- rb_define_method(re2_cRegexp, "===",
2129
+ rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
2130
+ 1);
2131
+ rb_define_method(re2_cRegexp, "partial_match?",
1947
2132
  RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
2133
+ rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
2134
+ rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
2135
+ rb_define_method(re2_cRegexp, "full_match?",
2136
+ RUBY_METHOD_FUNC(re2_regexp_full_match_p), 1);
1948
2137
  rb_define_method(re2_cRegexp, "scan",
1949
2138
  RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1950
2139
  rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
@@ -2001,6 +2190,8 @@ extern "C" void Init_re2(void) {
2001
2190
  RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
2002
2191
  rb_define_singleton_method(re2_cRegexp, "quote",
2003
2192
  RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
2193
+
2194
+ // (see RE2::Regexp#initialize)
2004
2195
  rb_define_singleton_method(re2_cRegexp, "compile",
2005
2196
  RUBY_METHOD_FUNC(rb_class_new_instance), -1);
2006
2197
 
@@ -2019,7 +2210,11 @@ extern "C" void Init_re2(void) {
2019
2210
  id_word_boundary = rb_intern("word_boundary");
2020
2211
  id_one_line = rb_intern("one_line");
2021
2212
  id_unanchored = rb_intern("unanchored");
2213
+ id_anchor = rb_intern("anchor");
2022
2214
  id_anchor_start = rb_intern("anchor_start");
2023
2215
  id_anchor_both = rb_intern("anchor_both");
2024
2216
  id_exception = rb_intern("exception");
2217
+ id_submatches = rb_intern("submatches");
2218
+ id_startpos = rb_intern("startpos");
2219
+ id_endpos = rb_intern("endpos");
2025
2220
  }