re2 2.24.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/re2/string.rb CHANGED
@@ -13,14 +13,14 @@ require "re2"
13
13
  module RE2
14
14
  # @deprecated Use methods on {RE2} and {RE2::Regexp} instead.
15
15
  module String
16
- # @deprecated Use {RE2.Replace} instead.
16
+ # @deprecated Use {RE2.replace} instead.
17
17
  def re2_sub(*args)
18
- RE2.Replace(self, *args)
18
+ RE2.replace(self, *args)
19
19
  end
20
20
 
21
- # @deprecated Use {RE2.GlobalReplace} instead.
21
+ # @deprecated Use {RE2.global_replace} instead.
22
22
  def re2_gsub(*args)
23
- RE2.GlobalReplace(self, *args)
23
+ RE2.global_replace(self, *args)
24
24
  end
25
25
 
26
26
  # @deprecated Use {RE2::Regexp#match} instead.
@@ -28,9 +28,9 @@ module RE2
28
28
  RE2::Regexp.new(pattern).match(self, *args)
29
29
  end
30
30
 
31
- # @deprecated Use {RE2.QuoteMeta} instead.
31
+ # @deprecated Use {RE2.escape} instead.
32
32
  def re2_escape
33
- RE2.QuoteMeta(self)
33
+ RE2.escape(self)
34
34
  end
35
35
 
36
36
  alias_method :re2_quote, :re2_escape
data/lib/re2/version.rb CHANGED
@@ -10,5 +10,5 @@
10
10
 
11
11
 
12
12
  module RE2
13
- VERSION = "2.24.0"
13
+ VERSION = "2.26.0"
14
14
  end
@@ -66,10 +66,16 @@ RSpec.describe RE2::MatchData do
66
66
  expect(a).to eq(["woo", "o", "o"])
67
67
  end
68
68
 
69
- it "populates optional capturing groups with nil if they are missing" do
69
+ it "populates optional capturing groups with empty strings if they match zero characters" do
70
70
  a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a
71
71
 
72
- expect(a).to eq(["ab", nil, "a", "b"])
72
+ expect(a).to eq(["ab", "", "a", "b"])
73
+ end
74
+
75
+ it "distinguishes between zero-length matches and unmatched groups" do
76
+ a = RE2::Regexp.new('()(a)?').match('b').to_a
77
+
78
+ expect(a).to eq(["", "", nil])
73
79
  end
74
80
 
75
81
  it "returns UTF-8 strings if the pattern is UTF-8" do
@@ -158,6 +164,12 @@ RSpec.describe RE2::MatchData do
158
164
  expect(md[:numbers]).to eq("123")
159
165
  end
160
166
 
167
+ it "returns an empty string for a zero-length capturing group" do
168
+ md = RE2::Regexp.new('()').match("bob")
169
+
170
+ expect(md[1]).to eq("")
171
+ end
172
+
161
173
  it "returns nil if no such named group exists", :aggregate_failures do
162
174
  md = RE2::Regexp.new('(\d+)').match("bob 123")
163
175
 
@@ -281,6 +293,12 @@ RSpec.describe RE2::MatchData do
281
293
  expect(md.inspect).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>')
282
294
  end
283
295
 
296
+ it "represents zero-length capturing groups as empty strings" do
297
+ md = RE2::Regexp.new('()').match("bob")
298
+
299
+ expect(md.inspect).to eq('#<RE2::MatchData "" 1:"">')
300
+ end
301
+
284
302
  it "supports matches with null bytes" do
285
303
  md = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)").match("a\0b c\0d")
286
304
 
@@ -299,6 +317,12 @@ RSpec.describe RE2::MatchData do
299
317
  expect(md.to_s).to eq("23456")
300
318
  end
301
319
 
320
+ it "returns an empty string for a zero-length match" do
321
+ md = RE2::Regexp.new('()').match("bob")
322
+
323
+ expect(md.to_s).to eq("")
324
+ end
325
+
302
326
  it "raises an error when called on an uninitialized object" do
303
327
  expect { described_class.allocate.to_s }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
304
328
  end
@@ -346,6 +370,12 @@ RSpec.describe RE2::MatchData do
346
370
  expect(md.string[md.begin(0)..-1]).to eq('Ruby')
347
371
  end
348
372
 
373
+ it "returns the offset for a zero-length capturing group" do
374
+ md = RE2::Regexp.new('()').match("bob")
375
+
376
+ expect(md.begin(1)).to eq(0)
377
+ end
378
+
349
379
  it "returns nil for non-existent numerical matches" do
350
380
  md = RE2::Regexp.new('(\d)').match('123')
351
381
 
@@ -419,6 +449,12 @@ RSpec.describe RE2::MatchData do
419
449
  expect(md.string[0...md.end(0)]).to eq('I ♥ Ruby')
420
450
  end
421
451
 
452
+ it "returns the offset for a zero-length capturing group" do
453
+ md = RE2::Regexp.new('()').match("bob")
454
+
455
+ expect(md.end(1)).to eq(0)
456
+ end
457
+
422
458
  it "returns nil for non-existent numerical matches" do
423
459
  md = RE2::Regexp.new('(\d)').match('123')
424
460
 
@@ -461,6 +497,264 @@ RSpec.describe RE2::MatchData do
461
497
  end
462
498
  end
463
499
 
500
+ describe "#pre_match" do
501
+ it "returns the portion of the string before the match" do
502
+ md = RE2::Regexp.new('(\d+)').match("bob 123 456")
503
+
504
+ expect(md.pre_match).to eq("bob ")
505
+ end
506
+
507
+ it "returns an empty string when the match starts at the beginning" do
508
+ md = RE2::Regexp.new('(\w+)').match("bob 123")
509
+
510
+ expect(md.pre_match).to eq("")
511
+ end
512
+
513
+ it "supports multibyte characters" do
514
+ md = RE2::Regexp.new('(\d+)').match("I ♥ 123")
515
+
516
+ expect(md.pre_match).to eq("I ♥ ")
517
+ end
518
+
519
+ it "returns UTF-8 strings by default" do
520
+ md = RE2::Regexp.new('(\d+)').match("abc 123")
521
+
522
+ expect(md.pre_match.encoding).to eq(Encoding::UTF_8)
523
+ end
524
+
525
+ it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
526
+ md = RE2::Regexp.new('(\d+)', utf8: false).match("abc 123")
527
+
528
+ expect(md.pre_match.encoding).to eq(Encoding::ISO_8859_1)
529
+ end
530
+
531
+ it "returns the text before a zero-length match" do
532
+ md = RE2::Regexp.new('()').match("bob")
533
+
534
+ expect(md.pre_match).to eq("")
535
+ end
536
+
537
+ it "raises an error when called on an uninitialized object" do
538
+ expect { described_class.allocate.pre_match }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
539
+ end
540
+ end
541
+
542
+ describe "#post_match" do
543
+ it "returns the portion of the string after the match" do
544
+ md = RE2::Regexp.new('(\d+)').match("bob 123 456")
545
+
546
+ expect(md.post_match).to eq(" 456")
547
+ end
548
+
549
+ it "returns an empty string when the match ends at the end" do
550
+ md = RE2::Regexp.new('(\d+)$').match("bob 123")
551
+
552
+ expect(md.post_match).to eq("")
553
+ end
554
+
555
+ it "supports multibyte characters" do
556
+ md = RE2::Regexp.new('(\d+)').match("123 ♥ world")
557
+
558
+ expect(md.post_match).to eq(" ♥ world")
559
+ end
560
+
561
+ it "returns UTF-8 strings by default" do
562
+ md = RE2::Regexp.new('(\d+)').match("abc 123 def")
563
+
564
+ expect(md.post_match.encoding).to eq(Encoding::UTF_8)
565
+ end
566
+
567
+ it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
568
+ md = RE2::Regexp.new('(\d+)', utf8: false).match("abc 123 def")
569
+
570
+ expect(md.post_match.encoding).to eq(Encoding::ISO_8859_1)
571
+ end
572
+
573
+ it "returns the text after a zero-length match" do
574
+ md = RE2::Regexp.new('()').match("bob")
575
+
576
+ expect(md.post_match).to eq("bob")
577
+ end
578
+
579
+ it "raises an error when called on an uninitialized object" do
580
+ expect { described_class.allocate.post_match }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
581
+ end
582
+ end
583
+
584
+ describe "#offset" do
585
+ it "returns the offset of a match by index" do
586
+ md = RE2::Regexp.new('ob (\d+)').match("bob 123")
587
+
588
+ expect(md.offset(0)).to eq([1, 7])
589
+ end
590
+
591
+ it "returns the offset of a submatch by index" do
592
+ md = RE2::Regexp.new('ob (\d+)').match("bob 123")
593
+
594
+ expect(md.offset(1)).to eq([4, 7])
595
+ end
596
+
597
+ it "returns the offset of a match by string name" do
598
+ md = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
599
+
600
+ expect(md.offset("number")).to eq([4, 7])
601
+ end
602
+
603
+ it "returns the offset of a match by symbol name" do
604
+ md = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
605
+
606
+ expect(md.offset(:number)).to eq([4, 7])
607
+ end
608
+
609
+ it "returns character offsets despite multibyte characters" do
610
+ md = RE2::Regexp.new('(Ruby)').match("I ♥ Ruby")
611
+
612
+ expect(md.offset(0)).to eq([4, 8])
613
+ end
614
+
615
+ it "returns identical offsets for a zero-length capturing group" do
616
+ md = RE2::Regexp.new('()').match("bob")
617
+
618
+ expect(md.offset(1)).to eq([0, 0])
619
+ end
620
+
621
+ it "returns nil for non-existent numerical matches" do
622
+ md = RE2::Regexp.new('(\d)').match("123")
623
+
624
+ expect(md.offset(10)).to be_nil
625
+ end
626
+
627
+ it "returns nil for non-existent named matches" do
628
+ md = RE2::Regexp.new('(\d)').match("123")
629
+
630
+ expect(md.offset("foo")).to be_nil
631
+ end
632
+
633
+ it "raises a type error if given an invalid name or number" do
634
+ md = RE2::Regexp.new('(\d)').match("123")
635
+
636
+ expect { md.offset(nil) }.to raise_error(TypeError)
637
+ end
638
+
639
+ it "raises an error when called on an uninitialized object" do
640
+ expect { described_class.allocate.offset(0) }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
641
+ end
642
+ end
643
+
644
+ describe "#match_length" do
645
+ it "returns the length of the overall match" do
646
+ md = RE2::Regexp.new('ob (\d+)').match("bob 123")
647
+
648
+ expect(md.match_length(0)).to eq(6)
649
+ end
650
+
651
+ it "returns the length of a submatch by index" do
652
+ md = RE2::Regexp.new('ob (\d+)').match("bob 123")
653
+
654
+ expect(md.match_length(1)).to eq(3)
655
+ end
656
+
657
+ it "returns the length of a match by string name" do
658
+ md = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
659
+
660
+ expect(md.match_length("number")).to eq(3)
661
+ end
662
+
663
+ it "returns the length of a match by symbol name" do
664
+ md = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
665
+
666
+ expect(md.match_length(:number)).to eq(3)
667
+ end
668
+
669
+ it "returns character length despite multibyte characters" do
670
+ md = RE2::Regexp.new('(♥ Ruby)').match("I ♥ Ruby!")
671
+
672
+ expect(md.match_length(0)).to eq(6)
673
+ end
674
+
675
+ it "returns zero for a zero-length capturing group" do
676
+ md = RE2::Regexp.new('()').match("bob")
677
+
678
+ expect(md.match_length(1)).to eq(0)
679
+ end
680
+
681
+ it "returns nil for non-existent numerical matches" do
682
+ md = RE2::Regexp.new('(\d)').match("123")
683
+
684
+ expect(md.match_length(10)).to be_nil
685
+ end
686
+
687
+ it "returns nil for non-existent named matches" do
688
+ md = RE2::Regexp.new('(\d)').match("123")
689
+
690
+ expect(md.match_length("foo")).to be_nil
691
+ end
692
+
693
+ it "raises a type error if given an invalid name or number" do
694
+ md = RE2::Regexp.new('(\d)').match("123")
695
+
696
+ expect { md.match_length(nil) }.to raise_error(TypeError)
697
+ end
698
+
699
+ it "raises an error when called on an uninitialized object" do
700
+ expect { described_class.allocate.match_length(0) }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
701
+ end
702
+ end
703
+
704
+ describe "#values_at" do
705
+ it "returns match values at the given indices" do
706
+ md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789")
707
+
708
+ expect(md.values_at(1, 3)).to eq(["123", "789"])
709
+ end
710
+
711
+ it "returns match values by named groups" do
712
+ md = RE2::Regexp.new('(?P<a>\d+) (?P<b>\d+)').match("123 456")
713
+
714
+ expect(md.values_at(:a, :b)).to eq(["123", "456"])
715
+ end
716
+
717
+ it "supports a mix of indices and names" do
718
+ md = RE2::Regexp.new('(?P<a>\d+) (\d+)').match("123 456")
719
+
720
+ expect(md.values_at(2, :a)).to eq(["456", "123"])
721
+ end
722
+
723
+ it "returns nil for non-existent indices" do
724
+ md = RE2::Regexp.new('(\d+)').match("123")
725
+
726
+ expect(md.values_at(1, 5)).to eq(["123", nil])
727
+ end
728
+
729
+ it "returns nil for non-existent names" do
730
+ md = RE2::Regexp.new('(?P<a>\d+)').match("123")
731
+
732
+ expect(md.values_at(:a, :z)).to eq(["123", nil])
733
+ end
734
+
735
+ it "returns an empty string for a zero-length capturing group" do
736
+ md = RE2::Regexp.new('()(b)').match("bob")
737
+
738
+ expect(md.values_at(1, 2)).to eq(["", "b"])
739
+ end
740
+
741
+ it "returns the full match when given index 0" do
742
+ md = RE2::Regexp.new('(\d+) (\d+)').match("123 456")
743
+
744
+ expect(md.values_at(0, 1)).to eq(["123 456", "123"])
745
+ end
746
+
747
+ it "returns an empty array if given no arguments" do
748
+ md = RE2::Regexp.new('(\d+) (\d+)').match("123 456")
749
+
750
+ expect(md.values_at).to be_empty
751
+ end
752
+
753
+ it "raises an error when called on an uninitialized object" do
754
+ expect { described_class.allocate.values_at(1) }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
755
+ end
756
+ end
757
+
464
758
  describe "#deconstruct" do
465
759
  it "returns all capturing groups" do
466
760
  md = RE2::Regexp.new('w(o)(o)').match('woo')
@@ -474,11 +768,107 @@ RSpec.describe RE2::MatchData do
474
768
  expect(md.deconstruct).to eq(['o', 'o', nil])
475
769
  end
476
770
 
771
+ it "includes zero-length capturing groups as empty strings" do
772
+ md = RE2::Regexp.new('()').match("bob")
773
+
774
+ expect(md.deconstruct).to eq([""])
775
+ end
776
+
477
777
  it "raises an error when called on an uninitialized object" do
478
778
  expect { described_class.allocate.deconstruct }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
479
779
  end
480
780
  end
481
781
 
782
+ describe "#captures" do
783
+ it "returns all capturing groups" do
784
+ md = RE2::Regexp.new('w(o)(o)').match('woo')
785
+
786
+ expect(md.captures).to eq(['o', 'o'])
787
+ end
788
+
789
+ it "includes optional capturing groups as nil" do
790
+ md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
791
+
792
+ expect(md.captures).to eq(['o', 'o', nil])
793
+ end
794
+
795
+ it "raises an error when called on an uninitialized object" do
796
+ expect { described_class.allocate.captures }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
797
+ end
798
+ end
799
+
800
+ describe "#named_captures" do
801
+ it "returns a hash of capturing group names to matched strings" do
802
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
803
+
804
+ expect(md.named_captures).to eq("numbers" => "123", "letters" => "abc")
805
+ end
806
+
807
+ it "returns an empty hash if there are no named capturing groups" do
808
+ md = RE2::Regexp.new('(\d+)').match('123')
809
+
810
+ expect(md.named_captures).to be_empty
811
+ end
812
+
813
+ it "returns unmatched optional groups as nil" do
814
+ md = RE2::Regexp.new('(?P<a>\d+) (?P<b>\w+)?').match('123 ')
815
+
816
+ expect(md.named_captures).to eq("a" => "123", "b" => nil)
817
+ end
818
+
819
+ it "returns an empty string for a zero-length named capturing group" do
820
+ md = RE2::Regexp.new('(?P<empty>)(?P<word>\w+)').match("bob")
821
+
822
+ expect(md.named_captures).to eq("empty" => "", "word" => "bob")
823
+ end
824
+
825
+ it "returns symbol keys when symbolize_names: true" do
826
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
827
+
828
+ expect(md.named_captures(symbolize_names: true)).to eq(numbers: "123", letters: "abc")
829
+ end
830
+
831
+ it "returns string keys when symbolize_names: false" do
832
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
833
+
834
+ expect(md.named_captures(symbolize_names: false)).to eq("numbers" => "123", "letters" => "abc")
835
+ end
836
+
837
+ it "raises an error when called on an uninitialized object" do
838
+ expect { described_class.allocate.named_captures }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
839
+ end
840
+ end
841
+
842
+ describe "#names" do
843
+ it "returns an array of names of named capturing groups" do
844
+ md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
845
+
846
+ expect(md.names).to eq(["letters", "numbers"])
847
+ end
848
+
849
+ it "returns an empty array if there are no named capturing groups" do
850
+ md = RE2::Regexp.new('(\d+)').match('123')
851
+
852
+ expect(md.names).to be_empty
853
+ end
854
+
855
+ it "returns UTF-8 strings if the pattern is UTF-8" do
856
+ md = RE2::Regexp.new('(?P<numbers>\d+)').match('123')
857
+
858
+ expect(md.names.first.encoding).to eq(Encoding::UTF_8)
859
+ end
860
+
861
+ it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
862
+ md = RE2::Regexp.new('(?P<numbers>\d+)', utf8: false).match('123')
863
+
864
+ expect(md.names.first.encoding).to eq(Encoding::ISO_8859_1)
865
+ end
866
+
867
+ it "raises an error when called on an uninitialized object" do
868
+ expect { described_class.allocate.names }.to raise_error(TypeError, /uninitialized RE2::MatchData/)
869
+ end
870
+ end
871
+
482
872
  describe "#deconstruct_keys" do
483
873
  it "returns all named captures if given nil" do
484
874
  md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
@@ -510,6 +900,12 @@ RSpec.describe RE2::MatchData do
510
900
  expect(md.deconstruct_keys(nil)).to eq({})
511
901
  end
512
902
 
903
+ it "returns an empty string for a zero-length named capturing group" do
904
+ md = RE2::Regexp.new('(?P<empty>)(?P<word>\w+)').match("bob")
905
+
906
+ expect(md.deconstruct_keys(nil)).to eq(empty: "", word: "bob")
907
+ end
908
+
513
909
  it "raises an error if given a non-array of keys" do
514
910
  md = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
515
911
 
@@ -723,7 +723,6 @@ RSpec.describe RE2::Regexp do
723
723
 
724
724
  it "raises an exception if given too large a number of submatches instead of options" do
725
725
  re = RE2::Regexp.new('(\w+) (\w+) (\w+)')
726
- md = re.match("one two three", 2)
727
726
 
728
727
  expect { re.match("one two three", INT_MAX) }.to raise_error(RangeError, "number of matches should be < #{INT_MAX}")
729
728
  end
@@ -963,6 +962,64 @@ RSpec.describe RE2::Regexp do
963
962
  end
964
963
  end
965
964
 
965
+ describe "#names" do
966
+ it "returns an array of names of named capturing groups" do
967
+ expect(RE2::Regexp.new('(?P<bob>a)(?P<rob>b)').names).to eq(["bob", "rob"])
968
+ end
969
+
970
+ it "returns an empty array if there are no named capturing groups" do
971
+ expect(RE2::Regexp.new('(a)(b)').names).to be_empty
972
+ end
973
+
974
+ it "returns an empty array for a pattern with no capturing groups" do
975
+ expect(RE2::Regexp.new('ab').names).to be_empty
976
+ end
977
+
978
+ it "returns an empty array for an invalid regexp" do
979
+ expect(RE2::Regexp.new('???', log_errors: false).names).to be_empty
980
+ end
981
+
982
+ it "returns UTF-8 strings if the pattern is UTF-8" do
983
+ names = RE2::Regexp.new('(?P<bob>a)').names
984
+
985
+ expect(names.first.encoding).to eq(Encoding::UTF_8)
986
+ end
987
+
988
+ it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
989
+ names = RE2::Regexp.new('(?P<bob>a)', utf8: false).names
990
+
991
+ expect(names.first.encoding).to eq(Encoding::ISO_8859_1)
992
+ end
993
+
994
+ it "raises an error when called on an uninitialized object" do
995
+ expect { described_class.allocate.names }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
996
+ end
997
+ end
998
+
999
+ describe "#named_captures" do
1000
+ it "returns a hash of names to indices" do
1001
+ expect(RE2::Regexp.new('(?P<bob>a)').named_captures).to eq("bob" => 1)
1002
+ end
1003
+
1004
+ it "maps names to indices with several groups" do
1005
+ groups = RE2::Regexp.new('(?P<bob>a)(o)(?P<rob>e)').named_captures
1006
+
1007
+ expect(groups).to eq("bob" => 1, "rob" => 3)
1008
+ end
1009
+
1010
+ it "returns an empty hash for a pattern with no named groups" do
1011
+ expect(RE2::Regexp.new('(a)(b)').named_captures).to be_empty
1012
+ end
1013
+
1014
+ it "returns an empty hash for an invalid regexp" do
1015
+ expect(RE2::Regexp.new('???', log_errors: false).named_captures).to be_empty
1016
+ end
1017
+
1018
+ it "raises an error when called on an uninitialized object" do
1019
+ expect { described_class.allocate.named_captures }.to raise_error(TypeError, /uninitialized RE2::Regexp/)
1020
+ end
1021
+ end
1022
+
966
1023
  describe "#scan" do
967
1024
  it "returns a scanner" do
968
1025
  r = RE2::Regexp.new('(\w+)')
@@ -185,11 +185,11 @@ RSpec.describe RE2::Scanner do
185
185
  expect(scanner.scan).to be_nil
186
186
  end
187
187
 
188
- it "returns an array of nil with an empty input and capture", :aggregate_failures do
188
+ it "returns an array of empty strings with an empty input and capture", :aggregate_failures do
189
189
  r = RE2::Regexp.new("()")
190
190
  scanner = r.scan("")
191
191
 
192
- expect(scanner.scan).to eq([nil])
192
+ expect(scanner.scan).to eq([""])
193
193
  expect(scanner.scan).to be_nil
194
194
  end
195
195
 
@@ -204,25 +204,34 @@ RSpec.describe RE2::Scanner do
204
204
  expect(scanner.scan).to be_nil
205
205
  end
206
206
 
207
- it "returns an array of nil if the pattern is an empty capturing group", :aggregate_failures do
207
+ it "returns an array of empty strings if the pattern is an empty capturing group", :aggregate_failures do
208
208
  r = RE2::Regexp.new("()")
209
209
  scanner = r.scan("Foo")
210
210
 
211
- expect(scanner.scan).to eq([nil])
212
- expect(scanner.scan).to eq([nil])
213
- expect(scanner.scan).to eq([nil])
214
- expect(scanner.scan).to eq([nil])
211
+ expect(scanner.scan).to eq([""])
212
+ expect(scanner.scan).to eq([""])
213
+ expect(scanner.scan).to eq([""])
214
+ expect(scanner.scan).to eq([""])
215
215
  expect(scanner.scan).to be_nil
216
216
  end
217
217
 
218
- it "returns array of nils with multiple empty capturing groups", :aggregate_failures do
218
+ it "returns array of empty strings with multiple empty capturing groups", :aggregate_failures do
219
219
  r = RE2::Regexp.new("()()()")
220
220
  scanner = r.scan("Foo")
221
221
 
222
- expect(scanner.scan).to eq([nil, nil, nil])
223
- expect(scanner.scan).to eq([nil, nil, nil])
224
- expect(scanner.scan).to eq([nil, nil, nil])
225
- expect(scanner.scan).to eq([nil, nil, nil])
222
+ expect(scanner.scan).to eq(["", "", ""])
223
+ expect(scanner.scan).to eq(["", "", ""])
224
+ expect(scanner.scan).to eq(["", "", ""])
225
+ expect(scanner.scan).to eq(["", "", ""])
226
+ expect(scanner.scan).to be_nil
227
+ end
228
+
229
+ it "distinguishes zero-length matches from unmatched groups", :aggregate_failures do
230
+ r = RE2::Regexp.new("()(a)?")
231
+ scanner = r.scan("b")
232
+
233
+ expect(scanner.scan).to eq(["", nil])
234
+ expect(scanner.scan).to eq(["", nil])
226
235
  expect(scanner.scan).to be_nil
227
236
  end
228
237
 
@@ -230,7 +239,7 @@ RSpec.describe RE2::Scanner do
230
239
  r = RE2::Regexp.new("()€")
231
240
  scanner = r.scan("€")
232
241
 
233
- expect(scanner.scan).to eq([nil])
242
+ expect(scanner.scan).to eq([""])
234
243
  expect(scanner.scan).to be_nil
235
244
  end
236
245