strscan 1.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,16 +4,25 @@
4
4
  Copyright (c) 1999-2006 Minero Aoki
5
5
 
6
6
  This program is free software.
7
- You can distribute/modify this program under the terms of
8
- the Ruby License. For details, see the file COPYING.
7
+ You can redistribute this program under the terms of the Ruby's or 2-clause
8
+ BSD License. For details, see the COPYING and LICENSE.txt files.
9
9
  */
10
10
 
11
11
  #include "ruby/ruby.h"
12
12
  #include "ruby/re.h"
13
13
  #include "ruby/encoding.h"
14
- #include "regint.h"
15
14
 
16
- #define STRSCAN_VERSION "0.7.0"
15
+ #ifdef RUBY_EXTCONF_H
16
+ # include RUBY_EXTCONF_H
17
+ #endif
18
+
19
+ #ifdef HAVE_ONIG_REGION_MEMSIZE
20
+ extern size_t onig_region_memsize(const struct re_registers *regs);
21
+ #endif
22
+
23
+ #include <stdbool.h>
24
+
25
+ #define STRSCAN_VERSION "3.0.1"
17
26
 
18
27
  /* =======================================================================
19
28
  Data Type Definitions
@@ -41,6 +50,9 @@ struct strscanner
41
50
 
42
51
  /* regexp used for last scan */
43
52
  VALUE regex;
53
+
54
+ /* anchor mode */
55
+ bool fixed_anchor_p;
44
56
  };
45
57
 
46
58
  #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@@ -65,7 +77,6 @@ struct strscanner
65
77
  ======================================================================= */
66
78
 
67
79
  static inline long minl _((const long n, const long x));
68
- static VALUE infect _((VALUE str, struct strscanner *p));
69
80
  static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
70
81
  static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
71
82
 
@@ -126,13 +137,6 @@ static VALUE inspect2 _((struct strscanner *p));
126
137
  Utils
127
138
  ======================================================================= */
128
139
 
129
- static VALUE
130
- infect(VALUE str, struct strscanner *p)
131
- {
132
- OBJ_INFECT(str, p->str);
133
- return str;
134
- }
135
-
136
140
  static VALUE
137
141
  str_new(struct strscanner *p, const char *ptr, long len)
138
142
  {
@@ -152,7 +156,7 @@ extract_range(struct strscanner *p, long beg_i, long end_i)
152
156
  {
153
157
  if (beg_i > S_LEN(p)) return Qnil;
154
158
  end_i = minl(end_i, S_LEN(p));
155
- return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p);
159
+ return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i);
156
160
  }
157
161
 
158
162
  static VALUE
@@ -160,7 +164,7 @@ extract_beg_len(struct strscanner *p, long beg_i, long len)
160
164
  {
161
165
  if (beg_i > S_LEN(p)) return Qnil;
162
166
  len = minl(len, S_LEN(p) - beg_i);
163
- return infect(str_new(p, S_PBEG(p) + beg_i, len), p);
167
+ return str_new(p, S_PBEG(p) + beg_i, len);
164
168
  }
165
169
 
166
170
  /* =======================================================================
@@ -172,6 +176,7 @@ strscan_mark(void *ptr)
172
176
  {
173
177
  struct strscanner *p = ptr;
174
178
  rb_gc_mark(p->str);
179
+ rb_gc_mark(p->regex);
175
180
  }
176
181
 
177
182
  static void
@@ -186,7 +191,11 @@ static size_t
186
191
  strscan_memsize(const void *ptr)
187
192
  {
188
193
  const struct strscanner *p = ptr;
189
- return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
194
+ size_t size = sizeof(*p) - sizeof(p->regs);
195
+ #ifdef HAVE_ONIG_REGION_MEMSIZE
196
+ size += onig_region_memsize(&p->regs);
197
+ #endif
198
+ return size;
190
199
  }
191
200
 
192
201
  static const rb_data_type_t strscanner_type = {
@@ -204,23 +213,46 @@ strscan_s_allocate(VALUE klass)
204
213
  CLEAR_MATCH_STATUS(p);
205
214
  onig_region_init(&(p->regs));
206
215
  p->str = Qnil;
216
+ p->regex = Qnil;
207
217
  return obj;
208
218
  }
209
219
 
210
220
  /*
211
- * call-seq: StringScanner.new(string, dup = false)
221
+ * call-seq:
222
+ * StringScanner.new(string, fixed_anchor: false)
223
+ * StringScanner.new(string, dup = false)
212
224
  *
213
225
  * Creates a new StringScanner object to scan over the given +string+.
226
+ *
227
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
228
+ * the string. Otherwise, +\A+ always matches the current position.
229
+ *
214
230
  * +dup+ argument is obsolete and not used now.
215
231
  */
216
232
  static VALUE
217
233
  strscan_initialize(int argc, VALUE *argv, VALUE self)
218
234
  {
219
235
  struct strscanner *p;
220
- VALUE str, need_dup;
236
+ VALUE str, options;
221
237
 
222
238
  p = check_strscan(self);
223
- rb_scan_args(argc, argv, "11", &str, &need_dup);
239
+ rb_scan_args(argc, argv, "11", &str, &options);
240
+ options = rb_check_hash_type(options);
241
+ if (!NIL_P(options)) {
242
+ VALUE fixed_anchor;
243
+ ID keyword_ids[1];
244
+ keyword_ids[0] = rb_intern("fixed_anchor");
245
+ rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
246
+ if (fixed_anchor == Qundef) {
247
+ p->fixed_anchor_p = false;
248
+ }
249
+ else {
250
+ p->fixed_anchor_p = RTEST(fixed_anchor);
251
+ }
252
+ }
253
+ else {
254
+ p->fixed_anchor_p = false;
255
+ }
224
256
  StringValue(str);
225
257
  p->str = str;
226
258
 
@@ -294,7 +326,7 @@ strscan_reset(VALUE self)
294
326
  * terminate
295
327
  * clear
296
328
  *
297
- * Set the scan pointer to the end of the string and clear matching data.
329
+ * Sets the scan pointer to the end of the string and clears matching data.
298
330
  */
299
331
  static VALUE
300
332
  strscan_terminate(VALUE self)
@@ -413,19 +445,16 @@ static VALUE
413
445
  strscan_get_charpos(VALUE self)
414
446
  {
415
447
  struct strscanner *p;
416
- VALUE substr;
417
448
 
418
449
  GET_SCANNER(self, p);
419
450
 
420
- substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), INT2NUM(p->curr));
421
-
422
- return rb_str_length(substr);
451
+ return LONG2NUM(rb_enc_strlen(S_PBEG(p), CURPTR(p), rb_enc_get(p->str)));
423
452
  }
424
453
 
425
454
  /*
426
455
  * call-seq: pos=(n)
427
456
  *
428
- * Set the byte position of the scan pointer.
457
+ * Sets the byte position of the scan pointer.
429
458
  *
430
459
  * s = StringScanner.new('test string')
431
460
  * s.pos = 7 # -> 7
@@ -443,19 +472,86 @@ strscan_set_pos(VALUE self, VALUE v)
443
472
  if (i < 0) rb_raise(rb_eRangeError, "index out of range");
444
473
  if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
445
474
  p->curr = i;
446
- return INT2NUM(i);
475
+ return LONG2NUM(i);
476
+ }
477
+
478
+ static inline UChar *
479
+ match_target(struct strscanner *p)
480
+ {
481
+ if (p->fixed_anchor_p) {
482
+ return (UChar *)S_PBEG(p);
483
+ }
484
+ else
485
+ {
486
+ return (UChar *)CURPTR(p);
487
+ }
488
+ }
489
+
490
+ static inline void
491
+ set_registers(struct strscanner *p, size_t length)
492
+ {
493
+ const int at = 0;
494
+ OnigRegion *regs = &(p->regs);
495
+ onig_region_clear(regs);
496
+ if (onig_region_set(regs, at, 0, 0)) return;
497
+ if (p->fixed_anchor_p) {
498
+ regs->beg[at] = p->curr;
499
+ regs->end[at] = p->curr + length;
500
+ }
501
+ else
502
+ {
503
+ regs->end[at] = length;
504
+ }
505
+ }
506
+
507
+ static inline void
508
+ succ(struct strscanner *p)
509
+ {
510
+ if (p->fixed_anchor_p) {
511
+ p->curr = p->regs.end[0];
512
+ }
513
+ else
514
+ {
515
+ p->curr += p->regs.end[0];
516
+ }
517
+ }
518
+
519
+ static inline long
520
+ last_match_length(struct strscanner *p)
521
+ {
522
+ if (p->fixed_anchor_p) {
523
+ return p->regs.end[0] - p->prev;
524
+ }
525
+ else
526
+ {
527
+ return p->regs.end[0];
528
+ }
529
+ }
530
+
531
+ static inline long
532
+ adjust_register_position(struct strscanner *p, long position)
533
+ {
534
+ if (p->fixed_anchor_p) {
535
+ return position;
536
+ }
537
+ else {
538
+ return p->prev + position;
539
+ }
447
540
  }
448
541
 
449
542
  static VALUE
450
- strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
543
+ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
451
544
  {
452
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
453
545
  struct strscanner *p;
454
- regex_t *re;
455
- long ret;
456
- int tmpreg;
457
546
 
458
- Check_Type(regex, T_REGEXP);
547
+ if (headonly) {
548
+ if (!RB_TYPE_P(pattern, T_REGEXP)) {
549
+ StringValue(pattern);
550
+ }
551
+ }
552
+ else {
553
+ Check_Type(pattern, T_REGEXP);
554
+ }
459
555
  GET_SCANNER(self, p);
460
556
 
461
557
  CLEAR_MATCH_STATUS(p);
@@ -463,49 +559,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
463
559
  return Qnil;
464
560
  }
465
561
 
466
- p->regex = regex;
467
- re = rb_reg_prepare_re(regex, p->str);
468
- tmpreg = re != RREGEXP_PTR(regex);
469
- if (!tmpreg) RREGEXP(regex)->usecnt++;
562
+ if (RB_TYPE_P(pattern, T_REGEXP)) {
563
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
564
+ regex_t *re;
565
+ long ret;
566
+ int tmpreg;
567
+
568
+ p->regex = pattern;
569
+ re = rb_reg_prepare_re(pattern, p->str);
570
+ tmpreg = re != RREGEXP_PTR(pattern);
571
+ if (!tmpreg) RREGEXP(pattern)->usecnt++;
572
+
573
+ if (headonly) {
574
+ ret = onig_match(re,
575
+ match_target(p),
576
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
577
+ (UChar* )CURPTR(p),
578
+ &(p->regs),
579
+ ONIG_OPTION_NONE);
580
+ }
581
+ else {
582
+ ret = onig_search(re,
583
+ match_target(p),
584
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
585
+ (UChar* )CURPTR(p),
586
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
587
+ &(p->regs),
588
+ ONIG_OPTION_NONE);
589
+ }
590
+ if (!tmpreg) RREGEXP(pattern)->usecnt--;
591
+ if (tmpreg) {
592
+ if (RREGEXP(pattern)->usecnt) {
593
+ onig_free(re);
594
+ }
595
+ else {
596
+ onig_free(RREGEXP_PTR(pattern));
597
+ RREGEXP_PTR(pattern) = re;
598
+ }
599
+ }
470
600
 
471
- if (headonly) {
472
- ret = onig_match(re, (UChar* )CURPTR(p),
473
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
474
- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
601
+ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
602
+ if (ret < 0) {
603
+ /* not matched */
604
+ return Qnil;
605
+ }
475
606
  }
476
607
  else {
477
- ret = onig_search(re,
478
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
479
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
480
- &(p->regs), ONIG_OPTION_NONE);
481
- }
482
- if (!tmpreg) RREGEXP(regex)->usecnt--;
483
- if (tmpreg) {
484
- if (RREGEXP(regex)->usecnt) {
485
- onig_free(re);
608
+ rb_enc_check(p->str, pattern);
609
+ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
610
+ return Qnil;
486
611
  }
487
- else {
488
- onig_free(RREGEXP_PTR(regex));
489
- RREGEXP_PTR(regex) = re;
612
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
613
+ return Qnil;
490
614
  }
491
- }
492
-
493
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
494
- if (ret < 0) {
495
- /* not matched */
496
- return Qnil;
615
+ set_registers(p, RSTRING_LEN(pattern));
497
616
  }
498
617
 
499
618
  MATCHED(p);
500
619
  p->prev = p->curr;
620
+
501
621
  if (succptr) {
502
- p->curr += p->regs.end[0];
622
+ succ(p);
503
623
  }
504
- if (getstr) {
505
- return extract_beg_len(p, p->prev, p->regs.end[0]);
506
- }
507
- else {
508
- return INT2FIX(p->regs.end[0]);
624
+ {
625
+ const long length = last_match_length(p);
626
+ if (getstr) {
627
+ return extract_beg_len(p, p->prev, length);
628
+ }
629
+ else {
630
+ return INT2FIX(length);
631
+ }
509
632
  }
510
633
  }
511
634
 
@@ -520,7 +643,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
520
643
  * p s.scan(/\w+/) # -> "test"
521
644
  * p s.scan(/\w+/) # -> nil
522
645
  * p s.scan(/\s+/) # -> " "
523
- * p s.scan(/\w+/) # -> "string"
646
+ * p s.scan("str") # -> "str"
647
+ * p s.scan(/\w+/) # -> "ing"
524
648
  * p s.scan(/./) # -> nil
525
649
  *
526
650
  */
@@ -539,6 +663,7 @@ strscan_scan(VALUE self, VALUE re)
539
663
  * s = StringScanner.new('test string')
540
664
  * p s.match?(/\w+/) # -> 4
541
665
  * p s.match?(/\w+/) # -> 4
666
+ * p s.match?("test") # -> 4
542
667
  * p s.match?(/\s+/) # -> nil
543
668
  */
544
669
  static VALUE
@@ -560,7 +685,8 @@ strscan_match_p(VALUE self, VALUE re)
560
685
  * p s.skip(/\w+/) # -> 4
561
686
  * p s.skip(/\w+/) # -> nil
562
687
  * p s.skip(/\s+/) # -> 1
563
- * p s.skip(/\w+/) # -> 6
688
+ * p s.skip("st") # -> 2
689
+ * p s.skip(/\w+/) # -> 4
564
690
  * p s.skip(/./) # -> nil
565
691
  *
566
692
  */
@@ -704,7 +830,12 @@ static void
704
830
  adjust_registers_to_matched(struct strscanner *p)
705
831
  {
706
832
  onig_region_clear(&(p->regs));
707
- onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
833
+ if (p->fixed_anchor_p) {
834
+ onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
835
+ }
836
+ else {
837
+ onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
838
+ }
708
839
  }
709
840
 
710
841
  /*
@@ -716,9 +847,8 @@ adjust_registers_to_matched(struct strscanner *p)
716
847
  * s.getch # => "b"
717
848
  * s.getch # => nil
718
849
  *
719
- * $KCODE = 'EUC'
720
- * s = StringScanner.new("\244\242")
721
- * s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP
850
+ * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
851
+ * s.getch # => "\x{A4A2}" # Japanese hiragana "A" in EUC-JP
722
852
  * s.getch # => nil
723
853
  */
724
854
  static VALUE
@@ -738,8 +868,9 @@ strscan_getch(VALUE self)
738
868
  p->curr += len;
739
869
  MATCHED(p);
740
870
  adjust_registers_to_matched(p);
741
- return extract_range(p, p->prev + p->regs.beg[0],
742
- p->prev + p->regs.end[0]);
871
+ return extract_range(p,
872
+ adjust_register_position(p, p->regs.beg[0]),
873
+ adjust_register_position(p, p->regs.end[0]));
743
874
  }
744
875
 
745
876
  /*
@@ -752,10 +883,9 @@ strscan_getch(VALUE self)
752
883
  * s.get_byte # => "b"
753
884
  * s.get_byte # => nil
754
885
  *
755
- * $KCODE = 'EUC'
756
- * s = StringScanner.new("\244\242")
757
- * s.get_byte # => "\244"
758
- * s.get_byte # => "\242"
886
+ * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
887
+ * s.get_byte # => "\xA4"
888
+ * s.get_byte # => "\xA2"
759
889
  * s.get_byte # => nil
760
890
  */
761
891
  static VALUE
@@ -772,8 +902,9 @@ strscan_get_byte(VALUE self)
772
902
  p->curr++;
773
903
  MATCHED(p);
774
904
  adjust_registers_to_matched(p);
775
- return extract_range(p, p->prev + p->regs.beg[0],
776
- p->prev + p->regs.end[0]);
905
+ return extract_range(p,
906
+ adjust_register_position(p, p->regs.beg[0]),
907
+ adjust_register_position(p, p->regs.end[0]));
777
908
  }
778
909
 
779
910
  /*
@@ -808,7 +939,7 @@ strscan_peek(VALUE self, VALUE vlen)
808
939
 
809
940
  len = NUM2LONG(vlen);
810
941
  if (EOS_P(p))
811
- return infect(str_new(p, "", 0), p);
942
+ return str_new(p, "", 0);
812
943
 
813
944
  len = minl(len, S_RESTLEN(p));
814
945
  return extract_beg_len(p, p->curr, len);
@@ -826,7 +957,7 @@ strscan_peep(VALUE self, VALUE vlen)
826
957
  }
827
958
 
828
959
  /*
829
- * Set the scan pointer to the previous position. Only one previous position is
960
+ * Sets the scan pointer to the previous position. Only one previous position is
830
961
  * remembered, and it changes with each scanning operation.
831
962
  *
832
963
  * s = StringScanner.new('test string')
@@ -850,7 +981,7 @@ strscan_unscan(VALUE self)
850
981
  }
851
982
 
852
983
  /*
853
- * Returns +true+ iff the scan pointer is at the beginning of the line.
984
+ * Returns +true+ if and only if the scan pointer is at the beginning of the line.
854
985
  *
855
986
  * s = StringScanner.new("test\ntest\n")
856
987
  * s.bol? # => true
@@ -903,7 +1034,7 @@ strscan_empty_p(VALUE self)
903
1034
  }
904
1035
 
905
1036
  /*
906
- * Returns true iff there is more data in the string. See #eos?.
1037
+ * Returns true if and only if there is more data in the string. See #eos?.
907
1038
  * This method is obsolete; use #eos? instead.
908
1039
  *
909
1040
  * s = StringScanner.new('test string')
@@ -920,7 +1051,7 @@ strscan_rest_p(VALUE self)
920
1051
  }
921
1052
 
922
1053
  /*
923
- * Returns +true+ iff the last match was successful.
1054
+ * Returns +true+ if and only if the last match was successful.
924
1055
  *
925
1056
  * s = StringScanner.new('test string')
926
1057
  * s.match?(/\w+/) # => 4
@@ -951,13 +1082,15 @@ strscan_matched(VALUE self)
951
1082
 
952
1083
  GET_SCANNER(self, p);
953
1084
  if (! MATCHED_P(p)) return Qnil;
954
- return extract_range(p, p->prev + p->regs.beg[0],
955
- p->prev + p->regs.end[0]);
1085
+ return extract_range(p,
1086
+ adjust_register_position(p, p->regs.beg[0]),
1087
+ adjust_register_position(p, p->regs.end[0]));
956
1088
  }
957
1089
 
958
1090
  /*
959
- * Returns the size of the most recent match (see #matched), or +nil+ if there
960
- * was no recent match.
1091
+ * Returns the size of the most recent match in bytes, or +nil+ if there
1092
+ * was no recent match. This is different from <tt>matched.size</tt>,
1093
+ * which will return the size in characters.
961
1094
  *
962
1095
  * s = StringScanner.new('test string')
963
1096
  * s.check /\w+/ # -> "test"
@@ -972,7 +1105,7 @@ strscan_matched_size(VALUE self)
972
1105
 
973
1106
  GET_SCANNER(self, p);
974
1107
  if (! MATCHED_P(p)) return Qnil;
975
- return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
1108
+ return LONG2NUM(p->regs.end[0] - p->regs.beg[0]);
976
1109
  }
977
1110
 
978
1111
  static int
@@ -1034,7 +1167,7 @@ strscan_aref(VALUE self, VALUE idx)
1034
1167
  idx = rb_sym2str(idx);
1035
1168
  /* fall through */
1036
1169
  case T_STRING:
1037
- if (!p->regex) return Qnil;
1170
+ if (!RTEST(p->regex)) return Qnil;
1038
1171
  RSTRING_GETMEM(idx, name, i);
1039
1172
  i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
1040
1173
  break;
@@ -1048,8 +1181,9 @@ strscan_aref(VALUE self, VALUE idx)
1048
1181
  if (i >= p->regs.num_regs) return Qnil;
1049
1182
  if (p->regs.beg[i] == -1) return Qnil;
1050
1183
 
1051
- return extract_range(p, p->prev + p->regs.beg[i],
1052
- p->prev + p->regs.end[i]);
1184
+ return extract_range(p,
1185
+ adjust_register_position(p, p->regs.beg[i]),
1186
+ adjust_register_position(p, p->regs.end[i]));
1053
1187
  }
1054
1188
 
1055
1189
  /*
@@ -1098,8 +1232,9 @@ strscan_captures(VALUE self)
1098
1232
  new_ary = rb_ary_new2(num_regs);
1099
1233
 
1100
1234
  for (i = 1; i < num_regs; i++) {
1101
- VALUE str = extract_range(p, p->prev + p->regs.beg[i],
1102
- p->prev + p->regs.end[i]);
1235
+ VALUE str = extract_range(p,
1236
+ adjust_register_position(p, p->regs.beg[i]),
1237
+ adjust_register_position(p, p->regs.end[i]));
1103
1238
  rb_ary_push(new_ary, str);
1104
1239
  }
1105
1240
 
@@ -1154,7 +1289,9 @@ strscan_pre_match(VALUE self)
1154
1289
 
1155
1290
  GET_SCANNER(self, p);
1156
1291
  if (! MATCHED_P(p)) return Qnil;
1157
- return extract_range(p, 0, p->prev + p->regs.beg[0]);
1292
+ return extract_range(p,
1293
+ 0,
1294
+ adjust_register_position(p, p->regs.beg[0]));
1158
1295
  }
1159
1296
 
1160
1297
  /*
@@ -1173,7 +1310,9 @@ strscan_post_match(VALUE self)
1173
1310
 
1174
1311
  GET_SCANNER(self, p);
1175
1312
  if (! MATCHED_P(p)) return Qnil;
1176
- return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
1313
+ return extract_range(p,
1314
+ adjust_register_position(p, p->regs.end[0]),
1315
+ S_LEN(p));
1177
1316
  }
1178
1317
 
1179
1318
  /*
@@ -1187,7 +1326,7 @@ strscan_rest(VALUE self)
1187
1326
 
1188
1327
  GET_SCANNER(self, p);
1189
1328
  if (EOS_P(p)) {
1190
- return infect(str_new(p, "", 0), p);
1329
+ return str_new(p, "", 0);
1191
1330
  }
1192
1331
  return extract_range(p, p->curr, S_LEN(p));
1193
1332
  }
@@ -1242,11 +1381,11 @@ strscan_inspect(VALUE self)
1242
1381
  p = check_strscan(self);
1243
1382
  if (NIL_P(p->str)) {
1244
1383
  a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
1245
- return infect(a, p);
1384
+ return a;
1246
1385
  }
1247
1386
  if (EOS_P(p)) {
1248
1387
  a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
1249
- return infect(a, p);
1388
+ return a;
1250
1389
  }
1251
1390
  if (p->curr == 0) {
1252
1391
  b = inspect2(p);
@@ -1254,7 +1393,7 @@ strscan_inspect(VALUE self)
1254
1393
  rb_obj_class(self),
1255
1394
  p->curr, S_LEN(p),
1256
1395
  b);
1257
- return infect(a, p);
1396
+ return a;
1258
1397
  }
1259
1398
  a = inspect1(p);
1260
1399
  b = inspect2(p);
@@ -1262,7 +1401,7 @@ strscan_inspect(VALUE self)
1262
1401
  rb_obj_class(self),
1263
1402
  p->curr, S_LEN(p),
1264
1403
  a, b);
1265
- return infect(a, p);
1404
+ return a;
1266
1405
  }
1267
1406
 
1268
1407
  static VALUE
@@ -1302,6 +1441,23 @@ inspect2(struct strscanner *p)
1302
1441
  return rb_str_dump(str);
1303
1442
  }
1304
1443
 
1444
+ /*
1445
+ * call-seq:
1446
+ * scanner.fixed_anchor? -> true or false
1447
+ *
1448
+ * Whether +scanner+ uses fixed anchor mode or not.
1449
+ *
1450
+ * If fixed anchor mode is used, +\A+ always matches the beginning of
1451
+ * the string. Otherwise, +\A+ always matches the current position.
1452
+ */
1453
+ static VALUE
1454
+ strscan_fixed_anchor_p(VALUE self)
1455
+ {
1456
+ struct strscanner *p;
1457
+ p = check_strscan(self);
1458
+ return p->fixed_anchor_p ? Qtrue : Qfalse;
1459
+ }
1460
+
1305
1461
  /* =======================================================================
1306
1462
  Ruby Interface
1307
1463
  ======================================================================= */
@@ -1378,7 +1534,7 @@ inspect2(struct strscanner *p)
1378
1534
  *
1379
1535
  * === Finding Where we Are
1380
1536
  *
1381
- * - #beginning_of_line? (#bol?)
1537
+ * - #beginning_of_line? (<tt>#bol?</tt>)
1382
1538
  * - #eos?
1383
1539
  * - #rest?
1384
1540
  * - #rest_size
@@ -1395,13 +1551,13 @@ inspect2(struct strscanner *p)
1395
1551
  * - #matched
1396
1552
  * - #matched?
1397
1553
  * - #matched_size
1398
- * - []
1554
+ * - <tt>#[]</tt>
1399
1555
  * - #pre_match
1400
1556
  * - #post_match
1401
1557
  *
1402
1558
  * === Miscellaneous
1403
1559
  *
1404
- * - <<
1560
+ * - <tt><<</tt>
1405
1561
  * - #concat
1406
1562
  * - #string
1407
1563
  * - #string=
@@ -1412,6 +1568,11 @@ inspect2(struct strscanner *p)
1412
1568
  void
1413
1569
  Init_strscan(void)
1414
1570
  {
1571
+ #ifdef HAVE_RB_EXT_RACTOR_SAFE
1572
+ rb_ext_ractor_safe(true);
1573
+ #endif
1574
+
1575
+ #undef rb_intern
1415
1576
  ID id_scanerr = rb_intern("ScanError");
1416
1577
  VALUE tmp;
1417
1578
 
@@ -1487,4 +1648,6 @@ Init_strscan(void)
1487
1648
  rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1488
1649
 
1489
1650
  rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1651
+
1652
+ rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1490
1653
  }