strscan 1.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/strscan/extconf.rb +2 -1
- data/ext/strscan/strscan.c +265 -102
- metadata +7 -21
- data/ext/strscan/regenc.h +0 -254
- data/ext/strscan/regint.h +0 -938
data/ext/strscan/strscan.c
CHANGED
@@ -4,16 +4,25 @@
|
|
4
4
|
Copyright (c) 1999-2006 Minero Aoki
|
5
5
|
|
6
6
|
This program is free software.
|
7
|
-
You can
|
8
|
-
|
7
|
+
You can redistribute this program under the terms of the Ruby's or 2-clause
|
8
|
+
BSD License. For details, see the COPYING and LICENSE.txt files.
|
9
9
|
*/
|
10
10
|
|
11
11
|
#include "ruby/ruby.h"
|
12
12
|
#include "ruby/re.h"
|
13
13
|
#include "ruby/encoding.h"
|
14
|
-
#include "regint.h"
|
15
14
|
|
16
|
-
#
|
15
|
+
#ifdef RUBY_EXTCONF_H
|
16
|
+
# include RUBY_EXTCONF_H
|
17
|
+
#endif
|
18
|
+
|
19
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
20
|
+
extern size_t onig_region_memsize(const struct re_registers *regs);
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#include <stdbool.h>
|
24
|
+
|
25
|
+
#define STRSCAN_VERSION "3.0.1"
|
17
26
|
|
18
27
|
/* =======================================================================
|
19
28
|
Data Type Definitions
|
@@ -41,6 +50,9 @@ struct strscanner
|
|
41
50
|
|
42
51
|
/* regexp used for last scan */
|
43
52
|
VALUE regex;
|
53
|
+
|
54
|
+
/* anchor mode */
|
55
|
+
bool fixed_anchor_p;
|
44
56
|
};
|
45
57
|
|
46
58
|
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
|
@@ -65,7 +77,6 @@ struct strscanner
|
|
65
77
|
======================================================================= */
|
66
78
|
|
67
79
|
static inline long minl _((const long n, const long x));
|
68
|
-
static VALUE infect _((VALUE str, struct strscanner *p));
|
69
80
|
static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
|
70
81
|
static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
|
71
82
|
|
@@ -126,13 +137,6 @@ static VALUE inspect2 _((struct strscanner *p));
|
|
126
137
|
Utils
|
127
138
|
======================================================================= */
|
128
139
|
|
129
|
-
static VALUE
|
130
|
-
infect(VALUE str, struct strscanner *p)
|
131
|
-
{
|
132
|
-
OBJ_INFECT(str, p->str);
|
133
|
-
return str;
|
134
|
-
}
|
135
|
-
|
136
140
|
static VALUE
|
137
141
|
str_new(struct strscanner *p, const char *ptr, long len)
|
138
142
|
{
|
@@ -152,7 +156,7 @@ extract_range(struct strscanner *p, long beg_i, long end_i)
|
|
152
156
|
{
|
153
157
|
if (beg_i > S_LEN(p)) return Qnil;
|
154
158
|
end_i = minl(end_i, S_LEN(p));
|
155
|
-
return
|
159
|
+
return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i);
|
156
160
|
}
|
157
161
|
|
158
162
|
static VALUE
|
@@ -160,7 +164,7 @@ extract_beg_len(struct strscanner *p, long beg_i, long len)
|
|
160
164
|
{
|
161
165
|
if (beg_i > S_LEN(p)) return Qnil;
|
162
166
|
len = minl(len, S_LEN(p) - beg_i);
|
163
|
-
return
|
167
|
+
return str_new(p, S_PBEG(p) + beg_i, len);
|
164
168
|
}
|
165
169
|
|
166
170
|
/* =======================================================================
|
@@ -172,6 +176,7 @@ strscan_mark(void *ptr)
|
|
172
176
|
{
|
173
177
|
struct strscanner *p = ptr;
|
174
178
|
rb_gc_mark(p->str);
|
179
|
+
rb_gc_mark(p->regex);
|
175
180
|
}
|
176
181
|
|
177
182
|
static void
|
@@ -186,7 +191,11 @@ static size_t
|
|
186
191
|
strscan_memsize(const void *ptr)
|
187
192
|
{
|
188
193
|
const struct strscanner *p = ptr;
|
189
|
-
|
194
|
+
size_t size = sizeof(*p) - sizeof(p->regs);
|
195
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
196
|
+
size += onig_region_memsize(&p->regs);
|
197
|
+
#endif
|
198
|
+
return size;
|
190
199
|
}
|
191
200
|
|
192
201
|
static const rb_data_type_t strscanner_type = {
|
@@ -204,23 +213,46 @@ strscan_s_allocate(VALUE klass)
|
|
204
213
|
CLEAR_MATCH_STATUS(p);
|
205
214
|
onig_region_init(&(p->regs));
|
206
215
|
p->str = Qnil;
|
216
|
+
p->regex = Qnil;
|
207
217
|
return obj;
|
208
218
|
}
|
209
219
|
|
210
220
|
/*
|
211
|
-
* call-seq:
|
221
|
+
* call-seq:
|
222
|
+
* StringScanner.new(string, fixed_anchor: false)
|
223
|
+
* StringScanner.new(string, dup = false)
|
212
224
|
*
|
213
225
|
* Creates a new StringScanner object to scan over the given +string+.
|
226
|
+
*
|
227
|
+
* If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
|
228
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
229
|
+
*
|
214
230
|
* The +dup+ argument is obsolete and is no longer used.
|
215
231
|
*/
|
216
232
|
static VALUE
|
217
233
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
218
234
|
{
|
219
235
|
struct strscanner *p;
|
220
|
-
VALUE str,
|
236
|
+
VALUE str, options;
|
221
237
|
|
222
238
|
p = check_strscan(self);
|
223
|
-
rb_scan_args(argc, argv, "11", &str, &
|
239
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
240
|
+
options = rb_check_hash_type(options);
|
241
|
+
if (!NIL_P(options)) {
|
242
|
+
VALUE fixed_anchor;
|
243
|
+
ID keyword_ids[1];
|
244
|
+
keyword_ids[0] = rb_intern("fixed_anchor");
|
245
|
+
rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
|
246
|
+
if (fixed_anchor == Qundef) {
|
247
|
+
p->fixed_anchor_p = false;
|
248
|
+
}
|
249
|
+
else {
|
250
|
+
p->fixed_anchor_p = RTEST(fixed_anchor);
|
251
|
+
}
|
252
|
+
}
|
253
|
+
else {
|
254
|
+
p->fixed_anchor_p = false;
|
255
|
+
}
|
224
256
|
StringValue(str);
|
225
257
|
p->str = str;
|
226
258
|
|
@@ -294,7 +326,7 @@ strscan_reset(VALUE self)
|
|
294
326
|
* terminate
|
295
327
|
* clear
|
296
328
|
*
|
297
|
-
*
|
329
|
+
* Sets the scan pointer to the end of the string and clears matching data.
|
298
330
|
*/
|
299
331
|
static VALUE
|
300
332
|
strscan_terminate(VALUE self)
|
@@ -413,19 +445,16 @@ static VALUE
|
|
413
445
|
strscan_get_charpos(VALUE self)
|
414
446
|
{
|
415
447
|
struct strscanner *p;
|
416
|
-
VALUE substr;
|
417
448
|
|
418
449
|
GET_SCANNER(self, p);
|
419
450
|
|
420
|
-
|
421
|
-
|
422
|
-
return rb_str_length(substr);
|
451
|
+
return LONG2NUM(rb_enc_strlen(S_PBEG(p), CURPTR(p), rb_enc_get(p->str)));
|
423
452
|
}
|
424
453
|
|
425
454
|
/*
|
426
455
|
* call-seq: pos=(n)
|
427
456
|
*
|
428
|
-
*
|
457
|
+
* Sets the byte position of the scan pointer.
|
429
458
|
*
|
430
459
|
* s = StringScanner.new('test string')
|
431
460
|
* s.pos = 7 # -> 7
|
@@ -443,19 +472,86 @@ strscan_set_pos(VALUE self, VALUE v)
|
|
443
472
|
if (i < 0) rb_raise(rb_eRangeError, "index out of range");
|
444
473
|
if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
|
445
474
|
p->curr = i;
|
446
|
-
return
|
475
|
+
return LONG2NUM(i);
|
476
|
+
}
|
477
|
+
|
478
|
+
static inline UChar *
|
479
|
+
match_target(struct strscanner *p)
|
480
|
+
{
|
481
|
+
if (p->fixed_anchor_p) {
|
482
|
+
return (UChar *)S_PBEG(p);
|
483
|
+
}
|
484
|
+
else
|
485
|
+
{
|
486
|
+
return (UChar *)CURPTR(p);
|
487
|
+
}
|
488
|
+
}
|
489
|
+
|
490
|
+
static inline void
|
491
|
+
set_registers(struct strscanner *p, size_t length)
|
492
|
+
{
|
493
|
+
const int at = 0;
|
494
|
+
OnigRegion *regs = &(p->regs);
|
495
|
+
onig_region_clear(regs);
|
496
|
+
if (onig_region_set(regs, at, 0, 0)) return;
|
497
|
+
if (p->fixed_anchor_p) {
|
498
|
+
regs->beg[at] = p->curr;
|
499
|
+
regs->end[at] = p->curr + length;
|
500
|
+
}
|
501
|
+
else
|
502
|
+
{
|
503
|
+
regs->end[at] = length;
|
504
|
+
}
|
505
|
+
}
|
506
|
+
|
507
|
+
static inline void
|
508
|
+
succ(struct strscanner *p)
|
509
|
+
{
|
510
|
+
if (p->fixed_anchor_p) {
|
511
|
+
p->curr = p->regs.end[0];
|
512
|
+
}
|
513
|
+
else
|
514
|
+
{
|
515
|
+
p->curr += p->regs.end[0];
|
516
|
+
}
|
517
|
+
}
|
518
|
+
|
519
|
+
static inline long
|
520
|
+
last_match_length(struct strscanner *p)
|
521
|
+
{
|
522
|
+
if (p->fixed_anchor_p) {
|
523
|
+
return p->regs.end[0] - p->prev;
|
524
|
+
}
|
525
|
+
else
|
526
|
+
{
|
527
|
+
return p->regs.end[0];
|
528
|
+
}
|
529
|
+
}
|
530
|
+
|
531
|
+
static inline long
|
532
|
+
adjust_register_position(struct strscanner *p, long position)
|
533
|
+
{
|
534
|
+
if (p->fixed_anchor_p) {
|
535
|
+
return position;
|
536
|
+
}
|
537
|
+
else {
|
538
|
+
return p->prev + position;
|
539
|
+
}
|
447
540
|
}
|
448
541
|
|
449
542
|
static VALUE
|
450
|
-
strscan_do_scan(VALUE self, VALUE
|
543
|
+
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
451
544
|
{
|
452
|
-
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
453
545
|
struct strscanner *p;
|
454
|
-
regex_t *re;
|
455
|
-
long ret;
|
456
|
-
int tmpreg;
|
457
546
|
|
458
|
-
|
547
|
+
if (headonly) {
|
548
|
+
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
549
|
+
StringValue(pattern);
|
550
|
+
}
|
551
|
+
}
|
552
|
+
else {
|
553
|
+
Check_Type(pattern, T_REGEXP);
|
554
|
+
}
|
459
555
|
GET_SCANNER(self, p);
|
460
556
|
|
461
557
|
CLEAR_MATCH_STATUS(p);
|
@@ -463,49 +559,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
463
559
|
return Qnil;
|
464
560
|
}
|
465
561
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
562
|
+
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
563
|
+
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
564
|
+
regex_t *re;
|
565
|
+
long ret;
|
566
|
+
int tmpreg;
|
567
|
+
|
568
|
+
p->regex = pattern;
|
569
|
+
re = rb_reg_prepare_re(pattern, p->str);
|
570
|
+
tmpreg = re != RREGEXP_PTR(pattern);
|
571
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt++;
|
572
|
+
|
573
|
+
if (headonly) {
|
574
|
+
ret = onig_match(re,
|
575
|
+
match_target(p),
|
576
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
577
|
+
(UChar* )CURPTR(p),
|
578
|
+
&(p->regs),
|
579
|
+
ONIG_OPTION_NONE);
|
580
|
+
}
|
581
|
+
else {
|
582
|
+
ret = onig_search(re,
|
583
|
+
match_target(p),
|
584
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
585
|
+
(UChar* )CURPTR(p),
|
586
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
587
|
+
&(p->regs),
|
588
|
+
ONIG_OPTION_NONE);
|
589
|
+
}
|
590
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt--;
|
591
|
+
if (tmpreg) {
|
592
|
+
if (RREGEXP(pattern)->usecnt) {
|
593
|
+
onig_free(re);
|
594
|
+
}
|
595
|
+
else {
|
596
|
+
onig_free(RREGEXP_PTR(pattern));
|
597
|
+
RREGEXP_PTR(pattern) = re;
|
598
|
+
}
|
599
|
+
}
|
470
600
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
601
|
+
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
602
|
+
if (ret < 0) {
|
603
|
+
/* not matched */
|
604
|
+
return Qnil;
|
605
|
+
}
|
475
606
|
}
|
476
607
|
else {
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
&(p->regs), ONIG_OPTION_NONE);
|
481
|
-
}
|
482
|
-
if (!tmpreg) RREGEXP(regex)->usecnt--;
|
483
|
-
if (tmpreg) {
|
484
|
-
if (RREGEXP(regex)->usecnt) {
|
485
|
-
onig_free(re);
|
608
|
+
rb_enc_check(p->str, pattern);
|
609
|
+
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
610
|
+
return Qnil;
|
486
611
|
}
|
487
|
-
|
488
|
-
|
489
|
-
RREGEXP_PTR(regex) = re;
|
612
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
613
|
+
return Qnil;
|
490
614
|
}
|
491
|
-
|
492
|
-
|
493
|
-
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
494
|
-
if (ret < 0) {
|
495
|
-
/* not matched */
|
496
|
-
return Qnil;
|
615
|
+
set_registers(p, RSTRING_LEN(pattern));
|
497
616
|
}
|
498
617
|
|
499
618
|
MATCHED(p);
|
500
619
|
p->prev = p->curr;
|
620
|
+
|
501
621
|
if (succptr) {
|
502
|
-
p
|
622
|
+
succ(p);
|
503
623
|
}
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
624
|
+
{
|
625
|
+
const long length = last_match_length(p);
|
626
|
+
if (getstr) {
|
627
|
+
return extract_beg_len(p, p->prev, length);
|
628
|
+
}
|
629
|
+
else {
|
630
|
+
return INT2FIX(length);
|
631
|
+
}
|
509
632
|
}
|
510
633
|
}
|
511
634
|
|
@@ -520,7 +643,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
520
643
|
* p s.scan(/\w+/) # -> "test"
|
521
644
|
* p s.scan(/\w+/) # -> nil
|
522
645
|
* p s.scan(/\s+/) # -> " "
|
523
|
-
* p s.scan(
|
646
|
+
* p s.scan("str") # -> "str"
|
647
|
+
* p s.scan(/\w+/) # -> "ing"
|
524
648
|
* p s.scan(/./) # -> nil
|
525
649
|
*
|
526
650
|
*/
|
@@ -539,6 +663,7 @@ strscan_scan(VALUE self, VALUE re)
|
|
539
663
|
* s = StringScanner.new('test string')
|
540
664
|
* p s.match?(/\w+/) # -> 4
|
541
665
|
* p s.match?(/\w+/) # -> 4
|
666
|
+
* p s.match?("test") # -> 4
|
542
667
|
* p s.match?(/\s+/) # -> nil
|
543
668
|
*/
|
544
669
|
static VALUE
|
@@ -560,7 +685,8 @@ strscan_match_p(VALUE self, VALUE re)
|
|
560
685
|
* p s.skip(/\w+/) # -> 4
|
561
686
|
* p s.skip(/\w+/) # -> nil
|
562
687
|
* p s.skip(/\s+/) # -> 1
|
563
|
-
* p s.skip(
|
688
|
+
* p s.skip("st") # -> 2
|
689
|
+
* p s.skip(/\w+/) # -> 4
|
564
690
|
* p s.skip(/./) # -> nil
|
565
691
|
*
|
566
692
|
*/
|
@@ -704,7 +830,12 @@ static void
|
|
704
830
|
adjust_registers_to_matched(struct strscanner *p)
|
705
831
|
{
|
706
832
|
onig_region_clear(&(p->regs));
|
707
|
-
|
833
|
+
if (p->fixed_anchor_p) {
|
834
|
+
onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
|
835
|
+
}
|
836
|
+
else {
|
837
|
+
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
|
838
|
+
}
|
708
839
|
}
|
709
840
|
|
710
841
|
/*
|
@@ -716,9 +847,8 @@ adjust_registers_to_matched(struct strscanner *p)
|
|
716
847
|
* s.getch # => "b"
|
717
848
|
* s.getch # => nil
|
718
849
|
*
|
719
|
-
*
|
720
|
-
* s
|
721
|
-
* s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP
|
850
|
+
* s = StringScanner.new("\244\242".force_encoding("euc-jp"))
|
851
|
+
* s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
|
722
852
|
* s.getch # => nil
|
723
853
|
*/
|
724
854
|
static VALUE
|
@@ -738,8 +868,9 @@ strscan_getch(VALUE self)
|
|
738
868
|
p->curr += len;
|
739
869
|
MATCHED(p);
|
740
870
|
adjust_registers_to_matched(p);
|
741
|
-
return extract_range(p,
|
742
|
-
|
871
|
+
return extract_range(p,
|
872
|
+
adjust_register_position(p, p->regs.beg[0]),
|
873
|
+
adjust_register_position(p, p->regs.end[0]));
|
743
874
|
}
|
744
875
|
|
745
876
|
/*
|
@@ -752,10 +883,9 @@ strscan_getch(VALUE self)
|
|
752
883
|
* s.get_byte # => "b"
|
753
884
|
* s.get_byte # => nil
|
754
885
|
*
|
755
|
-
*
|
756
|
-
* s
|
757
|
-
* s.get_byte # => "\
|
758
|
-
* s.get_byte # => "\242"
|
886
|
+
* s = StringScanner.new("\244\242".force_encoding("euc-jp"))
|
887
|
+
* s.get_byte # => "\xA4"
|
888
|
+
* s.get_byte # => "\xA2"
|
759
889
|
* s.get_byte # => nil
|
760
890
|
*/
|
761
891
|
static VALUE
|
@@ -772,8 +902,9 @@ strscan_get_byte(VALUE self)
|
|
772
902
|
p->curr++;
|
773
903
|
MATCHED(p);
|
774
904
|
adjust_registers_to_matched(p);
|
775
|
-
return extract_range(p,
|
776
|
-
|
905
|
+
return extract_range(p,
|
906
|
+
adjust_register_position(p, p->regs.beg[0]),
|
907
|
+
adjust_register_position(p, p->regs.end[0]));
|
777
908
|
}
|
778
909
|
|
779
910
|
/*
|
@@ -808,7 +939,7 @@ strscan_peek(VALUE self, VALUE vlen)
|
|
808
939
|
|
809
940
|
len = NUM2LONG(vlen);
|
810
941
|
if (EOS_P(p))
|
811
|
-
return
|
942
|
+
return str_new(p, "", 0);
|
812
943
|
|
813
944
|
len = minl(len, S_RESTLEN(p));
|
814
945
|
return extract_beg_len(p, p->curr, len);
|
@@ -826,7 +957,7 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
826
957
|
}
|
827
958
|
|
828
959
|
/*
|
829
|
-
*
|
960
|
+
* Sets the scan pointer to the previous position. Only one previous position is
|
830
961
|
* remembered, and it changes with each scanning operation.
|
831
962
|
*
|
832
963
|
* s = StringScanner.new('test string')
|
@@ -850,7 +981,7 @@ strscan_unscan(VALUE self)
|
|
850
981
|
}
|
851
982
|
|
852
983
|
/*
|
853
|
-
* Returns +true+
|
984
|
+
* Returns +true+ if and only if the scan pointer is at the beginning of the line.
|
854
985
|
*
|
855
986
|
* s = StringScanner.new("test\ntest\n")
|
856
987
|
* s.bol? # => true
|
@@ -903,7 +1034,7 @@ strscan_empty_p(VALUE self)
|
|
903
1034
|
}
|
904
1035
|
|
905
1036
|
/*
|
906
|
-
* Returns true
|
1037
|
+
* Returns true if and only if there is more data in the string. See #eos?.
|
907
1038
|
* This method is obsolete; use #eos? instead.
|
908
1039
|
*
|
909
1040
|
* s = StringScanner.new('test string')
|
@@ -920,7 +1051,7 @@ strscan_rest_p(VALUE self)
|
|
920
1051
|
}
|
921
1052
|
|
922
1053
|
/*
|
923
|
-
* Returns +true+
|
1054
|
+
* Returns +true+ if and only if the last match was successful.
|
924
1055
|
*
|
925
1056
|
* s = StringScanner.new('test string')
|
926
1057
|
* s.match?(/\w+/) # => 4
|
@@ -951,13 +1082,15 @@ strscan_matched(VALUE self)
|
|
951
1082
|
|
952
1083
|
GET_SCANNER(self, p);
|
953
1084
|
if (! MATCHED_P(p)) return Qnil;
|
954
|
-
return extract_range(p,
|
955
|
-
|
1085
|
+
return extract_range(p,
|
1086
|
+
adjust_register_position(p, p->regs.beg[0]),
|
1087
|
+
adjust_register_position(p, p->regs.end[0]));
|
956
1088
|
}
|
957
1089
|
|
958
1090
|
/*
|
959
|
-
* Returns the size of the most recent match
|
960
|
-
* was no recent match.
|
1091
|
+
* Returns the size of the most recent match in bytes, or +nil+ if there
|
1092
|
+
* was no recent match. This is different from <tt>matched.size</tt>,
|
1093
|
+
* which will return the size in characters.
|
961
1094
|
*
|
962
1095
|
* s = StringScanner.new('test string')
|
963
1096
|
* s.check /\w+/ # -> "test"
|
@@ -972,7 +1105,7 @@ strscan_matched_size(VALUE self)
|
|
972
1105
|
|
973
1106
|
GET_SCANNER(self, p);
|
974
1107
|
if (! MATCHED_P(p)) return Qnil;
|
975
|
-
return
|
1108
|
+
return LONG2NUM(p->regs.end[0] - p->regs.beg[0]);
|
976
1109
|
}
|
977
1110
|
|
978
1111
|
static int
|
@@ -1034,7 +1167,7 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1034
1167
|
idx = rb_sym2str(idx);
|
1035
1168
|
/* fall through */
|
1036
1169
|
case T_STRING:
|
1037
|
-
if (!p->regex) return Qnil;
|
1170
|
+
if (!RTEST(p->regex)) return Qnil;
|
1038
1171
|
RSTRING_GETMEM(idx, name, i);
|
1039
1172
|
i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
|
1040
1173
|
break;
|
@@ -1048,8 +1181,9 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1048
1181
|
if (i >= p->regs.num_regs) return Qnil;
|
1049
1182
|
if (p->regs.beg[i] == -1) return Qnil;
|
1050
1183
|
|
1051
|
-
return extract_range(p,
|
1052
|
-
|
1184
|
+
return extract_range(p,
|
1185
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1186
|
+
adjust_register_position(p, p->regs.end[i]));
|
1053
1187
|
}
|
1054
1188
|
|
1055
1189
|
/*
|
@@ -1098,8 +1232,9 @@ strscan_captures(VALUE self)
|
|
1098
1232
|
new_ary = rb_ary_new2(num_regs);
|
1099
1233
|
|
1100
1234
|
for (i = 1; i < num_regs; i++) {
|
1101
|
-
VALUE str = extract_range(p,
|
1102
|
-
|
1235
|
+
VALUE str = extract_range(p,
|
1236
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1237
|
+
adjust_register_position(p, p->regs.end[i]));
|
1103
1238
|
rb_ary_push(new_ary, str);
|
1104
1239
|
}
|
1105
1240
|
|
@@ -1154,7 +1289,9 @@ strscan_pre_match(VALUE self)
|
|
1154
1289
|
|
1155
1290
|
GET_SCANNER(self, p);
|
1156
1291
|
if (! MATCHED_P(p)) return Qnil;
|
1157
|
-
return extract_range(p,
|
1292
|
+
return extract_range(p,
|
1293
|
+
0,
|
1294
|
+
adjust_register_position(p, p->regs.beg[0]));
|
1158
1295
|
}
|
1159
1296
|
|
1160
1297
|
/*
|
@@ -1173,7 +1310,9 @@ strscan_post_match(VALUE self)
|
|
1173
1310
|
|
1174
1311
|
GET_SCANNER(self, p);
|
1175
1312
|
if (! MATCHED_P(p)) return Qnil;
|
1176
|
-
return extract_range(p,
|
1313
|
+
return extract_range(p,
|
1314
|
+
adjust_register_position(p, p->regs.end[0]),
|
1315
|
+
S_LEN(p));
|
1177
1316
|
}
|
1178
1317
|
|
1179
1318
|
/*
|
@@ -1187,7 +1326,7 @@ strscan_rest(VALUE self)
|
|
1187
1326
|
|
1188
1327
|
GET_SCANNER(self, p);
|
1189
1328
|
if (EOS_P(p)) {
|
1190
|
-
return
|
1329
|
+
return str_new(p, "", 0);
|
1191
1330
|
}
|
1192
1331
|
return extract_range(p, p->curr, S_LEN(p));
|
1193
1332
|
}
|
@@ -1242,11 +1381,11 @@ strscan_inspect(VALUE self)
|
|
1242
1381
|
p = check_strscan(self);
|
1243
1382
|
if (NIL_P(p->str)) {
|
1244
1383
|
a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
|
1245
|
-
return
|
1384
|
+
return a;
|
1246
1385
|
}
|
1247
1386
|
if (EOS_P(p)) {
|
1248
1387
|
a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
|
1249
|
-
return
|
1388
|
+
return a;
|
1250
1389
|
}
|
1251
1390
|
if (p->curr == 0) {
|
1252
1391
|
b = inspect2(p);
|
@@ -1254,7 +1393,7 @@ strscan_inspect(VALUE self)
|
|
1254
1393
|
rb_obj_class(self),
|
1255
1394
|
p->curr, S_LEN(p),
|
1256
1395
|
b);
|
1257
|
-
return
|
1396
|
+
return a;
|
1258
1397
|
}
|
1259
1398
|
a = inspect1(p);
|
1260
1399
|
b = inspect2(p);
|
@@ -1262,7 +1401,7 @@ strscan_inspect(VALUE self)
|
|
1262
1401
|
rb_obj_class(self),
|
1263
1402
|
p->curr, S_LEN(p),
|
1264
1403
|
a, b);
|
1265
|
-
return
|
1404
|
+
return a;
|
1266
1405
|
}
|
1267
1406
|
|
1268
1407
|
static VALUE
|
@@ -1302,6 +1441,23 @@ inspect2(struct strscanner *p)
|
|
1302
1441
|
return rb_str_dump(str);
|
1303
1442
|
}
|
1304
1443
|
|
1444
|
+
/*
|
1445
|
+
* call-seq:
|
1446
|
+
* scanner.fixed_anchor? -> true or false
|
1447
|
+
*
|
1448
|
+
* Whether +scanner+ uses fixed anchor mode or not.
|
1449
|
+
*
|
1450
|
+
* If fixed anchor mode is used, +\A+ always matches the beginning of
|
1451
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
1452
|
+
*/
|
1453
|
+
static VALUE
|
1454
|
+
strscan_fixed_anchor_p(VALUE self)
|
1455
|
+
{
|
1456
|
+
struct strscanner *p;
|
1457
|
+
p = check_strscan(self);
|
1458
|
+
return p->fixed_anchor_p ? Qtrue : Qfalse;
|
1459
|
+
}
|
1460
|
+
|
1305
1461
|
/* =======================================================================
|
1306
1462
|
Ruby Interface
|
1307
1463
|
======================================================================= */
|
@@ -1378,7 +1534,7 @@ inspect2(struct strscanner *p)
|
|
1378
1534
|
*
|
1379
1535
|
* === Finding Where we Are
|
1380
1536
|
*
|
1381
|
-
* - #beginning_of_line? (
|
1537
|
+
* - #beginning_of_line? (<tt>#bol?</tt>)
|
1382
1538
|
* - #eos?
|
1383
1539
|
* - #rest?
|
1384
1540
|
* - #rest_size
|
@@ -1395,13 +1551,13 @@ inspect2(struct strscanner *p)
|
|
1395
1551
|
* - #matched
|
1396
1552
|
* - #matched?
|
1397
1553
|
* - #matched_size
|
1398
|
-
* - []
|
1554
|
+
* - <tt>#[]</tt>
|
1399
1555
|
* - #pre_match
|
1400
1556
|
* - #post_match
|
1401
1557
|
*
|
1402
1558
|
* === Miscellaneous
|
1403
1559
|
*
|
1404
|
-
* -
|
1560
|
+
* - <tt><<</tt>
|
1405
1561
|
* - #concat
|
1406
1562
|
* - #string
|
1407
1563
|
* - #string=
|
@@ -1412,6 +1568,11 @@ inspect2(struct strscanner *p)
|
|
1412
1568
|
void
|
1413
1569
|
Init_strscan(void)
|
1414
1570
|
{
|
1571
|
+
#ifdef HAVE_RB_EXT_RACTOR_SAFE
|
1572
|
+
rb_ext_ractor_safe(true);
|
1573
|
+
#endif
|
1574
|
+
|
1575
|
+
#undef rb_intern
|
1415
1576
|
ID id_scanerr = rb_intern("ScanError");
|
1416
1577
|
VALUE tmp;
|
1417
1578
|
|
@@ -1487,4 +1648,6 @@ Init_strscan(void)
|
|
1487
1648
|
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
|
1488
1649
|
|
1489
1650
|
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
|
1651
|
+
|
1652
|
+
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1490
1653
|
}
|