strscan 1.0.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/strscan/extconf.rb +2 -1
- data/ext/strscan/strscan.c +265 -102
- metadata +7 -21
- data/ext/strscan/regenc.h +0 -254
- data/ext/strscan/regint.h +0 -938
data/ext/strscan/strscan.c
CHANGED
@@ -4,16 +4,25 @@
|
|
4
4
|
Copyright (c) 1999-2006 Minero Aoki
|
5
5
|
|
6
6
|
This program is free software.
|
7
|
-
You can
|
8
|
-
|
7
|
+
You can redistribute this program under the terms of the Ruby's or 2-clause
|
8
|
+
BSD License. For details, see the COPYING and LICENSE.txt files.
|
9
9
|
*/
|
10
10
|
|
11
11
|
#include "ruby/ruby.h"
|
12
12
|
#include "ruby/re.h"
|
13
13
|
#include "ruby/encoding.h"
|
14
|
-
#include "regint.h"
|
15
14
|
|
16
|
-
#
|
15
|
+
#ifdef RUBY_EXTCONF_H
|
16
|
+
# include RUBY_EXTCONF_H
|
17
|
+
#endif
|
18
|
+
|
19
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
20
|
+
extern size_t onig_region_memsize(const struct re_registers *regs);
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#include <stdbool.h>
|
24
|
+
|
25
|
+
#define STRSCAN_VERSION "3.0.1"
|
17
26
|
|
18
27
|
/* =======================================================================
|
19
28
|
Data Type Definitions
|
@@ -41,6 +50,9 @@ struct strscanner
|
|
41
50
|
|
42
51
|
/* regexp used for last scan */
|
43
52
|
VALUE regex;
|
53
|
+
|
54
|
+
/* anchor mode */
|
55
|
+
bool fixed_anchor_p;
|
44
56
|
};
|
45
57
|
|
46
58
|
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
|
@@ -65,7 +77,6 @@ struct strscanner
|
|
65
77
|
======================================================================= */
|
66
78
|
|
67
79
|
static inline long minl _((const long n, const long x));
|
68
|
-
static VALUE infect _((VALUE str, struct strscanner *p));
|
69
80
|
static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
|
70
81
|
static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
|
71
82
|
|
@@ -126,13 +137,6 @@ static VALUE inspect2 _((struct strscanner *p));
|
|
126
137
|
Utils
|
127
138
|
======================================================================= */
|
128
139
|
|
129
|
-
static VALUE
|
130
|
-
infect(VALUE str, struct strscanner *p)
|
131
|
-
{
|
132
|
-
OBJ_INFECT(str, p->str);
|
133
|
-
return str;
|
134
|
-
}
|
135
|
-
|
136
140
|
static VALUE
|
137
141
|
str_new(struct strscanner *p, const char *ptr, long len)
|
138
142
|
{
|
@@ -152,7 +156,7 @@ extract_range(struct strscanner *p, long beg_i, long end_i)
|
|
152
156
|
{
|
153
157
|
if (beg_i > S_LEN(p)) return Qnil;
|
154
158
|
end_i = minl(end_i, S_LEN(p));
|
155
|
-
return
|
159
|
+
return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i);
|
156
160
|
}
|
157
161
|
|
158
162
|
static VALUE
|
@@ -160,7 +164,7 @@ extract_beg_len(struct strscanner *p, long beg_i, long len)
|
|
160
164
|
{
|
161
165
|
if (beg_i > S_LEN(p)) return Qnil;
|
162
166
|
len = minl(len, S_LEN(p) - beg_i);
|
163
|
-
return
|
167
|
+
return str_new(p, S_PBEG(p) + beg_i, len);
|
164
168
|
}
|
165
169
|
|
166
170
|
/* =======================================================================
|
@@ -172,6 +176,7 @@ strscan_mark(void *ptr)
|
|
172
176
|
{
|
173
177
|
struct strscanner *p = ptr;
|
174
178
|
rb_gc_mark(p->str);
|
179
|
+
rb_gc_mark(p->regex);
|
175
180
|
}
|
176
181
|
|
177
182
|
static void
|
@@ -186,7 +191,11 @@ static size_t
|
|
186
191
|
strscan_memsize(const void *ptr)
|
187
192
|
{
|
188
193
|
const struct strscanner *p = ptr;
|
189
|
-
|
194
|
+
size_t size = sizeof(*p) - sizeof(p->regs);
|
195
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
196
|
+
size += onig_region_memsize(&p->regs);
|
197
|
+
#endif
|
198
|
+
return size;
|
190
199
|
}
|
191
200
|
|
192
201
|
static const rb_data_type_t strscanner_type = {
|
@@ -204,23 +213,46 @@ strscan_s_allocate(VALUE klass)
|
|
204
213
|
CLEAR_MATCH_STATUS(p);
|
205
214
|
onig_region_init(&(p->regs));
|
206
215
|
p->str = Qnil;
|
216
|
+
p->regex = Qnil;
|
207
217
|
return obj;
|
208
218
|
}
|
209
219
|
|
210
220
|
/*
|
211
|
-
* call-seq:
|
221
|
+
* call-seq:
|
222
|
+
* StringScanner.new(string, fixed_anchor: false)
|
223
|
+
* StringScanner.new(string, dup = false)
|
212
224
|
*
|
213
225
|
* Creates a new StringScanner object to scan over the given +string+.
|
226
|
+
*
|
227
|
+
* If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
|
228
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
229
|
+
*
|
214
230
|
* The +dup+ argument is obsolete and is no longer used.
|
215
231
|
*/
|
216
232
|
static VALUE
|
217
233
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
218
234
|
{
|
219
235
|
struct strscanner *p;
|
220
|
-
VALUE str,
|
236
|
+
VALUE str, options;
|
221
237
|
|
222
238
|
p = check_strscan(self);
|
223
|
-
rb_scan_args(argc, argv, "11", &str, &
|
239
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
240
|
+
options = rb_check_hash_type(options);
|
241
|
+
if (!NIL_P(options)) {
|
242
|
+
VALUE fixed_anchor;
|
243
|
+
ID keyword_ids[1];
|
244
|
+
keyword_ids[0] = rb_intern("fixed_anchor");
|
245
|
+
rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
|
246
|
+
if (fixed_anchor == Qundef) {
|
247
|
+
p->fixed_anchor_p = false;
|
248
|
+
}
|
249
|
+
else {
|
250
|
+
p->fixed_anchor_p = RTEST(fixed_anchor);
|
251
|
+
}
|
252
|
+
}
|
253
|
+
else {
|
254
|
+
p->fixed_anchor_p = false;
|
255
|
+
}
|
224
256
|
StringValue(str);
|
225
257
|
p->str = str;
|
226
258
|
|
@@ -294,7 +326,7 @@ strscan_reset(VALUE self)
|
|
294
326
|
* terminate
|
295
327
|
* clear
|
296
328
|
*
|
297
|
-
*
|
329
|
+
* Sets the scan pointer to the end of the string and clears matching data.
|
298
330
|
*/
|
299
331
|
static VALUE
|
300
332
|
strscan_terminate(VALUE self)
|
@@ -413,19 +445,16 @@ static VALUE
|
|
413
445
|
strscan_get_charpos(VALUE self)
|
414
446
|
{
|
415
447
|
struct strscanner *p;
|
416
|
-
VALUE substr;
|
417
448
|
|
418
449
|
GET_SCANNER(self, p);
|
419
450
|
|
420
|
-
|
421
|
-
|
422
|
-
return rb_str_length(substr);
|
451
|
+
return LONG2NUM(rb_enc_strlen(S_PBEG(p), CURPTR(p), rb_enc_get(p->str)));
|
423
452
|
}
|
424
453
|
|
425
454
|
/*
|
426
455
|
* call-seq: pos=(n)
|
427
456
|
*
|
428
|
-
*
|
457
|
+
* Sets the byte position of the scan pointer.
|
429
458
|
*
|
430
459
|
* s = StringScanner.new('test string')
|
431
460
|
* s.pos = 7 # -> 7
|
@@ -443,19 +472,86 @@ strscan_set_pos(VALUE self, VALUE v)
|
|
443
472
|
if (i < 0) rb_raise(rb_eRangeError, "index out of range");
|
444
473
|
if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
|
445
474
|
p->curr = i;
|
446
|
-
return
|
475
|
+
return LONG2NUM(i);
|
476
|
+
}
|
477
|
+
|
478
|
+
static inline UChar *
|
479
|
+
match_target(struct strscanner *p)
|
480
|
+
{
|
481
|
+
if (p->fixed_anchor_p) {
|
482
|
+
return (UChar *)S_PBEG(p);
|
483
|
+
}
|
484
|
+
else
|
485
|
+
{
|
486
|
+
return (UChar *)CURPTR(p);
|
487
|
+
}
|
488
|
+
}
|
489
|
+
|
490
|
+
static inline void
|
491
|
+
set_registers(struct strscanner *p, size_t length)
|
492
|
+
{
|
493
|
+
const int at = 0;
|
494
|
+
OnigRegion *regs = &(p->regs);
|
495
|
+
onig_region_clear(regs);
|
496
|
+
if (onig_region_set(regs, at, 0, 0)) return;
|
497
|
+
if (p->fixed_anchor_p) {
|
498
|
+
regs->beg[at] = p->curr;
|
499
|
+
regs->end[at] = p->curr + length;
|
500
|
+
}
|
501
|
+
else
|
502
|
+
{
|
503
|
+
regs->end[at] = length;
|
504
|
+
}
|
505
|
+
}
|
506
|
+
|
507
|
+
static inline void
|
508
|
+
succ(struct strscanner *p)
|
509
|
+
{
|
510
|
+
if (p->fixed_anchor_p) {
|
511
|
+
p->curr = p->regs.end[0];
|
512
|
+
}
|
513
|
+
else
|
514
|
+
{
|
515
|
+
p->curr += p->regs.end[0];
|
516
|
+
}
|
517
|
+
}
|
518
|
+
|
519
|
+
static inline long
|
520
|
+
last_match_length(struct strscanner *p)
|
521
|
+
{
|
522
|
+
if (p->fixed_anchor_p) {
|
523
|
+
return p->regs.end[0] - p->prev;
|
524
|
+
}
|
525
|
+
else
|
526
|
+
{
|
527
|
+
return p->regs.end[0];
|
528
|
+
}
|
529
|
+
}
|
530
|
+
|
531
|
+
static inline long
|
532
|
+
adjust_register_position(struct strscanner *p, long position)
|
533
|
+
{
|
534
|
+
if (p->fixed_anchor_p) {
|
535
|
+
return position;
|
536
|
+
}
|
537
|
+
else {
|
538
|
+
return p->prev + position;
|
539
|
+
}
|
447
540
|
}
|
448
541
|
|
449
542
|
static VALUE
|
450
|
-
strscan_do_scan(VALUE self, VALUE
|
543
|
+
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
451
544
|
{
|
452
|
-
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
453
545
|
struct strscanner *p;
|
454
|
-
regex_t *re;
|
455
|
-
long ret;
|
456
|
-
int tmpreg;
|
457
546
|
|
458
|
-
|
547
|
+
if (headonly) {
|
548
|
+
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
549
|
+
StringValue(pattern);
|
550
|
+
}
|
551
|
+
}
|
552
|
+
else {
|
553
|
+
Check_Type(pattern, T_REGEXP);
|
554
|
+
}
|
459
555
|
GET_SCANNER(self, p);
|
460
556
|
|
461
557
|
CLEAR_MATCH_STATUS(p);
|
@@ -463,49 +559,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
463
559
|
return Qnil;
|
464
560
|
}
|
465
561
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
562
|
+
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
563
|
+
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
564
|
+
regex_t *re;
|
565
|
+
long ret;
|
566
|
+
int tmpreg;
|
567
|
+
|
568
|
+
p->regex = pattern;
|
569
|
+
re = rb_reg_prepare_re(pattern, p->str);
|
570
|
+
tmpreg = re != RREGEXP_PTR(pattern);
|
571
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt++;
|
572
|
+
|
573
|
+
if (headonly) {
|
574
|
+
ret = onig_match(re,
|
575
|
+
match_target(p),
|
576
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
577
|
+
(UChar* )CURPTR(p),
|
578
|
+
&(p->regs),
|
579
|
+
ONIG_OPTION_NONE);
|
580
|
+
}
|
581
|
+
else {
|
582
|
+
ret = onig_search(re,
|
583
|
+
match_target(p),
|
584
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
585
|
+
(UChar* )CURPTR(p),
|
586
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
587
|
+
&(p->regs),
|
588
|
+
ONIG_OPTION_NONE);
|
589
|
+
}
|
590
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt--;
|
591
|
+
if (tmpreg) {
|
592
|
+
if (RREGEXP(pattern)->usecnt) {
|
593
|
+
onig_free(re);
|
594
|
+
}
|
595
|
+
else {
|
596
|
+
onig_free(RREGEXP_PTR(pattern));
|
597
|
+
RREGEXP_PTR(pattern) = re;
|
598
|
+
}
|
599
|
+
}
|
470
600
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
601
|
+
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
602
|
+
if (ret < 0) {
|
603
|
+
/* not matched */
|
604
|
+
return Qnil;
|
605
|
+
}
|
475
606
|
}
|
476
607
|
else {
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
&(p->regs), ONIG_OPTION_NONE);
|
481
|
-
}
|
482
|
-
if (!tmpreg) RREGEXP(regex)->usecnt--;
|
483
|
-
if (tmpreg) {
|
484
|
-
if (RREGEXP(regex)->usecnt) {
|
485
|
-
onig_free(re);
|
608
|
+
rb_enc_check(p->str, pattern);
|
609
|
+
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
610
|
+
return Qnil;
|
486
611
|
}
|
487
|
-
|
488
|
-
|
489
|
-
RREGEXP_PTR(regex) = re;
|
612
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
613
|
+
return Qnil;
|
490
614
|
}
|
491
|
-
|
492
|
-
|
493
|
-
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
494
|
-
if (ret < 0) {
|
495
|
-
/* not matched */
|
496
|
-
return Qnil;
|
615
|
+
set_registers(p, RSTRING_LEN(pattern));
|
497
616
|
}
|
498
617
|
|
499
618
|
MATCHED(p);
|
500
619
|
p->prev = p->curr;
|
620
|
+
|
501
621
|
if (succptr) {
|
502
|
-
p
|
622
|
+
succ(p);
|
503
623
|
}
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
624
|
+
{
|
625
|
+
const long length = last_match_length(p);
|
626
|
+
if (getstr) {
|
627
|
+
return extract_beg_len(p, p->prev, length);
|
628
|
+
}
|
629
|
+
else {
|
630
|
+
return INT2FIX(length);
|
631
|
+
}
|
509
632
|
}
|
510
633
|
}
|
511
634
|
|
@@ -520,7 +643,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
520
643
|
* p s.scan(/\w+/) # -> "test"
|
521
644
|
* p s.scan(/\w+/) # -> nil
|
522
645
|
* p s.scan(/\s+/) # -> " "
|
523
|
-
* p s.scan(
|
646
|
+
* p s.scan("str") # -> "str"
|
647
|
+
* p s.scan(/\w+/) # -> "ing"
|
524
648
|
* p s.scan(/./) # -> nil
|
525
649
|
*
|
526
650
|
*/
|
@@ -539,6 +663,7 @@ strscan_scan(VALUE self, VALUE re)
|
|
539
663
|
* s = StringScanner.new('test string')
|
540
664
|
* p s.match?(/\w+/) # -> 4
|
541
665
|
* p s.match?(/\w+/) # -> 4
|
666
|
+
* p s.match?("test") # -> 4
|
542
667
|
* p s.match?(/\s+/) # -> nil
|
543
668
|
*/
|
544
669
|
static VALUE
|
@@ -560,7 +685,8 @@ strscan_match_p(VALUE self, VALUE re)
|
|
560
685
|
* p s.skip(/\w+/) # -> 4
|
561
686
|
* p s.skip(/\w+/) # -> nil
|
562
687
|
* p s.skip(/\s+/) # -> 1
|
563
|
-
* p s.skip(
|
688
|
+
* p s.skip("st") # -> 2
|
689
|
+
* p s.skip(/\w+/) # -> 4
|
564
690
|
* p s.skip(/./) # -> nil
|
565
691
|
*
|
566
692
|
*/
|
@@ -704,7 +830,12 @@ static void
|
|
704
830
|
adjust_registers_to_matched(struct strscanner *p)
|
705
831
|
{
|
706
832
|
onig_region_clear(&(p->regs));
|
707
|
-
|
833
|
+
if (p->fixed_anchor_p) {
|
834
|
+
onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
|
835
|
+
}
|
836
|
+
else {
|
837
|
+
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
|
838
|
+
}
|
708
839
|
}
|
709
840
|
|
710
841
|
/*
|
@@ -716,9 +847,8 @@ adjust_registers_to_matched(struct strscanner *p)
|
|
716
847
|
* s.getch # => "b"
|
717
848
|
* s.getch # => nil
|
718
849
|
*
|
719
|
-
*
|
720
|
-
* s
|
721
|
-
* s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP
|
850
|
+
* s = StringScanner.new("\244\242".force_encoding("euc-jp"))
|
851
|
+
* s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
|
722
852
|
* s.getch # => nil
|
723
853
|
*/
|
724
854
|
static VALUE
|
@@ -738,8 +868,9 @@ strscan_getch(VALUE self)
|
|
738
868
|
p->curr += len;
|
739
869
|
MATCHED(p);
|
740
870
|
adjust_registers_to_matched(p);
|
741
|
-
return extract_range(p,
|
742
|
-
|
871
|
+
return extract_range(p,
|
872
|
+
adjust_register_position(p, p->regs.beg[0]),
|
873
|
+
adjust_register_position(p, p->regs.end[0]));
|
743
874
|
}
|
744
875
|
|
745
876
|
/*
|
@@ -752,10 +883,9 @@ strscan_getch(VALUE self)
|
|
752
883
|
* s.get_byte # => "b"
|
753
884
|
* s.get_byte # => nil
|
754
885
|
*
|
755
|
-
*
|
756
|
-
* s
|
757
|
-
* s.get_byte # => "\
|
758
|
-
* s.get_byte # => "\242"
|
886
|
+
* s = StringScanner.new("\244\242".force_encoding("euc-jp"))
|
887
|
+
* s.get_byte # => "\xA4"
|
888
|
+
* s.get_byte # => "\xA2"
|
759
889
|
* s.get_byte # => nil
|
760
890
|
*/
|
761
891
|
static VALUE
|
@@ -772,8 +902,9 @@ strscan_get_byte(VALUE self)
|
|
772
902
|
p->curr++;
|
773
903
|
MATCHED(p);
|
774
904
|
adjust_registers_to_matched(p);
|
775
|
-
return extract_range(p,
|
776
|
-
|
905
|
+
return extract_range(p,
|
906
|
+
adjust_register_position(p, p->regs.beg[0]),
|
907
|
+
adjust_register_position(p, p->regs.end[0]));
|
777
908
|
}
|
778
909
|
|
779
910
|
/*
|
@@ -808,7 +939,7 @@ strscan_peek(VALUE self, VALUE vlen)
|
|
808
939
|
|
809
940
|
len = NUM2LONG(vlen);
|
810
941
|
if (EOS_P(p))
|
811
|
-
return
|
942
|
+
return str_new(p, "", 0);
|
812
943
|
|
813
944
|
len = minl(len, S_RESTLEN(p));
|
814
945
|
return extract_beg_len(p, p->curr, len);
|
@@ -826,7 +957,7 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
826
957
|
}
|
827
958
|
|
828
959
|
/*
|
829
|
-
*
|
960
|
+
* Sets the scan pointer to the previous position. Only one previous position is
|
830
961
|
* remembered, and it changes with each scanning operation.
|
831
962
|
*
|
832
963
|
* s = StringScanner.new('test string')
|
@@ -850,7 +981,7 @@ strscan_unscan(VALUE self)
|
|
850
981
|
}
|
851
982
|
|
852
983
|
/*
|
853
|
-
* Returns +true+
|
984
|
+
* Returns +true+ if and only if the scan pointer is at the beginning of the line.
|
854
985
|
*
|
855
986
|
* s = StringScanner.new("test\ntest\n")
|
856
987
|
* s.bol? # => true
|
@@ -903,7 +1034,7 @@ strscan_empty_p(VALUE self)
|
|
903
1034
|
}
|
904
1035
|
|
905
1036
|
/*
|
906
|
-
* Returns true
|
1037
|
+
* Returns true if and only if there is more data in the string. See #eos?.
|
907
1038
|
* This method is obsolete; use #eos? instead.
|
908
1039
|
*
|
909
1040
|
* s = StringScanner.new('test string')
|
@@ -920,7 +1051,7 @@ strscan_rest_p(VALUE self)
|
|
920
1051
|
}
|
921
1052
|
|
922
1053
|
/*
|
923
|
-
* Returns +true+
|
1054
|
+
* Returns +true+ if and only if the last match was successful.
|
924
1055
|
*
|
925
1056
|
* s = StringScanner.new('test string')
|
926
1057
|
* s.match?(/\w+/) # => 4
|
@@ -951,13 +1082,15 @@ strscan_matched(VALUE self)
|
|
951
1082
|
|
952
1083
|
GET_SCANNER(self, p);
|
953
1084
|
if (! MATCHED_P(p)) return Qnil;
|
954
|
-
return extract_range(p,
|
955
|
-
|
1085
|
+
return extract_range(p,
|
1086
|
+
adjust_register_position(p, p->regs.beg[0]),
|
1087
|
+
adjust_register_position(p, p->regs.end[0]));
|
956
1088
|
}
|
957
1089
|
|
958
1090
|
/*
|
959
|
-
* Returns the size of the most recent match
|
960
|
-
* was no recent match.
|
1091
|
+
* Returns the size of the most recent match in bytes, or +nil+ if there
|
1092
|
+
* was no recent match. This is different than <tt>matched.size</tt>,
|
1093
|
+
* which will return the size in characters.
|
961
1094
|
*
|
962
1095
|
* s = StringScanner.new('test string')
|
963
1096
|
* s.check /\w+/ # -> "test"
|
@@ -972,7 +1105,7 @@ strscan_matched_size(VALUE self)
|
|
972
1105
|
|
973
1106
|
GET_SCANNER(self, p);
|
974
1107
|
if (! MATCHED_P(p)) return Qnil;
|
975
|
-
return
|
1108
|
+
return LONG2NUM(p->regs.end[0] - p->regs.beg[0]);
|
976
1109
|
}
|
977
1110
|
|
978
1111
|
static int
|
@@ -1034,7 +1167,7 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1034
1167
|
idx = rb_sym2str(idx);
|
1035
1168
|
/* fall through */
|
1036
1169
|
case T_STRING:
|
1037
|
-
if (!p->regex) return Qnil;
|
1170
|
+
if (!RTEST(p->regex)) return Qnil;
|
1038
1171
|
RSTRING_GETMEM(idx, name, i);
|
1039
1172
|
i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
|
1040
1173
|
break;
|
@@ -1048,8 +1181,9 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1048
1181
|
if (i >= p->regs.num_regs) return Qnil;
|
1049
1182
|
if (p->regs.beg[i] == -1) return Qnil;
|
1050
1183
|
|
1051
|
-
return extract_range(p,
|
1052
|
-
|
1184
|
+
return extract_range(p,
|
1185
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1186
|
+
adjust_register_position(p, p->regs.end[i]));
|
1053
1187
|
}
|
1054
1188
|
|
1055
1189
|
/*
|
@@ -1098,8 +1232,9 @@ strscan_captures(VALUE self)
|
|
1098
1232
|
new_ary = rb_ary_new2(num_regs);
|
1099
1233
|
|
1100
1234
|
for (i = 1; i < num_regs; i++) {
|
1101
|
-
VALUE str = extract_range(p,
|
1102
|
-
|
1235
|
+
VALUE str = extract_range(p,
|
1236
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1237
|
+
adjust_register_position(p, p->regs.end[i]));
|
1103
1238
|
rb_ary_push(new_ary, str);
|
1104
1239
|
}
|
1105
1240
|
|
@@ -1154,7 +1289,9 @@ strscan_pre_match(VALUE self)
|
|
1154
1289
|
|
1155
1290
|
GET_SCANNER(self, p);
|
1156
1291
|
if (! MATCHED_P(p)) return Qnil;
|
1157
|
-
return extract_range(p,
|
1292
|
+
return extract_range(p,
|
1293
|
+
0,
|
1294
|
+
adjust_register_position(p, p->regs.beg[0]));
|
1158
1295
|
}
|
1159
1296
|
|
1160
1297
|
/*
|
@@ -1173,7 +1310,9 @@ strscan_post_match(VALUE self)
|
|
1173
1310
|
|
1174
1311
|
GET_SCANNER(self, p);
|
1175
1312
|
if (! MATCHED_P(p)) return Qnil;
|
1176
|
-
return extract_range(p,
|
1313
|
+
return extract_range(p,
|
1314
|
+
adjust_register_position(p, p->regs.end[0]),
|
1315
|
+
S_LEN(p));
|
1177
1316
|
}
|
1178
1317
|
|
1179
1318
|
/*
|
@@ -1187,7 +1326,7 @@ strscan_rest(VALUE self)
|
|
1187
1326
|
|
1188
1327
|
GET_SCANNER(self, p);
|
1189
1328
|
if (EOS_P(p)) {
|
1190
|
-
return
|
1329
|
+
return str_new(p, "", 0);
|
1191
1330
|
}
|
1192
1331
|
return extract_range(p, p->curr, S_LEN(p));
|
1193
1332
|
}
|
@@ -1242,11 +1381,11 @@ strscan_inspect(VALUE self)
|
|
1242
1381
|
p = check_strscan(self);
|
1243
1382
|
if (NIL_P(p->str)) {
|
1244
1383
|
a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
|
1245
|
-
return
|
1384
|
+
return a;
|
1246
1385
|
}
|
1247
1386
|
if (EOS_P(p)) {
|
1248
1387
|
a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
|
1249
|
-
return
|
1388
|
+
return a;
|
1250
1389
|
}
|
1251
1390
|
if (p->curr == 0) {
|
1252
1391
|
b = inspect2(p);
|
@@ -1254,7 +1393,7 @@ strscan_inspect(VALUE self)
|
|
1254
1393
|
rb_obj_class(self),
|
1255
1394
|
p->curr, S_LEN(p),
|
1256
1395
|
b);
|
1257
|
-
return
|
1396
|
+
return a;
|
1258
1397
|
}
|
1259
1398
|
a = inspect1(p);
|
1260
1399
|
b = inspect2(p);
|
@@ -1262,7 +1401,7 @@ strscan_inspect(VALUE self)
|
|
1262
1401
|
rb_obj_class(self),
|
1263
1402
|
p->curr, S_LEN(p),
|
1264
1403
|
a, b);
|
1265
|
-
return
|
1404
|
+
return a;
|
1266
1405
|
}
|
1267
1406
|
|
1268
1407
|
static VALUE
|
@@ -1302,6 +1441,23 @@ inspect2(struct strscanner *p)
|
|
1302
1441
|
return rb_str_dump(str);
|
1303
1442
|
}
|
1304
1443
|
|
1444
|
+
/*
|
1445
|
+
* call-seq:
|
1446
|
+
* scanner.fixed_anchor? -> true or false
|
1447
|
+
*
|
1448
|
+
* Whether +scanner+ uses fixed anchor mode or not.
|
1449
|
+
*
|
1450
|
+
* If fixed anchor mode is used, +\A+ always matches the beginning of
|
1451
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
1452
|
+
*/
|
1453
|
+
static VALUE
|
1454
|
+
strscan_fixed_anchor_p(VALUE self)
|
1455
|
+
{
|
1456
|
+
struct strscanner *p;
|
1457
|
+
p = check_strscan(self);
|
1458
|
+
return p->fixed_anchor_p ? Qtrue : Qfalse;
|
1459
|
+
}
|
1460
|
+
|
1305
1461
|
/* =======================================================================
|
1306
1462
|
Ruby Interface
|
1307
1463
|
======================================================================= */
|
@@ -1378,7 +1534,7 @@ inspect2(struct strscanner *p)
|
|
1378
1534
|
*
|
1379
1535
|
* === Finding Where we Are
|
1380
1536
|
*
|
1381
|
-
* - #beginning_of_line? (
|
1537
|
+
* - #beginning_of_line? (<tt>#bol?</tt>)
|
1382
1538
|
* - #eos?
|
1383
1539
|
* - #rest?
|
1384
1540
|
* - #rest_size
|
@@ -1395,13 +1551,13 @@ inspect2(struct strscanner *p)
|
|
1395
1551
|
* - #matched
|
1396
1552
|
* - #matched?
|
1397
1553
|
* - #matched_size
|
1398
|
-
* - []
|
1554
|
+
* - <tt>#[]</tt>
|
1399
1555
|
* - #pre_match
|
1400
1556
|
* - #post_match
|
1401
1557
|
*
|
1402
1558
|
* === Miscellaneous
|
1403
1559
|
*
|
1404
|
-
* -
|
1560
|
+
* - <tt><<</tt>
|
1405
1561
|
* - #concat
|
1406
1562
|
* - #string
|
1407
1563
|
* - #string=
|
@@ -1412,6 +1568,11 @@ inspect2(struct strscanner *p)
|
|
1412
1568
|
void
|
1413
1569
|
Init_strscan(void)
|
1414
1570
|
{
|
1571
|
+
#ifdef HAVE_RB_EXT_RACTOR_SAFE
|
1572
|
+
rb_ext_ractor_safe(true);
|
1573
|
+
#endif
|
1574
|
+
|
1575
|
+
#undef rb_intern
|
1415
1576
|
ID id_scanerr = rb_intern("ScanError");
|
1416
1577
|
VALUE tmp;
|
1417
1578
|
|
@@ -1487,4 +1648,6 @@ Init_strscan(void)
|
|
1487
1648
|
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
|
1488
1649
|
|
1489
1650
|
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
|
1651
|
+
|
1652
|
+
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1490
1653
|
}
|