strscan 1.0.0 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/strscan/strscan.c +214 -60
- metadata +19 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b623224c9ff6a9b576b776bcc212c459e7205b4151546a2f6bfb8c7aeb648e8
|
4
|
+
data.tar.gz: 0f60465fe2fc38d2b1e505b9e463181b1e264c20b4c1f6941f2f2df6b7de3371
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44cc8212b5a7a069bb991ebeb14f608721a7b9e79720b281bd03df1d3d89cdfc95936ce9b555848141e02c79082435e88170e4c912119dd1b91ad033b9bcccae
|
7
|
+
data.tar.gz: 6b4e0c7b95409576e6ae1e1d1d39bc78155f2d010af162514bfcd6984dbd6450803f15d8827080dadfb8fdba9c9312d9dc9e59b4377db34606056783480f6eb1
|
data/ext/strscan/strscan.c
CHANGED
@@ -13,7 +13,9 @@
|
|
13
13
|
#include "ruby/encoding.h"
|
14
14
|
#include "regint.h"
|
15
15
|
|
16
|
-
#define STRSCAN_VERSION "1.0.0"
|
16
|
+
#include <stdbool.h>
|
17
|
+
|
18
|
+
#define STRSCAN_VERSION "1.0.2"
|
17
19
|
|
18
20
|
/* =======================================================================
|
19
21
|
Data Type Definitions
|
@@ -41,6 +43,9 @@ struct strscanner
|
|
41
43
|
|
42
44
|
/* regexp used for last scan */
|
43
45
|
VALUE regex;
|
46
|
+
|
47
|
+
/* anchor mode */
|
48
|
+
bool fixed_anchor_p;
|
44
49
|
};
|
45
50
|
|
46
51
|
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
|
@@ -208,19 +213,41 @@ strscan_s_allocate(VALUE klass)
|
|
208
213
|
}
|
209
214
|
|
210
215
|
/*
|
211
|
-
* call-seq:
|
216
|
+
* call-seq:
|
217
|
+
* StringScanner.new(string, fixed_anchor: false)
|
218
|
+
* StringScanner.new(string, dup = false)
|
212
219
|
*
|
213
220
|
* Creates a new StringScanner object to scan over the given +string+.
|
221
|
+
*
|
222
|
+
* If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
|
223
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
224
|
+
*
|
214
225
|
* +dup+ argument is obsolete and not used now.
|
215
226
|
*/
|
216
227
|
static VALUE
|
217
228
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
218
229
|
{
|
219
230
|
struct strscanner *p;
|
220
|
-
VALUE str,
|
231
|
+
VALUE str, options;
|
221
232
|
|
222
233
|
p = check_strscan(self);
|
223
|
-
rb_scan_args(argc, argv, "11", &str, &
|
234
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
235
|
+
options = rb_check_hash_type(options);
|
236
|
+
if (!NIL_P(options)) {
|
237
|
+
VALUE fixed_anchor;
|
238
|
+
ID keyword_ids[1];
|
239
|
+
keyword_ids[0] = rb_intern("fixed_anchor");
|
240
|
+
rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
|
241
|
+
if (fixed_anchor == Qundef) {
|
242
|
+
p->fixed_anchor_p = false;
|
243
|
+
}
|
244
|
+
else {
|
245
|
+
p->fixed_anchor_p = RTEST(fixed_anchor);
|
246
|
+
}
|
247
|
+
}
|
248
|
+
else {
|
249
|
+
p->fixed_anchor_p = false;
|
250
|
+
}
|
224
251
|
StringValue(str);
|
225
252
|
p->str = str;
|
226
253
|
|
@@ -294,7 +321,7 @@ strscan_reset(VALUE self)
|
|
294
321
|
* terminate
|
295
322
|
* clear
|
296
323
|
*
|
297
|
-
*
|
324
|
+
* Sets the scan pointer to the end of the string and clears matching data.
|
298
325
|
*/
|
299
326
|
static VALUE
|
300
327
|
strscan_terminate(VALUE self)
|
@@ -425,7 +452,7 @@ strscan_get_charpos(VALUE self)
|
|
425
452
|
/*
|
426
453
|
* call-seq: pos=(n)
|
427
454
|
*
|
428
|
-
*
|
455
|
+
* Sets the byte position of the scan pointer.
|
429
456
|
*
|
430
457
|
* s = StringScanner.new('test string')
|
431
458
|
* s.pos = 7 # -> 7
|
@@ -446,16 +473,79 @@ strscan_set_pos(VALUE self, VALUE v)
|
|
446
473
|
return INT2NUM(i);
|
447
474
|
}
|
448
475
|
|
476
|
+
static inline UChar *
|
477
|
+
match_target(struct strscanner *p)
|
478
|
+
{
|
479
|
+
if (p->fixed_anchor_p) {
|
480
|
+
return (UChar *)S_PBEG(p);
|
481
|
+
}
|
482
|
+
else
|
483
|
+
{
|
484
|
+
return (UChar *)CURPTR(p);
|
485
|
+
}
|
486
|
+
}
|
487
|
+
|
488
|
+
static inline void
|
489
|
+
set_registers(struct strscanner *p, size_t length)
|
490
|
+
{
|
491
|
+
onig_region_clear(&(p->regs));
|
492
|
+
if (p->fixed_anchor_p) {
|
493
|
+
onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
|
494
|
+
}
|
495
|
+
else
|
496
|
+
{
|
497
|
+
onig_region_set(&(p->regs), 0, 0, length);
|
498
|
+
}
|
499
|
+
}
|
500
|
+
|
501
|
+
static inline void
|
502
|
+
succ(struct strscanner *p)
|
503
|
+
{
|
504
|
+
if (p->fixed_anchor_p) {
|
505
|
+
p->curr = p->regs.end[0];
|
506
|
+
}
|
507
|
+
else
|
508
|
+
{
|
509
|
+
p->curr += p->regs.end[0];
|
510
|
+
}
|
511
|
+
}
|
512
|
+
|
513
|
+
static inline long
|
514
|
+
last_match_length(struct strscanner *p)
|
515
|
+
{
|
516
|
+
if (p->fixed_anchor_p) {
|
517
|
+
return p->regs.end[0] - p->prev;
|
518
|
+
}
|
519
|
+
else
|
520
|
+
{
|
521
|
+
return p->regs.end[0];
|
522
|
+
}
|
523
|
+
}
|
524
|
+
|
525
|
+
static inline long
|
526
|
+
adjust_register_position(struct strscanner *p, long position)
|
527
|
+
{
|
528
|
+
if (p->fixed_anchor_p) {
|
529
|
+
return position;
|
530
|
+
}
|
531
|
+
else {
|
532
|
+
return p->prev + position;
|
533
|
+
}
|
534
|
+
}
|
535
|
+
|
449
536
|
static VALUE
|
450
|
-
strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
537
|
+
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
451
538
|
{
|
452
|
-
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
453
539
|
struct strscanner *p;
|
454
|
-
regex_t *re;
|
455
|
-
long ret;
|
456
|
-
int tmpreg;
|
457
540
|
|
458
|
-
|
541
|
+
if (headonly) {
|
542
|
+
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
543
|
+
StringValue(pattern);
|
544
|
+
}
|
545
|
+
}
|
546
|
+
else {
|
547
|
+
Check_Type(pattern, T_REGEXP);
|
548
|
+
}
|
459
549
|
GET_SCANNER(self, p);
|
460
550
|
|
461
551
|
CLEAR_MATCH_STATUS(p);
|
@@ -463,49 +553,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
463
553
|
return Qnil;
|
464
554
|
}
|
465
555
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
556
|
+
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
557
|
+
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
558
|
+
regex_t *re;
|
559
|
+
long ret;
|
560
|
+
int tmpreg;
|
561
|
+
|
562
|
+
p->regex = pattern;
|
563
|
+
re = rb_reg_prepare_re(pattern, p->str);
|
564
|
+
tmpreg = re != RREGEXP_PTR(pattern);
|
565
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt++;
|
566
|
+
|
567
|
+
if (headonly) {
|
568
|
+
ret = onig_match(re,
|
569
|
+
match_target(p),
|
570
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
571
|
+
(UChar* )CURPTR(p),
|
572
|
+
&(p->regs),
|
573
|
+
ONIG_OPTION_NONE);
|
574
|
+
}
|
575
|
+
else {
|
576
|
+
ret = onig_search(re,
|
577
|
+
match_target(p),
|
578
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
579
|
+
(UChar* )CURPTR(p),
|
580
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
581
|
+
&(p->regs),
|
582
|
+
ONIG_OPTION_NONE);
|
583
|
+
}
|
584
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt--;
|
585
|
+
if (tmpreg) {
|
586
|
+
if (RREGEXP(pattern)->usecnt) {
|
587
|
+
onig_free(re);
|
588
|
+
}
|
589
|
+
else {
|
590
|
+
onig_free(RREGEXP_PTR(pattern));
|
591
|
+
RREGEXP_PTR(pattern) = re;
|
592
|
+
}
|
593
|
+
}
|
470
594
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
595
|
+
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
596
|
+
if (ret < 0) {
|
597
|
+
/* not matched */
|
598
|
+
return Qnil;
|
599
|
+
}
|
475
600
|
}
|
476
601
|
else {
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
&(p->regs), ONIG_OPTION_NONE);
|
481
|
-
}
|
482
|
-
if (!tmpreg) RREGEXP(regex)->usecnt--;
|
483
|
-
if (tmpreg) {
|
484
|
-
if (RREGEXP(regex)->usecnt) {
|
485
|
-
onig_free(re);
|
602
|
+
rb_enc_check(p->str, pattern);
|
603
|
+
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
604
|
+
return Qnil;
|
486
605
|
}
|
487
|
-
|
488
|
-
|
489
|
-
RREGEXP_PTR(regex) = re;
|
606
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
607
|
+
return Qnil;
|
490
608
|
}
|
491
|
-
|
492
|
-
|
493
|
-
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
494
|
-
if (ret < 0) {
|
495
|
-
/* not matched */
|
496
|
-
return Qnil;
|
609
|
+
set_registers(p, RSTRING_LEN(pattern));
|
497
610
|
}
|
498
611
|
|
499
612
|
MATCHED(p);
|
500
613
|
p->prev = p->curr;
|
614
|
+
|
501
615
|
if (succptr) {
|
502
|
-
p
|
503
|
-
}
|
504
|
-
if (getstr) {
|
505
|
-
return extract_beg_len(p, p->prev, p->regs.end[0]);
|
616
|
+
succ(p);
|
506
617
|
}
|
507
|
-
|
508
|
-
|
618
|
+
{
|
619
|
+
const long length = last_match_length(p);
|
620
|
+
if (getstr) {
|
621
|
+
return extract_beg_len(p, p->prev, length);
|
622
|
+
}
|
623
|
+
else {
|
624
|
+
return INT2FIX(length);
|
625
|
+
}
|
509
626
|
}
|
510
627
|
}
|
511
628
|
|
@@ -520,7 +637,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
520
637
|
* p s.scan(/\w+/) # -> "test"
|
521
638
|
* p s.scan(/\w+/) # -> nil
|
522
639
|
* p s.scan(/\s+/) # -> " "
|
523
|
-
* p s.scan(
|
640
|
+
* p s.scan("str") # -> "str"
|
641
|
+
* p s.scan(/\w+/) # -> "ing"
|
524
642
|
* p s.scan(/./) # -> nil
|
525
643
|
*
|
526
644
|
*/
|
@@ -539,6 +657,7 @@ strscan_scan(VALUE self, VALUE re)
|
|
539
657
|
* s = StringScanner.new('test string')
|
540
658
|
* p s.match?(/\w+/) # -> 4
|
541
659
|
* p s.match?(/\w+/) # -> 4
|
660
|
+
* p s.match?("test") # -> 4
|
542
661
|
* p s.match?(/\s+/) # -> nil
|
543
662
|
*/
|
544
663
|
static VALUE
|
@@ -560,7 +679,8 @@ strscan_match_p(VALUE self, VALUE re)
|
|
560
679
|
* p s.skip(/\w+/) # -> 4
|
561
680
|
* p s.skip(/\w+/) # -> nil
|
562
681
|
* p s.skip(/\s+/) # -> 1
|
563
|
-
* p s.skip(
|
682
|
+
* p s.skip("st") # -> 2
|
683
|
+
* p s.skip(/\w+/) # -> 4
|
564
684
|
* p s.skip(/./) # -> nil
|
565
685
|
*
|
566
686
|
*/
|
@@ -704,7 +824,12 @@ static void
|
|
704
824
|
adjust_registers_to_matched(struct strscanner *p)
|
705
825
|
{
|
706
826
|
onig_region_clear(&(p->regs));
|
707
|
-
|
827
|
+
if (p->fixed_anchor_p) {
|
828
|
+
onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
|
829
|
+
}
|
830
|
+
else {
|
831
|
+
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
|
832
|
+
}
|
708
833
|
}
|
709
834
|
|
710
835
|
/*
|
@@ -738,8 +863,9 @@ strscan_getch(VALUE self)
|
|
738
863
|
p->curr += len;
|
739
864
|
MATCHED(p);
|
740
865
|
adjust_registers_to_matched(p);
|
741
|
-
return extract_range(p,
|
742
|
-
|
866
|
+
return extract_range(p,
|
867
|
+
adjust_register_position(p, p->regs.beg[0]),
|
868
|
+
adjust_register_position(p, p->regs.end[0]));
|
743
869
|
}
|
744
870
|
|
745
871
|
/*
|
@@ -772,8 +898,9 @@ strscan_get_byte(VALUE self)
|
|
772
898
|
p->curr++;
|
773
899
|
MATCHED(p);
|
774
900
|
adjust_registers_to_matched(p);
|
775
|
-
return extract_range(p,
|
776
|
-
|
901
|
+
return extract_range(p,
|
902
|
+
adjust_register_position(p, p->regs.beg[0]),
|
903
|
+
adjust_register_position(p, p->regs.end[0]));
|
777
904
|
}
|
778
905
|
|
779
906
|
/*
|
@@ -826,7 +953,7 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
826
953
|
}
|
827
954
|
|
828
955
|
/*
|
829
|
-
*
|
956
|
+
* Sets the scan pointer to the previous position. Only one previous position is
|
830
957
|
* remembered, and it changes with each scanning operation.
|
831
958
|
*
|
832
959
|
* s = StringScanner.new('test string')
|
@@ -951,8 +1078,9 @@ strscan_matched(VALUE self)
|
|
951
1078
|
|
952
1079
|
GET_SCANNER(self, p);
|
953
1080
|
if (! MATCHED_P(p)) return Qnil;
|
954
|
-
return extract_range(p,
|
955
|
-
|
1081
|
+
return extract_range(p,
|
1082
|
+
adjust_register_position(p, p->regs.beg[0]),
|
1083
|
+
adjust_register_position(p, p->regs.end[0]));
|
956
1084
|
}
|
957
1085
|
|
958
1086
|
/*
|
@@ -1048,8 +1176,9 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1048
1176
|
if (i >= p->regs.num_regs) return Qnil;
|
1049
1177
|
if (p->regs.beg[i] == -1) return Qnil;
|
1050
1178
|
|
1051
|
-
return extract_range(p,
|
1052
|
-
|
1179
|
+
return extract_range(p,
|
1180
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1181
|
+
adjust_register_position(p, p->regs.end[i]));
|
1053
1182
|
}
|
1054
1183
|
|
1055
1184
|
/*
|
@@ -1098,8 +1227,9 @@ strscan_captures(VALUE self)
|
|
1098
1227
|
new_ary = rb_ary_new2(num_regs);
|
1099
1228
|
|
1100
1229
|
for (i = 1; i < num_regs; i++) {
|
1101
|
-
VALUE str = extract_range(p,
|
1102
|
-
|
1230
|
+
VALUE str = extract_range(p,
|
1231
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1232
|
+
adjust_register_position(p, p->regs.end[i]));
|
1103
1233
|
rb_ary_push(new_ary, str);
|
1104
1234
|
}
|
1105
1235
|
|
@@ -1154,7 +1284,9 @@ strscan_pre_match(VALUE self)
|
|
1154
1284
|
|
1155
1285
|
GET_SCANNER(self, p);
|
1156
1286
|
if (! MATCHED_P(p)) return Qnil;
|
1157
|
-
return extract_range(p,
|
1287
|
+
return extract_range(p,
|
1288
|
+
0,
|
1289
|
+
adjust_register_position(p, p->regs.beg[0]));
|
1158
1290
|
}
|
1159
1291
|
|
1160
1292
|
/*
|
@@ -1173,7 +1305,9 @@ strscan_post_match(VALUE self)
|
|
1173
1305
|
|
1174
1306
|
GET_SCANNER(self, p);
|
1175
1307
|
if (! MATCHED_P(p)) return Qnil;
|
1176
|
-
return extract_range(p,
|
1308
|
+
return extract_range(p,
|
1309
|
+
adjust_register_position(p, p->regs.end[0]),
|
1310
|
+
S_LEN(p));
|
1177
1311
|
}
|
1178
1312
|
|
1179
1313
|
/*
|
@@ -1302,6 +1436,23 @@ inspect2(struct strscanner *p)
|
|
1302
1436
|
return rb_str_dump(str);
|
1303
1437
|
}
|
1304
1438
|
|
1439
|
+
/*
|
1440
|
+
* call-seq:
|
1441
|
+
* scanner.fixed_anchor? -> true or false
|
1442
|
+
*
|
1443
|
+
* Whether +scanner+ uses fixed anchor mode or not.
|
1444
|
+
*
|
1445
|
+
* If fixed anchor mode is used, +\A+ always matches the beginning of
|
1446
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
1447
|
+
*/
|
1448
|
+
static VALUE
|
1449
|
+
strscan_fixed_anchor_p(VALUE self)
|
1450
|
+
{
|
1451
|
+
struct strscanner *p;
|
1452
|
+
p = check_strscan(self);
|
1453
|
+
return p->fixed_anchor_p ? Qtrue : Qfalse;
|
1454
|
+
}
|
1455
|
+
|
1305
1456
|
/* =======================================================================
|
1306
1457
|
Ruby Interface
|
1307
1458
|
======================================================================= */
|
@@ -1412,6 +1563,7 @@ inspect2(struct strscanner *p)
|
|
1412
1563
|
void
|
1413
1564
|
Init_strscan(void)
|
1414
1565
|
{
|
1566
|
+
#undef rb_intern
|
1415
1567
|
ID id_scanerr = rb_intern("ScanError");
|
1416
1568
|
VALUE tmp;
|
1417
1569
|
|
@@ -1487,4 +1639,6 @@ Init_strscan(void)
|
|
1487
1639
|
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
|
1488
1640
|
|
1489
1641
|
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
|
1642
|
+
|
1643
|
+
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1490
1644
|
}
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: strscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
|
+
- Sutou Kouhei
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2019-10-12 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: rake-compiler
|
@@ -24,9 +25,24 @@ dependencies:
|
|
24
25
|
- - ">="
|
25
26
|
- !ruby/object:Gem::Version
|
26
27
|
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: benchmark-driver
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
27
42
|
description: Provides lexical scanning operations on a String.
|
28
43
|
email:
|
29
44
|
-
|
45
|
+
- kou@cozmixng.org
|
30
46
|
executables: []
|
31
47
|
extensions:
|
32
48
|
- ext/strscan/extconf.rb
|
@@ -56,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
56
72
|
version: '0'
|
57
73
|
requirements: []
|
58
74
|
rubyforge_project:
|
59
|
-
rubygems_version: 2.7.6
|
75
|
+
rubygems_version: 2.7.6.2
|
60
76
|
signing_key:
|
61
77
|
specification_version: 4
|
62
78
|
summary: Provides lexical scanning operations on a String.
|