strscan 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/strscan/strscan.c +214 -60
- metadata +19 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b623224c9ff6a9b576b776bcc212c459e7205b4151546a2f6bfb8c7aeb648e8
|
4
|
+
data.tar.gz: 0f60465fe2fc38d2b1e505b9e463181b1e264c20b4c1f6941f2f2df6b7de3371
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44cc8212b5a7a069bb991ebeb14f608721a7b9e79720b281bd03df1d3d89cdfc95936ce9b555848141e02c79082435e88170e4c912119dd1b91ad033b9bcccae
|
7
|
+
data.tar.gz: 6b4e0c7b95409576e6ae1e1d1d39bc78155f2d010af162514bfcd6984dbd6450803f15d8827080dadfb8fdba9c9312d9dc9e59b4377db34606056783480f6eb1
|
data/ext/strscan/strscan.c
CHANGED
@@ -13,7 +13,9 @@
|
|
13
13
|
#include "ruby/encoding.h"
|
14
14
|
#include "regint.h"
|
15
15
|
|
16
|
-
#
|
16
|
+
#include <stdbool.h>
|
17
|
+
|
18
|
+
#define STRSCAN_VERSION "1.0.2"
|
17
19
|
|
18
20
|
/* =======================================================================
|
19
21
|
Data Type Definitions
|
@@ -41,6 +43,9 @@ struct strscanner
|
|
41
43
|
|
42
44
|
/* regexp used for last scan */
|
43
45
|
VALUE regex;
|
46
|
+
|
47
|
+
/* anchor mode */
|
48
|
+
bool fixed_anchor_p;
|
44
49
|
};
|
45
50
|
|
46
51
|
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
|
@@ -208,19 +213,41 @@ strscan_s_allocate(VALUE klass)
|
|
208
213
|
}
|
209
214
|
|
210
215
|
/*
|
211
|
-
* call-seq:
|
216
|
+
* call-seq:
|
217
|
+
* StringScanner.new(string, fixed_anchor: false)
|
218
|
+
* StringScanner.new(string, dup = false)
|
212
219
|
*
|
213
220
|
* Creates a new StringScanner object to scan over the given +string+.
|
221
|
+
*
|
222
|
+
* If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
|
223
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
224
|
+
*
|
214
225
|
* The +dup+ argument is obsolete and is no longer used.
|
215
226
|
*/
|
216
227
|
static VALUE
|
217
228
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
218
229
|
{
|
219
230
|
struct strscanner *p;
|
220
|
-
VALUE str,
|
231
|
+
VALUE str, options;
|
221
232
|
|
222
233
|
p = check_strscan(self);
|
223
|
-
rb_scan_args(argc, argv, "11", &str, &
|
234
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
235
|
+
options = rb_check_hash_type(options);
|
236
|
+
if (!NIL_P(options)) {
|
237
|
+
VALUE fixed_anchor;
|
238
|
+
ID keyword_ids[1];
|
239
|
+
keyword_ids[0] = rb_intern("fixed_anchor");
|
240
|
+
rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
|
241
|
+
if (fixed_anchor == Qundef) {
|
242
|
+
p->fixed_anchor_p = false;
|
243
|
+
}
|
244
|
+
else {
|
245
|
+
p->fixed_anchor_p = RTEST(fixed_anchor);
|
246
|
+
}
|
247
|
+
}
|
248
|
+
else {
|
249
|
+
p->fixed_anchor_p = false;
|
250
|
+
}
|
224
251
|
StringValue(str);
|
225
252
|
p->str = str;
|
226
253
|
|
@@ -294,7 +321,7 @@ strscan_reset(VALUE self)
|
|
294
321
|
* terminate
|
295
322
|
* clear
|
296
323
|
*
|
297
|
-
*
|
324
|
+
* Sets the scan pointer to the end of the string and clears matching data.
|
298
325
|
*/
|
299
326
|
static VALUE
|
300
327
|
strscan_terminate(VALUE self)
|
@@ -425,7 +452,7 @@ strscan_get_charpos(VALUE self)
|
|
425
452
|
/*
|
426
453
|
* call-seq: pos=(n)
|
427
454
|
*
|
428
|
-
*
|
455
|
+
* Sets the byte position of the scan pointer.
|
429
456
|
*
|
430
457
|
* s = StringScanner.new('test string')
|
431
458
|
* s.pos = 7 # -> 7
|
@@ -446,16 +473,79 @@ strscan_set_pos(VALUE self, VALUE v)
|
|
446
473
|
return INT2NUM(i);
|
447
474
|
}
|
448
475
|
|
476
|
+
static inline UChar *
|
477
|
+
match_target(struct strscanner *p)
|
478
|
+
{
|
479
|
+
if (p->fixed_anchor_p) {
|
480
|
+
return (UChar *)S_PBEG(p);
|
481
|
+
}
|
482
|
+
else
|
483
|
+
{
|
484
|
+
return (UChar *)CURPTR(p);
|
485
|
+
}
|
486
|
+
}
|
487
|
+
|
488
|
+
static inline void
|
489
|
+
set_registers(struct strscanner *p, size_t length)
|
490
|
+
{
|
491
|
+
onig_region_clear(&(p->regs));
|
492
|
+
if (p->fixed_anchor_p) {
|
493
|
+
onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
|
494
|
+
}
|
495
|
+
else
|
496
|
+
{
|
497
|
+
onig_region_set(&(p->regs), 0, 0, length);
|
498
|
+
}
|
499
|
+
}
|
500
|
+
|
501
|
+
static inline void
|
502
|
+
succ(struct strscanner *p)
|
503
|
+
{
|
504
|
+
if (p->fixed_anchor_p) {
|
505
|
+
p->curr = p->regs.end[0];
|
506
|
+
}
|
507
|
+
else
|
508
|
+
{
|
509
|
+
p->curr += p->regs.end[0];
|
510
|
+
}
|
511
|
+
}
|
512
|
+
|
513
|
+
static inline long
|
514
|
+
last_match_length(struct strscanner *p)
|
515
|
+
{
|
516
|
+
if (p->fixed_anchor_p) {
|
517
|
+
return p->regs.end[0] - p->prev;
|
518
|
+
}
|
519
|
+
else
|
520
|
+
{
|
521
|
+
return p->regs.end[0];
|
522
|
+
}
|
523
|
+
}
|
524
|
+
|
525
|
+
static inline long
|
526
|
+
adjust_register_position(struct strscanner *p, long position)
|
527
|
+
{
|
528
|
+
if (p->fixed_anchor_p) {
|
529
|
+
return position;
|
530
|
+
}
|
531
|
+
else {
|
532
|
+
return p->prev + position;
|
533
|
+
}
|
534
|
+
}
|
535
|
+
|
449
536
|
static VALUE
|
450
|
-
strscan_do_scan(VALUE self, VALUE
|
537
|
+
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
451
538
|
{
|
452
|
-
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
453
539
|
struct strscanner *p;
|
454
|
-
regex_t *re;
|
455
|
-
long ret;
|
456
|
-
int tmpreg;
|
457
540
|
|
458
|
-
|
541
|
+
if (headonly) {
|
542
|
+
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
543
|
+
StringValue(pattern);
|
544
|
+
}
|
545
|
+
}
|
546
|
+
else {
|
547
|
+
Check_Type(pattern, T_REGEXP);
|
548
|
+
}
|
459
549
|
GET_SCANNER(self, p);
|
460
550
|
|
461
551
|
CLEAR_MATCH_STATUS(p);
|
@@ -463,49 +553,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
463
553
|
return Qnil;
|
464
554
|
}
|
465
555
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
556
|
+
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
557
|
+
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
558
|
+
regex_t *re;
|
559
|
+
long ret;
|
560
|
+
int tmpreg;
|
561
|
+
|
562
|
+
p->regex = pattern;
|
563
|
+
re = rb_reg_prepare_re(pattern, p->str);
|
564
|
+
tmpreg = re != RREGEXP_PTR(pattern);
|
565
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt++;
|
566
|
+
|
567
|
+
if (headonly) {
|
568
|
+
ret = onig_match(re,
|
569
|
+
match_target(p),
|
570
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
571
|
+
(UChar* )CURPTR(p),
|
572
|
+
&(p->regs),
|
573
|
+
ONIG_OPTION_NONE);
|
574
|
+
}
|
575
|
+
else {
|
576
|
+
ret = onig_search(re,
|
577
|
+
match_target(p),
|
578
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
579
|
+
(UChar* )CURPTR(p),
|
580
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
581
|
+
&(p->regs),
|
582
|
+
ONIG_OPTION_NONE);
|
583
|
+
}
|
584
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt--;
|
585
|
+
if (tmpreg) {
|
586
|
+
if (RREGEXP(pattern)->usecnt) {
|
587
|
+
onig_free(re);
|
588
|
+
}
|
589
|
+
else {
|
590
|
+
onig_free(RREGEXP_PTR(pattern));
|
591
|
+
RREGEXP_PTR(pattern) = re;
|
592
|
+
}
|
593
|
+
}
|
470
594
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
595
|
+
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
596
|
+
if (ret < 0) {
|
597
|
+
/* not matched */
|
598
|
+
return Qnil;
|
599
|
+
}
|
475
600
|
}
|
476
601
|
else {
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
&(p->regs), ONIG_OPTION_NONE);
|
481
|
-
}
|
482
|
-
if (!tmpreg) RREGEXP(regex)->usecnt--;
|
483
|
-
if (tmpreg) {
|
484
|
-
if (RREGEXP(regex)->usecnt) {
|
485
|
-
onig_free(re);
|
602
|
+
rb_enc_check(p->str, pattern);
|
603
|
+
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
604
|
+
return Qnil;
|
486
605
|
}
|
487
|
-
|
488
|
-
|
489
|
-
RREGEXP_PTR(regex) = re;
|
606
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
607
|
+
return Qnil;
|
490
608
|
}
|
491
|
-
|
492
|
-
|
493
|
-
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
494
|
-
if (ret < 0) {
|
495
|
-
/* not matched */
|
496
|
-
return Qnil;
|
609
|
+
set_registers(p, RSTRING_LEN(pattern));
|
497
610
|
}
|
498
611
|
|
499
612
|
MATCHED(p);
|
500
613
|
p->prev = p->curr;
|
614
|
+
|
501
615
|
if (succptr) {
|
502
|
-
p
|
503
|
-
}
|
504
|
-
if (getstr) {
|
505
|
-
return extract_beg_len(p, p->prev, p->regs.end[0]);
|
616
|
+
succ(p);
|
506
617
|
}
|
507
|
-
|
508
|
-
|
618
|
+
{
|
619
|
+
const long length = last_match_length(p);
|
620
|
+
if (getstr) {
|
621
|
+
return extract_beg_len(p, p->prev, length);
|
622
|
+
}
|
623
|
+
else {
|
624
|
+
return INT2FIX(length);
|
625
|
+
}
|
509
626
|
}
|
510
627
|
}
|
511
628
|
|
@@ -520,7 +637,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
520
637
|
* p s.scan(/\w+/) # -> "test"
|
521
638
|
* p s.scan(/\w+/) # -> nil
|
522
639
|
* p s.scan(/\s+/) # -> " "
|
523
|
-
* p s.scan(
|
640
|
+
* p s.scan("str") # -> "str"
|
641
|
+
* p s.scan(/\w+/) # -> "ing"
|
524
642
|
* p s.scan(/./) # -> nil
|
525
643
|
*
|
526
644
|
*/
|
@@ -539,6 +657,7 @@ strscan_scan(VALUE self, VALUE re)
|
|
539
657
|
* s = StringScanner.new('test string')
|
540
658
|
* p s.match?(/\w+/) # -> 4
|
541
659
|
* p s.match?(/\w+/) # -> 4
|
660
|
+
* p s.match?("test") # -> 4
|
542
661
|
* p s.match?(/\s+/) # -> nil
|
543
662
|
*/
|
544
663
|
static VALUE
|
@@ -560,7 +679,8 @@ strscan_match_p(VALUE self, VALUE re)
|
|
560
679
|
* p s.skip(/\w+/) # -> 4
|
561
680
|
* p s.skip(/\w+/) # -> nil
|
562
681
|
* p s.skip(/\s+/) # -> 1
|
563
|
-
* p s.skip(
|
682
|
+
* p s.skip("st") # -> 2
|
683
|
+
* p s.skip(/\w+/) # -> 4
|
564
684
|
* p s.skip(/./) # -> nil
|
565
685
|
*
|
566
686
|
*/
|
@@ -704,7 +824,12 @@ static void
|
|
704
824
|
adjust_registers_to_matched(struct strscanner *p)
|
705
825
|
{
|
706
826
|
onig_region_clear(&(p->regs));
|
707
|
-
|
827
|
+
if (p->fixed_anchor_p) {
|
828
|
+
onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
|
829
|
+
}
|
830
|
+
else {
|
831
|
+
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
|
832
|
+
}
|
708
833
|
}
|
709
834
|
|
710
835
|
/*
|
@@ -738,8 +863,9 @@ strscan_getch(VALUE self)
|
|
738
863
|
p->curr += len;
|
739
864
|
MATCHED(p);
|
740
865
|
adjust_registers_to_matched(p);
|
741
|
-
return extract_range(p,
|
742
|
-
|
866
|
+
return extract_range(p,
|
867
|
+
adjust_register_position(p, p->regs.beg[0]),
|
868
|
+
adjust_register_position(p, p->regs.end[0]));
|
743
869
|
}
|
744
870
|
|
745
871
|
/*
|
@@ -772,8 +898,9 @@ strscan_get_byte(VALUE self)
|
|
772
898
|
p->curr++;
|
773
899
|
MATCHED(p);
|
774
900
|
adjust_registers_to_matched(p);
|
775
|
-
return extract_range(p,
|
776
|
-
|
901
|
+
return extract_range(p,
|
902
|
+
adjust_register_position(p, p->regs.beg[0]),
|
903
|
+
adjust_register_position(p, p->regs.end[0]));
|
777
904
|
}
|
778
905
|
|
779
906
|
/*
|
@@ -826,7 +953,7 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
826
953
|
}
|
827
954
|
|
828
955
|
/*
|
829
|
-
*
|
956
|
+
* Sets the scan pointer to the previous position. Only one previous position is
|
830
957
|
* remembered, and it changes with each scanning operation.
|
831
958
|
*
|
832
959
|
* s = StringScanner.new('test string')
|
@@ -951,8 +1078,9 @@ strscan_matched(VALUE self)
|
|
951
1078
|
|
952
1079
|
GET_SCANNER(self, p);
|
953
1080
|
if (! MATCHED_P(p)) return Qnil;
|
954
|
-
return extract_range(p,
|
955
|
-
|
1081
|
+
return extract_range(p,
|
1082
|
+
adjust_register_position(p, p->regs.beg[0]),
|
1083
|
+
adjust_register_position(p, p->regs.end[0]));
|
956
1084
|
}
|
957
1085
|
|
958
1086
|
/*
|
@@ -1048,8 +1176,9 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1048
1176
|
if (i >= p->regs.num_regs) return Qnil;
|
1049
1177
|
if (p->regs.beg[i] == -1) return Qnil;
|
1050
1178
|
|
1051
|
-
return extract_range(p,
|
1052
|
-
|
1179
|
+
return extract_range(p,
|
1180
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1181
|
+
adjust_register_position(p, p->regs.end[i]));
|
1053
1182
|
}
|
1054
1183
|
|
1055
1184
|
/*
|
@@ -1098,8 +1227,9 @@ strscan_captures(VALUE self)
|
|
1098
1227
|
new_ary = rb_ary_new2(num_regs);
|
1099
1228
|
|
1100
1229
|
for (i = 1; i < num_regs; i++) {
|
1101
|
-
VALUE str = extract_range(p,
|
1102
|
-
|
1230
|
+
VALUE str = extract_range(p,
|
1231
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1232
|
+
adjust_register_position(p, p->regs.end[i]));
|
1103
1233
|
rb_ary_push(new_ary, str);
|
1104
1234
|
}
|
1105
1235
|
|
@@ -1154,7 +1284,9 @@ strscan_pre_match(VALUE self)
|
|
1154
1284
|
|
1155
1285
|
GET_SCANNER(self, p);
|
1156
1286
|
if (! MATCHED_P(p)) return Qnil;
|
1157
|
-
return extract_range(p,
|
1287
|
+
return extract_range(p,
|
1288
|
+
0,
|
1289
|
+
adjust_register_position(p, p->regs.beg[0]));
|
1158
1290
|
}
|
1159
1291
|
|
1160
1292
|
/*
|
@@ -1173,7 +1305,9 @@ strscan_post_match(VALUE self)
|
|
1173
1305
|
|
1174
1306
|
GET_SCANNER(self, p);
|
1175
1307
|
if (! MATCHED_P(p)) return Qnil;
|
1176
|
-
return extract_range(p,
|
1308
|
+
return extract_range(p,
|
1309
|
+
adjust_register_position(p, p->regs.end[0]),
|
1310
|
+
S_LEN(p));
|
1177
1311
|
}
|
1178
1312
|
|
1179
1313
|
/*
|
@@ -1302,6 +1436,23 @@ inspect2(struct strscanner *p)
|
|
1302
1436
|
return rb_str_dump(str);
|
1303
1437
|
}
|
1304
1438
|
|
1439
|
+
/*
|
1440
|
+
* call-seq:
|
1441
|
+
* scanner.fixed_anchor? -> true or false
|
1442
|
+
*
|
1443
|
+
* Whether +scanner+ uses fixed anchor mode or not.
|
1444
|
+
*
|
1445
|
+
* If fixed anchor mode is used, +\A+ always matches the beginning of
|
1446
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
1447
|
+
*/
|
1448
|
+
static VALUE
|
1449
|
+
strscan_fixed_anchor_p(VALUE self)
|
1450
|
+
{
|
1451
|
+
struct strscanner *p;
|
1452
|
+
p = check_strscan(self);
|
1453
|
+
return p->fixed_anchor_p ? Qtrue : Qfalse;
|
1454
|
+
}
|
1455
|
+
|
1305
1456
|
/* =======================================================================
|
1306
1457
|
Ruby Interface
|
1307
1458
|
======================================================================= */
|
@@ -1412,6 +1563,7 @@ inspect2(struct strscanner *p)
|
|
1412
1563
|
void
|
1413
1564
|
Init_strscan(void)
|
1414
1565
|
{
|
1566
|
+
#undef rb_intern
|
1415
1567
|
ID id_scanerr = rb_intern("ScanError");
|
1416
1568
|
VALUE tmp;
|
1417
1569
|
|
@@ -1487,4 +1639,6 @@ Init_strscan(void)
|
|
1487
1639
|
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
|
1488
1640
|
|
1489
1641
|
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
|
1642
|
+
|
1643
|
+
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1490
1644
|
}
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: strscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
|
+
- Sutou Kouhei
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2019-10-12 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: rake-compiler
|
@@ -24,9 +25,24 @@ dependencies:
|
|
24
25
|
- - ">="
|
25
26
|
- !ruby/object:Gem::Version
|
26
27
|
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: benchmark-driver
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
27
42
|
description: Provides lexical scanning operations on a String.
|
28
43
|
email:
|
29
44
|
-
|
45
|
+
- kou@cozmixng.org
|
30
46
|
executables: []
|
31
47
|
extensions:
|
32
48
|
- ext/strscan/extconf.rb
|
@@ -56,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
56
72
|
version: '0'
|
57
73
|
requirements: []
|
58
74
|
rubyforge_project:
|
59
|
-
rubygems_version: 2.7.6
|
75
|
+
rubygems_version: 2.7.6.2
|
60
76
|
signing_key:
|
61
77
|
specification_version: 4
|
62
78
|
summary: Provides lexical scanning operations on a String.
|