strscan 1.0.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/strscan/extconf.rb +2 -1
- data/ext/strscan/strscan.c +227 -62
- metadata +19 -5
- data/ext/strscan/regenc.h +0 -254
- data/ext/strscan/regint.h +0 -938
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 126a67b7200ba8c3f3aeb661e55a30a37b23bdd9fe68c3f8ebfc4ad9f6c2cdcb
|
4
|
+
data.tar.gz: d0fafda0d3353b100c32a1437ee58258742f914a2dbb49dfde267cb15974f9f5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef088b41a17fd9afbee17734527fa911aea8f5b16b6f4547efb6246648f1b81ce54c6c09ebddd8c1d5b9d8c02b997d9e12f83d32214a2f25bcb5854238ad9f0d
|
7
|
+
data.tar.gz: 4d93362e2c96ffd62c8ccfdbfff4d77558d1948173901176ff80f0b0c12b272bd6787a6016146756bb71b19a68a00d6add0de3cb95086cec78142873584ff6c6
|
data/ext/strscan/extconf.rb
CHANGED
data/ext/strscan/strscan.c
CHANGED
@@ -11,9 +11,18 @@
|
|
11
11
|
#include "ruby/ruby.h"
|
12
12
|
#include "ruby/re.h"
|
13
13
|
#include "ruby/encoding.h"
|
14
|
-
#include "regint.h"
|
15
14
|
|
16
|
-
#
|
15
|
+
#ifdef RUBY_EXTCONF_H
|
16
|
+
# include RUBY_EXTCONF_H
|
17
|
+
#endif
|
18
|
+
|
19
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
20
|
+
extern size_t onig_region_memsize(const struct re_registers *regs);
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#include <stdbool.h>
|
24
|
+
|
25
|
+
#define STRSCAN_VERSION "1.0.3"
|
17
26
|
|
18
27
|
/* =======================================================================
|
19
28
|
Data Type Definitions
|
@@ -41,6 +50,9 @@ struct strscanner
|
|
41
50
|
|
42
51
|
/* regexp used for last scan */
|
43
52
|
VALUE regex;
|
53
|
+
|
54
|
+
/* anchor mode */
|
55
|
+
bool fixed_anchor_p;
|
44
56
|
};
|
45
57
|
|
46
58
|
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
|
@@ -186,7 +198,11 @@ static size_t
|
|
186
198
|
strscan_memsize(const void *ptr)
|
187
199
|
{
|
188
200
|
const struct strscanner *p = ptr;
|
189
|
-
|
201
|
+
size_t size = sizeof(*p) - sizeof(p->regs);
|
202
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
203
|
+
size += onig_region_memsize(&p->regs);
|
204
|
+
#endif
|
205
|
+
return size;
|
190
206
|
}
|
191
207
|
|
192
208
|
static const rb_data_type_t strscanner_type = {
|
@@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass)
|
|
208
224
|
}
|
209
225
|
|
210
226
|
/*
|
211
|
-
* call-seq:
|
227
|
+
* call-seq:
|
228
|
+
* StringScanner.new(string, fixed_anchor: false)
|
229
|
+
* StringScanner.new(string, dup = false)
|
212
230
|
*
|
213
231
|
* Creates a new StringScanner object to scan over the given +string+.
|
232
|
+
*
|
233
|
+
* If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
|
234
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
235
|
+
*
|
214
236
|
* The +dup+ argument is obsolete and is no longer used.
|
215
237
|
*/
|
216
238
|
static VALUE
|
217
239
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
218
240
|
{
|
219
241
|
struct strscanner *p;
|
220
|
-
VALUE str,
|
242
|
+
VALUE str, options;
|
221
243
|
|
222
244
|
p = check_strscan(self);
|
223
|
-
rb_scan_args(argc, argv, "11", &str, &
|
245
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
246
|
+
options = rb_check_hash_type(options);
|
247
|
+
if (!NIL_P(options)) {
|
248
|
+
VALUE fixed_anchor;
|
249
|
+
ID keyword_ids[1];
|
250
|
+
keyword_ids[0] = rb_intern("fixed_anchor");
|
251
|
+
rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
|
252
|
+
if (fixed_anchor == Qundef) {
|
253
|
+
p->fixed_anchor_p = false;
|
254
|
+
}
|
255
|
+
else {
|
256
|
+
p->fixed_anchor_p = RTEST(fixed_anchor);
|
257
|
+
}
|
258
|
+
}
|
259
|
+
else {
|
260
|
+
p->fixed_anchor_p = false;
|
261
|
+
}
|
224
262
|
StringValue(str);
|
225
263
|
p->str = str;
|
226
264
|
|
@@ -294,7 +332,7 @@ strscan_reset(VALUE self)
|
|
294
332
|
* terminate
|
295
333
|
* clear
|
296
334
|
*
|
297
|
-
*
|
335
|
+
* Sets the scan pointer to the end of the string and clears matching data.
|
298
336
|
*/
|
299
337
|
static VALUE
|
300
338
|
strscan_terminate(VALUE self)
|
@@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self)
|
|
425
463
|
/*
|
426
464
|
* call-seq: pos=(n)
|
427
465
|
*
|
428
|
-
*
|
466
|
+
* Sets the byte position of the scan pointer.
|
429
467
|
*
|
430
468
|
* s = StringScanner.new('test string')
|
431
469
|
* s.pos = 7 # -> 7
|
@@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v)
|
|
446
484
|
return INT2NUM(i);
|
447
485
|
}
|
448
486
|
|
487
|
+
static inline UChar *
|
488
|
+
match_target(struct strscanner *p)
|
489
|
+
{
|
490
|
+
if (p->fixed_anchor_p) {
|
491
|
+
return (UChar *)S_PBEG(p);
|
492
|
+
}
|
493
|
+
else
|
494
|
+
{
|
495
|
+
return (UChar *)CURPTR(p);
|
496
|
+
}
|
497
|
+
}
|
498
|
+
|
499
|
+
static inline void
|
500
|
+
set_registers(struct strscanner *p, size_t length)
|
501
|
+
{
|
502
|
+
onig_region_clear(&(p->regs));
|
503
|
+
if (p->fixed_anchor_p) {
|
504
|
+
onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
|
505
|
+
}
|
506
|
+
else
|
507
|
+
{
|
508
|
+
onig_region_set(&(p->regs), 0, 0, length);
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
static inline void
|
513
|
+
succ(struct strscanner *p)
|
514
|
+
{
|
515
|
+
if (p->fixed_anchor_p) {
|
516
|
+
p->curr = p->regs.end[0];
|
517
|
+
}
|
518
|
+
else
|
519
|
+
{
|
520
|
+
p->curr += p->regs.end[0];
|
521
|
+
}
|
522
|
+
}
|
523
|
+
|
524
|
+
static inline long
|
525
|
+
last_match_length(struct strscanner *p)
|
526
|
+
{
|
527
|
+
if (p->fixed_anchor_p) {
|
528
|
+
return p->regs.end[0] - p->prev;
|
529
|
+
}
|
530
|
+
else
|
531
|
+
{
|
532
|
+
return p->regs.end[0];
|
533
|
+
}
|
534
|
+
}
|
535
|
+
|
536
|
+
static inline long
|
537
|
+
adjust_register_position(struct strscanner *p, long position)
|
538
|
+
{
|
539
|
+
if (p->fixed_anchor_p) {
|
540
|
+
return position;
|
541
|
+
}
|
542
|
+
else {
|
543
|
+
return p->prev + position;
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
449
547
|
static VALUE
|
450
|
-
strscan_do_scan(VALUE self, VALUE
|
548
|
+
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
451
549
|
{
|
452
|
-
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
453
550
|
struct strscanner *p;
|
454
|
-
regex_t *re;
|
455
|
-
long ret;
|
456
|
-
int tmpreg;
|
457
551
|
|
458
|
-
|
552
|
+
if (headonly) {
|
553
|
+
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
554
|
+
StringValue(pattern);
|
555
|
+
}
|
556
|
+
}
|
557
|
+
else {
|
558
|
+
Check_Type(pattern, T_REGEXP);
|
559
|
+
}
|
459
560
|
GET_SCANNER(self, p);
|
460
561
|
|
461
562
|
CLEAR_MATCH_STATUS(p);
|
@@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
463
564
|
return Qnil;
|
464
565
|
}
|
465
566
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
567
|
+
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
568
|
+
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
569
|
+
regex_t *re;
|
570
|
+
long ret;
|
571
|
+
int tmpreg;
|
572
|
+
|
573
|
+
p->regex = pattern;
|
574
|
+
re = rb_reg_prepare_re(pattern, p->str);
|
575
|
+
tmpreg = re != RREGEXP_PTR(pattern);
|
576
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt++;
|
577
|
+
|
578
|
+
if (headonly) {
|
579
|
+
ret = onig_match(re,
|
580
|
+
match_target(p),
|
581
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
582
|
+
(UChar* )CURPTR(p),
|
583
|
+
&(p->regs),
|
584
|
+
ONIG_OPTION_NONE);
|
585
|
+
}
|
586
|
+
else {
|
587
|
+
ret = onig_search(re,
|
588
|
+
match_target(p),
|
589
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
590
|
+
(UChar* )CURPTR(p),
|
591
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
592
|
+
&(p->regs),
|
593
|
+
ONIG_OPTION_NONE);
|
594
|
+
}
|
595
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt--;
|
596
|
+
if (tmpreg) {
|
597
|
+
if (RREGEXP(pattern)->usecnt) {
|
598
|
+
onig_free(re);
|
599
|
+
}
|
600
|
+
else {
|
601
|
+
onig_free(RREGEXP_PTR(pattern));
|
602
|
+
RREGEXP_PTR(pattern) = re;
|
603
|
+
}
|
604
|
+
}
|
470
605
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
606
|
+
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
607
|
+
if (ret < 0) {
|
608
|
+
/* not matched */
|
609
|
+
return Qnil;
|
610
|
+
}
|
475
611
|
}
|
476
612
|
else {
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
&(p->regs), ONIG_OPTION_NONE);
|
481
|
-
}
|
482
|
-
if (!tmpreg) RREGEXP(regex)->usecnt--;
|
483
|
-
if (tmpreg) {
|
484
|
-
if (RREGEXP(regex)->usecnt) {
|
485
|
-
onig_free(re);
|
613
|
+
rb_enc_check(p->str, pattern);
|
614
|
+
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
615
|
+
return Qnil;
|
486
616
|
}
|
487
|
-
|
488
|
-
|
489
|
-
RREGEXP_PTR(regex) = re;
|
617
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
618
|
+
return Qnil;
|
490
619
|
}
|
491
|
-
|
492
|
-
|
493
|
-
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
494
|
-
if (ret < 0) {
|
495
|
-
/* not matched */
|
496
|
-
return Qnil;
|
620
|
+
set_registers(p, RSTRING_LEN(pattern));
|
497
621
|
}
|
498
622
|
|
499
623
|
MATCHED(p);
|
500
624
|
p->prev = p->curr;
|
625
|
+
|
501
626
|
if (succptr) {
|
502
|
-
p
|
503
|
-
}
|
504
|
-
if (getstr) {
|
505
|
-
return extract_beg_len(p, p->prev, p->regs.end[0]);
|
627
|
+
succ(p);
|
506
628
|
}
|
507
|
-
|
508
|
-
|
629
|
+
{
|
630
|
+
const long length = last_match_length(p);
|
631
|
+
if (getstr) {
|
632
|
+
return extract_beg_len(p, p->prev, length);
|
633
|
+
}
|
634
|
+
else {
|
635
|
+
return INT2FIX(length);
|
636
|
+
}
|
509
637
|
}
|
510
638
|
}
|
511
639
|
|
@@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
520
648
|
* p s.scan(/\w+/) # -> "test"
|
521
649
|
* p s.scan(/\w+/) # -> nil
|
522
650
|
* p s.scan(/\s+/) # -> " "
|
523
|
-
* p s.scan(
|
651
|
+
* p s.scan("str") # -> "str"
|
652
|
+
* p s.scan(/\w+/) # -> "ing"
|
524
653
|
* p s.scan(/./) # -> nil
|
525
654
|
*
|
526
655
|
*/
|
@@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re)
|
|
539
668
|
* s = StringScanner.new('test string')
|
540
669
|
* p s.match?(/\w+/) # -> 4
|
541
670
|
* p s.match?(/\w+/) # -> 4
|
671
|
+
* p s.match?("test") # -> 4
|
542
672
|
* p s.match?(/\s+/) # -> nil
|
543
673
|
*/
|
544
674
|
static VALUE
|
@@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re)
|
|
560
690
|
* p s.skip(/\w+/) # -> 4
|
561
691
|
* p s.skip(/\w+/) # -> nil
|
562
692
|
* p s.skip(/\s+/) # -> 1
|
563
|
-
* p s.skip(
|
693
|
+
* p s.skip("st") # -> 2
|
694
|
+
* p s.skip(/\w+/) # -> 4
|
564
695
|
* p s.skip(/./) # -> nil
|
565
696
|
*
|
566
697
|
*/
|
@@ -704,7 +835,12 @@ static void
|
|
704
835
|
adjust_registers_to_matched(struct strscanner *p)
|
705
836
|
{
|
706
837
|
onig_region_clear(&(p->regs));
|
707
|
-
|
838
|
+
if (p->fixed_anchor_p) {
|
839
|
+
onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
|
840
|
+
}
|
841
|
+
else {
|
842
|
+
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
|
843
|
+
}
|
708
844
|
}
|
709
845
|
|
710
846
|
/*
|
@@ -738,8 +874,9 @@ strscan_getch(VALUE self)
|
|
738
874
|
p->curr += len;
|
739
875
|
MATCHED(p);
|
740
876
|
adjust_registers_to_matched(p);
|
741
|
-
return extract_range(p,
|
742
|
-
|
877
|
+
return extract_range(p,
|
878
|
+
adjust_register_position(p, p->regs.beg[0]),
|
879
|
+
adjust_register_position(p, p->regs.end[0]));
|
743
880
|
}
|
744
881
|
|
745
882
|
/*
|
@@ -772,8 +909,9 @@ strscan_get_byte(VALUE self)
|
|
772
909
|
p->curr++;
|
773
910
|
MATCHED(p);
|
774
911
|
adjust_registers_to_matched(p);
|
775
|
-
return extract_range(p,
|
776
|
-
|
912
|
+
return extract_range(p,
|
913
|
+
adjust_register_position(p, p->regs.beg[0]),
|
914
|
+
adjust_register_position(p, p->regs.end[0]));
|
777
915
|
}
|
778
916
|
|
779
917
|
/*
|
@@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
826
964
|
}
|
827
965
|
|
828
966
|
/*
|
829
|
-
*
|
967
|
+
* Sets the scan pointer to the previous position. Only one previous position is
|
830
968
|
* remembered, and it changes with each scanning operation.
|
831
969
|
*
|
832
970
|
* s = StringScanner.new('test string')
|
@@ -951,8 +1089,9 @@ strscan_matched(VALUE self)
|
|
951
1089
|
|
952
1090
|
GET_SCANNER(self, p);
|
953
1091
|
if (! MATCHED_P(p)) return Qnil;
|
954
|
-
return extract_range(p,
|
955
|
-
|
1092
|
+
return extract_range(p,
|
1093
|
+
adjust_register_position(p, p->regs.beg[0]),
|
1094
|
+
adjust_register_position(p, p->regs.end[0]));
|
956
1095
|
}
|
957
1096
|
|
958
1097
|
/*
|
@@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1048
1187
|
if (i >= p->regs.num_regs) return Qnil;
|
1049
1188
|
if (p->regs.beg[i] == -1) return Qnil;
|
1050
1189
|
|
1051
|
-
return extract_range(p,
|
1052
|
-
|
1190
|
+
return extract_range(p,
|
1191
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1192
|
+
adjust_register_position(p, p->regs.end[i]));
|
1053
1193
|
}
|
1054
1194
|
|
1055
1195
|
/*
|
@@ -1098,8 +1238,9 @@ strscan_captures(VALUE self)
|
|
1098
1238
|
new_ary = rb_ary_new2(num_regs);
|
1099
1239
|
|
1100
1240
|
for (i = 1; i < num_regs; i++) {
|
1101
|
-
VALUE str = extract_range(p,
|
1102
|
-
|
1241
|
+
VALUE str = extract_range(p,
|
1242
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1243
|
+
adjust_register_position(p, p->regs.end[i]));
|
1103
1244
|
rb_ary_push(new_ary, str);
|
1104
1245
|
}
|
1105
1246
|
|
@@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self)
|
|
1154
1295
|
|
1155
1296
|
GET_SCANNER(self, p);
|
1156
1297
|
if (! MATCHED_P(p)) return Qnil;
|
1157
|
-
return extract_range(p,
|
1298
|
+
return extract_range(p,
|
1299
|
+
0,
|
1300
|
+
adjust_register_position(p, p->regs.beg[0]));
|
1158
1301
|
}
|
1159
1302
|
|
1160
1303
|
/*
|
@@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self)
|
|
1173
1316
|
|
1174
1317
|
GET_SCANNER(self, p);
|
1175
1318
|
if (! MATCHED_P(p)) return Qnil;
|
1176
|
-
return extract_range(p,
|
1319
|
+
return extract_range(p,
|
1320
|
+
adjust_register_position(p, p->regs.end[0]),
|
1321
|
+
S_LEN(p));
|
1177
1322
|
}
|
1178
1323
|
|
1179
1324
|
/*
|
@@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p)
|
|
1302
1447
|
return rb_str_dump(str);
|
1303
1448
|
}
|
1304
1449
|
|
1450
|
+
/*
|
1451
|
+
* call-seq:
|
1452
|
+
* scanner.fixed_anchor? -> true or false
|
1453
|
+
*
|
1454
|
+
* Whether +scanner+ uses fixed anchor mode or not.
|
1455
|
+
*
|
1456
|
+
* If fixed anchor mode is used, +\A+ always matches the beginning of
|
1457
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
1458
|
+
*/
|
1459
|
+
static VALUE
|
1460
|
+
strscan_fixed_anchor_p(VALUE self)
|
1461
|
+
{
|
1462
|
+
struct strscanner *p;
|
1463
|
+
p = check_strscan(self);
|
1464
|
+
return p->fixed_anchor_p ? Qtrue : Qfalse;
|
1465
|
+
}
|
1466
|
+
|
1305
1467
|
/* =======================================================================
|
1306
1468
|
Ruby Interface
|
1307
1469
|
======================================================================= */
|
@@ -1412,6 +1574,7 @@ inspect2(struct strscanner *p)
|
|
1412
1574
|
void
|
1413
1575
|
Init_strscan(void)
|
1414
1576
|
{
|
1577
|
+
#undef rb_intern
|
1415
1578
|
ID id_scanerr = rb_intern("ScanError");
|
1416
1579
|
VALUE tmp;
|
1417
1580
|
|
@@ -1487,4 +1650,6 @@ Init_strscan(void)
|
|
1487
1650
|
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
|
1488
1651
|
|
1489
1652
|
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
|
1653
|
+
|
1654
|
+
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1490
1655
|
}
|