strscan 1.0.0 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/strscan/extconf.rb +2 -1
- data/ext/strscan/strscan.c +227 -62
- metadata +19 -5
- data/ext/strscan/regenc.h +0 -254
- data/ext/strscan/regint.h +0 -938
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 126a67b7200ba8c3f3aeb661e55a30a37b23bdd9fe68c3f8ebfc4ad9f6c2cdcb
|
4
|
+
data.tar.gz: d0fafda0d3353b100c32a1437ee58258742f914a2dbb49dfde267cb15974f9f5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef088b41a17fd9afbee17734527fa911aea8f5b16b6f4547efb6246648f1b81ce54c6c09ebddd8c1d5b9d8c02b997d9e12f83d32214a2f25bcb5854238ad9f0d
|
7
|
+
data.tar.gz: 4d93362e2c96ffd62c8ccfdbfff4d77558d1948173901176ff80f0b0c12b272bd6787a6016146756bb71b19a68a00d6add0de3cb95086cec78142873584ff6c6
|
data/ext/strscan/extconf.rb
CHANGED
data/ext/strscan/strscan.c
CHANGED
@@ -11,9 +11,18 @@
|
|
11
11
|
#include "ruby/ruby.h"
|
12
12
|
#include "ruby/re.h"
|
13
13
|
#include "ruby/encoding.h"
|
14
|
-
#include "regint.h"
|
15
14
|
|
16
|
-
#
|
15
|
+
#ifdef RUBY_EXTCONF_H
|
16
|
+
# include RUBY_EXTCONF_H
|
17
|
+
#endif
|
18
|
+
|
19
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
20
|
+
extern size_t onig_region_memsize(const struct re_registers *regs);
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#include <stdbool.h>
|
24
|
+
|
25
|
+
#define STRSCAN_VERSION "1.0.3"
|
17
26
|
|
18
27
|
/* =======================================================================
|
19
28
|
Data Type Definitions
|
@@ -41,6 +50,9 @@ struct strscanner
|
|
41
50
|
|
42
51
|
/* regexp used for last scan */
|
43
52
|
VALUE regex;
|
53
|
+
|
54
|
+
/* anchor mode */
|
55
|
+
bool fixed_anchor_p;
|
44
56
|
};
|
45
57
|
|
46
58
|
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
|
@@ -186,7 +198,11 @@ static size_t
|
|
186
198
|
strscan_memsize(const void *ptr)
|
187
199
|
{
|
188
200
|
const struct strscanner *p = ptr;
|
189
|
-
|
201
|
+
size_t size = sizeof(*p) - sizeof(p->regs);
|
202
|
+
#ifdef HAVE_ONIG_REGION_MEMSIZE
|
203
|
+
size += onig_region_memsize(&p->regs);
|
204
|
+
#endif
|
205
|
+
return size;
|
190
206
|
}
|
191
207
|
|
192
208
|
static const rb_data_type_t strscanner_type = {
|
@@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass)
|
|
208
224
|
}
|
209
225
|
|
210
226
|
/*
|
211
|
-
* call-seq:
|
227
|
+
* call-seq:
|
228
|
+
* StringScanner.new(string, fixed_anchor: false)
|
229
|
+
* StringScanner.new(string, dup = false)
|
212
230
|
*
|
213
231
|
* Creates a new StringScanner object to scan over the given +string+.
|
232
|
+
*
|
233
|
+
* If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
|
234
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
235
|
+
*
|
214
236
|
* The +dup+ argument is obsolete and is no longer used.
|
215
237
|
*/
|
216
238
|
static VALUE
|
217
239
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
218
240
|
{
|
219
241
|
struct strscanner *p;
|
220
|
-
VALUE str,
|
242
|
+
VALUE str, options;
|
221
243
|
|
222
244
|
p = check_strscan(self);
|
223
|
-
rb_scan_args(argc, argv, "11", &str, &
|
245
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
246
|
+
options = rb_check_hash_type(options);
|
247
|
+
if (!NIL_P(options)) {
|
248
|
+
VALUE fixed_anchor;
|
249
|
+
ID keyword_ids[1];
|
250
|
+
keyword_ids[0] = rb_intern("fixed_anchor");
|
251
|
+
rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
|
252
|
+
if (fixed_anchor == Qundef) {
|
253
|
+
p->fixed_anchor_p = false;
|
254
|
+
}
|
255
|
+
else {
|
256
|
+
p->fixed_anchor_p = RTEST(fixed_anchor);
|
257
|
+
}
|
258
|
+
}
|
259
|
+
else {
|
260
|
+
p->fixed_anchor_p = false;
|
261
|
+
}
|
224
262
|
StringValue(str);
|
225
263
|
p->str = str;
|
226
264
|
|
@@ -294,7 +332,7 @@ strscan_reset(VALUE self)
|
|
294
332
|
* terminate
|
295
333
|
* clear
|
296
334
|
*
|
297
|
-
*
|
335
|
+
* Sets the scan pointer to the end of the string and clears matching data.
|
298
336
|
*/
|
299
337
|
static VALUE
|
300
338
|
strscan_terminate(VALUE self)
|
@@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self)
|
|
425
463
|
/*
|
426
464
|
* call-seq: pos=(n)
|
427
465
|
*
|
428
|
-
*
|
466
|
+
* Sets the byte position of the scan pointer.
|
429
467
|
*
|
430
468
|
* s = StringScanner.new('test string')
|
431
469
|
* s.pos = 7 # -> 7
|
@@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v)
|
|
446
484
|
return INT2NUM(i);
|
447
485
|
}
|
448
486
|
|
487
|
+
static inline UChar *
|
488
|
+
match_target(struct strscanner *p)
|
489
|
+
{
|
490
|
+
if (p->fixed_anchor_p) {
|
491
|
+
return (UChar *)S_PBEG(p);
|
492
|
+
}
|
493
|
+
else
|
494
|
+
{
|
495
|
+
return (UChar *)CURPTR(p);
|
496
|
+
}
|
497
|
+
}
|
498
|
+
|
499
|
+
static inline void
|
500
|
+
set_registers(struct strscanner *p, size_t length)
|
501
|
+
{
|
502
|
+
onig_region_clear(&(p->regs));
|
503
|
+
if (p->fixed_anchor_p) {
|
504
|
+
onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
|
505
|
+
}
|
506
|
+
else
|
507
|
+
{
|
508
|
+
onig_region_set(&(p->regs), 0, 0, length);
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
static inline void
|
513
|
+
succ(struct strscanner *p)
|
514
|
+
{
|
515
|
+
if (p->fixed_anchor_p) {
|
516
|
+
p->curr = p->regs.end[0];
|
517
|
+
}
|
518
|
+
else
|
519
|
+
{
|
520
|
+
p->curr += p->regs.end[0];
|
521
|
+
}
|
522
|
+
}
|
523
|
+
|
524
|
+
static inline long
|
525
|
+
last_match_length(struct strscanner *p)
|
526
|
+
{
|
527
|
+
if (p->fixed_anchor_p) {
|
528
|
+
return p->regs.end[0] - p->prev;
|
529
|
+
}
|
530
|
+
else
|
531
|
+
{
|
532
|
+
return p->regs.end[0];
|
533
|
+
}
|
534
|
+
}
|
535
|
+
|
536
|
+
static inline long
|
537
|
+
adjust_register_position(struct strscanner *p, long position)
|
538
|
+
{
|
539
|
+
if (p->fixed_anchor_p) {
|
540
|
+
return position;
|
541
|
+
}
|
542
|
+
else {
|
543
|
+
return p->prev + position;
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
449
547
|
static VALUE
|
450
|
-
strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
548
|
+
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
451
549
|
{
|
452
|
-
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
453
550
|
struct strscanner *p;
|
454
|
-
regex_t *re;
|
455
|
-
long ret;
|
456
|
-
int tmpreg;
|
457
551
|
|
458
|
-
|
552
|
+
if (headonly) {
|
553
|
+
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
554
|
+
StringValue(pattern);
|
555
|
+
}
|
556
|
+
}
|
557
|
+
else {
|
558
|
+
Check_Type(pattern, T_REGEXP);
|
559
|
+
}
|
459
560
|
GET_SCANNER(self, p);
|
460
561
|
|
461
562
|
CLEAR_MATCH_STATUS(p);
|
@@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
463
564
|
return Qnil;
|
464
565
|
}
|
465
566
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
567
|
+
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
568
|
+
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
569
|
+
regex_t *re;
|
570
|
+
long ret;
|
571
|
+
int tmpreg;
|
572
|
+
|
573
|
+
p->regex = pattern;
|
574
|
+
re = rb_reg_prepare_re(pattern, p->str);
|
575
|
+
tmpreg = re != RREGEXP_PTR(pattern);
|
576
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt++;
|
577
|
+
|
578
|
+
if (headonly) {
|
579
|
+
ret = onig_match(re,
|
580
|
+
match_target(p),
|
581
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
582
|
+
(UChar* )CURPTR(p),
|
583
|
+
&(p->regs),
|
584
|
+
ONIG_OPTION_NONE);
|
585
|
+
}
|
586
|
+
else {
|
587
|
+
ret = onig_search(re,
|
588
|
+
match_target(p),
|
589
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
590
|
+
(UChar* )CURPTR(p),
|
591
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
592
|
+
&(p->regs),
|
593
|
+
ONIG_OPTION_NONE);
|
594
|
+
}
|
595
|
+
if (!tmpreg) RREGEXP(pattern)->usecnt--;
|
596
|
+
if (tmpreg) {
|
597
|
+
if (RREGEXP(pattern)->usecnt) {
|
598
|
+
onig_free(re);
|
599
|
+
}
|
600
|
+
else {
|
601
|
+
onig_free(RREGEXP_PTR(pattern));
|
602
|
+
RREGEXP_PTR(pattern) = re;
|
603
|
+
}
|
604
|
+
}
|
470
605
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
606
|
+
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
607
|
+
if (ret < 0) {
|
608
|
+
/* not matched */
|
609
|
+
return Qnil;
|
610
|
+
}
|
475
611
|
}
|
476
612
|
else {
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
&(p->regs), ONIG_OPTION_NONE);
|
481
|
-
}
|
482
|
-
if (!tmpreg) RREGEXP(regex)->usecnt--;
|
483
|
-
if (tmpreg) {
|
484
|
-
if (RREGEXP(regex)->usecnt) {
|
485
|
-
onig_free(re);
|
613
|
+
rb_enc_check(p->str, pattern);
|
614
|
+
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
615
|
+
return Qnil;
|
486
616
|
}
|
487
|
-
|
488
|
-
|
489
|
-
RREGEXP_PTR(regex) = re;
|
617
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
618
|
+
return Qnil;
|
490
619
|
}
|
491
|
-
|
492
|
-
|
493
|
-
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
494
|
-
if (ret < 0) {
|
495
|
-
/* not matched */
|
496
|
-
return Qnil;
|
620
|
+
set_registers(p, RSTRING_LEN(pattern));
|
497
621
|
}
|
498
622
|
|
499
623
|
MATCHED(p);
|
500
624
|
p->prev = p->curr;
|
625
|
+
|
501
626
|
if (succptr) {
|
502
|
-
p
|
503
|
-
}
|
504
|
-
if (getstr) {
|
505
|
-
return extract_beg_len(p, p->prev, p->regs.end[0]);
|
627
|
+
succ(p);
|
506
628
|
}
|
507
|
-
|
508
|
-
|
629
|
+
{
|
630
|
+
const long length = last_match_length(p);
|
631
|
+
if (getstr) {
|
632
|
+
return extract_beg_len(p, p->prev, length);
|
633
|
+
}
|
634
|
+
else {
|
635
|
+
return INT2FIX(length);
|
636
|
+
}
|
509
637
|
}
|
510
638
|
}
|
511
639
|
|
@@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
|
520
648
|
* p s.scan(/\w+/) # -> "test"
|
521
649
|
* p s.scan(/\w+/) # -> nil
|
522
650
|
* p s.scan(/\s+/) # -> " "
|
523
|
-
* p s.scan(
|
651
|
+
* p s.scan("str") # -> "str"
|
652
|
+
* p s.scan(/\w+/) # -> "ing"
|
524
653
|
* p s.scan(/./) # -> nil
|
525
654
|
*
|
526
655
|
*/
|
@@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re)
|
|
539
668
|
* s = StringScanner.new('test string')
|
540
669
|
* p s.match?(/\w+/) # -> 4
|
541
670
|
* p s.match?(/\w+/) # -> 4
|
671
|
+
* p s.match?("test") # -> 4
|
542
672
|
* p s.match?(/\s+/) # -> nil
|
543
673
|
*/
|
544
674
|
static VALUE
|
@@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re)
|
|
560
690
|
* p s.skip(/\w+/) # -> 4
|
561
691
|
* p s.skip(/\w+/) # -> nil
|
562
692
|
* p s.skip(/\s+/) # -> 1
|
563
|
-
* p s.skip(
|
693
|
+
* p s.skip("st") # -> 2
|
694
|
+
* p s.skip(/\w+/) # -> 4
|
564
695
|
* p s.skip(/./) # -> nil
|
565
696
|
*
|
566
697
|
*/
|
@@ -704,7 +835,12 @@ static void
|
|
704
835
|
adjust_registers_to_matched(struct strscanner *p)
|
705
836
|
{
|
706
837
|
onig_region_clear(&(p->regs));
|
707
|
-
|
838
|
+
if (p->fixed_anchor_p) {
|
839
|
+
onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
|
840
|
+
}
|
841
|
+
else {
|
842
|
+
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
|
843
|
+
}
|
708
844
|
}
|
709
845
|
|
710
846
|
/*
|
@@ -738,8 +874,9 @@ strscan_getch(VALUE self)
|
|
738
874
|
p->curr += len;
|
739
875
|
MATCHED(p);
|
740
876
|
adjust_registers_to_matched(p);
|
741
|
-
return extract_range(p,
|
742
|
-
|
877
|
+
return extract_range(p,
|
878
|
+
adjust_register_position(p, p->regs.beg[0]),
|
879
|
+
adjust_register_position(p, p->regs.end[0]));
|
743
880
|
}
|
744
881
|
|
745
882
|
/*
|
@@ -772,8 +909,9 @@ strscan_get_byte(VALUE self)
|
|
772
909
|
p->curr++;
|
773
910
|
MATCHED(p);
|
774
911
|
adjust_registers_to_matched(p);
|
775
|
-
return extract_range(p,
|
776
|
-
|
912
|
+
return extract_range(p,
|
913
|
+
adjust_register_position(p, p->regs.beg[0]),
|
914
|
+
adjust_register_position(p, p->regs.end[0]));
|
777
915
|
}
|
778
916
|
|
779
917
|
/*
|
@@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
826
964
|
}
|
827
965
|
|
828
966
|
/*
|
829
|
-
*
|
967
|
+
* Sets the scan pointer to the previous position. Only one previous position is
|
830
968
|
* remembered, and it changes with each scanning operation.
|
831
969
|
*
|
832
970
|
* s = StringScanner.new('test string')
|
@@ -951,8 +1089,9 @@ strscan_matched(VALUE self)
|
|
951
1089
|
|
952
1090
|
GET_SCANNER(self, p);
|
953
1091
|
if (! MATCHED_P(p)) return Qnil;
|
954
|
-
return extract_range(p,
|
955
|
-
|
1092
|
+
return extract_range(p,
|
1093
|
+
adjust_register_position(p, p->regs.beg[0]),
|
1094
|
+
adjust_register_position(p, p->regs.end[0]));
|
956
1095
|
}
|
957
1096
|
|
958
1097
|
/*
|
@@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1048
1187
|
if (i >= p->regs.num_regs) return Qnil;
|
1049
1188
|
if (p->regs.beg[i] == -1) return Qnil;
|
1050
1189
|
|
1051
|
-
return extract_range(p,
|
1052
|
-
|
1190
|
+
return extract_range(p,
|
1191
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1192
|
+
adjust_register_position(p, p->regs.end[i]));
|
1053
1193
|
}
|
1054
1194
|
|
1055
1195
|
/*
|
@@ -1098,8 +1238,9 @@ strscan_captures(VALUE self)
|
|
1098
1238
|
new_ary = rb_ary_new2(num_regs);
|
1099
1239
|
|
1100
1240
|
for (i = 1; i < num_regs; i++) {
|
1101
|
-
VALUE str = extract_range(p,
|
1102
|
-
|
1241
|
+
VALUE str = extract_range(p,
|
1242
|
+
adjust_register_position(p, p->regs.beg[i]),
|
1243
|
+
adjust_register_position(p, p->regs.end[i]));
|
1103
1244
|
rb_ary_push(new_ary, str);
|
1104
1245
|
}
|
1105
1246
|
|
@@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self)
|
|
1154
1295
|
|
1155
1296
|
GET_SCANNER(self, p);
|
1156
1297
|
if (! MATCHED_P(p)) return Qnil;
|
1157
|
-
return extract_range(p,
|
1298
|
+
return extract_range(p,
|
1299
|
+
0,
|
1300
|
+
adjust_register_position(p, p->regs.beg[0]));
|
1158
1301
|
}
|
1159
1302
|
|
1160
1303
|
/*
|
@@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self)
|
|
1173
1316
|
|
1174
1317
|
GET_SCANNER(self, p);
|
1175
1318
|
if (! MATCHED_P(p)) return Qnil;
|
1176
|
-
return extract_range(p,
|
1319
|
+
return extract_range(p,
|
1320
|
+
adjust_register_position(p, p->regs.end[0]),
|
1321
|
+
S_LEN(p));
|
1177
1322
|
}
|
1178
1323
|
|
1179
1324
|
/*
|
@@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p)
|
|
1302
1447
|
return rb_str_dump(str);
|
1303
1448
|
}
|
1304
1449
|
|
1450
|
+
/*
|
1451
|
+
* call-seq:
|
1452
|
+
* scanner.fixed_anchor? -> true or false
|
1453
|
+
*
|
1454
|
+
* Whether +scanner+ uses fixed anchor mode or not.
|
1455
|
+
*
|
1456
|
+
* If fixed anchor mode is used, +\A+ always matches the beginning of
|
1457
|
+
* the string. Otherwise, +\A+ always matches the current position.
|
1458
|
+
*/
|
1459
|
+
static VALUE
|
1460
|
+
strscan_fixed_anchor_p(VALUE self)
|
1461
|
+
{
|
1462
|
+
struct strscanner *p;
|
1463
|
+
p = check_strscan(self);
|
1464
|
+
return p->fixed_anchor_p ? Qtrue : Qfalse;
|
1465
|
+
}
|
1466
|
+
|
1305
1467
|
/* =======================================================================
|
1306
1468
|
Ruby Interface
|
1307
1469
|
======================================================================= */
|
@@ -1412,6 +1574,7 @@ inspect2(struct strscanner *p)
|
|
1412
1574
|
void
|
1413
1575
|
Init_strscan(void)
|
1414
1576
|
{
|
1577
|
+
#undef rb_intern
|
1415
1578
|
ID id_scanerr = rb_intern("ScanError");
|
1416
1579
|
VALUE tmp;
|
1417
1580
|
|
@@ -1487,4 +1650,6 @@ Init_strscan(void)
|
|
1487
1650
|
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
|
1488
1651
|
|
1489
1652
|
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
|
1653
|
+
|
1654
|
+
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1490
1655
|
}
|