strscan 1.0.0 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/ext/strscan/strscan.c +214 -60
  3. metadata +19 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5bf6a08ae437ad9be84bb4e617a3a1f5205f83da3dbf539001e5c9aa733c2321
4
- data.tar.gz: 002cb175c04faded9975993500883192ce6d841bebe14964a91161bd80642033
3
+ metadata.gz: 8b623224c9ff6a9b576b776bcc212c459e7205b4151546a2f6bfb8c7aeb648e8
4
+ data.tar.gz: 0f60465fe2fc38d2b1e505b9e463181b1e264c20b4c1f6941f2f2df6b7de3371
5
5
  SHA512:
6
- metadata.gz: ea00e403b94d4492c6670b7cfaeff3770b5547b913fdb44b8c7e0ba6a08fb14383c71a74142a6673d6691446e1483984e85c550a3be212f5a140ceb25bbf173e
7
- data.tar.gz: 410f645b7211199459d66f9a3825262f12bda9184cbc3b8893ca254cc6708da19886544a38a1cf25b39ddf9a1e16b30e8d8a04a3ecd55243f3a80619b94e8993
6
+ metadata.gz: 44cc8212b5a7a069bb991ebeb14f608721a7b9e79720b281bd03df1d3d89cdfc95936ce9b555848141e02c79082435e88170e4c912119dd1b91ad033b9bcccae
7
+ data.tar.gz: 6b4e0c7b95409576e6ae1e1d1d39bc78155f2d010af162514bfcd6984dbd6450803f15d8827080dadfb8fdba9c9312d9dc9e59b4377db34606056783480f6eb1
@@ -13,7 +13,9 @@
13
13
  #include "ruby/encoding.h"
14
14
  #include "regint.h"
15
15
 
16
- #define STRSCAN_VERSION "0.7.0"
16
+ #include <stdbool.h>
17
+
18
+ #define STRSCAN_VERSION "1.0.2"
17
19
 
18
20
  /* =======================================================================
19
21
  Data Type Definitions
@@ -41,6 +43,9 @@ struct strscanner
41
43
 
42
44
  /* regexp used for last scan */
43
45
  VALUE regex;
46
+
47
+ /* anchor mode */
48
+ bool fixed_anchor_p;
44
49
  };
45
50
 
46
51
  #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@@ -208,19 +213,41 @@ strscan_s_allocate(VALUE klass)
208
213
  }
209
214
 
210
215
  /*
211
- * call-seq: StringScanner.new(string, dup = false)
216
+ * call-seq:
217
+ * StringScanner.new(string, fixed_anchor: false)
218
+ * StringScanner.new(string, dup = false)
212
219
  *
213
220
  * Creates a new StringScanner object to scan over the given +string+.
221
+ *
222
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
223
+ * the string. Otherwise, +\A+ always matches the current position.
224
+ *
214
225
  * The +dup+ argument is obsolete and is no longer used.
215
226
  */
216
227
  static VALUE
217
228
  strscan_initialize(int argc, VALUE *argv, VALUE self)
218
229
  {
219
230
  struct strscanner *p;
220
- VALUE str, need_dup;
231
+ VALUE str, options;
221
232
 
222
233
  p = check_strscan(self);
223
- rb_scan_args(argc, argv, "11", &str, &need_dup);
234
+ rb_scan_args(argc, argv, "11", &str, &options);
235
+ options = rb_check_hash_type(options);
236
+ if (!NIL_P(options)) {
237
+ VALUE fixed_anchor;
238
+ ID keyword_ids[1];
239
+ keyword_ids[0] = rb_intern("fixed_anchor");
240
+ rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
241
+ if (fixed_anchor == Qundef) {
242
+ p->fixed_anchor_p = false;
243
+ }
244
+ else {
245
+ p->fixed_anchor_p = RTEST(fixed_anchor);
246
+ }
247
+ }
248
+ else {
249
+ p->fixed_anchor_p = false;
250
+ }
224
251
  StringValue(str);
225
252
  p->str = str;
226
253
 
@@ -294,7 +321,7 @@ strscan_reset(VALUE self)
294
321
  * terminate
295
322
  * clear
296
323
  *
297
- * Set the scan pointer to the end of the string and clear matching data.
324
+ * Sets the scan pointer to the end of the string and clears matching data.
298
325
  */
299
326
  static VALUE
300
327
  strscan_terminate(VALUE self)
@@ -425,7 +452,7 @@ strscan_get_charpos(VALUE self)
425
452
  /*
426
453
  * call-seq: pos=(n)
427
454
  *
428
- * Set the byte position of the scan pointer.
455
+ * Sets the byte position of the scan pointer.
429
456
  *
430
457
  * s = StringScanner.new('test string')
431
458
  * s.pos = 7 # -> 7
@@ -446,16 +473,79 @@ strscan_set_pos(VALUE self, VALUE v)
446
473
  return INT2NUM(i);
447
474
  }
448
475
 
476
+ static inline UChar *
477
+ match_target(struct strscanner *p)
478
+ {
479
+ if (p->fixed_anchor_p) {
480
+ return (UChar *)S_PBEG(p);
481
+ }
482
+ else
483
+ {
484
+ return (UChar *)CURPTR(p);
485
+ }
486
+ }
487
+
488
+ static inline void
489
+ set_registers(struct strscanner *p, size_t length)
490
+ {
491
+ onig_region_clear(&(p->regs));
492
+ if (p->fixed_anchor_p) {
493
+ onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
494
+ }
495
+ else
496
+ {
497
+ onig_region_set(&(p->regs), 0, 0, length);
498
+ }
499
+ }
500
+
501
+ static inline void
502
+ succ(struct strscanner *p)
503
+ {
504
+ if (p->fixed_anchor_p) {
505
+ p->curr = p->regs.end[0];
506
+ }
507
+ else
508
+ {
509
+ p->curr += p->regs.end[0];
510
+ }
511
+ }
512
+
513
+ static inline long
514
+ last_match_length(struct strscanner *p)
515
+ {
516
+ if (p->fixed_anchor_p) {
517
+ return p->regs.end[0] - p->prev;
518
+ }
519
+ else
520
+ {
521
+ return p->regs.end[0];
522
+ }
523
+ }
524
+
525
+ static inline long
526
+ adjust_register_position(struct strscanner *p, long position)
527
+ {
528
+ if (p->fixed_anchor_p) {
529
+ return position;
530
+ }
531
+ else {
532
+ return p->prev + position;
533
+ }
534
+ }
535
+
449
536
  static VALUE
450
- strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
537
+ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
451
538
  {
452
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
453
539
  struct strscanner *p;
454
- regex_t *re;
455
- long ret;
456
- int tmpreg;
457
540
 
458
- Check_Type(regex, T_REGEXP);
541
+ if (headonly) {
542
+ if (!RB_TYPE_P(pattern, T_REGEXP)) {
543
+ StringValue(pattern);
544
+ }
545
+ }
546
+ else {
547
+ Check_Type(pattern, T_REGEXP);
548
+ }
459
549
  GET_SCANNER(self, p);
460
550
 
461
551
  CLEAR_MATCH_STATUS(p);
@@ -463,49 +553,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
463
553
  return Qnil;
464
554
  }
465
555
 
466
- p->regex = regex;
467
- re = rb_reg_prepare_re(regex, p->str);
468
- tmpreg = re != RREGEXP_PTR(regex);
469
- if (!tmpreg) RREGEXP(regex)->usecnt++;
556
+ if (RB_TYPE_P(pattern, T_REGEXP)) {
557
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
558
+ regex_t *re;
559
+ long ret;
560
+ int tmpreg;
561
+
562
+ p->regex = pattern;
563
+ re = rb_reg_prepare_re(pattern, p->str);
564
+ tmpreg = re != RREGEXP_PTR(pattern);
565
+ if (!tmpreg) RREGEXP(pattern)->usecnt++;
566
+
567
+ if (headonly) {
568
+ ret = onig_match(re,
569
+ match_target(p),
570
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
571
+ (UChar* )CURPTR(p),
572
+ &(p->regs),
573
+ ONIG_OPTION_NONE);
574
+ }
575
+ else {
576
+ ret = onig_search(re,
577
+ match_target(p),
578
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
579
+ (UChar* )CURPTR(p),
580
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
581
+ &(p->regs),
582
+ ONIG_OPTION_NONE);
583
+ }
584
+ if (!tmpreg) RREGEXP(pattern)->usecnt--;
585
+ if (tmpreg) {
586
+ if (RREGEXP(pattern)->usecnt) {
587
+ onig_free(re);
588
+ }
589
+ else {
590
+ onig_free(RREGEXP_PTR(pattern));
591
+ RREGEXP_PTR(pattern) = re;
592
+ }
593
+ }
470
594
 
471
- if (headonly) {
472
- ret = onig_match(re, (UChar* )CURPTR(p),
473
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
474
- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
595
+ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
596
+ if (ret < 0) {
597
+ /* not matched */
598
+ return Qnil;
599
+ }
475
600
  }
476
601
  else {
477
- ret = onig_search(re,
478
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
479
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
480
- &(p->regs), ONIG_OPTION_NONE);
481
- }
482
- if (!tmpreg) RREGEXP(regex)->usecnt--;
483
- if (tmpreg) {
484
- if (RREGEXP(regex)->usecnt) {
485
- onig_free(re);
602
+ rb_enc_check(p->str, pattern);
603
+ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
604
+ return Qnil;
486
605
  }
487
- else {
488
- onig_free(RREGEXP_PTR(regex));
489
- RREGEXP_PTR(regex) = re;
606
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
607
+ return Qnil;
490
608
  }
491
- }
492
-
493
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
494
- if (ret < 0) {
495
- /* not matched */
496
- return Qnil;
609
+ set_registers(p, RSTRING_LEN(pattern));
497
610
  }
498
611
 
499
612
  MATCHED(p);
500
613
  p->prev = p->curr;
614
+
501
615
  if (succptr) {
502
- p->curr += p->regs.end[0];
503
- }
504
- if (getstr) {
505
- return extract_beg_len(p, p->prev, p->regs.end[0]);
616
+ succ(p);
506
617
  }
507
- else {
508
- return INT2FIX(p->regs.end[0]);
618
+ {
619
+ const long length = last_match_length(p);
620
+ if (getstr) {
621
+ return extract_beg_len(p, p->prev, length);
622
+ }
623
+ else {
624
+ return INT2FIX(length);
625
+ }
509
626
  }
510
627
  }
511
628
 
@@ -520,7 +637,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
520
637
  * p s.scan(/\w+/) # -> "test"
521
638
  * p s.scan(/\w+/) # -> nil
522
639
  * p s.scan(/\s+/) # -> " "
523
- * p s.scan(/\w+/) # -> "string"
640
+ * p s.scan("str") # -> "str"
641
+ * p s.scan(/\w+/) # -> "ing"
524
642
  * p s.scan(/./) # -> nil
525
643
  *
526
644
  */
@@ -539,6 +657,7 @@ strscan_scan(VALUE self, VALUE re)
539
657
  * s = StringScanner.new('test string')
540
658
  * p s.match?(/\w+/) # -> 4
541
659
  * p s.match?(/\w+/) # -> 4
660
+ * p s.match?("test") # -> 4
542
661
  * p s.match?(/\s+/) # -> nil
543
662
  */
544
663
  static VALUE
@@ -560,7 +679,8 @@ strscan_match_p(VALUE self, VALUE re)
560
679
  * p s.skip(/\w+/) # -> 4
561
680
  * p s.skip(/\w+/) # -> nil
562
681
  * p s.skip(/\s+/) # -> 1
563
- * p s.skip(/\w+/) # -> 6
682
+ * p s.skip("st") # -> 2
683
+ * p s.skip(/\w+/) # -> 4
564
684
  * p s.skip(/./) # -> nil
565
685
  *
566
686
  */
@@ -704,7 +824,12 @@ static void
704
824
  adjust_registers_to_matched(struct strscanner *p)
705
825
  {
706
826
  onig_region_clear(&(p->regs));
707
- onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
827
+ if (p->fixed_anchor_p) {
828
+ onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
829
+ }
830
+ else {
831
+ onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
832
+ }
708
833
  }
709
834
 
710
835
  /*
@@ -738,8 +863,9 @@ strscan_getch(VALUE self)
738
863
  p->curr += len;
739
864
  MATCHED(p);
740
865
  adjust_registers_to_matched(p);
741
- return extract_range(p, p->prev + p->regs.beg[0],
742
- p->prev + p->regs.end[0]);
866
+ return extract_range(p,
867
+ adjust_register_position(p, p->regs.beg[0]),
868
+ adjust_register_position(p, p->regs.end[0]));
743
869
  }
744
870
 
745
871
  /*
@@ -772,8 +898,9 @@ strscan_get_byte(VALUE self)
772
898
  p->curr++;
773
899
  MATCHED(p);
774
900
  adjust_registers_to_matched(p);
775
- return extract_range(p, p->prev + p->regs.beg[0],
776
- p->prev + p->regs.end[0]);
901
+ return extract_range(p,
902
+ adjust_register_position(p, p->regs.beg[0]),
903
+ adjust_register_position(p, p->regs.end[0]));
777
904
  }
778
905
 
779
906
  /*
@@ -826,7 +953,7 @@ strscan_peep(VALUE self, VALUE vlen)
826
953
  }
827
954
 
828
955
  /*
829
- * Set the scan pointer to the previous position. Only one previous position is
956
+ * Sets the scan pointer to the previous position. Only one previous position is
830
957
  * remembered, and it changes with each scanning operation.
831
958
  *
832
959
  * s = StringScanner.new('test string')
@@ -951,8 +1078,9 @@ strscan_matched(VALUE self)
951
1078
 
952
1079
  GET_SCANNER(self, p);
953
1080
  if (! MATCHED_P(p)) return Qnil;
954
- return extract_range(p, p->prev + p->regs.beg[0],
955
- p->prev + p->regs.end[0]);
1081
+ return extract_range(p,
1082
+ adjust_register_position(p, p->regs.beg[0]),
1083
+ adjust_register_position(p, p->regs.end[0]));
956
1084
  }
957
1085
 
958
1086
  /*
@@ -1048,8 +1176,9 @@ strscan_aref(VALUE self, VALUE idx)
1048
1176
  if (i >= p->regs.num_regs) return Qnil;
1049
1177
  if (p->regs.beg[i] == -1) return Qnil;
1050
1178
 
1051
- return extract_range(p, p->prev + p->regs.beg[i],
1052
- p->prev + p->regs.end[i]);
1179
+ return extract_range(p,
1180
+ adjust_register_position(p, p->regs.beg[i]),
1181
+ adjust_register_position(p, p->regs.end[i]));
1053
1182
  }
1054
1183
 
1055
1184
  /*
@@ -1098,8 +1227,9 @@ strscan_captures(VALUE self)
1098
1227
  new_ary = rb_ary_new2(num_regs);
1099
1228
 
1100
1229
  for (i = 1; i < num_regs; i++) {
1101
- VALUE str = extract_range(p, p->prev + p->regs.beg[i],
1102
- p->prev + p->regs.end[i]);
1230
+ VALUE str = extract_range(p,
1231
+ adjust_register_position(p, p->regs.beg[i]),
1232
+ adjust_register_position(p, p->regs.end[i]));
1103
1233
  rb_ary_push(new_ary, str);
1104
1234
  }
1105
1235
 
@@ -1154,7 +1284,9 @@ strscan_pre_match(VALUE self)
1154
1284
 
1155
1285
  GET_SCANNER(self, p);
1156
1286
  if (! MATCHED_P(p)) return Qnil;
1157
- return extract_range(p, 0, p->prev + p->regs.beg[0]);
1287
+ return extract_range(p,
1288
+ 0,
1289
+ adjust_register_position(p, p->regs.beg[0]));
1158
1290
  }
1159
1291
 
1160
1292
  /*
@@ -1173,7 +1305,9 @@ strscan_post_match(VALUE self)
1173
1305
 
1174
1306
  GET_SCANNER(self, p);
1175
1307
  if (! MATCHED_P(p)) return Qnil;
1176
- return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
1308
+ return extract_range(p,
1309
+ adjust_register_position(p, p->regs.end[0]),
1310
+ S_LEN(p));
1177
1311
  }
1178
1312
 
1179
1313
  /*
@@ -1302,6 +1436,23 @@ inspect2(struct strscanner *p)
1302
1436
  return rb_str_dump(str);
1303
1437
  }
1304
1438
 
1439
+ /*
1440
+ * call-seq:
1441
+ * scanner.fixed_anchor? -> true or false
1442
+ *
1443
+ * Returns whether +scanner+ uses fixed anchor mode.
1444
+ *
1445
+ * If fixed anchor mode is used, +\A+ always matches the beginning of
1446
+ * the string. Otherwise, +\A+ always matches the current position.
1447
+ */
1448
+ static VALUE
1449
+ strscan_fixed_anchor_p(VALUE self)
1450
+ {
1451
+ struct strscanner *p;
1452
+ p = check_strscan(self);
1453
+ return p->fixed_anchor_p ? Qtrue : Qfalse;
1454
+ }
1455
+
1305
1456
  /* =======================================================================
1306
1457
  Ruby Interface
1307
1458
  ======================================================================= */
@@ -1412,6 +1563,7 @@ inspect2(struct strscanner *p)
1412
1563
  void
1413
1564
  Init_strscan(void)
1414
1565
  {
1566
+ #undef rb_intern
1415
1567
  ID id_scanerr = rb_intern("ScanError");
1416
1568
  VALUE tmp;
1417
1569
 
@@ -1487,4 +1639,6 @@ Init_strscan(void)
1487
1639
  rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1488
1640
 
1489
1641
  rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1642
+
1643
+ rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1490
1644
  }
metadata CHANGED
@@ -1,14 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strscan
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
+ - Sutou Kouhei
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2017-12-19 00:00:00.000000000 Z
12
+ date: 2019-10-12 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: rake-compiler
@@ -24,9 +25,24 @@ dependencies:
24
25
  - - ">="
25
26
  - !ruby/object:Gem::Version
26
27
  version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: benchmark-driver
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
27
42
  description: Provides lexical scanning operations on a String.
28
43
  email:
29
44
  -
45
+ - kou@cozmixng.org
30
46
  executables: []
31
47
  extensions:
32
48
  - ext/strscan/extconf.rb
@@ -56,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
72
  version: '0'
57
73
  requirements: []
58
74
  rubyforge_project:
59
- rubygems_version: 2.7.6
75
+ rubygems_version: 2.7.6.2
60
76
  signing_key:
61
77
  specification_version: 4
62
78
  summary: Provides lexical scanning operations on a String.