strscan 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/ext/strscan/strscan.c +214 -60
  3. metadata +19 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5bf6a08ae437ad9be84bb4e617a3a1f5205f83da3dbf539001e5c9aa733c2321
4
- data.tar.gz: 002cb175c04faded9975993500883192ce6d841bebe14964a91161bd80642033
3
+ metadata.gz: 8b623224c9ff6a9b576b776bcc212c459e7205b4151546a2f6bfb8c7aeb648e8
4
+ data.tar.gz: 0f60465fe2fc38d2b1e505b9e463181b1e264c20b4c1f6941f2f2df6b7de3371
5
5
  SHA512:
6
- metadata.gz: ea00e403b94d4492c6670b7cfaeff3770b5547b913fdb44b8c7e0ba6a08fb14383c71a74142a6673d6691446e1483984e85c550a3be212f5a140ceb25bbf173e
7
- data.tar.gz: 410f645b7211199459d66f9a3825262f12bda9184cbc3b8893ca254cc6708da19886544a38a1cf25b39ddf9a1e16b30e8d8a04a3ecd55243f3a80619b94e8993
6
+ metadata.gz: 44cc8212b5a7a069bb991ebeb14f608721a7b9e79720b281bd03df1d3d89cdfc95936ce9b555848141e02c79082435e88170e4c912119dd1b91ad033b9bcccae
7
+ data.tar.gz: 6b4e0c7b95409576e6ae1e1d1d39bc78155f2d010af162514bfcd6984dbd6450803f15d8827080dadfb8fdba9c9312d9dc9e59b4377db34606056783480f6eb1
@@ -13,7 +13,9 @@
13
13
  #include "ruby/encoding.h"
14
14
  #include "regint.h"
15
15
 
16
- #define STRSCAN_VERSION "0.7.0"
16
+ #include <stdbool.h>
17
+
18
+ #define STRSCAN_VERSION "1.0.2"
17
19
 
18
20
  /* =======================================================================
19
21
  Data Type Definitions
@@ -41,6 +43,9 @@ struct strscanner
41
43
 
42
44
  /* regexp used for last scan */
43
45
  VALUE regex;
46
+
47
+ /* anchor mode */
48
+ bool fixed_anchor_p;
44
49
  };
45
50
 
46
51
  #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@@ -208,19 +213,41 @@ strscan_s_allocate(VALUE klass)
208
213
  }
209
214
 
210
215
  /*
211
- * call-seq: StringScanner.new(string, dup = false)
216
+ * call-seq:
217
+ * StringScanner.new(string, fixed_anchor: false)
218
+ * StringScanner.new(string, dup = false)
212
219
  *
213
220
  * Creates a new StringScanner object to scan over the given +string+.
221
+ *
222
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
223
+ * the string. Otherwise, +\A+ always matches the current position.
224
+ *
214
225
  * +dup+ argument is obsolete and not used now.
215
226
  */
216
227
  static VALUE
217
228
  strscan_initialize(int argc, VALUE *argv, VALUE self)
218
229
  {
219
230
  struct strscanner *p;
220
- VALUE str, need_dup;
231
+ VALUE str, options;
221
232
 
222
233
  p = check_strscan(self);
223
- rb_scan_args(argc, argv, "11", &str, &need_dup);
234
+ rb_scan_args(argc, argv, "11", &str, &options);
235
+ options = rb_check_hash_type(options);
236
+ if (!NIL_P(options)) {
237
+ VALUE fixed_anchor;
238
+ ID keyword_ids[1];
239
+ keyword_ids[0] = rb_intern("fixed_anchor");
240
+ rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
241
+ if (fixed_anchor == Qundef) {
242
+ p->fixed_anchor_p = false;
243
+ }
244
+ else {
245
+ p->fixed_anchor_p = RTEST(fixed_anchor);
246
+ }
247
+ }
248
+ else {
249
+ p->fixed_anchor_p = false;
250
+ }
224
251
  StringValue(str);
225
252
  p->str = str;
226
253
 
@@ -294,7 +321,7 @@ strscan_reset(VALUE self)
294
321
  * terminate
295
322
  * clear
296
323
  *
297
- * Set the scan pointer to the end of the string and clear matching data.
324
+ * Sets the scan pointer to the end of the string and clears matching data.
298
325
  */
299
326
  static VALUE
300
327
  strscan_terminate(VALUE self)
@@ -425,7 +452,7 @@ strscan_get_charpos(VALUE self)
425
452
  /*
426
453
  * call-seq: pos=(n)
427
454
  *
428
- * Set the byte position of the scan pointer.
455
+ * Sets the byte position of the scan pointer.
429
456
  *
430
457
  * s = StringScanner.new('test string')
431
458
  * s.pos = 7 # -> 7
@@ -446,16 +473,79 @@ strscan_set_pos(VALUE self, VALUE v)
446
473
  return INT2NUM(i);
447
474
  }
448
475
 
476
+ static inline UChar *
477
+ match_target(struct strscanner *p)
478
+ {
479
+ if (p->fixed_anchor_p) {
480
+ return (UChar *)S_PBEG(p);
481
+ }
482
+ else
483
+ {
484
+ return (UChar *)CURPTR(p);
485
+ }
486
+ }
487
+
488
+ static inline void
489
+ set_registers(struct strscanner *p, size_t length)
490
+ {
491
+ onig_region_clear(&(p->regs));
492
+ if (p->fixed_anchor_p) {
493
+ onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
494
+ }
495
+ else
496
+ {
497
+ onig_region_set(&(p->regs), 0, 0, length);
498
+ }
499
+ }
500
+
501
+ static inline void
502
+ succ(struct strscanner *p)
503
+ {
504
+ if (p->fixed_anchor_p) {
505
+ p->curr = p->regs.end[0];
506
+ }
507
+ else
508
+ {
509
+ p->curr += p->regs.end[0];
510
+ }
511
+ }
512
+
513
+ static inline long
514
+ last_match_length(struct strscanner *p)
515
+ {
516
+ if (p->fixed_anchor_p) {
517
+ return p->regs.end[0] - p->prev;
518
+ }
519
+ else
520
+ {
521
+ return p->regs.end[0];
522
+ }
523
+ }
524
+
525
+ static inline long
526
+ adjust_register_position(struct strscanner *p, long position)
527
+ {
528
+ if (p->fixed_anchor_p) {
529
+ return position;
530
+ }
531
+ else {
532
+ return p->prev + position;
533
+ }
534
+ }
535
+
449
536
  static VALUE
450
- strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
537
+ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
451
538
  {
452
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
453
539
  struct strscanner *p;
454
- regex_t *re;
455
- long ret;
456
- int tmpreg;
457
540
 
458
- Check_Type(regex, T_REGEXP);
541
+ if (headonly) {
542
+ if (!RB_TYPE_P(pattern, T_REGEXP)) {
543
+ StringValue(pattern);
544
+ }
545
+ }
546
+ else {
547
+ Check_Type(pattern, T_REGEXP);
548
+ }
459
549
  GET_SCANNER(self, p);
460
550
 
461
551
  CLEAR_MATCH_STATUS(p);
@@ -463,49 +553,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
463
553
  return Qnil;
464
554
  }
465
555
 
466
- p->regex = regex;
467
- re = rb_reg_prepare_re(regex, p->str);
468
- tmpreg = re != RREGEXP_PTR(regex);
469
- if (!tmpreg) RREGEXP(regex)->usecnt++;
556
+ if (RB_TYPE_P(pattern, T_REGEXP)) {
557
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
558
+ regex_t *re;
559
+ long ret;
560
+ int tmpreg;
561
+
562
+ p->regex = pattern;
563
+ re = rb_reg_prepare_re(pattern, p->str);
564
+ tmpreg = re != RREGEXP_PTR(pattern);
565
+ if (!tmpreg) RREGEXP(pattern)->usecnt++;
566
+
567
+ if (headonly) {
568
+ ret = onig_match(re,
569
+ match_target(p),
570
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
571
+ (UChar* )CURPTR(p),
572
+ &(p->regs),
573
+ ONIG_OPTION_NONE);
574
+ }
575
+ else {
576
+ ret = onig_search(re,
577
+ match_target(p),
578
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
579
+ (UChar* )CURPTR(p),
580
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
581
+ &(p->regs),
582
+ ONIG_OPTION_NONE);
583
+ }
584
+ if (!tmpreg) RREGEXP(pattern)->usecnt--;
585
+ if (tmpreg) {
586
+ if (RREGEXP(pattern)->usecnt) {
587
+ onig_free(re);
588
+ }
589
+ else {
590
+ onig_free(RREGEXP_PTR(pattern));
591
+ RREGEXP_PTR(pattern) = re;
592
+ }
593
+ }
470
594
 
471
- if (headonly) {
472
- ret = onig_match(re, (UChar* )CURPTR(p),
473
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
474
- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
595
+ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
596
+ if (ret < 0) {
597
+ /* not matched */
598
+ return Qnil;
599
+ }
475
600
  }
476
601
  else {
477
- ret = onig_search(re,
478
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
479
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
480
- &(p->regs), ONIG_OPTION_NONE);
481
- }
482
- if (!tmpreg) RREGEXP(regex)->usecnt--;
483
- if (tmpreg) {
484
- if (RREGEXP(regex)->usecnt) {
485
- onig_free(re);
602
+ rb_enc_check(p->str, pattern);
603
+ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
604
+ return Qnil;
486
605
  }
487
- else {
488
- onig_free(RREGEXP_PTR(regex));
489
- RREGEXP_PTR(regex) = re;
606
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
607
+ return Qnil;
490
608
  }
491
- }
492
-
493
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
494
- if (ret < 0) {
495
- /* not matched */
496
- return Qnil;
609
+ set_registers(p, RSTRING_LEN(pattern));
497
610
  }
498
611
 
499
612
  MATCHED(p);
500
613
  p->prev = p->curr;
614
+
501
615
  if (succptr) {
502
- p->curr += p->regs.end[0];
503
- }
504
- if (getstr) {
505
- return extract_beg_len(p, p->prev, p->regs.end[0]);
616
+ succ(p);
506
617
  }
507
- else {
508
- return INT2FIX(p->regs.end[0]);
618
+ {
619
+ const long length = last_match_length(p);
620
+ if (getstr) {
621
+ return extract_beg_len(p, p->prev, length);
622
+ }
623
+ else {
624
+ return INT2FIX(length);
625
+ }
509
626
  }
510
627
  }
511
628
 
@@ -520,7 +637,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
520
637
  * p s.scan(/\w+/) # -> "test"
521
638
  * p s.scan(/\w+/) # -> nil
522
639
  * p s.scan(/\s+/) # -> " "
523
- * p s.scan(/\w+/) # -> "string"
640
+ * p s.scan("str") # -> "str"
641
+ * p s.scan(/\w+/) # -> "ing"
524
642
  * p s.scan(/./) # -> nil
525
643
  *
526
644
  */
@@ -539,6 +657,7 @@ strscan_scan(VALUE self, VALUE re)
539
657
  * s = StringScanner.new('test string')
540
658
  * p s.match?(/\w+/) # -> 4
541
659
  * p s.match?(/\w+/) # -> 4
660
+ * p s.match?("test") # -> 4
542
661
  * p s.match?(/\s+/) # -> nil
543
662
  */
544
663
  static VALUE
@@ -560,7 +679,8 @@ strscan_match_p(VALUE self, VALUE re)
560
679
  * p s.skip(/\w+/) # -> 4
561
680
  * p s.skip(/\w+/) # -> nil
562
681
  * p s.skip(/\s+/) # -> 1
563
- * p s.skip(/\w+/) # -> 6
682
+ * p s.skip("st") # -> 2
683
+ * p s.skip(/\w+/) # -> 4
564
684
  * p s.skip(/./) # -> nil
565
685
  *
566
686
  */
@@ -704,7 +824,12 @@ static void
704
824
  adjust_registers_to_matched(struct strscanner *p)
705
825
  {
706
826
  onig_region_clear(&(p->regs));
707
- onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
827
+ if (p->fixed_anchor_p) {
828
+ onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
829
+ }
830
+ else {
831
+ onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
832
+ }
708
833
  }
709
834
 
710
835
  /*
@@ -738,8 +863,9 @@ strscan_getch(VALUE self)
738
863
  p->curr += len;
739
864
  MATCHED(p);
740
865
  adjust_registers_to_matched(p);
741
- return extract_range(p, p->prev + p->regs.beg[0],
742
- p->prev + p->regs.end[0]);
866
+ return extract_range(p,
867
+ adjust_register_position(p, p->regs.beg[0]),
868
+ adjust_register_position(p, p->regs.end[0]));
743
869
  }
744
870
 
745
871
  /*
@@ -772,8 +898,9 @@ strscan_get_byte(VALUE self)
772
898
  p->curr++;
773
899
  MATCHED(p);
774
900
  adjust_registers_to_matched(p);
775
- return extract_range(p, p->prev + p->regs.beg[0],
776
- p->prev + p->regs.end[0]);
901
+ return extract_range(p,
902
+ adjust_register_position(p, p->regs.beg[0]),
903
+ adjust_register_position(p, p->regs.end[0]));
777
904
  }
778
905
 
779
906
  /*
@@ -826,7 +953,7 @@ strscan_peep(VALUE self, VALUE vlen)
826
953
  }
827
954
 
828
955
  /*
829
- * Set the scan pointer to the previous position. Only one previous position is
956
+ * Sets the scan pointer to the previous position. Only one previous position is
830
957
  * remembered, and it changes with each scanning operation.
831
958
  *
832
959
  * s = StringScanner.new('test string')
@@ -951,8 +1078,9 @@ strscan_matched(VALUE self)
951
1078
 
952
1079
  GET_SCANNER(self, p);
953
1080
  if (! MATCHED_P(p)) return Qnil;
954
- return extract_range(p, p->prev + p->regs.beg[0],
955
- p->prev + p->regs.end[0]);
1081
+ return extract_range(p,
1082
+ adjust_register_position(p, p->regs.beg[0]),
1083
+ adjust_register_position(p, p->regs.end[0]));
956
1084
  }
957
1085
 
958
1086
  /*
@@ -1048,8 +1176,9 @@ strscan_aref(VALUE self, VALUE idx)
1048
1176
  if (i >= p->regs.num_regs) return Qnil;
1049
1177
  if (p->regs.beg[i] == -1) return Qnil;
1050
1178
 
1051
- return extract_range(p, p->prev + p->regs.beg[i],
1052
- p->prev + p->regs.end[i]);
1179
+ return extract_range(p,
1180
+ adjust_register_position(p, p->regs.beg[i]),
1181
+ adjust_register_position(p, p->regs.end[i]));
1053
1182
  }
1054
1183
 
1055
1184
  /*
@@ -1098,8 +1227,9 @@ strscan_captures(VALUE self)
1098
1227
  new_ary = rb_ary_new2(num_regs);
1099
1228
 
1100
1229
  for (i = 1; i < num_regs; i++) {
1101
- VALUE str = extract_range(p, p->prev + p->regs.beg[i],
1102
- p->prev + p->regs.end[i]);
1230
+ VALUE str = extract_range(p,
1231
+ adjust_register_position(p, p->regs.beg[i]),
1232
+ adjust_register_position(p, p->regs.end[i]));
1103
1233
  rb_ary_push(new_ary, str);
1104
1234
  }
1105
1235
 
@@ -1154,7 +1284,9 @@ strscan_pre_match(VALUE self)
1154
1284
 
1155
1285
  GET_SCANNER(self, p);
1156
1286
  if (! MATCHED_P(p)) return Qnil;
1157
- return extract_range(p, 0, p->prev + p->regs.beg[0]);
1287
+ return extract_range(p,
1288
+ 0,
1289
+ adjust_register_position(p, p->regs.beg[0]));
1158
1290
  }
1159
1291
 
1160
1292
  /*
@@ -1173,7 +1305,9 @@ strscan_post_match(VALUE self)
1173
1305
 
1174
1306
  GET_SCANNER(self, p);
1175
1307
  if (! MATCHED_P(p)) return Qnil;
1176
- return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
1308
+ return extract_range(p,
1309
+ adjust_register_position(p, p->regs.end[0]),
1310
+ S_LEN(p));
1177
1311
  }
1178
1312
 
1179
1313
  /*
@@ -1302,6 +1436,23 @@ inspect2(struct strscanner *p)
1302
1436
  return rb_str_dump(str);
1303
1437
  }
1304
1438
 
1439
+ /*
1440
+ * call-seq:
1441
+ * scanner.fixed_anchor? -> true or false
1442
+ *
1443
+ * Whether +scanner+ uses fixed anchor mode or not.
1444
+ *
1445
+ * If fixed anchor mode is used, +\A+ always matches the beginning of
1446
+ * the string. Otherwise, +\A+ always matches the current position.
1447
+ */
1448
+ static VALUE
1449
+ strscan_fixed_anchor_p(VALUE self)
1450
+ {
1451
+ struct strscanner *p;
1452
+ p = check_strscan(self);
1453
+ return p->fixed_anchor_p ? Qtrue : Qfalse;
1454
+ }
1455
+
1305
1456
  /* =======================================================================
1306
1457
  Ruby Interface
1307
1458
  ======================================================================= */
@@ -1412,6 +1563,7 @@ inspect2(struct strscanner *p)
1412
1563
  void
1413
1564
  Init_strscan(void)
1414
1565
  {
1566
+ #undef rb_intern
1415
1567
  ID id_scanerr = rb_intern("ScanError");
1416
1568
  VALUE tmp;
1417
1569
 
@@ -1487,4 +1639,6 @@ Init_strscan(void)
1487
1639
  rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1488
1640
 
1489
1641
  rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1642
+
1643
+ rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1490
1644
  }
metadata CHANGED
@@ -1,14 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strscan
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
+ - Sutou Kouhei
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2017-12-19 00:00:00.000000000 Z
12
+ date: 2019-10-12 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: rake-compiler
@@ -24,9 +25,24 @@ dependencies:
24
25
  - - ">="
25
26
  - !ruby/object:Gem::Version
26
27
  version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: benchmark-driver
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
27
42
  description: Provides lexical scanning operations on a String.
28
43
  email:
29
44
  -
45
+ - kou@cozmixng.org
30
46
  executables: []
31
47
  extensions:
32
48
  - ext/strscan/extconf.rb
@@ -56,7 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
72
  version: '0'
57
73
  requirements: []
58
74
  rubyforge_project:
59
- rubygems_version: 2.7.6
75
+ rubygems_version: 2.7.6.2
60
76
  signing_key:
61
77
  specification_version: 4
62
78
  summary: Provides lexical scanning operations on a String.