strscan 1.0.0 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5bf6a08ae437ad9be84bb4e617a3a1f5205f83da3dbf539001e5c9aa733c2321
4
- data.tar.gz: 002cb175c04faded9975993500883192ce6d841bebe14964a91161bd80642033
3
+ metadata.gz: 126a67b7200ba8c3f3aeb661e55a30a37b23bdd9fe68c3f8ebfc4ad9f6c2cdcb
4
+ data.tar.gz: d0fafda0d3353b100c32a1437ee58258742f914a2dbb49dfde267cb15974f9f5
5
5
  SHA512:
6
- metadata.gz: ea00e403b94d4492c6670b7cfaeff3770b5547b913fdb44b8c7e0ba6a08fb14383c71a74142a6673d6691446e1483984e85c550a3be212f5a140ceb25bbf173e
7
- data.tar.gz: 410f645b7211199459d66f9a3825262f12bda9184cbc3b8893ca254cc6708da19886544a38a1cf25b39ddf9a1e16b30e8d8a04a3ecd55243f3a80619b94e8993
6
+ metadata.gz: ef088b41a17fd9afbee17734527fa911aea8f5b16b6f4547efb6246648f1b81ce54c6c09ebddd8c1d5b9d8c02b997d9e12f83d32214a2f25bcb5854238ad9f0d
7
+ data.tar.gz: 4d93362e2c96ffd62c8ccfdbfff4d77558d1948173901176ff80f0b0c12b272bd6787a6016146756bb71b19a68a00d6add0de3cb95086cec78142873584ff6c6
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  require 'mkmf'
3
- $INCFLAGS << " -I$(top_srcdir)"
3
+ $INCFLAGS << " -I$(top_srcdir)" if $extmk
4
+ have_func("onig_region_memsize", "ruby.h")
4
5
  create_makefile 'strscan'
@@ -11,9 +11,18 @@
11
11
  #include "ruby/ruby.h"
12
12
  #include "ruby/re.h"
13
13
  #include "ruby/encoding.h"
14
- #include "regint.h"
15
14
 
16
- #define STRSCAN_VERSION "0.7.0"
15
+ #ifdef RUBY_EXTCONF_H
16
+ # include RUBY_EXTCONF_H
17
+ #endif
18
+
19
+ #ifdef HAVE_ONIG_REGION_MEMSIZE
20
+ extern size_t onig_region_memsize(const struct re_registers *regs);
21
+ #endif
22
+
23
+ #include <stdbool.h>
24
+
25
+ #define STRSCAN_VERSION "1.0.3"
17
26
 
18
27
  /* =======================================================================
19
28
  Data Type Definitions
@@ -41,6 +50,9 @@ struct strscanner
41
50
 
42
51
  /* regexp used for last scan */
43
52
  VALUE regex;
53
+
54
+ /* anchor mode */
55
+ bool fixed_anchor_p;
44
56
  };
45
57
 
46
58
  #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@@ -186,7 +198,11 @@ static size_t
186
198
  strscan_memsize(const void *ptr)
187
199
  {
188
200
  const struct strscanner *p = ptr;
189
- return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
201
+ size_t size = sizeof(*p) - sizeof(p->regs);
202
+ #ifdef HAVE_ONIG_REGION_MEMSIZE
203
+ size += onig_region_memsize(&p->regs);
204
+ #endif
205
+ return size;
190
206
  }
191
207
 
192
208
  static const rb_data_type_t strscanner_type = {
@@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass)
208
224
  }
209
225
 
210
226
  /*
211
- * call-seq: StringScanner.new(string, dup = false)
227
+ * call-seq:
228
+ * StringScanner.new(string, fixed_anchor: false)
229
+ * StringScanner.new(string, dup = false)
212
230
  *
213
231
  * Creates a new StringScanner object to scan over the given +string+.
232
+ *
233
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
234
+ * the string. Otherwise, +\A+ always matches the current position.
235
+ *
214
236
  * +dup+ argument is obsolete and not used now.
215
237
  */
216
238
  static VALUE
217
239
  strscan_initialize(int argc, VALUE *argv, VALUE self)
218
240
  {
219
241
  struct strscanner *p;
220
- VALUE str, need_dup;
242
+ VALUE str, options;
221
243
 
222
244
  p = check_strscan(self);
223
- rb_scan_args(argc, argv, "11", &str, &need_dup);
245
+ rb_scan_args(argc, argv, "11", &str, &options);
246
+ options = rb_check_hash_type(options);
247
+ if (!NIL_P(options)) {
248
+ VALUE fixed_anchor;
249
+ ID keyword_ids[1];
250
+ keyword_ids[0] = rb_intern("fixed_anchor");
251
+ rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
252
+ if (fixed_anchor == Qundef) {
253
+ p->fixed_anchor_p = false;
254
+ }
255
+ else {
256
+ p->fixed_anchor_p = RTEST(fixed_anchor);
257
+ }
258
+ }
259
+ else {
260
+ p->fixed_anchor_p = false;
261
+ }
224
262
  StringValue(str);
225
263
  p->str = str;
226
264
 
@@ -294,7 +332,7 @@ strscan_reset(VALUE self)
294
332
  * terminate
295
333
  * clear
296
334
  *
297
- * Set the scan pointer to the end of the string and clear matching data.
335
+ * Sets the scan pointer to the end of the string and clear matching data.
298
336
  */
299
337
  static VALUE
300
338
  strscan_terminate(VALUE self)
@@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self)
425
463
  /*
426
464
  * call-seq: pos=(n)
427
465
  *
428
- * Set the byte position of the scan pointer.
466
+ * Sets the byte position of the scan pointer.
429
467
  *
430
468
  * s = StringScanner.new('test string')
431
469
  * s.pos = 7 # -> 7
@@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v)
446
484
  return INT2NUM(i);
447
485
  }
448
486
 
487
+ static inline UChar *
488
+ match_target(struct strscanner *p)
489
+ {
490
+ if (p->fixed_anchor_p) {
491
+ return (UChar *)S_PBEG(p);
492
+ }
493
+ else
494
+ {
495
+ return (UChar *)CURPTR(p);
496
+ }
497
+ }
498
+
499
+ static inline void
500
+ set_registers(struct strscanner *p, size_t length)
501
+ {
502
+ onig_region_clear(&(p->regs));
503
+ if (p->fixed_anchor_p) {
504
+ onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
505
+ }
506
+ else
507
+ {
508
+ onig_region_set(&(p->regs), 0, 0, length);
509
+ }
510
+ }
511
+
512
+ static inline void
513
+ succ(struct strscanner *p)
514
+ {
515
+ if (p->fixed_anchor_p) {
516
+ p->curr = p->regs.end[0];
517
+ }
518
+ else
519
+ {
520
+ p->curr += p->regs.end[0];
521
+ }
522
+ }
523
+
524
+ static inline long
525
+ last_match_length(struct strscanner *p)
526
+ {
527
+ if (p->fixed_anchor_p) {
528
+ return p->regs.end[0] - p->prev;
529
+ }
530
+ else
531
+ {
532
+ return p->regs.end[0];
533
+ }
534
+ }
535
+
536
+ static inline long
537
+ adjust_register_position(struct strscanner *p, long position)
538
+ {
539
+ if (p->fixed_anchor_p) {
540
+ return position;
541
+ }
542
+ else {
543
+ return p->prev + position;
544
+ }
545
+ }
546
+
449
547
  static VALUE
450
- strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
548
+ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
451
549
  {
452
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
453
550
  struct strscanner *p;
454
- regex_t *re;
455
- long ret;
456
- int tmpreg;
457
551
 
458
- Check_Type(regex, T_REGEXP);
552
+ if (headonly) {
553
+ if (!RB_TYPE_P(pattern, T_REGEXP)) {
554
+ StringValue(pattern);
555
+ }
556
+ }
557
+ else {
558
+ Check_Type(pattern, T_REGEXP);
559
+ }
459
560
  GET_SCANNER(self, p);
460
561
 
461
562
  CLEAR_MATCH_STATUS(p);
@@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
463
564
  return Qnil;
464
565
  }
465
566
 
466
- p->regex = regex;
467
- re = rb_reg_prepare_re(regex, p->str);
468
- tmpreg = re != RREGEXP_PTR(regex);
469
- if (!tmpreg) RREGEXP(regex)->usecnt++;
567
+ if (RB_TYPE_P(pattern, T_REGEXP)) {
568
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
569
+ regex_t *re;
570
+ long ret;
571
+ int tmpreg;
572
+
573
+ p->regex = pattern;
574
+ re = rb_reg_prepare_re(pattern, p->str);
575
+ tmpreg = re != RREGEXP_PTR(pattern);
576
+ if (!tmpreg) RREGEXP(pattern)->usecnt++;
577
+
578
+ if (headonly) {
579
+ ret = onig_match(re,
580
+ match_target(p),
581
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
582
+ (UChar* )CURPTR(p),
583
+ &(p->regs),
584
+ ONIG_OPTION_NONE);
585
+ }
586
+ else {
587
+ ret = onig_search(re,
588
+ match_target(p),
589
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
590
+ (UChar* )CURPTR(p),
591
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
592
+ &(p->regs),
593
+ ONIG_OPTION_NONE);
594
+ }
595
+ if (!tmpreg) RREGEXP(pattern)->usecnt--;
596
+ if (tmpreg) {
597
+ if (RREGEXP(pattern)->usecnt) {
598
+ onig_free(re);
599
+ }
600
+ else {
601
+ onig_free(RREGEXP_PTR(pattern));
602
+ RREGEXP_PTR(pattern) = re;
603
+ }
604
+ }
470
605
 
471
- if (headonly) {
472
- ret = onig_match(re, (UChar* )CURPTR(p),
473
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
474
- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
606
+ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
607
+ if (ret < 0) {
608
+ /* not matched */
609
+ return Qnil;
610
+ }
475
611
  }
476
612
  else {
477
- ret = onig_search(re,
478
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
479
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
480
- &(p->regs), ONIG_OPTION_NONE);
481
- }
482
- if (!tmpreg) RREGEXP(regex)->usecnt--;
483
- if (tmpreg) {
484
- if (RREGEXP(regex)->usecnt) {
485
- onig_free(re);
613
+ rb_enc_check(p->str, pattern);
614
+ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
615
+ return Qnil;
486
616
  }
487
- else {
488
- onig_free(RREGEXP_PTR(regex));
489
- RREGEXP_PTR(regex) = re;
617
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
618
+ return Qnil;
490
619
  }
491
- }
492
-
493
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
494
- if (ret < 0) {
495
- /* not matched */
496
- return Qnil;
620
+ set_registers(p, RSTRING_LEN(pattern));
497
621
  }
498
622
 
499
623
  MATCHED(p);
500
624
  p->prev = p->curr;
625
+
501
626
  if (succptr) {
502
- p->curr += p->regs.end[0];
503
- }
504
- if (getstr) {
505
- return extract_beg_len(p, p->prev, p->regs.end[0]);
627
+ succ(p);
506
628
  }
507
- else {
508
- return INT2FIX(p->regs.end[0]);
629
+ {
630
+ const long length = last_match_length(p);
631
+ if (getstr) {
632
+ return extract_beg_len(p, p->prev, length);
633
+ }
634
+ else {
635
+ return INT2FIX(length);
636
+ }
509
637
  }
510
638
  }
511
639
 
@@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
520
648
  * p s.scan(/\w+/) # -> "test"
521
649
  * p s.scan(/\w+/) # -> nil
522
650
  * p s.scan(/\s+/) # -> " "
523
- * p s.scan(/\w+/) # -> "string"
651
+ * p s.scan("str") # -> "str"
652
+ * p s.scan(/\w+/) # -> "ing"
524
653
  * p s.scan(/./) # -> nil
525
654
  *
526
655
  */
@@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re)
539
668
  * s = StringScanner.new('test string')
540
669
  * p s.match?(/\w+/) # -> 4
541
670
  * p s.match?(/\w+/) # -> 4
671
+ * p s.match?("test") # -> 4
542
672
  * p s.match?(/\s+/) # -> nil
543
673
  */
544
674
  static VALUE
@@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re)
560
690
  * p s.skip(/\w+/) # -> 4
561
691
  * p s.skip(/\w+/) # -> nil
562
692
  * p s.skip(/\s+/) # -> 1
563
- * p s.skip(/\w+/) # -> 6
693
+ * p s.skip("st") # -> 2
694
+ * p s.skip(/\w+/) # -> 4
564
695
  * p s.skip(/./) # -> nil
565
696
  *
566
697
  */
@@ -704,7 +835,12 @@ static void
704
835
  adjust_registers_to_matched(struct strscanner *p)
705
836
  {
706
837
  onig_region_clear(&(p->regs));
707
- onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
838
+ if (p->fixed_anchor_p) {
839
+ onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
840
+ }
841
+ else {
842
+ onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
843
+ }
708
844
  }
709
845
 
710
846
  /*
@@ -738,8 +874,9 @@ strscan_getch(VALUE self)
738
874
  p->curr += len;
739
875
  MATCHED(p);
740
876
  adjust_registers_to_matched(p);
741
- return extract_range(p, p->prev + p->regs.beg[0],
742
- p->prev + p->regs.end[0]);
877
+ return extract_range(p,
878
+ adjust_register_position(p, p->regs.beg[0]),
879
+ adjust_register_position(p, p->regs.end[0]));
743
880
  }
744
881
 
745
882
  /*
@@ -772,8 +909,9 @@ strscan_get_byte(VALUE self)
772
909
  p->curr++;
773
910
  MATCHED(p);
774
911
  adjust_registers_to_matched(p);
775
- return extract_range(p, p->prev + p->regs.beg[0],
776
- p->prev + p->regs.end[0]);
912
+ return extract_range(p,
913
+ adjust_register_position(p, p->regs.beg[0]),
914
+ adjust_register_position(p, p->regs.end[0]));
777
915
  }
778
916
 
779
917
  /*
@@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen)
826
964
  }
827
965
 
828
966
  /*
829
- * Set the scan pointer to the previous position. Only one previous position is
967
+ * Sets the scan pointer to the previous position. Only one previous position is
830
968
  * remembered, and it changes with each scanning operation.
831
969
  *
832
970
  * s = StringScanner.new('test string')
@@ -951,8 +1089,9 @@ strscan_matched(VALUE self)
951
1089
 
952
1090
  GET_SCANNER(self, p);
953
1091
  if (! MATCHED_P(p)) return Qnil;
954
- return extract_range(p, p->prev + p->regs.beg[0],
955
- p->prev + p->regs.end[0]);
1092
+ return extract_range(p,
1093
+ adjust_register_position(p, p->regs.beg[0]),
1094
+ adjust_register_position(p, p->regs.end[0]));
956
1095
  }
957
1096
 
958
1097
  /*
@@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx)
1048
1187
  if (i >= p->regs.num_regs) return Qnil;
1049
1188
  if (p->regs.beg[i] == -1) return Qnil;
1050
1189
 
1051
- return extract_range(p, p->prev + p->regs.beg[i],
1052
- p->prev + p->regs.end[i]);
1190
+ return extract_range(p,
1191
+ adjust_register_position(p, p->regs.beg[i]),
1192
+ adjust_register_position(p, p->regs.end[i]));
1053
1193
  }
1054
1194
 
1055
1195
  /*
@@ -1098,8 +1238,9 @@ strscan_captures(VALUE self)
1098
1238
  new_ary = rb_ary_new2(num_regs);
1099
1239
 
1100
1240
  for (i = 1; i < num_regs; i++) {
1101
- VALUE str = extract_range(p, p->prev + p->regs.beg[i],
1102
- p->prev + p->regs.end[i]);
1241
+ VALUE str = extract_range(p,
1242
+ adjust_register_position(p, p->regs.beg[i]),
1243
+ adjust_register_position(p, p->regs.end[i]));
1103
1244
  rb_ary_push(new_ary, str);
1104
1245
  }
1105
1246
 
@@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self)
1154
1295
 
1155
1296
  GET_SCANNER(self, p);
1156
1297
  if (! MATCHED_P(p)) return Qnil;
1157
- return extract_range(p, 0, p->prev + p->regs.beg[0]);
1298
+ return extract_range(p,
1299
+ 0,
1300
+ adjust_register_position(p, p->regs.beg[0]));
1158
1301
  }
1159
1302
 
1160
1303
  /*
@@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self)
1173
1316
 
1174
1317
  GET_SCANNER(self, p);
1175
1318
  if (! MATCHED_P(p)) return Qnil;
1176
- return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
1319
+ return extract_range(p,
1320
+ adjust_register_position(p, p->regs.end[0]),
1321
+ S_LEN(p));
1177
1322
  }
1178
1323
 
1179
1324
  /*
@@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p)
1302
1447
  return rb_str_dump(str);
1303
1448
  }
1304
1449
 
1450
+ /*
1451
+ * call-seq:
1452
+ * scanner.fixed_anchor? -> true or false
1453
+ *
1454
+ * Whether +scanner+ uses fixed anchor mode or not.
1455
+ *
1456
+ * If fixed anchor mode is used, +\A+ always matches the beginning of
1457
+ * the string. Otherwise, +\A+ always matches the current position.
1458
+ */
1459
+ static VALUE
1460
+ strscan_fixed_anchor_p(VALUE self)
1461
+ {
1462
+ struct strscanner *p;
1463
+ p = check_strscan(self);
1464
+ return p->fixed_anchor_p ? Qtrue : Qfalse;
1465
+ }
1466
+
1305
1467
  /* =======================================================================
1306
1468
  Ruby Interface
1307
1469
  ======================================================================= */
@@ -1412,6 +1574,7 @@ inspect2(struct strscanner *p)
1412
1574
  void
1413
1575
  Init_strscan(void)
1414
1576
  {
1577
+ #undef rb_intern
1415
1578
  ID id_scanerr = rb_intern("ScanError");
1416
1579
  VALUE tmp;
1417
1580
 
@@ -1487,4 +1650,6 @@ Init_strscan(void)
1487
1650
  rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1488
1651
 
1489
1652
  rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1653
+
1654
+ rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1490
1655
  }