strscan 1.0.0 → 1.0.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5bf6a08ae437ad9be84bb4e617a3a1f5205f83da3dbf539001e5c9aa733c2321
4
- data.tar.gz: 002cb175c04faded9975993500883192ce6d841bebe14964a91161bd80642033
3
+ metadata.gz: 126a67b7200ba8c3f3aeb661e55a30a37b23bdd9fe68c3f8ebfc4ad9f6c2cdcb
4
+ data.tar.gz: d0fafda0d3353b100c32a1437ee58258742f914a2dbb49dfde267cb15974f9f5
5
5
  SHA512:
6
- metadata.gz: ea00e403b94d4492c6670b7cfaeff3770b5547b913fdb44b8c7e0ba6a08fb14383c71a74142a6673d6691446e1483984e85c550a3be212f5a140ceb25bbf173e
7
- data.tar.gz: 410f645b7211199459d66f9a3825262f12bda9184cbc3b8893ca254cc6708da19886544a38a1cf25b39ddf9a1e16b30e8d8a04a3ecd55243f3a80619b94e8993
6
+ metadata.gz: ef088b41a17fd9afbee17734527fa911aea8f5b16b6f4547efb6246648f1b81ce54c6c09ebddd8c1d5b9d8c02b997d9e12f83d32214a2f25bcb5854238ad9f0d
7
+ data.tar.gz: 4d93362e2c96ffd62c8ccfdbfff4d77558d1948173901176ff80f0b0c12b272bd6787a6016146756bb71b19a68a00d6add0de3cb95086cec78142873584ff6c6
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  require 'mkmf'
3
- $INCFLAGS << " -I$(top_srcdir)"
3
+ $INCFLAGS << " -I$(top_srcdir)" if $extmk
4
+ have_func("onig_region_memsize", "ruby.h")
4
5
  create_makefile 'strscan'
@@ -11,9 +11,18 @@
11
11
  #include "ruby/ruby.h"
12
12
  #include "ruby/re.h"
13
13
  #include "ruby/encoding.h"
14
- #include "regint.h"
15
14
 
16
- #define STRSCAN_VERSION "0.7.0"
15
+ #ifdef RUBY_EXTCONF_H
16
+ # include RUBY_EXTCONF_H
17
+ #endif
18
+
19
+ #ifdef HAVE_ONIG_REGION_MEMSIZE
20
+ extern size_t onig_region_memsize(const struct re_registers *regs);
21
+ #endif
22
+
23
+ #include <stdbool.h>
24
+
25
+ #define STRSCAN_VERSION "1.0.3"
17
26
 
18
27
  /* =======================================================================
19
28
  Data Type Definitions
@@ -41,6 +50,9 @@ struct strscanner
41
50
 
42
51
  /* regexp used for last scan */
43
52
  VALUE regex;
53
+
54
+ /* anchor mode */
55
+ bool fixed_anchor_p;
44
56
  };
45
57
 
46
58
  #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@@ -186,7 +198,11 @@ static size_t
186
198
  strscan_memsize(const void *ptr)
187
199
  {
188
200
  const struct strscanner *p = ptr;
189
- return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
201
+ size_t size = sizeof(*p) - sizeof(p->regs);
202
+ #ifdef HAVE_ONIG_REGION_MEMSIZE
203
+ size += onig_region_memsize(&p->regs);
204
+ #endif
205
+ return size;
190
206
  }
191
207
 
192
208
  static const rb_data_type_t strscanner_type = {
@@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass)
208
224
  }
209
225
 
210
226
  /*
211
- * call-seq: StringScanner.new(string, dup = false)
227
+ * call-seq:
228
+ * StringScanner.new(string, fixed_anchor: false)
229
+ * StringScanner.new(string, dup = false)
212
230
  *
213
231
  * Creates a new StringScanner object to scan over the given +string+.
232
+ *
233
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
234
+ * the string. Otherwise, +\A+ always matches the current position.
235
+ *
214
236
  * The +dup+ argument is obsolete and is no longer used.
215
237
  */
216
238
  static VALUE
217
239
  strscan_initialize(int argc, VALUE *argv, VALUE self)
218
240
  {
219
241
  struct strscanner *p;
220
- VALUE str, need_dup;
242
+ VALUE str, options;
221
243
 
222
244
  p = check_strscan(self);
223
- rb_scan_args(argc, argv, "11", &str, &need_dup);
245
+ rb_scan_args(argc, argv, "11", &str, &options);
246
+ options = rb_check_hash_type(options);
247
+ if (!NIL_P(options)) {
248
+ VALUE fixed_anchor;
249
+ ID keyword_ids[1];
250
+ keyword_ids[0] = rb_intern("fixed_anchor");
251
+ rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
252
+ if (fixed_anchor == Qundef) {
253
+ p->fixed_anchor_p = false;
254
+ }
255
+ else {
256
+ p->fixed_anchor_p = RTEST(fixed_anchor);
257
+ }
258
+ }
259
+ else {
260
+ p->fixed_anchor_p = false;
261
+ }
224
262
  StringValue(str);
225
263
  p->str = str;
226
264
 
@@ -294,7 +332,7 @@ strscan_reset(VALUE self)
294
332
  * terminate
295
333
  * clear
296
334
  *
297
- * Set the scan pointer to the end of the string and clear matching data.
335
+ * Sets the scan pointer to the end of the string and clears matching data.
298
336
  */
299
337
  static VALUE
300
338
  strscan_terminate(VALUE self)
@@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self)
425
463
  /*
426
464
  * call-seq: pos=(n)
427
465
  *
428
- * Set the byte position of the scan pointer.
466
+ * Sets the byte position of the scan pointer.
429
467
  *
430
468
  * s = StringScanner.new('test string')
431
469
  * s.pos = 7 # -> 7
@@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v)
446
484
  return INT2NUM(i);
447
485
  }
448
486
 
487
+ static inline UChar *
488
+ match_target(struct strscanner *p)
489
+ {
490
+ if (p->fixed_anchor_p) {
491
+ return (UChar *)S_PBEG(p);
492
+ }
493
+ else
494
+ {
495
+ return (UChar *)CURPTR(p);
496
+ }
497
+ }
498
+
499
+ static inline void
500
+ set_registers(struct strscanner *p, size_t length)
501
+ {
502
+ onig_region_clear(&(p->regs));
503
+ if (p->fixed_anchor_p) {
504
+ onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
505
+ }
506
+ else
507
+ {
508
+ onig_region_set(&(p->regs), 0, 0, length);
509
+ }
510
+ }
511
+
512
+ static inline void
513
+ succ(struct strscanner *p)
514
+ {
515
+ if (p->fixed_anchor_p) {
516
+ p->curr = p->regs.end[0];
517
+ }
518
+ else
519
+ {
520
+ p->curr += p->regs.end[0];
521
+ }
522
+ }
523
+
524
+ static inline long
525
+ last_match_length(struct strscanner *p)
526
+ {
527
+ if (p->fixed_anchor_p) {
528
+ return p->regs.end[0] - p->prev;
529
+ }
530
+ else
531
+ {
532
+ return p->regs.end[0];
533
+ }
534
+ }
535
+
536
+ static inline long
537
+ adjust_register_position(struct strscanner *p, long position)
538
+ {
539
+ if (p->fixed_anchor_p) {
540
+ return position;
541
+ }
542
+ else {
543
+ return p->prev + position;
544
+ }
545
+ }
546
+
449
547
  static VALUE
450
- strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
548
+ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
451
549
  {
452
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
453
550
  struct strscanner *p;
454
- regex_t *re;
455
- long ret;
456
- int tmpreg;
457
551
 
458
- Check_Type(regex, T_REGEXP);
552
+ if (headonly) {
553
+ if (!RB_TYPE_P(pattern, T_REGEXP)) {
554
+ StringValue(pattern);
555
+ }
556
+ }
557
+ else {
558
+ Check_Type(pattern, T_REGEXP);
559
+ }
459
560
  GET_SCANNER(self, p);
460
561
 
461
562
  CLEAR_MATCH_STATUS(p);
@@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
463
564
  return Qnil;
464
565
  }
465
566
 
466
- p->regex = regex;
467
- re = rb_reg_prepare_re(regex, p->str);
468
- tmpreg = re != RREGEXP_PTR(regex);
469
- if (!tmpreg) RREGEXP(regex)->usecnt++;
567
+ if (RB_TYPE_P(pattern, T_REGEXP)) {
568
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
569
+ regex_t *re;
570
+ long ret;
571
+ int tmpreg;
572
+
573
+ p->regex = pattern;
574
+ re = rb_reg_prepare_re(pattern, p->str);
575
+ tmpreg = re != RREGEXP_PTR(pattern);
576
+ if (!tmpreg) RREGEXP(pattern)->usecnt++;
577
+
578
+ if (headonly) {
579
+ ret = onig_match(re,
580
+ match_target(p),
581
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
582
+ (UChar* )CURPTR(p),
583
+ &(p->regs),
584
+ ONIG_OPTION_NONE);
585
+ }
586
+ else {
587
+ ret = onig_search(re,
588
+ match_target(p),
589
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
590
+ (UChar* )CURPTR(p),
591
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
592
+ &(p->regs),
593
+ ONIG_OPTION_NONE);
594
+ }
595
+ if (!tmpreg) RREGEXP(pattern)->usecnt--;
596
+ if (tmpreg) {
597
+ if (RREGEXP(pattern)->usecnt) {
598
+ onig_free(re);
599
+ }
600
+ else {
601
+ onig_free(RREGEXP_PTR(pattern));
602
+ RREGEXP_PTR(pattern) = re;
603
+ }
604
+ }
470
605
 
471
- if (headonly) {
472
- ret = onig_match(re, (UChar* )CURPTR(p),
473
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
474
- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
606
+ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
607
+ if (ret < 0) {
608
+ /* not matched */
609
+ return Qnil;
610
+ }
475
611
  }
476
612
  else {
477
- ret = onig_search(re,
478
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
479
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
480
- &(p->regs), ONIG_OPTION_NONE);
481
- }
482
- if (!tmpreg) RREGEXP(regex)->usecnt--;
483
- if (tmpreg) {
484
- if (RREGEXP(regex)->usecnt) {
485
- onig_free(re);
613
+ rb_enc_check(p->str, pattern);
614
+ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
615
+ return Qnil;
486
616
  }
487
- else {
488
- onig_free(RREGEXP_PTR(regex));
489
- RREGEXP_PTR(regex) = re;
617
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
618
+ return Qnil;
490
619
  }
491
- }
492
-
493
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
494
- if (ret < 0) {
495
- /* not matched */
496
- return Qnil;
620
+ set_registers(p, RSTRING_LEN(pattern));
497
621
  }
498
622
 
499
623
  MATCHED(p);
500
624
  p->prev = p->curr;
625
+
501
626
  if (succptr) {
502
- p->curr += p->regs.end[0];
503
- }
504
- if (getstr) {
505
- return extract_beg_len(p, p->prev, p->regs.end[0]);
627
+ succ(p);
506
628
  }
507
- else {
508
- return INT2FIX(p->regs.end[0]);
629
+ {
630
+ const long length = last_match_length(p);
631
+ if (getstr) {
632
+ return extract_beg_len(p, p->prev, length);
633
+ }
634
+ else {
635
+ return INT2FIX(length);
636
+ }
509
637
  }
510
638
  }
511
639
 
@@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
520
648
  * p s.scan(/\w+/) # -> "test"
521
649
  * p s.scan(/\w+/) # -> nil
522
650
  * p s.scan(/\s+/) # -> " "
523
- * p s.scan(/\w+/) # -> "string"
651
+ * p s.scan("str") # -> "str"
652
+ * p s.scan(/\w+/) # -> "ing"
524
653
  * p s.scan(/./) # -> nil
525
654
  *
526
655
  */
@@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re)
539
668
  * s = StringScanner.new('test string')
540
669
  * p s.match?(/\w+/) # -> 4
541
670
  * p s.match?(/\w+/) # -> 4
671
+ * p s.match?("test") # -> 4
542
672
  * p s.match?(/\s+/) # -> nil
543
673
  */
544
674
  static VALUE
@@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re)
560
690
  * p s.skip(/\w+/) # -> 4
561
691
  * p s.skip(/\w+/) # -> nil
562
692
  * p s.skip(/\s+/) # -> 1
563
- * p s.skip(/\w+/) # -> 6
693
+ * p s.skip("st") # -> 2
694
+ * p s.skip(/\w+/) # -> 4
564
695
  * p s.skip(/./) # -> nil
565
696
  *
566
697
  */
@@ -704,7 +835,12 @@ static void
704
835
  adjust_registers_to_matched(struct strscanner *p)
705
836
  {
706
837
  onig_region_clear(&(p->regs));
707
- onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
838
+ if (p->fixed_anchor_p) {
839
+ onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
840
+ }
841
+ else {
842
+ onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
843
+ }
708
844
  }
709
845
 
710
846
  /*
@@ -738,8 +874,9 @@ strscan_getch(VALUE self)
738
874
  p->curr += len;
739
875
  MATCHED(p);
740
876
  adjust_registers_to_matched(p);
741
- return extract_range(p, p->prev + p->regs.beg[0],
742
- p->prev + p->regs.end[0]);
877
+ return extract_range(p,
878
+ adjust_register_position(p, p->regs.beg[0]),
879
+ adjust_register_position(p, p->regs.end[0]));
743
880
  }
744
881
 
745
882
  /*
@@ -772,8 +909,9 @@ strscan_get_byte(VALUE self)
772
909
  p->curr++;
773
910
  MATCHED(p);
774
911
  adjust_registers_to_matched(p);
775
- return extract_range(p, p->prev + p->regs.beg[0],
776
- p->prev + p->regs.end[0]);
912
+ return extract_range(p,
913
+ adjust_register_position(p, p->regs.beg[0]),
914
+ adjust_register_position(p, p->regs.end[0]));
777
915
  }
778
916
 
779
917
  /*
@@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen)
826
964
  }
827
965
 
828
966
  /*
829
- * Set the scan pointer to the previous position. Only one previous position is
967
+ * Sets the scan pointer to the previous position. Only one previous position is
830
968
  * remembered, and it changes with each scanning operation.
831
969
  *
832
970
  * s = StringScanner.new('test string')
@@ -951,8 +1089,9 @@ strscan_matched(VALUE self)
951
1089
 
952
1090
  GET_SCANNER(self, p);
953
1091
  if (! MATCHED_P(p)) return Qnil;
954
- return extract_range(p, p->prev + p->regs.beg[0],
955
- p->prev + p->regs.end[0]);
1092
+ return extract_range(p,
1093
+ adjust_register_position(p, p->regs.beg[0]),
1094
+ adjust_register_position(p, p->regs.end[0]));
956
1095
  }
957
1096
 
958
1097
  /*
@@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx)
1048
1187
  if (i >= p->regs.num_regs) return Qnil;
1049
1188
  if (p->regs.beg[i] == -1) return Qnil;
1050
1189
 
1051
- return extract_range(p, p->prev + p->regs.beg[i],
1052
- p->prev + p->regs.end[i]);
1190
+ return extract_range(p,
1191
+ adjust_register_position(p, p->regs.beg[i]),
1192
+ adjust_register_position(p, p->regs.end[i]));
1053
1193
  }
1054
1194
 
1055
1195
  /*
@@ -1098,8 +1238,9 @@ strscan_captures(VALUE self)
1098
1238
  new_ary = rb_ary_new2(num_regs);
1099
1239
 
1100
1240
  for (i = 1; i < num_regs; i++) {
1101
- VALUE str = extract_range(p, p->prev + p->regs.beg[i],
1102
- p->prev + p->regs.end[i]);
1241
+ VALUE str = extract_range(p,
1242
+ adjust_register_position(p, p->regs.beg[i]),
1243
+ adjust_register_position(p, p->regs.end[i]));
1103
1244
  rb_ary_push(new_ary, str);
1104
1245
  }
1105
1246
 
@@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self)
1154
1295
 
1155
1296
  GET_SCANNER(self, p);
1156
1297
  if (! MATCHED_P(p)) return Qnil;
1157
- return extract_range(p, 0, p->prev + p->regs.beg[0]);
1298
+ return extract_range(p,
1299
+ 0,
1300
+ adjust_register_position(p, p->regs.beg[0]));
1158
1301
  }
1159
1302
 
1160
1303
  /*
@@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self)
1173
1316
 
1174
1317
  GET_SCANNER(self, p);
1175
1318
  if (! MATCHED_P(p)) return Qnil;
1176
- return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
1319
+ return extract_range(p,
1320
+ adjust_register_position(p, p->regs.end[0]),
1321
+ S_LEN(p));
1177
1322
  }
1178
1323
 
1179
1324
  /*
@@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p)
1302
1447
  return rb_str_dump(str);
1303
1448
  }
1304
1449
 
1450
+ /*
1451
+ * call-seq:
1452
+ * scanner.fixed_anchor? -> true or false
1453
+ *
1454
+ * Whether +scanner+ uses fixed anchor mode or not.
1455
+ *
1456
+ * If fixed anchor mode is used, +\A+ always matches the beginning of
1457
+ * the string. Otherwise, +\A+ always matches the current position.
1458
+ */
1459
+ static VALUE
1460
+ strscan_fixed_anchor_p(VALUE self)
1461
+ {
1462
+ struct strscanner *p;
1463
+ p = check_strscan(self);
1464
+ return p->fixed_anchor_p ? Qtrue : Qfalse;
1465
+ }
1466
+
1305
1467
  /* =======================================================================
1306
1468
  Ruby Interface
1307
1469
  ======================================================================= */
@@ -1412,6 +1574,7 @@ inspect2(struct strscanner *p)
1412
1574
  void
1413
1575
  Init_strscan(void)
1414
1576
  {
1577
+ #undef rb_intern
1415
1578
  ID id_scanerr = rb_intern("ScanError");
1416
1579
  VALUE tmp;
1417
1580
 
@@ -1487,4 +1650,6 @@ Init_strscan(void)
1487
1650
  rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1488
1651
 
1489
1652
  rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1653
+
1654
+ rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1490
1655
  }