strscan 3.1.0 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
22
22
 
23
23
  #include <stdbool.h>
24
24
 
25
- #define STRSCAN_VERSION "3.1.0"
25
+ #define STRSCAN_VERSION "3.1.4"
26
26
 
27
27
  /* =======================================================================
28
28
  Data Type Definitions
@@ -32,6 +32,8 @@ static VALUE StringScanner;
32
32
  static VALUE ScanError;
33
33
  static ID id_byteslice;
34
34
 
35
+ static int usascii_encindex, utf8_encindex, binary_encindex;
36
+
35
37
  struct strscanner
36
38
  {
37
39
  /* multi-purpose flags */
@@ -56,8 +58,13 @@ struct strscanner
56
58
  };
57
59
 
58
60
  #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
59
- #define MATCHED(s) (s)->flags |= FLAG_MATCHED
60
- #define CLEAR_MATCH_STATUS(s) (s)->flags &= ~FLAG_MATCHED
61
+ #define MATCHED(s) ((s)->flags |= FLAG_MATCHED)
62
+ #define CLEAR_MATCHED(s) ((s)->flags &= ~FLAG_MATCHED)
63
+ #define CLEAR_NAMED_CAPTURES(s) ((s)->regex = Qnil)
64
+ #define CLEAR_MATCH_STATUS(s) do {\
65
+ CLEAR_MATCHED(s);\
66
+ CLEAR_NAMED_CAPTURES(s);\
67
+ } while (0)
61
68
 
62
69
  #define S_PBEG(s) (RSTRING_PTR((s)->str))
63
70
  #define S_LEN(s) (RSTRING_LEN((s)->str))
@@ -115,6 +122,7 @@ static VALUE strscan_get_byte _((VALUE self));
115
122
  static VALUE strscan_getbyte _((VALUE self));
116
123
  static VALUE strscan_peek _((VALUE self, VALUE len));
117
124
  static VALUE strscan_peep _((VALUE self, VALUE len));
125
+ static VALUE strscan_scan_base10_integer _((VALUE self));
118
126
  static VALUE strscan_unscan _((VALUE self));
119
127
  static VALUE strscan_bol_p _((VALUE self));
120
128
  static VALUE strscan_eos_p _((VALUE self));
@@ -213,21 +221,32 @@ strscan_s_allocate(VALUE klass)
213
221
  CLEAR_MATCH_STATUS(p);
214
222
  onig_region_init(&(p->regs));
215
223
  p->str = Qnil;
216
- p->regex = Qnil;
217
224
  return obj;
218
225
  }
219
226
 
220
227
  /*
221
- * call-seq:
222
- * StringScanner.new(string, fixed_anchor: false)
223
- * StringScanner.new(string, dup = false)
228
+ * :markup: markdown
229
+ * :include: strscan/link_refs.txt
224
230
  *
225
- * Creates a new StringScanner object to scan over the given +string+.
226
- *
227
- * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
228
- * the string. Otherwise, +\A+ always matches the current position.
231
+ * call-seq:
232
+ * StringScanner.new(string, fixed_anchor: false) -> string_scanner
233
+ *
234
+ * Returns a new `StringScanner` object whose [stored string][1]
235
+ * is the given `string`;
236
+ * sets the [fixed-anchor property][10]:
237
+ *
238
+ * ```rb
239
+ * scanner = StringScanner.new('foobarbaz')
240
+ * scanner.string # => "foobarbaz"
241
+ * scanner.fixed_anchor? # => false
242
+ * put_situation(scanner)
243
+ * # Situation:
244
+ * # pos: 0
245
+ * # charpos: 0
246
+ * # rest: "foobarbaz"
247
+ * # rest_size: 9
248
+ * ```
229
249
  *
230
- * +dup+ argument is obsolete and not used now.
231
250
  */
232
251
  static VALUE
233
252
  strscan_initialize(int argc, VALUE *argv, VALUE self)
@@ -266,11 +285,14 @@ check_strscan(VALUE obj)
266
285
  }
267
286
 
268
287
  /*
288
+ * :markup: markdown
289
+ * :include: strscan/link_refs.txt
290
+ *
269
291
  * call-seq:
270
- * dup
271
- * clone
292
+ * dup -> shallow_copy
272
293
  *
273
- * Duplicates a StringScanner object.
294
+ * Returns a shallow copy of `self`;
295
+ * the [stored string][1] in the copy is the same string as in `self`.
274
296
  */
275
297
  static VALUE
276
298
  strscan_init_copy(VALUE vself, VALUE vorig)
@@ -297,10 +319,13 @@ strscan_init_copy(VALUE vself, VALUE vorig)
297
319
  ======================================================================= */
298
320
 
299
321
  /*
300
- * call-seq: StringScanner.must_C_version
322
+ * call-seq:
323
+ * StringScanner.must_C_version -> self
301
324
  *
302
- * This method is defined for backward compatibility.
325
+ * Returns +self+; defined for backward compatibility.
303
326
  */
327
+
328
+ /* :nodoc: */
304
329
  static VALUE
305
330
  strscan_s_mustc(VALUE self)
306
331
  {
@@ -308,7 +333,30 @@ strscan_s_mustc(VALUE self)
308
333
  }
309
334
 
310
335
  /*
311
- * Reset the scan pointer (index 0) and clear matching data.
336
+ * :markup: markdown
337
+ * :include: strscan/link_refs.txt
338
+ *
339
+ * call-seq:
340
+ * reset -> self
341
+ *
342
+ * Sets both [byte position][2] and [character position][7] to zero,
343
+ * and clears [match values][9];
344
+ * returns +self+:
345
+ *
346
+ * ```rb
347
+ * scanner = StringScanner.new('foobarbaz')
348
+ * scanner.exist?(/bar/) # => 6
349
+ * scanner.reset # => #<StringScanner 0/9 @ "fooba...">
350
+ * put_situation(scanner)
351
+ * # Situation:
352
+ * # pos: 0
353
+ * # charpos: 0
354
+ * # rest: "foobarbaz"
355
+ * # rest_size: 9
356
+ * # => nil
357
+ * match_values_cleared?(scanner) # => true
358
+ * ```
359
+ *
312
360
  */
313
361
  static VALUE
314
362
  strscan_reset(VALUE self)
@@ -322,11 +370,9 @@ strscan_reset(VALUE self)
322
370
  }
323
371
 
324
372
  /*
325
- * call-seq:
326
- * terminate
327
- * clear
328
- *
329
- * Sets the scan pointer to the end of the string and clear matching data.
373
+ * :markup: markdown
374
+ * :include: strscan/link_refs.txt
375
+ * :include: strscan/methods/terminate.md
330
376
  */
331
377
  static VALUE
332
378
  strscan_terminate(VALUE self)
@@ -340,9 +386,13 @@ strscan_terminate(VALUE self)
340
386
  }
341
387
 
342
388
  /*
343
- * Equivalent to #terminate.
344
- * This method is obsolete; use #terminate instead.
389
+ * call-seq:
390
+ * clear -> self
391
+ *
392
+ * This method is obsolete; use the equivalent method StringScanner#terminate.
345
393
  */
394
+
395
+ /* :nodoc: */
346
396
  static VALUE
347
397
  strscan_clear(VALUE self)
348
398
  {
@@ -351,7 +401,21 @@ strscan_clear(VALUE self)
351
401
  }
352
402
 
353
403
  /*
354
- * Returns the string being scanned.
404
+ * :markup: markdown
405
+ * :include: strscan/link_refs.txt
406
+ *
407
+ * call-seq:
408
+ * string -> stored_string
409
+ *
410
+ * Returns the [stored string][1]:
411
+ *
412
+ * ```rb
413
+ * scanner = StringScanner.new('foobar')
414
+ * scanner.string # => "foobar"
415
+ * scanner.concat('baz')
416
+ * scanner.string # => "foobarbaz"
417
+ * ```
418
+ *
355
419
  */
356
420
  static VALUE
357
421
  strscan_get_string(VALUE self)
@@ -363,10 +427,39 @@ strscan_get_string(VALUE self)
363
427
  }
364
428
 
365
429
  /*
366
- * call-seq: string=(str)
430
+ * :markup: markdown
431
+ * :include: strscan/link_refs.txt
432
+ *
433
+ * call-seq:
434
+ * string = other_string -> other_string
435
+ *
436
+ * Replaces the [stored string][1] with the given `other_string`:
437
+ *
438
+ * - Sets both [positions][11] to zero.
439
+ * - Clears [match values][9].
440
+ * - Returns `other_string`.
441
+ *
442
+ * ```rb
443
+ * scanner = StringScanner.new('foobar')
444
+ * scanner.scan(/foo/)
445
+ * put_situation(scanner)
446
+ * # Situation:
447
+ * # pos: 3
448
+ * # charpos: 3
449
+ * # rest: "bar"
450
+ * # rest_size: 3
451
+ * match_values_cleared?(scanner) # => false
452
+ *
453
+ * scanner.string = 'baz' # => "baz"
454
+ * put_situation(scanner)
455
+ * # Situation:
456
+ * # pos: 0
457
+ * # charpos: 0
458
+ * # rest: "baz"
459
+ * # rest_size: 3
460
+ * match_values_cleared?(scanner) # => true
461
+ * ```
367
462
  *
368
- * Changes the string being scanned to +str+ and resets the scanner.
369
- * Returns +str+.
370
463
  */
371
464
  static VALUE
372
465
  strscan_set_string(VALUE self, VALUE str)
@@ -381,18 +474,33 @@ strscan_set_string(VALUE self, VALUE str)
381
474
  }
382
475
 
383
476
  /*
384
- * call-seq:
385
- * concat(str)
386
- * <<(str)
477
+ * :markup: markdown
478
+ * :include: strscan/link_refs.txt
387
479
  *
388
- * Appends +str+ to the string being scanned.
389
- * This method does not affect scan pointer.
480
+ * call-seq:
481
+ * concat(more_string) -> self
482
+ *
483
+ * - Appends the given `more_string`
484
+ * to the [stored string][1].
485
+ * - Returns `self`.
486
+ * - Does not affect the [positions][11]
487
+ * or [match values][9].
488
+ *
489
+ *
490
+ * ```rb
491
+ * scanner = StringScanner.new('foo')
492
+ * scanner.string # => "foo"
493
+ * scanner.terminate
494
+ * scanner.concat('barbaz') # => #<StringScanner 3/9 "foo" @ "barba...">
495
+ * scanner.string # => "foobarbaz"
496
+ * put_situation(scanner)
497
+ * # Situation:
498
+ * # pos: 3
499
+ * # charpos: 3
500
+ * # rest: "barbaz"
501
+ * # rest_size: 6
502
+ * ```
390
503
  *
391
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
392
- * s.scan(/Fri /)
393
- * s << " +1000 GMT"
394
- * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
395
- * s.scan(/Dec/) # -> "Dec"
396
504
  */
397
505
  static VALUE
398
506
  strscan_concat(VALUE self, VALUE str)
@@ -406,18 +514,9 @@ strscan_concat(VALUE self, VALUE str)
406
514
  }
407
515
 
408
516
  /*
409
- * Returns the byte position of the scan pointer. In the 'reset' position, this
410
- * value is zero. In the 'terminated' position (i.e. the string is exhausted),
411
- * this value is the bytesize of the string.
412
- *
413
- * In short, it's a 0-based index into bytes of the string.
414
- *
415
- * s = StringScanner.new('test string')
416
- * s.pos # -> 0
417
- * s.scan_until /str/ # -> "test str"
418
- * s.pos # -> 8
419
- * s.terminate # -> #<StringScanner fin>
420
- * s.pos # -> 11
517
+ * :markup: markdown
518
+ * :include: strscan/link_refs.txt
519
+ * :include: strscan/methods/get_pos.md
421
520
  */
422
521
  static VALUE
423
522
  strscan_get_pos(VALUE self)
@@ -425,21 +524,13 @@ strscan_get_pos(VALUE self)
425
524
  struct strscanner *p;
426
525
 
427
526
  GET_SCANNER(self, p);
428
- return INT2FIX(p->curr);
527
+ return LONG2NUM(p->curr);
429
528
  }
430
529
 
431
530
  /*
432
- * Returns the character position of the scan pointer. In the 'reset' position, this
433
- * value is zero. In the 'terminated' position (i.e. the string is exhausted),
434
- * this value is the size of the string.
435
- *
436
- * In short, it's a 0-based index into the string.
437
- *
438
- * s = StringScanner.new("abc\u00e4def\u00f6ghi")
439
- * s.charpos # -> 0
440
- * s.scan_until(/\u00e4/) # -> "abc\u00E4"
441
- * s.pos # -> 5
442
- * s.charpos # -> 4
531
+ * :markup: markdown
532
+ * :include: strscan/link_refs.txt
533
+ * :include: strscan/methods/get_charpos.md
443
534
  */
444
535
  static VALUE
445
536
  strscan_get_charpos(VALUE self)
@@ -452,13 +543,9 @@ strscan_get_charpos(VALUE self)
452
543
  }
453
544
 
454
545
  /*
455
- * call-seq: pos=(n)
456
- *
457
- * Sets the byte position of the scan pointer.
458
- *
459
- * s = StringScanner.new('test string')
460
- * s.pos = 7 # -> 7
461
- * s.rest # -> "ring"
546
+ * :markup: markdown
547
+ * :include: strscan/link_refs.txt
548
+ * :include: strscan/methods/set_pos.md
462
549
  */
463
550
  static VALUE
464
551
  strscan_set_pos(VALUE self, VALUE v)
@@ -467,7 +554,7 @@ strscan_set_pos(VALUE self, VALUE v)
467
554
  long i;
468
555
 
469
556
  GET_SCANNER(self, p);
470
- i = NUM2INT(v);
557
+ i = NUM2LONG(v);
471
558
  if (i < 0) i += S_LEN(p);
472
559
  if (i < 0) rb_raise(rb_eRangeError, "index out of range");
473
560
  if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
@@ -488,19 +575,20 @@ match_target(struct strscanner *p)
488
575
  }
489
576
 
490
577
  static inline void
491
- set_registers(struct strscanner *p, size_t length)
578
+ set_registers(struct strscanner *p, size_t pos, size_t length)
492
579
  {
493
580
  const int at = 0;
494
581
  OnigRegion *regs = &(p->regs);
495
582
  onig_region_clear(regs);
496
583
  if (onig_region_set(regs, at, 0, 0)) return;
497
584
  if (p->fixed_anchor_p) {
498
- regs->beg[at] = p->curr;
499
- regs->end[at] = p->curr + length;
585
+ regs->beg[at] = pos + p->curr;
586
+ regs->end[at] = pos + p->curr + length;
500
587
  }
501
588
  else
502
589
  {
503
- regs->end[at] = length;
590
+ regs->beg[at] = pos;
591
+ regs->end[at] = pos + length;
504
592
  }
505
593
  }
506
594
 
@@ -546,12 +634,13 @@ rb_reg_onig_match(VALUE re, VALUE str,
546
634
  OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
547
635
  void *args, struct re_registers *regs)
548
636
  {
637
+ OnigPosition result;
549
638
  regex_t *reg = rb_reg_prepare_re(re, str);
550
639
 
551
640
  bool tmpreg = reg != RREGEXP_PTR(re);
552
641
  if (!tmpreg) RREGEXP(re)->usecnt++;
553
642
 
554
- OnigPosition result = match(reg, str, regs, args);
643
+ result = match(reg, str, regs, args);
555
644
 
556
645
  if (!tmpreg) RREGEXP(re)->usecnt--;
557
646
  if (tmpreg) {
@@ -601,19 +690,19 @@ strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_pt
601
690
  ONIG_OPTION_NONE);
602
691
  }
603
692
 
693
+ static void
694
+ strscan_enc_check(VALUE str1, VALUE str2)
695
+ {
696
+ if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) {
697
+ rb_enc_check(str1, str2);
698
+ }
699
+ }
700
+
604
701
  static VALUE
605
702
  strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
606
703
  {
607
704
  struct strscanner *p;
608
705
 
609
- if (headonly) {
610
- if (!RB_TYPE_P(pattern, T_REGEXP)) {
611
- StringValue(pattern);
612
- }
613
- }
614
- else {
615
- Check_Type(pattern, T_REGEXP);
616
- }
617
706
  GET_SCANNER(self, p);
618
707
 
619
708
  CLEAR_MATCH_STATUS(p);
@@ -622,26 +711,42 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
622
711
  }
623
712
 
624
713
  if (RB_TYPE_P(pattern, T_REGEXP)) {
714
+ OnigPosition ret;
625
715
  p->regex = pattern;
626
- OnigPosition ret = rb_reg_onig_match(pattern,
627
- p->str,
628
- headonly ? strscan_match : strscan_search,
629
- (void *)p,
630
- &(p->regs));
716
+ ret = rb_reg_onig_match(p->regex,
717
+ p->str,
718
+ headonly ? strscan_match : strscan_search,
719
+ (void *)p,
720
+ &(p->regs));
631
721
 
632
722
  if (ret == ONIG_MISMATCH) {
633
723
  return Qnil;
634
724
  }
635
725
  }
636
726
  else {
637
- rb_enc_check(p->str, pattern);
727
+ StringValue(pattern);
638
728
  if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
729
+ strscan_enc_check(p->str, pattern);
639
730
  return Qnil;
640
731
  }
641
- if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
642
- return Qnil;
732
+
733
+ if (headonly) {
734
+ strscan_enc_check(p->str, pattern);
735
+
736
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
737
+ return Qnil;
738
+ }
739
+ set_registers(p, 0, RSTRING_LEN(pattern));
740
+ }
741
+ else {
742
+ rb_encoding *enc = rb_enc_check(p->str, pattern);
743
+ long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
744
+ CURPTR(p), S_RESTLEN(p), enc);
745
+ if (pos == -1) {
746
+ return Qnil;
747
+ }
748
+ set_registers(p, pos, RSTRING_LEN(pattern));
643
749
  }
644
- set_registers(p, RSTRING_LEN(pattern));
645
750
  }
646
751
 
647
752
  MATCHED(p);
@@ -662,20 +767,9 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
662
767
  }
663
768
 
664
769
  /*
665
- * call-seq: scan(pattern) => String
666
- *
667
- * Tries to match with +pattern+ at the current position. If there's a match,
668
- * the scanner advances the "scan pointer" and returns the matched string.
669
- * Otherwise, the scanner returns +nil+.
670
- *
671
- * s = StringScanner.new('test string')
672
- * p s.scan(/\w+/) # -> "test"
673
- * p s.scan(/\w+/) # -> nil
674
- * p s.scan(/\s+/) # -> " "
675
- * p s.scan("str") # -> "str"
676
- * p s.scan(/\w+/) # -> "ing"
677
- * p s.scan(/./) # -> nil
678
- *
770
+ * :markup: markdown
771
+ * :include: strscan/link_refs.txt
772
+ * :include: strscan/methods/scan.md
679
773
  */
680
774
  static VALUE
681
775
  strscan_scan(VALUE self, VALUE re)
@@ -684,16 +778,60 @@ strscan_scan(VALUE self, VALUE re)
684
778
  }
685
779
 
686
780
  /*
687
- * call-seq: match?(pattern)
781
+ * :markup: markdown
782
+ * :include: strscan/link_refs.txt
688
783
  *
689
- * Tests whether the given +pattern+ is matched from the current scan pointer.
690
- * Returns the length of the match, or +nil+. The scan pointer is not advanced.
784
+ * call-seq:
785
+ * match?(pattern) -> matched_size or nil
786
+ *
787
+ * Attempts to [match][17] the given `pattern`
788
+ * at the beginning of the [target substring][3];
789
+ * does not modify the [positions][11].
790
+ *
791
+ * If the match succeeds:
792
+ *
793
+ * - Sets [match values][9].
794
+ * - Returns the size in bytes of the matched substring.
795
+ *
796
+ *
797
+ * ```rb
798
+ * scanner = StringScanner.new('foobarbaz')
799
+ * scanner.pos = 3
800
+ * scanner.match?(/bar/) # => 3
801
+ * put_match_values(scanner)
802
+ * # Basic match values:
803
+ * # matched?: true
804
+ * # matched_size: 3
805
+ * # pre_match: "foo"
806
+ * # matched : "bar"
807
+ * # post_match: "baz"
808
+ * # Captured match values:
809
+ * # size: 1
810
+ * # captures: []
811
+ * # named_captures: {}
812
+ * # values_at: ["bar", nil]
813
+ * # []:
814
+ * # [0]: "bar"
815
+ * # [1]: nil
816
+ * put_situation(scanner)
817
+ * # Situation:
818
+ * # pos: 3
819
+ * # charpos: 3
820
+ * # rest: "barbaz"
821
+ * # rest_size: 6
822
+ * ```
823
+ *
824
+ * If the match fails:
825
+ *
826
+ * - Clears match values.
827
+ * - Returns `nil`.
828
+ * - Does not increment positions.
829
+ *
830
+ * ```rb
831
+ * scanner.match?(/nope/) # => nil
832
+ * match_values_cleared?(scanner) # => true
833
+ * ```
691
834
  *
692
- * s = StringScanner.new('test string')
693
- * p s.match?(/\w+/) # -> 4
694
- * p s.match?(/\w+/) # -> 4
695
- * p s.match?("test") # -> 4
696
- * p s.match?(/\s+/) # -> nil
697
835
  */
698
836
  static VALUE
699
837
  strscan_match_p(VALUE self, VALUE re)
@@ -702,22 +840,9 @@ strscan_match_p(VALUE self, VALUE re)
702
840
  }
703
841
 
704
842
  /*
705
- * call-seq: skip(pattern)
706
- *
707
- * Attempts to skip over the given +pattern+ beginning with the scan pointer.
708
- * If it matches, the scan pointer is advanced to the end of the match, and the
709
- * length of the match is returned. Otherwise, +nil+ is returned.
710
- *
711
- * It's similar to #scan, but without returning the matched string.
712
- *
713
- * s = StringScanner.new('test string')
714
- * p s.skip(/\w+/) # -> 4
715
- * p s.skip(/\w+/) # -> nil
716
- * p s.skip(/\s+/) # -> 1
717
- * p s.skip("st") # -> 2
718
- * p s.skip(/\w+/) # -> 4
719
- * p s.skip(/./) # -> nil
720
- *
843
+ * :markup: markdown
844
+ * :include: strscan/link_refs.txt
845
+ * :include: strscan/methods/skip.md
721
846
  */
722
847
  static VALUE
723
848
  strscan_skip(VALUE self, VALUE re)
@@ -726,19 +851,59 @@ strscan_skip(VALUE self, VALUE re)
726
851
  }
727
852
 
728
853
  /*
729
- * call-seq: check(pattern)
730
- *
731
- * This returns the value that #scan would return, without advancing the scan
732
- * pointer. The match register is affected, though.
854
+ * :markup: markdown
855
+ * :include: strscan/link_refs.txt
733
856
  *
734
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
735
- * s.check /Fri/ # -> "Fri"
736
- * s.pos # -> 0
737
- * s.matched # -> "Fri"
738
- * s.check /12/ # -> nil
739
- * s.matched # -> nil
857
+ * call-seq:
858
+ * check(pattern) -> matched_substring or nil
859
+ *
860
+ * Attempts to [match][17] the given `pattern`
861
+ * at the beginning of the [target substring][3];
862
+ * does not modify the [positions][11].
863
+ *
864
+ * If the match succeeds:
865
+ *
866
+ * - Returns the matched substring.
867
+ * - Sets all [match values][9].
868
+ *
869
+ * ```rb
870
+ * scanner = StringScanner.new('foobarbaz')
871
+ * scanner.pos = 3
872
+ * scanner.check('bar') # => "bar"
873
+ * put_match_values(scanner)
874
+ * # Basic match values:
875
+ * # matched?: true
876
+ * # matched_size: 3
877
+ * # pre_match: "foo"
878
+ * # matched : "bar"
879
+ * # post_match: "baz"
880
+ * # Captured match values:
881
+ * # size: 1
882
+ * # captures: []
883
+ * # named_captures: {}
884
+ * # values_at: ["bar", nil]
885
+ * # []:
886
+ * # [0]: "bar"
887
+ * # [1]: nil
888
+ * # => 0..1
889
+ * put_situation(scanner)
890
+ * # Situation:
891
+ * # pos: 3
892
+ * # charpos: 3
893
+ * # rest: "barbaz"
894
+ * # rest_size: 6
895
+ * ```
896
+ *
897
+ * If the match fails:
898
+ *
899
+ * - Returns `nil`.
900
+ * - Clears all [match values][9].
901
+ *
902
+ * ```rb
903
+ * scanner.check(/nope/) # => nil
904
+ * match_values_cleared?(scanner) # => true
905
+ * ```
740
906
  *
741
- * Mnemonic: it "checks" to see whether a #scan will return a value.
742
907
  */
743
908
  static VALUE
744
909
  strscan_check(VALUE self, VALUE re)
@@ -747,15 +912,24 @@ strscan_check(VALUE self, VALUE re)
747
912
  }
748
913
 
749
914
  /*
750
- * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
915
+ * call-seq:
916
+ * scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or matched_size or nil
917
+ *
918
+ * Equivalent to one of the following:
919
+ *
920
+ * - +advance_pointer_p+ +true+:
921
+ *
922
+ * - +return_string_p+ +true+: StringScanner#scan(pattern).
923
+ * - +return_string_p+ +false+: StringScanner#skip(pattern).
924
+ *
925
+ * - +advance_pointer_p+ +false+:
751
926
  *
752
- * Tests whether the given +pattern+ is matched from the current scan pointer.
753
- * Advances the scan pointer if +advance_pointer_p+ is true.
754
- * Returns the matched string if +return_string_p+ is true.
755
- * The match register is affected.
927
+ * - +return_string_p+ +true+: StringScanner#check(pattern).
928
+ * - +return_string_p+ +false+: StringScanner#match?(pattern).
756
929
  *
757
- * "full" means "#scan with full parameters".
758
930
  */
931
+
932
+ /* :nodoc: */
759
933
  static VALUE
760
934
  strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
761
935
  {
@@ -763,16 +937,9 @@ strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
763
937
  }
764
938
 
765
939
  /*
766
- * call-seq: scan_until(pattern)
767
- *
768
- * Scans the string _until_ the +pattern+ is matched. Returns the substring up
769
- * to and including the end of the match, advancing the scan pointer to that
770
- * location. If there is no match, +nil+ is returned.
771
- *
772
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
773
- * s.scan_until(/1/) # -> "Fri Dec 1"
774
- * s.pre_match # -> "Fri Dec "
775
- * s.scan_until(/XYZ/) # -> nil
940
+ * :markup: markdown
941
+ * :include: strscan/link_refs.txt
942
+ * :include: strscan/methods/scan_until.md
776
943
  */
777
944
  static VALUE
778
945
  strscan_scan_until(VALUE self, VALUE re)
@@ -781,17 +948,61 @@ strscan_scan_until(VALUE self, VALUE re)
781
948
  }
782
949
 
783
950
  /*
784
- * call-seq: exist?(pattern)
951
+ * :markup: markdown
952
+ * :include: strscan/link_refs.txt
785
953
  *
786
- * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
787
- * without advancing the scan pointer. This predicates whether a #scan_until
788
- * will return a value.
954
+ * call-seq:
955
+ * exist?(pattern) -> byte_offset or nil
956
+ *
957
+ * Attempts to [match][17] the given `pattern`
958
+ * anywhere (at any [position][2])
959
+ * in the [target substring][3];
960
+ * does not modify the [positions][11].
961
+ *
962
+ * If the match succeeds:
963
+ *
964
+ * - Returns a byte offset:
965
+ * the distance in bytes between the current [position][2]
966
+ * and the end of the matched substring.
967
+ * - Sets all [match values][9].
968
+ *
969
+ * ```rb
970
+ * scanner = StringScanner.new('foobarbazbatbam')
971
+ * scanner.pos = 6
972
+ * scanner.exist?(/bat/) # => 6
973
+ * put_match_values(scanner)
974
+ * # Basic match values:
975
+ * # matched?: true
976
+ * # matched_size: 3
977
+ * # pre_match: "foobarbaz"
978
+ * # matched : "bat"
979
+ * # post_match: "bam"
980
+ * # Captured match values:
981
+ * # size: 1
982
+ * # captures: []
983
+ * # named_captures: {}
984
+ * # values_at: ["bat", nil]
985
+ * # []:
986
+ * # [0]: "bat"
987
+ * # [1]: nil
988
+ * put_situation(scanner)
989
+ * # Situation:
990
+ * # pos: 6
991
+ * # charpos: 6
992
+ * # rest: "bazbatbam"
993
+ * # rest_size: 9
994
+ * ```
995
+ *
996
+ * If the match fails:
997
+ *
998
+ * - Returns `nil`.
999
+ * - Clears all [match values][9].
1000
+ *
1001
+ * ```rb
1002
+ * scanner.exist?(/nope/) # => nil
1003
+ * match_values_cleared?(scanner) # => true
1004
+ * ```
789
1005
  *
790
- * s = StringScanner.new('test string')
791
- * s.exist? /s/ # -> 3
792
- * s.scan /test/ # -> "test"
793
- * s.exist? /s/ # -> 2
794
- * s.exist? /e/ # -> nil
795
1006
  */
796
1007
  static VALUE
797
1008
  strscan_exist_p(VALUE self, VALUE re)
@@ -800,20 +1011,9 @@ strscan_exist_p(VALUE self, VALUE re)
800
1011
  }
801
1012
 
802
1013
  /*
803
- * call-seq: skip_until(pattern)
804
- *
805
- * Advances the scan pointer until +pattern+ is matched and consumed. Returns
806
- * the number of bytes advanced, or +nil+ if no match was found.
807
- *
808
- * Look ahead to match +pattern+, and advance the scan pointer to the _end_
809
- * of the match. Return the number of characters advanced, or +nil+ if the
810
- * match was unsuccessful.
811
- *
812
- * It's similar to #scan_until, but without returning the intervening string.
813
- *
814
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
815
- * s.skip_until /12/ # -> 10
816
- * s #
1014
+ * :markup: markdown
1015
+ * :include: strscan/link_refs.txt
1016
+ * :include: strscan/methods/skip_until.md
817
1017
  */
818
1018
  static VALUE
819
1019
  strscan_skip_until(VALUE self, VALUE re)
@@ -822,17 +1022,61 @@ strscan_skip_until(VALUE self, VALUE re)
822
1022
  }
823
1023
 
824
1024
  /*
825
- * call-seq: check_until(pattern)
1025
+ * :markup: markdown
1026
+ * :include: strscan/link_refs.txt
826
1027
  *
827
- * This returns the value that #scan_until would return, without advancing the
828
- * scan pointer. The match register is affected, though.
829
- *
830
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
831
- * s.check_until /12/ # -> "Fri Dec 12"
832
- * s.pos # -> 0
833
- * s.matched # -> 12
1028
+ * call-seq:
1029
+ * check_until(pattern) -> substring or nil
1030
+ *
1031
+ * Attempts to [match][17] the given `pattern`
1032
+ * anywhere (at any [position][2])
1033
+ * in the [target substring][3];
1034
+ * does not modify the [positions][11].
1035
+ *
1036
+ * If the match succeeds:
1037
+ *
1038
+ * - Sets all [match values][9].
1039
+ * - Returns a substring,
1040
+ * which extends from the current [position][2]
1041
+ * to the end of the matched substring.
1042
+ *
1043
+ * ```rb
1044
+ * scanner = StringScanner.new('foobarbazbatbam')
1045
+ * scanner.pos = 6
1046
+ * scanner.check_until(/bat/) # => "bazbat"
1047
+ * put_match_values(scanner)
1048
+ * # Basic match values:
1049
+ * # matched?: true
1050
+ * # matched_size: 3
1051
+ * # pre_match: "foobarbaz"
1052
+ * # matched : "bat"
1053
+ * # post_match: "bam"
1054
+ * # Captured match values:
1055
+ * # size: 1
1056
+ * # captures: []
1057
+ * # named_captures: {}
1058
+ * # values_at: ["bat", nil]
1059
+ * # []:
1060
+ * # [0]: "bat"
1061
+ * # [1]: nil
1062
+ * put_situation(scanner)
1063
+ * # Situation:
1064
+ * # pos: 6
1065
+ * # charpos: 6
1066
+ * # rest: "bazbatbam"
1067
+ * # rest_size: 9
1068
+ * ```
1069
+ *
1070
+ * If the match fails:
1071
+ *
1072
+ * - Clears all [match values][9].
1073
+ * - Returns `nil`.
1074
+ *
1075
+ * ```rb
1076
+ * scanner.check_until(/nope/) # => nil
1077
+ * match_values_cleared?(scanner) # => true
1078
+ * ```
834
1079
  *
835
- * Mnemonic: it "checks" to see whether a #scan_until will return a value.
836
1080
  */
837
1081
  static VALUE
838
1082
  strscan_check_until(VALUE self, VALUE re)
@@ -841,14 +1085,24 @@ strscan_check_until(VALUE self, VALUE re)
841
1085
  }
842
1086
 
843
1087
  /*
844
- * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
1088
+ * call-seq:
1089
+ * search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil
1090
+ *
1091
+ * Equivalent to one of the following:
1092
+ *
1093
+ * - +advance_pointer_p+ +true+:
1094
+ *
1095
+ * - +return_string_p+ +true+: StringScanner#scan_until(pattern).
1096
+ * - +return_string_p+ +false+: StringScanner#skip_until(pattern).
1097
+ *
1098
+ * - +advance_pointer_p+ +false+:
1099
+ *
1100
+ * - +return_string_p+ +true+: StringScanner#check_until(pattern).
1101
+ * - +return_string_p+ +false+: StringScanner#exist?(pattern).
845
1102
  *
846
- * Scans the string _until_ the +pattern+ is matched.
847
- * Advances the scan pointer if +advance_pointer_p+, otherwise not.
848
- * Returns the matched string if +return_string_p+ is true, otherwise
849
- * returns the number of bytes advanced.
850
- * This method does affect the match register.
851
1103
  */
1104
+
1105
+ /* :nodoc: */
852
1106
  static VALUE
853
1107
  strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
854
1108
  {
@@ -868,17 +1122,9 @@ adjust_registers_to_matched(struct strscanner *p)
868
1122
  }
869
1123
 
870
1124
  /*
871
- * Scans one character and returns it.
872
- * This method is multibyte character sensitive.
873
- *
874
- * s = StringScanner.new("ab")
875
- * s.getch # => "a"
876
- * s.getch # => "b"
877
- * s.getch # => nil
878
- *
879
- * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
880
- * s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
881
- * s.getch # => nil
1125
+ * :markup: markdown
1126
+ * :include: strscan/link_refs.txt
1127
+ * :include: strscan/methods/getch.md
882
1128
  */
883
1129
  static VALUE
884
1130
  strscan_getch(VALUE self)
@@ -903,19 +1149,55 @@ strscan_getch(VALUE self)
903
1149
  }
904
1150
 
905
1151
  /*
906
- * Scans one byte and returns it.
1152
+ * call-seq:
1153
+ * scan_byte -> integer_byte
1154
+ *
1155
+ * Scans one byte and returns it as an integer.
907
1156
  * This method is not multibyte character sensitive.
908
1157
  * See also: #getch.
909
1158
  *
1159
+ */
1160
+ static VALUE
1161
+ strscan_scan_byte(VALUE self)
1162
+ {
1163
+ struct strscanner *p;
1164
+ VALUE byte;
1165
+
1166
+ GET_SCANNER(self, p);
1167
+ CLEAR_MATCH_STATUS(p);
1168
+ if (EOS_P(p))
1169
+ return Qnil;
1170
+
1171
+ byte = INT2FIX((unsigned char)*CURPTR(p));
1172
+ p->prev = p->curr;
1173
+ p->curr++;
1174
+ MATCHED(p);
1175
+ adjust_registers_to_matched(p);
1176
+ return byte;
1177
+ }
1178
+
1179
+ /*
1180
+ * Peeks at the current byte and returns it as an integer.
1181
+ *
910
1182
  * s = StringScanner.new('ab')
911
- * s.get_byte # => "a"
912
- * s.get_byte # => "b"
913
- * s.get_byte # => nil
914
- *
915
- * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
916
- * s.get_byte # => "\xA4"
917
- * s.get_byte # => "\xA2"
918
- * s.get_byte # => nil
1183
+ * s.peek_byte # => 97
1184
+ */
1185
+ static VALUE
1186
+ strscan_peek_byte(VALUE self)
1187
+ {
1188
+ struct strscanner *p;
1189
+
1190
+ GET_SCANNER(self, p);
1191
+ if (EOS_P(p))
1192
+ return Qnil;
1193
+
1194
+ return INT2FIX((unsigned char)*CURPTR(p));
1195
+ }
1196
+
1197
+ /*
1198
+ * :markup: markdown
1199
+ * :include: strscan/link_refs.txt
1200
+ * :include: strscan/methods/get_byte.md
919
1201
  */
920
1202
  static VALUE
921
1203
  strscan_get_byte(VALUE self)
@@ -937,9 +1219,14 @@ strscan_get_byte(VALUE self)
937
1219
  }
938
1220
 
939
1221
  /*
1222
+ * call-seq:
1223
+ * getbyte
1224
+ *
940
1225
  * Equivalent to #get_byte.
941
1226
  * This method is obsolete; use #get_byte instead.
942
1227
  */
1228
+
1229
+ /* :nodoc: */
943
1230
  static VALUE
944
1231
  strscan_getbyte(VALUE self)
945
1232
  {
@@ -948,14 +1235,22 @@ strscan_getbyte(VALUE self)
948
1235
  }
949
1236
 
950
1237
  /*
951
- * call-seq: peek(len)
1238
+ * :markup: markdown
1239
+ * :include: strscan/link_refs.txt
952
1240
  *
953
- * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
954
- * advancing the scan pointer.
1241
+ * call-seq:
1242
+ * peek(length) -> substring
955
1243
  *
956
- * s = StringScanner.new('test string')
957
- * s.peek(7) # => "test st"
958
- * s.peek(7) # => "test st"
1244
+ * Returns the substring `string[pos, length]`;
1245
+ * does not update [match values][9] or [positions][11]:
1246
+ *
1247
+ * ```rb
1248
+ * scanner = StringScanner.new('foobarbaz')
1249
+ * scanner.pos = 3
1250
+ * scanner.peek(3) # => "bar"
1251
+ * scanner.terminate
1252
+ * scanner.peek(3) # => ""
1253
+ * ```
959
1254
  *
960
1255
  */
961
1256
  static VALUE
@@ -975,9 +1270,14 @@ strscan_peek(VALUE self, VALUE vlen)
975
1270
  }
976
1271
 
977
1272
  /*
1273
+ * call-seq:
1274
+ * peep
1275
+ *
978
1276
  * Equivalent to #peek.
979
1277
  * This method is obsolete; use #peek instead.
980
1278
  */
1279
+
1280
+ /* :nodoc: */
981
1281
  static VALUE
982
1282
  strscan_peep(VALUE self, VALUE vlen)
983
1283
  {
@@ -985,16 +1285,158 @@ strscan_peep(VALUE self, VALUE vlen)
985
1285
  return strscan_peek(self, vlen);
986
1286
  }
987
1287
 
1288
+ static VALUE
1289
+ strscan_parse_integer(struct strscanner *p, int base, long len)
1290
+ {
1291
+ VALUE buffer_v, integer;
1292
+
1293
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
1294
+
1295
+ MEMCPY(buffer, CURPTR(p), char, len);
1296
+ buffer[len] = '\0';
1297
+ integer = rb_cstr2inum(buffer, base);
1298
+ RB_ALLOCV_END(buffer_v);
1299
+ p->curr += len;
1300
+
1301
+ MATCHED(p);
1302
+ adjust_registers_to_matched(p);
1303
+
1304
+ return integer;
1305
+ }
1306
+
1307
+ static inline bool
1308
+ strscan_ascii_compat_fastpath(VALUE str) {
1309
+ int encindex = ENCODING_GET_INLINED(str);
1310
+ // The overwhelming majority of strings are in one of these 3 encodings.
1311
+ return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex;
1312
+ }
1313
+
1314
+ static inline void
1315
+ strscan_must_ascii_compat(VALUE str)
1316
+ {
1317
+ // The overwhelming majority of strings are in one of these 3 encodings.
1318
+ if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) {
1319
+ return;
1320
+ }
1321
+
1322
+ rb_must_asciicompat(str);
1323
+ }
1324
+
1325
+ static VALUE
1326
+ strscan_scan_base10_integer(VALUE self)
1327
+ {
1328
+ char *ptr;
1329
+ long len = 0;
1330
+ struct strscanner *p;
1331
+
1332
+ GET_SCANNER(self, p);
1333
+ CLEAR_MATCH_STATUS(p);
1334
+
1335
+ strscan_must_ascii_compat(p->str);
1336
+
1337
+ ptr = CURPTR(p);
1338
+
1339
+ long remaining_len = S_RESTLEN(p);
1340
+
1341
+ if (remaining_len <= 0) {
1342
+ return Qnil;
1343
+ }
1344
+
1345
+ if (ptr[len] == '-' || ptr[len] == '+') {
1346
+ len++;
1347
+ }
1348
+
1349
+ if (!rb_isdigit(ptr[len])) {
1350
+ return Qnil;
1351
+ }
1352
+
1353
+ p->prev = p->curr;
1354
+
1355
+ while (len < remaining_len && rb_isdigit(ptr[len])) {
1356
+ len++;
1357
+ }
1358
+
1359
+ return strscan_parse_integer(p, 10, len);
1360
+ }
1361
+
1362
+ static VALUE
1363
+ strscan_scan_base16_integer(VALUE self)
1364
+ {
1365
+ char *ptr;
1366
+ long len = 0;
1367
+ struct strscanner *p;
1368
+
1369
+ GET_SCANNER(self, p);
1370
+ CLEAR_MATCH_STATUS(p);
1371
+
1372
+ strscan_must_ascii_compat(p->str);
1373
+
1374
+ ptr = CURPTR(p);
1375
+
1376
+ long remaining_len = S_RESTLEN(p);
1377
+
1378
+ if (remaining_len <= 0) {
1379
+ return Qnil;
1380
+ }
1381
+
1382
+ if (ptr[len] == '-' || ptr[len] == '+') {
1383
+ len++;
1384
+ }
1385
+
1386
+ if ((remaining_len >= (len + 3)) && ptr[len] == '0' && ptr[len + 1] == 'x' && rb_isxdigit(ptr[len + 2])) {
1387
+ len += 2;
1388
+ }
1389
+
1390
+ if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
1391
+ return Qnil;
1392
+ }
1393
+
1394
+ p->prev = p->curr;
1395
+
1396
+ while (len < remaining_len && rb_isxdigit(ptr[len])) {
1397
+ len++;
1398
+ }
1399
+
1400
+ return strscan_parse_integer(p, 16, len);
1401
+ }
1402
+
988
1403
  /*
989
- * Sets the scan pointer to the previous position. Only one previous position is
990
- * remembered, and it changes with each scanning operation.
1404
+ * :markup: markdown
1405
+ * :include: strscan/link_refs.txt
1406
+ *
1407
+ * call-seq:
1408
+ * unscan -> self
1409
+ *
1410
+ * Sets the [position][2] to the value it had before the most recent successful
1411
+ * [match][17] attempt:
1412
+ *
1413
+ * ```rb
1414
+ * scanner = StringScanner.new('foobarbaz')
1415
+ * scanner.scan(/foo/)
1416
+ * put_situation(scanner)
1417
+ * # Situation:
1418
+ * # pos: 3
1419
+ * # charpos: 3
1420
+ * # rest: "barbaz"
1421
+ * # rest_size: 6
1422
+ * scanner.unscan
1423
+ * # => #<StringScanner 0/9 @ "fooba...">
1424
+ * put_situation(scanner)
1425
+ * # Situation:
1426
+ * # pos: 0
1427
+ * # charpos: 0
1428
+ * # rest: "foobarbaz"
1429
+ * # rest_size: 9
1430
+ * ```
1431
+ *
1432
+ * Raises an exception if match values are clear:
1433
+ *
1434
+ * ```rb
1435
+ * scanner.scan(/nope/) # => nil
1436
+ * match_values_cleared?(scanner) # => true
1437
+ * scanner.unscan # Raises StringScanner::Error.
1438
+ * ```
991
1439
  *
992
- * s = StringScanner.new('test string')
993
- * s.scan(/\w+/) # => "test"
994
- * s.unscan
995
- * s.scan(/../) # => "te"
996
- * s.scan(/\d/) # => nil
997
- * s.unscan # ScanError: unscan failed: previous match record not exist
998
1440
  */
999
1441
  static VALUE
1000
1442
  strscan_unscan(VALUE self)
@@ -1010,16 +1452,37 @@ strscan_unscan(VALUE self)
1010
1452
  }
1011
1453
 
1012
1454
  /*
1013
- * Returns +true+ if and only if the scan pointer is at the beginning of the line.
1014
- *
1015
- * s = StringScanner.new("test\ntest\n")
1016
- * s.bol? # => true
1017
- * s.scan(/te/)
1018
- * s.bol? # => false
1019
- * s.scan(/st\n/)
1020
- * s.bol? # => true
1021
- * s.terminate
1022
- * s.bol? # => true
1455
+ *
1456
+ * :markup: markdown
1457
+ * :include: strscan/link_refs.txt
1458
+ *
1459
+ * call-seq:
1460
+ * beginning_of_line? -> true or false
1461
+ *
1462
+ * Returns whether the [position][2] is at the beginning of a line;
1463
+ * that is, at the beginning of the [stored string][1]
1464
+ * or immediately after a newline:
1465
+ *
1466
+ * scanner = StringScanner.new(MULTILINE_TEXT)
1467
+ * scanner.string
1468
+ * # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n"
1469
+ * scanner.pos # => 0
1470
+ * scanner.beginning_of_line? # => true
1471
+ *
1472
+ * scanner.scan_until(/,/) # => "Go placidly amid the noise and haste,"
1473
+ * scanner.beginning_of_line? # => false
1474
+ *
1475
+ * scanner.scan(/\n/) # => "\n"
1476
+ * scanner.beginning_of_line? # => true
1477
+ *
1478
+ * scanner.terminate
1479
+ * scanner.beginning_of_line? # => true
1480
+ *
1481
+ * scanner.concat('x')
1482
+ * scanner.terminate
1483
+ * scanner.beginning_of_line? # => false
1484
+ *
1485
+ * StringScanner#bol? is an alias for StringScanner#beginning_of_line?.
1023
1486
  */
1024
1487
  static VALUE
1025
1488
  strscan_bol_p(VALUE self)
@@ -1033,14 +1496,24 @@ strscan_bol_p(VALUE self)
1033
1496
  }
1034
1497
 
1035
1498
  /*
1036
- * Returns +true+ if the scan pointer is at the end of the string.
1499
+ * :markup: markdown
1500
+ * :include: strscan/link_refs.txt
1501
+ *
1502
+ * call-seq:
1503
+ * eos? -> true or false
1504
+ *
1505
+ * Returns whether the [position][2]
1506
+ * is at the end of the [stored string][1]:
1507
+ *
1508
+ * ```rb
1509
+ * scanner = StringScanner.new('foobarbaz')
1510
+ * scanner.eos? # => false
1511
+ * scanner.pos = 3
1512
+ * scanner.eos? # => false
1513
+ * scanner.terminate
1514
+ * scanner.eos? # => true
1515
+ * ```
1037
1516
  *
1038
- * s = StringScanner.new('test string')
1039
- * p s.eos? # => false
1040
- * s.scan(/test/)
1041
- * p s.eos? # => false
1042
- * s.terminate
1043
- * p s.eos? # => true
1044
1517
  */
1045
1518
  static VALUE
1046
1519
  strscan_eos_p(VALUE self)
@@ -1052,9 +1525,14 @@ strscan_eos_p(VALUE self)
1052
1525
  }
1053
1526
 
1054
1527
  /*
1528
+ * call-seq:
1529
+ * empty?
1530
+ *
1055
1531
  * Equivalent to #eos?.
1056
1532
  * This method is obsolete, use #eos? instead.
1057
1533
  */
1534
+
1535
+ /* :nodoc: */
1058
1536
  static VALUE
1059
1537
  strscan_empty_p(VALUE self)
1060
1538
  {
@@ -1063,6 +1541,9 @@ strscan_empty_p(VALUE self)
1063
1541
  }
1064
1542
 
1065
1543
  /*
1544
+ * call-seq:
1545
+ * rest?
1546
+ *
1066
1547
  * Returns true if and only if there is more data in the string. See #eos?.
1067
1548
  * This method is obsolete; use #eos? instead.
1068
1549
  *
@@ -1071,6 +1552,8 @@ strscan_empty_p(VALUE self)
1071
1552
  * s.eos? # => false
1072
1553
  * s.rest? # => true
1073
1554
  */
1555
+
1556
+ /* :nodoc: */
1074
1557
  static VALUE
1075
1558
  strscan_rest_p(VALUE self)
1076
1559
  {
@@ -1081,13 +1564,26 @@ strscan_rest_p(VALUE self)
1081
1564
  }
1082
1565
 
1083
1566
  /*
1084
- * Returns +true+ if and only if the last match was successful.
1567
+ * :markup: markdown
1568
+ * :include: strscan/link_refs.txt
1569
+ *
1570
+ * call-seq:
1571
+ * matched? -> true or false
1572
+ *
1573
+ * Returns `true` if the most recent [match attempt][17] was successful,
1574
+ * `false` otherwise;
1575
+ * see [Basic Matched Values][18]:
1576
+ *
1577
+ * ```rb
1578
+ * scanner = StringScanner.new('foobarbaz')
1579
+ * scanner.matched? # => false
1580
+ * scanner.pos = 3
1581
+ * scanner.exist?(/baz/) # => 6
1582
+ * scanner.matched? # => true
1583
+ * scanner.exist?(/nope/) # => nil
1584
+ * scanner.matched? # => false
1585
+ * ```
1085
1586
  *
1086
- * s = StringScanner.new('test string')
1087
- * s.match?(/\w+/) # => 4
1088
- * s.matched? # => true
1089
- * s.match?(/\d+/) # => nil
1090
- * s.matched? # => false
1091
1587
  */
1092
1588
  static VALUE
1093
1589
  strscan_matched_p(VALUE self)
@@ -1099,11 +1595,27 @@ strscan_matched_p(VALUE self)
1099
1595
  }
1100
1596
 
1101
1597
  /*
1102
- * Returns the last matched string.
1598
+ * :markup: markdown
1599
+ * :include: strscan/link_refs.txt
1600
+ *
1601
+ * call-seq:
1602
+ * matched -> matched_substring or nil
1603
+ *
1604
+ * Returns the matched substring from the most recent [match][17] attempt
1605
+ * if it was successful,
1606
+ * or `nil` otherwise;
1607
+ * see [Basic Matched Values][18]:
1608
+ *
1609
+ * ```rb
1610
+ * scanner = StringScanner.new('foobarbaz')
1611
+ * scanner.matched # => nil
1612
+ * scanner.pos = 3
1613
+ * scanner.match?(/bar/) # => 3
1614
+ * scanner.matched # => "bar"
1615
+ * scanner.match?(/nope/) # => nil
1616
+ * scanner.matched # => nil
1617
+ * ```
1103
1618
  *
1104
- * s = StringScanner.new('test string')
1105
- * s.match?(/\w+/) # -> 4
1106
- * s.matched # -> "test"
1107
1619
  */
1108
1620
  static VALUE
1109
1621
  strscan_matched(VALUE self)
@@ -1118,15 +1630,29 @@ strscan_matched(VALUE self)
1118
1630
  }
1119
1631
 
1120
1632
  /*
1121
- * Returns the size of the most recent match in bytes, or +nil+ if there
1122
- * was no recent match. This is different than <tt>matched.size</tt>,
1123
- * which will return the size in characters.
1633
+ * :markup: markdown
1634
+ * :include: strscan/link_refs.txt
1635
+ *
1636
+ * call-seq:
1637
+ * matched_size -> substring_size or nil
1638
+ *
1639
+ * Returns the size (in bytes) of the matched substring
1640
+ * from the most recent [match attempt][17] if it was successful,
1641
+ * or `nil` otherwise;
1642
+ * see [Basic Matched Values][18]:
1643
+ *
1644
+ * ```rb
1645
+ * scanner = StringScanner.new('foobarbaz')
1646
+ * scanner.matched_size # => nil
1647
+ *
1648
+ * pos = 3
1649
+ * scanner.exist?(/baz/) # => 9
1650
+ * scanner.matched_size # => 3
1651
+ *
1652
+ * scanner.exist?(/nope/) # => nil
1653
+ * scanner.matched_size # => nil
1654
+ * ```
1124
1655
  *
1125
- * s = StringScanner.new('test string')
1126
- * s.check /\w+/ # -> "test"
1127
- * s.matched_size # -> 4
1128
- * s.check /\d+/ # -> nil
1129
- * s.matched_size # -> nil
1130
1656
  */
1131
1657
  static VALUE
1132
1658
  strscan_matched_size(VALUE self)
@@ -1141,46 +1667,89 @@ strscan_matched_size(VALUE self)
1141
1667
  /*
   * Resolves the capture-group name spanning [name, name_end) to its
   * backreference number for `regexp`, using the match registers `regs`.
   *
   * In the added ('+') version the Onigmo lookup is attempted only when
   * `regexp` is truthy, and a result >= 1 is returned. In every other case
   * (no regexp, or no group with that name) an IndexError
   * "undefined group name reference: <name>" is raised via rb_enc_raise()
   * using `enc`, so the function never returns without a valid number.
   */
  static int
1142
1668
  name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc)
1143
1669
  {
1144
- int num;
1145
-
1146
- num = onig_name_to_backref_number(RREGEXP_PTR(regexp),
1147
- (const unsigned char* )name, (const unsigned char* )name_end, regs);
1148
- if (num >= 1) {
1149
- return num;
1150
- }
1151
- else {
1152
- rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
1153
- rb_long2int(name_end - name), name);
1670
+ if (RTEST(regexp)) {
1671
+ int num = onig_name_to_backref_number(RREGEXP_PTR(regexp),
1672
+ (const unsigned char* )name,
1673
+ (const unsigned char* )name_end,
1674
+ regs);
1675
+ if (num >= 1) {
1676
+ return num;
1677
+ }
1154
1678
  }
1155
-
1156
- UNREACHABLE;
1679
+ rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
1680
+ rb_long2int(name_end - name), name);
1157
1681
  }
1158
1682
 
1159
1683
  /*
1160
- * call-seq: [](n)
1161
- *
1162
- * Returns the n-th subgroup in the most recent match.
1163
- *
1164
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1165
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1166
- * s[0] # -> "Fri Dec 12 "
1167
- * s[1] # -> "Fri"
1168
- * s[2] # -> "Dec"
1169
- * s[3] # -> "12"
1170
- * s.post_match # -> "1975 14:39"
1171
- * s.pre_match # -> ""
1172
- *
1173
- * s.reset
1174
- * s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) # -> "Fri Dec 12 "
1175
- * s[0] # -> "Fri Dec 12 "
1176
- * s[1] # -> "Fri"
1177
- * s[2] # -> "Dec"
1178
- * s[3] # -> "12"
1179
- * s[:wday] # -> "Fri"
1180
- * s[:month] # -> "Dec"
1181
- * s[:day] # -> "12"
1182
- * s.post_match # -> "1975 14:39"
1183
- * s.pre_match # -> ""
1684
+ *
1685
+ * :markup: markdown
1686
+ * :include: strscan/link_refs.txt
1687
+ *
1688
+ * call-seq:
1689
+ * [](specifier) -> substring or nil
1690
+ *
1691
+ * Returns a captured substring or `nil`;
1692
+ * see [Captured Match Values][13].
1693
+ *
1694
+ * When there are captures:
1695
+ *
1696
+ * ```rb
1697
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1698
+ * scanner.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
1699
+ * ```
1700
+ *
1701
+ * - `specifier` zero: returns the entire matched substring:
1702
+ *
1703
+ * ```rb
1704
+ * scanner[0] # => "Fri Dec 12 "
1705
+ * scanner.pre_match # => ""
1706
+ * scanner.post_match # => "1975 14:39"
1707
+ * ```
1708
+ *
1709
+ * - `specifier` positive integer: returns the `n`th capture, or `nil` if out of range:
1710
+ *
1711
+ * ```rb
1712
+ * scanner[1] # => "Fri"
1713
+ * scanner[2] # => "Dec"
1714
+ * scanner[3] # => "12"
1715
+ * scanner[4] # => nil
1716
+ * ```
1717
+ *
1718
+ * - `specifier` negative integer: counts backward from the last subgroup:
1719
+ *
1720
+ * ```rb
1721
+ * scanner[-1] # => "12"
1722
+ * scanner[-4] # => "Fri Dec 12 "
1723
+ * scanner[-5] # => nil
1724
+ * ```
1725
+ *
1726
+ * - `specifier` symbol or string: returns the named subgroup; raises IndexError if there is no group with that name:
1727
+ *
1728
+ * ```rb
1729
+ * scanner[:wday] # => "Fri"
1730
+ * scanner['wday'] # => "Fri"
1731
+ * scanner[:month] # => "Dec"
1732
+ * scanner[:day] # => "12"
1733
+ * scanner[:nope] # Raises IndexError (undefined group name reference: nope).
1734
+ * ```
1735
+ *
1736
+ * When there are no captures, only `[0]` returns non-`nil`:
1737
+ *
1738
+ * ```rb
1739
+ * scanner = StringScanner.new('foobarbaz')
1740
+ * scanner.exist?(/bar/)
1741
+ * scanner[0] # => "bar"
1742
+ * scanner[1] # => nil
1743
+ * ```
1744
+ *
1745
+ * For a failed match, even `[0]` returns `nil`:
1746
+ *
1747
+ * ```rb
1748
+ * scanner.scan(/nope/) # => nil
1749
+ * scanner[0] # => nil
1750
+ * scanner[1] # => nil
1751
+ * ```
1752
+ *
1184
1753
  */
1185
1754
  static VALUE
1186
1755
  strscan_aref(VALUE self, VALUE idx)
@@ -1197,7 +1766,6 @@ strscan_aref(VALUE self, VALUE idx)
1197
1766
  idx = rb_sym2str(idx);
1198
1767
  /* fall through */
1199
1768
  case T_STRING:
1200
- if (!RTEST(p->regex)) return Qnil;
1201
1769
  RSTRING_GETMEM(idx, name, i);
1202
1770
  i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
1203
1771
  break;
@@ -1217,14 +1785,28 @@ strscan_aref(VALUE self, VALUE idx)
1217
1785
  }
1218
1786
 
1219
1787
  /*
1220
- * call-seq: size
1788
+ * :markup: markdown
1789
+ * :include: strscan/link_refs.txt
1221
1790
  *
1222
- * Returns the amount of subgroups in the most recent match.
1223
- * The full match counts as a subgroup.
1791
+ * call-seq:
1792
+ * size -> captures_count or nil
1793
+ *
1794
+ * Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise;
1795
+ * see [Captured Match Values][13]:
1796
+ *
1797
+ * ```rb
1798
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1799
+ * scanner.size # => nil
1800
+ *
1801
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
1802
+ * scanner.match?(pattern)
1803
+ * scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
1804
+ * scanner.size # => 4
1805
+ *
1806
+ * scanner.match?(/nope/) # => nil
1807
+ * scanner.size # => nil
1808
+ * ```
1224
1809
  *
1225
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1226
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1227
- * s.size # -> 4
1228
1810
  */
1229
1811
  static VALUE
1230
1812
  strscan_size(VALUE self)
@@ -1237,16 +1819,30 @@ strscan_size(VALUE self)
1237
1819
  }
1238
1820
 
1239
1821
  /*
1240
- * call-seq: captures
1822
+ * :markup: markdown
1823
+ * :include: strscan/link_refs.txt
1824
+ *
1825
+ * call-seq:
1826
+ * captures -> substring_array or nil
1827
+ *
1828
+ * Returns the array of [captured match values][13] at indexes `(1..)`
1829
+ * if the most recent match attempt succeeded, or `nil` otherwise:
1830
+ *
1831
+ * ```rb
1832
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1833
+ * scanner.captures # => nil
1834
+ *
1835
+ * scanner.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
1836
+ * scanner.captures # => ["Fri", "Dec", "12"]
1837
+ * scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
1838
+ *
1839
+ * scanner.exist?(/Fri/)
1840
+ * scanner.captures # => []
1241
1841
  *
1242
- * Returns the subgroups in the most recent match (not including the full match).
1243
- * If nothing was priorly matched, it returns nil.
1842
+ * scanner.scan(/nope/)
1843
+ * scanner.captures # => nil
1844
+ * ```
1244
1845
  *
1245
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1246
- * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 "
1247
- * s.captures # -> ["Fri", "Dec", "12", nil]
1248
- * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil
1249
- * s.captures # -> nil
1250
1846
  */
1251
1847
  static VALUE
1252
1848
  strscan_captures(VALUE self)
@@ -1276,17 +1872,25 @@ strscan_captures(VALUE self)
1276
1872
  }
1277
1873
 
1278
1874
  /*
1279
- * call-seq:
1280
- * scanner.values_at( i1, i2, ... iN ) -> an_array
1875
+ * :markup: markdown
1876
+ * :include: strscan/link_refs.txt
1281
1877
  *
1282
- * Returns the subgroups in the most recent match at the given indices.
1283
- * If nothing was priorly matched, it returns nil.
1878
+ * call-seq:
1879
+ * values_at(*specifiers) -> array_of_captures or nil
1880
+ *
1881
+ * Returns an array of captured substrings, or `nil` if none.
1882
+ *
1883
+ * For each `specifier`, the returned substring is `[specifier]`;
1884
+ * see #[].
1885
+ *
1886
+ * ```rb
1887
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1888
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
1889
+ * scanner.match?(pattern)
1890
+ * scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"]
1891
+ * scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"]
1892
+ * ```
1284
1893
  *
1285
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1286
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1287
- * s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
1288
- * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1289
- * s.values_at 0, -1, 5, 2 # -> nil
1290
1894
  */
1291
1895
 
1292
1896
  static VALUE
@@ -1308,13 +1912,29 @@ strscan_values_at(int argc, VALUE *argv, VALUE self)
1308
1912
  }
1309
1913
 
1310
1914
  /*
1311
- * Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
1915
+ * :markup: markdown
1916
+ * :include: strscan/link_refs.txt
1917
+ *
1918
+ * call-seq:
1919
+ * pre_match -> substring or nil
1920
+ *
1921
+ * Returns the substring that precedes the matched substring
1922
+ * from the most recent match attempt if it was successful,
1923
+ * or `nil` otherwise;
1924
+ * see [Basic Match Values][18]:
1925
+ *
1926
+ * ```rb
1927
+ * scanner = StringScanner.new('foobarbaz')
1928
+ * scanner.pre_match # => nil
1929
+ *
1930
+ * scanner.pos = 3
1931
+ * scanner.exist?(/baz/) # => 6
1932
+ * scanner.pre_match # => "foobar" # Substring of entire string, not just target string.
1933
+ *
1934
+ * scanner.exist?(/nope/) # => nil
1935
+ * scanner.pre_match # => nil
1936
+ * ```
1312
1937
  *
1313
- * s = StringScanner.new('test string')
1314
- * s.scan(/\w+/) # -> "test"
1315
- * s.scan(/\s+/) # -> " "
1316
- * s.pre_match # -> "test"
1317
- * s.post_match # -> "string"
1318
1938
  */
1319
1939
  static VALUE
1320
1940
  strscan_pre_match(VALUE self)
@@ -1329,13 +1949,29 @@ strscan_pre_match(VALUE self)
1329
1949
  }
1330
1950
 
1331
1951
  /*
1332
- * Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
1952
+ * :markup: markdown
1953
+ * :include: strscan/link_refs.txt
1954
+ *
1955
+ * call-seq:
1956
+ * post_match -> substring or nil
1957
+ *
1958
+ * Returns the substring that follows the matched substring
1959
+ * from the most recent match attempt if it was successful,
1960
+ * or `nil` otherwise;
1961
+ * see [Basic Match Values][18]:
1962
+ *
1963
+ * ```rb
1964
+ * scanner = StringScanner.new('foobarbaz')
1965
+ * scanner.post_match # => nil
1966
+ *
1967
+ * scanner.pos = 3
1968
+ * scanner.match?(/bar/) # => 3
1969
+ * scanner.post_match # => "baz"
1970
+ *
1971
+ * scanner.match?(/nope/) # => nil
1972
+ * scanner.post_match # => nil
1973
+ * ```
1333
1974
  *
1334
- * s = StringScanner.new('test string')
1335
- * s.scan(/\w+/) # -> "test"
1336
- * s.scan(/\s+/) # -> " "
1337
- * s.pre_match # -> "test"
1338
- * s.post_match # -> "string"
1339
1975
  */
1340
1976
  static VALUE
1341
1977
  strscan_post_match(VALUE self)
@@ -1350,8 +1986,24 @@ strscan_post_match(VALUE self)
1350
1986
  }
1351
1987
 
1352
1988
  /*
1353
- * Returns the "rest" of the string (i.e. everything after the scan pointer).
1354
- * If there is no more data (eos? = true), it returns <tt>""</tt>.
1989
+ * :markup: markdown
1990
+ * :include: strscan/link_refs.txt
1991
+ *
1992
+ * call-seq:
1993
+ * rest -> target_substring
1994
+ *
1995
+ * Returns the 'rest' of the [stored string][1] (all after the current [position][2]),
1996
+ * which is the [target substring][3]:
1997
+ *
1998
+ * ```rb
1999
+ * scanner = StringScanner.new('foobarbaz')
2000
+ * scanner.rest # => "foobarbaz"
2001
+ * scanner.pos = 3
2002
+ * scanner.rest # => "barbaz"
2003
+ * scanner.terminate
2004
+ * scanner.rest # => ""
2005
+ * ```
2006
+ *
1355
2007
  */
1356
2008
  static VALUE
1357
2009
  strscan_rest(VALUE self)
@@ -1366,7 +2018,26 @@ strscan_rest(VALUE self)
1366
2018
  }
1367
2019
 
1368
2020
  /*
1369
- * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
2021
+ * :markup: markdown
2022
+ * :include: strscan/link_refs.txt
2023
+ *
2024
+ * call-seq:
2025
+ * rest_size -> integer
2026
+ *
2027
+ * Returns the size (in bytes) of the #rest of the [stored string][1]:
2028
+ *
2029
+ * ```rb
2030
+ * scanner = StringScanner.new('foobarbaz')
2031
+ * scanner.rest # => "foobarbaz"
2032
+ * scanner.rest_size # => 9
2033
+ * scanner.pos = 3
2034
+ * scanner.rest # => "barbaz"
2035
+ * scanner.rest_size # => 6
2036
+ * scanner.terminate
2037
+ * scanner.rest # => ""
2038
+ * scanner.rest_size # => 0
2039
+ * ```
2040
+ *
1370
2041
  */
1371
2042
  static VALUE
1372
2043
  strscan_rest_size(VALUE self)
@@ -1383,9 +2054,14 @@ strscan_rest_size(VALUE self)
1383
2054
  }
1384
2055
 
1385
2056
  /*
2057
+ * call-seq:
2058
+ * restsize
2059
+ *
1386
2060
  * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
1387
2061
  * This method is obsolete; use #rest_size instead.
1388
2062
  */
2063
+
2064
+ /* :nodoc: */
1389
2065
  static VALUE
1390
2066
  strscan_restsize(VALUE self)
1391
2067
  {
@@ -1396,15 +2072,39 @@ strscan_restsize(VALUE self)
1396
2072
  #define INSPECT_LENGTH 5
1397
2073
 
1398
2074
  /*
1399
- * Returns a string that represents the StringScanner object, showing:
1400
- * - the current position
1401
- * - the size of the string
1402
- * - the characters surrounding the scan pointer
1403
- *
1404
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1405
- * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
1406
- * s.scan_until /12/ # -> "Fri Dec 12"
1407
- * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
2075
+ * :markup: markdown
2076
+ * :include: strscan/link_refs.txt
2077
+ *
2078
+ * call-seq:
2079
+ * inspect -> string
2080
+ *
2081
+ * Returns a string representation of `self` that may show:
2082
+ *
2083
+ * 1. The current [position][2].
2084
+ * 2. The size (in bytes) of the [stored string][1].
2085
+ * 3. The substring preceding the current position.
2086
+ * 4. The substring following the current position (which is also the [target substring][3]).
2087
+ *
2088
+ * ```rb
2089
+ * scanner = StringScanner.new("Fri Dec 12 1975 14:39")
2090
+ * scanner.pos = 11
2091
+ * scanner.inspect # => "#<StringScanner 11/21 \"...c 12 \" @ \"1975 ...\">"
2092
+ * ```
2093
+ *
2094
+ * If at beginning-of-string, item 3 above (preceding substring) is omitted:
2095
+ *
2096
+ * ```rb
2097
+ * scanner.reset
2098
+ * scanner.inspect # => "#<StringScanner 0/21 @ \"Fri D...\">"
2099
+ * ```
2100
+ *
2101
+ * If at end-of-string, all items above are omitted:
2102
+ *
2103
+ * ```rb
2104
+ * scanner.terminate
2105
+ * scanner.inspect # => "#<StringScanner fin>"
2106
+ * ```
2107
+ *
1408
2108
  */
1409
2109
  static VALUE
1410
2110
  strscan_inspect(VALUE self)
@@ -1476,13 +2176,13 @@ inspect2(struct strscanner *p)
1476
2176
  }
1477
2177
 
1478
2178
  /*
1479
- * call-seq:
1480
- * scanner.fixed_anchor? -> true or false
2179
+ * :markup: markdown
2180
+ * :include: strscan/link_refs.txt
1481
2181
  *
1482
- * Whether +scanner+ uses fixed anchor mode or not.
2182
+ * call-seq:
2183
+ * fixed_anchor? -> true or false
1483
2184
  *
1484
- * If fixed anchor mode is used, +\A+ always matches the beginning of
1485
- * the string. Otherwise, +\A+ always matches the current position.
2185
+ * Returns whether the [fixed-anchor property][10] is set.
1486
2186
  */
1487
2187
  static VALUE
1488
2188
  strscan_fixed_anchor_p(VALUE self)
@@ -1511,28 +2211,49 @@ named_captures_iter(const OnigUChar *name,
1511
2211
  VALUE value = RUBY_Qnil;
1512
2212
  int i;
1513
2213
  for (i = 0; i < back_num; i++) {
1514
- value = strscan_aref(data->self, INT2NUM(back_refs[i]));
2214
+ VALUE v = strscan_aref(data->self, INT2NUM(back_refs[i]));
2215
+ if (!RB_NIL_P(v)) {
2216
+ value = v;
2217
+ }
1515
2218
  }
1516
2219
  rb_hash_aset(data->captures, key, value);
1517
2220
  return 0;
1518
2221
  }
1519
2222
 
1520
2223
  /*
2224
+ * :markup: markdown
2225
+ * :include: strscan/link_refs.txt
2226
+ *
1521
2227
  * call-seq:
1522
- * scanner.named_captures -> hash
2228
+ * named_captures -> hash
1523
2229
  *
1524
- * Returns a hash of string variables matching the regular expression.
2230
+ * Returns a hash of the named captures from the most recent match attempt,
2231
+ * mapping each capture name to its captured substring (or `nil`); the hash is empty if there are no named captures;
2232
+ * see [Captured Match Values][13]:
2233
+ *
2234
+ * ```rb
2235
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
2236
+ * scanner.named_captures # => {}
2237
+ *
2238
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
2239
+ * scanner.match?(pattern)
2240
+ * scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"}
2241
+ *
2242
+ * scanner.string = 'nope'
2243
+ * scanner.match?(pattern)
2244
+ * scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil}
2245
+ *
2246
+ * scanner.match?(/nosuch/)
2247
+ * scanner.named_captures # => {}
2248
+ * ```
1525
2249
  *
1526
- * scan = StringScanner.new('foobarbaz')
1527
- * scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/)
1528
- * scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"}
1529
2250
  */
1530
2251
  static VALUE
1531
2252
  strscan_named_captures(VALUE self)
1532
2253
  {
1533
2254
  struct strscanner *p;
1534
- GET_SCANNER(self, p);
1535
2255
  named_captures_data data;
2256
+ GET_SCANNER(self, p);
1536
2257
  data.self = self;
1537
2258
  data.captures = rb_hash_new();
1538
2259
  if (!RB_NIL_P(p->regex)) {
@@ -1549,107 +2270,11 @@ strscan_named_captures(VALUE self)
1549
2270
  /*
1550
2271
  * Document-class: StringScanner
1551
2272
  *
1552
- * StringScanner provides for lexical scanning operations on a String. Here is
1553
- * an example of its usage:
1554
- *
1555
- * require 'strscan'
1556
- *
1557
- * s = StringScanner.new('This is an example string')
1558
- * s.eos? # -> false
1559
- *
1560
- * p s.scan(/\w+/) # -> "This"
1561
- * p s.scan(/\w+/) # -> nil
1562
- * p s.scan(/\s+/) # -> " "
1563
- * p s.scan(/\s+/) # -> nil
1564
- * p s.scan(/\w+/) # -> "is"
1565
- * s.eos? # -> false
1566
- *
1567
- * p s.scan(/\s+/) # -> " "
1568
- * p s.scan(/\w+/) # -> "an"
1569
- * p s.scan(/\s+/) # -> " "
1570
- * p s.scan(/\w+/) # -> "example"
1571
- * p s.scan(/\s+/) # -> " "
1572
- * p s.scan(/\w+/) # -> "string"
1573
- * s.eos? # -> true
2273
+ * :markup: markdown
1574
2274
  *
1575
- * p s.scan(/\s+/) # -> nil
1576
- * p s.scan(/\w+/) # -> nil
2275
+ * :include: strscan/link_refs.txt
2276
+ * :include: strscan/strscan.md
1577
2277
  *
1578
- * Scanning a string means remembering the position of a <i>scan pointer</i>,
1579
- * which is just an index. The point of scanning is to move forward a bit at
1580
- * a time, so matches are sought after the scan pointer; usually immediately
1581
- * after it.
1582
- *
1583
- * Given the string "test string", here are the pertinent scan pointer
1584
- * positions:
1585
- *
1586
- * t e s t s t r i n g
1587
- * 0 1 2 ... 1
1588
- * 0
1589
- *
1590
- * When you #scan for a pattern (a regular expression), the match must occur
1591
- * at the character after the scan pointer. If you use #scan_until, then the
1592
- * match can occur anywhere after the scan pointer. In both cases, the scan
1593
- * pointer moves <i>just beyond</i> the last character of the match, ready to
1594
- * scan again from the next character onwards. This is demonstrated by the
1595
- * example above.
1596
- *
1597
- * == Method Categories
1598
- *
1599
- * There are other methods besides the plain scanners. You can look ahead in
1600
- * the string without actually scanning. You can access the most recent match.
1601
- * You can modify the string being scanned, reset or terminate the scanner,
1602
- * find out or change the position of the scan pointer, skip ahead, and so on.
1603
- *
1604
- * === Advancing the Scan Pointer
1605
- *
1606
- * - #getch
1607
- * - #get_byte
1608
- * - #scan
1609
- * - #scan_until
1610
- * - #skip
1611
- * - #skip_until
1612
- *
1613
- * === Looking Ahead
1614
- *
1615
- * - #check
1616
- * - #check_until
1617
- * - #exist?
1618
- * - #match?
1619
- * - #peek
1620
- *
1621
- * === Finding Where we Are
1622
- *
1623
- * - #beginning_of_line? (<tt>#bol?</tt>)
1624
- * - #eos?
1625
- * - #rest?
1626
- * - #rest_size
1627
- * - #pos
1628
- *
1629
- * === Setting Where we Are
1630
- *
1631
- * - #reset
1632
- * - #terminate
1633
- * - #pos=
1634
- *
1635
- * === Match Data
1636
- *
1637
- * - #matched
1638
- * - #matched?
1639
- * - #matched_size
1640
- * - <tt>#[]</tt>
1641
- * - #pre_match
1642
- * - #post_match
1643
- *
1644
- * === Miscellaneous
1645
- *
1646
- * - <tt><<</tt>
1647
- * - #concat
1648
- * - #string
1649
- * - #string=
1650
- * - #unscan
1651
- *
1652
- * There are aliases to several of the methods.
1653
2278
  */
1654
2279
  void
1655
2280
  Init_strscan(void)
@@ -1664,6 +2289,10 @@ Init_strscan(void)
1664
2289
 
1665
2290
  id_byteslice = rb_intern("byteslice");
1666
2291
 
2292
+ usascii_encindex = rb_usascii_encindex();
2293
+ utf8_encindex = rb_utf8_encindex();
2294
+ binary_encindex = rb_ascii8bit_encindex();
2295
+
1667
2296
  StringScanner = rb_define_class("StringScanner", rb_cObject);
1668
2297
  ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
1669
2298
  if (!rb_const_defined(rb_cObject, id_scanerr)) {
@@ -1708,9 +2337,14 @@ Init_strscan(void)
1708
2337
  rb_define_method(StringScanner, "getch", strscan_getch, 0);
1709
2338
  rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
1710
2339
  rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
2340
+ rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
1711
2341
  rb_define_method(StringScanner, "peek", strscan_peek, 1);
2342
+ rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
1712
2343
  rb_define_method(StringScanner, "peep", strscan_peep, 1);
1713
2344
 
2345
+ rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
2346
+ rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
2347
+
1714
2348
  rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
1715
2349
 
1716
2350
  rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
@@ -1738,4 +2372,6 @@ Init_strscan(void)
1738
2372
  rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1739
2373
 
1740
2374
  rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
2375
+
2376
+ rb_require("strscan/strscan");
1741
2377
  }