strscan 3.1.0 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
22
22
 
23
23
  #include <stdbool.h>
24
24
 
25
- #define STRSCAN_VERSION "3.1.0"
25
+ #define STRSCAN_VERSION "3.1.2"
26
26
 
27
27
  /* =======================================================================
28
28
  Data Type Definitions
@@ -32,6 +32,8 @@ static VALUE StringScanner;
32
32
  static VALUE ScanError;
33
33
  static ID id_byteslice;
34
34
 
35
+ static int usascii_encindex, utf8_encindex, binary_encindex;
36
+
35
37
  struct strscanner
36
38
  {
37
39
  /* multi-purpose flags */
@@ -115,6 +117,7 @@ static VALUE strscan_get_byte _((VALUE self));
115
117
  static VALUE strscan_getbyte _((VALUE self));
116
118
  static VALUE strscan_peek _((VALUE self, VALUE len));
117
119
  static VALUE strscan_peep _((VALUE self, VALUE len));
120
+ static VALUE strscan_scan_base10_integer _((VALUE self));
118
121
  static VALUE strscan_unscan _((VALUE self));
119
122
  static VALUE strscan_bol_p _((VALUE self));
120
123
  static VALUE strscan_eos_p _((VALUE self));
@@ -218,16 +221,28 @@ strscan_s_allocate(VALUE klass)
218
221
  }
219
222
 
220
223
  /*
221
- * call-seq:
222
- * StringScanner.new(string, fixed_anchor: false)
223
- * StringScanner.new(string, dup = false)
224
- *
225
- * Creates a new StringScanner object to scan over the given +string+.
224
+ * :markup: markdown
225
+ * :include: strscan/link_refs.txt
226
226
  *
227
- * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
228
- * the string. Otherwise, +\A+ always matches the current position.
227
+ * call-seq:
228
+ * StringScanner.new(string, fixed_anchor: false) -> string_scanner
229
+ *
230
+ * Returns a new `StringScanner` object whose [stored string][1]
231
+ * is the given `string`;
232
+ * sets the [fixed-anchor property][10]:
233
+ *
234
+ * ```rb
235
+ * scanner = StringScanner.new('foobarbaz')
236
+ * scanner.string # => "foobarbaz"
237
+ * scanner.fixed_anchor? # => false
238
+ * put_situation(scanner)
239
+ * # Situation:
240
+ * # pos: 0
241
+ * # charpos: 0
242
+ * # rest: "foobarbaz"
243
+ * # rest_size: 9
244
+ * ```
229
245
  *
230
- * +dup+ argument is obsolete and not used now.
231
246
  */
232
247
  static VALUE
233
248
  strscan_initialize(int argc, VALUE *argv, VALUE self)
@@ -266,11 +281,14 @@ check_strscan(VALUE obj)
266
281
  }
267
282
 
268
283
  /*
284
+ * :markup: markdown
285
+ * :include: strscan/link_refs.txt
286
+ *
269
287
  * call-seq:
270
- * dup
271
- * clone
288
+ * dup -> shallow_copy
272
289
  *
273
- * Duplicates a StringScanner object.
290
+ * Returns a shallow copy of `self`;
291
+ * the [stored string][1] in the copy is the same string as in `self`.
274
292
  */
275
293
  static VALUE
276
294
  strscan_init_copy(VALUE vself, VALUE vorig)
@@ -297,10 +315,13 @@ strscan_init_copy(VALUE vself, VALUE vorig)
297
315
  ======================================================================= */
298
316
 
299
317
  /*
300
- * call-seq: StringScanner.must_C_version
318
+ * call-seq:
319
+ * StringScanner.must_C_version -> self
301
320
  *
302
- * This method is defined for backward compatibility.
321
+ * Returns +self+; defined for backward compatibility.
303
322
  */
323
+
324
+ /* :nodoc: */
304
325
  static VALUE
305
326
  strscan_s_mustc(VALUE self)
306
327
  {
@@ -308,7 +329,30 @@ strscan_s_mustc(VALUE self)
308
329
  }
309
330
 
310
331
  /*
311
- * Reset the scan pointer (index 0) and clear matching data.
332
+ * :markup: markdown
333
+ * :include: strscan/link_refs.txt
334
+ *
335
+ * call-seq:
336
+ * reset -> self
337
+ *
338
+ * Sets both [byte position][2] and [character position][7] to zero,
339
+ * and clears [match values][9];
340
+ * returns `self`:
341
+ *
342
+ * ```rb
343
+ * scanner = StringScanner.new('foobarbaz')
344
+ * scanner.exist?(/bar/) # => 6
345
+ * scanner.reset # => #<StringScanner 0/9 @ "fooba...">
346
+ * put_situation(scanner)
347
+ * # Situation:
348
+ * # pos: 0
349
+ * # charpos: 0
350
+ * # rest: "foobarbaz"
351
+ * # rest_size: 9
352
+ * # => nil
353
+ * match_values_cleared?(scanner) # => true
354
+ * ```
355
+ *
312
356
  */
313
357
  static VALUE
314
358
  strscan_reset(VALUE self)
@@ -322,11 +366,9 @@ strscan_reset(VALUE self)
322
366
  }
323
367
 
324
368
  /*
325
- * call-seq:
326
- * terminate
327
- * clear
328
- *
329
- * Sets the scan pointer to the end of the string and clear matching data.
369
+ * :markup: markdown
370
+ * :include: strscan/link_refs.txt
371
+ * :include: strscan/methods/terminate.md
330
372
  */
331
373
  static VALUE
332
374
  strscan_terminate(VALUE self)
@@ -340,9 +382,13 @@ strscan_terminate(VALUE self)
340
382
  }
341
383
 
342
384
  /*
343
- * Equivalent to #terminate.
344
- * This method is obsolete; use #terminate instead.
385
+ * call-seq:
386
+ * clear -> self
387
+ *
388
+ * This method is obsolete; use the equivalent method StringScanner#terminate.
345
389
  */
390
+
391
+ /* :nodoc: */
346
392
  static VALUE
347
393
  strscan_clear(VALUE self)
348
394
  {
@@ -351,7 +397,21 @@ strscan_clear(VALUE self)
351
397
  }
352
398
 
353
399
  /*
354
- * Returns the string being scanned.
400
+ * :markup: markdown
401
+ * :include: strscan/link_refs.txt
402
+ *
403
+ * call-seq:
404
+ * string -> stored_string
405
+ *
406
+ * Returns the [stored string][1]:
407
+ *
408
+ * ```rb
409
+ * scanner = StringScanner.new('foobar')
410
+ * scanner.string # => "foobar"
411
+ * scanner.concat('baz')
412
+ * scanner.string # => "foobarbaz"
413
+ * ```
414
+ *
355
415
  */
356
416
  static VALUE
357
417
  strscan_get_string(VALUE self)
@@ -363,10 +423,39 @@ strscan_get_string(VALUE self)
363
423
  }
364
424
 
365
425
  /*
366
- * call-seq: string=(str)
426
+ * :markup: markdown
427
+ * :include: strscan/link_refs.txt
428
+ *
429
+ * call-seq:
430
+ * string = other_string -> other_string
431
+ *
432
+ * Replaces the [stored string][1] with the given `other_string`:
433
+ *
434
+ * - Sets both [positions][11] to zero.
435
+ * - Clears [match values][9].
436
+ * - Returns `other_string`.
437
+ *
438
+ * ```rb
439
+ * scanner = StringScanner.new('foobar')
440
+ * scanner.scan(/foo/)
441
+ * put_situation(scanner)
442
+ * # Situation:
443
+ * # pos: 3
444
+ * # charpos: 3
445
+ * # rest: "bar"
446
+ * # rest_size: 3
447
+ * match_values_cleared?(scanner) # => false
448
+ *
449
+ * scanner.string = 'baz' # => "baz"
450
+ * put_situation(scanner)
451
+ * # Situation:
452
+ * # pos: 0
453
+ * # charpos: 0
454
+ * # rest: "baz"
455
+ * # rest_size: 3
456
+ * match_values_cleared?(scanner) # => true
457
+ * ```
367
458
  *
368
- * Changes the string being scanned to +str+ and resets the scanner.
369
- * Returns +str+.
370
459
  */
371
460
  static VALUE
372
461
  strscan_set_string(VALUE self, VALUE str)
@@ -381,18 +470,33 @@ strscan_set_string(VALUE self, VALUE str)
381
470
  }
382
471
 
383
472
  /*
384
- * call-seq:
385
- * concat(str)
386
- * <<(str)
473
+ * :markup: markdown
474
+ * :include: strscan/link_refs.txt
387
475
  *
388
- * Appends +str+ to the string being scanned.
389
- * This method does not affect scan pointer.
476
+ * call-seq:
477
+ * concat(more_string) -> self
478
+ *
479
+ * - Appends the given `more_string`
480
+ * to the [stored string][1].
481
+ * - Returns `self`.
482
+ * - Does not affect the [positions][11]
483
+ * or [match values][9].
484
+ *
485
+ *
486
+ * ```rb
487
+ * scanner = StringScanner.new('foo')
488
+ * scanner.string # => "foo"
489
+ * scanner.terminate
490
+ * scanner.concat('barbaz') # => #<StringScanner 3/9 "foo" @ "barba...">
491
+ * scanner.string # => "foobarbaz"
492
+ * put_situation(scanner)
493
+ * # Situation:
494
+ * # pos: 3
495
+ * # charpos: 3
496
+ * # rest: "barbaz"
497
+ * # rest_size: 6
498
+ * ```
390
499
  *
391
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
392
- * s.scan(/Fri /)
393
- * s << " +1000 GMT"
394
- * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
395
- * s.scan(/Dec/) # -> "Dec"
396
500
  */
397
501
  static VALUE
398
502
  strscan_concat(VALUE self, VALUE str)
@@ -406,18 +510,9 @@ strscan_concat(VALUE self, VALUE str)
406
510
  }
407
511
 
408
512
  /*
409
- * Returns the byte position of the scan pointer. In the 'reset' position, this
410
- * value is zero. In the 'terminated' position (i.e. the string is exhausted),
411
- * this value is the bytesize of the string.
412
- *
413
- * In short, it's a 0-based index into bytes of the string.
414
- *
415
- * s = StringScanner.new('test string')
416
- * s.pos # -> 0
417
- * s.scan_until /str/ # -> "test str"
418
- * s.pos # -> 8
419
- * s.terminate # -> #<StringScanner fin>
420
- * s.pos # -> 11
513
+ * :markup: markdown
514
+ * :include: strscan/link_refs.txt
515
+ * :include: strscan/methods/get_pos.md
421
516
  */
422
517
  static VALUE
423
518
  strscan_get_pos(VALUE self)
@@ -429,17 +524,9 @@ strscan_get_pos(VALUE self)
429
524
  }
430
525
 
431
526
  /*
432
- * Returns the character position of the scan pointer. In the 'reset' position, this
433
- * value is zero. In the 'terminated' position (i.e. the string is exhausted),
434
- * this value is the size of the string.
435
- *
436
- * In short, it's a 0-based index into the string.
437
- *
438
- * s = StringScanner.new("abc\u00e4def\u00f6ghi")
439
- * s.charpos # -> 0
440
- * s.scan_until(/\u00e4/) # -> "abc\u00E4"
441
- * s.pos # -> 5
442
- * s.charpos # -> 4
527
+ * :markup: markdown
528
+ * :include: strscan/link_refs.txt
529
+ * :include: strscan/methods/get_charpos.md
443
530
  */
444
531
  static VALUE
445
532
  strscan_get_charpos(VALUE self)
@@ -452,13 +539,9 @@ strscan_get_charpos(VALUE self)
452
539
  }
453
540
 
454
541
  /*
455
- * call-seq: pos=(n)
456
- *
457
- * Sets the byte position of the scan pointer.
458
- *
459
- * s = StringScanner.new('test string')
460
- * s.pos = 7 # -> 7
461
- * s.rest # -> "ring"
542
+ * :markup: markdown
543
+ * :include: strscan/link_refs.txt
544
+ * :include: strscan/methods/set_pos.md
462
545
  */
463
546
  static VALUE
464
547
  strscan_set_pos(VALUE self, VALUE v)
@@ -546,12 +629,13 @@ rb_reg_onig_match(VALUE re, VALUE str,
546
629
  OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
547
630
  void *args, struct re_registers *regs)
548
631
  {
632
+ OnigPosition result;
549
633
  regex_t *reg = rb_reg_prepare_re(re, str);
550
634
 
551
635
  bool tmpreg = reg != RREGEXP_PTR(re);
552
636
  if (!tmpreg) RREGEXP(re)->usecnt++;
553
637
 
554
- OnigPosition result = match(reg, str, regs, args);
638
+ result = match(reg, str, regs, args);
555
639
 
556
640
  if (!tmpreg) RREGEXP(re)->usecnt--;
557
641
  if (tmpreg) {
@@ -601,19 +685,19 @@ strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_pt
601
685
  ONIG_OPTION_NONE);
602
686
  }
603
687
 
688
+ static void
689
+ strscan_enc_check(VALUE str1, VALUE str2)
690
+ {
691
+ if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) {
692
+ rb_enc_check(str1, str2);
693
+ }
694
+ }
695
+
604
696
  static VALUE
605
697
  strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
606
698
  {
607
699
  struct strscanner *p;
608
700
 
609
- if (headonly) {
610
- if (!RB_TYPE_P(pattern, T_REGEXP)) {
611
- StringValue(pattern);
612
- }
613
- }
614
- else {
615
- Check_Type(pattern, T_REGEXP);
616
- }
617
701
  GET_SCANNER(self, p);
618
702
 
619
703
  CLEAR_MATCH_STATUS(p);
@@ -622,26 +706,42 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
622
706
  }
623
707
 
624
708
  if (RB_TYPE_P(pattern, T_REGEXP)) {
709
+ OnigPosition ret;
625
710
  p->regex = pattern;
626
- OnigPosition ret = rb_reg_onig_match(pattern,
627
- p->str,
628
- headonly ? strscan_match : strscan_search,
629
- (void *)p,
630
- &(p->regs));
711
+ ret = rb_reg_onig_match(p->regex,
712
+ p->str,
713
+ headonly ? strscan_match : strscan_search,
714
+ (void *)p,
715
+ &(p->regs));
631
716
 
632
717
  if (ret == ONIG_MISMATCH) {
633
718
  return Qnil;
634
719
  }
635
720
  }
636
721
  else {
637
- rb_enc_check(p->str, pattern);
722
+ StringValue(pattern);
638
723
  if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
724
+ strscan_enc_check(p->str, pattern);
639
725
  return Qnil;
640
726
  }
641
- if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
642
- return Qnil;
727
+
728
+ if (headonly) {
729
+ strscan_enc_check(p->str, pattern);
730
+
731
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
732
+ return Qnil;
733
+ }
734
+ set_registers(p, RSTRING_LEN(pattern));
735
+ }
736
+ else {
737
+ rb_encoding *enc = rb_enc_check(p->str, pattern);
738
+ long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
739
+ CURPTR(p), S_RESTLEN(p), enc);
740
+ if (pos == -1) {
741
+ return Qnil;
742
+ }
743
+ set_registers(p, RSTRING_LEN(pattern) + pos);
643
744
  }
644
- set_registers(p, RSTRING_LEN(pattern));
645
745
  }
646
746
 
647
747
  MATCHED(p);
@@ -662,20 +762,9 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
662
762
  }
663
763
 
664
764
  /*
665
- * call-seq: scan(pattern) => String
666
- *
667
- * Tries to match with +pattern+ at the current position. If there's a match,
668
- * the scanner advances the "scan pointer" and returns the matched string.
669
- * Otherwise, the scanner returns +nil+.
670
- *
671
- * s = StringScanner.new('test string')
672
- * p s.scan(/\w+/) # -> "test"
673
- * p s.scan(/\w+/) # -> nil
674
- * p s.scan(/\s+/) # -> " "
675
- * p s.scan("str") # -> "str"
676
- * p s.scan(/\w+/) # -> "ing"
677
- * p s.scan(/./) # -> nil
678
- *
765
+ * :markup: markdown
766
+ * :include: strscan/link_refs.txt
767
+ * :include: strscan/methods/scan.md
679
768
  */
680
769
  static VALUE
681
770
  strscan_scan(VALUE self, VALUE re)
@@ -684,16 +773,60 @@ strscan_scan(VALUE self, VALUE re)
684
773
  }
685
774
 
686
775
  /*
687
- * call-seq: match?(pattern)
776
+ * :markup: markdown
777
+ * :include: strscan/link_refs.txt
688
778
  *
689
- * Tests whether the given +pattern+ is matched from the current scan pointer.
690
- * Returns the length of the match, or +nil+. The scan pointer is not advanced.
779
+ * call-seq:
780
+ * match?(pattern) -> matched_size or nil
781
+ *
782
+ * Attempts to [match][17] the given `pattern`
783
+ * at the beginning of the [target substring][3];
784
+ * does not modify the [positions][11].
785
+ *
786
+ * If the match succeeds:
787
+ *
788
+ * - Sets [match values][9].
789
+ * - Returns the size in bytes of the matched substring.
790
+ *
791
+ *
792
+ * ```rb
793
+ * scanner = StringScanner.new('foobarbaz')
794
+ * scanner.pos = 3
795
+ * scanner.match?(/bar/) # => 3
796
+ * put_match_values(scanner)
797
+ * # Basic match values:
798
+ * # matched?: true
799
+ * # matched_size: 3
800
+ * # pre_match: "foo"
801
+ * # matched : "bar"
802
+ * # post_match: "baz"
803
+ * # Captured match values:
804
+ * # size: 1
805
+ * # captures: []
806
+ * # named_captures: {}
807
+ * # values_at: ["bar", nil]
808
+ * # []:
809
+ * # [0]: "bar"
810
+ * # [1]: nil
811
+ * put_situation(scanner)
812
+ * # Situation:
813
+ * # pos: 3
814
+ * # charpos: 3
815
+ * # rest: "barbaz"
816
+ * # rest_size: 6
817
+ * ```
818
+ *
819
+ * If the match fails:
820
+ *
821
+ * - Clears match values.
822
+ * - Returns `nil`.
823
+ * - Does not increment positions.
824
+ *
825
+ * ```rb
826
+ * scanner.match?(/nope/) # => nil
827
+ * match_values_cleared?(scanner) # => true
828
+ * ```
691
829
  *
692
- * s = StringScanner.new('test string')
693
- * p s.match?(/\w+/) # -> 4
694
- * p s.match?(/\w+/) # -> 4
695
- * p s.match?("test") # -> 4
696
- * p s.match?(/\s+/) # -> nil
697
830
  */
698
831
  static VALUE
699
832
  strscan_match_p(VALUE self, VALUE re)
@@ -702,22 +835,9 @@ strscan_match_p(VALUE self, VALUE re)
702
835
  }
703
836
 
704
837
  /*
705
- * call-seq: skip(pattern)
706
- *
707
- * Attempts to skip over the given +pattern+ beginning with the scan pointer.
708
- * If it matches, the scan pointer is advanced to the end of the match, and the
709
- * length of the match is returned. Otherwise, +nil+ is returned.
710
- *
711
- * It's similar to #scan, but without returning the matched string.
712
- *
713
- * s = StringScanner.new('test string')
714
- * p s.skip(/\w+/) # -> 4
715
- * p s.skip(/\w+/) # -> nil
716
- * p s.skip(/\s+/) # -> 1
717
- * p s.skip("st") # -> 2
718
- * p s.skip(/\w+/) # -> 4
719
- * p s.skip(/./) # -> nil
720
- *
838
+ * :markup: markdown
839
+ * :include: strscan/link_refs.txt
840
+ * :include: strscan/methods/skip.md
721
841
  */
722
842
  static VALUE
723
843
  strscan_skip(VALUE self, VALUE re)
@@ -726,19 +846,59 @@ strscan_skip(VALUE self, VALUE re)
726
846
  }
727
847
 
728
848
  /*
729
- * call-seq: check(pattern)
730
- *
731
- * This returns the value that #scan would return, without advancing the scan
732
- * pointer. The match register is affected, though.
849
+ * :markup: markdown
850
+ * :include: strscan/link_refs.txt
733
851
  *
734
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
735
- * s.check /Fri/ # -> "Fri"
736
- * s.pos # -> 0
737
- * s.matched # -> "Fri"
738
- * s.check /12/ # -> nil
739
- * s.matched # -> nil
852
+ * call-seq:
853
+ * check(pattern) -> matched_substring or nil
854
+ *
855
+ * Attempts to [match][17] the given `pattern`
856
+ * at the beginning of the [target substring][3];
857
+ * does not modify the [positions][11].
858
+ *
859
+ * If the match succeeds:
860
+ *
861
+ * - Returns the matched substring.
862
+ * - Sets all [match values][9].
863
+ *
864
+ * ```rb
865
+ * scanner = StringScanner.new('foobarbaz')
866
+ * scanner.pos = 3
867
+ * scanner.check('bar') # => "bar"
868
+ * put_match_values(scanner)
869
+ * # Basic match values:
870
+ * # matched?: true
871
+ * # matched_size: 3
872
+ * # pre_match: "foo"
873
+ * # matched : "bar"
874
+ * # post_match: "baz"
875
+ * # Captured match values:
876
+ * # size: 1
877
+ * # captures: []
878
+ * # named_captures: {}
879
+ * # values_at: ["bar", nil]
880
+ * # []:
881
+ * # [0]: "bar"
882
+ * # [1]: nil
883
+ * # => 0..1
884
+ * put_situation(scanner)
885
+ * # Situation:
886
+ * # pos: 3
887
+ * # charpos: 3
888
+ * # rest: "barbaz"
889
+ * # rest_size: 6
890
+ * ```
891
+ *
892
+ * If the match fails:
893
+ *
894
+ * - Returns `nil`.
895
+ * - Clears all [match values][9].
896
+ *
897
+ * ```rb
898
+ * scanner.check(/nope/) # => nil
899
+ * match_values_cleared?(scanner) # => true
900
+ * ```
740
901
  *
741
- * Mnemonic: it "checks" to see whether a #scan will return a value.
742
902
  */
743
903
  static VALUE
744
904
  strscan_check(VALUE self, VALUE re)
@@ -747,15 +907,24 @@ strscan_check(VALUE self, VALUE re)
747
907
  }
748
908
 
749
909
  /*
750
- * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
910
+ * call-seq:
911
+ * scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or matched_size or nil
912
+ *
913
+ * Equivalent to one of the following:
914
+ *
915
+ * - +advance_pointer_p+ +true+:
916
+ *
917
+ * - +return_string_p+ +true+: StringScanner#scan(pattern).
918
+ * - +return_string_p+ +false+: StringScanner#skip(pattern).
751
919
  *
752
- * Tests whether the given +pattern+ is matched from the current scan pointer.
753
- * Advances the scan pointer if +advance_pointer_p+ is true.
754
- * Returns the matched string if +return_string_p+ is true.
755
- * The match register is affected.
920
+ * - +advance_pointer_p+ +false+:
921
+ *
922
+ * - +return_string_p+ +true+: StringScanner#check(pattern).
923
+ * - +return_string_p+ +false+: StringScanner#match?(pattern).
756
924
  *
757
- * "full" means "#scan with full parameters".
758
925
  */
926
+
927
+ /* :nodoc: */
759
928
  static VALUE
760
929
  strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
761
930
  {
@@ -763,16 +932,9 @@ strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
763
932
  }
764
933
 
765
934
  /*
766
- * call-seq: scan_until(pattern)
767
- *
768
- * Scans the string _until_ the +pattern+ is matched. Returns the substring up
769
- * to and including the end of the match, advancing the scan pointer to that
770
- * location. If there is no match, +nil+ is returned.
771
- *
772
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
773
- * s.scan_until(/1/) # -> "Fri Dec 1"
774
- * s.pre_match # -> "Fri Dec "
775
- * s.scan_until(/XYZ/) # -> nil
935
+ * :markup: markdown
936
+ * :include: strscan/link_refs.txt
937
+ * :include: strscan/methods/scan_until.md
776
938
  */
777
939
  static VALUE
778
940
  strscan_scan_until(VALUE self, VALUE re)
@@ -781,17 +943,61 @@ strscan_scan_until(VALUE self, VALUE re)
781
943
  }
782
944
 
783
945
  /*
784
- * call-seq: exist?(pattern)
946
+ * :markup: markdown
947
+ * :include: strscan/link_refs.txt
785
948
  *
786
- * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
787
- * without advancing the scan pointer. This predicates whether a #scan_until
788
- * will return a value.
949
+ * call-seq:
950
+ * exist?(pattern) -> byte_offset or nil
951
+ *
952
+ * Attempts to [match][17] the given `pattern`
953
+ * anywhere (at any [position][2])
954
+ * in the [target substring][3];
955
+ * does not modify the [positions][11].
956
+ *
957
+ * If the match succeeds:
958
+ *
959
+ * - Returns a byte offset:
960
+ * the distance in bytes between the current [position][2]
961
+ * and the end of the matched substring.
962
+ * - Sets all [match values][9].
963
+ *
964
+ * ```rb
965
+ * scanner = StringScanner.new('foobarbazbatbam')
966
+ * scanner.pos = 6
967
+ * scanner.exist?(/bat/) # => 6
968
+ * put_match_values(scanner)
969
+ * # Basic match values:
970
+ * # matched?: true
971
+ * # matched_size: 3
972
+ * # pre_match: "foobarbaz"
973
+ * # matched : "bat"
974
+ * # post_match: "bam"
975
+ * # Captured match values:
976
+ * # size: 1
977
+ * # captures: []
978
+ * # named_captures: {}
979
+ * # values_at: ["bat", nil]
980
+ * # []:
981
+ * # [0]: "bat"
982
+ * # [1]: nil
983
+ * put_situation(scanner)
984
+ * # Situation:
985
+ * # pos: 6
986
+ * # charpos: 6
987
+ * # rest: "bazbatbam"
988
+ * # rest_size: 9
989
+ * ```
990
+ *
991
+ * If the match fails:
992
+ *
993
+ * - Returns `nil`.
994
+ * - Clears all [match values][9].
995
+ *
996
+ * ```rb
997
+ * scanner.exist?(/nope/) # => nil
998
+ * match_values_cleared?(scanner) # => true
999
+ * ```
789
1000
  *
790
- * s = StringScanner.new('test string')
791
- * s.exist? /s/ # -> 3
792
- * s.scan /test/ # -> "test"
793
- * s.exist? /s/ # -> 2
794
- * s.exist? /e/ # -> nil
795
1001
  */
796
1002
  static VALUE
797
1003
  strscan_exist_p(VALUE self, VALUE re)
@@ -800,20 +1006,9 @@ strscan_exist_p(VALUE self, VALUE re)
800
1006
  }
801
1007
 
802
1008
  /*
803
- * call-seq: skip_until(pattern)
804
- *
805
- * Advances the scan pointer until +pattern+ is matched and consumed. Returns
806
- * the number of bytes advanced, or +nil+ if no match was found.
807
- *
808
- * Look ahead to match +pattern+, and advance the scan pointer to the _end_
809
- * of the match. Return the number of characters advanced, or +nil+ if the
810
- * match was unsuccessful.
811
- *
812
- * It's similar to #scan_until, but without returning the intervening string.
813
- *
814
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
815
- * s.skip_until /12/ # -> 10
816
- * s #
1009
+ * :markup: markdown
1010
+ * :include: strscan/link_refs.txt
1011
+ * :include: strscan/methods/skip_until.md
817
1012
  */
818
1013
  static VALUE
819
1014
  strscan_skip_until(VALUE self, VALUE re)
@@ -822,17 +1017,61 @@ strscan_skip_until(VALUE self, VALUE re)
822
1017
  }
823
1018
 
824
1019
  /*
825
- * call-seq: check_until(pattern)
1020
+ * :markup: markdown
1021
+ * :include: strscan/link_refs.txt
826
1022
  *
827
- * This returns the value that #scan_until would return, without advancing the
828
- * scan pointer. The match register is affected, though.
829
- *
830
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
831
- * s.check_until /12/ # -> "Fri Dec 12"
832
- * s.pos # -> 0
833
- * s.matched # -> 12
1023
+ * call-seq:
1024
+ * check_until(pattern) -> substring or nil
1025
+ *
1026
+ * Attempts to [match][17] the given `pattern`
1027
+ * anywhere (at any [position][2])
1028
+ * in the [target substring][3];
1029
+ * does not modify the [positions][11].
1030
+ *
1031
+ * If the match succeeds:
1032
+ *
1033
+ * - Sets all [match values][9].
1034
+ * - Returns the substring
1035
+ * which extends from the current [position][2]
1036
+ * to the end of the matched substring.
1037
+ *
1038
+ * ```rb
1039
+ * scanner = StringScanner.new('foobarbazbatbam')
1040
+ * scanner.pos = 6
1041
+ * scanner.check_until(/bat/) # => "bazbat"
1042
+ * put_match_values(scanner)
1043
+ * # Basic match values:
1044
+ * # matched?: true
1045
+ * # matched_size: 3
1046
+ * # pre_match: "foobarbaz"
1047
+ * # matched : "bat"
1048
+ * # post_match: "bam"
1049
+ * # Captured match values:
1050
+ * # size: 1
1051
+ * # captures: []
1052
+ * # named_captures: {}
1053
+ * # values_at: ["bat", nil]
1054
+ * # []:
1055
+ * # [0]: "bat"
1056
+ * # [1]: nil
1057
+ * put_situation(scanner)
1058
+ * # Situation:
1059
+ * # pos: 6
1060
+ * # charpos: 6
1061
+ * # rest: "bazbatbam"
1062
+ * # rest_size: 9
1063
+ * ```
1064
+ *
1065
+ * If the match fails:
1066
+ *
1067
+ * - Clears all [match values][9].
1068
+ * - Returns `nil`.
1069
+ *
1070
+ * ```rb
1071
+ * scanner.check_until(/nope/) # => nil
1072
+ * match_values_cleared?(scanner) # => true
1073
+ * ```
834
1074
  *
835
- * Mnemonic: it "checks" to see whether a #scan_until will return a value.
836
1075
  */
837
1076
  static VALUE
838
1077
  strscan_check_until(VALUE self, VALUE re)
@@ -841,14 +1080,24 @@ strscan_check_until(VALUE self, VALUE re)
841
1080
  }
842
1081
 
843
1082
  /*
844
- * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
1083
+ * call-seq:
1084
+ * search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil
1085
+ *
1086
+ * Equivalent to one of the following:
1087
+ *
1088
+ * - +advance_pointer_p+ +true+:
1089
+ *
1090
+ * - +return_string_p+ +true+: StringScanner#scan_until(pattern).
1091
+ * - +return_string_p+ +false+: StringScanner#skip_until(pattern).
1092
+ *
1093
+ * - +advance_pointer_p+ +false+:
1094
+ *
1095
+ * - +return_string_p+ +true+: StringScanner#check_until(pattern).
1096
+ * - +return_string_p+ +false+: StringScanner#exist?(pattern).
845
1097
  *
846
- * Scans the string _until_ the +pattern+ is matched.
847
- * Advances the scan pointer if +advance_pointer_p+, otherwise not.
848
- * Returns the matched string if +return_string_p+ is true, otherwise
849
- * returns the number of bytes advanced.
850
- * This method does affect the match register.
851
1098
  */
1099
+
1100
+ /* :nodoc: */
852
1101
  static VALUE
853
1102
  strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
854
1103
  {
@@ -868,17 +1117,9 @@ adjust_registers_to_matched(struct strscanner *p)
868
1117
  }
869
1118
 
870
1119
  /*
871
- * Scans one character and returns it.
872
- * This method is multibyte character sensitive.
873
- *
874
- * s = StringScanner.new("ab")
875
- * s.getch # => "a"
876
- * s.getch # => "b"
877
- * s.getch # => nil
878
- *
879
- * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
880
- * s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
881
- * s.getch # => nil
1120
+ * :markup: markdown
1121
+ * :include: strscan/link_refs.txt
1122
+ * :include: strscan/methods/getch.md
882
1123
  */
883
1124
  static VALUE
884
1125
  strscan_getch(VALUE self)
@@ -903,19 +1144,55 @@ strscan_getch(VALUE self)
903
1144
  }
904
1145
 
905
1146
  /*
906
- * Scans one byte and returns it.
1147
+ * call-seq:
1148
+ * scan_byte -> integer_byte
1149
+ *
1150
+ * Scans one byte and returns it as an integer.
907
1151
  * This method is not multibyte character sensitive.
908
1152
  * See also: #getch.
909
1153
  *
1154
+ */
1155
+ static VALUE
1156
+ strscan_scan_byte(VALUE self)
1157
+ {
1158
+ struct strscanner *p;
1159
+ VALUE byte;
1160
+
1161
+ GET_SCANNER(self, p);
1162
+ CLEAR_MATCH_STATUS(p);
1163
+ if (EOS_P(p))
1164
+ return Qnil;
1165
+
1166
+ byte = INT2FIX((unsigned char)*CURPTR(p));
1167
+ p->prev = p->curr;
1168
+ p->curr++;
1169
+ MATCHED(p);
1170
+ adjust_registers_to_matched(p);
1171
+ return byte;
1172
+ }
1173
+
1174
+ /*
1175
+ * Peeks at the current byte and returns it as an integer.
1176
+ *
910
1177
  * s = StringScanner.new('ab')
911
- * s.get_byte # => "a"
912
- * s.get_byte # => "b"
913
- * s.get_byte # => nil
914
- *
915
- * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
916
- * s.get_byte # => "\xA4"
917
- * s.get_byte # => "\xA2"
918
- * s.get_byte # => nil
1178
+ * s.peek_byte # => 97
1179
+ */
1180
+ static VALUE
1181
+ strscan_peek_byte(VALUE self)
1182
+ {
1183
+ struct strscanner *p;
1184
+
1185
+ GET_SCANNER(self, p);
1186
+ if (EOS_P(p))
1187
+ return Qnil;
1188
+
1189
+ return INT2FIX((unsigned char)*CURPTR(p));
1190
+ }
1191
+
1192
+ /*
1193
+ * :markup: markdown
1194
+ * :include: strscan/link_refs.txt
1195
+ * :include: strscan/methods/get_byte.md
919
1196
  */
920
1197
  static VALUE
921
1198
  strscan_get_byte(VALUE self)
@@ -937,9 +1214,14 @@ strscan_get_byte(VALUE self)
937
1214
  }
938
1215
 
939
1216
  /*
1217
+ * call-seq:
1218
+ * getbyte
1219
+ *
940
1220
  * Equivalent to #get_byte.
941
1221
  * This method is obsolete; use #get_byte instead.
942
1222
  */
1223
+
1224
+ /* :nodoc: */
943
1225
  static VALUE
944
1226
  strscan_getbyte(VALUE self)
945
1227
  {
@@ -948,14 +1230,22 @@ strscan_getbyte(VALUE self)
948
1230
  }
949
1231
 
950
1232
  /*
951
- * call-seq: peek(len)
1233
+ * :markup: markdown
1234
+ * :include: strscan/link_refs.txt
1235
+ *
1236
+ * call-seq:
1237
+ * peek(length) -> substring
952
1238
  *
953
- * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
954
- * advancing the scan pointer.
1239
+ * Returns the substring `string[pos, length]`;
1240
+ * does not update [match values][9] or [positions][11]:
955
1241
  *
956
- * s = StringScanner.new('test string')
957
- * s.peek(7) # => "test st"
958
- * s.peek(7) # => "test st"
1242
+ * ```rb
1243
+ * scanner = StringScanner.new('foobarbaz')
1244
+ * scanner.pos = 3
1245
+ * scanner.peek(3) # => "bar"
1246
+ * scanner.terminate
1247
+ * scanner.peek(3) # => ""
1248
+ * ```
959
1249
  *
960
1250
  */
961
1251
  static VALUE
@@ -975,9 +1265,14 @@ strscan_peek(VALUE self, VALUE vlen)
975
1265
  }
976
1266
 
977
1267
  /*
1268
+ * call-seq:
1269
+ * peep
1270
+ *
978
1271
  * Equivalent to #peek.
979
1272
  * This method is obsolete; use #peek instead.
980
1273
  */
1274
+
1275
+ /* :nodoc: */
981
1276
  static VALUE
982
1277
  strscan_peep(VALUE self, VALUE vlen)
983
1278
  {
@@ -985,16 +1280,156 @@ strscan_peep(VALUE self, VALUE vlen)
985
1280
  return strscan_peek(self, vlen);
986
1281
  }
987
1282
 
1283
+ static VALUE
1284
+ strscan_parse_integer(struct strscanner *p, int base, long len)
1285
+ {
1286
+ VALUE buffer_v, integer;
1287
+
1288
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
1289
+
1290
+ MEMCPY(buffer, CURPTR(p), char, len);
1291
+ buffer[len] = '\0';
1292
+ integer = rb_cstr2inum(buffer, base);
1293
+ RB_ALLOCV_END(buffer_v);
1294
+ p->curr += len;
1295
+ return integer;
1296
+ }
1297
+
1298
+ static inline bool
1299
+ strscan_ascii_compat_fastpath(VALUE str) {
1300
+ int encindex = ENCODING_GET_INLINED(str);
1301
+ // The overwhelming majority of strings are in one of these 3 encodings.
1302
+ return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex;
1303
+ }
1304
+
1305
+ static inline void
1306
+ strscan_must_ascii_compat(VALUE str)
1307
+ {
1308
+ // The overwhelming majority of strings are in one of these 3 encodings.
1309
+ if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) {
1310
+ return;
1311
+ }
1312
+
1313
+ rb_must_asciicompat(str);
1314
+ }
1315
+
1316
+ static VALUE
1317
+ strscan_scan_base10_integer(VALUE self)
1318
+ {
1319
+ char *ptr;
1320
+ long len = 0;
1321
+ struct strscanner *p;
1322
+
1323
+ GET_SCANNER(self, p);
1324
+ CLEAR_MATCH_STATUS(p);
1325
+
1326
+ strscan_must_ascii_compat(p->str);
1327
+
1328
+ ptr = CURPTR(p);
1329
+
1330
+ long remaining_len = S_RESTLEN(p);
1331
+
1332
+ if (remaining_len <= 0) {
1333
+ return Qnil;
1334
+ }
1335
+
1336
+ if (ptr[len] == '-' || ptr[len] == '+') {
1337
+ len++;
1338
+ }
1339
+
1340
+ if (!rb_isdigit(ptr[len])) {
1341
+ return Qnil;
1342
+ }
1343
+
1344
+ MATCHED(p);
1345
+ p->prev = p->curr;
1346
+
1347
+ while (len < remaining_len && rb_isdigit(ptr[len])) {
1348
+ len++;
1349
+ }
1350
+
1351
+ return strscan_parse_integer(p, 10, len);
1352
+ }
1353
+
1354
+ static VALUE
1355
+ strscan_scan_base16_integer(VALUE self)
1356
+ {
1357
+ char *ptr;
1358
+ long len = 0;
1359
+ struct strscanner *p;
1360
+
1361
+ GET_SCANNER(self, p);
1362
+ CLEAR_MATCH_STATUS(p);
1363
+
1364
+ strscan_must_ascii_compat(p->str);
1365
+
1366
+ ptr = CURPTR(p);
1367
+
1368
+ long remaining_len = S_RESTLEN(p);
1369
+
1370
+ if (remaining_len <= 0) {
1371
+ return Qnil;
1372
+ }
1373
+
1374
+ if (ptr[len] == '-' || ptr[len] == '+') {
1375
+ len++;
1376
+ }
1377
+
1378
+ if ((remaining_len >= (len + 2)) && ptr[len] == '0' && ptr[len + 1] == 'x') {
1379
+ len += 2;
1380
+ }
1381
+
1382
+ if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
1383
+ return Qnil;
1384
+ }
1385
+
1386
+ MATCHED(p);
1387
+ p->prev = p->curr;
1388
+
1389
+ while (len < remaining_len && rb_isxdigit(ptr[len])) {
1390
+ len++;
1391
+ }
1392
+
1393
+ return strscan_parse_integer(p, 16, len);
1394
+ }
1395
+
988
1396
  /*
989
- * Sets the scan pointer to the previous position. Only one previous position is
990
- * remembered, and it changes with each scanning operation.
1397
+ * :markup: markdown
1398
+ * :include: strscan/link_refs.txt
1399
+ *
1400
+ * call-seq:
1401
+ * unscan -> self
1402
+ *
1403
+ * Sets the [position][2] to its value prior to the most recent successful
1404
+ * [match][17] attempt:
1405
+ *
1406
+ * ```rb
1407
+ * scanner = StringScanner.new('foobarbaz')
1408
+ * scanner.scan(/foo/)
1409
+ * put_situation(scanner)
1410
+ * # Situation:
1411
+ * # pos: 3
1412
+ * # charpos: 3
1413
+ * # rest: "barbaz"
1414
+ * # rest_size: 6
1415
+ * scanner.unscan
1416
+ * # => #<StringScanner 0/9 @ "fooba...">
1417
+ * put_situation(scanner)
1418
+ * # Situation:
1419
+ * # pos: 0
1420
+ * # charpos: 0
1421
+ * # rest: "foobarbaz"
1422
+ * # rest_size: 9
1423
+ * ```
1424
+ *
1425
+ * Raises an exception if match values are clear:
1426
+ *
1427
+ * ```rb
1428
+ * scanner.scan(/nope/) # => nil
1429
+ * match_values_cleared?(scanner) # => true
1430
+ * scanner.unscan # Raises StringScanner::Error.
1431
+ * ```
991
1432
  *
992
- * s = StringScanner.new('test string')
993
- * s.scan(/\w+/) # => "test"
994
- * s.unscan
995
- * s.scan(/../) # => "te"
996
- * s.scan(/\d/) # => nil
997
- * s.unscan # ScanError: unscan failed: previous match record not exist
998
1433
  */
999
1434
  static VALUE
1000
1435
  strscan_unscan(VALUE self)
@@ -1010,16 +1445,37 @@ strscan_unscan(VALUE self)
1010
1445
  }
1011
1446
 
1012
1447
  /*
1013
- * Returns +true+ if and only if the scan pointer is at the beginning of the line.
1014
- *
1015
- * s = StringScanner.new("test\ntest\n")
1016
- * s.bol? # => true
1017
- * s.scan(/te/)
1018
- * s.bol? # => false
1019
- * s.scan(/st\n/)
1020
- * s.bol? # => true
1021
- * s.terminate
1022
- * s.bol? # => true
1448
+ *
1449
+ * :markup: markdown
1450
+ * :include: strscan/link_refs.txt
1451
+ *
1452
+ * call-seq:
1453
+ * beginning_of_line? -> true or false
1454
+ *
1455
+ * Returns whether the [position][2] is at the beginning of a line;
1456
+ * that is, at the beginning of the [stored string][1]
1457
+ * or immediately after a newline:
1458
+ *
1459
+ * scanner = StringScanner.new(MULTILINE_TEXT)
1460
+ * scanner.string
1461
+ * # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n"
1462
+ * scanner.pos # => 0
1463
+ * scanner.beginning_of_line? # => true
1464
+ *
1465
+ * scanner.scan_until(/,/) # => "Go placidly amid the noise and haste,"
1466
+ * scanner.beginning_of_line? # => false
1467
+ *
1468
+ * scanner.scan(/\n/) # => "\n"
1469
+ * scanner.beginning_of_line? # => true
1470
+ *
1471
+ * scanner.terminate
1472
+ * scanner.beginning_of_line? # => true
1473
+ *
1474
+ * scanner.concat('x')
1475
+ * scanner.terminate
1476
+ * scanner.beginning_of_line? # => false
1477
+ *
1478
+ * StringScanner#bol? is an alias for StringScanner#beginning_of_line?.
1023
1479
  */
1024
1480
  static VALUE
1025
1481
  strscan_bol_p(VALUE self)
@@ -1033,14 +1489,24 @@ strscan_bol_p(VALUE self)
1033
1489
  }
1034
1490
 
1035
1491
  /*
1036
- * Returns +true+ if the scan pointer is at the end of the string.
1492
+ * :markup: markdown
1493
+ * :include: strscan/link_refs.txt
1494
+ *
1495
+ * call-seq:
1496
+ * eos? -> true or false
1497
+ *
1498
+ * Returns whether the [position][2]
1499
+ * is at the end of the [stored string][1]:
1500
+ *
1501
+ * ```rb
1502
+ * scanner = StringScanner.new('foobarbaz')
1503
+ * scanner.eos? # => false
1504
+ * scanner.pos = 3
1505
+ * scanner.eos? # => false
1506
+ * scanner.terminate
1507
+ * scanner.eos? # => true
1508
+ * ```
1037
1509
  *
1038
- * s = StringScanner.new('test string')
1039
- * p s.eos? # => false
1040
- * s.scan(/test/)
1041
- * p s.eos? # => false
1042
- * s.terminate
1043
- * p s.eos? # => true
1044
1510
  */
1045
1511
  static VALUE
1046
1512
  strscan_eos_p(VALUE self)
@@ -1052,9 +1518,14 @@ strscan_eos_p(VALUE self)
1052
1518
  }
1053
1519
 
1054
1520
  /*
1521
+ * call-seq:
1522
+ * empty?
1523
+ *
1055
1524
  * Equivalent to #eos?.
1056
1525
  * This method is obsolete, use #eos? instead.
1057
1526
  */
1527
+
1528
+ /* :nodoc: */
1058
1529
  static VALUE
1059
1530
  strscan_empty_p(VALUE self)
1060
1531
  {
@@ -1063,6 +1534,9 @@ strscan_empty_p(VALUE self)
1063
1534
  }
1064
1535
 
1065
1536
  /*
1537
+ * call-seq:
1538
+ * rest?
1539
+ *
1066
1540
  * Returns true if and only if there is more data in the string. See #eos?.
1067
1541
  * This method is obsolete; use #eos? instead.
1068
1542
  *
@@ -1071,6 +1545,8 @@ strscan_empty_p(VALUE self)
1071
1545
  * s.eos? # => false
1072
1546
  * s.rest? # => true
1073
1547
  */
1548
+
1549
+ /* :nodoc: */
1074
1550
  static VALUE
1075
1551
  strscan_rest_p(VALUE self)
1076
1552
  {
@@ -1081,13 +1557,26 @@ strscan_rest_p(VALUE self)
1081
1557
  }
1082
1558
 
1083
1559
  /*
1084
- * Returns +true+ if and only if the last match was successful.
1560
+ * :markup: markdown
1561
+ * :include: strscan/link_refs.txt
1562
+ *
1563
+ * call-seq:
1564
+ * matched? -> true or false
1565
+ *
1566
+ * Returns `true` if the most recent [match attempt][17] was successful,
1567
+ * `false` otherwise;
1568
+ * see [Basic Matched Values][18]:
1569
+ *
1570
+ * ```rb
1571
+ * scanner = StringScanner.new('foobarbaz')
1572
+ * scanner.matched? # => false
1573
+ * scanner.pos = 3
1574
+ * scanner.exist?(/baz/) # => 6
1575
+ * scanner.matched? # => true
1576
+ * scanner.exist?(/nope/) # => nil
1577
+ * scanner.matched? # => false
1578
+ * ```
1085
1579
  *
1086
- * s = StringScanner.new('test string')
1087
- * s.match?(/\w+/) # => 4
1088
- * s.matched? # => true
1089
- * s.match?(/\d+/) # => nil
1090
- * s.matched? # => false
1091
1580
  */
1092
1581
  static VALUE
1093
1582
  strscan_matched_p(VALUE self)
@@ -1099,11 +1588,27 @@ strscan_matched_p(VALUE self)
1099
1588
  }
1100
1589
 
1101
1590
  /*
1102
- * Returns the last matched string.
1591
+ * :markup: markdown
1592
+ * :include: strscan/link_refs.txt
1593
+ *
1594
+ * call-seq:
1595
+ * matched -> matched_substring or nil
1596
+ *
1597
+ * Returns the matched substring from the most recent [match][17] attempt
1598
+ * if it was successful,
1599
+ * or `nil` otherwise;
1600
+ * see [Basic Matched Values][18]:
1601
+ *
1602
+ * ```rb
1603
+ * scanner = StringScanner.new('foobarbaz')
1604
+ * scanner.matched # => nil
1605
+ * scanner.pos = 3
1606
+ * scanner.match?(/bar/) # => 3
1607
+ * scanner.matched # => "bar"
1608
+ * scanner.match?(/nope/) # => nil
1609
+ * scanner.matched # => nil
1610
+ * ```
1103
1611
  *
1104
- * s = StringScanner.new('test string')
1105
- * s.match?(/\w+/) # -> 4
1106
- * s.matched # -> "test"
1107
1612
  */
1108
1613
  static VALUE
1109
1614
  strscan_matched(VALUE self)
@@ -1118,15 +1623,29 @@ strscan_matched(VALUE self)
1118
1623
  }
1119
1624
 
1120
1625
  /*
1121
- * Returns the size of the most recent match in bytes, or +nil+ if there
1122
- * was no recent match. This is different than <tt>matched.size</tt>,
1123
- * which will return the size in characters.
1626
+ * :markup: markdown
1627
+ * :include: strscan/link_refs.txt
1628
+ *
1629
+ * call-seq:
1630
+ * matched_size -> substring_size or nil
1631
+ *
1632
+ * Returns the size (in bytes) of the matched substring
1633
+ * from the most recent [match attempt][17] if it was successful,
1634
+ * or `nil` otherwise;
1635
+ * see [Basic Matched Values][18]:
1636
+ *
1637
+ * ```rb
1638
+ * scanner = StringScanner.new('foobarbaz')
1639
+ * scanner.matched_size # => nil
1640
+ *
1641
+ * scanner.pos = 3
1642
+ * scanner.exist?(/baz/) # => 6
1643
+ * scanner.matched_size # => 3
1644
+ *
1645
+ * scanner.exist?(/nope/) # => nil
1646
+ * scanner.matched_size # => nil
1647
+ * ```
1124
1648
  *
1125
- * s = StringScanner.new('test string')
1126
- * s.check /\w+/ # -> "test"
1127
- * s.matched_size # -> 4
1128
- * s.check /\d+/ # -> nil
1129
- * s.matched_size # -> nil
1130
1649
  */
1131
1650
  static VALUE
1132
1651
  strscan_matched_size(VALUE self)
@@ -1157,30 +1676,75 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
1157
1676
  }
1158
1677
 
1159
1678
  /*
1160
- * call-seq: [](n)
1161
- *
1162
- * Returns the n-th subgroup in the most recent match.
1163
- *
1164
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1165
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1166
- * s[0] # -> "Fri Dec 12 "
1167
- * s[1] # -> "Fri"
1168
- * s[2] # -> "Dec"
1169
- * s[3] # -> "12"
1170
- * s.post_match # -> "1975 14:39"
1171
- * s.pre_match # -> ""
1172
- *
1173
- * s.reset
1174
- * s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) # -> "Fri Dec 12 "
1175
- * s[0] # -> "Fri Dec 12 "
1176
- * s[1] # -> "Fri"
1177
- * s[2] # -> "Dec"
1178
- * s[3] # -> "12"
1179
- * s[:wday] # -> "Fri"
1180
- * s[:month] # -> "Dec"
1181
- * s[:day] # -> "12"
1182
- * s.post_match # -> "1975 14:39"
1183
- * s.pre_match # -> ""
1679
+ *
1680
+ * :markup: markdown
1681
+ * :include: strscan/link_refs.txt
1682
+ *
1683
+ * call-seq:
1684
+ * [](specifier) -> substring or nil
1685
+ *
1686
+ * Returns a captured substring or `nil`;
1687
+ * see [Captured Match Values][13].
1688
+ *
1689
+ * When there are captures:
1690
+ *
1691
+ * ```rb
1692
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1693
+ * scanner.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
1694
+ * ```
1695
+ *
1696
+ * - `specifier` zero: returns the entire matched substring:
1697
+ *
1698
+ * ```rb
1699
+ * scanner[0] # => "Fri Dec 12 "
1700
+ * scanner.pre_match # => ""
1701
+ * scanner.post_match # => "1975 14:39"
1702
+ * ```
1703
+ *
1704
+ * - `specifier` positive integer: returns the capture at that index, or `nil` if out of range:
1705
+ *
1706
+ * ```rb
1707
+ * scanner[1] # => "Fri"
1708
+ * scanner[2] # => "Dec"
1709
+ * scanner[3] # => "12"
1710
+ * scanner[4] # => nil
1711
+ * ```
1712
+ *
1713
+ * - `specifier` negative integer: counts backward from the last subgroup:
1714
+ *
1715
+ * ```rb
1716
+ * scanner[-1] # => "12"
1717
+ * scanner[-4] # => "Fri Dec 12 "
1718
+ * scanner[-5] # => nil
1719
+ * ```
1720
+ *
1721
+ * - `specifier` symbol or string: returns the named subgroup, or `nil` if there is none:
1722
+ *
1723
+ * ```rb
1724
+ * scanner[:wday] # => "Fri"
1725
+ * scanner['wday'] # => "Fri"
1726
+ * scanner[:month] # => "Dec"
1727
+ * scanner[:day] # => "12"
1728
+ * scanner[:nope] # => nil
1729
+ * ```
1730
+ *
1731
+ * When there are no captures, only `[0]` returns non-`nil`:
1732
+ *
1733
+ * ```rb
1734
+ * scanner = StringScanner.new('foobarbaz')
1735
+ * scanner.exist?(/bar/)
1736
+ * scanner[0] # => "bar"
1737
+ * scanner[1] # => nil
1738
+ * ```
1739
+ *
1740
+ * For a failed match, even `[0]` returns `nil`:
1741
+ *
1742
+ * ```rb
1743
+ * scanner.scan(/nope/) # => nil
1744
+ * scanner[0] # => nil
1745
+ * scanner[1] # => nil
1746
+ * ```
1747
+ *
1184
1748
  */
1185
1749
  static VALUE
1186
1750
  strscan_aref(VALUE self, VALUE idx)
@@ -1217,14 +1781,28 @@ strscan_aref(VALUE self, VALUE idx)
1217
1781
  }
1218
1782
 
1219
1783
  /*
1220
- * call-seq: size
1784
+ * :markup: markdown
1785
+ * :include: strscan/link_refs.txt
1786
+ *
1787
+ * call-seq:
1788
+ * size -> captures_count or nil
1789
+ *
1790
+ * Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise;
1791
+ * see [Captured Match Values][13]:
1792
+ *
1793
+ * ```rb
1794
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1795
+ * scanner.size # => nil
1221
1796
  *
1222
- * Returns the amount of subgroups in the most recent match.
1223
- * The full match counts as a subgroup.
1797
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
1798
+ * scanner.match?(pattern)
1799
+ * scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
1800
+ * scanner.size # => 4
1801
+ *
1802
+ * scanner.match?(/nope/) # => nil
1803
+ * scanner.size # => nil
1804
+ * ```
1224
1805
  *
1225
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1226
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1227
- * s.size # -> 4
1228
1806
  */
1229
1807
  static VALUE
1230
1808
  strscan_size(VALUE self)
@@ -1237,16 +1815,30 @@ strscan_size(VALUE self)
1237
1815
  }
1238
1816
 
1239
1817
  /*
1240
- * call-seq: captures
1818
+ * :markup: markdown
1819
+ * :include: strscan/link_refs.txt
1820
+ *
1821
+ * call-seq:
1822
+ * captures -> substring_array or nil
1241
1823
  *
1242
- * Returns the subgroups in the most recent match (not including the full match).
1243
- * If nothing was priorly matched, it returns nil.
1824
+ * Returns the array of [captured match values][13] at indexes `(1..)`
1825
+ * if the most recent match attempt succeeded, or `nil` otherwise:
1826
+ *
1827
+ * ```rb
1828
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1829
+ * scanner.captures # => nil
1830
+ *
1831
+ * scanner.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
1832
+ * scanner.captures # => ["Fri", "Dec", "12"]
1833
+ * scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
1834
+ *
1835
+ * scanner.exist?(/Fri/)
1836
+ * scanner.captures # => []
1837
+ *
1838
+ * scanner.scan(/nope/)
1839
+ * scanner.captures # => nil
1840
+ * ```
1244
1841
  *
1245
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1246
- * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 "
1247
- * s.captures # -> ["Fri", "Dec", "12", nil]
1248
- * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil
1249
- * s.captures # -> nil
1250
1842
  */
1251
1843
  static VALUE
1252
1844
  strscan_captures(VALUE self)
@@ -1276,17 +1868,25 @@ strscan_captures(VALUE self)
1276
1868
  }
1277
1869
 
1278
1870
  /*
1279
- * call-seq:
1280
- * scanner.values_at( i1, i2, ... iN ) -> an_array
1871
+ * :markup: markdown
1872
+ * :include: strscan/link_refs.txt
1281
1873
  *
1282
- * Returns the subgroups in the most recent match at the given indices.
1283
- * If nothing was priorly matched, it returns nil.
1874
+ * call-seq:
1875
+ * values_at(*specifiers) -> array_of_captures or nil
1876
+ *
1877
+ * Returns an array of captured substrings, or `nil` if none.
1878
+ *
1879
+ * For each `specifier`, the returned substring is `[specifier]`;
1880
+ * see #[].
1881
+ *
1882
+ * ```rb
1883
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1884
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
1885
+ * scanner.match?(pattern)
1886
+ * scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"]
1887
+ * scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"]
1888
+ * ```
1284
1889
  *
1285
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1286
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1287
- * s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
1288
- * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1289
- * s.values_at 0, -1, 5, 2 # -> nil
1290
1890
  */
1291
1891
 
1292
1892
  static VALUE
@@ -1308,13 +1908,29 @@ strscan_values_at(int argc, VALUE *argv, VALUE self)
1308
1908
  }
1309
1909
 
1310
1910
  /*
1311
- * Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
1911
+ * :markup: markdown
1912
+ * :include: strscan/link_refs.txt
1913
+ *
1914
+ * call-seq:
1915
+ * pre_match -> substring or nil
1916
+ *
1917
+ * Returns the substring that precedes the matched substring
1918
+ * from the most recent match attempt if it was successful,
1919
+ * or `nil` otherwise;
1920
+ * see [Basic Match Values][18]:
1921
+ *
1922
+ * ```rb
1923
+ * scanner = StringScanner.new('foobarbaz')
1924
+ * scanner.pre_match # => nil
1925
+ *
1926
+ * scanner.pos = 3
1927
+ * scanner.exist?(/baz/) # => 6
1928
+ * scanner.pre_match # => "foobar" # Substring of entire string, not just target string.
1929
+ *
1930
+ * scanner.exist?(/nope/) # => nil
1931
+ * scanner.pre_match # => nil
1932
+ * ```
1312
1933
  *
1313
- * s = StringScanner.new('test string')
1314
- * s.scan(/\w+/) # -> "test"
1315
- * s.scan(/\s+/) # -> " "
1316
- * s.pre_match # -> "test"
1317
- * s.post_match # -> "string"
1318
1934
  */
1319
1935
  static VALUE
1320
1936
  strscan_pre_match(VALUE self)
@@ -1329,13 +1945,29 @@ strscan_pre_match(VALUE self)
1329
1945
  }
1330
1946
 
1331
1947
  /*
1332
- * Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
1948
+ * :markup: markdown
1949
+ * :include: strscan/link_refs.txt
1950
+ *
1951
+ * call-seq:
1952
+ * post_match -> substring or nil
1953
+ *
1954
+ * Returns the substring that follows the matched substring
1955
+ * from the most recent match attempt if it was successful,
1956
+ * or `nil` otherwise;
1957
+ * see [Basic Match Values][18]:
1958
+ *
1959
+ * ```rb
1960
+ * scanner = StringScanner.new('foobarbaz')
1961
+ * scanner.post_match # => nil
1962
+ *
1963
+ * scanner.pos = 3
1964
+ * scanner.match?(/bar/) # => 3
1965
+ * scanner.post_match # => "baz"
1966
+ *
1967
+ * scanner.match?(/nope/) # => nil
1968
+ * scanner.post_match # => nil
1969
+ * ```
1333
1970
  *
1334
- * s = StringScanner.new('test string')
1335
- * s.scan(/\w+/) # -> "test"
1336
- * s.scan(/\s+/) # -> " "
1337
- * s.pre_match # -> "test"
1338
- * s.post_match # -> "string"
1339
1971
  */
1340
1972
  static VALUE
1341
1973
  strscan_post_match(VALUE self)
@@ -1350,8 +1982,24 @@ strscan_post_match(VALUE self)
1350
1982
  }
1351
1983
 
1352
1984
  /*
1353
- * Returns the "rest" of the string (i.e. everything after the scan pointer).
1354
- * If there is no more data (eos? = true), it returns <tt>""</tt>.
1985
+ * :markup: markdown
1986
+ * :include: strscan/link_refs.txt
1987
+ *
1988
+ * call-seq:
1989
+ * rest -> target_substring
1990
+ *
1991
+ * Returns the 'rest' of the [stored string][1] (all after the current [position][2]),
1992
+ * which is the [target substring][3]:
1993
+ *
1994
+ * ```rb
1995
+ * scanner = StringScanner.new('foobarbaz')
1996
+ * scanner.rest # => "foobarbaz"
1997
+ * scanner.pos = 3
1998
+ * scanner.rest # => "barbaz"
1999
+ * scanner.terminate
2000
+ * scanner.rest # => ""
2001
+ * ```
2002
+ *
1355
2003
  */
1356
2004
  static VALUE
1357
2005
  strscan_rest(VALUE self)
@@ -1366,7 +2014,26 @@ strscan_rest(VALUE self)
1366
2014
  }
1367
2015
 
1368
2016
  /*
1369
- * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
2017
+ * :markup: markdown
2018
+ * :include: strscan/link_refs.txt
2019
+ *
2020
+ * call-seq:
2021
+ * rest_size -> integer
2022
+ *
2023
+ * Returns the size (in bytes) of the #rest of the [stored string][1]:
2024
+ *
2025
+ * ```rb
2026
+ * scanner = StringScanner.new('foobarbaz')
2027
+ * scanner.rest # => "foobarbaz"
2028
+ * scanner.rest_size # => 9
2029
+ * scanner.pos = 3
2030
+ * scanner.rest # => "barbaz"
2031
+ * scanner.rest_size # => 6
2032
+ * scanner.terminate
2033
+ * scanner.rest # => ""
2034
+ * scanner.rest_size # => 0
2035
+ * ```
2036
+ *
1370
2037
  */
1371
2038
  static VALUE
1372
2039
  strscan_rest_size(VALUE self)
@@ -1383,9 +2050,14 @@ strscan_rest_size(VALUE self)
1383
2050
  }
1384
2051
 
1385
2052
  /*
2053
+ * call-seq:
2054
+ * restsize
2055
+ *
1386
2056
  * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
1387
2057
  * This method is obsolete; use #rest_size instead.
1388
2058
  */
2059
+
2060
+ /* :nodoc: */
1389
2061
  static VALUE
1390
2062
  strscan_restsize(VALUE self)
1391
2063
  {
@@ -1396,15 +2068,39 @@ strscan_restsize(VALUE self)
1396
2068
  #define INSPECT_LENGTH 5
1397
2069
 
1398
2070
  /*
1399
- * Returns a string that represents the StringScanner object, showing:
1400
- * - the current position
1401
- * - the size of the string
1402
- * - the characters surrounding the scan pointer
1403
- *
1404
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1405
- * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
1406
- * s.scan_until /12/ # -> "Fri Dec 12"
1407
- * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
2071
+ * :markup: markdown
2072
+ * :include: strscan/link_refs.txt
2073
+ *
2074
+ * call-seq:
2075
+ * inspect -> string
2076
+ *
2077
+ * Returns a string representation of `self` that may show:
2078
+ *
2079
+ * 1. The current [position][2].
2080
+ * 2. The size (in bytes) of the [stored string][1].
2081
+ * 3. The substring preceding the current position.
2082
+ * 4. The substring following the current position (which is also the [target substring][3]).
2083
+ *
2084
+ * ```rb
2085
+ * scanner = StringScanner.new("Fri Dec 12 1975 14:39")
2086
+ * scanner.pos = 11
2087
+ * scanner.inspect # => "#<StringScanner 11/21 \"...c 12 \" @ \"1975 ...\">"
2088
+ * ```
2089
+ *
2090
+ * If at beginning-of-string, item 3 above (preceding substring) is omitted:
2091
+ *
2092
+ * ```rb
2093
+ * scanner.reset
2094
+ * scanner.inspect # => "#<StringScanner 0/21 @ \"Fri D...\">"
2095
+ * ```
2096
+ *
2097
+ * If at end-of-string, all items above are omitted:
2098
+ *
2099
+ * ```rb
2100
+ * scanner.terminate
2101
+ * scanner.inspect # => "#<StringScanner fin>"
2102
+ * ```
2103
+ *
1408
2104
  */
1409
2105
  static VALUE
1410
2106
  strscan_inspect(VALUE self)
@@ -1476,13 +2172,13 @@ inspect2(struct strscanner *p)
1476
2172
  }
1477
2173
 
1478
2174
  /*
1479
- * call-seq:
1480
- * scanner.fixed_anchor? -> true or false
2175
+ * :markup: markdown
2176
+ * :include: strscan/link_refs.txt
1481
2177
  *
1482
- * Whether +scanner+ uses fixed anchor mode or not.
2178
+ * call-seq:
2179
+ * fixed_anchor? -> true or false
1483
2180
  *
1484
- * If fixed anchor mode is used, +\A+ always matches the beginning of
1485
- * the string. Otherwise, +\A+ always matches the current position.
2181
+ * Returns whether the [fixed-anchor property][10] is set.
1486
2182
  */
1487
2183
  static VALUE
1488
2184
  strscan_fixed_anchor_p(VALUE self)
@@ -1518,21 +2214,39 @@ named_captures_iter(const OnigUChar *name,
1518
2214
  }
1519
2215
 
1520
2216
  /*
2217
+ * :markup: markdown
2218
+ * :include: strscan/link_refs.txt
2219
+ *
1521
2220
  * call-seq:
1522
- * scanner.named_captures -> hash
2221
+ * named_captures -> hash
2222
+ *
2223
+ * Returns a hash mapping each named capture of the most recent match attempt
2224
+ * to its captured substring (`nil` if that attempt failed), or an empty hash if there are no named captures;
2225
+ * see [Captured Match Values][13]:
1523
2226
  *
1524
- * Returns a hash of string variables matching the regular expression.
2227
+ * ```rb
2228
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
2229
+ * scanner.named_captures # => {}
2230
+ *
2231
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
2232
+ * scanner.match?(pattern)
2233
+ * scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"}
2234
+ *
2235
+ * scanner.string = 'nope'
2236
+ * scanner.match?(pattern)
2237
+ * scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil}
2238
+ *
2239
+ * scanner.match?(/nosuch/)
2240
+ * scanner.named_captures # => {}
2241
+ * ```
1525
2242
  *
1526
- * scan = StringScanner.new('foobarbaz')
1527
- * scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/)
1528
- * scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"}
1529
2243
  */
1530
2244
  static VALUE
1531
2245
  strscan_named_captures(VALUE self)
1532
2246
  {
1533
2247
  struct strscanner *p;
1534
- GET_SCANNER(self, p);
1535
2248
  named_captures_data data;
2249
+ GET_SCANNER(self, p);
1536
2250
  data.self = self;
1537
2251
  data.captures = rb_hash_new();
1538
2252
  if (!RB_NIL_P(p->regex)) {
@@ -1549,107 +2263,11 @@ strscan_named_captures(VALUE self)
1549
2263
  /*
1550
2264
  * Document-class: StringScanner
1551
2265
  *
1552
- * StringScanner provides for lexical scanning operations on a String. Here is
1553
- * an example of its usage:
1554
- *
1555
- * require 'strscan'
1556
- *
1557
- * s = StringScanner.new('This is an example string')
1558
- * s.eos? # -> false
1559
- *
1560
- * p s.scan(/\w+/) # -> "This"
1561
- * p s.scan(/\w+/) # -> nil
1562
- * p s.scan(/\s+/) # -> " "
1563
- * p s.scan(/\s+/) # -> nil
1564
- * p s.scan(/\w+/) # -> "is"
1565
- * s.eos? # -> false
1566
- *
1567
- * p s.scan(/\s+/) # -> " "
1568
- * p s.scan(/\w+/) # -> "an"
1569
- * p s.scan(/\s+/) # -> " "
1570
- * p s.scan(/\w+/) # -> "example"
1571
- * p s.scan(/\s+/) # -> " "
1572
- * p s.scan(/\w+/) # -> "string"
1573
- * s.eos? # -> true
1574
- *
1575
- * p s.scan(/\s+/) # -> nil
1576
- * p s.scan(/\w+/) # -> nil
2266
+ * :markup: markdown
1577
2267
  *
1578
- * Scanning a string means remembering the position of a <i>scan pointer</i>,
1579
- * which is just an index. The point of scanning is to move forward a bit at
1580
- * a time, so matches are sought after the scan pointer; usually immediately
1581
- * after it.
2268
+ * :include: strscan/link_refs.txt
2269
+ * :include: strscan/strscan.md
1582
2270
  *
1583
- * Given the string "test string", here are the pertinent scan pointer
1584
- * positions:
1585
- *
1586
- * t e s t s t r i n g
1587
- * 0 1 2 ... 1
1588
- * 0
1589
- *
1590
- * When you #scan for a pattern (a regular expression), the match must occur
1591
- * at the character after the scan pointer. If you use #scan_until, then the
1592
- * match can occur anywhere after the scan pointer. In both cases, the scan
1593
- * pointer moves <i>just beyond</i> the last character of the match, ready to
1594
- * scan again from the next character onwards. This is demonstrated by the
1595
- * example above.
1596
- *
1597
- * == Method Categories
1598
- *
1599
- * There are other methods besides the plain scanners. You can look ahead in
1600
- * the string without actually scanning. You can access the most recent match.
1601
- * You can modify the string being scanned, reset or terminate the scanner,
1602
- * find out or change the position of the scan pointer, skip ahead, and so on.
1603
- *
1604
- * === Advancing the Scan Pointer
1605
- *
1606
- * - #getch
1607
- * - #get_byte
1608
- * - #scan
1609
- * - #scan_until
1610
- * - #skip
1611
- * - #skip_until
1612
- *
1613
- * === Looking Ahead
1614
- *
1615
- * - #check
1616
- * - #check_until
1617
- * - #exist?
1618
- * - #match?
1619
- * - #peek
1620
- *
1621
- * === Finding Where we Are
1622
- *
1623
- * - #beginning_of_line? (<tt>#bol?</tt>)
1624
- * - #eos?
1625
- * - #rest?
1626
- * - #rest_size
1627
- * - #pos
1628
- *
1629
- * === Setting Where we Are
1630
- *
1631
- * - #reset
1632
- * - #terminate
1633
- * - #pos=
1634
- *
1635
- * === Match Data
1636
- *
1637
- * - #matched
1638
- * - #matched?
1639
- * - #matched_size
1640
- * - <tt>#[]</tt>
1641
- * - #pre_match
1642
- * - #post_match
1643
- *
1644
- * === Miscellaneous
1645
- *
1646
- * - <tt><<</tt>
1647
- * - #concat
1648
- * - #string
1649
- * - #string=
1650
- * - #unscan
1651
- *
1652
- * There are aliases to several of the methods.
1653
2271
  */
1654
2272
  void
1655
2273
  Init_strscan(void)
@@ -1664,6 +2282,10 @@ Init_strscan(void)
1664
2282
 
1665
2283
  id_byteslice = rb_intern("byteslice");
1666
2284
 
2285
+ usascii_encindex = rb_usascii_encindex();
2286
+ utf8_encindex = rb_utf8_encindex();
2287
+ binary_encindex = rb_ascii8bit_encindex();
2288
+
1667
2289
  StringScanner = rb_define_class("StringScanner", rb_cObject);
1668
2290
  ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
1669
2291
  if (!rb_const_defined(rb_cObject, id_scanerr)) {
@@ -1708,9 +2330,14 @@ Init_strscan(void)
1708
2330
  rb_define_method(StringScanner, "getch", strscan_getch, 0);
1709
2331
  rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
1710
2332
  rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
2333
+ rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
1711
2334
  rb_define_method(StringScanner, "peek", strscan_peek, 1);
2335
+ rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
1712
2336
  rb_define_method(StringScanner, "peep", strscan_peep, 1);
1713
2337
 
2338
+ rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
2339
+ rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
2340
+
1714
2341
  rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
1715
2342
 
1716
2343
  rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
@@ -1738,4 +2365,6 @@ Init_strscan(void)
1738
2365
  rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1739
2366
 
1740
2367
  rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
2368
+
2369
+ rb_require("strscan/strscan");
1741
2370
  }