strscan 3.1.0 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
22
22
 
23
23
  #include <stdbool.h>
24
24
 
25
- #define STRSCAN_VERSION "3.1.0"
25
+ #define STRSCAN_VERSION "3.1.1"
26
26
 
27
27
  /* =======================================================================
28
28
  Data Type Definitions
@@ -32,6 +32,8 @@ static VALUE StringScanner;
32
32
  static VALUE ScanError;
33
33
  static ID id_byteslice;
34
34
 
35
+ static int usascii_encindex, utf8_encindex, binary_encindex;
36
+
35
37
  struct strscanner
36
38
  {
37
39
  /* multi-purpose flags */
@@ -115,6 +117,7 @@ static VALUE strscan_get_byte _((VALUE self));
115
117
  static VALUE strscan_getbyte _((VALUE self));
116
118
  static VALUE strscan_peek _((VALUE self, VALUE len));
117
119
  static VALUE strscan_peep _((VALUE self, VALUE len));
120
+ static VALUE strscan_scan_base10_integer _((VALUE self));
118
121
  static VALUE strscan_unscan _((VALUE self));
119
122
  static VALUE strscan_bol_p _((VALUE self));
120
123
  static VALUE strscan_eos_p _((VALUE self));
@@ -218,16 +221,28 @@ strscan_s_allocate(VALUE klass)
218
221
  }
219
222
 
220
223
  /*
221
- * call-seq:
222
- * StringScanner.new(string, fixed_anchor: false)
223
- * StringScanner.new(string, dup = false)
224
- *
225
- * Creates a new StringScanner object to scan over the given +string+.
224
+ * :markup: markdown
225
+ * :include: strscan/link_refs.txt
226
226
  *
227
- * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
228
- * the string. Otherwise, +\A+ always matches the current position.
227
+ * call-seq:
228
+ * StringScanner.new(string, fixed_anchor: false) -> string_scanner
229
+ *
230
+ * Returns a new `StringScanner` object whose [stored string][1]
231
+ * is the given `string`;
232
+ * sets the [fixed-anchor property][10]:
233
+ *
234
+ * ```
235
+ * scanner = StringScanner.new('foobarbaz')
236
+ * scanner.string # => "foobarbaz"
237
+ * scanner.fixed_anchor? # => false
238
+ * put_situation(scanner)
239
+ * # Situation:
240
+ * # pos: 0
241
+ * # charpos: 0
242
+ * # rest: "foobarbaz"
243
+ * # rest_size: 9
244
+ * ```
229
245
  *
230
- * +dup+ argument is obsolete and not used now.
231
246
  */
232
247
  static VALUE
233
248
  strscan_initialize(int argc, VALUE *argv, VALUE self)
@@ -266,11 +281,14 @@ check_strscan(VALUE obj)
266
281
  }
267
282
 
268
283
  /*
284
+ * :markup: markdown
285
+ * :include: strscan/link_refs.txt
286
+ *
269
287
  * call-seq:
270
- * dup
271
- * clone
288
+ * dup -> shallow_copy
272
289
  *
273
- * Duplicates a StringScanner object.
290
+ * Returns a shallow copy of `self`;
291
+ * the [stored string][1] in the copy is the same string as in `self`.
274
292
  */
275
293
  static VALUE
276
294
  strscan_init_copy(VALUE vself, VALUE vorig)
@@ -297,10 +315,13 @@ strscan_init_copy(VALUE vself, VALUE vorig)
297
315
  ======================================================================= */
298
316
 
299
317
  /*
300
- * call-seq: StringScanner.must_C_version
318
+ * call-seq:
319
+ * StringScanner.must_C_version -> self
301
320
  *
302
- * This method is defined for backward compatibility.
321
+ * Returns +self+; defined for backward compatibility.
303
322
  */
323
+
324
+ /* :nodoc: */
304
325
  static VALUE
305
326
  strscan_s_mustc(VALUE self)
306
327
  {
@@ -308,7 +329,30 @@ strscan_s_mustc(VALUE self)
308
329
  }
309
330
 
310
331
  /*
311
- * Reset the scan pointer (index 0) and clear matching data.
332
+ * :markup: markdown
333
+ * :include: strscan/link_refs.txt
334
+ *
335
+ * call-seq:
336
+ * reset -> self
337
+ *
338
+ * Sets both [byte position][2] and [character position][7] to zero,
339
+ * and clears [match values][9];
340
+ * returns +self+:
341
+ *
342
+ * ```
343
+ * scanner = StringScanner.new('foobarbaz')
344
+ * scanner.exist?(/bar/) # => 6
345
+ * scanner.reset # => #<StringScanner 0/9 @ "fooba...">
346
+ * put_situation(scanner)
347
+ * # Situation:
348
+ * # pos: 0
349
+ * # charpos: 0
350
+ * # rest: "foobarbaz"
351
+ * # rest_size: 9
352
+ * # => nil
353
+ * match_values_cleared?(scanner) # => true
354
+ * ```
355
+ *
312
356
  */
313
357
  static VALUE
314
358
  strscan_reset(VALUE self)
@@ -322,11 +366,9 @@ strscan_reset(VALUE self)
322
366
  }
323
367
 
324
368
  /*
325
- * call-seq:
326
- * terminate
327
- * clear
328
- *
329
- * Sets the scan pointer to the end of the string and clear matching data.
369
+ * :markup: markdown
370
+ * :include: strscan/link_refs.txt
371
+ * :include: strscan/methods/terminate.md
330
372
  */
331
373
  static VALUE
332
374
  strscan_terminate(VALUE self)
@@ -340,9 +382,13 @@ strscan_terminate(VALUE self)
340
382
  }
341
383
 
342
384
  /*
343
- * Equivalent to #terminate.
344
- * This method is obsolete; use #terminate instead.
385
+ * call-seq:
386
+ * clear -> self
387
+ *
388
+ * This method is obsolete; use the equivalent method StringScanner#terminate.
345
389
  */
390
+
391
+ /* :nodoc: */
346
392
  static VALUE
347
393
  strscan_clear(VALUE self)
348
394
  {
@@ -351,7 +397,21 @@ strscan_clear(VALUE self)
351
397
  }
352
398
 
353
399
  /*
354
- * Returns the string being scanned.
400
+ * :markup: markdown
401
+ * :include: strscan/link_refs.txt
402
+ *
403
+ * call-seq:
404
+ * string -> stored_string
405
+ *
406
+ * Returns the [stored string][1]:
407
+ *
408
+ * ```
409
+ * scanner = StringScanner.new('foobar')
410
+ * scanner.string # => "foobar"
411
+ * scanner.concat('baz')
412
+ * scanner.string # => "foobarbaz"
413
+ * ```
414
+ *
355
415
  */
356
416
  static VALUE
357
417
  strscan_get_string(VALUE self)
@@ -363,10 +423,39 @@ strscan_get_string(VALUE self)
363
423
  }
364
424
 
365
425
  /*
366
- * call-seq: string=(str)
426
+ * :markup: markdown
427
+ * :include: strscan/link_refs.txt
428
+ *
429
+ * call-seq:
430
+ * string = other_string -> other_string
431
+ *
432
+ * Replaces the [stored string][1] with the given `other_string`:
433
+ *
434
+ * - Sets both [positions][11] to zero.
435
+ * - Clears [match values][9].
436
+ * - Returns `other_string`.
437
+ *
438
+ * ```
439
+ * scanner = StringScanner.new('foobar')
440
+ * scanner.scan(/foo/)
441
+ * put_situation(scanner)
442
+ * # Situation:
443
+ * # pos: 3
444
+ * # charpos: 3
445
+ * # rest: "bar"
446
+ * # rest_size: 3
447
+ * match_values_cleared?(scanner) # => false
448
+ *
449
+ * scanner.string = 'baz' # => "baz"
450
+ * put_situation(scanner)
451
+ * # Situation:
452
+ * # pos: 0
453
+ * # charpos: 0
454
+ * # rest: "baz"
455
+ * # rest_size: 3
456
+ * match_values_cleared?(scanner) # => true
457
+ * ```
367
458
  *
368
- * Changes the string being scanned to +str+ and resets the scanner.
369
- * Returns +str+.
370
459
  */
371
460
  static VALUE
372
461
  strscan_set_string(VALUE self, VALUE str)
@@ -381,18 +470,33 @@ strscan_set_string(VALUE self, VALUE str)
381
470
  }
382
471
 
383
472
  /*
384
- * call-seq:
385
- * concat(str)
386
- * <<(str)
473
+ * :markup: markdown
474
+ * :include: strscan/link_refs.txt
387
475
  *
388
- * Appends +str+ to the string being scanned.
389
- * This method does not affect scan pointer.
476
+ * call-seq:
477
+ * concat(more_string) -> self
478
+ *
479
+ * - Appends the given `more_string`
480
+ * to the [stored string][1].
481
+ * - Returns `self`.
482
+ * - Does not affect the [positions][11]
483
+ * or [match values][9].
484
+ *
485
+ *
486
+ * ```
487
+ * scanner = StringScanner.new('foo')
488
+ * scanner.string # => "foo"
489
+ * scanner.terminate
490
+ * scanner.concat('barbaz') # => #<StringScanner 3/9 "foo" @ "barba...">
491
+ * scanner.string # => "foobarbaz"
492
+ * put_situation(scanner)
493
+ * # Situation:
494
+ * # pos: 3
495
+ * # charpos: 3
496
+ * # rest: "barbaz"
497
+ * # rest_size: 6
498
+ * ```
390
499
  *
391
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
392
- * s.scan(/Fri /)
393
- * s << " +1000 GMT"
394
- * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
395
- * s.scan(/Dec/) # -> "Dec"
396
500
  */
397
501
  static VALUE
398
502
  strscan_concat(VALUE self, VALUE str)
@@ -406,18 +510,9 @@ strscan_concat(VALUE self, VALUE str)
406
510
  }
407
511
 
408
512
  /*
409
- * Returns the byte position of the scan pointer. In the 'reset' position, this
410
- * value is zero. In the 'terminated' position (i.e. the string is exhausted),
411
- * this value is the bytesize of the string.
412
- *
413
- * In short, it's a 0-based index into bytes of the string.
414
- *
415
- * s = StringScanner.new('test string')
416
- * s.pos # -> 0
417
- * s.scan_until /str/ # -> "test str"
418
- * s.pos # -> 8
419
- * s.terminate # -> #<StringScanner fin>
420
- * s.pos # -> 11
513
+ * :markup: markdown
514
+ * :include: strscan/link_refs.txt
515
+ * :include: strscan/methods/get_pos.md
421
516
  */
422
517
  static VALUE
423
518
  strscan_get_pos(VALUE self)
@@ -429,17 +524,9 @@ strscan_get_pos(VALUE self)
429
524
  }
430
525
 
431
526
  /*
432
- * Returns the character position of the scan pointer. In the 'reset' position, this
433
- * value is zero. In the 'terminated' position (i.e. the string is exhausted),
434
- * this value is the size of the string.
435
- *
436
- * In short, it's a 0-based index into the string.
437
- *
438
- * s = StringScanner.new("abc\u00e4def\u00f6ghi")
439
- * s.charpos # -> 0
440
- * s.scan_until(/\u00e4/) # -> "abc\u00E4"
441
- * s.pos # -> 5
442
- * s.charpos # -> 4
527
+ * :markup: markdown
528
+ * :include: strscan/link_refs.txt
529
+ * :include: strscan/methods/get_charpos.md
443
530
  */
444
531
  static VALUE
445
532
  strscan_get_charpos(VALUE self)
@@ -452,13 +539,9 @@ strscan_get_charpos(VALUE self)
452
539
  }
453
540
 
454
541
  /*
455
- * call-seq: pos=(n)
456
- *
457
- * Sets the byte position of the scan pointer.
458
- *
459
- * s = StringScanner.new('test string')
460
- * s.pos = 7 # -> 7
461
- * s.rest # -> "ring"
542
+ * :markup: markdown
543
+ * :include: strscan/link_refs.txt
544
+ * :include: strscan/methods/set_pos.md
462
545
  */
463
546
  static VALUE
464
547
  strscan_set_pos(VALUE self, VALUE v)
@@ -546,12 +629,13 @@ rb_reg_onig_match(VALUE re, VALUE str,
546
629
  OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
547
630
  void *args, struct re_registers *regs)
548
631
  {
632
+ OnigPosition result;
549
633
  regex_t *reg = rb_reg_prepare_re(re, str);
550
634
 
551
635
  bool tmpreg = reg != RREGEXP_PTR(re);
552
636
  if (!tmpreg) RREGEXP(re)->usecnt++;
553
637
 
554
- OnigPosition result = match(reg, str, regs, args);
638
+ result = match(reg, str, regs, args);
555
639
 
556
640
  if (!tmpreg) RREGEXP(re)->usecnt--;
557
641
  if (tmpreg) {
@@ -601,19 +685,19 @@ strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_pt
601
685
  ONIG_OPTION_NONE);
602
686
  }
603
687
 
688
+ static void
689
+ strscan_enc_check(VALUE str1, VALUE str2)
690
+ {
691
+ if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) {
692
+ rb_enc_check(str1, str2);
693
+ }
694
+ }
695
+
604
696
  static VALUE
605
697
  strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
606
698
  {
607
699
  struct strscanner *p;
608
700
 
609
- if (headonly) {
610
- if (!RB_TYPE_P(pattern, T_REGEXP)) {
611
- StringValue(pattern);
612
- }
613
- }
614
- else {
615
- Check_Type(pattern, T_REGEXP);
616
- }
617
701
  GET_SCANNER(self, p);
618
702
 
619
703
  CLEAR_MATCH_STATUS(p);
@@ -622,26 +706,42 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
622
706
  }
623
707
 
624
708
  if (RB_TYPE_P(pattern, T_REGEXP)) {
709
+ OnigPosition ret;
625
710
  p->regex = pattern;
626
- OnigPosition ret = rb_reg_onig_match(pattern,
627
- p->str,
628
- headonly ? strscan_match : strscan_search,
629
- (void *)p,
630
- &(p->regs));
711
+ ret = rb_reg_onig_match(p->regex,
712
+ p->str,
713
+ headonly ? strscan_match : strscan_search,
714
+ (void *)p,
715
+ &(p->regs));
631
716
 
632
717
  if (ret == ONIG_MISMATCH) {
633
718
  return Qnil;
634
719
  }
635
720
  }
636
721
  else {
637
- rb_enc_check(p->str, pattern);
722
+ StringValue(pattern);
638
723
  if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
724
+ strscan_enc_check(p->str, pattern);
639
725
  return Qnil;
640
726
  }
641
- if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
642
- return Qnil;
727
+
728
+ if (headonly) {
729
+ strscan_enc_check(p->str, pattern);
730
+
731
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
732
+ return Qnil;
733
+ }
734
+ set_registers(p, RSTRING_LEN(pattern));
735
+ }
736
+ else {
737
+ rb_encoding *enc = rb_enc_check(p->str, pattern);
738
+ long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
739
+ CURPTR(p), S_RESTLEN(p), enc);
740
+ if (pos == -1) {
741
+ return Qnil;
742
+ }
743
+ set_registers(p, RSTRING_LEN(pattern) + pos);
643
744
  }
644
- set_registers(p, RSTRING_LEN(pattern));
645
745
  }
646
746
 
647
747
  MATCHED(p);
@@ -662,20 +762,9 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
662
762
  }
663
763
 
664
764
  /*
665
- * call-seq: scan(pattern) => String
666
- *
667
- * Tries to match with +pattern+ at the current position. If there's a match,
668
- * the scanner advances the "scan pointer" and returns the matched string.
669
- * Otherwise, the scanner returns +nil+.
670
- *
671
- * s = StringScanner.new('test string')
672
- * p s.scan(/\w+/) # -> "test"
673
- * p s.scan(/\w+/) # -> nil
674
- * p s.scan(/\s+/) # -> " "
675
- * p s.scan("str") # -> "str"
676
- * p s.scan(/\w+/) # -> "ing"
677
- * p s.scan(/./) # -> nil
678
- *
765
+ * :markup: markdown
766
+ * :include: strscan/link_refs.txt
767
+ * :include: strscan/methods/scan.md
679
768
  */
680
769
  static VALUE
681
770
  strscan_scan(VALUE self, VALUE re)
@@ -684,16 +773,60 @@ strscan_scan(VALUE self, VALUE re)
684
773
  }
685
774
 
686
775
  /*
687
- * call-seq: match?(pattern)
776
+ * :markup: markdown
777
+ * :include: strscan/link_refs.txt
688
778
  *
689
- * Tests whether the given +pattern+ is matched from the current scan pointer.
690
- * Returns the length of the match, or +nil+. The scan pointer is not advanced.
779
+ * call-seq:
780
+ * match?(pattern) -> matched_size or nil
781
+ *
782
+ * Attempts to [match][17] the given `pattern`
783
+ * at the beginning of the [target substring][3];
784
+ * does not modify the [positions][11].
785
+ *
786
+ * If the match succeeds:
787
+ *
788
+ * - Sets [match values][9].
789
+ * - Returns the size in bytes of the matched substring.
790
+ *
791
+ *
792
+ * ```
793
+ * scanner = StringScanner.new('foobarbaz')
794
+ * scanner.pos = 3
795
+ * scanner.match?(/bar/) # => 3
796
+ * put_match_values(scanner)
797
+ * # Basic match values:
798
+ * # matched?: true
799
+ * # matched_size: 3
800
+ * # pre_match: "foo"
801
+ * # matched : "bar"
802
+ * # post_match: "baz"
803
+ * # Captured match values:
804
+ * # size: 1
805
+ * # captures: []
806
+ * # named_captures: {}
807
+ * # values_at: ["bar", nil]
808
+ * # []:
809
+ * # [0]: "bar"
810
+ * # [1]: nil
811
+ * put_situation(scanner)
812
+ * # Situation:
813
+ * # pos: 3
814
+ * # charpos: 3
815
+ * # rest: "barbaz"
816
+ * # rest_size: 6
817
+ * ```
818
+ *
819
+ * If the match fails:
820
+ *
821
+ * - Clears match values.
822
+ * - Returns `nil`.
823
+ * - Does not increment positions.
824
+ *
825
+ * ```
826
+ * scanner.match?(/nope/) # => nil
827
+ * match_values_cleared?(scanner) # => true
828
+ * ```
691
829
  *
692
- * s = StringScanner.new('test string')
693
- * p s.match?(/\w+/) # -> 4
694
- * p s.match?(/\w+/) # -> 4
695
- * p s.match?("test") # -> 4
696
- * p s.match?(/\s+/) # -> nil
697
830
  */
698
831
  static VALUE
699
832
  strscan_match_p(VALUE self, VALUE re)
@@ -702,22 +835,9 @@ strscan_match_p(VALUE self, VALUE re)
702
835
  }
703
836
 
704
837
  /*
705
- * call-seq: skip(pattern)
706
- *
707
- * Attempts to skip over the given +pattern+ beginning with the scan pointer.
708
- * If it matches, the scan pointer is advanced to the end of the match, and the
709
- * length of the match is returned. Otherwise, +nil+ is returned.
710
- *
711
- * It's similar to #scan, but without returning the matched string.
712
- *
713
- * s = StringScanner.new('test string')
714
- * p s.skip(/\w+/) # -> 4
715
- * p s.skip(/\w+/) # -> nil
716
- * p s.skip(/\s+/) # -> 1
717
- * p s.skip("st") # -> 2
718
- * p s.skip(/\w+/) # -> 4
719
- * p s.skip(/./) # -> nil
720
- *
838
+ * :markup: markdown
839
+ * :include: strscan/link_refs.txt
840
+ * :include: strscan/methods/skip.md
721
841
  */
722
842
  static VALUE
723
843
  strscan_skip(VALUE self, VALUE re)
@@ -726,19 +846,59 @@ strscan_skip(VALUE self, VALUE re)
726
846
  }
727
847
 
728
848
  /*
729
- * call-seq: check(pattern)
730
- *
731
- * This returns the value that #scan would return, without advancing the scan
732
- * pointer. The match register is affected, though.
849
+ * :markup: markdown
850
+ * :include: strscan/link_refs.txt
733
851
  *
734
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
735
- * s.check /Fri/ # -> "Fri"
736
- * s.pos # -> 0
737
- * s.matched # -> "Fri"
738
- * s.check /12/ # -> nil
739
- * s.matched # -> nil
852
+ * call-seq:
853
+ * check(pattern) -> matched_substring or nil
854
+ *
855
+ * Attempts to [match][17] the given `pattern`
856
+ * at the beginning of the [target substring][3];
857
+ * does not modify the [positions][11].
858
+ *
859
+ * If the match succeeds:
860
+ *
861
+ * - Returns the matched substring.
862
+ * - Sets all [match values][9].
863
+ *
864
+ * ```
865
+ * scanner = StringScanner.new('foobarbaz')
866
+ * scanner.pos = 3
867
+ * scanner.check('bar') # => "bar"
868
+ * put_match_values(scanner)
869
+ * # Basic match values:
870
+ * # matched?: true
871
+ * # matched_size: 3
872
+ * # pre_match: "foo"
873
+ * # matched : "bar"
874
+ * # post_match: "baz"
875
+ * # Captured match values:
876
+ * # size: 1
877
+ * # captures: []
878
+ * # named_captures: {}
879
+ * # values_at: ["bar", nil]
880
+ * # []:
881
+ * # [0]: "bar"
882
+ * # [1]: nil
883
+ * # => 0..1
884
+ * put_situation(scanner)
885
+ * # Situation:
886
+ * # pos: 3
887
+ * # charpos: 3
888
+ * # rest: "barbaz"
889
+ * # rest_size: 6
890
+ * ```
891
+ *
892
+ * If the match fails:
893
+ *
894
+ * - Returns `nil`.
895
+ * - Clears all [match values][9].
896
+ *
897
+ * ```
898
+ * scanner.check(/nope/) # => nil
899
+ * match_values_cleared?(scanner) # => true
900
+ * ```
740
901
  *
741
- * Mnemonic: it "checks" to see whether a #scan will return a value.
742
902
  */
743
903
  static VALUE
744
904
  strscan_check(VALUE self, VALUE re)
@@ -747,15 +907,24 @@ strscan_check(VALUE self, VALUE re)
747
907
  }
748
908
 
749
909
  /*
750
- * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
910
+ * call-seq:
911
+ * scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or matched_size or nil
912
+ *
913
+ * Equivalent to one of the following:
914
+ *
915
+ * - +advance_pointer_p+ +true+:
916
+ *
917
+ * - +return_string_p+ +true+: StringScanner#scan(pattern).
918
+ * - +return_string_p+ +false+: StringScanner#skip(pattern).
751
919
  *
752
- * Tests whether the given +pattern+ is matched from the current scan pointer.
753
- * Advances the scan pointer if +advance_pointer_p+ is true.
754
- * Returns the matched string if +return_string_p+ is true.
755
- * The match register is affected.
920
+ * - +advance_pointer_p+ +false+:
921
+ *
922
+ * - +return_string_p+ +true+: StringScanner#check(pattern).
923
+ * - +return_string_p+ +false+: StringScanner#match?(pattern).
756
924
  *
757
- * "full" means "#scan with full parameters".
758
925
  */
926
+
927
+ /* :nodoc: */
759
928
  static VALUE
760
929
  strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
761
930
  {
@@ -763,16 +932,9 @@ strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
763
932
  }
764
933
 
765
934
  /*
766
- * call-seq: scan_until(pattern)
767
- *
768
- * Scans the string _until_ the +pattern+ is matched. Returns the substring up
769
- * to and including the end of the match, advancing the scan pointer to that
770
- * location. If there is no match, +nil+ is returned.
771
- *
772
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
773
- * s.scan_until(/1/) # -> "Fri Dec 1"
774
- * s.pre_match # -> "Fri Dec "
775
- * s.scan_until(/XYZ/) # -> nil
935
+ * :markup: markdown
936
+ * :include: strscan/link_refs.txt
937
+ * :include: strscan/methods/scan_until.md
776
938
  */
777
939
  static VALUE
778
940
  strscan_scan_until(VALUE self, VALUE re)
@@ -781,17 +943,61 @@ strscan_scan_until(VALUE self, VALUE re)
781
943
  }
782
944
 
783
945
  /*
784
- * call-seq: exist?(pattern)
946
+ * :markup: markdown
947
+ * :include: strscan/link_refs.txt
785
948
  *
786
- * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
787
- * without advancing the scan pointer. This predicates whether a #scan_until
788
- * will return a value.
949
+ * call-seq:
950
+ * exist?(pattern) -> byte_offset or nil
951
+ *
952
+ * Attempts to [match][17] the given `pattern`
953
+ * anywhere (at any [position][2])
954
+ * in the [target substring][3];
955
+ * does not modify the [positions][11].
956
+ *
957
+ * If the match succeeds:
958
+ *
959
+ * - Returns a byte offset:
960
+ * the distance in bytes between the current [position][2]
961
+ * and the end of the matched substring.
962
+ * - Sets all [match values][9].
963
+ *
964
+ * ```
965
+ * scanner = StringScanner.new('foobarbazbatbam')
966
+ * scanner.pos = 6
967
+ * scanner.exist?(/bat/) # => 6
968
+ * put_match_values(scanner)
969
+ * # Basic match values:
970
+ * # matched?: true
971
+ * # matched_size: 3
972
+ * # pre_match: "foobarbaz"
973
+ * # matched : "bat"
974
+ * # post_match: "bam"
975
+ * # Captured match values:
976
+ * # size: 1
977
+ * # captures: []
978
+ * # named_captures: {}
979
+ * # values_at: ["bat", nil]
980
+ * # []:
981
+ * # [0]: "bat"
982
+ * # [1]: nil
983
+ * put_situation(scanner)
984
+ * # Situation:
985
+ * # pos: 6
986
+ * # charpos: 6
987
+ * # rest: "bazbatbam"
988
+ * # rest_size: 9
989
+ * ```
990
+ *
991
+ * If the match fails:
992
+ *
993
+ * - Returns `nil`.
994
+ * - Clears all [match values][9].
995
+ *
996
+ * ```
997
+ * scanner.exist?(/nope/) # => nil
998
+ * match_values_cleared?(scanner) # => true
999
+ * ```
789
1000
  *
790
- * s = StringScanner.new('test string')
791
- * s.exist? /s/ # -> 3
792
- * s.scan /test/ # -> "test"
793
- * s.exist? /s/ # -> 2
794
- * s.exist? /e/ # -> nil
795
1001
  */
796
1002
  static VALUE
797
1003
  strscan_exist_p(VALUE self, VALUE re)
@@ -800,20 +1006,9 @@ strscan_exist_p(VALUE self, VALUE re)
800
1006
  }
801
1007
 
802
1008
  /*
803
- * call-seq: skip_until(pattern)
804
- *
805
- * Advances the scan pointer until +pattern+ is matched and consumed. Returns
806
- * the number of bytes advanced, or +nil+ if no match was found.
807
- *
808
- * Look ahead to match +pattern+, and advance the scan pointer to the _end_
809
- * of the match. Return the number of characters advanced, or +nil+ if the
810
- * match was unsuccessful.
811
- *
812
- * It's similar to #scan_until, but without returning the intervening string.
813
- *
814
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
815
- * s.skip_until /12/ # -> 10
816
- * s #
1009
+ * :markup: markdown
1010
+ * :include: strscan/link_refs.txt
1011
+ * :include: strscan/methods/skip_until.md
817
1012
  */
818
1013
  static VALUE
819
1014
  strscan_skip_until(VALUE self, VALUE re)
@@ -822,17 +1017,61 @@ strscan_skip_until(VALUE self, VALUE re)
822
1017
  }
823
1018
 
824
1019
  /*
825
- * call-seq: check_until(pattern)
1020
+ * :markup: markdown
1021
+ * :include: strscan/link_refs.txt
826
1022
  *
827
- * This returns the value that #scan_until would return, without advancing the
828
- * scan pointer. The match register is affected, though.
829
- *
830
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
831
- * s.check_until /12/ # -> "Fri Dec 12"
832
- * s.pos # -> 0
833
- * s.matched # -> 12
1023
+ * call-seq:
1024
+ * check_until(pattern) -> substring or nil
1025
+ *
1026
+ * Attempts to [match][17] the given `pattern`
1027
+ * anywhere (at any [position][2])
1028
+ * in the [target substring][3];
1029
+ * does not modify the [positions][11].
1030
+ *
1031
+ * If the match succeeds:
1032
+ *
1033
+ * - Sets all [match values][9].
1034
+ * - Returns the matched substring,
1035
+ * which extends from the current [position][2]
1036
+ * to the end of the matched substring.
1037
+ *
1038
+ * ```
1039
+ * scanner = StringScanner.new('foobarbazbatbam')
1040
+ * scanner.pos = 6
1041
+ * scanner.check_until(/bat/) # => "bazbat"
1042
+ * put_match_values(scanner)
1043
+ * # Basic match values:
1044
+ * # matched?: true
1045
+ * # matched_size: 3
1046
+ * # pre_match: "foobarbaz"
1047
+ * # matched : "bat"
1048
+ * # post_match: "bam"
1049
+ * # Captured match values:
1050
+ * # size: 1
1051
+ * # captures: []
1052
+ * # named_captures: {}
1053
+ * # values_at: ["bat", nil]
1054
+ * # []:
1055
+ * # [0]: "bat"
1056
+ * # [1]: nil
1057
+ * put_situation(scanner)
1058
+ * # Situation:
1059
+ * # pos: 6
1060
+ * # charpos: 6
1061
+ * # rest: "bazbatbam"
1062
+ * # rest_size: 9
1063
+ * ```
1064
+ *
1065
+ * If the match fails:
1066
+ *
1067
+ * - Clears all [match values][9].
1068
+ * - Returns `nil`.
1069
+ *
1070
+ * ```
1071
+ * scanner.check_until(/nope/) # => nil
1072
+ * match_values_cleared?(scanner) # => true
1073
+ * ```
834
1074
  *
835
- * Mnemonic: it "checks" to see whether a #scan_until will return a value.
836
1075
  */
837
1076
  static VALUE
838
1077
  strscan_check_until(VALUE self, VALUE re)
@@ -841,14 +1080,24 @@ strscan_check_until(VALUE self, VALUE re)
841
1080
  }
842
1081
 
843
1082
  /*
844
- * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
1083
+ * call-seq:
1084
+ * search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil
1085
+ *
1086
+ * Equivalent to one of the following:
1087
+ *
1088
+ * - +advance_pointer_p+ +true+:
1089
+ *
1090
+ * - +return_string_p+ +true+: StringScanner#scan_until(pattern).
1091
+ * - +return_string_p+ +false+: StringScanner#skip_until(pattern).
1092
+ *
1093
+ * - +advance_pointer_p+ +false+:
1094
+ *
1095
+ * - +return_string_p+ +true+: StringScanner#check_until(pattern).
1096
+ * - +return_string_p+ +false+: StringScanner#exist?(pattern).
845
1097
  *
846
- * Scans the string _until_ the +pattern+ is matched.
847
- * Advances the scan pointer if +advance_pointer_p+, otherwise not.
848
- * Returns the matched string if +return_string_p+ is true, otherwise
849
- * returns the number of bytes advanced.
850
- * This method does affect the match register.
851
1098
  */
1099
+
1100
+ /* :nodoc: */
852
1101
  static VALUE
853
1102
  strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
854
1103
  {
@@ -868,17 +1117,9 @@ adjust_registers_to_matched(struct strscanner *p)
868
1117
  }
869
1118
 
870
1119
  /*
871
- * Scans one character and returns it.
872
- * This method is multibyte character sensitive.
873
- *
874
- * s = StringScanner.new("ab")
875
- * s.getch # => "a"
876
- * s.getch # => "b"
877
- * s.getch # => nil
878
- *
879
- * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
880
- * s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
881
- * s.getch # => nil
1120
+ * :markup: markdown
1121
+ * :include: strscan/link_refs.txt
1122
+ * :include: strscan/methods/getch.md
882
1123
  */
883
1124
  static VALUE
884
1125
  strscan_getch(VALUE self)
@@ -903,19 +1144,55 @@ strscan_getch(VALUE self)
903
1144
  }
904
1145
 
905
1146
  /*
906
- * Scans one byte and returns it.
1147
+ * call-seq:
1148
+ * scan_byte -> integer_byte or nil
1149
+ *
1150
+ * Scans one byte and returns it as an integer.
907
1151
  * This method is not multibyte character sensitive.
908
1152
  * See also: #getch.
909
1153
  *
1154
+ */
1155
+ static VALUE
1156
+ strscan_scan_byte(VALUE self)
1157
+ {
1158
+ struct strscanner *p;
1159
+ VALUE byte;
1160
+
1161
+ GET_SCANNER(self, p);
1162
+ CLEAR_MATCH_STATUS(p);
1163
+ if (EOS_P(p))
1164
+ return Qnil;
1165
+
1166
+ byte = INT2FIX((unsigned char)*CURPTR(p));
1167
+ p->prev = p->curr;
1168
+ p->curr++;
1169
+ MATCHED(p);
1170
+ adjust_registers_to_matched(p);
1171
+ return byte;
1172
+ }
1173
+
1174
+ /*
1175
+ * Peeks at the current byte and returns it as an integer.
1176
+ *
910
1177
  * s = StringScanner.new('ab')
911
- * s.get_byte # => "a"
912
- * s.get_byte # => "b"
913
- * s.get_byte # => nil
914
- *
915
- * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
916
- * s.get_byte # => "\xA4"
917
- * s.get_byte # => "\xA2"
918
- * s.get_byte # => nil
1178
+ * s.peek_byte # => 97
1179
+ */
1180
+ static VALUE
1181
+ strscan_peek_byte(VALUE self)
1182
+ {
1183
+ struct strscanner *p;
1184
+
1185
+ GET_SCANNER(self, p);
1186
+ if (EOS_P(p))
1187
+ return Qnil;
1188
+
1189
+ return INT2FIX((unsigned char)*CURPTR(p));
1190
+ }
1191
+
1192
+ /*
1193
+ * :markup: markdown
1194
+ * :include: strscan/link_refs.txt
1195
+ * :include: strscan/methods/get_byte.md
919
1196
  */
920
1197
  static VALUE
921
1198
  strscan_get_byte(VALUE self)
@@ -937,9 +1214,14 @@ strscan_get_byte(VALUE self)
937
1214
  }
938
1215
 
939
1216
  /*
1217
+ * call-seq:
1218
+ * getbyte
1219
+ *
940
1220
  * Equivalent to #get_byte.
941
1221
  * This method is obsolete; use #get_byte instead.
942
1222
  */
1223
+
1224
+ /* :nodoc: */
943
1225
  static VALUE
944
1226
  strscan_getbyte(VALUE self)
945
1227
  {
@@ -948,14 +1230,22 @@ strscan_getbyte(VALUE self)
948
1230
  }
949
1231
 
950
1232
  /*
951
- * call-seq: peek(len)
1233
+ * :markup: markdown
1234
+ * :include: strscan/link_refs.txt
1235
+ *
1236
+ * call-seq:
1237
+ * peek(length) -> substring
952
1238
  *
953
- * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
954
- * advancing the scan pointer.
1239
+ * Returns the substring `string[pos, length]`;
1240
+ * does not update [match values][9] or [positions][11]:
955
1241
  *
956
- * s = StringScanner.new('test string')
957
- * s.peek(7) # => "test st"
958
- * s.peek(7) # => "test st"
1242
+ * ```
1243
+ * scanner = StringScanner.new('foobarbaz')
1244
+ * scanner.pos = 3
1245
+ * scanner.peek(3) # => "bar"
1246
+ * scanner.terminate
1247
+ * scanner.peek(3) # => ""
1248
+ * ```
959
1249
  *
960
1250
  */
961
1251
  static VALUE
@@ -975,9 +1265,14 @@ strscan_peek(VALUE self, VALUE vlen)
975
1265
  }
976
1266
 
977
1267
  /*
1268
+ * call-seq:
1269
+ * peep
1270
+ *
978
1271
  * Equivalent to #peek.
979
1272
  * This method is obsolete; use #peek instead.
980
1273
  */
1274
+
1275
+ /* :nodoc: */
981
1276
  static VALUE
982
1277
  strscan_peep(VALUE self, VALUE vlen)
983
1278
  {
@@ -985,16 +1280,156 @@ strscan_peep(VALUE self, VALUE vlen)
985
1280
  return strscan_peek(self, vlen);
986
1281
  }
987
1282
 
1283
+ static VALUE
1284
+ strscan_parse_integer(struct strscanner *p, int base, long len)
1285
+ {
1286
+ VALUE buffer_v, integer;
1287
+
1288
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
1289
+
1290
+ MEMCPY(buffer, CURPTR(p), char, len);
1291
+ buffer[len] = '\0';
1292
+ integer = rb_cstr2inum(buffer, base);
1293
+ RB_ALLOCV_END(buffer_v);
1294
+ p->curr += len;
1295
+ return integer;
1296
+ }
1297
+
1298
+ static inline bool
1299
+ strscan_ascii_compat_fastpath(VALUE str) {
1300
+ int encindex = ENCODING_GET_INLINED(str);
1301
+ // The overwhelming majority of strings are in one of these 3 encodings.
1302
+ return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex;
1303
+ }
1304
+
1305
+ static inline void
1306
+ strscan_must_ascii_compat(VALUE str)
1307
+ {
1308
+ // The overwhelming majority of strings are in one of these 3 encodings.
1309
+ if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) {
1310
+ return;
1311
+ }
1312
+
1313
+ rb_must_asciicompat(str);
1314
+ }
1315
+
1316
+ static VALUE
1317
+ strscan_scan_base10_integer(VALUE self)
1318
+ {
1319
+ char *ptr;
1320
+ long len = 0;
1321
+ struct strscanner *p;
1322
+
1323
+ GET_SCANNER(self, p);
1324
+ CLEAR_MATCH_STATUS(p);
1325
+
1326
+ strscan_must_ascii_compat(p->str);
1327
+
1328
+ ptr = CURPTR(p);
1329
+
1330
+ long remaining_len = S_RESTLEN(p);
1331
+
1332
+ if (remaining_len <= 0) {
1333
+ return Qnil;
1334
+ }
1335
+
1336
+ if (ptr[len] == '-' || ptr[len] == '+') {
1337
+ len++;
1338
+ }
1339
+
1340
+ if (!rb_isdigit(ptr[len])) {
1341
+ return Qnil;
1342
+ }
1343
+
1344
+ MATCHED(p);
1345
+ p->prev = p->curr;
1346
+
1347
+ while (len < remaining_len && rb_isdigit(ptr[len])) {
1348
+ len++;
1349
+ }
1350
+
1351
+ return strscan_parse_integer(p, 10, len);
1352
+ }
1353
+
1354
+ static VALUE
1355
+ strscan_scan_base16_integer(VALUE self)
1356
+ {
1357
+ char *ptr;
1358
+ long len = 0;
1359
+ struct strscanner *p;
1360
+
1361
+ GET_SCANNER(self, p);
1362
+ CLEAR_MATCH_STATUS(p);
1363
+
1364
+ strscan_must_ascii_compat(p->str);
1365
+
1366
+ ptr = CURPTR(p);
1367
+
1368
+ long remaining_len = S_RESTLEN(p);
1369
+
1370
+ if (remaining_len <= 0) {
1371
+ return Qnil;
1372
+ }
1373
+
1374
+ if (ptr[len] == '-' || ptr[len] == '+') {
1375
+ len++;
1376
+ }
1377
+
1378
+ if ((remaining_len >= (len + 2)) && ptr[len] == '0' && ptr[len + 1] == 'x') {
1379
+ len += 2;
1380
+ }
1381
+
1382
+ if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
1383
+ return Qnil;
1384
+ }
1385
+
1386
+ MATCHED(p);
1387
+ p->prev = p->curr;
1388
+
1389
+ while (len < remaining_len && rb_isxdigit(ptr[len])) {
1390
+ len++;
1391
+ }
1392
+
1393
+ return strscan_parse_integer(p, 16, len);
1394
+ }
1395
+
988
1396
  /*
989
- * Sets the scan pointer to the previous position. Only one previous position is
990
- * remembered, and it changes with each scanning operation.
1397
+ * :markup: markdown
1398
+ * :include: strscan/link_refs.txt
1399
+ *
1400
+ * call-seq:
1401
+ * unscan -> self
1402
+ *
1403
+ * Sets the [position][2] to its value previous to the recent successful
1404
+ * [match][17] attempt:
1405
+ *
1406
+ * ```
1407
+ * scanner = StringScanner.new('foobarbaz')
1408
+ * scanner.scan(/foo/)
1409
+ * put_situation(scanner)
1410
+ * # Situation:
1411
+ * # pos: 3
1412
+ * # charpos: 3
1413
+ * # rest: "barbaz"
1414
+ * # rest_size: 6
1415
+ * scanner.unscan
1416
+ * # => #<StringScanner 0/9 @ "fooba...">
1417
+ * put_situation(scanner)
1418
+ * # Situation:
1419
+ * # pos: 0
1420
+ * # charpos: 0
1421
+ * # rest: "foobarbaz"
1422
+ * # rest_size: 9
1423
+ * ```
1424
+ *
1425
+ * Raises an exception if match values are clear:
1426
+ *
1427
+ * ```
1428
+ * scanner.scan(/nope/) # => nil
1429
+ * match_values_cleared?(scanner) # => true
1430
+ * scanner.unscan # Raises StringScanner::Error.
1431
+ * ```
991
1432
  *
992
- * s = StringScanner.new('test string')
993
- * s.scan(/\w+/) # => "test"
994
- * s.unscan
995
- * s.scan(/../) # => "te"
996
- * s.scan(/\d/) # => nil
997
- * s.unscan # ScanError: unscan failed: previous match record not exist
998
1433
  */
999
1434
  static VALUE
1000
1435
  strscan_unscan(VALUE self)
@@ -1010,16 +1445,37 @@ strscan_unscan(VALUE self)
1010
1445
  }
1011
1446
 
1012
1447
  /*
1013
- * Returns +true+ if and only if the scan pointer is at the beginning of the line.
1014
- *
1015
- * s = StringScanner.new("test\ntest\n")
1016
- * s.bol? # => true
1017
- * s.scan(/te/)
1018
- * s.bol? # => false
1019
- * s.scan(/st\n/)
1020
- * s.bol? # => true
1021
- * s.terminate
1022
- * s.bol? # => true
1448
+ *
1449
+ * :markup: markdown
1450
+ * :include: strscan/link_refs.txt
1451
+ *
1452
+ * call-seq:
1453
+ * beginning_of_line? -> true or false
1454
+ *
1455
+ * Returns whether the [position][2] is at the beginning of a line;
1456
+ * that is, at the beginning of the [stored string][1]
1457
+ * or immediately after a newline:
1458
+ *
1459
+ * scanner = StringScanner.new(MULTILINE_TEXT)
1460
+ * scanner.string
1461
+ * # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n"
1462
+ * scanner.pos # => 0
1463
+ * scanner.beginning_of_line? # => true
1464
+ *
1465
+ * scanner.scan_until(/,/) # => "Go placidly amid the noise and haste,"
1466
+ * scanner.beginning_of_line? # => false
1467
+ *
1468
+ * scanner.scan(/\n/) # => "\n"
1469
+ * scanner.beginning_of_line? # => true
1470
+ *
1471
+ * scanner.terminate
1472
+ * scanner.beginning_of_line? # => true
1473
+ *
1474
+ * scanner.concat('x')
1475
+ * scanner.terminate
1476
+ * scanner.beginning_of_line? # => false
1477
+ *
1478
+ * StringScanner#bol? is an alias for StringScanner#beginning_of_line?.
1023
1479
  */
1024
1480
  static VALUE
1025
1481
  strscan_bol_p(VALUE self)
@@ -1033,14 +1489,24 @@ strscan_bol_p(VALUE self)
1033
1489
  }
1034
1490
 
1035
1491
  /*
1036
- * Returns +true+ if the scan pointer is at the end of the string.
1492
+ * :markup: markdown
1493
+ * :include: strscan/link_refs.txt
1494
+ *
1495
+ * call-seq:
1496
+ * eos? -> true or false
1497
+ *
1498
+ * Returns whether the [position][2]
1499
+ * is at the end of the [stored string][1]:
1500
+ *
1501
+ * ```
1502
+ * scanner = StringScanner.new('foobarbaz')
1503
+ * scanner.eos? # => false
1504
+ * scanner.pos = 3
1505
+ * scanner.eos? # => false
1506
+ * scanner.terminate
1507
+ * scanner.eos? # => true
1508
+ * ```
1037
1509
  *
1038
- * s = StringScanner.new('test string')
1039
- * p s.eos? # => false
1040
- * s.scan(/test/)
1041
- * p s.eos? # => false
1042
- * s.terminate
1043
- * p s.eos? # => true
1044
1510
  */
1045
1511
  static VALUE
1046
1512
  strscan_eos_p(VALUE self)
@@ -1052,9 +1518,14 @@ strscan_eos_p(VALUE self)
1052
1518
  }
1053
1519
 
1054
1520
  /*
1521
+ * call-seq:
1522
+ * empty?
1523
+ *
1055
1524
  * Equivalent to #eos?.
1056
1525
  * This method is obsolete, use #eos? instead.
1057
1526
  */
1527
+
1528
+ /* :nodoc: */
1058
1529
  static VALUE
1059
1530
  strscan_empty_p(VALUE self)
1060
1531
  {
@@ -1063,6 +1534,9 @@ strscan_empty_p(VALUE self)
1063
1534
  }
1064
1535
 
1065
1536
  /*
1537
+ * call-seq:
1538
+ * rest?
1539
+ *
1066
1540
  * Returns true if and only if there is more data in the string. See #eos?.
1067
1541
  * This method is obsolete; use #eos? instead.
1068
1542
  *
@@ -1071,6 +1545,8 @@ strscan_empty_p(VALUE self)
1071
1545
  * s.eos? # => false
1072
1546
  * s.rest? # => true
1073
1547
  */
1548
+
1549
+ /* :nodoc: */
1074
1550
  static VALUE
1075
1551
  strscan_rest_p(VALUE self)
1076
1552
  {
@@ -1081,13 +1557,26 @@ strscan_rest_p(VALUE self)
1081
1557
  }
1082
1558
 
1083
1559
  /*
1084
- * Returns +true+ if and only if the last match was successful.
1560
+ * :markup: markdown
1561
+ * :include: strscan/link_refs.txt
1562
+ *
1563
+ * call-seq:
1564
+ * matched? -> true or false
1565
+ *
1566
+ * Returns `true` if the most recent [match attempt][17] was successful,
1567
+ * `false` otherwise;
1568
+ * see [Basic Matched Values][18]:
1569
+ *
1570
+ * ```
1571
+ * scanner = StringScanner.new('foobarbaz')
1572
+ * scanner.matched? # => false
1573
+ * scanner.pos = 3
1574
+ * scanner.exist?(/baz/) # => 6
1575
+ * scanner.matched? # => true
1576
+ * scanner.exist?(/nope/) # => nil
1577
+ * scanner.matched? # => false
1578
+ * ```
1085
1579
  *
1086
- * s = StringScanner.new('test string')
1087
- * s.match?(/\w+/) # => 4
1088
- * s.matched? # => true
1089
- * s.match?(/\d+/) # => nil
1090
- * s.matched? # => false
1091
1580
  */
1092
1581
  static VALUE
1093
1582
  strscan_matched_p(VALUE self)
@@ -1099,11 +1588,27 @@ strscan_matched_p(VALUE self)
1099
1588
  }
1100
1589
 
1101
1590
  /*
1102
- * Returns the last matched string.
1591
+ * :markup: markdown
1592
+ * :include: strscan/link_refs.txt
1593
+ *
1594
+ * call-seq:
1595
+ * matched -> matched_substring or nil
1596
+ *
1597
+ * Returns the matched substring from the most recent [match][17] attempt
1598
+ * if it was successful,
1599
+ * or `nil` otherwise;
1600
+ * see [Basic Matched Values][18]:
1601
+ *
1602
+ * ```
1603
+ * scanner = StringScanner.new('foobarbaz')
1604
+ * scanner.matched # => nil
1605
+ * scanner.pos = 3
1606
+ * scanner.match?(/bar/) # => 3
1607
+ * scanner.matched # => "bar"
1608
+ * scanner.match?(/nope/) # => nil
1609
+ * scanner.matched # => nil
1610
+ * ```
1103
1611
  *
1104
- * s = StringScanner.new('test string')
1105
- * s.match?(/\w+/) # -> 4
1106
- * s.matched # -> "test"
1107
1612
  */
1108
1613
  static VALUE
1109
1614
  strscan_matched(VALUE self)
@@ -1118,15 +1623,29 @@ strscan_matched(VALUE self)
1118
1623
  }
1119
1624
 
1120
1625
  /*
1121
- * Returns the size of the most recent match in bytes, or +nil+ if there
1122
- * was no recent match. This is different than <tt>matched.size</tt>,
1123
- * which will return the size in characters.
1626
+ * :markup: markdown
1627
+ * :include: strscan/link_refs.txt
1628
+ *
1629
+ * call-seq:
1630
+ * matched_size -> substring_size or nil
1631
+ *
1632
+ * Returns the size (in bytes) of the matched substring
1633
+ * from the most recent [match attempt][17] if it was successful,
1634
+ * or `nil` otherwise;
1635
+ * see [Basic Matched Values][18]:
1636
+ *
1637
+ * ```
1638
+ * scanner = StringScanner.new('foobarbaz')
1639
+ * scanner.matched_size # => nil
1640
+ *
1641
+ * scanner.pos = 3
1642
+ * scanner.exist?(/baz/) # => 6
1643
+ * scanner.matched_size # => 3
1644
+ *
1645
+ * scanner.exist?(/nope/) # => nil
1646
+ * scanner.matched_size # => nil
1647
+ * ```
1124
1648
  *
1125
- * s = StringScanner.new('test string')
1126
- * s.check /\w+/ # -> "test"
1127
- * s.matched_size # -> 4
1128
- * s.check /\d+/ # -> nil
1129
- * s.matched_size # -> nil
1130
1649
  */
1131
1650
  static VALUE
1132
1651
  strscan_matched_size(VALUE self)
@@ -1157,30 +1676,75 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
1157
1676
  }
1158
1677
 
1159
1678
  /*
1160
- * call-seq: [](n)
1161
- *
1162
- * Returns the n-th subgroup in the most recent match.
1163
- *
1164
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1165
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1166
- * s[0] # -> "Fri Dec 12 "
1167
- * s[1] # -> "Fri"
1168
- * s[2] # -> "Dec"
1169
- * s[3] # -> "12"
1170
- * s.post_match # -> "1975 14:39"
1171
- * s.pre_match # -> ""
1172
- *
1173
- * s.reset
1174
- * s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) # -> "Fri Dec 12 "
1175
- * s[0] # -> "Fri Dec 12 "
1176
- * s[1] # -> "Fri"
1177
- * s[2] # -> "Dec"
1178
- * s[3] # -> "12"
1179
- * s[:wday] # -> "Fri"
1180
- * s[:month] # -> "Dec"
1181
- * s[:day] # -> "12"
1182
- * s.post_match # -> "1975 14:39"
1183
- * s.pre_match # -> ""
1679
+ *
1680
+ * :markup: markdown
1681
+ * :include: strscan/link_refs.txt
1682
+ *
1683
+ * call-seq:
1684
+ * [](specifier) -> substring or nil
1685
+ *
1686
+ * Returns a captured substring or `nil`;
1687
+ * see [Captured Match Values][13].
1688
+ *
1689
+ * When there are captures:
1690
+ *
1691
+ * ```
1692
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1693
+ * scanner.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
1694
+ * ```
1695
+ *
1696
+ * - `specifier` zero: returns the entire matched substring:
1697
+ *
1698
+ * ```
1699
+ * scanner[0] # => "Fri Dec 12 "
1700
+ * scanner.pre_match # => ""
1701
+ * scanner.post_match # => "1975 14:39"
1702
+ * ```
1703
+ *
1704
+ * - `specifier` positive integer: returns the `n`th capture, or `nil` if out of range:
1705
+ *
1706
+ * ```
1707
+ * scanner[1] # => "Fri"
1708
+ * scanner[2] # => "Dec"
1709
+ * scanner[3] # => "12"
1710
+ * scanner[4] # => nil
1711
+ * ```
1712
+ *
1713
+ * - `specifier` negative integer: counts backward from the last subgroup:
1714
+ *
1715
+ * ```
1716
+ * scanner[-1] # => "12"
1717
+ * scanner[-4] # => "Fri Dec 12 "
1718
+ * scanner[-5] # => nil
1719
+ * ```
1720
+ *
1721
+ * - `specifier` symbol or string: returns the named subgroup, or `nil` if there is no such subgroup:
1722
+ *
1723
+ * ```
1724
+ * scanner[:wday] # => "Fri"
1725
+ * scanner['wday'] # => "Fri"
1726
+ * scanner[:month] # => "Dec"
1727
+ * scanner[:day] # => "12"
1728
+ * scanner[:nope] # => nil
1729
+ * ```
1730
+ *
1731
+ * When there are no captures, only `[0]` returns non-`nil`:
1732
+ *
1733
+ * ```
1734
+ * scanner = StringScanner.new('foobarbaz')
1735
+ * scanner.exist?(/bar/)
1736
+ * scanner[0] # => "bar"
1737
+ * scanner[1] # => nil
1738
+ * ```
1739
+ *
1740
+ * For a failed match, even `[0]` returns `nil`:
1741
+ *
1742
+ * ```
1743
+ * scanner.scan(/nope/) # => nil
1744
+ * scanner[0] # => nil
1745
+ * scanner[1] # => nil
1746
+ * ```
1747
+ *
1184
1748
  */
1185
1749
  static VALUE
1186
1750
  strscan_aref(VALUE self, VALUE idx)
@@ -1217,14 +1781,28 @@ strscan_aref(VALUE self, VALUE idx)
1217
1781
  }
1218
1782
 
1219
1783
  /*
1220
- * call-seq: size
1784
+ * :markup: markdown
1785
+ * :include: strscan/link_refs.txt
1786
+ *
1787
+ * call-seq:
1788
+ * size -> captures_count
1789
+ *
1790
+ * Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise;
1791
+ * see [Captured Match Values][13]:
1792
+ *
1793
+ * ```
1794
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1795
+ * scanner.size # => nil
1221
1796
  *
1222
- * Returns the amount of subgroups in the most recent match.
1223
- * The full match counts as a subgroup.
1797
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
1798
+ * scanner.match?(pattern)
1799
+ * scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
1800
+ * scanner.size # => 4
1801
+ *
1802
+ * scanner.match?(/nope/) # => nil
1803
+ * scanner.size # => nil
1804
+ * ```
1224
1805
  *
1225
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1226
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1227
- * s.size # -> 4
1228
1806
  */
1229
1807
  static VALUE
1230
1808
  strscan_size(VALUE self)
@@ -1237,16 +1815,30 @@ strscan_size(VALUE self)
1237
1815
  }
1238
1816
 
1239
1817
  /*
1240
- * call-seq: captures
1818
+ * :markup: markdown
1819
+ * :include: strscan/link_refs.txt
1820
+ *
1821
+ * call-seq:
1822
+ * captures -> substring_array or nil
1241
1823
  *
1242
- * Returns the subgroups in the most recent match (not including the full match).
1243
- * If nothing was priorly matched, it returns nil.
1824
+ * Returns the array of [captured match values][13] at indexes `(1..)`
1825
+ * if the most recent match attempt succeeded, or `nil` otherwise:
1826
+ *
1827
+ * ```
1828
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1829
+ * scanner.captures # => nil
1830
+ *
1831
+ * scanner.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
1832
+ * scanner.captures # => ["Fri", "Dec", "12"]
1833
+ * scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
1834
+ *
1835
+ * scanner.exist?(/Fri/)
1836
+ * scanner.captures # => []
1837
+ *
1838
+ * scanner.scan(/nope/)
1839
+ * scanner.captures # => nil
1840
+ * ```
1244
1841
  *
1245
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1246
- * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 "
1247
- * s.captures # -> ["Fri", "Dec", "12", nil]
1248
- * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil
1249
- * s.captures # -> nil
1250
1842
  */
1251
1843
  static VALUE
1252
1844
  strscan_captures(VALUE self)
@@ -1276,17 +1868,25 @@ strscan_captures(VALUE self)
1276
1868
  }
1277
1869
 
1278
1870
  /*
1279
- * call-seq:
1280
- * scanner.values_at( i1, i2, ... iN ) -> an_array
1871
+ * :markup: markdown
1872
+ * :include: strscan/link_refs.txt
1281
1873
  *
1282
- * Returns the subgroups in the most recent match at the given indices.
1283
- * If nothing was priorly matched, it returns nil.
1874
+ * call-seq:
1875
+ * values_at(*specifiers) -> array_of_captures or nil
1876
+ *
1877
+ * Returns an array of captured substrings, or `nil` if none.
1878
+ *
1879
+ * For each `specifier`, the returned substring is `[specifier]`;
1880
+ * see #[].
1881
+ *
1882
+ * ```
1883
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
1884
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
1885
+ * scanner.match?(pattern)
1886
+ * scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"]
1887
+ * scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"]
1888
+ * ```
1284
1889
  *
1285
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1286
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1287
- * s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
1288
- * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1289
- * s.values_at 0, -1, 5, 2 # -> nil
1290
1890
  */
1291
1891
 
1292
1892
  static VALUE
@@ -1308,13 +1908,29 @@ strscan_values_at(int argc, VALUE *argv, VALUE self)
1308
1908
  }
1309
1909
 
1310
1910
  /*
1311
- * Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
1911
+ * :markup: markdown
1912
+ * :include: strscan/link_refs.txt
1913
+ *
1914
+ * call-seq:
1915
+ * pre_match -> substring
1916
+ *
1917
+ * Returns the substring that precedes the matched substring
1918
+ * from the most recent match attempt if it was successful,
1919
+ * or `nil` otherwise;
1920
+ * see [Basic Match Values][18]:
1921
+ *
1922
+ * ```
1923
+ * scanner = StringScanner.new('foobarbaz')
1924
+ * scanner.pre_match # => nil
1925
+ *
1926
+ * scanner.pos = 3
1927
+ * scanner.exist?(/baz/) # => 6
1928
+ * scanner.pre_match # => "foobar" # Substring of entire string, not just target string.
1929
+ *
1930
+ * scanner.exist?(/nope/) # => nil
1931
+ * scanner.pre_match # => nil
1932
+ * ```
1312
1933
  *
1313
- * s = StringScanner.new('test string')
1314
- * s.scan(/\w+/) # -> "test"
1315
- * s.scan(/\s+/) # -> " "
1316
- * s.pre_match # -> "test"
1317
- * s.post_match # -> "string"
1318
1934
  */
1319
1935
  static VALUE
1320
1936
  strscan_pre_match(VALUE self)
@@ -1329,13 +1945,29 @@ strscan_pre_match(VALUE self)
1329
1945
  }
1330
1946
 
1331
1947
  /*
1332
- * Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
1948
+ * :markup: markdown
1949
+ * :include: strscan/link_refs.txt
1950
+ *
1951
+ * call-seq:
1952
+ * post_match -> substring
1953
+ *
1954
+ * Returns the substring that follows the matched substring
1955
+ * from the most recent match attempt if it was successful,
1956
+ * or `nil` otherwise;
1957
+ * see [Basic Match Values][18]:
1958
+ *
1959
+ * ```
1960
+ * scanner = StringScanner.new('foobarbaz')
1961
+ * scanner.post_match # => nil
1962
+ *
1963
+ * scanner.pos = 3
1964
+ * scanner.match?(/bar/) # => 3
1965
+ * scanner.post_match # => "baz"
1966
+ *
1967
+ * scanner.match?(/nope/) # => nil
1968
+ * scanner.post_match # => nil
1969
+ * ```
1333
1970
  *
1334
- * s = StringScanner.new('test string')
1335
- * s.scan(/\w+/) # -> "test"
1336
- * s.scan(/\s+/) # -> " "
1337
- * s.pre_match # -> "test"
1338
- * s.post_match # -> "string"
1339
1971
  */
1340
1972
  static VALUE
1341
1973
  strscan_post_match(VALUE self)
@@ -1350,8 +1982,24 @@ strscan_post_match(VALUE self)
1350
1982
  }
1351
1983
 
1352
1984
  /*
1353
- * Returns the "rest" of the string (i.e. everything after the scan pointer).
1354
- * If there is no more data (eos? = true), it returns <tt>""</tt>.
1985
+ * :markup: markdown
1986
+ * :include: strscan/link_refs.txt
1987
+ *
1988
+ * call-seq:
1989
+ * rest -> target_substring
1990
+ *
1991
+ * Returns the 'rest' of the [stored string][1] (all after the current [position][2]),
1992
+ * which is the [target substring][3]:
1993
+ *
1994
+ * ```
1995
+ * scanner = StringScanner.new('foobarbaz')
1996
+ * scanner.rest # => "foobarbaz"
1997
+ * scanner.pos = 3
1998
+ * scanner.rest # => "barbaz"
1999
+ * scanner.terminate
2000
+ * scanner.rest # => ""
2001
+ * ```
2002
+ *
1355
2003
  */
1356
2004
  static VALUE
1357
2005
  strscan_rest(VALUE self)
@@ -1366,7 +2014,26 @@ strscan_rest(VALUE self)
1366
2014
  }
1367
2015
 
1368
2016
  /*
1369
- * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
2017
+ * :markup: markdown
2018
+ * :include: strscan/link_refs.txt
2019
+ *
2020
+ * call-seq:
2021
+ * rest_size -> integer
2022
+ *
2023
+ * Returns the size (in bytes) of the #rest of the [stored string][1]:
2024
+ *
2025
+ * ```
2026
+ * scanner = StringScanner.new('foobarbaz')
2027
+ * scanner.rest # => "foobarbaz"
2028
+ * scanner.rest_size # => 9
2029
+ * scanner.pos = 3
2030
+ * scanner.rest # => "barbaz"
2031
+ * scanner.rest_size # => 6
2032
+ * scanner.terminate
2033
+ * scanner.rest # => ""
2034
+ * scanner.rest_size # => 0
2035
+ * ```
2036
+ *
1370
2037
  */
1371
2038
  static VALUE
1372
2039
  strscan_rest_size(VALUE self)
@@ -1383,9 +2050,14 @@ strscan_rest_size(VALUE self)
1383
2050
  }
1384
2051
 
1385
2052
  /*
2053
+ * call-seq:
2054
+ * restsize
2055
+ *
1386
2056
  * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
1387
2057
  * This method is obsolete; use #rest_size instead.
1388
2058
  */
2059
+
2060
+ /* :nodoc: */
1389
2061
  static VALUE
1390
2062
  strscan_restsize(VALUE self)
1391
2063
  {
@@ -1396,15 +2068,39 @@ strscan_restsize(VALUE self)
1396
2068
  #define INSPECT_LENGTH 5
1397
2069
 
1398
2070
  /*
1399
- * Returns a string that represents the StringScanner object, showing:
1400
- * - the current position
1401
- * - the size of the string
1402
- * - the characters surrounding the scan pointer
1403
- *
1404
- * s = StringScanner.new("Fri Dec 12 1975 14:39")
1405
- * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
1406
- * s.scan_until /12/ # -> "Fri Dec 12"
1407
- * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
2071
+ * :markup: markdown
2072
+ * :include: strscan/link_refs.txt
2073
+ *
2074
+ * call-seq:
2075
+ * inspect -> string
2076
+ *
2077
+ * Returns a string representation of `self` that may show:
2078
+ *
2079
+ * 1. The current [position][2].
2080
+ * 2. The size (in bytes) of the [stored string][1].
2081
+ * 3. The substring preceding the current position.
2082
+ * 4. The substring following the current position (which is also the [target substring][3]).
2083
+ *
2084
+ * ```
2085
+ * scanner = StringScanner.new("Fri Dec 12 1975 14:39")
2086
+ * scanner.pos = 11
2087
+ * scanner.inspect # => "#<StringScanner 11/21 \"...c 12 \" @ \"1975 ...\">"
2088
+ * ```
2089
+ *
2090
+ * If at beginning-of-string, item 3 above (preceding substring) is omitted:
2091
+ *
2092
+ * ```
2093
+ * scanner.reset
2094
+ * scanner.inspect # => "#<StringScanner 0/21 @ \"Fri D...\">"
2095
+ * ```
2096
+ *
2097
+ * If at end-of-string, all items above are omitted:
2098
+ *
2099
+ * ```
2100
+ * scanner.terminate
2101
+ * scanner.inspect # => "#<StringScanner fin>"
2102
+ * ```
2103
+ *
1408
2104
  */
1409
2105
  static VALUE
1410
2106
  strscan_inspect(VALUE self)
@@ -1476,13 +2172,13 @@ inspect2(struct strscanner *p)
1476
2172
  }
1477
2173
 
1478
2174
  /*
1479
- * call-seq:
1480
- * scanner.fixed_anchor? -> true or false
2175
+ * :markup: markdown
2176
+ * :include: strscan/link_refs.txt
1481
2177
  *
1482
- * Whether +scanner+ uses fixed anchor mode or not.
2178
+ * call-seq:
2179
+ * fixed_anchor? -> true or false
1483
2180
  *
1484
- * If fixed anchor mode is used, +\A+ always matches the beginning of
1485
- * the string. Otherwise, +\A+ always matches the current position.
2181
+ * Returns whether the [fixed-anchor property][10] is set.
1486
2182
  */
1487
2183
  static VALUE
1488
2184
  strscan_fixed_anchor_p(VALUE self)
@@ -1518,21 +2214,39 @@ named_captures_iter(const OnigUChar *name,
1518
2214
  }
1519
2215
 
1520
2216
  /*
2217
+ * :markup: markdown
2218
+ * :include: strscan/link_refs.txt
2219
+ *
1521
2220
  * call-seq:
1522
- * scanner.named_captures -> hash
2221
+ * named_captures -> hash
2222
+ *
2223
+ * Returns a hash of the named captures from the most recent match attempt
2224
+ * (group name => captured substring, or nil if the group did not match);
2225
+ * the hash is empty if the most recent pattern had no named groups; see [Captured Match Values][13]:
1523
2226
  *
1524
- * Returns a hash of string variables matching the regular expression.
2227
+ * ```
2228
+ * scanner = StringScanner.new('Fri Dec 12 1975 14:39')
2229
+ * scanner.named_captures # => {}
2230
+ *
2231
+ * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
2232
+ * scanner.match?(pattern)
2233
+ * scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"}
2234
+ *
2235
+ * scanner.string = 'nope'
2236
+ * scanner.match?(pattern)
2237
+ * scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil}
2238
+ *
2239
+ * scanner.match?(/nosuch/)
2240
+ * scanner.named_captures # => {}
2241
+ * ```
1525
2242
  *
1526
- * scan = StringScanner.new('foobarbaz')
1527
- * scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/)
1528
- * scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"}
1529
2243
  */
1530
2244
  static VALUE
1531
2245
  strscan_named_captures(VALUE self)
1532
2246
  {
1533
2247
  struct strscanner *p;
1534
- GET_SCANNER(self, p);
1535
2248
  named_captures_data data;
2249
+ GET_SCANNER(self, p);
1536
2250
  data.self = self;
1537
2251
  data.captures = rb_hash_new();
1538
2252
  if (!RB_NIL_P(p->regex)) {
@@ -1549,107 +2263,11 @@ strscan_named_captures(VALUE self)
1549
2263
  /*
1550
2264
  * Document-class: StringScanner
1551
2265
  *
1552
- * StringScanner provides for lexical scanning operations on a String. Here is
1553
- * an example of its usage:
1554
- *
1555
- * require 'strscan'
1556
- *
1557
- * s = StringScanner.new('This is an example string')
1558
- * s.eos? # -> false
1559
- *
1560
- * p s.scan(/\w+/) # -> "This"
1561
- * p s.scan(/\w+/) # -> nil
1562
- * p s.scan(/\s+/) # -> " "
1563
- * p s.scan(/\s+/) # -> nil
1564
- * p s.scan(/\w+/) # -> "is"
1565
- * s.eos? # -> false
1566
- *
1567
- * p s.scan(/\s+/) # -> " "
1568
- * p s.scan(/\w+/) # -> "an"
1569
- * p s.scan(/\s+/) # -> " "
1570
- * p s.scan(/\w+/) # -> "example"
1571
- * p s.scan(/\s+/) # -> " "
1572
- * p s.scan(/\w+/) # -> "string"
1573
- * s.eos? # -> true
1574
- *
1575
- * p s.scan(/\s+/) # -> nil
1576
- * p s.scan(/\w+/) # -> nil
2266
+ * :markup: markdown
1577
2267
  *
1578
- * Scanning a string means remembering the position of a <i>scan pointer</i>,
1579
- * which is just an index. The point of scanning is to move forward a bit at
1580
- * a time, so matches are sought after the scan pointer; usually immediately
1581
- * after it.
2268
+ * :include: strscan/link_refs.txt
2269
+ * :include: strscan/strscan.md
1582
2270
  *
1583
- * Given the string "test string", here are the pertinent scan pointer
1584
- * positions:
1585
- *
1586
- * t e s t s t r i n g
1587
- * 0 1 2 ... 1
1588
- * 0
1589
- *
1590
- * When you #scan for a pattern (a regular expression), the match must occur
1591
- * at the character after the scan pointer. If you use #scan_until, then the
1592
- * match can occur anywhere after the scan pointer. In both cases, the scan
1593
- * pointer moves <i>just beyond</i> the last character of the match, ready to
1594
- * scan again from the next character onwards. This is demonstrated by the
1595
- * example above.
1596
- *
1597
- * == Method Categories
1598
- *
1599
- * There are other methods besides the plain scanners. You can look ahead in
1600
- * the string without actually scanning. You can access the most recent match.
1601
- * You can modify the string being scanned, reset or terminate the scanner,
1602
- * find out or change the position of the scan pointer, skip ahead, and so on.
1603
- *
1604
- * === Advancing the Scan Pointer
1605
- *
1606
- * - #getch
1607
- * - #get_byte
1608
- * - #scan
1609
- * - #scan_until
1610
- * - #skip
1611
- * - #skip_until
1612
- *
1613
- * === Looking Ahead
1614
- *
1615
- * - #check
1616
- * - #check_until
1617
- * - #exist?
1618
- * - #match?
1619
- * - #peek
1620
- *
1621
- * === Finding Where we Are
1622
- *
1623
- * - #beginning_of_line? (<tt>#bol?</tt>)
1624
- * - #eos?
1625
- * - #rest?
1626
- * - #rest_size
1627
- * - #pos
1628
- *
1629
- * === Setting Where we Are
1630
- *
1631
- * - #reset
1632
- * - #terminate
1633
- * - #pos=
1634
- *
1635
- * === Match Data
1636
- *
1637
- * - #matched
1638
- * - #matched?
1639
- * - #matched_size
1640
- * - <tt>#[]</tt>
1641
- * - #pre_match
1642
- * - #post_match
1643
- *
1644
- * === Miscellaneous
1645
- *
1646
- * - <tt><<</tt>
1647
- * - #concat
1648
- * - #string
1649
- * - #string=
1650
- * - #unscan
1651
- *
1652
- * There are aliases to several of the methods.
1653
2271
  */
1654
2272
  void
1655
2273
  Init_strscan(void)
@@ -1664,6 +2282,10 @@ Init_strscan(void)
1664
2282
 
1665
2283
  id_byteslice = rb_intern("byteslice");
1666
2284
 
2285
+ usascii_encindex = rb_usascii_encindex();
2286
+ utf8_encindex = rb_utf8_encindex();
2287
+ binary_encindex = rb_ascii8bit_encindex();
2288
+
1667
2289
  StringScanner = rb_define_class("StringScanner", rb_cObject);
1668
2290
  ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
1669
2291
  if (!rb_const_defined(rb_cObject, id_scanerr)) {
@@ -1708,9 +2330,14 @@ Init_strscan(void)
1708
2330
  rb_define_method(StringScanner, "getch", strscan_getch, 0);
1709
2331
  rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
1710
2332
  rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
2333
+ rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
1711
2334
  rb_define_method(StringScanner, "peek", strscan_peek, 1);
2335
+ rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
1712
2336
  rb_define_method(StringScanner, "peep", strscan_peep, 1);
1713
2337
 
2338
+ rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
2339
+ rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
2340
+
1714
2341
  rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
1715
2342
 
1716
2343
  rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
@@ -1738,4 +2365,6 @@ Init_strscan(void)
1738
2365
  rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1739
2366
 
1740
2367
  rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
2368
+
2369
+ rb_require("strscan/strscan");
1741
2370
  }