strscan 3.0.9 → 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rdoc_options +3 -0
- data/doc/strscan/helper_methods.md +128 -0
- data/doc/strscan/link_refs.txt +17 -0
- data/doc/strscan/methods/get_byte.md +30 -0
- data/doc/strscan/methods/get_charpos.md +19 -0
- data/doc/strscan/methods/get_pos.md +14 -0
- data/doc/strscan/methods/getch.md +43 -0
- data/doc/strscan/methods/scan.md +51 -0
- data/doc/strscan/methods/scan_until.md +52 -0
- data/doc/strscan/methods/set_pos.md +27 -0
- data/doc/strscan/methods/skip.md +43 -0
- data/doc/strscan/methods/skip_until.md +49 -0
- data/doc/strscan/methods/terminate.md +30 -0
- data/doc/strscan/strscan.md +544 -0
- data/ext/strscan/strscan.c +1066 -437
- data/lib/strscan/strscan.rb +25 -0
- metadata +39 -9
data/ext/strscan/strscan.c
CHANGED
@@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
|
|
22
22
|
|
23
23
|
#include <stdbool.h>
|
24
24
|
|
25
|
-
#define STRSCAN_VERSION "3.0.9"
|
25
|
+
#define STRSCAN_VERSION "3.1.1"
|
26
26
|
|
27
27
|
/* =======================================================================
|
28
28
|
Data Type Definitions
|
@@ -32,6 +32,8 @@ static VALUE StringScanner;
|
|
32
32
|
static VALUE ScanError;
|
33
33
|
static ID id_byteslice;
|
34
34
|
|
35
|
+
static int usascii_encindex, utf8_encindex, binary_encindex;
|
36
|
+
|
35
37
|
struct strscanner
|
36
38
|
{
|
37
39
|
/* multi-purpose flags */
|
@@ -115,6 +117,7 @@ static VALUE strscan_get_byte _((VALUE self));
|
|
115
117
|
static VALUE strscan_getbyte _((VALUE self));
|
116
118
|
static VALUE strscan_peek _((VALUE self, VALUE len));
|
117
119
|
static VALUE strscan_peep _((VALUE self, VALUE len));
|
120
|
+
static VALUE strscan_scan_base10_integer _((VALUE self));
|
118
121
|
static VALUE strscan_unscan _((VALUE self));
|
119
122
|
static VALUE strscan_bol_p _((VALUE self));
|
120
123
|
static VALUE strscan_eos_p _((VALUE self));
|
@@ -218,16 +221,28 @@ strscan_s_allocate(VALUE klass)
|
|
218
221
|
}
|
219
222
|
|
220
223
|
/*
|
221
|
-
*
|
222
|
-
*
|
223
|
-
* StringScanner.new(string, dup = false)
|
224
|
-
*
|
225
|
-
* Creates a new StringScanner object to scan over the given +string+.
|
224
|
+
* :markup: markdown
|
225
|
+
* :include: strscan/link_refs.txt
|
226
226
|
*
|
227
|
-
*
|
228
|
-
*
|
227
|
+
* call-seq:
|
228
|
+
* StringScanner.new(string, fixed_anchor: false) -> string_scanner
|
229
|
+
*
|
230
|
+
* Returns a new `StringScanner` object whose [stored string][1]
|
231
|
+
* is the given `string`;
|
232
|
+
* sets the [fixed-anchor property][10]:
|
233
|
+
*
|
234
|
+
* ```
|
235
|
+
* scanner = StringScanner.new('foobarbaz')
|
236
|
+
* scanner.string # => "foobarbaz"
|
237
|
+
* scanner.fixed_anchor? # => false
|
238
|
+
* put_situation(scanner)
|
239
|
+
* # Situation:
|
240
|
+
* # pos: 0
|
241
|
+
* # charpos: 0
|
242
|
+
* # rest: "foobarbaz"
|
243
|
+
* # rest_size: 9
|
244
|
+
* ```
|
229
245
|
*
|
230
|
-
* +dup+ argument is obsolete and not used now.
|
231
246
|
*/
|
232
247
|
static VALUE
|
233
248
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
@@ -266,11 +281,14 @@ check_strscan(VALUE obj)
|
|
266
281
|
}
|
267
282
|
|
268
283
|
/*
|
284
|
+
* :markup: markdown
|
285
|
+
* :include: strscan/link_refs.txt
|
286
|
+
*
|
269
287
|
* call-seq:
|
270
|
-
* dup
|
271
|
-
* clone
|
288
|
+
* dup -> shallow_copy
|
272
289
|
*
|
273
|
-
*
|
290
|
+
* Returns a shallow copy of `self`;
|
291
|
+
* the [stored string][1] in the copy is the same string as in `self`.
|
274
292
|
*/
|
275
293
|
static VALUE
|
276
294
|
strscan_init_copy(VALUE vself, VALUE vorig)
|
@@ -297,10 +315,13 @@ strscan_init_copy(VALUE vself, VALUE vorig)
|
|
297
315
|
======================================================================= */
|
298
316
|
|
299
317
|
/*
|
300
|
-
* call-seq:
|
318
|
+
* call-seq:
|
319
|
+
* StringScanner.must_C_version -> self
|
301
320
|
*
|
302
|
-
*
|
321
|
+
* Returns +self+; defined for backward compatibility.
|
303
322
|
*/
|
323
|
+
|
324
|
+
/* :nodoc: */
|
304
325
|
static VALUE
|
305
326
|
strscan_s_mustc(VALUE self)
|
306
327
|
{
|
@@ -308,7 +329,30 @@ strscan_s_mustc(VALUE self)
|
|
308
329
|
}
|
309
330
|
|
310
331
|
/*
|
311
|
-
*
|
332
|
+
* :markup: markdown
|
333
|
+
* :include: strscan/link_refs.txt
|
334
|
+
*
|
335
|
+
* call-seq:
|
336
|
+
* reset -> self
|
337
|
+
*
|
338
|
+
* Sets both [byte position][2] and [character position][7] to zero,
|
339
|
+
* and clears [match values][9];
|
340
|
+
* returns `self`:
|
341
|
+
*
|
342
|
+
* ```
|
343
|
+
* scanner = StringScanner.new('foobarbaz')
|
344
|
+
* scanner.exist?(/bar/) # => 6
|
345
|
+
* scanner.reset # => #<StringScanner 0/9 @ "fooba...">
|
346
|
+
* put_situation(scanner)
|
347
|
+
* # Situation:
|
348
|
+
* # pos: 0
|
349
|
+
* # charpos: 0
|
350
|
+
* # rest: "foobarbaz"
|
351
|
+
* # rest_size: 9
|
352
|
+
* # => nil
|
353
|
+
* match_values_cleared?(scanner) # => true
|
354
|
+
* ```
|
355
|
+
*
|
312
356
|
*/
|
313
357
|
static VALUE
|
314
358
|
strscan_reset(VALUE self)
|
@@ -322,11 +366,9 @@ strscan_reset(VALUE self)
|
|
322
366
|
}
|
323
367
|
|
324
368
|
/*
|
325
|
-
*
|
326
|
-
*
|
327
|
-
*
|
328
|
-
*
|
329
|
-
* Sets the scan pointer to the end of the string and clear matching data.
|
369
|
+
* :markup: markdown
|
370
|
+
* :include: strscan/link_refs.txt
|
371
|
+
* :include: strscan/methods/terminate.md
|
330
372
|
*/
|
331
373
|
static VALUE
|
332
374
|
strscan_terminate(VALUE self)
|
@@ -340,9 +382,13 @@ strscan_terminate(VALUE self)
|
|
340
382
|
}
|
341
383
|
|
342
384
|
/*
|
343
|
-
*
|
344
|
-
*
|
385
|
+
* call-seq:
|
386
|
+
* clear -> self
|
387
|
+
*
|
388
|
+
* This method is obsolete; use the equivalent method StringScanner#terminate.
|
345
389
|
*/
|
390
|
+
|
391
|
+
/* :nodoc: */
|
346
392
|
static VALUE
|
347
393
|
strscan_clear(VALUE self)
|
348
394
|
{
|
@@ -351,7 +397,21 @@ strscan_clear(VALUE self)
|
|
351
397
|
}
|
352
398
|
|
353
399
|
/*
|
354
|
-
*
|
400
|
+
* :markup: markdown
|
401
|
+
* :include: strscan/link_refs.txt
|
402
|
+
*
|
403
|
+
* call-seq:
|
404
|
+
* string -> stored_string
|
405
|
+
*
|
406
|
+
* Returns the [stored string][1]:
|
407
|
+
*
|
408
|
+
* ```
|
409
|
+
* scanner = StringScanner.new('foobar')
|
410
|
+
* scanner.string # => "foobar"
|
411
|
+
* scanner.concat('baz')
|
412
|
+
* scanner.string # => "foobarbaz"
|
413
|
+
* ```
|
414
|
+
*
|
355
415
|
*/
|
356
416
|
static VALUE
|
357
417
|
strscan_get_string(VALUE self)
|
@@ -363,10 +423,39 @@ strscan_get_string(VALUE self)
|
|
363
423
|
}
|
364
424
|
|
365
425
|
/*
|
366
|
-
*
|
426
|
+
* :markup: markdown
|
427
|
+
* :include: strscan/link_refs.txt
|
428
|
+
*
|
429
|
+
* call-seq:
|
430
|
+
* string = other_string -> other_string
|
431
|
+
*
|
432
|
+
* Replaces the [stored string][1] with the given `other_string`:
|
433
|
+
*
|
434
|
+
* - Sets both [positions][11] to zero.
|
435
|
+
* - Clears [match values][9].
|
436
|
+
* - Returns `other_string`.
|
437
|
+
*
|
438
|
+
* ```
|
439
|
+
* scanner = StringScanner.new('foobar')
|
440
|
+
* scanner.scan(/foo/)
|
441
|
+
* put_situation(scanner)
|
442
|
+
* # Situation:
|
443
|
+
* # pos: 3
|
444
|
+
* # charpos: 3
|
445
|
+
* # rest: "bar"
|
446
|
+
* # rest_size: 3
|
447
|
+
* match_values_cleared?(scanner) # => false
|
448
|
+
*
|
449
|
+
* scanner.string = 'baz' # => "baz"
|
450
|
+
* put_situation(scanner)
|
451
|
+
* # Situation:
|
452
|
+
* # pos: 0
|
453
|
+
* # charpos: 0
|
454
|
+
* # rest: "baz"
|
455
|
+
* # rest_size: 3
|
456
|
+
* match_values_cleared?(scanner) # => true
|
457
|
+
* ```
|
367
458
|
*
|
368
|
-
* Changes the string being scanned to +str+ and resets the scanner.
|
369
|
-
* Returns +str+.
|
370
459
|
*/
|
371
460
|
static VALUE
|
372
461
|
strscan_set_string(VALUE self, VALUE str)
|
@@ -381,18 +470,33 @@ strscan_set_string(VALUE self, VALUE str)
|
|
381
470
|
}
|
382
471
|
|
383
472
|
/*
|
384
|
-
*
|
385
|
-
*
|
386
|
-
* <<(str)
|
473
|
+
* :markup: markdown
|
474
|
+
* :include: strscan/link_refs.txt
|
387
475
|
*
|
388
|
-
*
|
389
|
-
*
|
476
|
+
* call-seq:
|
477
|
+
* concat(more_string) -> self
|
478
|
+
*
|
479
|
+
* - Appends the given `more_string`
|
480
|
+
* to the [stored string][1].
|
481
|
+
* - Returns `self`.
|
482
|
+
* - Does not affect the [positions][11]
|
483
|
+
* or [match values][9].
|
484
|
+
*
|
485
|
+
*
|
486
|
+
* ```
|
487
|
+
* scanner = StringScanner.new('foo')
|
488
|
+
* scanner.string # => "foo"
|
489
|
+
* scanner.terminate
|
490
|
+
* scanner.concat('barbaz') # => #<StringScanner 3/9 "foo" @ "barba...">
|
491
|
+
* scanner.string # => "foobarbaz"
|
492
|
+
* put_situation(scanner)
|
493
|
+
* # Situation:
|
494
|
+
* # pos: 3
|
495
|
+
* # charpos: 3
|
496
|
+
* # rest: "barbaz"
|
497
|
+
* # rest_size: 6
|
498
|
+
* ```
|
390
499
|
*
|
391
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
392
|
-
* s.scan(/Fri /)
|
393
|
-
* s << " +1000 GMT"
|
394
|
-
* s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
|
395
|
-
* s.scan(/Dec/) # -> "Dec"
|
396
500
|
*/
|
397
501
|
static VALUE
|
398
502
|
strscan_concat(VALUE self, VALUE str)
|
@@ -406,18 +510,9 @@ strscan_concat(VALUE self, VALUE str)
|
|
406
510
|
}
|
407
511
|
|
408
512
|
/*
|
409
|
-
*
|
410
|
-
*
|
411
|
-
*
|
412
|
-
*
|
413
|
-
* In short, it's a 0-based index into bytes of the string.
|
414
|
-
*
|
415
|
-
* s = StringScanner.new('test string')
|
416
|
-
* s.pos # -> 0
|
417
|
-
* s.scan_until /str/ # -> "test str"
|
418
|
-
* s.pos # -> 8
|
419
|
-
* s.terminate # -> #<StringScanner fin>
|
420
|
-
* s.pos # -> 11
|
513
|
+
* :markup: markdown
|
514
|
+
* :include: strscan/link_refs.txt
|
515
|
+
* :include: strscan/methods/get_pos.md
|
421
516
|
*/
|
422
517
|
static VALUE
|
423
518
|
strscan_get_pos(VALUE self)
|
@@ -429,17 +524,9 @@ strscan_get_pos(VALUE self)
|
|
429
524
|
}
|
430
525
|
|
431
526
|
/*
|
432
|
-
*
|
433
|
-
*
|
434
|
-
*
|
435
|
-
*
|
436
|
-
* In short, it's a 0-based index into the string.
|
437
|
-
*
|
438
|
-
* s = StringScanner.new("abc\u00e4def\u00f6ghi")
|
439
|
-
* s.charpos # -> 0
|
440
|
-
* s.scan_until(/\u00e4/) # -> "abc\u00E4"
|
441
|
-
* s.pos # -> 5
|
442
|
-
* s.charpos # -> 4
|
527
|
+
* :markup: markdown
|
528
|
+
* :include: strscan/link_refs.txt
|
529
|
+
* :include: strscan/methods/get_charpos.md
|
443
530
|
*/
|
444
531
|
static VALUE
|
445
532
|
strscan_get_charpos(VALUE self)
|
@@ -452,13 +539,9 @@ strscan_get_charpos(VALUE self)
|
|
452
539
|
}
|
453
540
|
|
454
541
|
/*
|
455
|
-
*
|
456
|
-
*
|
457
|
-
*
|
458
|
-
*
|
459
|
-
* s = StringScanner.new('test string')
|
460
|
-
* s.pos = 7 # -> 7
|
461
|
-
* s.rest # -> "ring"
|
542
|
+
* :markup: markdown
|
543
|
+
* :include: strscan/link_refs.txt
|
544
|
+
* :include: strscan/methods/set_pos.md
|
462
545
|
*/
|
463
546
|
static VALUE
|
464
547
|
strscan_set_pos(VALUE self, VALUE v)
|
@@ -546,12 +629,13 @@ rb_reg_onig_match(VALUE re, VALUE str,
|
|
546
629
|
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
|
547
630
|
void *args, struct re_registers *regs)
|
548
631
|
{
|
632
|
+
OnigPosition result;
|
549
633
|
regex_t *reg = rb_reg_prepare_re(re, str);
|
550
634
|
|
551
635
|
bool tmpreg = reg != RREGEXP_PTR(re);
|
552
636
|
if (!tmpreg) RREGEXP(re)->usecnt++;
|
553
637
|
|
554
|
-
|
638
|
+
result = match(reg, str, regs, args);
|
555
639
|
|
556
640
|
if (!tmpreg) RREGEXP(re)->usecnt--;
|
557
641
|
if (tmpreg) {
|
@@ -601,19 +685,19 @@ strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_pt
|
|
601
685
|
ONIG_OPTION_NONE);
|
602
686
|
}
|
603
687
|
|
688
|
+
static void
|
689
|
+
strscan_enc_check(VALUE str1, VALUE str2)
|
690
|
+
{
|
691
|
+
if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) {
|
692
|
+
rb_enc_check(str1, str2);
|
693
|
+
}
|
694
|
+
}
|
695
|
+
|
604
696
|
static VALUE
|
605
697
|
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
606
698
|
{
|
607
699
|
struct strscanner *p;
|
608
700
|
|
609
|
-
if (headonly) {
|
610
|
-
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
611
|
-
StringValue(pattern);
|
612
|
-
}
|
613
|
-
}
|
614
|
-
else {
|
615
|
-
Check_Type(pattern, T_REGEXP);
|
616
|
-
}
|
617
701
|
GET_SCANNER(self, p);
|
618
702
|
|
619
703
|
CLEAR_MATCH_STATUS(p);
|
@@ -622,26 +706,42 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|
622
706
|
}
|
623
707
|
|
624
708
|
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
709
|
+
OnigPosition ret;
|
625
710
|
p->regex = pattern;
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
711
|
+
ret = rb_reg_onig_match(p->regex,
|
712
|
+
p->str,
|
713
|
+
headonly ? strscan_match : strscan_search,
|
714
|
+
(void *)p,
|
715
|
+
&(p->regs));
|
631
716
|
|
632
717
|
if (ret == ONIG_MISMATCH) {
|
633
718
|
return Qnil;
|
634
719
|
}
|
635
720
|
}
|
636
721
|
else {
|
637
|
-
|
722
|
+
StringValue(pattern);
|
638
723
|
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
724
|
+
strscan_enc_check(p->str, pattern);
|
639
725
|
return Qnil;
|
640
726
|
}
|
641
|
-
|
642
|
-
|
727
|
+
|
728
|
+
if (headonly) {
|
729
|
+
strscan_enc_check(p->str, pattern);
|
730
|
+
|
731
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
732
|
+
return Qnil;
|
733
|
+
}
|
734
|
+
set_registers(p, RSTRING_LEN(pattern));
|
735
|
+
}
|
736
|
+
else {
|
737
|
+
rb_encoding *enc = rb_enc_check(p->str, pattern);
|
738
|
+
long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
|
739
|
+
CURPTR(p), S_RESTLEN(p), enc);
|
740
|
+
if (pos == -1) {
|
741
|
+
return Qnil;
|
742
|
+
}
|
743
|
+
set_registers(p, RSTRING_LEN(pattern) + pos);
|
643
744
|
}
|
644
|
-
set_registers(p, RSTRING_LEN(pattern));
|
645
745
|
}
|
646
746
|
|
647
747
|
MATCHED(p);
|
@@ -662,20 +762,9 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|
662
762
|
}
|
663
763
|
|
664
764
|
/*
|
665
|
-
*
|
666
|
-
*
|
667
|
-
*
|
668
|
-
* the scanner advances the "scan pointer" and returns the matched string.
|
669
|
-
* Otherwise, the scanner returns +nil+.
|
670
|
-
*
|
671
|
-
* s = StringScanner.new('test string')
|
672
|
-
* p s.scan(/\w+/) # -> "test"
|
673
|
-
* p s.scan(/\w+/) # -> nil
|
674
|
-
* p s.scan(/\s+/) # -> " "
|
675
|
-
* p s.scan("str") # -> "str"
|
676
|
-
* p s.scan(/\w+/) # -> "ing"
|
677
|
-
* p s.scan(/./) # -> nil
|
678
|
-
*
|
765
|
+
* :markup: markdown
|
766
|
+
* :include: strscan/link_refs.txt
|
767
|
+
* :include: strscan/methods/scan.md
|
679
768
|
*/
|
680
769
|
static VALUE
|
681
770
|
strscan_scan(VALUE self, VALUE re)
|
@@ -684,16 +773,60 @@ strscan_scan(VALUE self, VALUE re)
|
|
684
773
|
}
|
685
774
|
|
686
775
|
/*
|
687
|
-
*
|
776
|
+
* :markup: markdown
|
777
|
+
* :include: strscan/link_refs.txt
|
688
778
|
*
|
689
|
-
*
|
690
|
-
*
|
779
|
+
* call-seq:
|
780
|
+
* match?(pattern) -> updated_position or nil
|
781
|
+
*
|
782
|
+
* Attempts to [match][17] the given `pattern`
|
783
|
+
* at the beginning of the [target substring][3];
|
784
|
+
* does not modify the [positions][11].
|
785
|
+
*
|
786
|
+
* If the match succeeds:
|
787
|
+
*
|
788
|
+
* - Sets [match values][9].
|
789
|
+
* - Returns the size in bytes of the matched substring.
|
790
|
+
*
|
791
|
+
*
|
792
|
+
* ```
|
793
|
+
* scanner = StringScanner.new('foobarbaz')
|
794
|
+
* scanner.pos = 3
|
795
|
+
* scanner.match?(/bar/) # => 3
|
796
|
+
* put_match_values(scanner)
|
797
|
+
* # Basic match values:
|
798
|
+
* # matched?: true
|
799
|
+
* # matched_size: 3
|
800
|
+
* # pre_match: "foo"
|
801
|
+
* # matched : "bar"
|
802
|
+
* # post_match: "baz"
|
803
|
+
* # Captured match values:
|
804
|
+
* # size: 1
|
805
|
+
* # captures: []
|
806
|
+
* # named_captures: {}
|
807
|
+
* # values_at: ["bar", nil]
|
808
|
+
* # []:
|
809
|
+
* # [0]: "bar"
|
810
|
+
* # [1]: nil
|
811
|
+
* put_situation(scanner)
|
812
|
+
* # Situation:
|
813
|
+
* # pos: 3
|
814
|
+
* # charpos: 3
|
815
|
+
* # rest: "barbaz"
|
816
|
+
* # rest_size: 6
|
817
|
+
* ```
|
818
|
+
*
|
819
|
+
* If the match fails:
|
820
|
+
*
|
821
|
+
* - Clears match values.
|
822
|
+
* - Returns `nil`.
|
823
|
+
* - Does not increment positions.
|
824
|
+
*
|
825
|
+
* ```
|
826
|
+
* scanner.match?(/nope/) # => nil
|
827
|
+
* match_values_cleared?(scanner) # => true
|
828
|
+
* ```
|
691
829
|
*
|
692
|
-
* s = StringScanner.new('test string')
|
693
|
-
* p s.match?(/\w+/) # -> 4
|
694
|
-
* p s.match?(/\w+/) # -> 4
|
695
|
-
* p s.match?("test") # -> 4
|
696
|
-
* p s.match?(/\s+/) # -> nil
|
697
830
|
*/
|
698
831
|
static VALUE
|
699
832
|
strscan_match_p(VALUE self, VALUE re)
|
@@ -702,22 +835,9 @@ strscan_match_p(VALUE self, VALUE re)
|
|
702
835
|
}
|
703
836
|
|
704
837
|
/*
|
705
|
-
*
|
706
|
-
*
|
707
|
-
*
|
708
|
-
* If it matches, the scan pointer is advanced to the end of the match, and the
|
709
|
-
* length of the match is returned. Otherwise, +nil+ is returned.
|
710
|
-
*
|
711
|
-
* It's similar to #scan, but without returning the matched string.
|
712
|
-
*
|
713
|
-
* s = StringScanner.new('test string')
|
714
|
-
* p s.skip(/\w+/) # -> 4
|
715
|
-
* p s.skip(/\w+/) # -> nil
|
716
|
-
* p s.skip(/\s+/) # -> 1
|
717
|
-
* p s.skip("st") # -> 2
|
718
|
-
* p s.skip(/\w+/) # -> 4
|
719
|
-
* p s.skip(/./) # -> nil
|
720
|
-
*
|
838
|
+
* :markup: markdown
|
839
|
+
* :include: strscan/link_refs.txt
|
840
|
+
* :include: strscan/methods/skip.md
|
721
841
|
*/
|
722
842
|
static VALUE
|
723
843
|
strscan_skip(VALUE self, VALUE re)
|
@@ -726,19 +846,59 @@ strscan_skip(VALUE self, VALUE re)
|
|
726
846
|
}
|
727
847
|
|
728
848
|
/*
|
729
|
-
*
|
730
|
-
*
|
731
|
-
* This returns the value that #scan would return, without advancing the scan
|
732
|
-
* pointer. The match register is affected, though.
|
849
|
+
* :markup: markdown
|
850
|
+
* :include: strscan/link_refs.txt
|
733
851
|
*
|
734
|
-
*
|
735
|
-
*
|
736
|
-
*
|
737
|
-
*
|
738
|
-
*
|
739
|
-
*
|
852
|
+
* call-seq:
|
853
|
+
* check(pattern) -> matched_substring or nil
|
854
|
+
*
|
855
|
+
* Attempts to [match][17] the given `pattern`
|
856
|
+
* at the beginning of the [target substring][3];
|
857
|
+
* does not modify the [positions][11].
|
858
|
+
*
|
859
|
+
* If the match succeeds:
|
860
|
+
*
|
861
|
+
* - Returns the matched substring.
|
862
|
+
* - Sets all [match values][9].
|
863
|
+
*
|
864
|
+
* ```
|
865
|
+
* scanner = StringScanner.new('foobarbaz')
|
866
|
+
* scanner.pos = 3
|
867
|
+
* scanner.check('bar') # => "bar"
|
868
|
+
* put_match_values(scanner)
|
869
|
+
* # Basic match values:
|
870
|
+
* # matched?: true
|
871
|
+
* # matched_size: 3
|
872
|
+
* # pre_match: "foo"
|
873
|
+
* # matched : "bar"
|
874
|
+
* # post_match: "baz"
|
875
|
+
* # Captured match values:
|
876
|
+
* # size: 1
|
877
|
+
* # captures: []
|
878
|
+
* # named_captures: {}
|
879
|
+
* # values_at: ["bar", nil]
|
880
|
+
* # []:
|
881
|
+
* # [0]: "bar"
|
882
|
+
* # [1]: nil
|
883
|
+
* # => 0..1
|
884
|
+
* put_situation(scanner)
|
885
|
+
* # Situation:
|
886
|
+
* # pos: 3
|
887
|
+
* # charpos: 3
|
888
|
+
* # rest: "barbaz"
|
889
|
+
* # rest_size: 6
|
890
|
+
* ```
|
891
|
+
*
|
892
|
+
* If the match fails:
|
893
|
+
*
|
894
|
+
* - Returns `nil`.
|
895
|
+
* - Clears all [match values][9].
|
896
|
+
*
|
897
|
+
* ```
|
898
|
+
* scanner.check(/nope/) # => nil
|
899
|
+
* match_values_cleared?(scanner) # => true
|
900
|
+
* ```
|
740
901
|
*
|
741
|
-
* Mnemonic: it "checks" to see whether a #scan will return a value.
|
742
902
|
*/
|
743
903
|
static VALUE
|
744
904
|
strscan_check(VALUE self, VALUE re)
|
@@ -747,15 +907,24 @@ strscan_check(VALUE self, VALUE re)
|
|
747
907
|
}
|
748
908
|
|
749
909
|
/*
|
750
|
-
* call-seq:
|
910
|
+
* call-seq:
|
911
|
+
* scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or nil
|
912
|
+
*
|
913
|
+
* Equivalent to one of the following:
|
914
|
+
*
|
915
|
+
* - +advance_pointer_p+ +true+:
|
916
|
+
*
|
917
|
+
* - +return_string_p+ +true+: StringScanner#scan(pattern).
|
918
|
+
* - +return_string_p+ +false+: StringScanner#skip(pattern).
|
751
919
|
*
|
752
|
-
*
|
753
|
-
*
|
754
|
-
*
|
755
|
-
*
|
920
|
+
* - +advance_pointer_p+ +false+:
|
921
|
+
*
|
922
|
+
* - +return_string_p+ +true+: StringScanner#check(pattern).
|
923
|
+
* - +return_string_p+ +false+: StringScanner#match?(pattern).
|
756
924
|
*
|
757
|
-
* "full" means "#scan with full parameters".
|
758
925
|
*/
|
926
|
+
|
927
|
+
/* :nodoc: */
|
759
928
|
static VALUE
|
760
929
|
strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
761
930
|
{
|
@@ -763,16 +932,9 @@ strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
|
763
932
|
}
|
764
933
|
|
765
934
|
/*
|
766
|
-
*
|
767
|
-
*
|
768
|
-
*
|
769
|
-
* to and including the end of the match, advancing the scan pointer to that
|
770
|
-
* location. If there is no match, +nil+ is returned.
|
771
|
-
*
|
772
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
773
|
-
* s.scan_until(/1/) # -> "Fri Dec 1"
|
774
|
-
* s.pre_match # -> "Fri Dec "
|
775
|
-
* s.scan_until(/XYZ/) # -> nil
|
935
|
+
* :markup: markdown
|
936
|
+
* :include: strscan/link_refs.txt
|
937
|
+
* :include: strscan/methods/scan_until.md
|
776
938
|
*/
|
777
939
|
static VALUE
|
778
940
|
strscan_scan_until(VALUE self, VALUE re)
|
@@ -781,17 +943,61 @@ strscan_scan_until(VALUE self, VALUE re)
|
|
781
943
|
}
|
782
944
|
|
783
945
|
/*
|
784
|
-
*
|
946
|
+
* :markup: markdown
|
947
|
+
* :include: strscan/link_refs.txt
|
785
948
|
*
|
786
|
-
*
|
787
|
-
*
|
788
|
-
*
|
949
|
+
* call-seq:
|
950
|
+
* exist?(pattern) -> byte_offset or nil
|
951
|
+
*
|
952
|
+
* Attempts to [match][17] the given `pattern`
|
953
|
+
* anywhere (at any [position][2])
|
954
|
+
* in the [target substring][3];
|
955
|
+
* does not modify the [positions][11].
|
956
|
+
*
|
957
|
+
* If the match succeeds:
|
958
|
+
*
|
959
|
+
* - Returns a byte offset:
|
960
|
+
* the distance in bytes between the current [position][2]
|
961
|
+
* and the end of the matched substring.
|
962
|
+
* - Sets all [match values][9].
|
963
|
+
*
|
964
|
+
* ```
|
965
|
+
* scanner = StringScanner.new('foobarbazbatbam')
|
966
|
+
* scanner.pos = 6
|
967
|
+
* scanner.exist?(/bat/) # => 6
|
968
|
+
* put_match_values(scanner)
|
969
|
+
* # Basic match values:
|
970
|
+
* # matched?: true
|
971
|
+
* # matched_size: 3
|
972
|
+
* # pre_match: "foobarbaz"
|
973
|
+
* # matched : "bat"
|
974
|
+
* # post_match: "bam"
|
975
|
+
* # Captured match values:
|
976
|
+
* # size: 1
|
977
|
+
* # captures: []
|
978
|
+
* # named_captures: {}
|
979
|
+
* # values_at: ["bat", nil]
|
980
|
+
* # []:
|
981
|
+
* # [0]: "bat"
|
982
|
+
* # [1]: nil
|
983
|
+
* put_situation(scanner)
|
984
|
+
* # Situation:
|
985
|
+
* # pos: 6
|
986
|
+
* # charpos: 6
|
987
|
+
* # rest: "bazbatbam"
|
988
|
+
* # rest_size: 9
|
989
|
+
* ```
|
990
|
+
*
|
991
|
+
* If the match fails:
|
992
|
+
*
|
993
|
+
* - Returns `nil`.
|
994
|
+
* - Clears all [match values][9].
|
995
|
+
*
|
996
|
+
* ```
|
997
|
+
* scanner.exist?(/nope/) # => nil
|
998
|
+
* match_values_cleared?(scanner) # => true
|
999
|
+
* ```
|
789
1000
|
*
|
790
|
-
* s = StringScanner.new('test string')
|
791
|
-
* s.exist? /s/ # -> 3
|
792
|
-
* s.scan /test/ # -> "test"
|
793
|
-
* s.exist? /s/ # -> 2
|
794
|
-
* s.exist? /e/ # -> nil
|
795
1001
|
*/
|
796
1002
|
static VALUE
|
797
1003
|
strscan_exist_p(VALUE self, VALUE re)
|
@@ -800,20 +1006,9 @@ strscan_exist_p(VALUE self, VALUE re)
|
|
800
1006
|
}
|
801
1007
|
|
802
1008
|
/*
|
803
|
-
*
|
804
|
-
*
|
805
|
-
*
|
806
|
-
* the number of bytes advanced, or +nil+ if no match was found.
|
807
|
-
*
|
808
|
-
* Look ahead to match +pattern+, and advance the scan pointer to the _end_
|
809
|
-
* of the match. Return the number of characters advanced, or +nil+ if the
|
810
|
-
* match was unsuccessful.
|
811
|
-
*
|
812
|
-
* It's similar to #scan_until, but without returning the intervening string.
|
813
|
-
*
|
814
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
815
|
-
* s.skip_until /12/ # -> 10
|
816
|
-
* s #
|
1009
|
+
* :markup: markdown
|
1010
|
+
* :include: strscan/link_refs.txt
|
1011
|
+
* :include: strscan/methods/skip_until.md
|
817
1012
|
*/
|
818
1013
|
static VALUE
|
819
1014
|
strscan_skip_until(VALUE self, VALUE re)
|
@@ -822,17 +1017,61 @@ strscan_skip_until(VALUE self, VALUE re)
|
|
822
1017
|
}
|
823
1018
|
|
824
1019
|
/*
|
825
|
-
*
|
1020
|
+
* :markup: markdown
|
1021
|
+
* :include: strscan/link_refs.txt
|
826
1022
|
*
|
827
|
-
*
|
828
|
-
*
|
829
|
-
*
|
830
|
-
*
|
831
|
-
*
|
832
|
-
*
|
833
|
-
*
|
1023
|
+
* call-seq:
|
1024
|
+
* check_until(pattern) -> substring or nil
|
1025
|
+
*
|
1026
|
+
* Attempts to [match][17] the given `pattern`
|
1027
|
+
* anywhere (at any [position][2])
|
1028
|
+
* in the [target substring][3];
|
1029
|
+
* does not modify the [positions][11].
|
1030
|
+
*
|
1031
|
+
* If the match succeeds:
|
1032
|
+
*
|
1033
|
+
* - Sets all [match values][9].
|
1034
|
+
* - Returns the matched substring,
|
1035
|
+
* which extends from the current [position][2]
|
1036
|
+
* to the end of the matched substring.
|
1037
|
+
*
|
1038
|
+
* ```
|
1039
|
+
* scanner = StringScanner.new('foobarbazbatbam')
|
1040
|
+
* scanner.pos = 6
|
1041
|
+
* scanner.check_until(/bat/) # => "bazbat"
|
1042
|
+
* put_match_values(scanner)
|
1043
|
+
* # Basic match values:
|
1044
|
+
* # matched?: true
|
1045
|
+
* # matched_size: 3
|
1046
|
+
* # pre_match: "foobarbaz"
|
1047
|
+
* # matched : "bat"
|
1048
|
+
* # post_match: "bam"
|
1049
|
+
* # Captured match values:
|
1050
|
+
* # size: 1
|
1051
|
+
* # captures: []
|
1052
|
+
* # named_captures: {}
|
1053
|
+
* # values_at: ["bat", nil]
|
1054
|
+
* # []:
|
1055
|
+
* # [0]: "bat"
|
1056
|
+
* # [1]: nil
|
1057
|
+
* put_situation(scanner)
|
1058
|
+
* # Situation:
|
1059
|
+
* # pos: 6
|
1060
|
+
* # charpos: 6
|
1061
|
+
* # rest: "bazbatbam"
|
1062
|
+
* # rest_size: 9
|
1063
|
+
* ```
|
1064
|
+
*
|
1065
|
+
* If the match fails:
|
1066
|
+
*
|
1067
|
+
* - Clears all [match values][9].
|
1068
|
+
* - Returns `nil`.
|
1069
|
+
*
|
1070
|
+
* ```
|
1071
|
+
* scanner.check_until(/nope/) # => nil
|
1072
|
+
* match_values_cleared?(scanner) # => true
|
1073
|
+
* ```
|
834
1074
|
*
|
835
|
-
* Mnemonic: it "checks" to see whether a #scan_until will return a value.
|
836
1075
|
*/
|
837
1076
|
static VALUE
|
838
1077
|
strscan_check_until(VALUE self, VALUE re)
|
@@ -841,14 +1080,24 @@ strscan_check_until(VALUE self, VALUE re)
|
|
841
1080
|
}
|
842
1081
|
|
843
1082
|
/*
|
844
|
-
* call-seq:
|
1083
|
+
* call-seq:
|
1084
|
+
* search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil
|
1085
|
+
*
|
1086
|
+
* Equivalent to one of the following:
|
1087
|
+
*
|
1088
|
+
* - +advance_pointer_p+ +true+:
|
1089
|
+
*
|
1090
|
+
* - +return_string_p+ +true+: StringScanner#scan_until(pattern).
|
1091
|
+
* - +return_string_p+ +false+: StringScanner#skip_until(pattern).
|
1092
|
+
*
|
1093
|
+
* - +advance_pointer_p+ +false+:
|
1094
|
+
*
|
1095
|
+
* - +return_string_p+ +true+: StringScanner#check_until(pattern).
|
1096
|
+
* - +return_string_p+ +false+: StringScanner#exist?(pattern).
|
845
1097
|
*
|
846
|
-
* Scans the string _until_ the +pattern+ is matched.
|
847
|
-
* Advances the scan pointer if +advance_pointer_p+, otherwise not.
|
848
|
-
* Returns the matched string if +return_string_p+ is true, otherwise
|
849
|
-
* returns the number of bytes advanced.
|
850
|
-
* This method does affect the match register.
|
851
1098
|
*/
|
1099
|
+
|
1100
|
+
/* :nodoc: */
|
852
1101
|
static VALUE
|
853
1102
|
strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
854
1103
|
{
|
@@ -868,17 +1117,9 @@ adjust_registers_to_matched(struct strscanner *p)
|
|
868
1117
|
}
|
869
1118
|
|
870
1119
|
/*
|
871
|
-
*
|
872
|
-
*
|
873
|
-
*
|
874
|
-
* s = StringScanner.new("ab")
|
875
|
-
* s.getch # => "a"
|
876
|
-
* s.getch # => "b"
|
877
|
-
* s.getch # => nil
|
878
|
-
*
|
879
|
-
* s = StringScanner.new("\244\242".force_encoding("euc-jp"))
|
880
|
-
* s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
|
881
|
-
* s.getch # => nil
|
1120
|
+
* :markup: markdown
|
1121
|
+
* :include: strscan/link_refs.txt
|
1122
|
+
* :include: strscan/methods/getch.md
|
882
1123
|
*/
|
883
1124
|
static VALUE
|
884
1125
|
strscan_getch(VALUE self)
|
@@ -903,19 +1144,55 @@ strscan_getch(VALUE self)
|
|
903
1144
|
}
|
904
1145
|
|
905
1146
|
/*
|
906
|
-
*
|
1147
|
+
* call-seq:
|
1148
|
+
* scan_byte -> integer_byte
|
1149
|
+
*
|
1150
|
+
* Scans one byte and returns it as an integer.
|
907
1151
|
* This method is not multibyte character sensitive.
|
908
1152
|
* See also: #getch.
|
909
1153
|
*
|
1154
|
+
*/
|
1155
|
+
static VALUE
|
1156
|
+
strscan_scan_byte(VALUE self)
|
1157
|
+
{
|
1158
|
+
struct strscanner *p;
|
1159
|
+
VALUE byte;
|
1160
|
+
|
1161
|
+
GET_SCANNER(self, p);
|
1162
|
+
CLEAR_MATCH_STATUS(p);
|
1163
|
+
if (EOS_P(p))
|
1164
|
+
return Qnil;
|
1165
|
+
|
1166
|
+
byte = INT2FIX((unsigned char)*CURPTR(p));
|
1167
|
+
p->prev = p->curr;
|
1168
|
+
p->curr++;
|
1169
|
+
MATCHED(p);
|
1170
|
+
adjust_registers_to_matched(p);
|
1171
|
+
return byte;
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
/*
|
1175
|
+
* Peeks at the current byte and returns it as an integer.
|
1176
|
+
*
|
910
1177
|
* s = StringScanner.new('ab')
|
911
|
-
* s.
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
*
|
917
|
-
|
918
|
-
|
1178
|
+
* s.peek_byte # => 97
|
1179
|
+
*/
|
1180
|
+
static VALUE
|
1181
|
+
strscan_peek_byte(VALUE self)
|
1182
|
+
{
|
1183
|
+
struct strscanner *p;
|
1184
|
+
|
1185
|
+
GET_SCANNER(self, p);
|
1186
|
+
if (EOS_P(p))
|
1187
|
+
return Qnil;
|
1188
|
+
|
1189
|
+
return INT2FIX((unsigned char)*CURPTR(p));
|
1190
|
+
}
|
1191
|
+
|
1192
|
+
/*
|
1193
|
+
* :markup: markdown
|
1194
|
+
* :include: strscan/link_refs.txt
|
1195
|
+
* :include: strscan/methods/get_byte.md
|
919
1196
|
*/
|
920
1197
|
static VALUE
|
921
1198
|
strscan_get_byte(VALUE self)
|
@@ -937,9 +1214,14 @@ strscan_get_byte(VALUE self)
|
|
937
1214
|
}
|
938
1215
|
|
939
1216
|
/*
|
1217
|
+
* call-seq:
|
1218
|
+
* getbyte
|
1219
|
+
*
|
940
1220
|
* Equivalent to #get_byte.
|
941
1221
|
* This method is obsolete; use #get_byte instead.
|
942
1222
|
*/
|
1223
|
+
|
1224
|
+
/* :nodoc: */
|
943
1225
|
static VALUE
|
944
1226
|
strscan_getbyte(VALUE self)
|
945
1227
|
{
|
@@ -948,14 +1230,22 @@ strscan_getbyte(VALUE self)
|
|
948
1230
|
}
|
949
1231
|
|
950
1232
|
/*
|
951
|
-
*
|
1233
|
+
* :markup: markdown
|
1234
|
+
* :include: strscan/link_refs.txt
|
1235
|
+
*
|
1236
|
+
* call-seq:
|
1237
|
+
* peek(length) -> substring
|
952
1238
|
*
|
953
|
-
*
|
954
|
-
*
|
1239
|
+
* Returns the substring `string[pos, length]`;
|
1240
|
+
* does not update [match values][9] or [positions][11]:
|
955
1241
|
*
|
956
|
-
*
|
957
|
-
*
|
958
|
-
*
|
1242
|
+
* ```
|
1243
|
+
* scanner = StringScanner.new('foobarbaz')
|
1244
|
+
* scanner.pos = 3
|
1245
|
+
* scanner.peek(3) # => "bar"
|
1246
|
+
* scanner.terminate
|
1247
|
+
* scanner.peek(3) # => ""
|
1248
|
+
* ```
|
959
1249
|
*
|
960
1250
|
*/
|
961
1251
|
static VALUE
|
@@ -975,9 +1265,14 @@ strscan_peek(VALUE self, VALUE vlen)
|
|
975
1265
|
}
|
976
1266
|
|
977
1267
|
/*
|
1268
|
+
* call-seq:
|
1269
|
+
* peep
|
1270
|
+
*
|
978
1271
|
* Equivalent to #peek.
|
979
1272
|
* This method is obsolete; use #peek instead.
|
980
1273
|
*/
|
1274
|
+
|
1275
|
+
/* :nodoc: */
|
981
1276
|
static VALUE
|
982
1277
|
strscan_peep(VALUE self, VALUE vlen)
|
983
1278
|
{
|
@@ -985,16 +1280,156 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
985
1280
|
return strscan_peek(self, vlen);
|
986
1281
|
}
|
987
1282
|
|
1283
|
+
static VALUE
|
1284
|
+
strscan_parse_integer(struct strscanner *p, int base, long len)
|
1285
|
+
{
|
1286
|
+
VALUE buffer_v, integer;
|
1287
|
+
|
1288
|
+
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
1289
|
+
|
1290
|
+
MEMCPY(buffer, CURPTR(p), char, len);
|
1291
|
+
buffer[len] = '\0';
|
1292
|
+
integer = rb_cstr2inum(buffer, base);
|
1293
|
+
RB_ALLOCV_END(buffer_v);
|
1294
|
+
p->curr += len;
|
1295
|
+
return integer;
|
1296
|
+
}
|
1297
|
+
|
1298
|
+
static inline bool
|
1299
|
+
strscan_ascii_compat_fastpath(VALUE str) {
|
1300
|
+
int encindex = ENCODING_GET_INLINED(str);
|
1301
|
+
// The overwhelming majority of strings are in one of these 3 encodings.
|
1302
|
+
return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex;
|
1303
|
+
}
|
1304
|
+
|
1305
|
+
static inline void
|
1306
|
+
strscan_must_ascii_compat(VALUE str)
|
1307
|
+
{
|
1308
|
+
// The overwhelming majority of strings are in one of these 3 encodings.
|
1309
|
+
if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) {
|
1310
|
+
return;
|
1311
|
+
}
|
1312
|
+
|
1313
|
+
rb_must_asciicompat(str);
|
1314
|
+
}
|
1315
|
+
|
1316
|
+
static VALUE
|
1317
|
+
strscan_scan_base10_integer(VALUE self)
|
1318
|
+
{
|
1319
|
+
char *ptr;
|
1320
|
+
long len = 0;
|
1321
|
+
struct strscanner *p;
|
1322
|
+
|
1323
|
+
GET_SCANNER(self, p);
|
1324
|
+
CLEAR_MATCH_STATUS(p);
|
1325
|
+
|
1326
|
+
strscan_must_ascii_compat(p->str);
|
1327
|
+
|
1328
|
+
ptr = CURPTR(p);
|
1329
|
+
|
1330
|
+
long remaining_len = S_RESTLEN(p);
|
1331
|
+
|
1332
|
+
if (remaining_len <= 0) {
|
1333
|
+
return Qnil;
|
1334
|
+
}
|
1335
|
+
|
1336
|
+
if (ptr[len] == '-' || ptr[len] == '+') {
|
1337
|
+
len++;
|
1338
|
+
}
|
1339
|
+
|
1340
|
+
if (!rb_isdigit(ptr[len])) {
|
1341
|
+
return Qnil;
|
1342
|
+
}
|
1343
|
+
|
1344
|
+
MATCHED(p);
|
1345
|
+
p->prev = p->curr;
|
1346
|
+
|
1347
|
+
while (len < remaining_len && rb_isdigit(ptr[len])) {
|
1348
|
+
len++;
|
1349
|
+
}
|
1350
|
+
|
1351
|
+
return strscan_parse_integer(p, 10, len);
|
1352
|
+
}
|
1353
|
+
|
1354
|
+
static VALUE
|
1355
|
+
strscan_scan_base16_integer(VALUE self)
|
1356
|
+
{
|
1357
|
+
char *ptr;
|
1358
|
+
long len = 0;
|
1359
|
+
struct strscanner *p;
|
1360
|
+
|
1361
|
+
GET_SCANNER(self, p);
|
1362
|
+
CLEAR_MATCH_STATUS(p);
|
1363
|
+
|
1364
|
+
strscan_must_ascii_compat(p->str);
|
1365
|
+
|
1366
|
+
ptr = CURPTR(p);
|
1367
|
+
|
1368
|
+
long remaining_len = S_RESTLEN(p);
|
1369
|
+
|
1370
|
+
if (remaining_len <= 0) {
|
1371
|
+
return Qnil;
|
1372
|
+
}
|
1373
|
+
|
1374
|
+
if (ptr[len] == '-' || ptr[len] == '+') {
|
1375
|
+
len++;
|
1376
|
+
}
|
1377
|
+
|
1378
|
+
if ((remaining_len >= (len + 2)) && ptr[len] == '0' && ptr[len + 1] == 'x') {
|
1379
|
+
len += 2;
|
1380
|
+
}
|
1381
|
+
|
1382
|
+
if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
|
1383
|
+
return Qnil;
|
1384
|
+
}
|
1385
|
+
|
1386
|
+
MATCHED(p);
|
1387
|
+
p->prev = p->curr;
|
1388
|
+
|
1389
|
+
while (len < remaining_len && rb_isxdigit(ptr[len])) {
|
1390
|
+
len++;
|
1391
|
+
}
|
1392
|
+
|
1393
|
+
return strscan_parse_integer(p, 16, len);
|
1394
|
+
}
|
1395
|
+
|
988
1396
|
/*
|
989
|
-
*
|
990
|
-
*
|
1397
|
+
* :markup: markdown
|
1398
|
+
* :include: strscan/link_refs.txt
|
1399
|
+
*
|
1400
|
+
* call-seq:
|
1401
|
+
* unscan -> self
|
1402
|
+
*
|
1403
|
+
* Sets the [position][2] to its value previous to the most recent successful
|
1404
|
+
* [match][17] attempt:
|
1405
|
+
*
|
1406
|
+
* ```
|
1407
|
+
* scanner = StringScanner.new('foobarbaz')
|
1408
|
+
* scanner.scan(/foo/)
|
1409
|
+
* put_situation(scanner)
|
1410
|
+
* # Situation:
|
1411
|
+
* # pos: 3
|
1412
|
+
* # charpos: 3
|
1413
|
+
* # rest: "barbaz"
|
1414
|
+
* # rest_size: 6
|
1415
|
+
* scanner.unscan
|
1416
|
+
* # => #<StringScanner 0/9 @ "fooba...">
|
1417
|
+
* put_situation(scanner)
|
1418
|
+
* # Situation:
|
1419
|
+
* # pos: 0
|
1420
|
+
* # charpos: 0
|
1421
|
+
* # rest: "foobarbaz"
|
1422
|
+
* # rest_size: 9
|
1423
|
+
* ```
|
1424
|
+
*
|
1425
|
+
* Raises an exception if match values are clear:
|
1426
|
+
*
|
1427
|
+
* ```
|
1428
|
+
* scanner.scan(/nope/) # => nil
|
1429
|
+
* match_values_cleared?(scanner) # => true
|
1430
|
+
* scanner.unscan # Raises StringScanner::Error.
|
1431
|
+
* ```
|
991
1432
|
*
|
992
|
-
* s = StringScanner.new('test string')
|
993
|
-
* s.scan(/\w+/) # => "test"
|
994
|
-
* s.unscan
|
995
|
-
* s.scan(/../) # => "te"
|
996
|
-
* s.scan(/\d/) # => nil
|
997
|
-
* s.unscan # ScanError: unscan failed: previous match record not exist
|
998
1433
|
*/
|
999
1434
|
static VALUE
|
1000
1435
|
strscan_unscan(VALUE self)
|
@@ -1010,16 +1445,37 @@ strscan_unscan(VALUE self)
|
|
1010
1445
|
}
|
1011
1446
|
|
1012
1447
|
/*
|
1013
|
-
*
|
1014
|
-
*
|
1015
|
-
*
|
1016
|
-
*
|
1017
|
-
*
|
1018
|
-
*
|
1019
|
-
*
|
1020
|
-
*
|
1021
|
-
*
|
1022
|
-
*
|
1448
|
+
*
|
1449
|
+
* :markup: markdown
|
1450
|
+
* :include: strscan/link_refs.txt
|
1451
|
+
*
|
1452
|
+
* call-seq:
|
1453
|
+
* beginning_of_line? -> true or false
|
1454
|
+
*
|
1455
|
+
* Returns whether the [position][2] is at the beginning of a line;
|
1456
|
+
* that is, at the beginning of the [stored string][1]
|
1457
|
+
* or immediately after a newline:
|
1458
|
+
*
|
1459
|
+
* scanner = StringScanner.new(MULTILINE_TEXT)
|
1460
|
+
* scanner.string
|
1461
|
+
* # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n"
|
1462
|
+
* scanner.pos # => 0
|
1463
|
+
* scanner.beginning_of_line? # => true
|
1464
|
+
*
|
1465
|
+
* scanner.scan_until(/,/) # => "Go placidly amid the noise and haste,"
|
1466
|
+
* scanner.beginning_of_line? # => false
|
1467
|
+
*
|
1468
|
+
* scanner.scan(/\n/) # => "\n"
|
1469
|
+
* scanner.beginning_of_line? # => true
|
1470
|
+
*
|
1471
|
+
* scanner.terminate
|
1472
|
+
* scanner.beginning_of_line? # => true
|
1473
|
+
*
|
1474
|
+
* scanner.concat('x')
|
1475
|
+
* scanner.terminate
|
1476
|
+
* scanner.beginning_of_line? # => false
|
1477
|
+
*
|
1478
|
+
* StringScanner#bol? is an alias for StringScanner#beginning_of_line?.
|
1023
1479
|
*/
|
1024
1480
|
static VALUE
|
1025
1481
|
strscan_bol_p(VALUE self)
|
@@ -1033,14 +1489,24 @@ strscan_bol_p(VALUE self)
|
|
1033
1489
|
}
|
1034
1490
|
|
1035
1491
|
/*
|
1036
|
-
*
|
1492
|
+
* :markup: markdown
|
1493
|
+
* :include: strscan/link_refs.txt
|
1494
|
+
*
|
1495
|
+
* call-seq:
|
1496
|
+
* eos? -> true or false
|
1497
|
+
*
|
1498
|
+
* Returns whether the [position][2]
|
1499
|
+
* is at the end of the [stored string][1]:
|
1500
|
+
*
|
1501
|
+
* ```
|
1502
|
+
* scanner = StringScanner.new('foobarbaz')
|
1503
|
+
* scanner.eos? # => false
|
1504
|
+
* scanner.pos = 3
|
1505
|
+
* scanner.eos? # => false
|
1506
|
+
* scanner.terminate
|
1507
|
+
* scanner.eos? # => true
|
1508
|
+
* ```
|
1037
1509
|
*
|
1038
|
-
* s = StringScanner.new('test string')
|
1039
|
-
* p s.eos? # => false
|
1040
|
-
* s.scan(/test/)
|
1041
|
-
* p s.eos? # => false
|
1042
|
-
* s.terminate
|
1043
|
-
* p s.eos? # => true
|
1044
1510
|
*/
|
1045
1511
|
static VALUE
|
1046
1512
|
strscan_eos_p(VALUE self)
|
@@ -1052,9 +1518,14 @@ strscan_eos_p(VALUE self)
|
|
1052
1518
|
}
|
1053
1519
|
|
1054
1520
|
/*
|
1521
|
+
* call-seq:
|
1522
|
+
* empty?
|
1523
|
+
*
|
1055
1524
|
* Equivalent to #eos?.
|
1056
1525
|
* This method is obsolete, use #eos? instead.
|
1057
1526
|
*/
|
1527
|
+
|
1528
|
+
/* :nodoc: */
|
1058
1529
|
static VALUE
|
1059
1530
|
strscan_empty_p(VALUE self)
|
1060
1531
|
{
|
@@ -1063,6 +1534,9 @@ strscan_empty_p(VALUE self)
|
|
1063
1534
|
}
|
1064
1535
|
|
1065
1536
|
/*
|
1537
|
+
* call-seq:
|
1538
|
+
* rest?
|
1539
|
+
*
|
1066
1540
|
* Returns true if and only if there is more data in the string. See #eos?.
|
1067
1541
|
* This method is obsolete; use #eos? instead.
|
1068
1542
|
*
|
@@ -1071,6 +1545,8 @@ strscan_empty_p(VALUE self)
|
|
1071
1545
|
* s.eos? # => false
|
1072
1546
|
* s.rest? # => true
|
1073
1547
|
*/
|
1548
|
+
|
1549
|
+
/* :nodoc: */
|
1074
1550
|
static VALUE
|
1075
1551
|
strscan_rest_p(VALUE self)
|
1076
1552
|
{
|
@@ -1081,13 +1557,26 @@ strscan_rest_p(VALUE self)
|
|
1081
1557
|
}
|
1082
1558
|
|
1083
1559
|
/*
|
1084
|
-
*
|
1560
|
+
* :markup: markdown
|
1561
|
+
* :include: strscan/link_refs.txt
|
1562
|
+
*
|
1563
|
+
* call-seq:
|
1564
|
+
* matched? -> true or false
|
1565
|
+
*
|
1566
|
+
* Returns `true` if the most recent [match attempt][17] was successful,
|
1567
|
+
* `false` otherwise;
|
1568
|
+
* see [Basic Matched Values][18]:
|
1569
|
+
*
|
1570
|
+
* ```
|
1571
|
+
* scanner = StringScanner.new('foobarbaz')
|
1572
|
+
* scanner.matched? # => false
|
1573
|
+
* scanner.pos = 3
|
1574
|
+
* scanner.exist?(/baz/) # => 6
|
1575
|
+
* scanner.matched? # => true
|
1576
|
+
* scanner.exist?(/nope/) # => nil
|
1577
|
+
* scanner.matched? # => false
|
1578
|
+
* ```
|
1085
1579
|
*
|
1086
|
-
* s = StringScanner.new('test string')
|
1087
|
-
* s.match?(/\w+/) # => 4
|
1088
|
-
* s.matched? # => true
|
1089
|
-
* s.match?(/\d+/) # => nil
|
1090
|
-
* s.matched? # => false
|
1091
1580
|
*/
|
1092
1581
|
static VALUE
|
1093
1582
|
strscan_matched_p(VALUE self)
|
@@ -1099,11 +1588,27 @@ strscan_matched_p(VALUE self)
|
|
1099
1588
|
}
|
1100
1589
|
|
1101
1590
|
/*
|
1102
|
-
*
|
1591
|
+
* :markup: markdown
|
1592
|
+
* :include: strscan/link_refs.txt
|
1593
|
+
*
|
1594
|
+
* call-seq:
|
1595
|
+
* matched -> matched_substring or nil
|
1596
|
+
*
|
1597
|
+
* Returns the matched substring from the most recent [match][17] attempt
|
1598
|
+
* if it was successful,
|
1599
|
+
* or `nil` otherwise;
|
1600
|
+
* see [Basic Matched Values][18]:
|
1601
|
+
*
|
1602
|
+
* ```
|
1603
|
+
* scanner = StringScanner.new('foobarbaz')
|
1604
|
+
* scanner.matched # => nil
|
1605
|
+
* scanner.pos = 3
|
1606
|
+
* scanner.match?(/bar/) # => 3
|
1607
|
+
* scanner.matched # => "bar"
|
1608
|
+
* scanner.match?(/nope/) # => nil
|
1609
|
+
* scanner.matched # => nil
|
1610
|
+
* ```
|
1103
1611
|
*
|
1104
|
-
* s = StringScanner.new('test string')
|
1105
|
-
* s.match?(/\w+/) # -> 4
|
1106
|
-
* s.matched # -> "test"
|
1107
1612
|
*/
|
1108
1613
|
static VALUE
|
1109
1614
|
strscan_matched(VALUE self)
|
@@ -1118,15 +1623,29 @@ strscan_matched(VALUE self)
|
|
1118
1623
|
}
|
1119
1624
|
|
1120
1625
|
/*
|
1121
|
-
*
|
1122
|
-
*
|
1123
|
-
*
|
1626
|
+
* :markup: markdown
|
1627
|
+
* :include: strscan/link_refs.txt
|
1628
|
+
*
|
1629
|
+
* call-seq:
|
1630
|
+
* matched_size -> substring_size or nil
|
1631
|
+
*
|
1632
|
+
* Returns the size (in bytes) of the matched substring
|
1633
|
+
* from the most recent [match attempt][17] if it was successful,
|
1634
|
+
* or `nil` otherwise;
|
1635
|
+
* see [Basic Matched Values][18]:
|
1636
|
+
*
|
1637
|
+
* ```
|
1638
|
+
* scanner = StringScanner.new('foobarbaz')
|
1639
|
+
* scanner.matched_size # => nil
|
1640
|
+
*
|
1641
|
+
* scanner.pos = 3
|
1642
|
+
* scanner.exist?(/baz/) # => 6
|
1643
|
+
* scanner.matched_size # => 3
|
1644
|
+
*
|
1645
|
+
* scanner.exist?(/nope/) # => nil
|
1646
|
+
* scanner.matched_size # => nil
|
1647
|
+
* ```
|
1124
1648
|
*
|
1125
|
-
* s = StringScanner.new('test string')
|
1126
|
-
* s.check /\w+/ # -> "test"
|
1127
|
-
* s.matched_size # -> 4
|
1128
|
-
* s.check /\d+/ # -> nil
|
1129
|
-
* s.matched_size # -> nil
|
1130
1649
|
*/
|
1131
1650
|
static VALUE
|
1132
1651
|
strscan_matched_size(VALUE self)
|
@@ -1157,30 +1676,75 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
|
|
1157
1676
|
}
|
1158
1677
|
|
1159
1678
|
/*
|
1160
|
-
*
|
1161
|
-
*
|
1162
|
-
*
|
1163
|
-
*
|
1164
|
-
*
|
1165
|
-
*
|
1166
|
-
*
|
1167
|
-
*
|
1168
|
-
*
|
1169
|
-
*
|
1170
|
-
*
|
1171
|
-
*
|
1172
|
-
*
|
1173
|
-
*
|
1174
|
-
*
|
1175
|
-
*
|
1176
|
-
*
|
1177
|
-
*
|
1178
|
-
*
|
1179
|
-
*
|
1180
|
-
*
|
1181
|
-
*
|
1182
|
-
*
|
1183
|
-
*
|
1679
|
+
*
|
1680
|
+
* :markup: markdown
|
1681
|
+
* :include: strscan/link_refs.txt
|
1682
|
+
*
|
1683
|
+
* call-seq:
|
1684
|
+
* [](specifier) -> substring or nil
|
1685
|
+
*
|
1686
|
+
* Returns a captured substring or `nil`;
|
1687
|
+
* see [Captured Match Values][13].
|
1688
|
+
*
|
1689
|
+
* When there are captures:
|
1690
|
+
*
|
1691
|
+
* ```
|
1692
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1693
|
+
* scanner.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
|
1694
|
+
* ```
|
1695
|
+
*
|
1696
|
+
* - `specifier` zero: returns the entire matched substring:
|
1697
|
+
*
|
1698
|
+
* ```
|
1699
|
+
* scanner[0] # => "Fri Dec 12 "
|
1700
|
+
* scanner.pre_match # => ""
|
1701
|
+
* scanner.post_match # => "1975 14:39"
|
1702
|
+
* ```
|
1703
|
+
*
|
1704
|
+
* - `specifier` positive integer: returns the `n`th capture, or `nil` if out of range:
|
1705
|
+
*
|
1706
|
+
* ```
|
1707
|
+
* scanner[1] # => "Fri"
|
1708
|
+
* scanner[2] # => "Dec"
|
1709
|
+
* scanner[3] # => "12"
|
1710
|
+
* scanner[4] # => nil
|
1711
|
+
* ```
|
1712
|
+
*
|
1713
|
+
* - `specifier` negative integer: counts backward from the last subgroup:
|
1714
|
+
*
|
1715
|
+
* ```
|
1716
|
+
* scanner[-1] # => "12"
|
1717
|
+
* scanner[-4] # => "Fri Dec 12 "
|
1718
|
+
* scanner[-5] # => nil
|
1719
|
+
* ```
|
1720
|
+
*
|
1721
|
+
* - `specifier` symbol or string: returns the named subgroup, or `nil` if no such:
|
1722
|
+
*
|
1723
|
+
* ```
|
1724
|
+
* scanner[:wday] # => "Fri"
|
1725
|
+
* scanner['wday'] # => "Fri"
|
1726
|
+
* scanner[:month] # => "Dec"
|
1727
|
+
* scanner[:day] # => "12"
|
1728
|
+
* scanner[:nope] # => nil
|
1729
|
+
* ```
|
1730
|
+
*
|
1731
|
+
* When there are no captures, only `[0]` returns non-`nil`:
|
1732
|
+
*
|
1733
|
+
* ```
|
1734
|
+
* scanner = StringScanner.new('foobarbaz')
|
1735
|
+
* scanner.exist?(/bar/)
|
1736
|
+
* scanner[0] # => "bar"
|
1737
|
+
* scanner[1] # => nil
|
1738
|
+
* ```
|
1739
|
+
*
|
1740
|
+
* For a failed match, even `[0]` returns `nil`:
|
1741
|
+
*
|
1742
|
+
* ```
|
1743
|
+
* scanner.scan(/nope/) # => nil
|
1744
|
+
* scanner[0] # => nil
|
1745
|
+
* scanner[1] # => nil
|
1746
|
+
* ```
|
1747
|
+
*
|
1184
1748
|
*/
|
1185
1749
|
static VALUE
|
1186
1750
|
strscan_aref(VALUE self, VALUE idx)
|
@@ -1217,14 +1781,28 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1217
1781
|
}
|
1218
1782
|
|
1219
1783
|
/*
|
1220
|
-
*
|
1784
|
+
* :markup: markdown
|
1785
|
+
* :include: strscan/link_refs.txt
|
1786
|
+
*
|
1787
|
+
* call-seq:
|
1788
|
+
* size -> captures_count or nil
|
1789
|
+
*
|
1790
|
+
* Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise;
|
1791
|
+
* see [Captured Match Values][13]:
|
1792
|
+
*
|
1793
|
+
* ```
|
1794
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1795
|
+
* scanner.size # => nil
|
1221
1796
|
*
|
1222
|
-
*
|
1223
|
-
*
|
1797
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
1798
|
+
* scanner.match?(pattern)
|
1799
|
+
* scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
|
1800
|
+
* scanner.size # => 4
|
1801
|
+
*
|
1802
|
+
* scanner.match?(/nope/) # => nil
|
1803
|
+
* scanner.size # => nil
|
1804
|
+
* ```
|
1224
1805
|
*
|
1225
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1226
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1227
|
-
* s.size # -> 4
|
1228
1806
|
*/
|
1229
1807
|
static VALUE
|
1230
1808
|
strscan_size(VALUE self)
|
@@ -1237,16 +1815,30 @@ strscan_size(VALUE self)
|
|
1237
1815
|
}
|
1238
1816
|
|
1239
1817
|
/*
|
1240
|
-
*
|
1818
|
+
* :markup: markdown
|
1819
|
+
* :include: strscan/link_refs.txt
|
1820
|
+
*
|
1821
|
+
* call-seq:
|
1822
|
+
* captures -> substring_array or nil
|
1241
1823
|
*
|
1242
|
-
* Returns the
|
1243
|
-
*
|
1824
|
+
* Returns the array of [captured match values][13] at indexes `(1..)`
|
1825
|
+
* if the most recent match attempt succeeded, or `nil` otherwise:
|
1826
|
+
*
|
1827
|
+
* ```
|
1828
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1829
|
+
* scanner.captures # => nil
|
1830
|
+
*
|
1831
|
+
* scanner.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
|
1832
|
+
* scanner.captures # => ["Fri", "Dec", "12"]
|
1833
|
+
* scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
|
1834
|
+
*
|
1835
|
+
* scanner.exist?(/Fri/)
|
1836
|
+
* scanner.captures # => []
|
1837
|
+
*
|
1838
|
+
* scanner.scan(/nope/)
|
1839
|
+
* scanner.captures # => nil
|
1840
|
+
* ```
|
1244
1841
|
*
|
1245
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1246
|
-
* s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 "
|
1247
|
-
* s.captures # -> ["Fri", "Dec", "12", nil]
|
1248
|
-
* s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil
|
1249
|
-
* s.captures # -> nil
|
1250
1842
|
*/
|
1251
1843
|
static VALUE
|
1252
1844
|
strscan_captures(VALUE self)
|
@@ -1276,17 +1868,25 @@ strscan_captures(VALUE self)
|
|
1276
1868
|
}
|
1277
1869
|
|
1278
1870
|
/*
|
1279
|
-
*
|
1280
|
-
*
|
1871
|
+
* :markup: markdown
|
1872
|
+
* :include: strscan/link_refs.txt
|
1281
1873
|
*
|
1282
|
-
*
|
1283
|
-
*
|
1874
|
+
* call-seq:
|
1875
|
+
* values_at(*specifiers) -> array_of_captures or nil
|
1876
|
+
*
|
1877
|
+
* Returns an array of captured substrings, or `nil` if none.
|
1878
|
+
*
|
1879
|
+
* For each `specifier`, the returned substring is `[specifier]`;
|
1880
|
+
* see #[].
|
1881
|
+
*
|
1882
|
+
* ```
|
1883
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1884
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
1885
|
+
* scanner.match?(pattern)
|
1886
|
+
* scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"]
|
1887
|
+
* scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"]
|
1888
|
+
* ```
|
1284
1889
|
*
|
1285
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1286
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1287
|
-
* s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
|
1288
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> nil
|
1289
|
-
* s.values_at 0, -1, 5, 2 # -> nil
|
1290
1890
|
*/
|
1291
1891
|
|
1292
1892
|
static VALUE
|
@@ -1308,13 +1908,29 @@ strscan_values_at(int argc, VALUE *argv, VALUE self)
|
|
1308
1908
|
}
|
1309
1909
|
|
1310
1910
|
/*
|
1311
|
-
*
|
1911
|
+
* :markup: markdown
|
1912
|
+
* :include: strscan/link_refs.txt
|
1913
|
+
*
|
1914
|
+
* call-seq:
|
1915
|
+
* pre_match -> substring or nil
|
1916
|
+
*
|
1917
|
+
* Returns the substring that precedes the matched substring
|
1918
|
+
* from the most recent match attempt if it was successful,
|
1919
|
+
* or `nil` otherwise;
|
1920
|
+
* see [Basic Match Values][18]:
|
1921
|
+
*
|
1922
|
+
* ```
|
1923
|
+
* scanner = StringScanner.new('foobarbaz')
|
1924
|
+
* scanner.pre_match # => nil
|
1925
|
+
*
|
1926
|
+
* scanner.pos = 3
|
1927
|
+
* scanner.exist?(/baz/) # => 6
|
1928
|
+
* scanner.pre_match # => "foobar" # Substring of entire string, not just target string.
|
1929
|
+
*
|
1930
|
+
* scanner.exist?(/nope/) # => nil
|
1931
|
+
* scanner.pre_match # => nil
|
1932
|
+
* ```
|
1312
1933
|
*
|
1313
|
-
* s = StringScanner.new('test string')
|
1314
|
-
* s.scan(/\w+/) # -> "test"
|
1315
|
-
* s.scan(/\s+/) # -> " "
|
1316
|
-
* s.pre_match # -> "test"
|
1317
|
-
* s.post_match # -> "string"
|
1318
1934
|
*/
|
1319
1935
|
static VALUE
|
1320
1936
|
strscan_pre_match(VALUE self)
|
@@ -1329,13 +1945,29 @@ strscan_pre_match(VALUE self)
|
|
1329
1945
|
}
|
1330
1946
|
|
1331
1947
|
/*
|
1332
|
-
*
|
1948
|
+
* :markup: markdown
|
1949
|
+
* :include: strscan/link_refs.txt
|
1950
|
+
*
|
1951
|
+
* call-seq:
|
1952
|
+
* post_match -> substring or nil
|
1953
|
+
*
|
1954
|
+
* Returns the substring that follows the matched substring
|
1955
|
+
* from the most recent match attempt if it was successful,
|
1956
|
+
* or `nil` otherwise;
|
1957
|
+
* see [Basic Match Values][18]:
|
1958
|
+
*
|
1959
|
+
* ```
|
1960
|
+
* scanner = StringScanner.new('foobarbaz')
|
1961
|
+
* scanner.post_match # => nil
|
1962
|
+
*
|
1963
|
+
* scanner.pos = 3
|
1964
|
+
* scanner.match?(/bar/) # => 3
|
1965
|
+
* scanner.post_match # => "baz"
|
1966
|
+
*
|
1967
|
+
* scanner.match?(/nope/) # => nil
|
1968
|
+
* scanner.post_match # => nil
|
1969
|
+
* ```
|
1333
1970
|
*
|
1334
|
-
* s = StringScanner.new('test string')
|
1335
|
-
* s.scan(/\w+/) # -> "test"
|
1336
|
-
* s.scan(/\s+/) # -> " "
|
1337
|
-
* s.pre_match # -> "test"
|
1338
|
-
* s.post_match # -> "string"
|
1339
1971
|
*/
|
1340
1972
|
static VALUE
|
1341
1973
|
strscan_post_match(VALUE self)
|
@@ -1350,8 +1982,24 @@ strscan_post_match(VALUE self)
|
|
1350
1982
|
}
|
1351
1983
|
|
1352
1984
|
/*
|
1353
|
-
*
|
1354
|
-
*
|
1985
|
+
* :markup: markdown
|
1986
|
+
* :include: strscan/link_refs.txt
|
1987
|
+
*
|
1988
|
+
* call-seq:
|
1989
|
+
* rest -> target_substring
|
1990
|
+
*
|
1991
|
+
* Returns the 'rest' of the [stored string][1] (all after the current [position][2]),
|
1992
|
+
* which is the [target substring][3]:
|
1993
|
+
*
|
1994
|
+
* ```
|
1995
|
+
* scanner = StringScanner.new('foobarbaz')
|
1996
|
+
* scanner.rest # => "foobarbaz"
|
1997
|
+
* scanner.pos = 3
|
1998
|
+
* scanner.rest # => "barbaz"
|
1999
|
+
* scanner.terminate
|
2000
|
+
* scanner.rest # => ""
|
2001
|
+
* ```
|
2002
|
+
*
|
1355
2003
|
*/
|
1356
2004
|
static VALUE
|
1357
2005
|
strscan_rest(VALUE self)
|
@@ -1366,7 +2014,26 @@ strscan_rest(VALUE self)
|
|
1366
2014
|
}
|
1367
2015
|
|
1368
2016
|
/*
|
1369
|
-
*
|
2017
|
+
* :markup: markdown
|
2018
|
+
* :include: strscan/link_refs.txt
|
2019
|
+
*
|
2020
|
+
* call-seq:
|
2021
|
+
* rest_size -> integer
|
2022
|
+
*
|
2023
|
+
* Returns the size (in bytes) of the #rest of the [stored string][1]:
|
2024
|
+
*
|
2025
|
+
* ```
|
2026
|
+
* scanner = StringScanner.new('foobarbaz')
|
2027
|
+
* scanner.rest # => "foobarbaz"
|
2028
|
+
* scanner.rest_size # => 9
|
2029
|
+
* scanner.pos = 3
|
2030
|
+
* scanner.rest # => "barbaz"
|
2031
|
+
* scanner.rest_size # => 6
|
2032
|
+
* scanner.terminate
|
2033
|
+
* scanner.rest # => ""
|
2034
|
+
* scanner.rest_size # => 0
|
2035
|
+
* ```
|
2036
|
+
*
|
1370
2037
|
*/
|
1371
2038
|
static VALUE
|
1372
2039
|
strscan_rest_size(VALUE self)
|
@@ -1383,9 +2050,14 @@ strscan_rest_size(VALUE self)
|
|
1383
2050
|
}
|
1384
2051
|
|
1385
2052
|
/*
|
2053
|
+
* call-seq:
|
2054
|
+
* restsize
|
2055
|
+
*
|
1386
2056
|
* <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
|
1387
2057
|
* This method is obsolete; use #rest_size instead.
|
1388
2058
|
*/
|
2059
|
+
|
2060
|
+
/* :nodoc: */
|
1389
2061
|
static VALUE
|
1390
2062
|
strscan_restsize(VALUE self)
|
1391
2063
|
{
|
@@ -1396,15 +2068,39 @@ strscan_restsize(VALUE self)
|
|
1396
2068
|
#define INSPECT_LENGTH 5
|
1397
2069
|
|
1398
2070
|
/*
|
1399
|
-
*
|
1400
|
-
*
|
1401
|
-
*
|
1402
|
-
* -
|
1403
|
-
*
|
1404
|
-
*
|
1405
|
-
*
|
1406
|
-
*
|
1407
|
-
*
|
2071
|
+
* :markup: markdown
|
2072
|
+
* :include: strscan/link_refs.txt
|
2073
|
+
*
|
2074
|
+
* call-seq:
|
2075
|
+
* inspect -> string
|
2076
|
+
*
|
2077
|
+
* Returns a string representation of `self` that may show:
|
2078
|
+
*
|
2079
|
+
* 1. The current [position][2].
|
2080
|
+
* 2. The size (in bytes) of the [stored string][1].
|
2081
|
+
* 3. The substring preceding the current position.
|
2082
|
+
* 4. The substring following the current position (which is also the [target substring][3]).
|
2083
|
+
*
|
2084
|
+
* ```
|
2085
|
+
* scanner = StringScanner.new("Fri Dec 12 1975 14:39")
|
2086
|
+
* scanner.pos = 11
|
2087
|
+
* scanner.inspect # => "#<StringScanner 11/21 \"...c 12 \" @ \"1975 ...\">"
|
2088
|
+
* ```
|
2089
|
+
*
|
2090
|
+
* If at beginning-of-string, item 3 above (preceding substring) is omitted:
|
2091
|
+
*
|
2092
|
+
* ```
|
2093
|
+
* scanner.reset
|
2094
|
+
* scanner.inspect # => "#<StringScanner 0/21 @ \"Fri D...\">"
|
2095
|
+
* ```
|
2096
|
+
*
|
2097
|
+
* If at end-of-string, all items above are omitted:
|
2098
|
+
*
|
2099
|
+
* ```
|
2100
|
+
* scanner.terminate
|
2101
|
+
* scanner.inspect # => "#<StringScanner fin>"
|
2102
|
+
* ```
|
2103
|
+
*
|
1408
2104
|
*/
|
1409
2105
|
static VALUE
|
1410
2106
|
strscan_inspect(VALUE self)
|
@@ -1476,13 +2172,13 @@ inspect2(struct strscanner *p)
|
|
1476
2172
|
}
|
1477
2173
|
|
1478
2174
|
/*
|
1479
|
-
*
|
1480
|
-
*
|
2175
|
+
* :markup: markdown
|
2176
|
+
* :include: strscan/link_refs.txt
|
1481
2177
|
*
|
1482
|
-
*
|
2178
|
+
* call-seq:
|
2179
|
+
* fixed_anchor? -> true or false
|
1483
2180
|
*
|
1484
|
-
*
|
1485
|
-
* the string. Otherwise, +\A+ always matches the current position.
|
2181
|
+
* Returns whether the [fixed-anchor property][10] is set.
|
1486
2182
|
*/
|
1487
2183
|
static VALUE
|
1488
2184
|
strscan_fixed_anchor_p(VALUE self)
|
@@ -1518,21 +2214,39 @@ named_captures_iter(const OnigUChar *name,
|
|
1518
2214
|
}
|
1519
2215
|
|
1520
2216
|
/*
|
2217
|
+
* :markup: markdown
|
2218
|
+
* :include: strscan/link_refs.txt
|
2219
|
+
*
|
1521
2220
|
* call-seq:
|
1522
|
-
*
|
2221
|
+
* named_captures -> hash
|
2222
|
+
*
|
2223
|
+
* Returns a hash of the named captures for the most recent match attempt,
|
2224
|
+
* with capture names as keys and captured substrings (or `nil`) as values;
|
2225
|
+
* see [Captured Match Values][13]:
|
1523
2226
|
*
|
1524
|
-
*
|
2227
|
+
* ```
|
2228
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
2229
|
+
* scanner.named_captures # => {}
|
2230
|
+
*
|
2231
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
2232
|
+
* scanner.match?(pattern)
|
2233
|
+
* scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"}
|
2234
|
+
*
|
2235
|
+
* scanner.string = 'nope'
|
2236
|
+
* scanner.match?(pattern)
|
2237
|
+
* scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil}
|
2238
|
+
*
|
2239
|
+
* scanner.match?(/nosuch/)
|
2240
|
+
* scanner.named_captures # => {}
|
2241
|
+
* ```
|
1525
2242
|
*
|
1526
|
-
* scan = StringScanner.new('foobarbaz')
|
1527
|
-
* scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/)
|
1528
|
-
* scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"}
|
1529
2243
|
*/
|
1530
2244
|
static VALUE
|
1531
2245
|
strscan_named_captures(VALUE self)
|
1532
2246
|
{
|
1533
2247
|
struct strscanner *p;
|
1534
|
-
GET_SCANNER(self, p);
|
1535
2248
|
named_captures_data data;
|
2249
|
+
GET_SCANNER(self, p);
|
1536
2250
|
data.self = self;
|
1537
2251
|
data.captures = rb_hash_new();
|
1538
2252
|
if (!RB_NIL_P(p->regex)) {
|
@@ -1549,107 +2263,11 @@ strscan_named_captures(VALUE self)
|
|
1549
2263
|
/*
|
1550
2264
|
* Document-class: StringScanner
|
1551
2265
|
*
|
1552
|
-
*
|
1553
|
-
* an example of its usage:
|
1554
|
-
*
|
1555
|
-
* require 'strscan'
|
1556
|
-
*
|
1557
|
-
* s = StringScanner.new('This is an example string')
|
1558
|
-
* s.eos? # -> false
|
1559
|
-
*
|
1560
|
-
* p s.scan(/\w+/) # -> "This"
|
1561
|
-
* p s.scan(/\w+/) # -> nil
|
1562
|
-
* p s.scan(/\s+/) # -> " "
|
1563
|
-
* p s.scan(/\s+/) # -> nil
|
1564
|
-
* p s.scan(/\w+/) # -> "is"
|
1565
|
-
* s.eos? # -> false
|
1566
|
-
*
|
1567
|
-
* p s.scan(/\s+/) # -> " "
|
1568
|
-
* p s.scan(/\w+/) # -> "an"
|
1569
|
-
* p s.scan(/\s+/) # -> " "
|
1570
|
-
* p s.scan(/\w+/) # -> "example"
|
1571
|
-
* p s.scan(/\s+/) # -> " "
|
1572
|
-
* p s.scan(/\w+/) # -> "string"
|
1573
|
-
* s.eos? # -> true
|
1574
|
-
*
|
1575
|
-
* p s.scan(/\s+/) # -> nil
|
1576
|
-
* p s.scan(/\w+/) # -> nil
|
2266
|
+
* :markup: markdown
|
1577
2267
|
*
|
1578
|
-
*
|
1579
|
-
*
|
1580
|
-
* a time, so matches are sought after the scan pointer; usually immediately
|
1581
|
-
* after it.
|
2268
|
+
* :include: strscan/link_refs.txt
|
2269
|
+
* :include: strscan/strscan.md
|
1582
2270
|
*
|
1583
|
-
* Given the string "test string", here are the pertinent scan pointer
|
1584
|
-
* positions:
|
1585
|
-
*
|
1586
|
-
* t e s t s t r i n g
|
1587
|
-
* 0 1 2 ... 1
|
1588
|
-
* 0
|
1589
|
-
*
|
1590
|
-
* When you #scan for a pattern (a regular expression), the match must occur
|
1591
|
-
* at the character after the scan pointer. If you use #scan_until, then the
|
1592
|
-
* match can occur anywhere after the scan pointer. In both cases, the scan
|
1593
|
-
* pointer moves <i>just beyond</i> the last character of the match, ready to
|
1594
|
-
* scan again from the next character onwards. This is demonstrated by the
|
1595
|
-
* example above.
|
1596
|
-
*
|
1597
|
-
* == Method Categories
|
1598
|
-
*
|
1599
|
-
* There are other methods besides the plain scanners. You can look ahead in
|
1600
|
-
* the string without actually scanning. You can access the most recent match.
|
1601
|
-
* You can modify the string being scanned, reset or terminate the scanner,
|
1602
|
-
* find out or change the position of the scan pointer, skip ahead, and so on.
|
1603
|
-
*
|
1604
|
-
* === Advancing the Scan Pointer
|
1605
|
-
*
|
1606
|
-
* - #getch
|
1607
|
-
* - #get_byte
|
1608
|
-
* - #scan
|
1609
|
-
* - #scan_until
|
1610
|
-
* - #skip
|
1611
|
-
* - #skip_until
|
1612
|
-
*
|
1613
|
-
* === Looking Ahead
|
1614
|
-
*
|
1615
|
-
* - #check
|
1616
|
-
* - #check_until
|
1617
|
-
* - #exist?
|
1618
|
-
* - #match?
|
1619
|
-
* - #peek
|
1620
|
-
*
|
1621
|
-
* === Finding Where we Are
|
1622
|
-
*
|
1623
|
-
* - #beginning_of_line? (<tt>#bol?</tt>)
|
1624
|
-
* - #eos?
|
1625
|
-
* - #rest?
|
1626
|
-
* - #rest_size
|
1627
|
-
* - #pos
|
1628
|
-
*
|
1629
|
-
* === Setting Where we Are
|
1630
|
-
*
|
1631
|
-
* - #reset
|
1632
|
-
* - #terminate
|
1633
|
-
* - #pos=
|
1634
|
-
*
|
1635
|
-
* === Match Data
|
1636
|
-
*
|
1637
|
-
* - #matched
|
1638
|
-
* - #matched?
|
1639
|
-
* - #matched_size
|
1640
|
-
* - <tt>#[]</tt>
|
1641
|
-
* - #pre_match
|
1642
|
-
* - #post_match
|
1643
|
-
*
|
1644
|
-
* === Miscellaneous
|
1645
|
-
*
|
1646
|
-
* - <tt><<</tt>
|
1647
|
-
* - #concat
|
1648
|
-
* - #string
|
1649
|
-
* - #string=
|
1650
|
-
* - #unscan
|
1651
|
-
*
|
1652
|
-
* There are aliases to several of the methods.
|
1653
2271
|
*/
|
1654
2272
|
void
|
1655
2273
|
Init_strscan(void)
|
@@ -1664,6 +2282,10 @@ Init_strscan(void)
|
|
1664
2282
|
|
1665
2283
|
id_byteslice = rb_intern("byteslice");
|
1666
2284
|
|
2285
|
+
usascii_encindex = rb_usascii_encindex();
|
2286
|
+
utf8_encindex = rb_utf8_encindex();
|
2287
|
+
binary_encindex = rb_ascii8bit_encindex();
|
2288
|
+
|
1667
2289
|
StringScanner = rb_define_class("StringScanner", rb_cObject);
|
1668
2290
|
ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
|
1669
2291
|
if (!rb_const_defined(rb_cObject, id_scanerr)) {
|
@@ -1708,9 +2330,14 @@ Init_strscan(void)
|
|
1708
2330
|
rb_define_method(StringScanner, "getch", strscan_getch, 0);
|
1709
2331
|
rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
|
1710
2332
|
rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
|
2333
|
+
rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
|
1711
2334
|
rb_define_method(StringScanner, "peek", strscan_peek, 1);
|
2335
|
+
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
|
1712
2336
|
rb_define_method(StringScanner, "peep", strscan_peep, 1);
|
1713
2337
|
|
2338
|
+
rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
|
2339
|
+
rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
|
2340
|
+
|
1714
2341
|
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
|
1715
2342
|
|
1716
2343
|
rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
|
@@ -1738,4 +2365,6 @@ Init_strscan(void)
|
|
1738
2365
|
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1739
2366
|
|
1740
2367
|
rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
|
2368
|
+
|
2369
|
+
rb_require("strscan/strscan");
|
1741
2370
|
}
|