strscan 3.1.0 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rdoc_options +3 -0
- data/doc/strscan/helper_methods.md +124 -0
- data/doc/strscan/link_refs.txt +17 -0
- data/doc/strscan/methods/get_byte.md +30 -0
- data/doc/strscan/methods/get_charpos.md +19 -0
- data/doc/strscan/methods/get_pos.md +14 -0
- data/doc/strscan/methods/getch.md +43 -0
- data/doc/strscan/methods/scan.md +51 -0
- data/doc/strscan/methods/scan_until.md +52 -0
- data/doc/strscan/methods/set_pos.md +27 -0
- data/doc/strscan/methods/skip.md +43 -0
- data/doc/strscan/methods/skip_until.md +49 -0
- data/doc/strscan/methods/terminate.md +30 -0
- data/doc/strscan/strscan.md +544 -0
- data/ext/strscan/strscan.c +1066 -437
- data/lib/strscan/strscan.rb +25 -0
- metadata +39 -9
data/ext/strscan/strscan.c
CHANGED
@@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
|
|
22
22
|
|
23
23
|
#include <stdbool.h>
|
24
24
|
|
25
|
-
#define STRSCAN_VERSION "3.1.0"
|
25
|
+
#define STRSCAN_VERSION "3.1.2"
|
26
26
|
|
27
27
|
/* =======================================================================
|
28
28
|
Data Type Definitions
|
@@ -32,6 +32,8 @@ static VALUE StringScanner;
|
|
32
32
|
static VALUE ScanError;
|
33
33
|
static ID id_byteslice;
|
34
34
|
|
35
|
+
static int usascii_encindex, utf8_encindex, binary_encindex;
|
36
|
+
|
35
37
|
struct strscanner
|
36
38
|
{
|
37
39
|
/* multi-purpose flags */
|
@@ -115,6 +117,7 @@ static VALUE strscan_get_byte _((VALUE self));
|
|
115
117
|
static VALUE strscan_getbyte _((VALUE self));
|
116
118
|
static VALUE strscan_peek _((VALUE self, VALUE len));
|
117
119
|
static VALUE strscan_peep _((VALUE self, VALUE len));
|
120
|
+
static VALUE strscan_scan_base10_integer _((VALUE self));
|
118
121
|
static VALUE strscan_unscan _((VALUE self));
|
119
122
|
static VALUE strscan_bol_p _((VALUE self));
|
120
123
|
static VALUE strscan_eos_p _((VALUE self));
|
@@ -218,16 +221,28 @@ strscan_s_allocate(VALUE klass)
|
|
218
221
|
}
|
219
222
|
|
220
223
|
/*
|
221
|
-
*
|
222
|
-
*
|
223
|
-
* StringScanner.new(string, dup = false)
|
224
|
-
*
|
225
|
-
* Creates a new StringScanner object to scan over the given +string+.
|
224
|
+
* :markup: markdown
|
225
|
+
* :include: strscan/link_refs.txt
|
226
226
|
*
|
227
|
-
*
|
228
|
-
*
|
227
|
+
* call-seq:
|
228
|
+
* StringScanner.new(string, fixed_anchor: false) -> string_scanner
|
229
|
+
*
|
230
|
+
* Returns a new `StringScanner` object whose [stored string][1]
|
231
|
+
* is the given `string`;
|
232
|
+
* sets the [fixed-anchor property][10]:
|
233
|
+
*
|
234
|
+
* ```rb
|
235
|
+
* scanner = StringScanner.new('foobarbaz')
|
236
|
+
* scanner.string # => "foobarbaz"
|
237
|
+
* scanner.fixed_anchor? # => false
|
238
|
+
* put_situation(scanner)
|
239
|
+
* # Situation:
|
240
|
+
* # pos: 0
|
241
|
+
* # charpos: 0
|
242
|
+
* # rest: "foobarbaz"
|
243
|
+
* # rest_size: 9
|
244
|
+
* ```
|
229
245
|
*
|
230
|
-
* +dup+ argument is obsolete and not used now.
|
231
246
|
*/
|
232
247
|
static VALUE
|
233
248
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
@@ -266,11 +281,14 @@ check_strscan(VALUE obj)
|
|
266
281
|
}
|
267
282
|
|
268
283
|
/*
|
284
|
+
* :markup: markdown
|
285
|
+
* :include: strscan/link_refs.txt
|
286
|
+
*
|
269
287
|
* call-seq:
|
270
|
-
* dup
|
271
|
-
* clone
|
288
|
+
* dup -> shallow_copy
|
272
289
|
*
|
273
|
-
*
|
290
|
+
* Returns a shallow copy of `self`;
|
291
|
+
* the [stored string][1] in the copy is the same string as in `self`.
|
274
292
|
*/
|
275
293
|
static VALUE
|
276
294
|
strscan_init_copy(VALUE vself, VALUE vorig)
|
@@ -297,10 +315,13 @@ strscan_init_copy(VALUE vself, VALUE vorig)
|
|
297
315
|
======================================================================= */
|
298
316
|
|
299
317
|
/*
|
300
|
-
* call-seq:
|
318
|
+
* call-seq:
|
319
|
+
* StringScanner.must_C_version -> self
|
301
320
|
*
|
302
|
-
*
|
321
|
+
* Returns +self+; defined for backward compatibility.
|
303
322
|
*/
|
323
|
+
|
324
|
+
/* :nodoc: */
|
304
325
|
static VALUE
|
305
326
|
strscan_s_mustc(VALUE self)
|
306
327
|
{
|
@@ -308,7 +329,30 @@ strscan_s_mustc(VALUE self)
|
|
308
329
|
}
|
309
330
|
|
310
331
|
/*
|
311
|
-
*
|
332
|
+
* :markup: markdown
|
333
|
+
* :include: strscan/link_refs.txt
|
334
|
+
*
|
335
|
+
* call-seq:
|
336
|
+
* reset -> self
|
337
|
+
*
|
338
|
+
* Sets both [byte position][2] and [character position][7] to zero,
|
339
|
+
* and clears [match values][9];
|
340
|
+
* returns +self+:
|
341
|
+
*
|
342
|
+
* ```rb
|
343
|
+
* scanner = StringScanner.new('foobarbaz')
|
344
|
+
* scanner.exist?(/bar/) # => 6
|
345
|
+
* scanner.reset # => #<StringScanner 0/9 @ "fooba...">
|
346
|
+
* put_situation(scanner)
|
347
|
+
* # Situation:
|
348
|
+
* # pos: 0
|
349
|
+
* # charpos: 0
|
350
|
+
* # rest: "foobarbaz"
|
351
|
+
* # rest_size: 9
|
352
|
+
* # => nil
|
353
|
+
* match_values_cleared?(scanner) # => true
|
354
|
+
* ```
|
355
|
+
*
|
312
356
|
*/
|
313
357
|
static VALUE
|
314
358
|
strscan_reset(VALUE self)
|
@@ -322,11 +366,9 @@ strscan_reset(VALUE self)
|
|
322
366
|
}
|
323
367
|
|
324
368
|
/*
|
325
|
-
*
|
326
|
-
*
|
327
|
-
*
|
328
|
-
*
|
329
|
-
* Sets the scan pointer to the end of the string and clear matching data.
|
369
|
+
* :markup: markdown
|
370
|
+
* :include: strscan/link_refs.txt
|
371
|
+
* :include: strscan/methods/terminate.md
|
330
372
|
*/
|
331
373
|
static VALUE
|
332
374
|
strscan_terminate(VALUE self)
|
@@ -340,9 +382,13 @@ strscan_terminate(VALUE self)
|
|
340
382
|
}
|
341
383
|
|
342
384
|
/*
|
343
|
-
*
|
344
|
-
*
|
385
|
+
* call-seq:
|
386
|
+
* clear -> self
|
387
|
+
*
|
388
|
+
* This method is obsolete; use the equivalent method StringScanner#terminate.
|
345
389
|
*/
|
390
|
+
|
391
|
+
/* :nodoc: */
|
346
392
|
static VALUE
|
347
393
|
strscan_clear(VALUE self)
|
348
394
|
{
|
@@ -351,7 +397,21 @@ strscan_clear(VALUE self)
|
|
351
397
|
}
|
352
398
|
|
353
399
|
/*
|
354
|
-
*
|
400
|
+
* :markup: markdown
|
401
|
+
* :include: strscan/link_refs.txt
|
402
|
+
*
|
403
|
+
* call-seq:
|
404
|
+
* string -> stored_string
|
405
|
+
*
|
406
|
+
* Returns the [stored string][1]:
|
407
|
+
*
|
408
|
+
* ```rb
|
409
|
+
* scanner = StringScanner.new('foobar')
|
410
|
+
* scanner.string # => "foobar"
|
411
|
+
* scanner.concat('baz')
|
412
|
+
* scanner.string # => "foobarbaz"
|
413
|
+
* ```
|
414
|
+
*
|
355
415
|
*/
|
356
416
|
static VALUE
|
357
417
|
strscan_get_string(VALUE self)
|
@@ -363,10 +423,39 @@ strscan_get_string(VALUE self)
|
|
363
423
|
}
|
364
424
|
|
365
425
|
/*
|
366
|
-
*
|
426
|
+
* :markup: markdown
|
427
|
+
* :include: strscan/link_refs.txt
|
428
|
+
*
|
429
|
+
* call-seq:
|
430
|
+
* string = other_string -> other_string
|
431
|
+
*
|
432
|
+
* Replaces the [stored string][1] with the given `other_string`:
|
433
|
+
*
|
434
|
+
* - Sets both [positions][11] to zero.
|
435
|
+
* - Clears [match values][9].
|
436
|
+
* - Returns `other_string`.
|
437
|
+
*
|
438
|
+
* ```rb
|
439
|
+
* scanner = StringScanner.new('foobar')
|
440
|
+
* scanner.scan(/foo/)
|
441
|
+
* put_situation(scanner)
|
442
|
+
* # Situation:
|
443
|
+
* # pos: 3
|
444
|
+
* # charpos: 3
|
445
|
+
* # rest: "bar"
|
446
|
+
* # rest_size: 3
|
447
|
+
* match_values_cleared?(scanner) # => false
|
448
|
+
*
|
449
|
+
* scanner.string = 'baz' # => "baz"
|
450
|
+
* put_situation(scanner)
|
451
|
+
* # Situation:
|
452
|
+
* # pos: 0
|
453
|
+
* # charpos: 0
|
454
|
+
* # rest: "baz"
|
455
|
+
* # rest_size: 3
|
456
|
+
* match_values_cleared?(scanner) # => true
|
457
|
+
* ```
|
367
458
|
*
|
368
|
-
* Changes the string being scanned to +str+ and resets the scanner.
|
369
|
-
* Returns +str+.
|
370
459
|
*/
|
371
460
|
static VALUE
|
372
461
|
strscan_set_string(VALUE self, VALUE str)
|
@@ -381,18 +470,33 @@ strscan_set_string(VALUE self, VALUE str)
|
|
381
470
|
}
|
382
471
|
|
383
472
|
/*
|
384
|
-
*
|
385
|
-
*
|
386
|
-
* <<(str)
|
473
|
+
* :markup: markdown
|
474
|
+
* :include: strscan/link_refs.txt
|
387
475
|
*
|
388
|
-
*
|
389
|
-
*
|
476
|
+
* call-seq:
|
477
|
+
* concat(more_string) -> self
|
478
|
+
*
|
479
|
+
* - Appends the given `more_string`
|
480
|
+
* to the [stored string][1].
|
481
|
+
* - Returns `self`.
|
482
|
+
* - Does not affect the [positions][11]
|
483
|
+
* or [match values][9].
|
484
|
+
*
|
485
|
+
*
|
486
|
+
* ```rb
|
487
|
+
* scanner = StringScanner.new('foo')
|
488
|
+
* scanner.string # => "foo"
|
489
|
+
* scanner.terminate
|
490
|
+
* scanner.concat('barbaz') # => #<StringScanner 3/9 "foo" @ "barba...">
|
491
|
+
* scanner.string # => "foobarbaz"
|
492
|
+
* put_situation(scanner)
|
493
|
+
* # Situation:
|
494
|
+
* # pos: 3
|
495
|
+
* # charpos: 3
|
496
|
+
* # rest: "barbaz"
|
497
|
+
* # rest_size: 6
|
498
|
+
* ```
|
390
499
|
*
|
391
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
392
|
-
* s.scan(/Fri /)
|
393
|
-
* s << " +1000 GMT"
|
394
|
-
* s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
|
395
|
-
* s.scan(/Dec/) # -> "Dec"
|
396
500
|
*/
|
397
501
|
static VALUE
|
398
502
|
strscan_concat(VALUE self, VALUE str)
|
@@ -406,18 +510,9 @@ strscan_concat(VALUE self, VALUE str)
|
|
406
510
|
}
|
407
511
|
|
408
512
|
/*
|
409
|
-
*
|
410
|
-
*
|
411
|
-
*
|
412
|
-
*
|
413
|
-
* In short, it's a 0-based index into bytes of the string.
|
414
|
-
*
|
415
|
-
* s = StringScanner.new('test string')
|
416
|
-
* s.pos # -> 0
|
417
|
-
* s.scan_until /str/ # -> "test str"
|
418
|
-
* s.pos # -> 8
|
419
|
-
* s.terminate # -> #<StringScanner fin>
|
420
|
-
* s.pos # -> 11
|
513
|
+
* :markup: markdown
|
514
|
+
* :include: strscan/link_refs.txt
|
515
|
+
* :include: strscan/methods/get_pos.md
|
421
516
|
*/
|
422
517
|
static VALUE
|
423
518
|
strscan_get_pos(VALUE self)
|
@@ -429,17 +524,9 @@ strscan_get_pos(VALUE self)
|
|
429
524
|
}
|
430
525
|
|
431
526
|
/*
|
432
|
-
*
|
433
|
-
*
|
434
|
-
*
|
435
|
-
*
|
436
|
-
* In short, it's a 0-based index into the string.
|
437
|
-
*
|
438
|
-
* s = StringScanner.new("abc\u00e4def\u00f6ghi")
|
439
|
-
* s.charpos # -> 0
|
440
|
-
* s.scan_until(/\u00e4/) # -> "abc\u00E4"
|
441
|
-
* s.pos # -> 5
|
442
|
-
* s.charpos # -> 4
|
527
|
+
* :markup: markdown
|
528
|
+
* :include: strscan/link_refs.txt
|
529
|
+
* :include: strscan/methods/get_charpos.md
|
443
530
|
*/
|
444
531
|
static VALUE
|
445
532
|
strscan_get_charpos(VALUE self)
|
@@ -452,13 +539,9 @@ strscan_get_charpos(VALUE self)
|
|
452
539
|
}
|
453
540
|
|
454
541
|
/*
|
455
|
-
*
|
456
|
-
*
|
457
|
-
*
|
458
|
-
*
|
459
|
-
* s = StringScanner.new('test string')
|
460
|
-
* s.pos = 7 # -> 7
|
461
|
-
* s.rest # -> "ring"
|
542
|
+
* :markup: markdown
|
543
|
+
* :include: strscan/link_refs.txt
|
544
|
+
* :include: strscan/methods/set_pos.md
|
462
545
|
*/
|
463
546
|
static VALUE
|
464
547
|
strscan_set_pos(VALUE self, VALUE v)
|
@@ -546,12 +629,13 @@ rb_reg_onig_match(VALUE re, VALUE str,
|
|
546
629
|
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
|
547
630
|
void *args, struct re_registers *regs)
|
548
631
|
{
|
632
|
+
OnigPosition result;
|
549
633
|
regex_t *reg = rb_reg_prepare_re(re, str);
|
550
634
|
|
551
635
|
bool tmpreg = reg != RREGEXP_PTR(re);
|
552
636
|
if (!tmpreg) RREGEXP(re)->usecnt++;
|
553
637
|
|
554
|
-
|
638
|
+
result = match(reg, str, regs, args);
|
555
639
|
|
556
640
|
if (!tmpreg) RREGEXP(re)->usecnt--;
|
557
641
|
if (tmpreg) {
|
@@ -601,19 +685,19 @@ strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_pt
|
|
601
685
|
ONIG_OPTION_NONE);
|
602
686
|
}
|
603
687
|
|
688
|
+
static void
|
689
|
+
strscan_enc_check(VALUE str1, VALUE str2)
|
690
|
+
{
|
691
|
+
if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) {
|
692
|
+
rb_enc_check(str1, str2);
|
693
|
+
}
|
694
|
+
}
|
695
|
+
|
604
696
|
static VALUE
|
605
697
|
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
606
698
|
{
|
607
699
|
struct strscanner *p;
|
608
700
|
|
609
|
-
if (headonly) {
|
610
|
-
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
611
|
-
StringValue(pattern);
|
612
|
-
}
|
613
|
-
}
|
614
|
-
else {
|
615
|
-
Check_Type(pattern, T_REGEXP);
|
616
|
-
}
|
617
701
|
GET_SCANNER(self, p);
|
618
702
|
|
619
703
|
CLEAR_MATCH_STATUS(p);
|
@@ -622,26 +706,42 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|
622
706
|
}
|
623
707
|
|
624
708
|
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
709
|
+
OnigPosition ret;
|
625
710
|
p->regex = pattern;
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
711
|
+
ret = rb_reg_onig_match(p->regex,
|
712
|
+
p->str,
|
713
|
+
headonly ? strscan_match : strscan_search,
|
714
|
+
(void *)p,
|
715
|
+
&(p->regs));
|
631
716
|
|
632
717
|
if (ret == ONIG_MISMATCH) {
|
633
718
|
return Qnil;
|
634
719
|
}
|
635
720
|
}
|
636
721
|
else {
|
637
|
-
|
722
|
+
StringValue(pattern);
|
638
723
|
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
724
|
+
strscan_enc_check(p->str, pattern);
|
639
725
|
return Qnil;
|
640
726
|
}
|
641
|
-
|
642
|
-
|
727
|
+
|
728
|
+
if (headonly) {
|
729
|
+
strscan_enc_check(p->str, pattern);
|
730
|
+
|
731
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
732
|
+
return Qnil;
|
733
|
+
}
|
734
|
+
set_registers(p, RSTRING_LEN(pattern));
|
735
|
+
}
|
736
|
+
else {
|
737
|
+
rb_encoding *enc = rb_enc_check(p->str, pattern);
|
738
|
+
long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
|
739
|
+
CURPTR(p), S_RESTLEN(p), enc);
|
740
|
+
if (pos == -1) {
|
741
|
+
return Qnil;
|
742
|
+
}
|
743
|
+
set_registers(p, RSTRING_LEN(pattern) + pos);
|
643
744
|
}
|
644
|
-
set_registers(p, RSTRING_LEN(pattern));
|
645
745
|
}
|
646
746
|
|
647
747
|
MATCHED(p);
|
@@ -662,20 +762,9 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|
662
762
|
}
|
663
763
|
|
664
764
|
/*
|
665
|
-
*
|
666
|
-
*
|
667
|
-
*
|
668
|
-
* the scanner advances the "scan pointer" and returns the matched string.
|
669
|
-
* Otherwise, the scanner returns +nil+.
|
670
|
-
*
|
671
|
-
* s = StringScanner.new('test string')
|
672
|
-
* p s.scan(/\w+/) # -> "test"
|
673
|
-
* p s.scan(/\w+/) # -> nil
|
674
|
-
* p s.scan(/\s+/) # -> " "
|
675
|
-
* p s.scan("str") # -> "str"
|
676
|
-
* p s.scan(/\w+/) # -> "ing"
|
677
|
-
* p s.scan(/./) # -> nil
|
678
|
-
*
|
765
|
+
* :markup: markdown
|
766
|
+
* :include: strscan/link_refs.txt
|
767
|
+
* :include: strscan/methods/scan.md
|
679
768
|
*/
|
680
769
|
static VALUE
|
681
770
|
strscan_scan(VALUE self, VALUE re)
|
@@ -684,16 +773,60 @@ strscan_scan(VALUE self, VALUE re)
|
|
684
773
|
}
|
685
774
|
|
686
775
|
/*
|
687
|
-
*
|
776
|
+
* :markup: markdown
|
777
|
+
* :include: strscan/link_refs.txt
|
688
778
|
*
|
689
|
-
*
|
690
|
-
*
|
779
|
+
* call-seq:
|
780
|
+
* match?(pattern) -> matched_size or nil
|
781
|
+
*
|
782
|
+
* Attempts to [match][17] the given `pattern`
|
783
|
+
* at the beginning of the [target substring][3];
|
784
|
+
* does not modify the [positions][11].
|
785
|
+
*
|
786
|
+
* If the match succeeds:
|
787
|
+
*
|
788
|
+
* - Sets [match values][9].
|
789
|
+
* - Returns the size in bytes of the matched substring.
|
790
|
+
*
|
791
|
+
*
|
792
|
+
* ```rb
|
793
|
+
* scanner = StringScanner.new('foobarbaz')
|
794
|
+
* scanner.pos = 3
|
795
|
+
* scanner.match?(/bar/) # => 3
|
796
|
+
* put_match_values(scanner)
|
797
|
+
* # Basic match values:
|
798
|
+
* # matched?: true
|
799
|
+
* # matched_size: 3
|
800
|
+
* # pre_match: "foo"
|
801
|
+
* # matched : "bar"
|
802
|
+
* # post_match: "baz"
|
803
|
+
* # Captured match values:
|
804
|
+
* # size: 1
|
805
|
+
* # captures: []
|
806
|
+
* # named_captures: {}
|
807
|
+
* # values_at: ["bar", nil]
|
808
|
+
* # []:
|
809
|
+
* # [0]: "bar"
|
810
|
+
* # [1]: nil
|
811
|
+
* put_situation(scanner)
|
812
|
+
* # Situation:
|
813
|
+
* # pos: 3
|
814
|
+
* # charpos: 3
|
815
|
+
* # rest: "barbaz"
|
816
|
+
* # rest_size: 6
|
817
|
+
* ```
|
818
|
+
*
|
819
|
+
* If the match fails:
|
820
|
+
*
|
821
|
+
* - Clears match values.
|
822
|
+
* - Returns `nil`.
|
823
|
+
* - Does not increment positions.
|
824
|
+
*
|
825
|
+
* ```rb
|
826
|
+
* scanner.match?(/nope/) # => nil
|
827
|
+
* match_values_cleared?(scanner) # => true
|
828
|
+
* ```
|
691
829
|
*
|
692
|
-
* s = StringScanner.new('test string')
|
693
|
-
* p s.match?(/\w+/) # -> 4
|
694
|
-
* p s.match?(/\w+/) # -> 4
|
695
|
-
* p s.match?("test") # -> 4
|
696
|
-
* p s.match?(/\s+/) # -> nil
|
697
830
|
*/
|
698
831
|
static VALUE
|
699
832
|
strscan_match_p(VALUE self, VALUE re)
|
@@ -702,22 +835,9 @@ strscan_match_p(VALUE self, VALUE re)
|
|
702
835
|
}
|
703
836
|
|
704
837
|
/*
|
705
|
-
*
|
706
|
-
*
|
707
|
-
*
|
708
|
-
* If it matches, the scan pointer is advanced to the end of the match, and the
|
709
|
-
* length of the match is returned. Otherwise, +nil+ is returned.
|
710
|
-
*
|
711
|
-
* It's similar to #scan, but without returning the matched string.
|
712
|
-
*
|
713
|
-
* s = StringScanner.new('test string')
|
714
|
-
* p s.skip(/\w+/) # -> 4
|
715
|
-
* p s.skip(/\w+/) # -> nil
|
716
|
-
* p s.skip(/\s+/) # -> 1
|
717
|
-
* p s.skip("st") # -> 2
|
718
|
-
* p s.skip(/\w+/) # -> 4
|
719
|
-
* p s.skip(/./) # -> nil
|
720
|
-
*
|
838
|
+
* :markup: markdown
|
839
|
+
* :include: strscan/link_refs.txt
|
840
|
+
* :include: strscan/methods/skip.md
|
721
841
|
*/
|
722
842
|
static VALUE
|
723
843
|
strscan_skip(VALUE self, VALUE re)
|
@@ -726,19 +846,59 @@ strscan_skip(VALUE self, VALUE re)
|
|
726
846
|
}
|
727
847
|
|
728
848
|
/*
|
729
|
-
*
|
730
|
-
*
|
731
|
-
* This returns the value that #scan would return, without advancing the scan
|
732
|
-
* pointer. The match register is affected, though.
|
849
|
+
* :markup: markdown
|
850
|
+
* :include: strscan/link_refs.txt
|
733
851
|
*
|
734
|
-
*
|
735
|
-
*
|
736
|
-
*
|
737
|
-
*
|
738
|
-
*
|
739
|
-
*
|
852
|
+
* call-seq:
|
853
|
+
* check(pattern) -> matched_substring or nil
|
854
|
+
*
|
855
|
+
* Attempts to [match][17] the given `pattern`
|
856
|
+
* at the beginning of the [target substring][3];
|
857
|
+
* does not modify the [positions][11].
|
858
|
+
*
|
859
|
+
* If the match succeeds:
|
860
|
+
*
|
861
|
+
* - Returns the matched substring.
|
862
|
+
* - Sets all [match values][9].
|
863
|
+
*
|
864
|
+
* ```rb
|
865
|
+
* scanner = StringScanner.new('foobarbaz')
|
866
|
+
* scanner.pos = 3
|
867
|
+
* scanner.check('bar') # => "bar"
|
868
|
+
* put_match_values(scanner)
|
869
|
+
* # Basic match values:
|
870
|
+
* # matched?: true
|
871
|
+
* # matched_size: 3
|
872
|
+
* # pre_match: "foo"
|
873
|
+
* # matched : "bar"
|
874
|
+
* # post_match: "baz"
|
875
|
+
* # Captured match values:
|
876
|
+
* # size: 1
|
877
|
+
* # captures: []
|
878
|
+
* # named_captures: {}
|
879
|
+
* # values_at: ["bar", nil]
|
880
|
+
* # []:
|
881
|
+
* # [0]: "bar"
|
882
|
+
* # [1]: nil
|
883
|
+
* # => 0..1
|
884
|
+
* put_situation(scanner)
|
885
|
+
* # Situation:
|
886
|
+
* # pos: 3
|
887
|
+
* # charpos: 3
|
888
|
+
* # rest: "barbaz"
|
889
|
+
* # rest_size: 6
|
890
|
+
* ```
|
891
|
+
*
|
892
|
+
* If the match fails:
|
893
|
+
*
|
894
|
+
* - Returns `nil`.
|
895
|
+
* - Clears all [match values][9].
|
896
|
+
*
|
897
|
+
* ```rb
|
898
|
+
* scanner.check(/nope/) # => nil
|
899
|
+
* match_values_cleared?(scanner) # => true
|
900
|
+
* ```
|
740
901
|
*
|
741
|
-
* Mnemonic: it "checks" to see whether a #scan will return a value.
|
742
902
|
*/
|
743
903
|
static VALUE
|
744
904
|
strscan_check(VALUE self, VALUE re)
|
@@ -747,15 +907,24 @@ strscan_check(VALUE self, VALUE re)
|
|
747
907
|
}
|
748
908
|
|
749
909
|
/*
|
750
|
-
* call-seq:
|
910
|
+
* call-seq:
|
911
|
+
* scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or matched_size or nil
|
912
|
+
*
|
913
|
+
* Equivalent to one of the following:
|
914
|
+
*
|
915
|
+
* - +advance_pointer_p+ +true+:
|
916
|
+
*
|
917
|
+
* - +return_string_p+ +true+: StringScanner#scan(pattern).
|
918
|
+
* - +return_string_p+ +false+: StringScanner#skip(pattern).
|
751
919
|
*
|
752
|
-
*
|
753
|
-
*
|
754
|
-
*
|
755
|
-
*
|
920
|
+
* - +advance_pointer_p+ +false+:
|
921
|
+
*
|
922
|
+
* - +return_string_p+ +true+: StringScanner#check(pattern).
|
923
|
+
* - +return_string_p+ +false+: StringScanner#match?(pattern).
|
756
924
|
*
|
757
|
-
* "full" means "#scan with full parameters".
|
758
925
|
*/
|
926
|
+
|
927
|
+
/* :nodoc: */
|
759
928
|
static VALUE
|
760
929
|
strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
761
930
|
{
|
@@ -763,16 +932,9 @@ strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
|
763
932
|
}
|
764
933
|
|
765
934
|
/*
|
766
|
-
*
|
767
|
-
*
|
768
|
-
*
|
769
|
-
* to and including the end of the match, advancing the scan pointer to that
|
770
|
-
* location. If there is no match, +nil+ is returned.
|
771
|
-
*
|
772
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
773
|
-
* s.scan_until(/1/) # -> "Fri Dec 1"
|
774
|
-
* s.pre_match # -> "Fri Dec "
|
775
|
-
* s.scan_until(/XYZ/) # -> nil
|
935
|
+
* :markup: markdown
|
936
|
+
* :include: strscan/link_refs.txt
|
937
|
+
* :include: strscan/methods/scan_until.md
|
776
938
|
*/
|
777
939
|
static VALUE
|
778
940
|
strscan_scan_until(VALUE self, VALUE re)
|
@@ -781,17 +943,61 @@ strscan_scan_until(VALUE self, VALUE re)
|
|
781
943
|
}
|
782
944
|
|
783
945
|
/*
|
784
|
-
*
|
946
|
+
* :markup: markdown
|
947
|
+
* :include: strscan/link_refs.txt
|
785
948
|
*
|
786
|
-
*
|
787
|
-
*
|
788
|
-
*
|
949
|
+
* call-seq:
|
950
|
+
* exist?(pattern) -> byte_offset or nil
|
951
|
+
*
|
952
|
+
* Attempts to [match][17] the given `pattern`
|
953
|
+
* anywhere (at any [position][2])
|
954
|
+
* in the [target substring][3];
|
955
|
+
* does not modify the [positions][11].
|
956
|
+
*
|
957
|
+
* If the match succeeds:
|
958
|
+
*
|
959
|
+
* - Returns a byte offset:
|
960
|
+
* the distance in bytes between the current [position][2]
|
961
|
+
* and the end of the matched substring.
|
962
|
+
* - Sets all [match values][9].
|
963
|
+
*
|
964
|
+
* ```rb
|
965
|
+
* scanner = StringScanner.new('foobarbazbatbam')
|
966
|
+
* scanner.pos = 6
|
967
|
+
* scanner.exist?(/bat/) # => 6
|
968
|
+
* put_match_values(scanner)
|
969
|
+
* # Basic match values:
|
970
|
+
* # matched?: true
|
971
|
+
* # matched_size: 3
|
972
|
+
* # pre_match: "foobarbaz"
|
973
|
+
* # matched : "bat"
|
974
|
+
* # post_match: "bam"
|
975
|
+
* # Captured match values:
|
976
|
+
* # size: 1
|
977
|
+
* # captures: []
|
978
|
+
* # named_captures: {}
|
979
|
+
* # values_at: ["bat", nil]
|
980
|
+
* # []:
|
981
|
+
* # [0]: "bat"
|
982
|
+
* # [1]: nil
|
983
|
+
* put_situation(scanner)
|
984
|
+
* # Situation:
|
985
|
+
* # pos: 6
|
986
|
+
* # charpos: 6
|
987
|
+
* # rest: "bazbatbam"
|
988
|
+
* # rest_size: 9
|
989
|
+
* ```
|
990
|
+
*
|
991
|
+
* If the match fails:
|
992
|
+
*
|
993
|
+
* - Returns `nil`.
|
994
|
+
* - Clears all [match values][9].
|
995
|
+
*
|
996
|
+
* ```rb
|
997
|
+
* scanner.exist?(/nope/) # => nil
|
998
|
+
* match_values_cleared?(scanner) # => true
|
999
|
+
* ```
|
789
1000
|
*
|
790
|
-
* s = StringScanner.new('test string')
|
791
|
-
* s.exist? /s/ # -> 3
|
792
|
-
* s.scan /test/ # -> "test"
|
793
|
-
* s.exist? /s/ # -> 2
|
794
|
-
* s.exist? /e/ # -> nil
|
795
1001
|
*/
|
796
1002
|
static VALUE
|
797
1003
|
strscan_exist_p(VALUE self, VALUE re)
|
@@ -800,20 +1006,9 @@ strscan_exist_p(VALUE self, VALUE re)
|
|
800
1006
|
}
|
801
1007
|
|
802
1008
|
/*
|
803
|
-
*
|
804
|
-
*
|
805
|
-
*
|
806
|
-
* the number of bytes advanced, or +nil+ if no match was found.
|
807
|
-
*
|
808
|
-
* Look ahead to match +pattern+, and advance the scan pointer to the _end_
|
809
|
-
* of the match. Return the number of characters advanced, or +nil+ if the
|
810
|
-
* match was unsuccessful.
|
811
|
-
*
|
812
|
-
* It's similar to #scan_until, but without returning the intervening string.
|
813
|
-
*
|
814
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
815
|
-
* s.skip_until /12/ # -> 10
|
816
|
-
* s #
|
1009
|
+
* :markup: markdown
|
1010
|
+
* :include: strscan/link_refs.txt
|
1011
|
+
* :include: strscan/methods/skip_until.md
|
817
1012
|
*/
|
818
1013
|
static VALUE
|
819
1014
|
strscan_skip_until(VALUE self, VALUE re)
|
@@ -822,17 +1017,61 @@ strscan_skip_until(VALUE self, VALUE re)
|
|
822
1017
|
}
|
823
1018
|
|
824
1019
|
/*
|
825
|
-
*
|
1020
|
+
* :markup: markdown
|
1021
|
+
* :include: strscan/link_refs.txt
|
826
1022
|
*
|
827
|
-
*
|
828
|
-
*
|
829
|
-
*
|
830
|
-
*
|
831
|
-
*
|
832
|
-
*
|
833
|
-
*
|
1023
|
+
* call-seq:
|
1024
|
+
* check_until(pattern) -> substring or nil
|
1025
|
+
*
|
1026
|
+
* Attempts to [match][17] the given `pattern`
|
1027
|
+
* anywhere (at any [position][2])
|
1028
|
+
* in the [target substring][3];
|
1029
|
+
* does not modify the [positions][11].
|
1030
|
+
*
|
1031
|
+
* If the match succeeds:
|
1032
|
+
*
|
1033
|
+
* - Sets all [match values][9].
|
1034
|
+
* - Returns the matched substring,
|
1035
|
+
* which extends from the current [position][2]
|
1036
|
+
* to the end of the matched substring.
|
1037
|
+
*
|
1038
|
+
* ```rb
|
1039
|
+
* scanner = StringScanner.new('foobarbazbatbam')
|
1040
|
+
* scanner.pos = 6
|
1041
|
+
* scanner.check_until(/bat/) # => "bazbat"
|
1042
|
+
* put_match_values(scanner)
|
1043
|
+
* # Basic match values:
|
1044
|
+
* # matched?: true
|
1045
|
+
* # matched_size: 3
|
1046
|
+
* # pre_match: "foobarbaz"
|
1047
|
+
* # matched : "bat"
|
1048
|
+
* # post_match: "bam"
|
1049
|
+
* # Captured match values:
|
1050
|
+
* # size: 1
|
1051
|
+
* # captures: []
|
1052
|
+
* # named_captures: {}
|
1053
|
+
* # values_at: ["bat", nil]
|
1054
|
+
* # []:
|
1055
|
+
* # [0]: "bat"
|
1056
|
+
* # [1]: nil
|
1057
|
+
* put_situation(scanner)
|
1058
|
+
* # Situation:
|
1059
|
+
* # pos: 6
|
1060
|
+
* # charpos: 6
|
1061
|
+
* # rest: "bazbatbam"
|
1062
|
+
* # rest_size: 9
|
1063
|
+
* ```
|
1064
|
+
*
|
1065
|
+
* If the match fails:
|
1066
|
+
*
|
1067
|
+
* - Clears all [match values][9].
|
1068
|
+
* - Returns `nil`.
|
1069
|
+
*
|
1070
|
+
* ```rb
|
1071
|
+
* scanner.check_until(/nope/) # => nil
|
1072
|
+
* match_values_cleared?(scanner) # => true
|
1073
|
+
* ```
|
834
1074
|
*
|
835
|
-
* Mnemonic: it "checks" to see whether a #scan_until will return a value.
|
836
1075
|
*/
|
837
1076
|
static VALUE
|
838
1077
|
strscan_check_until(VALUE self, VALUE re)
|
@@ -841,14 +1080,24 @@ strscan_check_until(VALUE self, VALUE re)
|
|
841
1080
|
}
|
842
1081
|
|
843
1082
|
/*
|
844
|
-
* call-seq:
|
1083
|
+
* call-seq:
|
1084
|
+
* search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil
|
1085
|
+
*
|
1086
|
+
* Equivalent to one of the following:
|
1087
|
+
*
|
1088
|
+
* - +advance_pointer_p+ +true+:
|
1089
|
+
*
|
1090
|
+
* - +return_string_p+ +true+: StringScanner#scan_until(pattern).
|
1091
|
+
* - +return_string_p+ +false+: StringScanner#skip_until(pattern).
|
1092
|
+
*
|
1093
|
+
* - +advance_pointer_p+ +false+:
|
1094
|
+
*
|
1095
|
+
* - +return_string_p+ +true+: StringScanner#check_until(pattern).
|
1096
|
+
* - +return_string_p+ +false+: StringScanner#exist?(pattern).
|
845
1097
|
*
|
846
|
-
* Scans the string _until_ the +pattern+ is matched.
|
847
|
-
* Advances the scan pointer if +advance_pointer_p+, otherwise not.
|
848
|
-
* Returns the matched string if +return_string_p+ is true, otherwise
|
849
|
-
* returns the number of bytes advanced.
|
850
|
-
* This method does affect the match register.
|
851
1098
|
*/
|
1099
|
+
|
1100
|
+
/* :nodoc: */
|
852
1101
|
static VALUE
|
853
1102
|
strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
854
1103
|
{
|
@@ -868,17 +1117,9 @@ adjust_registers_to_matched(struct strscanner *p)
|
|
868
1117
|
}
|
869
1118
|
|
870
1119
|
/*
|
871
|
-
*
|
872
|
-
*
|
873
|
-
*
|
874
|
-
* s = StringScanner.new("ab")
|
875
|
-
* s.getch # => "a"
|
876
|
-
* s.getch # => "b"
|
877
|
-
* s.getch # => nil
|
878
|
-
*
|
879
|
-
* s = StringScanner.new("\244\242".force_encoding("euc-jp"))
|
880
|
-
* s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
|
881
|
-
* s.getch # => nil
|
1120
|
+
* :markup: markdown
|
1121
|
+
* :include: strscan/link_refs.txt
|
1122
|
+
* :include: strscan/methods/getch.md
|
882
1123
|
*/
|
883
1124
|
static VALUE
|
884
1125
|
strscan_getch(VALUE self)
|
@@ -903,19 +1144,55 @@ strscan_getch(VALUE self)
|
|
903
1144
|
}
|
904
1145
|
|
905
1146
|
/*
|
906
|
-
*
|
1147
|
+
* call-seq:
|
1148
|
+
* scan_byte -> integer_byte
|
1149
|
+
*
|
1150
|
+
* Scans one byte and returns it as an integer.
|
907
1151
|
* This method is not multibyte character sensitive.
|
908
1152
|
* See also: #getch.
|
909
1153
|
*
|
1154
|
+
*/
|
1155
|
+
static VALUE
|
1156
|
+
strscan_scan_byte(VALUE self)
|
1157
|
+
{
|
1158
|
+
struct strscanner *p;
|
1159
|
+
VALUE byte;
|
1160
|
+
|
1161
|
+
GET_SCANNER(self, p);
|
1162
|
+
CLEAR_MATCH_STATUS(p);
|
1163
|
+
if (EOS_P(p))
|
1164
|
+
return Qnil;
|
1165
|
+
|
1166
|
+
byte = INT2FIX((unsigned char)*CURPTR(p));
|
1167
|
+
p->prev = p->curr;
|
1168
|
+
p->curr++;
|
1169
|
+
MATCHED(p);
|
1170
|
+
adjust_registers_to_matched(p);
|
1171
|
+
return byte;
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
/*
|
1175
|
+
* Peeks at the current byte and returns it as an integer.
|
1176
|
+
*
|
910
1177
|
* s = StringScanner.new('ab')
|
911
|
-
* s.
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
*
|
917
|
-
|
918
|
-
|
1178
|
+
* s.peek_byte # => 97
|
1179
|
+
*/
|
1180
|
+
static VALUE
|
1181
|
+
strscan_peek_byte(VALUE self)
|
1182
|
+
{
|
1183
|
+
struct strscanner *p;
|
1184
|
+
|
1185
|
+
GET_SCANNER(self, p);
|
1186
|
+
if (EOS_P(p))
|
1187
|
+
return Qnil;
|
1188
|
+
|
1189
|
+
return INT2FIX((unsigned char)*CURPTR(p));
|
1190
|
+
}
|
1191
|
+
|
1192
|
+
/*
|
1193
|
+
* :markup: markdown
|
1194
|
+
* :include: strscan/link_refs.txt
|
1195
|
+
* :include: strscan/methods/get_byte.md
|
919
1196
|
*/
|
920
1197
|
static VALUE
|
921
1198
|
strscan_get_byte(VALUE self)
|
@@ -937,9 +1214,14 @@ strscan_get_byte(VALUE self)
|
|
937
1214
|
}
|
938
1215
|
|
939
1216
|
/*
|
1217
|
+
* call-seq:
|
1218
|
+
* getbyte
|
1219
|
+
*
|
940
1220
|
* Equivalent to #get_byte.
|
941
1221
|
* This method is obsolete; use #get_byte instead.
|
942
1222
|
*/
|
1223
|
+
|
1224
|
+
/* :nodoc: */
|
943
1225
|
static VALUE
|
944
1226
|
strscan_getbyte(VALUE self)
|
945
1227
|
{
|
@@ -948,14 +1230,22 @@ strscan_getbyte(VALUE self)
|
|
948
1230
|
}
|
949
1231
|
|
950
1232
|
/*
|
951
|
-
*
|
1233
|
+
* :markup: markdown
|
1234
|
+
* :include: strscan/link_refs.txt
|
1235
|
+
*
|
1236
|
+
* call-seq:
|
1237
|
+
* peek(length) -> substring
|
952
1238
|
*
|
953
|
-
*
|
954
|
-
*
|
1239
|
+
* Returns the substring `string[pos, length]`;
|
1240
|
+
* does not update [match values][9] or [positions][11]:
|
955
1241
|
*
|
956
|
-
*
|
957
|
-
*
|
958
|
-
*
|
1242
|
+
* ```rb
|
1243
|
+
* scanner = StringScanner.new('foobarbaz')
|
1244
|
+
* scanner.pos = 3
|
1245
|
+
* scanner.peek(3) # => "bar"
|
1246
|
+
* scanner.terminate
|
1247
|
+
* scanner.peek(3) # => ""
|
1248
|
+
* ```
|
959
1249
|
*
|
960
1250
|
*/
|
961
1251
|
static VALUE
|
@@ -975,9 +1265,14 @@ strscan_peek(VALUE self, VALUE vlen)
|
|
975
1265
|
}
|
976
1266
|
|
977
1267
|
/*
|
1268
|
+
* call-seq:
|
1269
|
+
* peep
|
1270
|
+
*
|
978
1271
|
* Equivalent to #peek.
|
979
1272
|
* This method is obsolete; use #peek instead.
|
980
1273
|
*/
|
1274
|
+
|
1275
|
+
/* :nodoc: */
|
981
1276
|
static VALUE
|
982
1277
|
strscan_peep(VALUE self, VALUE vlen)
|
983
1278
|
{
|
@@ -985,16 +1280,156 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
985
1280
|
return strscan_peek(self, vlen);
|
986
1281
|
}
|
987
1282
|
|
1283
|
+
static VALUE
|
1284
|
+
strscan_parse_integer(struct strscanner *p, int base, long len)
|
1285
|
+
{
|
1286
|
+
VALUE buffer_v, integer;
|
1287
|
+
|
1288
|
+
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
1289
|
+
|
1290
|
+
MEMCPY(buffer, CURPTR(p), char, len);
|
1291
|
+
buffer[len] = '\0';
|
1292
|
+
integer = rb_cstr2inum(buffer, base);
|
1293
|
+
RB_ALLOCV_END(buffer_v);
|
1294
|
+
p->curr += len;
|
1295
|
+
return integer;
|
1296
|
+
}
|
1297
|
+
|
1298
|
+
static inline bool
|
1299
|
+
strscan_ascii_compat_fastpath(VALUE str) {
|
1300
|
+
int encindex = ENCODING_GET_INLINED(str);
|
1301
|
+
// The overwhelming majority of strings are in one of these 3 encodings.
|
1302
|
+
return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex;
|
1303
|
+
}
|
1304
|
+
|
1305
|
+
static inline void
|
1306
|
+
strscan_must_ascii_compat(VALUE str)
|
1307
|
+
{
|
1308
|
+
// The overwhelming majority of strings are in one of these 3 encodings.
|
1309
|
+
if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) {
|
1310
|
+
return;
|
1311
|
+
}
|
1312
|
+
|
1313
|
+
rb_must_asciicompat(str);
|
1314
|
+
}
|
1315
|
+
|
1316
|
+
static VALUE
|
1317
|
+
strscan_scan_base10_integer(VALUE self)
|
1318
|
+
{
|
1319
|
+
char *ptr;
|
1320
|
+
long len = 0;
|
1321
|
+
struct strscanner *p;
|
1322
|
+
|
1323
|
+
GET_SCANNER(self, p);
|
1324
|
+
CLEAR_MATCH_STATUS(p);
|
1325
|
+
|
1326
|
+
strscan_must_ascii_compat(p->str);
|
1327
|
+
|
1328
|
+
ptr = CURPTR(p);
|
1329
|
+
|
1330
|
+
long remaining_len = S_RESTLEN(p);
|
1331
|
+
|
1332
|
+
if (remaining_len <= 0) {
|
1333
|
+
return Qnil;
|
1334
|
+
}
|
1335
|
+
|
1336
|
+
if (ptr[len] == '-' || ptr[len] == '+') {
|
1337
|
+
len++;
|
1338
|
+
}
|
1339
|
+
|
1340
|
+
if (!rb_isdigit(ptr[len])) {
|
1341
|
+
return Qnil;
|
1342
|
+
}
|
1343
|
+
|
1344
|
+
MATCHED(p);
|
1345
|
+
p->prev = p->curr;
|
1346
|
+
|
1347
|
+
while (len < remaining_len && rb_isdigit(ptr[len])) {
|
1348
|
+
len++;
|
1349
|
+
}
|
1350
|
+
|
1351
|
+
return strscan_parse_integer(p, 10, len);
|
1352
|
+
}
|
1353
|
+
|
1354
|
+
static VALUE
|
1355
|
+
strscan_scan_base16_integer(VALUE self)
|
1356
|
+
{
|
1357
|
+
char *ptr;
|
1358
|
+
long len = 0;
|
1359
|
+
struct strscanner *p;
|
1360
|
+
|
1361
|
+
GET_SCANNER(self, p);
|
1362
|
+
CLEAR_MATCH_STATUS(p);
|
1363
|
+
|
1364
|
+
strscan_must_ascii_compat(p->str);
|
1365
|
+
|
1366
|
+
ptr = CURPTR(p);
|
1367
|
+
|
1368
|
+
long remaining_len = S_RESTLEN(p);
|
1369
|
+
|
1370
|
+
if (remaining_len <= 0) {
|
1371
|
+
return Qnil;
|
1372
|
+
}
|
1373
|
+
|
1374
|
+
if (ptr[len] == '-' || ptr[len] == '+') {
|
1375
|
+
len++;
|
1376
|
+
}
|
1377
|
+
|
1378
|
+
if ((remaining_len >= (len + 2)) && ptr[len] == '0' && ptr[len + 1] == 'x') {
|
1379
|
+
len += 2;
|
1380
|
+
}
|
1381
|
+
|
1382
|
+
if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
|
1383
|
+
return Qnil;
|
1384
|
+
}
|
1385
|
+
|
1386
|
+
MATCHED(p);
|
1387
|
+
p->prev = p->curr;
|
1388
|
+
|
1389
|
+
while (len < remaining_len && rb_isxdigit(ptr[len])) {
|
1390
|
+
len++;
|
1391
|
+
}
|
1392
|
+
|
1393
|
+
return strscan_parse_integer(p, 16, len);
|
1394
|
+
}
|
1395
|
+
|
988
1396
|
/*
|
989
|
-
*
|
990
|
-
*
|
1397
|
+
* :markup: markdown
|
1398
|
+
* :include: strscan/link_refs.txt
|
1399
|
+
*
|
1400
|
+
* call-seq:
|
1401
|
+
* unscan -> self
|
1402
|
+
*
|
1403
|
+
* Sets the [position][2] to its value previous to the most recent successful
|
1404
|
+
* [match][17] attempt:
|
1405
|
+
*
|
1406
|
+
* ```rb
|
1407
|
+
* scanner = StringScanner.new('foobarbaz')
|
1408
|
+
* scanner.scan(/foo/)
|
1409
|
+
* put_situation(scanner)
|
1410
|
+
* # Situation:
|
1411
|
+
* # pos: 3
|
1412
|
+
* # charpos: 3
|
1413
|
+
* # rest: "barbaz"
|
1414
|
+
* # rest_size: 6
|
1415
|
+
* scanner.unscan
|
1416
|
+
* # => #<StringScanner 0/9 @ "fooba...">
|
1417
|
+
* put_situation(scanner)
|
1418
|
+
* # Situation:
|
1419
|
+
* # pos: 0
|
1420
|
+
* # charpos: 0
|
1421
|
+
* # rest: "foobarbaz"
|
1422
|
+
* # rest_size: 9
|
1423
|
+
* ```
|
1424
|
+
*
|
1425
|
+
* Raises an exception if match values are clear:
|
1426
|
+
*
|
1427
|
+
* ```rb
|
1428
|
+
* scanner.scan(/nope/) # => nil
|
1429
|
+
* match_values_cleared?(scanner) # => true
|
1430
|
+
* scanner.unscan # Raises StringScanner::Error.
|
1431
|
+
* ```
|
991
1432
|
*
|
992
|
-
* s = StringScanner.new('test string')
|
993
|
-
* s.scan(/\w+/) # => "test"
|
994
|
-
* s.unscan
|
995
|
-
* s.scan(/../) # => "te"
|
996
|
-
* s.scan(/\d/) # => nil
|
997
|
-
* s.unscan # ScanError: unscan failed: previous match record not exist
|
998
1433
|
*/
|
999
1434
|
static VALUE
|
1000
1435
|
strscan_unscan(VALUE self)
|
@@ -1010,16 +1445,37 @@ strscan_unscan(VALUE self)
|
|
1010
1445
|
}
|
1011
1446
|
|
1012
1447
|
/*
|
1013
|
-
*
|
1014
|
-
*
|
1015
|
-
*
|
1016
|
-
*
|
1017
|
-
*
|
1018
|
-
*
|
1019
|
-
*
|
1020
|
-
*
|
1021
|
-
*
|
1022
|
-
*
|
1448
|
+
*
|
1449
|
+
* :markup: markdown
|
1450
|
+
* :include: strscan/link_refs.txt
|
1451
|
+
*
|
1452
|
+
* call-seq:
|
1453
|
+
* beginning_of_line? -> true or false
|
1454
|
+
*
|
1455
|
+
* Returns whether the [position][2] is at the beginning of a line;
|
1456
|
+
* that is, at the beginning of the [stored string][1]
|
1457
|
+
* or immediately after a newline:
|
1458
|
+
*
|
1459
|
+
* scanner = StringScanner.new(MULTILINE_TEXT)
|
1460
|
+
* scanner.string
|
1461
|
+
* # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n"
|
1462
|
+
* scanner.pos # => 0
|
1463
|
+
* scanner.beginning_of_line? # => true
|
1464
|
+
*
|
1465
|
+
* scanner.scan_until(/,/) # => "Go placidly amid the noise and haste,"
|
1466
|
+
* scanner.beginning_of_line? # => false
|
1467
|
+
*
|
1468
|
+
* scanner.scan(/\n/) # => "\n"
|
1469
|
+
* scanner.beginning_of_line? # => true
|
1470
|
+
*
|
1471
|
+
* scanner.terminate
|
1472
|
+
* scanner.beginning_of_line? # => true
|
1473
|
+
*
|
1474
|
+
* scanner.concat('x')
|
1475
|
+
* scanner.terminate
|
1476
|
+
* scanner.beginning_of_line? # => false
|
1477
|
+
*
|
1478
|
+
* StringScanner#bol? is an alias for StringScanner#beginning_of_line?.
|
1023
1479
|
*/
|
1024
1480
|
static VALUE
|
1025
1481
|
strscan_bol_p(VALUE self)
|
@@ -1033,14 +1489,24 @@ strscan_bol_p(VALUE self)
|
|
1033
1489
|
}
|
1034
1490
|
|
1035
1491
|
/*
|
1036
|
-
*
|
1492
|
+
* :markup: markdown
|
1493
|
+
* :include: strscan/link_refs.txt
|
1494
|
+
*
|
1495
|
+
* call-seq:
|
1496
|
+
* eos? -> true or false
|
1497
|
+
*
|
1498
|
+
* Returns whether the [position][2]
|
1499
|
+
* is at the end of the [stored string][1]:
|
1500
|
+
*
|
1501
|
+
* ```rb
|
1502
|
+
* scanner = StringScanner.new('foobarbaz')
|
1503
|
+
* scanner.eos? # => false
|
1504
|
+
* scanner.pos = 3
|
1505
|
+
* scanner.eos? # => false
|
1506
|
+
* scanner.terminate
|
1507
|
+
* scanner.eos? # => true
|
1508
|
+
* ```
|
1037
1509
|
*
|
1038
|
-
* s = StringScanner.new('test string')
|
1039
|
-
* p s.eos? # => false
|
1040
|
-
* s.scan(/test/)
|
1041
|
-
* p s.eos? # => false
|
1042
|
-
* s.terminate
|
1043
|
-
* p s.eos? # => true
|
1044
1510
|
*/
|
1045
1511
|
static VALUE
|
1046
1512
|
strscan_eos_p(VALUE self)
|
@@ -1052,9 +1518,14 @@ strscan_eos_p(VALUE self)
|
|
1052
1518
|
}
|
1053
1519
|
|
1054
1520
|
/*
|
1521
|
+
* call-seq:
|
1522
|
+
* empty?
|
1523
|
+
*
|
1055
1524
|
* Equivalent to #eos?.
|
1056
1525
|
* This method is obsolete, use #eos? instead.
|
1057
1526
|
*/
|
1527
|
+
|
1528
|
+
/* :nodoc: */
|
1058
1529
|
static VALUE
|
1059
1530
|
strscan_empty_p(VALUE self)
|
1060
1531
|
{
|
@@ -1063,6 +1534,9 @@ strscan_empty_p(VALUE self)
|
|
1063
1534
|
}
|
1064
1535
|
|
1065
1536
|
/*
|
1537
|
+
* call-seq:
|
1538
|
+
* rest?
|
1539
|
+
*
|
1066
1540
|
* Returns true if and only if there is more data in the string. See #eos?.
|
1067
1541
|
* This method is obsolete; use #eos? instead.
|
1068
1542
|
*
|
@@ -1071,6 +1545,8 @@ strscan_empty_p(VALUE self)
|
|
1071
1545
|
* s.eos? # => false
|
1072
1546
|
* s.rest? # => true
|
1073
1547
|
*/
|
1548
|
+
|
1549
|
+
/* :nodoc: */
|
1074
1550
|
static VALUE
|
1075
1551
|
strscan_rest_p(VALUE self)
|
1076
1552
|
{
|
@@ -1081,13 +1557,26 @@ strscan_rest_p(VALUE self)
|
|
1081
1557
|
}
|
1082
1558
|
|
1083
1559
|
/*
|
1084
|
-
*
|
1560
|
+
* :markup: markdown
|
1561
|
+
* :include: strscan/link_refs.txt
|
1562
|
+
*
|
1563
|
+
* call-seq:
|
1564
|
+
* matched? -> true or false
|
1565
|
+
*
|
1566
|
+
* Returns `true` if the most recent [match attempt][17] was successful,
|
1567
|
+
* `false` otherwise;
|
1568
|
+
* see [Basic Match Values][18]:
|
1569
|
+
*
|
1570
|
+
* ```rb
|
1571
|
+
* scanner = StringScanner.new('foobarbaz')
|
1572
|
+
* scanner.matched? # => false
|
1573
|
+
* scanner.pos = 3
|
1574
|
+
* scanner.exist?(/baz/) # => 6
|
1575
|
+
* scanner.matched? # => true
|
1576
|
+
* scanner.exist?(/nope/) # => nil
|
1577
|
+
* scanner.matched? # => false
|
1578
|
+
* ```
|
1085
1579
|
*
|
1086
|
-
* s = StringScanner.new('test string')
|
1087
|
-
* s.match?(/\w+/) # => 4
|
1088
|
-
* s.matched? # => true
|
1089
|
-
* s.match?(/\d+/) # => nil
|
1090
|
-
* s.matched? # => false
|
1091
1580
|
*/
|
1092
1581
|
static VALUE
|
1093
1582
|
strscan_matched_p(VALUE self)
|
@@ -1099,11 +1588,27 @@ strscan_matched_p(VALUE self)
|
|
1099
1588
|
}
|
1100
1589
|
|
1101
1590
|
/*
|
1102
|
-
*
|
1591
|
+
* :markup: markdown
|
1592
|
+
* :include: strscan/link_refs.txt
|
1593
|
+
*
|
1594
|
+
* call-seq:
|
1595
|
+
* matched -> matched_substring or nil
|
1596
|
+
*
|
1597
|
+
* Returns the matched substring from the most recent [match][17] attempt
|
1598
|
+
* if it was successful,
|
1599
|
+
* or `nil` otherwise;
|
1600
|
+
* see [Basic Match Values][18]:
|
1601
|
+
*
|
1602
|
+
* ```rb
|
1603
|
+
* scanner = StringScanner.new('foobarbaz')
|
1604
|
+
* scanner.matched # => nil
|
1605
|
+
* scanner.pos = 3
|
1606
|
+
* scanner.match?(/bar/) # => 3
|
1607
|
+
* scanner.matched # => "bar"
|
1608
|
+
* scanner.match?(/nope/) # => nil
|
1609
|
+
* scanner.matched # => nil
|
1610
|
+
* ```
|
1103
1611
|
*
|
1104
|
-
* s = StringScanner.new('test string')
|
1105
|
-
* s.match?(/\w+/) # -> 4
|
1106
|
-
* s.matched # -> "test"
|
1107
1612
|
*/
|
1108
1613
|
static VALUE
|
1109
1614
|
strscan_matched(VALUE self)
|
@@ -1118,15 +1623,29 @@ strscan_matched(VALUE self)
|
|
1118
1623
|
}
|
1119
1624
|
|
1120
1625
|
/*
|
1121
|
-
*
|
1122
|
-
*
|
1123
|
-
*
|
1626
|
+
* :markup: markdown
|
1627
|
+
* :include: strscan/link_refs.txt
|
1628
|
+
*
|
1629
|
+
* call-seq:
|
1630
|
+
* matched_size -> substring_size or nil
|
1631
|
+
*
|
1632
|
+
* Returns the size (in bytes) of the matched substring
|
1633
|
+
* from the most recent [match attempt][17] if it was successful,
|
1634
|
+
* or `nil` otherwise;
|
1635
|
+
* see [Basic Match Values][18]:
|
1636
|
+
*
|
1637
|
+
* ```rb
|
1638
|
+
* scanner = StringScanner.new('foobarbaz')
|
1639
|
+
* scanner.matched_size # => nil
|
1640
|
+
*
|
1641
|
+
* pos = 3
|
1642
|
+
* scanner.exist?(/baz/) # => 9
|
1643
|
+
* scanner.matched_size # => 3
|
1644
|
+
*
|
1645
|
+
* scanner.exist?(/nope/) # => nil
|
1646
|
+
* scanner.matched_size # => nil
|
1647
|
+
* ```
|
1124
1648
|
*
|
1125
|
-
* s = StringScanner.new('test string')
|
1126
|
-
* s.check /\w+/ # -> "test"
|
1127
|
-
* s.matched_size # -> 4
|
1128
|
-
* s.check /\d+/ # -> nil
|
1129
|
-
* s.matched_size # -> nil
|
1130
1649
|
*/
|
1131
1650
|
static VALUE
|
1132
1651
|
strscan_matched_size(VALUE self)
|
@@ -1157,30 +1676,75 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
|
|
1157
1676
|
}
|
1158
1677
|
|
1159
1678
|
/*
|
1160
|
-
*
|
1161
|
-
*
|
1162
|
-
*
|
1163
|
-
*
|
1164
|
-
*
|
1165
|
-
*
|
1166
|
-
*
|
1167
|
-
*
|
1168
|
-
*
|
1169
|
-
*
|
1170
|
-
*
|
1171
|
-
*
|
1172
|
-
*
|
1173
|
-
*
|
1174
|
-
*
|
1175
|
-
*
|
1176
|
-
*
|
1177
|
-
*
|
1178
|
-
*
|
1179
|
-
*
|
1180
|
-
*
|
1181
|
-
*
|
1182
|
-
*
|
1183
|
-
*
|
1679
|
+
*
|
1680
|
+
* :markup: markdown
|
1681
|
+
* :include: strscan/link_refs.txt
|
1682
|
+
*
|
1683
|
+
* call-seq:
|
1684
|
+
* [](specifier) -> substring or nil
|
1685
|
+
*
|
1686
|
+
* Returns a captured substring or `nil`;
|
1687
|
+
* see [Captured Match Values][13].
|
1688
|
+
*
|
1689
|
+
* When there are captures:
|
1690
|
+
*
|
1691
|
+
* ```rb
|
1692
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1693
|
+
* scanner.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
|
1694
|
+
* ```
|
1695
|
+
*
|
1696
|
+
* - `specifier` zero: returns the entire matched substring:
|
1697
|
+
*
|
1698
|
+
* ```rb
|
1699
|
+
* scanner[0] # => "Fri Dec 12 "
|
1700
|
+
* scanner.pre_match # => ""
|
1701
|
+
* scanner.post_match # => "1975 14:39"
|
1702
|
+
* ```
|
1703
|
+
*
|
1704
|
+
* - `specifier` positive integer: returns the `n`th capture, or `nil` if out of range:
|
1705
|
+
*
|
1706
|
+
* ```rb
|
1707
|
+
* scanner[1] # => "Fri"
|
1708
|
+
* scanner[2] # => "Dec"
|
1709
|
+
* scanner[3] # => "12"
|
1710
|
+
* scanner[4] # => nil
|
1711
|
+
* ```
|
1712
|
+
*
|
1713
|
+
* - `specifier` negative integer: counts backward from the last subgroup:
|
1714
|
+
*
|
1715
|
+
* ```rb
|
1716
|
+
* scanner[-1] # => "12"
|
1717
|
+
* scanner[-4] # => "Fri Dec 12 "
|
1718
|
+
* scanner[-5] # => nil
|
1719
|
+
* ```
|
1720
|
+
*
|
1721
|
+
* - `specifier` symbol or string: returns the named subgroup, or `nil` if there is no such subgroup:
|
1722
|
+
*
|
1723
|
+
* ```rb
|
1724
|
+
* scanner[:wday] # => "Fri"
|
1725
|
+
* scanner['wday'] # => "Fri"
|
1726
|
+
* scanner[:month] # => "Dec"
|
1727
|
+
* scanner[:day] # => "12"
|
1728
|
+
* scanner[:nope] # => nil
|
1729
|
+
* ```
|
1730
|
+
*
|
1731
|
+
* When there are no captures, only `[0]` returns non-`nil`:
|
1732
|
+
*
|
1733
|
+
* ```rb
|
1734
|
+
* scanner = StringScanner.new('foobarbaz')
|
1735
|
+
* scanner.exist?(/bar/)
|
1736
|
+
* scanner[0] # => "bar"
|
1737
|
+
* scanner[1] # => nil
|
1738
|
+
* ```
|
1739
|
+
*
|
1740
|
+
* For a failed match, even `[0]` returns `nil`:
|
1741
|
+
*
|
1742
|
+
* ```rb
|
1743
|
+
* scanner.scan(/nope/) # => nil
|
1744
|
+
* scanner[0] # => nil
|
1745
|
+
* scanner[1] # => nil
|
1746
|
+
* ```
|
1747
|
+
*
|
1184
1748
|
*/
|
1185
1749
|
static VALUE
|
1186
1750
|
strscan_aref(VALUE self, VALUE idx)
|
@@ -1217,14 +1781,28 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1217
1781
|
}
|
1218
1782
|
|
1219
1783
|
/*
|
1220
|
-
*
|
1784
|
+
* :markup: markdown
|
1785
|
+
* :include: strscan/link_refs.txt
|
1786
|
+
*
|
1787
|
+
* call-seq:
|
1788
|
+
* size -> captures_count
|
1789
|
+
*
|
1790
|
+
* Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise;
|
1791
|
+
* see [Captured Match Values][13]:
|
1792
|
+
*
|
1793
|
+
* ```rb
|
1794
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1795
|
+
* scanner.size # => nil
|
1221
1796
|
*
|
1222
|
-
*
|
1223
|
-
*
|
1797
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
1798
|
+
* scanner.match?(pattern)
|
1799
|
+
* scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
|
1800
|
+
* scanner.size # => 4
|
1801
|
+
*
|
1802
|
+
* scanner.match?(/nope/) # => nil
|
1803
|
+
* scanner.size # => nil
|
1804
|
+
* ```
|
1224
1805
|
*
|
1225
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1226
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1227
|
-
* s.size # -> 4
|
1228
1806
|
*/
|
1229
1807
|
static VALUE
|
1230
1808
|
strscan_size(VALUE self)
|
@@ -1237,16 +1815,30 @@ strscan_size(VALUE self)
|
|
1237
1815
|
}
|
1238
1816
|
|
1239
1817
|
/*
|
1240
|
-
*
|
1818
|
+
* :markup: markdown
|
1819
|
+
* :include: strscan/link_refs.txt
|
1820
|
+
*
|
1821
|
+
* call-seq:
|
1822
|
+
* captures -> substring_array or nil
|
1241
1823
|
*
|
1242
|
-
* Returns the
|
1243
|
-
*
|
1824
|
+
* Returns the array of [captured match values][13] at indexes `(1..)`
|
1825
|
+
* if the most recent match attempt succeeded, or `nil` otherwise:
|
1826
|
+
*
|
1827
|
+
* ```rb
|
1828
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1829
|
+
* scanner.captures # => nil
|
1830
|
+
*
|
1831
|
+
* scanner.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
|
1832
|
+
* scanner.captures # => ["Fri", "Dec", "12"]
|
1833
|
+
* scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
|
1834
|
+
*
|
1835
|
+
* scanner.exist?(/Fri/)
|
1836
|
+
* scanner.captures # => []
|
1837
|
+
*
|
1838
|
+
* scanner.scan(/nope/)
|
1839
|
+
* scanner.captures # => nil
|
1840
|
+
* ```
|
1244
1841
|
*
|
1245
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1246
|
-
* s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 "
|
1247
|
-
* s.captures # -> ["Fri", "Dec", "12", nil]
|
1248
|
-
* s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil
|
1249
|
-
* s.captures # -> nil
|
1250
1842
|
*/
|
1251
1843
|
static VALUE
|
1252
1844
|
strscan_captures(VALUE self)
|
@@ -1276,17 +1868,25 @@ strscan_captures(VALUE self)
|
|
1276
1868
|
}
|
1277
1869
|
|
1278
1870
|
/*
|
1279
|
-
*
|
1280
|
-
*
|
1871
|
+
* :markup: markdown
|
1872
|
+
* :include: strscan/link_refs.txt
|
1281
1873
|
*
|
1282
|
-
*
|
1283
|
-
*
|
1874
|
+
* call-seq:
|
1875
|
+
* values_at(*specifiers) -> array_of_captures or nil
|
1876
|
+
*
|
1877
|
+
* Returns an array of captured substrings, or `nil` if none.
|
1878
|
+
*
|
1879
|
+
* For each `specifier`, the returned substring is `[specifier]`;
|
1880
|
+
* see #[].
|
1881
|
+
*
|
1882
|
+
* ```rb
|
1883
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1884
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
1885
|
+
* scanner.match?(pattern)
|
1886
|
+
* scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"]
|
1887
|
+
* scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"]
|
1888
|
+
* ```
|
1284
1889
|
*
|
1285
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1286
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1287
|
-
* s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
|
1288
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> nil
|
1289
|
-
* s.values_at 0, -1, 5, 2 # -> nil
|
1290
1890
|
*/
|
1291
1891
|
|
1292
1892
|
static VALUE
|
@@ -1308,13 +1908,29 @@ strscan_values_at(int argc, VALUE *argv, VALUE self)
|
|
1308
1908
|
}
|
1309
1909
|
|
1310
1910
|
/*
|
1311
|
-
*
|
1911
|
+
* :markup: markdown
|
1912
|
+
* :include: strscan/link_refs.txt
|
1913
|
+
*
|
1914
|
+
* call-seq:
|
1915
|
+
* pre_match -> substring
|
1916
|
+
*
|
1917
|
+
* Returns the substring that precedes the matched substring
|
1918
|
+
* from the most recent match attempt if it was successful,
|
1919
|
+
* or `nil` otherwise;
|
1920
|
+
* see [Basic Match Values][18]:
|
1921
|
+
*
|
1922
|
+
* ```rb
|
1923
|
+
* scanner = StringScanner.new('foobarbaz')
|
1924
|
+
* scanner.pre_match # => nil
|
1925
|
+
*
|
1926
|
+
* scanner.pos = 3
|
1927
|
+
* scanner.exist?(/baz/) # => 6
|
1928
|
+
* scanner.pre_match # => "foobar" # Substring of entire string, not just target string.
|
1929
|
+
*
|
1930
|
+
* scanner.exist?(/nope/) # => nil
|
1931
|
+
* scanner.pre_match # => nil
|
1932
|
+
* ```
|
1312
1933
|
*
|
1313
|
-
* s = StringScanner.new('test string')
|
1314
|
-
* s.scan(/\w+/) # -> "test"
|
1315
|
-
* s.scan(/\s+/) # -> " "
|
1316
|
-
* s.pre_match # -> "test"
|
1317
|
-
* s.post_match # -> "string"
|
1318
1934
|
*/
|
1319
1935
|
static VALUE
|
1320
1936
|
strscan_pre_match(VALUE self)
|
@@ -1329,13 +1945,29 @@ strscan_pre_match(VALUE self)
|
|
1329
1945
|
}
|
1330
1946
|
|
1331
1947
|
/*
|
1332
|
-
*
|
1948
|
+
* :markup: markdown
|
1949
|
+
* :include: strscan/link_refs.txt
|
1950
|
+
*
|
1951
|
+
* call-seq:
|
1952
|
+
* post_match -> substring
|
1953
|
+
*
|
1954
|
+
* Returns the substring that follows the matched substring
|
1955
|
+
* from the most recent match attempt if it was successful,
|
1956
|
+
* or `nil` otherwise;
|
1957
|
+
* see [Basic Match Values][18]:
|
1958
|
+
*
|
1959
|
+
* ```rb
|
1960
|
+
* scanner = StringScanner.new('foobarbaz')
|
1961
|
+
* scanner.post_match # => nil
|
1962
|
+
*
|
1963
|
+
* scanner.pos = 3
|
1964
|
+
* scanner.match?(/bar/) # => 3
|
1965
|
+
* scanner.post_match # => "baz"
|
1966
|
+
*
|
1967
|
+
* scanner.match?(/nope/) # => nil
|
1968
|
+
* scanner.post_match # => nil
|
1969
|
+
* ```
|
1333
1970
|
*
|
1334
|
-
* s = StringScanner.new('test string')
|
1335
|
-
* s.scan(/\w+/) # -> "test"
|
1336
|
-
* s.scan(/\s+/) # -> " "
|
1337
|
-
* s.pre_match # -> "test"
|
1338
|
-
* s.post_match # -> "string"
|
1339
1971
|
*/
|
1340
1972
|
static VALUE
|
1341
1973
|
strscan_post_match(VALUE self)
|
@@ -1350,8 +1982,24 @@ strscan_post_match(VALUE self)
|
|
1350
1982
|
}
|
1351
1983
|
|
1352
1984
|
/*
|
1353
|
-
*
|
1354
|
-
*
|
1985
|
+
* :markup: markdown
|
1986
|
+
* :include: strscan/link_refs.txt
|
1987
|
+
*
|
1988
|
+
* call-seq:
|
1989
|
+
* rest -> target_substring
|
1990
|
+
*
|
1991
|
+
* Returns the 'rest' of the [stored string][1] (all after the current [position][2]),
|
1992
|
+
* which is the [target substring][3]:
|
1993
|
+
*
|
1994
|
+
* ```rb
|
1995
|
+
* scanner = StringScanner.new('foobarbaz')
|
1996
|
+
* scanner.rest # => "foobarbaz"
|
1997
|
+
* scanner.pos = 3
|
1998
|
+
* scanner.rest # => "barbaz"
|
1999
|
+
* scanner.terminate
|
2000
|
+
* scanner.rest # => ""
|
2001
|
+
* ```
|
2002
|
+
*
|
1355
2003
|
*/
|
1356
2004
|
static VALUE
|
1357
2005
|
strscan_rest(VALUE self)
|
@@ -1366,7 +2014,26 @@ strscan_rest(VALUE self)
|
|
1366
2014
|
}
|
1367
2015
|
|
1368
2016
|
/*
|
1369
|
-
*
|
2017
|
+
* :markup: markdown
|
2018
|
+
* :include: strscan/link_refs.txt
|
2019
|
+
*
|
2020
|
+
* call-seq:
|
2021
|
+
* rest_size -> integer
|
2022
|
+
*
|
2023
|
+
* Returns the size (in bytes) of the #rest of the [stored string][1]:
|
2024
|
+
*
|
2025
|
+
* ```rb
|
2026
|
+
* scanner = StringScanner.new('foobarbaz')
|
2027
|
+
* scanner.rest # => "foobarbaz"
|
2028
|
+
* scanner.rest_size # => 9
|
2029
|
+
* scanner.pos = 3
|
2030
|
+
* scanner.rest # => "barbaz"
|
2031
|
+
* scanner.rest_size # => 6
|
2032
|
+
* scanner.terminate
|
2033
|
+
* scanner.rest # => ""
|
2034
|
+
* scanner.rest_size # => 0
|
2035
|
+
* ```
|
2036
|
+
*
|
1370
2037
|
*/
|
1371
2038
|
static VALUE
|
1372
2039
|
strscan_rest_size(VALUE self)
|
@@ -1383,9 +2050,14 @@ strscan_rest_size(VALUE self)
|
|
1383
2050
|
}
|
1384
2051
|
|
1385
2052
|
/*
|
2053
|
+
* call-seq:
|
2054
|
+
* restsize
|
2055
|
+
*
|
1386
2056
|
* <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
|
1387
2057
|
* This method is obsolete; use #rest_size instead.
|
1388
2058
|
*/
|
2059
|
+
|
2060
|
+
/* :nodoc: */
|
1389
2061
|
static VALUE
|
1390
2062
|
strscan_restsize(VALUE self)
|
1391
2063
|
{
|
@@ -1396,15 +2068,39 @@ strscan_restsize(VALUE self)
|
|
1396
2068
|
#define INSPECT_LENGTH 5
|
1397
2069
|
|
1398
2070
|
/*
|
1399
|
-
*
|
1400
|
-
*
|
1401
|
-
*
|
1402
|
-
* -
|
1403
|
-
*
|
1404
|
-
*
|
1405
|
-
*
|
1406
|
-
*
|
1407
|
-
*
|
2071
|
+
* :markup: markdown
|
2072
|
+
* :include: strscan/link_refs.txt
|
2073
|
+
*
|
2074
|
+
* call-seq:
|
2075
|
+
* inspect -> string
|
2076
|
+
*
|
2077
|
+
* Returns a string representation of `self` that may show:
|
2078
|
+
*
|
2079
|
+
* 1. The current [position][2].
|
2080
|
+
* 2. The size (in bytes) of the [stored string][1].
|
2081
|
+
* 3. The substring preceding the current position.
|
2082
|
+
* 4. The substring following the current position (which is also the [target substring][3]).
|
2083
|
+
*
|
2084
|
+
* ```rb
|
2085
|
+
* scanner = StringScanner.new("Fri Dec 12 1975 14:39")
|
2086
|
+
* scanner.pos = 11
|
2087
|
+
* scanner.inspect # => "#<StringScanner 11/21 \"...c 12 \" @ \"1975 ...\">"
|
2088
|
+
* ```
|
2089
|
+
*
|
2090
|
+
* If at beginning-of-string, item 3 above (preceding substring) is omitted:
|
2091
|
+
*
|
2092
|
+
* ```rb
|
2093
|
+
* scanner.reset
|
2094
|
+
* scanner.inspect # => "#<StringScanner 0/21 @ \"Fri D...\">"
|
2095
|
+
* ```
|
2096
|
+
*
|
2097
|
+
* If at end-of-string, all items above are omitted:
|
2098
|
+
*
|
2099
|
+
* ```rb
|
2100
|
+
* scanner.terminate
|
2101
|
+
* scanner.inspect # => "#<StringScanner fin>"
|
2102
|
+
* ```
|
2103
|
+
*
|
1408
2104
|
*/
|
1409
2105
|
static VALUE
|
1410
2106
|
strscan_inspect(VALUE self)
|
@@ -1476,13 +2172,13 @@ inspect2(struct strscanner *p)
|
|
1476
2172
|
}
|
1477
2173
|
|
1478
2174
|
/*
|
1479
|
-
*
|
1480
|
-
*
|
2175
|
+
* :markup: markdown
|
2176
|
+
* :include: strscan/link_refs.txt
|
1481
2177
|
*
|
1482
|
-
*
|
2178
|
+
* call-seq:
|
2179
|
+
* fixed_anchor? -> true or false
|
1483
2180
|
*
|
1484
|
-
*
|
1485
|
-
* the string. Otherwise, +\A+ always matches the current position.
|
2181
|
+
* Returns whether the [fixed-anchor property][10] is set.
|
1486
2182
|
*/
|
1487
2183
|
static VALUE
|
1488
2184
|
strscan_fixed_anchor_p(VALUE self)
|
@@ -1518,21 +2214,39 @@ named_captures_iter(const OnigUChar *name,
|
|
1518
2214
|
}
|
1519
2215
|
|
1520
2216
|
/*
|
2217
|
+
* :markup: markdown
|
2218
|
+
* :include: strscan/link_refs.txt
|
2219
|
+
*
|
1521
2220
|
* call-seq:
|
1522
|
-
*
|
2221
|
+
* named_captures -> hash
|
2222
|
+
*
|
2223
|
+
* Returns a hash of the named captures (group name => captured substring or nil)
|
2224
|
+
* for the pattern of the most recent match attempt, or an empty hash if there are none;
|
2225
|
+
* see [Captured Match Values][13]:
|
1523
2226
|
*
|
1524
|
-
*
|
2227
|
+
* ```rb
|
2228
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
2229
|
+
* scanner.named_captures # => {}
|
2230
|
+
*
|
2231
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
2232
|
+
* scanner.match?(pattern)
|
2233
|
+
* scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"}
|
2234
|
+
*
|
2235
|
+
* scanner.string = 'nope'
|
2236
|
+
* scanner.match?(pattern)
|
2237
|
+
* scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil}
|
2238
|
+
*
|
2239
|
+
* scanner.match?(/nosuch/)
|
2240
|
+
* scanner.named_captures # => {}
|
2241
|
+
* ```
|
1525
2242
|
*
|
1526
|
-
* scan = StringScanner.new('foobarbaz')
|
1527
|
-
* scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/)
|
1528
|
-
* scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"}
|
1529
2243
|
*/
|
1530
2244
|
static VALUE
|
1531
2245
|
strscan_named_captures(VALUE self)
|
1532
2246
|
{
|
1533
2247
|
struct strscanner *p;
|
1534
|
-
GET_SCANNER(self, p);
|
1535
2248
|
named_captures_data data;
|
2249
|
+
GET_SCANNER(self, p);
|
1536
2250
|
data.self = self;
|
1537
2251
|
data.captures = rb_hash_new();
|
1538
2252
|
if (!RB_NIL_P(p->regex)) {
|
@@ -1549,107 +2263,11 @@ strscan_named_captures(VALUE self)
|
|
1549
2263
|
/*
|
1550
2264
|
* Document-class: StringScanner
|
1551
2265
|
*
|
1552
|
-
*
|
1553
|
-
* an example of its usage:
|
1554
|
-
*
|
1555
|
-
* require 'strscan'
|
1556
|
-
*
|
1557
|
-
* s = StringScanner.new('This is an example string')
|
1558
|
-
* s.eos? # -> false
|
1559
|
-
*
|
1560
|
-
* p s.scan(/\w+/) # -> "This"
|
1561
|
-
* p s.scan(/\w+/) # -> nil
|
1562
|
-
* p s.scan(/\s+/) # -> " "
|
1563
|
-
* p s.scan(/\s+/) # -> nil
|
1564
|
-
* p s.scan(/\w+/) # -> "is"
|
1565
|
-
* s.eos? # -> false
|
1566
|
-
*
|
1567
|
-
* p s.scan(/\s+/) # -> " "
|
1568
|
-
* p s.scan(/\w+/) # -> "an"
|
1569
|
-
* p s.scan(/\s+/) # -> " "
|
1570
|
-
* p s.scan(/\w+/) # -> "example"
|
1571
|
-
* p s.scan(/\s+/) # -> " "
|
1572
|
-
* p s.scan(/\w+/) # -> "string"
|
1573
|
-
* s.eos? # -> true
|
1574
|
-
*
|
1575
|
-
* p s.scan(/\s+/) # -> nil
|
1576
|
-
* p s.scan(/\w+/) # -> nil
|
2266
|
+
* :markup: markdown
|
1577
2267
|
*
|
1578
|
-
*
|
1579
|
-
*
|
1580
|
-
* a time, so matches are sought after the scan pointer; usually immediately
|
1581
|
-
* after it.
|
2268
|
+
* :include: strscan/link_refs.txt
|
2269
|
+
* :include: strscan/strscan.md
|
1582
2270
|
*
|
1583
|
-
* Given the string "test string", here are the pertinent scan pointer
|
1584
|
-
* positions:
|
1585
|
-
*
|
1586
|
-
* t e s t s t r i n g
|
1587
|
-
* 0 1 2 ... 1
|
1588
|
-
* 0
|
1589
|
-
*
|
1590
|
-
* When you #scan for a pattern (a regular expression), the match must occur
|
1591
|
-
* at the character after the scan pointer. If you use #scan_until, then the
|
1592
|
-
* match can occur anywhere after the scan pointer. In both cases, the scan
|
1593
|
-
* pointer moves <i>just beyond</i> the last character of the match, ready to
|
1594
|
-
* scan again from the next character onwards. This is demonstrated by the
|
1595
|
-
* example above.
|
1596
|
-
*
|
1597
|
-
* == Method Categories
|
1598
|
-
*
|
1599
|
-
* There are other methods besides the plain scanners. You can look ahead in
|
1600
|
-
* the string without actually scanning. You can access the most recent match.
|
1601
|
-
* You can modify the string being scanned, reset or terminate the scanner,
|
1602
|
-
* find out or change the position of the scan pointer, skip ahead, and so on.
|
1603
|
-
*
|
1604
|
-
* === Advancing the Scan Pointer
|
1605
|
-
*
|
1606
|
-
* - #getch
|
1607
|
-
* - #get_byte
|
1608
|
-
* - #scan
|
1609
|
-
* - #scan_until
|
1610
|
-
* - #skip
|
1611
|
-
* - #skip_until
|
1612
|
-
*
|
1613
|
-
* === Looking Ahead
|
1614
|
-
*
|
1615
|
-
* - #check
|
1616
|
-
* - #check_until
|
1617
|
-
* - #exist?
|
1618
|
-
* - #match?
|
1619
|
-
* - #peek
|
1620
|
-
*
|
1621
|
-
* === Finding Where we Are
|
1622
|
-
*
|
1623
|
-
* - #beginning_of_line? (<tt>#bol?</tt>)
|
1624
|
-
* - #eos?
|
1625
|
-
* - #rest?
|
1626
|
-
* - #rest_size
|
1627
|
-
* - #pos
|
1628
|
-
*
|
1629
|
-
* === Setting Where we Are
|
1630
|
-
*
|
1631
|
-
* - #reset
|
1632
|
-
* - #terminate
|
1633
|
-
* - #pos=
|
1634
|
-
*
|
1635
|
-
* === Match Data
|
1636
|
-
*
|
1637
|
-
* - #matched
|
1638
|
-
* - #matched?
|
1639
|
-
* - #matched_size
|
1640
|
-
* - <tt>#[]</tt>
|
1641
|
-
* - #pre_match
|
1642
|
-
* - #post_match
|
1643
|
-
*
|
1644
|
-
* === Miscellaneous
|
1645
|
-
*
|
1646
|
-
* - <tt><<</tt>
|
1647
|
-
* - #concat
|
1648
|
-
* - #string
|
1649
|
-
* - #string=
|
1650
|
-
* - #unscan
|
1651
|
-
*
|
1652
|
-
* There are aliases to several of the methods.
|
1653
2271
|
*/
|
1654
2272
|
void
|
1655
2273
|
Init_strscan(void)
|
@@ -1664,6 +2282,10 @@ Init_strscan(void)
|
|
1664
2282
|
|
1665
2283
|
id_byteslice = rb_intern("byteslice");
|
1666
2284
|
|
2285
|
+
usascii_encindex = rb_usascii_encindex();
|
2286
|
+
utf8_encindex = rb_utf8_encindex();
|
2287
|
+
binary_encindex = rb_ascii8bit_encindex();
|
2288
|
+
|
1667
2289
|
StringScanner = rb_define_class("StringScanner", rb_cObject);
|
1668
2290
|
ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
|
1669
2291
|
if (!rb_const_defined(rb_cObject, id_scanerr)) {
|
@@ -1708,9 +2330,14 @@ Init_strscan(void)
|
|
1708
2330
|
rb_define_method(StringScanner, "getch", strscan_getch, 0);
|
1709
2331
|
rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
|
1710
2332
|
rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
|
2333
|
+
rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
|
1711
2334
|
rb_define_method(StringScanner, "peek", strscan_peek, 1);
|
2335
|
+
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
|
1712
2336
|
rb_define_method(StringScanner, "peep", strscan_peep, 1);
|
1713
2337
|
|
2338
|
+
rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
|
2339
|
+
rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
|
2340
|
+
|
1714
2341
|
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
|
1715
2342
|
|
1716
2343
|
rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
|
@@ -1738,4 +2365,6 @@ Init_strscan(void)
|
|
1738
2365
|
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1739
2366
|
|
1740
2367
|
rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
|
2368
|
+
|
2369
|
+
rb_require("strscan/strscan");
|
1741
2370
|
}
|