strscan 3.1.0 → 3.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rdoc_options +3 -0
- data/doc/strscan/helper_methods.md +124 -0
- data/doc/strscan/link_refs.txt +17 -0
- data/doc/strscan/methods/get_byte.md +30 -0
- data/doc/strscan/methods/get_charpos.md +19 -0
- data/doc/strscan/methods/get_pos.md +14 -0
- data/doc/strscan/methods/getch.md +43 -0
- data/doc/strscan/methods/scan.md +51 -0
- data/doc/strscan/methods/scan_until.md +52 -0
- data/doc/strscan/methods/set_pos.md +27 -0
- data/doc/strscan/methods/skip.md +43 -0
- data/doc/strscan/methods/skip_until.md +49 -0
- data/doc/strscan/methods/terminate.md +30 -0
- data/doc/strscan/strscan.md +544 -0
- data/ext/strscan/strscan.c +1096 -460
- data/lib/strscan/strscan.rb +25 -0
- metadata +36 -9
data/ext/strscan/strscan.c
CHANGED
@@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
|
|
22
22
|
|
23
23
|
#include <stdbool.h>
|
24
24
|
|
25
|
-
#define STRSCAN_VERSION "3.1.0"
|
25
|
+
#define STRSCAN_VERSION "3.1.4"
|
26
26
|
|
27
27
|
/* =======================================================================
|
28
28
|
Data Type Definitions
|
@@ -32,6 +32,8 @@ static VALUE StringScanner;
|
|
32
32
|
static VALUE ScanError;
|
33
33
|
static ID id_byteslice;
|
34
34
|
|
35
|
+
static int usascii_encindex, utf8_encindex, binary_encindex;
|
36
|
+
|
35
37
|
struct strscanner
|
36
38
|
{
|
37
39
|
/* multi-purpose flags */
|
@@ -56,8 +58,13 @@ struct strscanner
|
|
56
58
|
};
|
57
59
|
|
58
60
|
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
|
59
|
-
#define MATCHED(s)
|
60
|
-
#define
|
61
|
+
#define MATCHED(s) ((s)->flags |= FLAG_MATCHED)
|
62
|
+
#define CLEAR_MATCHED(s) ((s)->flags &= ~FLAG_MATCHED)
|
63
|
+
#define CLEAR_NAMED_CAPTURES(s) ((s)->regex = Qnil)
|
64
|
+
#define CLEAR_MATCH_STATUS(s) do {\
|
65
|
+
CLEAR_MATCHED(s);\
|
66
|
+
CLEAR_NAMED_CAPTURES(s);\
|
67
|
+
} while (0)
|
61
68
|
|
62
69
|
#define S_PBEG(s) (RSTRING_PTR((s)->str))
|
63
70
|
#define S_LEN(s) (RSTRING_LEN((s)->str))
|
@@ -115,6 +122,7 @@ static VALUE strscan_get_byte _((VALUE self));
|
|
115
122
|
static VALUE strscan_getbyte _((VALUE self));
|
116
123
|
static VALUE strscan_peek _((VALUE self, VALUE len));
|
117
124
|
static VALUE strscan_peep _((VALUE self, VALUE len));
|
125
|
+
static VALUE strscan_scan_base10_integer _((VALUE self));
|
118
126
|
static VALUE strscan_unscan _((VALUE self));
|
119
127
|
static VALUE strscan_bol_p _((VALUE self));
|
120
128
|
static VALUE strscan_eos_p _((VALUE self));
|
@@ -213,21 +221,32 @@ strscan_s_allocate(VALUE klass)
|
|
213
221
|
CLEAR_MATCH_STATUS(p);
|
214
222
|
onig_region_init(&(p->regs));
|
215
223
|
p->str = Qnil;
|
216
|
-
p->regex = Qnil;
|
217
224
|
return obj;
|
218
225
|
}
|
219
226
|
|
220
227
|
/*
|
221
|
-
*
|
222
|
-
*
|
223
|
-
* StringScanner.new(string, dup = false)
|
228
|
+
* :markup: markdown
|
229
|
+
* :include: strscan/link_refs.txt
|
224
230
|
*
|
225
|
-
*
|
226
|
-
*
|
227
|
-
*
|
228
|
-
*
|
231
|
+
* call-seq:
|
232
|
+
* StringScanner.new(string, fixed_anchor: false) -> string_scanner
|
233
|
+
*
|
234
|
+
* Returns a new `StringScanner` object whose [stored string][1]
|
235
|
+
* is the given `string`;
|
236
|
+
* sets the [fixed-anchor property][10]:
|
237
|
+
*
|
238
|
+
* ```rb
|
239
|
+
* scanner = StringScanner.new('foobarbaz')
|
240
|
+
* scanner.string # => "foobarbaz"
|
241
|
+
* scanner.fixed_anchor? # => false
|
242
|
+
* put_situation(scanner)
|
243
|
+
* # Situation:
|
244
|
+
* # pos: 0
|
245
|
+
* # charpos: 0
|
246
|
+
* # rest: "foobarbaz"
|
247
|
+
* # rest_size: 9
|
248
|
+
* ```
|
229
249
|
*
|
230
|
-
* +dup+ argument is obsolete and not used now.
|
231
250
|
*/
|
232
251
|
static VALUE
|
233
252
|
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
@@ -266,11 +285,14 @@ check_strscan(VALUE obj)
|
|
266
285
|
}
|
267
286
|
|
268
287
|
/*
|
288
|
+
* :markup: markdown
|
289
|
+
* :include: strscan/link_refs.txt
|
290
|
+
*
|
269
291
|
* call-seq:
|
270
|
-
* dup
|
271
|
-
* clone
|
292
|
+
* dup -> shallow_copy
|
272
293
|
*
|
273
|
-
*
|
294
|
+
* Returns a shallow copy of `self`;
|
295
|
+
* the [stored string][1] in the copy is the same string as in `self`.
|
274
296
|
*/
|
275
297
|
static VALUE
|
276
298
|
strscan_init_copy(VALUE vself, VALUE vorig)
|
@@ -297,10 +319,13 @@ strscan_init_copy(VALUE vself, VALUE vorig)
|
|
297
319
|
======================================================================= */
|
298
320
|
|
299
321
|
/*
|
300
|
-
* call-seq:
|
322
|
+
* call-seq:
|
323
|
+
* StringScanner.must_C_version -> self
|
301
324
|
*
|
302
|
-
*
|
325
|
+
* Returns +self+; defined for backward compatibility.
|
303
326
|
*/
|
327
|
+
|
328
|
+
/* :nodoc: */
|
304
329
|
static VALUE
|
305
330
|
strscan_s_mustc(VALUE self)
|
306
331
|
{
|
@@ -308,7 +333,30 @@ strscan_s_mustc(VALUE self)
|
|
308
333
|
}
|
309
334
|
|
310
335
|
/*
|
311
|
-
*
|
336
|
+
* :markup: markdown
|
337
|
+
* :include: strscan/link_refs.txt
|
338
|
+
*
|
339
|
+
* call-seq:
|
340
|
+
* reset -> self
|
341
|
+
*
|
342
|
+
* Sets both [byte position][2] and [character position][7] to zero,
|
343
|
+
* and clears [match values][9];
|
344
|
+
* returns `self`:
|
345
|
+
*
|
346
|
+
* ```rb
|
347
|
+
* scanner = StringScanner.new('foobarbaz')
|
348
|
+
* scanner.exist?(/bar/) # => 6
|
349
|
+
* scanner.reset # => #<StringScanner 0/9 @ "fooba...">
|
350
|
+
* put_situation(scanner)
|
351
|
+
* # Situation:
|
352
|
+
* # pos: 0
|
353
|
+
* # charpos: 0
|
354
|
+
* # rest: "foobarbaz"
|
355
|
+
* # rest_size: 9
|
356
|
+
* # => nil
|
357
|
+
* match_values_cleared?(scanner) # => true
|
358
|
+
* ```
|
359
|
+
*
|
312
360
|
*/
|
313
361
|
static VALUE
|
314
362
|
strscan_reset(VALUE self)
|
@@ -322,11 +370,9 @@ strscan_reset(VALUE self)
|
|
322
370
|
}
|
323
371
|
|
324
372
|
/*
|
325
|
-
*
|
326
|
-
*
|
327
|
-
*
|
328
|
-
*
|
329
|
-
* Sets the scan pointer to the end of the string and clear matching data.
|
373
|
+
* :markup: markdown
|
374
|
+
* :include: strscan/link_refs.txt
|
375
|
+
* :include: strscan/methods/terminate.md
|
330
376
|
*/
|
331
377
|
static VALUE
|
332
378
|
strscan_terminate(VALUE self)
|
@@ -340,9 +386,13 @@ strscan_terminate(VALUE self)
|
|
340
386
|
}
|
341
387
|
|
342
388
|
/*
|
343
|
-
*
|
344
|
-
*
|
389
|
+
* call-seq:
|
390
|
+
* clear -> self
|
391
|
+
*
|
392
|
+
* This method is obsolete; use the equivalent method StringScanner#terminate.
|
345
393
|
*/
|
394
|
+
|
395
|
+
/* :nodoc: */
|
346
396
|
static VALUE
|
347
397
|
strscan_clear(VALUE self)
|
348
398
|
{
|
@@ -351,7 +401,21 @@ strscan_clear(VALUE self)
|
|
351
401
|
}
|
352
402
|
|
353
403
|
/*
|
354
|
-
*
|
404
|
+
* :markup: markdown
|
405
|
+
* :include: strscan/link_refs.txt
|
406
|
+
*
|
407
|
+
* call-seq:
|
408
|
+
* string -> stored_string
|
409
|
+
*
|
410
|
+
* Returns the [stored string][1]:
|
411
|
+
*
|
412
|
+
* ```rb
|
413
|
+
* scanner = StringScanner.new('foobar')
|
414
|
+
* scanner.string # => "foobar"
|
415
|
+
* scanner.concat('baz')
|
416
|
+
* scanner.string # => "foobarbaz"
|
417
|
+
* ```
|
418
|
+
*
|
355
419
|
*/
|
356
420
|
static VALUE
|
357
421
|
strscan_get_string(VALUE self)
|
@@ -363,10 +427,39 @@ strscan_get_string(VALUE self)
|
|
363
427
|
}
|
364
428
|
|
365
429
|
/*
|
366
|
-
*
|
430
|
+
* :markup: markdown
|
431
|
+
* :include: strscan/link_refs.txt
|
432
|
+
*
|
433
|
+
* call-seq:
|
434
|
+
* string = other_string -> other_string
|
435
|
+
*
|
436
|
+
* Replaces the [stored string][1] with the given `other_string`:
|
437
|
+
*
|
438
|
+
* - Sets both [positions][11] to zero.
|
439
|
+
* - Clears [match values][9].
|
440
|
+
* - Returns `other_string`.
|
441
|
+
*
|
442
|
+
* ```rb
|
443
|
+
* scanner = StringScanner.new('foobar')
|
444
|
+
* scanner.scan(/foo/)
|
445
|
+
* put_situation(scanner)
|
446
|
+
* # Situation:
|
447
|
+
* # pos: 3
|
448
|
+
* # charpos: 3
|
449
|
+
* # rest: "bar"
|
450
|
+
* # rest_size: 3
|
451
|
+
* match_values_cleared?(scanner) # => false
|
452
|
+
*
|
453
|
+
* scanner.string = 'baz' # => "baz"
|
454
|
+
* put_situation(scanner)
|
455
|
+
* # Situation:
|
456
|
+
* # pos: 0
|
457
|
+
* # charpos: 0
|
458
|
+
* # rest: "baz"
|
459
|
+
* # rest_size: 3
|
460
|
+
* match_values_cleared?(scanner) # => true
|
461
|
+
* ```
|
367
462
|
*
|
368
|
-
* Changes the string being scanned to +str+ and resets the scanner.
|
369
|
-
* Returns +str+.
|
370
463
|
*/
|
371
464
|
static VALUE
|
372
465
|
strscan_set_string(VALUE self, VALUE str)
|
@@ -381,18 +474,33 @@ strscan_set_string(VALUE self, VALUE str)
|
|
381
474
|
}
|
382
475
|
|
383
476
|
/*
|
384
|
-
*
|
385
|
-
*
|
386
|
-
* <<(str)
|
477
|
+
* :markup: markdown
|
478
|
+
* :include: strscan/link_refs.txt
|
387
479
|
*
|
388
|
-
*
|
389
|
-
*
|
480
|
+
* call-seq:
|
481
|
+
* concat(more_string) -> self
|
482
|
+
*
|
483
|
+
* - Appends the given `more_string`
|
484
|
+
* to the [stored string][1].
|
485
|
+
* - Returns `self`.
|
486
|
+
* - Does not affect the [positions][11]
|
487
|
+
* or [match values][9].
|
488
|
+
*
|
489
|
+
*
|
490
|
+
* ```rb
|
491
|
+
* scanner = StringScanner.new('foo')
|
492
|
+
* scanner.string # => "foo"
|
493
|
+
* scanner.terminate
|
494
|
+
* scanner.concat('barbaz') # => #<StringScanner 3/9 "foo" @ "barba...">
|
495
|
+
* scanner.string # => "foobarbaz"
|
496
|
+
* put_situation(scanner)
|
497
|
+
* # Situation:
|
498
|
+
* # pos: 3
|
499
|
+
* # charpos: 3
|
500
|
+
* # rest: "barbaz"
|
501
|
+
* # rest_size: 6
|
502
|
+
* ```
|
390
503
|
*
|
391
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
392
|
-
* s.scan(/Fri /)
|
393
|
-
* s << " +1000 GMT"
|
394
|
-
* s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
|
395
|
-
* s.scan(/Dec/) # -> "Dec"
|
396
504
|
*/
|
397
505
|
static VALUE
|
398
506
|
strscan_concat(VALUE self, VALUE str)
|
@@ -406,18 +514,9 @@ strscan_concat(VALUE self, VALUE str)
|
|
406
514
|
}
|
407
515
|
|
408
516
|
/*
|
409
|
-
*
|
410
|
-
*
|
411
|
-
*
|
412
|
-
*
|
413
|
-
* In short, it's a 0-based index into bytes of the string.
|
414
|
-
*
|
415
|
-
* s = StringScanner.new('test string')
|
416
|
-
* s.pos # -> 0
|
417
|
-
* s.scan_until /str/ # -> "test str"
|
418
|
-
* s.pos # -> 8
|
419
|
-
* s.terminate # -> #<StringScanner fin>
|
420
|
-
* s.pos # -> 11
|
517
|
+
* :markup: markdown
|
518
|
+
* :include: strscan/link_refs.txt
|
519
|
+
* :include: strscan/methods/get_pos.md
|
421
520
|
*/
|
422
521
|
static VALUE
|
423
522
|
strscan_get_pos(VALUE self)
|
@@ -425,21 +524,13 @@ strscan_get_pos(VALUE self)
|
|
425
524
|
struct strscanner *p;
|
426
525
|
|
427
526
|
GET_SCANNER(self, p);
|
428
|
-
return
|
527
|
+
return LONG2NUM(p->curr);
|
429
528
|
}
|
430
529
|
|
431
530
|
/*
|
432
|
-
*
|
433
|
-
*
|
434
|
-
*
|
435
|
-
*
|
436
|
-
* In short, it's a 0-based index into the string.
|
437
|
-
*
|
438
|
-
* s = StringScanner.new("abc\u00e4def\u00f6ghi")
|
439
|
-
* s.charpos # -> 0
|
440
|
-
* s.scan_until(/\u00e4/) # -> "abc\u00E4"
|
441
|
-
* s.pos # -> 5
|
442
|
-
* s.charpos # -> 4
|
531
|
+
* :markup: markdown
|
532
|
+
* :include: strscan/link_refs.txt
|
533
|
+
* :include: strscan/methods/get_charpos.md
|
443
534
|
*/
|
444
535
|
static VALUE
|
445
536
|
strscan_get_charpos(VALUE self)
|
@@ -452,13 +543,9 @@ strscan_get_charpos(VALUE self)
|
|
452
543
|
}
|
453
544
|
|
454
545
|
/*
|
455
|
-
*
|
456
|
-
*
|
457
|
-
*
|
458
|
-
*
|
459
|
-
* s = StringScanner.new('test string')
|
460
|
-
* s.pos = 7 # -> 7
|
461
|
-
* s.rest # -> "ring"
|
546
|
+
* :markup: markdown
|
547
|
+
* :include: strscan/link_refs.txt
|
548
|
+
* :include: strscan/methods/set_pos.md
|
462
549
|
*/
|
463
550
|
static VALUE
|
464
551
|
strscan_set_pos(VALUE self, VALUE v)
|
@@ -467,7 +554,7 @@ strscan_set_pos(VALUE self, VALUE v)
|
|
467
554
|
long i;
|
468
555
|
|
469
556
|
GET_SCANNER(self, p);
|
470
|
-
i =
|
557
|
+
i = NUM2LONG(v);
|
471
558
|
if (i < 0) i += S_LEN(p);
|
472
559
|
if (i < 0) rb_raise(rb_eRangeError, "index out of range");
|
473
560
|
if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
|
@@ -488,19 +575,20 @@ match_target(struct strscanner *p)
|
|
488
575
|
}
|
489
576
|
|
490
577
|
static inline void
|
491
|
-
set_registers(struct strscanner *p, size_t length)
|
578
|
+
set_registers(struct strscanner *p, size_t pos, size_t length)
|
492
579
|
{
|
493
580
|
const int at = 0;
|
494
581
|
OnigRegion *regs = &(p->regs);
|
495
582
|
onig_region_clear(regs);
|
496
583
|
if (onig_region_set(regs, at, 0, 0)) return;
|
497
584
|
if (p->fixed_anchor_p) {
|
498
|
-
regs->beg[at] = p->curr;
|
499
|
-
regs->end[at] = p->curr + length;
|
585
|
+
regs->beg[at] = pos + p->curr;
|
586
|
+
regs->end[at] = pos + p->curr + length;
|
500
587
|
}
|
501
588
|
else
|
502
589
|
{
|
503
|
-
regs->
|
590
|
+
regs->beg[at] = pos;
|
591
|
+
regs->end[at] = pos + length;
|
504
592
|
}
|
505
593
|
}
|
506
594
|
|
@@ -546,12 +634,13 @@ rb_reg_onig_match(VALUE re, VALUE str,
|
|
546
634
|
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
|
547
635
|
void *args, struct re_registers *regs)
|
548
636
|
{
|
637
|
+
OnigPosition result;
|
549
638
|
regex_t *reg = rb_reg_prepare_re(re, str);
|
550
639
|
|
551
640
|
bool tmpreg = reg != RREGEXP_PTR(re);
|
552
641
|
if (!tmpreg) RREGEXP(re)->usecnt++;
|
553
642
|
|
554
|
-
|
643
|
+
result = match(reg, str, regs, args);
|
555
644
|
|
556
645
|
if (!tmpreg) RREGEXP(re)->usecnt--;
|
557
646
|
if (tmpreg) {
|
@@ -601,19 +690,19 @@ strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_pt
|
|
601
690
|
ONIG_OPTION_NONE);
|
602
691
|
}
|
603
692
|
|
693
|
+
static void
|
694
|
+
strscan_enc_check(VALUE str1, VALUE str2)
|
695
|
+
{
|
696
|
+
if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) {
|
697
|
+
rb_enc_check(str1, str2);
|
698
|
+
}
|
699
|
+
}
|
700
|
+
|
604
701
|
static VALUE
|
605
702
|
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
|
606
703
|
{
|
607
704
|
struct strscanner *p;
|
608
705
|
|
609
|
-
if (headonly) {
|
610
|
-
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
611
|
-
StringValue(pattern);
|
612
|
-
}
|
613
|
-
}
|
614
|
-
else {
|
615
|
-
Check_Type(pattern, T_REGEXP);
|
616
|
-
}
|
617
706
|
GET_SCANNER(self, p);
|
618
707
|
|
619
708
|
CLEAR_MATCH_STATUS(p);
|
@@ -622,26 +711,42 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|
622
711
|
}
|
623
712
|
|
624
713
|
if (RB_TYPE_P(pattern, T_REGEXP)) {
|
714
|
+
OnigPosition ret;
|
625
715
|
p->regex = pattern;
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
716
|
+
ret = rb_reg_onig_match(p->regex,
|
717
|
+
p->str,
|
718
|
+
headonly ? strscan_match : strscan_search,
|
719
|
+
(void *)p,
|
720
|
+
&(p->regs));
|
631
721
|
|
632
722
|
if (ret == ONIG_MISMATCH) {
|
633
723
|
return Qnil;
|
634
724
|
}
|
635
725
|
}
|
636
726
|
else {
|
637
|
-
|
727
|
+
StringValue(pattern);
|
638
728
|
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
729
|
+
strscan_enc_check(p->str, pattern);
|
639
730
|
return Qnil;
|
640
731
|
}
|
641
|
-
|
642
|
-
|
732
|
+
|
733
|
+
if (headonly) {
|
734
|
+
strscan_enc_check(p->str, pattern);
|
735
|
+
|
736
|
+
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
737
|
+
return Qnil;
|
738
|
+
}
|
739
|
+
set_registers(p, 0, RSTRING_LEN(pattern));
|
740
|
+
}
|
741
|
+
else {
|
742
|
+
rb_encoding *enc = rb_enc_check(p->str, pattern);
|
743
|
+
long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
|
744
|
+
CURPTR(p), S_RESTLEN(p), enc);
|
745
|
+
if (pos == -1) {
|
746
|
+
return Qnil;
|
747
|
+
}
|
748
|
+
set_registers(p, pos, RSTRING_LEN(pattern));
|
643
749
|
}
|
644
|
-
set_registers(p, RSTRING_LEN(pattern));
|
645
750
|
}
|
646
751
|
|
647
752
|
MATCHED(p);
|
@@ -662,20 +767,9 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|
662
767
|
}
|
663
768
|
|
664
769
|
/*
|
665
|
-
*
|
666
|
-
*
|
667
|
-
*
|
668
|
-
* the scanner advances the "scan pointer" and returns the matched string.
|
669
|
-
* Otherwise, the scanner returns +nil+.
|
670
|
-
*
|
671
|
-
* s = StringScanner.new('test string')
|
672
|
-
* p s.scan(/\w+/) # -> "test"
|
673
|
-
* p s.scan(/\w+/) # -> nil
|
674
|
-
* p s.scan(/\s+/) # -> " "
|
675
|
-
* p s.scan("str") # -> "str"
|
676
|
-
* p s.scan(/\w+/) # -> "ing"
|
677
|
-
* p s.scan(/./) # -> nil
|
678
|
-
*
|
770
|
+
* :markup: markdown
|
771
|
+
* :include: strscan/link_refs.txt
|
772
|
+
* :include: strscan/methods/scan.md
|
679
773
|
*/
|
680
774
|
static VALUE
|
681
775
|
strscan_scan(VALUE self, VALUE re)
|
@@ -684,16 +778,60 @@ strscan_scan(VALUE self, VALUE re)
|
|
684
778
|
}
|
685
779
|
|
686
780
|
/*
|
687
|
-
*
|
781
|
+
* :markup: markdown
|
782
|
+
* :include: strscan/link_refs.txt
|
688
783
|
*
|
689
|
-
*
|
690
|
-
*
|
784
|
+
* call-seq:
|
785
|
+
* match?(pattern) -> matched_size or nil
|
786
|
+
*
|
787
|
+
* Attempts to [match][17] the given `pattern`
|
788
|
+
* at the beginning of the [target substring][3];
|
789
|
+
* does not modify the [positions][11].
|
790
|
+
*
|
791
|
+
* If the match succeeds:
|
792
|
+
*
|
793
|
+
* - Sets [match values][9].
|
794
|
+
* - Returns the size in bytes of the matched substring.
|
795
|
+
*
|
796
|
+
*
|
797
|
+
* ```rb
|
798
|
+
* scanner = StringScanner.new('foobarbaz')
|
799
|
+
* scanner.pos = 3
|
800
|
+
* scanner.match?(/bar/) # => 3
|
801
|
+
* put_match_values(scanner)
|
802
|
+
* # Basic match values:
|
803
|
+
* # matched?: true
|
804
|
+
* # matched_size: 3
|
805
|
+
* # pre_match: "foo"
|
806
|
+
* # matched : "bar"
|
807
|
+
* # post_match: "baz"
|
808
|
+
* # Captured match values:
|
809
|
+
* # size: 1
|
810
|
+
* # captures: []
|
811
|
+
* # named_captures: {}
|
812
|
+
* # values_at: ["bar", nil]
|
813
|
+
* # []:
|
814
|
+
* # [0]: "bar"
|
815
|
+
* # [1]: nil
|
816
|
+
* put_situation(scanner)
|
817
|
+
* # Situation:
|
818
|
+
* # pos: 3
|
819
|
+
* # charpos: 3
|
820
|
+
* # rest: "barbaz"
|
821
|
+
* # rest_size: 6
|
822
|
+
* ```
|
823
|
+
*
|
824
|
+
* If the match fails:
|
825
|
+
*
|
826
|
+
* - Clears match values.
|
827
|
+
* - Returns `nil`.
|
828
|
+
* - Does not increment positions.
|
829
|
+
*
|
830
|
+
* ```rb
|
831
|
+
* scanner.match?(/nope/) # => nil
|
832
|
+
* match_values_cleared?(scanner) # => true
|
833
|
+
* ```
|
691
834
|
*
|
692
|
-
* s = StringScanner.new('test string')
|
693
|
-
* p s.match?(/\w+/) # -> 4
|
694
|
-
* p s.match?(/\w+/) # -> 4
|
695
|
-
* p s.match?("test") # -> 4
|
696
|
-
* p s.match?(/\s+/) # -> nil
|
697
835
|
*/
|
698
836
|
static VALUE
|
699
837
|
strscan_match_p(VALUE self, VALUE re)
|
@@ -702,22 +840,9 @@ strscan_match_p(VALUE self, VALUE re)
|
|
702
840
|
}
|
703
841
|
|
704
842
|
/*
|
705
|
-
*
|
706
|
-
*
|
707
|
-
*
|
708
|
-
* If it matches, the scan pointer is advanced to the end of the match, and the
|
709
|
-
* length of the match is returned. Otherwise, +nil+ is returned.
|
710
|
-
*
|
711
|
-
* It's similar to #scan, but without returning the matched string.
|
712
|
-
*
|
713
|
-
* s = StringScanner.new('test string')
|
714
|
-
* p s.skip(/\w+/) # -> 4
|
715
|
-
* p s.skip(/\w+/) # -> nil
|
716
|
-
* p s.skip(/\s+/) # -> 1
|
717
|
-
* p s.skip("st") # -> 2
|
718
|
-
* p s.skip(/\w+/) # -> 4
|
719
|
-
* p s.skip(/./) # -> nil
|
720
|
-
*
|
843
|
+
* :markup: markdown
|
844
|
+
* :include: strscan/link_refs.txt
|
845
|
+
* :include: strscan/methods/skip.md
|
721
846
|
*/
|
722
847
|
static VALUE
|
723
848
|
strscan_skip(VALUE self, VALUE re)
|
@@ -726,19 +851,59 @@ strscan_skip(VALUE self, VALUE re)
|
|
726
851
|
}
|
727
852
|
|
728
853
|
/*
|
729
|
-
*
|
730
|
-
*
|
731
|
-
* This returns the value that #scan would return, without advancing the scan
|
732
|
-
* pointer. The match register is affected, though.
|
854
|
+
* :markup: markdown
|
855
|
+
* :include: strscan/link_refs.txt
|
733
856
|
*
|
734
|
-
*
|
735
|
-
*
|
736
|
-
*
|
737
|
-
*
|
738
|
-
*
|
739
|
-
*
|
857
|
+
* call-seq:
|
858
|
+
* check(pattern) -> matched_substring or nil
|
859
|
+
*
|
860
|
+
* Attempts to [match][17] the given `pattern`
|
861
|
+
* at the beginning of the [target substring][3];
|
862
|
+
* does not modify the [positions][11].
|
863
|
+
*
|
864
|
+
* If the match succeeds:
|
865
|
+
*
|
866
|
+
* - Returns the matched substring.
|
867
|
+
* - Sets all [match values][9].
|
868
|
+
*
|
869
|
+
* ```rb
|
870
|
+
* scanner = StringScanner.new('foobarbaz')
|
871
|
+
* scanner.pos = 3
|
872
|
+
* scanner.check('bar') # => "bar"
|
873
|
+
* put_match_values(scanner)
|
874
|
+
* # Basic match values:
|
875
|
+
* # matched?: true
|
876
|
+
* # matched_size: 3
|
877
|
+
* # pre_match: "foo"
|
878
|
+
* # matched : "bar"
|
879
|
+
* # post_match: "baz"
|
880
|
+
* # Captured match values:
|
881
|
+
* # size: 1
|
882
|
+
* # captures: []
|
883
|
+
* # named_captures: {}
|
884
|
+
* # values_at: ["bar", nil]
|
885
|
+
* # []:
|
886
|
+
* # [0]: "bar"
|
887
|
+
* # [1]: nil
|
888
|
+
* # => 0..1
|
889
|
+
* put_situation(scanner)
|
890
|
+
* # Situation:
|
891
|
+
* # pos: 3
|
892
|
+
* # charpos: 3
|
893
|
+
* # rest: "barbaz"
|
894
|
+
* # rest_size: 6
|
895
|
+
* ```
|
896
|
+
*
|
897
|
+
* If the match fails:
|
898
|
+
*
|
899
|
+
* - Returns `nil`.
|
900
|
+
* - Clears all [match values][9].
|
901
|
+
*
|
902
|
+
* ```rb
|
903
|
+
* scanner.check(/nope/) # => nil
|
904
|
+
* match_values_cleared?(scanner) # => true
|
905
|
+
* ```
|
740
906
|
*
|
741
|
-
* Mnemonic: it "checks" to see whether a #scan will return a value.
|
742
907
|
*/
|
743
908
|
static VALUE
|
744
909
|
strscan_check(VALUE self, VALUE re)
|
@@ -747,15 +912,24 @@ strscan_check(VALUE self, VALUE re)
|
|
747
912
|
}
|
748
913
|
|
749
914
|
/*
|
750
|
-
* call-seq:
|
915
|
+
* call-seq:
|
916
|
+
* scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or matched_size or nil
|
917
|
+
*
|
918
|
+
* Equivalent to one of the following:
|
919
|
+
*
|
920
|
+
* - +advance_pointer_p+ +true+:
|
921
|
+
*
|
922
|
+
* - +return_string_p+ +true+: StringScanner#scan(pattern).
|
923
|
+
* - +return_string_p+ +false+: StringScanner#skip(pattern).
|
924
|
+
*
|
925
|
+
* - +advance_pointer_p+ +false+:
|
751
926
|
*
|
752
|
-
*
|
753
|
-
*
|
754
|
-
* Returns the matched string if +return_string_p+ is true.
|
755
|
-
* The match register is affected.
|
927
|
+
* - +return_string_p+ +true+: StringScanner#check(pattern).
|
928
|
+
* - +return_string_p+ +false+: StringScanner#match?(pattern).
|
756
929
|
*
|
757
|
-
* "full" means "#scan with full parameters".
|
758
930
|
*/
|
931
|
+
|
932
|
+
/* :nodoc: */
|
759
933
|
static VALUE
|
760
934
|
strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
761
935
|
{
|
@@ -763,16 +937,9 @@ strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
|
763
937
|
}
|
764
938
|
|
765
939
|
/*
|
766
|
-
*
|
767
|
-
*
|
768
|
-
*
|
769
|
-
* to and including the end of the match, advancing the scan pointer to that
|
770
|
-
* location. If there is no match, +nil+ is returned.
|
771
|
-
*
|
772
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
773
|
-
* s.scan_until(/1/) # -> "Fri Dec 1"
|
774
|
-
* s.pre_match # -> "Fri Dec "
|
775
|
-
* s.scan_until(/XYZ/) # -> nil
|
940
|
+
* :markup: markdown
|
941
|
+
* :include: strscan/link_refs.txt
|
942
|
+
* :include: strscan/methods/scan_until.md
|
776
943
|
*/
|
777
944
|
static VALUE
|
778
945
|
strscan_scan_until(VALUE self, VALUE re)
|
@@ -781,17 +948,61 @@ strscan_scan_until(VALUE self, VALUE re)
|
|
781
948
|
}
|
782
949
|
|
783
950
|
/*
|
784
|
-
*
|
951
|
+
* :markup: markdown
|
952
|
+
* :include: strscan/link_refs.txt
|
785
953
|
*
|
786
|
-
*
|
787
|
-
*
|
788
|
-
*
|
954
|
+
* call-seq:
|
955
|
+
* exist?(pattern) -> byte_offset or nil
|
956
|
+
*
|
957
|
+
* Attempts to [match][17] the given `pattern`
|
958
|
+
* anywhere (at any [position][2])
|
959
|
+
* in the [target substring][3];
|
960
|
+
* does not modify the [positions][11].
|
961
|
+
*
|
962
|
+
* If the match succeeds:
|
963
|
+
*
|
964
|
+
* - Returns a byte offset:
|
965
|
+
* the distance in bytes between the current [position][2]
|
966
|
+
* and the end of the matched substring.
|
967
|
+
* - Sets all [match values][9].
|
968
|
+
*
|
969
|
+
* ```rb
|
970
|
+
* scanner = StringScanner.new('foobarbazbatbam')
|
971
|
+
* scanner.pos = 6
|
972
|
+
* scanner.exist?(/bat/) # => 6
|
973
|
+
* put_match_values(scanner)
|
974
|
+
* # Basic match values:
|
975
|
+
* # matched?: true
|
976
|
+
* # matched_size: 3
|
977
|
+
* # pre_match: "foobarbaz"
|
978
|
+
* # matched : "bat"
|
979
|
+
* # post_match: "bam"
|
980
|
+
* # Captured match values:
|
981
|
+
* # size: 1
|
982
|
+
* # captures: []
|
983
|
+
* # named_captures: {}
|
984
|
+
* # values_at: ["bat", nil]
|
985
|
+
* # []:
|
986
|
+
* # [0]: "bat"
|
987
|
+
* # [1]: nil
|
988
|
+
* put_situation(scanner)
|
989
|
+
* # Situation:
|
990
|
+
* # pos: 6
|
991
|
+
* # charpos: 6
|
992
|
+
* # rest: "bazbatbam"
|
993
|
+
* # rest_size: 9
|
994
|
+
* ```
|
995
|
+
*
|
996
|
+
* If the match fails:
|
997
|
+
*
|
998
|
+
* - Returns `nil`.
|
999
|
+
* - Clears all [match values][9].
|
1000
|
+
*
|
1001
|
+
* ```rb
|
1002
|
+
* scanner.exist?(/nope/) # => nil
|
1003
|
+
* match_values_cleared?(scanner) # => true
|
1004
|
+
* ```
|
789
1005
|
*
|
790
|
-
* s = StringScanner.new('test string')
|
791
|
-
* s.exist? /s/ # -> 3
|
792
|
-
* s.scan /test/ # -> "test"
|
793
|
-
* s.exist? /s/ # -> 2
|
794
|
-
* s.exist? /e/ # -> nil
|
795
1006
|
*/
|
796
1007
|
static VALUE
|
797
1008
|
strscan_exist_p(VALUE self, VALUE re)
|
@@ -800,20 +1011,9 @@ strscan_exist_p(VALUE self, VALUE re)
|
|
800
1011
|
}
|
801
1012
|
|
802
1013
|
/*
|
803
|
-
*
|
804
|
-
*
|
805
|
-
*
|
806
|
-
* the number of bytes advanced, or +nil+ if no match was found.
|
807
|
-
*
|
808
|
-
* Look ahead to match +pattern+, and advance the scan pointer to the _end_
|
809
|
-
* of the match. Return the number of characters advanced, or +nil+ if the
|
810
|
-
* match was unsuccessful.
|
811
|
-
*
|
812
|
-
* It's similar to #scan_until, but without returning the intervening string.
|
813
|
-
*
|
814
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
815
|
-
* s.skip_until /12/ # -> 10
|
816
|
-
* s #
|
1014
|
+
* :markup: markdown
|
1015
|
+
* :include: strscan/link_refs.txt
|
1016
|
+
* :include: strscan/methods/skip_until.md
|
817
1017
|
*/
|
818
1018
|
static VALUE
|
819
1019
|
strscan_skip_until(VALUE self, VALUE re)
|
@@ -822,17 +1022,61 @@ strscan_skip_until(VALUE self, VALUE re)
|
|
822
1022
|
}
|
823
1023
|
|
824
1024
|
/*
|
825
|
-
*
|
1025
|
+
* :markup: markdown
|
1026
|
+
* :include: strscan/link_refs.txt
|
826
1027
|
*
|
827
|
-
*
|
828
|
-
*
|
829
|
-
*
|
830
|
-
*
|
831
|
-
*
|
832
|
-
*
|
833
|
-
*
|
1028
|
+
* call-seq:
|
1029
|
+
* check_until(pattern) -> substring or nil
|
1030
|
+
*
|
1031
|
+
* Attempts to [match][17] the given `pattern`
|
1032
|
+
* anywhere (at any [position][2])
|
1033
|
+
* in the [target substring][3];
|
1034
|
+
* does not modify the [positions][11].
|
1035
|
+
*
|
1036
|
+
* If the match succeeds:
|
1037
|
+
*
|
1038
|
+
* - Sets all [match values][9].
|
1039
|
+
* - Returns the substring
|
1040
|
+
* which extends from the current [position][2]
|
1041
|
+
* to the end of the matched substring.
|
1042
|
+
*
|
1043
|
+
* ```rb
|
1044
|
+
* scanner = StringScanner.new('foobarbazbatbam')
|
1045
|
+
* scanner.pos = 6
|
1046
|
+
* scanner.check_until(/bat/) # => "bazbat"
|
1047
|
+
* put_match_values(scanner)
|
1048
|
+
* # Basic match values:
|
1049
|
+
* # matched?: true
|
1050
|
+
* # matched_size: 3
|
1051
|
+
* # pre_match: "foobarbaz"
|
1052
|
+
* # matched : "bat"
|
1053
|
+
* # post_match: "bam"
|
1054
|
+
* # Captured match values:
|
1055
|
+
* # size: 1
|
1056
|
+
* # captures: []
|
1057
|
+
* # named_captures: {}
|
1058
|
+
* # values_at: ["bat", nil]
|
1059
|
+
* # []:
|
1060
|
+
* # [0]: "bat"
|
1061
|
+
* # [1]: nil
|
1062
|
+
* put_situation(scanner)
|
1063
|
+
* # Situation:
|
1064
|
+
* # pos: 6
|
1065
|
+
* # charpos: 6
|
1066
|
+
* # rest: "bazbatbam"
|
1067
|
+
* # rest_size: 9
|
1068
|
+
* ```
|
1069
|
+
*
|
1070
|
+
* If the match fails:
|
1071
|
+
*
|
1072
|
+
* - Clears all [match values][9].
|
1073
|
+
* - Returns `nil`.
|
1074
|
+
*
|
1075
|
+
* ```rb
|
1076
|
+
* scanner.check_until(/nope/) # => nil
|
1077
|
+
* match_values_cleared?(scanner) # => true
|
1078
|
+
* ```
|
834
1079
|
*
|
835
|
-
* Mnemonic: it "checks" to see whether a #scan_until will return a value.
|
836
1080
|
*/
|
837
1081
|
static VALUE
|
838
1082
|
strscan_check_until(VALUE self, VALUE re)
|
@@ -841,14 +1085,24 @@ strscan_check_until(VALUE self, VALUE re)
|
|
841
1085
|
}
|
842
1086
|
|
843
1087
|
/*
|
844
|
-
* call-seq:
|
1088
|
+
* call-seq:
|
1089
|
+
* search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil
|
1090
|
+
*
|
1091
|
+
* Equivalent to one of the following:
|
1092
|
+
*
|
1093
|
+
* - +advance_pointer_p+ +true+:
|
1094
|
+
*
|
1095
|
+
* - +return_string_p+ +true+: StringScanner#scan_until(pattern).
|
1096
|
+
* - +return_string_p+ +false+: StringScanner#skip_until(pattern).
|
1097
|
+
*
|
1098
|
+
* - +advance_pointer_p+ +false+:
|
1099
|
+
*
|
1100
|
+
* - +return_string_p+ +true+: StringScanner#check_until(pattern).
|
1101
|
+
* - +return_string_p+ +false+: StringScanner#exist?(pattern).
|
845
1102
|
*
|
846
|
-
* Scans the string _until_ the +pattern+ is matched.
|
847
|
-
* Advances the scan pointer if +advance_pointer_p+, otherwise not.
|
848
|
-
* Returns the matched string if +return_string_p+ is true, otherwise
|
849
|
-
* returns the number of bytes advanced.
|
850
|
-
* This method does affect the match register.
|
851
1103
|
*/
|
1104
|
+
|
1105
|
+
/* :nodoc: */
|
852
1106
|
static VALUE
|
853
1107
|
strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
854
1108
|
{
|
@@ -868,17 +1122,9 @@ adjust_registers_to_matched(struct strscanner *p)
|
|
868
1122
|
}
|
869
1123
|
|
870
1124
|
/*
|
871
|
-
*
|
872
|
-
*
|
873
|
-
*
|
874
|
-
* s = StringScanner.new("ab")
|
875
|
-
* s.getch # => "a"
|
876
|
-
* s.getch # => "b"
|
877
|
-
* s.getch # => nil
|
878
|
-
*
|
879
|
-
* s = StringScanner.new("\244\242".force_encoding("euc-jp"))
|
880
|
-
* s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
|
881
|
-
* s.getch # => nil
|
1125
|
+
* :markup: markdown
|
1126
|
+
* :include: strscan/link_refs.txt
|
1127
|
+
* :include: strscan/methods/getch.md
|
882
1128
|
*/
|
883
1129
|
static VALUE
|
884
1130
|
strscan_getch(VALUE self)
|
@@ -903,19 +1149,55 @@ strscan_getch(VALUE self)
|
|
903
1149
|
}
|
904
1150
|
|
905
1151
|
/*
|
906
|
-
*
|
1152
|
+
* call-seq:
|
1153
|
+
* scan_byte -> integer_byte or nil
|
1154
|
+
*
|
1155
|
+
* Scans one byte and returns it as an integer; returns +nil+ at the end of the string.
|
907
1156
|
* This method is not multibyte character sensitive.
|
908
1157
|
* See also: #getch.
|
909
1158
|
*
|
1159
|
+
*/
|
1160
|
+
static VALUE
|
1161
|
+
strscan_scan_byte(VALUE self)
|
1162
|
+
{
|
1163
|
+
struct strscanner *p;
|
1164
|
+
VALUE byte;
|
1165
|
+
|
1166
|
+
GET_SCANNER(self, p);
|
1167
|
+
CLEAR_MATCH_STATUS(p);
|
1168
|
+
if (EOS_P(p))
|
1169
|
+
return Qnil;
|
1170
|
+
|
1171
|
+
byte = INT2FIX((unsigned char)*CURPTR(p));
|
1172
|
+
p->prev = p->curr;
|
1173
|
+
p->curr++;
|
1174
|
+
MATCHED(p);
|
1175
|
+
adjust_registers_to_matched(p);
|
1176
|
+
return byte;
|
1177
|
+
}
|
1178
|
+
|
1179
|
+
/*
|
1180
|
+
* Peeks at the current byte and returns it as an integer; returns +nil+ at the end of the string.
|
1181
|
+
*
|
910
1182
|
* s = StringScanner.new('ab')
|
911
|
-
* s.
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
*
|
917
|
-
|
918
|
-
|
1183
|
+
* s.peek_byte # => 97
|
1184
|
+
*/
|
1185
|
+
static VALUE
|
1186
|
+
strscan_peek_byte(VALUE self)
|
1187
|
+
{
|
1188
|
+
struct strscanner *p;
|
1189
|
+
|
1190
|
+
GET_SCANNER(self, p);
|
1191
|
+
if (EOS_P(p))
|
1192
|
+
return Qnil;
|
1193
|
+
|
1194
|
+
return INT2FIX((unsigned char)*CURPTR(p));
|
1195
|
+
}
|
1196
|
+
|
1197
|
+
/*
|
1198
|
+
* :markup: markdown
|
1199
|
+
* :include: strscan/link_refs.txt
|
1200
|
+
* :include: strscan/methods/get_byte.md
|
919
1201
|
*/
|
920
1202
|
static VALUE
|
921
1203
|
strscan_get_byte(VALUE self)
|
@@ -937,9 +1219,14 @@ strscan_get_byte(VALUE self)
|
|
937
1219
|
}
|
938
1220
|
|
939
1221
|
/*
|
1222
|
+
* call-seq:
|
1223
|
+
* getbyte
|
1224
|
+
*
|
940
1225
|
* Equivalent to #get_byte.
|
941
1226
|
* This method is obsolete; use #get_byte instead.
|
942
1227
|
*/
|
1228
|
+
|
1229
|
+
/* :nodoc: */
|
943
1230
|
static VALUE
|
944
1231
|
strscan_getbyte(VALUE self)
|
945
1232
|
{
|
@@ -948,14 +1235,22 @@ strscan_getbyte(VALUE self)
|
|
948
1235
|
}
|
949
1236
|
|
950
1237
|
/*
|
951
|
-
*
|
1238
|
+
* :markup: markdown
|
1239
|
+
* :include: strscan/link_refs.txt
|
952
1240
|
*
|
953
|
-
*
|
954
|
-
*
|
1241
|
+
* call-seq:
|
1242
|
+
* peek(length) -> substring
|
955
1243
|
*
|
956
|
-
*
|
957
|
-
*
|
958
|
-
*
|
1244
|
+
* Returns the substring `string[pos, length]`;
|
1245
|
+
* does not update [match values][9] or [positions][11]:
|
1246
|
+
*
|
1247
|
+
* ```rb
|
1248
|
+
* scanner = StringScanner.new('foobarbaz')
|
1249
|
+
* scanner.pos = 3
|
1250
|
+
* scanner.peek(3) # => "bar"
|
1251
|
+
* scanner.terminate
|
1252
|
+
* scanner.peek(3) # => ""
|
1253
|
+
* ```
|
959
1254
|
*
|
960
1255
|
*/
|
961
1256
|
static VALUE
|
@@ -975,9 +1270,14 @@ strscan_peek(VALUE self, VALUE vlen)
|
|
975
1270
|
}
|
976
1271
|
|
977
1272
|
/*
|
1273
|
+
* call-seq:
|
1274
|
+
* peep
|
1275
|
+
*
|
978
1276
|
* Equivalent to #peek.
|
979
1277
|
* This method is obsolete; use #peek instead.
|
980
1278
|
*/
|
1279
|
+
|
1280
|
+
/* :nodoc: */
|
981
1281
|
static VALUE
|
982
1282
|
strscan_peep(VALUE self, VALUE vlen)
|
983
1283
|
{
|
@@ -985,16 +1285,158 @@ strscan_peep(VALUE self, VALUE vlen)
|
|
985
1285
|
return strscan_peek(self, vlen);
|
986
1286
|
}
|
987
1287
|
|
1288
|
+
static VALUE
|
1289
|
+
strscan_parse_integer(struct strscanner *p, int base, long len)
|
1290
|
+
{
|
1291
|
+
VALUE buffer_v, integer;
|
1292
|
+
|
1293
|
+
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
1294
|
+
|
1295
|
+
MEMCPY(buffer, CURPTR(p), char, len);
|
1296
|
+
buffer[len] = '\0';
|
1297
|
+
integer = rb_cstr2inum(buffer, base);
|
1298
|
+
RB_ALLOCV_END(buffer_v);
|
1299
|
+
p->curr += len;
|
1300
|
+
|
1301
|
+
MATCHED(p);
|
1302
|
+
adjust_registers_to_matched(p);
|
1303
|
+
|
1304
|
+
return integer;
|
1305
|
+
}
|
1306
|
+
|
1307
|
+
static inline bool
|
1308
|
+
strscan_ascii_compat_fastpath(VALUE str) {
|
1309
|
+
int encindex = ENCODING_GET_INLINED(str);
|
1310
|
+
// The overwhelming majority of strings are in one of these 3 encodings.
|
1311
|
+
return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex;
|
1312
|
+
}
|
1313
|
+
|
1314
|
+
static inline void
|
1315
|
+
strscan_must_ascii_compat(VALUE str)
|
1316
|
+
{
|
1317
|
+
// The overwhelming majority of strings are in one of these 3 encodings.
|
1318
|
+
if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) {
|
1319
|
+
return;
|
1320
|
+
}
|
1321
|
+
|
1322
|
+
rb_must_asciicompat(str);
|
1323
|
+
}
|
1324
|
+
|
1325
|
+
static VALUE
|
1326
|
+
strscan_scan_base10_integer(VALUE self)
|
1327
|
+
{
|
1328
|
+
char *ptr;
|
1329
|
+
long len = 0;
|
1330
|
+
struct strscanner *p;
|
1331
|
+
|
1332
|
+
GET_SCANNER(self, p);
|
1333
|
+
CLEAR_MATCH_STATUS(p);
|
1334
|
+
|
1335
|
+
strscan_must_ascii_compat(p->str);
|
1336
|
+
|
1337
|
+
ptr = CURPTR(p);
|
1338
|
+
|
1339
|
+
long remaining_len = S_RESTLEN(p);
|
1340
|
+
|
1341
|
+
if (remaining_len <= 0) {
|
1342
|
+
return Qnil;
|
1343
|
+
}
|
1344
|
+
|
1345
|
+
if (ptr[len] == '-' || ptr[len] == '+') {
|
1346
|
+
len++;
|
1347
|
+
}
|
1348
|
+
|
1349
|
+
if (!rb_isdigit(ptr[len])) {
|
1350
|
+
return Qnil;
|
1351
|
+
}
|
1352
|
+
|
1353
|
+
p->prev = p->curr;
|
1354
|
+
|
1355
|
+
while (len < remaining_len && rb_isdigit(ptr[len])) {
|
1356
|
+
len++;
|
1357
|
+
}
|
1358
|
+
|
1359
|
+
return strscan_parse_integer(p, 10, len);
|
1360
|
+
}
|
1361
|
+
|
1362
|
+
static VALUE
|
1363
|
+
strscan_scan_base16_integer(VALUE self)
|
1364
|
+
{
|
1365
|
+
char *ptr;
|
1366
|
+
long len = 0;
|
1367
|
+
struct strscanner *p;
|
1368
|
+
|
1369
|
+
GET_SCANNER(self, p);
|
1370
|
+
CLEAR_MATCH_STATUS(p);
|
1371
|
+
|
1372
|
+
strscan_must_ascii_compat(p->str);
|
1373
|
+
|
1374
|
+
ptr = CURPTR(p);
|
1375
|
+
|
1376
|
+
long remaining_len = S_RESTLEN(p);
|
1377
|
+
|
1378
|
+
if (remaining_len <= 0) {
|
1379
|
+
return Qnil;
|
1380
|
+
}
|
1381
|
+
|
1382
|
+
if (ptr[len] == '-' || ptr[len] == '+') {
|
1383
|
+
len++;
|
1384
|
+
}
|
1385
|
+
|
1386
|
+
if ((remaining_len >= (len + 3)) && ptr[len] == '0' && ptr[len + 1] == 'x' && rb_isxdigit(ptr[len + 2])) {
|
1387
|
+
len += 2;
|
1388
|
+
}
|
1389
|
+
|
1390
|
+
if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
|
1391
|
+
return Qnil;
|
1392
|
+
}
|
1393
|
+
|
1394
|
+
p->prev = p->curr;
|
1395
|
+
|
1396
|
+
while (len < remaining_len && rb_isxdigit(ptr[len])) {
|
1397
|
+
len++;
|
1398
|
+
}
|
1399
|
+
|
1400
|
+
return strscan_parse_integer(p, 16, len);
|
1401
|
+
}
|
1402
|
+
|
988
1403
|
/*
|
989
|
-
*
|
990
|
-
*
|
1404
|
+
* :markup: markdown
|
1405
|
+
* :include: strscan/link_refs.txt
|
1406
|
+
*
|
1407
|
+
* call-seq:
|
1408
|
+
* unscan -> self
|
1409
|
+
*
|
1410
|
+
* Sets the [position][2] to its value previous to the most recent successful
|
1411
|
+
* [match][17] attempt:
|
1412
|
+
*
|
1413
|
+
* ```rb
|
1414
|
+
* scanner = StringScanner.new('foobarbaz')
|
1415
|
+
* scanner.scan(/foo/)
|
1416
|
+
* put_situation(scanner)
|
1417
|
+
* # Situation:
|
1418
|
+
* # pos: 3
|
1419
|
+
* # charpos: 3
|
1420
|
+
* # rest: "barbaz"
|
1421
|
+
* # rest_size: 6
|
1422
|
+
* scanner.unscan
|
1423
|
+
* # => #<StringScanner 0/9 @ "fooba...">
|
1424
|
+
* put_situation(scanner)
|
1425
|
+
* # Situation:
|
1426
|
+
* # pos: 0
|
1427
|
+
* # charpos: 0
|
1428
|
+
* # rest: "foobarbaz"
|
1429
|
+
* # rest_size: 9
|
1430
|
+
* ```
|
1431
|
+
*
|
1432
|
+
* Raises an exception if match values are clear:
|
1433
|
+
*
|
1434
|
+
* ```rb
|
1435
|
+
* scanner.scan(/nope/) # => nil
|
1436
|
+
* match_values_cleared?(scanner) # => true
|
1437
|
+
* scanner.unscan # Raises StringScanner::Error.
|
1438
|
+
* ```
|
991
1439
|
*
|
992
|
-
* s = StringScanner.new('test string')
|
993
|
-
* s.scan(/\w+/) # => "test"
|
994
|
-
* s.unscan
|
995
|
-
* s.scan(/../) # => "te"
|
996
|
-
* s.scan(/\d/) # => nil
|
997
|
-
* s.unscan # ScanError: unscan failed: previous match record not exist
|
998
1440
|
*/
|
999
1441
|
static VALUE
|
1000
1442
|
strscan_unscan(VALUE self)
|
@@ -1010,16 +1452,37 @@ strscan_unscan(VALUE self)
|
|
1010
1452
|
}
|
1011
1453
|
|
1012
1454
|
/*
|
1013
|
-
*
|
1014
|
-
*
|
1015
|
-
*
|
1016
|
-
*
|
1017
|
-
*
|
1018
|
-
*
|
1019
|
-
*
|
1020
|
-
*
|
1021
|
-
*
|
1022
|
-
*
|
1455
|
+
*
|
1456
|
+
* :markup: markdown
|
1457
|
+
* :include: strscan/link_refs.txt
|
1458
|
+
*
|
1459
|
+
* call-seq:
|
1460
|
+
* beginning_of_line? -> true or false
|
1461
|
+
*
|
1462
|
+
* Returns whether the [position][2] is at the beginning of a line;
|
1463
|
+
* that is, at the beginning of the [stored string][1]
|
1464
|
+
* or immediately after a newline:
|
1465
|
+
*
|
1466
|
+
* scanner = StringScanner.new(MULTILINE_TEXT)
|
1467
|
+
* scanner.string
|
1468
|
+
* # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n"
|
1469
|
+
* scanner.pos # => 0
|
1470
|
+
* scanner.beginning_of_line? # => true
|
1471
|
+
*
|
1472
|
+
* scanner.scan_until(/,/) # => "Go placidly amid the noise and haste,"
|
1473
|
+
* scanner.beginning_of_line? # => false
|
1474
|
+
*
|
1475
|
+
* scanner.scan(/\n/) # => "\n"
|
1476
|
+
* scanner.beginning_of_line? # => true
|
1477
|
+
*
|
1478
|
+
* scanner.terminate
|
1479
|
+
* scanner.beginning_of_line? # => true
|
1480
|
+
*
|
1481
|
+
* scanner.concat('x')
|
1482
|
+
* scanner.terminate
|
1483
|
+
* scanner.beginning_of_line? # => false
|
1484
|
+
*
|
1485
|
+
* StringScanner#bol? is an alias for StringScanner#beginning_of_line?.
|
1023
1486
|
*/
|
1024
1487
|
static VALUE
|
1025
1488
|
strscan_bol_p(VALUE self)
|
@@ -1033,14 +1496,24 @@ strscan_bol_p(VALUE self)
|
|
1033
1496
|
}
|
1034
1497
|
|
1035
1498
|
/*
|
1036
|
-
*
|
1499
|
+
* :markup: markdown
|
1500
|
+
* :include: strscan/link_refs.txt
|
1501
|
+
*
|
1502
|
+
* call-seq:
|
1503
|
+
* eos? -> true or false
|
1504
|
+
*
|
1505
|
+
* Returns whether the [position][2]
|
1506
|
+
* is at the end of the [stored string][1]:
|
1507
|
+
*
|
1508
|
+
* ```rb
|
1509
|
+
* scanner = StringScanner.new('foobarbaz')
|
1510
|
+
* scanner.eos? # => false
|
1511
|
+
* scanner.pos = 3
|
1512
|
+
* scanner.eos? # => false
|
1513
|
+
* scanner.terminate
|
1514
|
+
* scanner.eos? # => true
|
1515
|
+
* ```
|
1037
1516
|
*
|
1038
|
-
* s = StringScanner.new('test string')
|
1039
|
-
* p s.eos? # => false
|
1040
|
-
* s.scan(/test/)
|
1041
|
-
* p s.eos? # => false
|
1042
|
-
* s.terminate
|
1043
|
-
* p s.eos? # => true
|
1044
1517
|
*/
|
1045
1518
|
static VALUE
|
1046
1519
|
strscan_eos_p(VALUE self)
|
@@ -1052,9 +1525,14 @@ strscan_eos_p(VALUE self)
|
|
1052
1525
|
}
|
1053
1526
|
|
1054
1527
|
/*
|
1528
|
+
* call-seq:
|
1529
|
+
* empty?
|
1530
|
+
*
|
1055
1531
|
* Equivalent to #eos?.
|
1056
1532
|
* This method is obsolete, use #eos? instead.
|
1057
1533
|
*/
|
1534
|
+
|
1535
|
+
/* :nodoc: */
|
1058
1536
|
static VALUE
|
1059
1537
|
strscan_empty_p(VALUE self)
|
1060
1538
|
{
|
@@ -1063,6 +1541,9 @@ strscan_empty_p(VALUE self)
|
|
1063
1541
|
}
|
1064
1542
|
|
1065
1543
|
/*
|
1544
|
+
* call-seq:
|
1545
|
+
* rest?
|
1546
|
+
*
|
1066
1547
|
* Returns true if and only if there is more data in the string. See #eos?.
|
1067
1548
|
* This method is obsolete; use #eos? instead.
|
1068
1549
|
*
|
@@ -1071,6 +1552,8 @@ strscan_empty_p(VALUE self)
|
|
1071
1552
|
* s.eos? # => false
|
1072
1553
|
* s.rest? # => true
|
1073
1554
|
*/
|
1555
|
+
|
1556
|
+
/* :nodoc: */
|
1074
1557
|
static VALUE
|
1075
1558
|
strscan_rest_p(VALUE self)
|
1076
1559
|
{
|
@@ -1081,13 +1564,26 @@ strscan_rest_p(VALUE self)
|
|
1081
1564
|
}
|
1082
1565
|
|
1083
1566
|
/*
|
1084
|
-
*
|
1567
|
+
* :markup: markdown
|
1568
|
+
* :include: strscan/link_refs.txt
|
1569
|
+
*
|
1570
|
+
* call-seq:
|
1571
|
+
* matched? -> true or false
|
1572
|
+
*
|
1573
|
+
* Returns `true` if the most recent [match attempt][17] was successful,
|
1574
|
+
* `false` otherwise;
|
1575
|
+
* see [Basic Matched Values][18]:
|
1576
|
+
*
|
1577
|
+
* ```rb
|
1578
|
+
* scanner = StringScanner.new('foobarbaz')
|
1579
|
+
* scanner.matched? # => false
|
1580
|
+
* scanner.pos = 3
|
1581
|
+
* scanner.exist?(/baz/) # => 6
|
1582
|
+
* scanner.matched? # => true
|
1583
|
+
* scanner.exist?(/nope/) # => nil
|
1584
|
+
* scanner.matched? # => false
|
1585
|
+
* ```
|
1085
1586
|
*
|
1086
|
-
* s = StringScanner.new('test string')
|
1087
|
-
* s.match?(/\w+/) # => 4
|
1088
|
-
* s.matched? # => true
|
1089
|
-
* s.match?(/\d+/) # => nil
|
1090
|
-
* s.matched? # => false
|
1091
1587
|
*/
|
1092
1588
|
static VALUE
|
1093
1589
|
strscan_matched_p(VALUE self)
|
@@ -1099,11 +1595,27 @@ strscan_matched_p(VALUE self)
|
|
1099
1595
|
}
|
1100
1596
|
|
1101
1597
|
/*
|
1102
|
-
*
|
1598
|
+
* :markup: markdown
|
1599
|
+
* :include: strscan/link_refs.txt
|
1600
|
+
*
|
1601
|
+
* call-seq:
|
1602
|
+
* matched -> matched_substring or nil
|
1603
|
+
*
|
1604
|
+
* Returns the matched substring from the most recent [match][17] attempt
|
1605
|
+
* if it was successful,
|
1606
|
+
* or `nil` otherwise;
|
1607
|
+
* see [Basic Matched Values][18]:
|
1608
|
+
*
|
1609
|
+
* ```rb
|
1610
|
+
* scanner = StringScanner.new('foobarbaz')
|
1611
|
+
* scanner.matched # => nil
|
1612
|
+
* scanner.pos = 3
|
1613
|
+
* scanner.match?(/bar/) # => 3
|
1614
|
+
* scanner.matched # => "bar"
|
1615
|
+
* scanner.match?(/nope/) # => nil
|
1616
|
+
* scanner.matched # => nil
|
1617
|
+
* ```
|
1103
1618
|
*
|
1104
|
-
* s = StringScanner.new('test string')
|
1105
|
-
* s.match?(/\w+/) # -> 4
|
1106
|
-
* s.matched # -> "test"
|
1107
1619
|
*/
|
1108
1620
|
static VALUE
|
1109
1621
|
strscan_matched(VALUE self)
|
@@ -1118,15 +1630,29 @@ strscan_matched(VALUE self)
|
|
1118
1630
|
}
|
1119
1631
|
|
1120
1632
|
/*
|
1121
|
-
*
|
1122
|
-
*
|
1123
|
-
*
|
1633
|
+
* :markup: markdown
|
1634
|
+
* :include: strscan/link_refs.txt
|
1635
|
+
*
|
1636
|
+
* call-seq:
|
1637
|
+
* matched_size -> substring_size or nil
|
1638
|
+
*
|
1639
|
+
* Returns the size (in bytes) of the matched substring
|
1640
|
+
* from the most recent [match attempt][17] if it was successful,
|
1641
|
+
* or `nil` otherwise;
|
1642
|
+
* see [Basic Matched Values][18]:
|
1643
|
+
*
|
1644
|
+
* ```rb
|
1645
|
+
* scanner = StringScanner.new('foobarbaz')
|
1646
|
+
* scanner.matched_size # => nil
|
1647
|
+
*
|
1648
|
+
* scanner.pos = 3
|
1649
|
+
* scanner.exist?(/baz/) # => 6
|
1650
|
+
* scanner.matched_size # => 3
|
1651
|
+
*
|
1652
|
+
* scanner.exist?(/nope/) # => nil
|
1653
|
+
* scanner.matched_size # => nil
|
1654
|
+
* ```
|
1124
1655
|
*
|
1125
|
-
* s = StringScanner.new('test string')
|
1126
|
-
* s.check /\w+/ # -> "test"
|
1127
|
-
* s.matched_size # -> 4
|
1128
|
-
* s.check /\d+/ # -> nil
|
1129
|
-
* s.matched_size # -> nil
|
1130
1656
|
*/
|
1131
1657
|
static VALUE
|
1132
1658
|
strscan_matched_size(VALUE self)
|
@@ -1141,46 +1667,89 @@ strscan_matched_size(VALUE self)
|
|
1141
1667
|
static int
|
1142
1668
|
name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc)
|
1143
1669
|
{
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
|
1153
|
-
rb_long2int(name_end - name), name);
|
1670
|
+
if (RTEST(regexp)) {
|
1671
|
+
int num = onig_name_to_backref_number(RREGEXP_PTR(regexp),
|
1672
|
+
(const unsigned char* )name,
|
1673
|
+
(const unsigned char* )name_end,
|
1674
|
+
regs);
|
1675
|
+
if (num >= 1) {
|
1676
|
+
return num;
|
1677
|
+
}
|
1154
1678
|
}
|
1155
|
-
|
1156
|
-
|
1679
|
+
rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
|
1680
|
+
rb_long2int(name_end - name), name);
|
1157
1681
|
}
|
1158
1682
|
|
1159
1683
|
/*
|
1160
|
-
*
|
1161
|
-
*
|
1162
|
-
*
|
1163
|
-
*
|
1164
|
-
*
|
1165
|
-
*
|
1166
|
-
*
|
1167
|
-
*
|
1168
|
-
*
|
1169
|
-
*
|
1170
|
-
*
|
1171
|
-
*
|
1172
|
-
*
|
1173
|
-
*
|
1174
|
-
*
|
1175
|
-
*
|
1176
|
-
*
|
1177
|
-
*
|
1178
|
-
*
|
1179
|
-
*
|
1180
|
-
*
|
1181
|
-
*
|
1182
|
-
*
|
1183
|
-
*
|
1684
|
+
*
|
1685
|
+
* :markup: markdown
|
1686
|
+
* :include: strscan/link_refs.txt
|
1687
|
+
*
|
1688
|
+
* call-seq:
|
1689
|
+
* [](specifier) -> substring or nil
|
1690
|
+
*
|
1691
|
+
* Returns a captured substring or `nil`;
|
1692
|
+
* see [Captured Match Values][13].
|
1693
|
+
*
|
1694
|
+
* When there are captures:
|
1695
|
+
*
|
1696
|
+
* ```rb
|
1697
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1698
|
+
* scanner.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
|
1699
|
+
* ```
|
1700
|
+
*
|
1701
|
+
* - `specifier` zero: returns the entire matched substring:
|
1702
|
+
*
|
1703
|
+
* ```rb
|
1704
|
+
* scanner[0] # => "Fri Dec 12 "
|
1705
|
+
* scanner.pre_match # => ""
|
1706
|
+
* scanner.post_match # => "1975 14:39"
|
1707
|
+
* ```
|
1708
|
+
*
|
1709
|
+
* - `specifier` positive integer: returns the `n`th capture, or `nil` if out of range:
|
1710
|
+
*
|
1711
|
+
* ```rb
|
1712
|
+
* scanner[1] # => "Fri"
|
1713
|
+
* scanner[2] # => "Dec"
|
1714
|
+
* scanner[3] # => "12"
|
1715
|
+
* scanner[4] # => nil
|
1716
|
+
* ```
|
1717
|
+
*
|
1718
|
+
* - `specifier` negative integer: counts backward from the last subgroup:
|
1719
|
+
*
|
1720
|
+
* ```rb
|
1721
|
+
* scanner[-1] # => "12"
|
1722
|
+
* scanner[-4] # => "Fri Dec 12 "
|
1723
|
+
* scanner[-5] # => nil
|
1724
|
+
* ```
|
1725
|
+
*
|
1726
|
+
* - `specifier` symbol or string: returns the named subgroup; raises IndexError if there is no such group name:
|
1727
|
+
*
|
1728
|
+
* ```rb
|
1729
|
+
* scanner[:wday] # => "Fri"
|
1730
|
+
* scanner['wday'] # => "Fri"
|
1731
|
+
* scanner[:month] # => "Dec"
|
1732
|
+
* scanner[:day] # => "12"
|
1733
|
+
* scanner[:nope] # Raises IndexError (undefined group name reference).
|
1734
|
+
* ```
|
1735
|
+
*
|
1736
|
+
* When there are no captures, only `[0]` returns non-`nil`:
|
1737
|
+
*
|
1738
|
+
* ```rb
|
1739
|
+
* scanner = StringScanner.new('foobarbaz')
|
1740
|
+
* scanner.exist?(/bar/)
|
1741
|
+
* scanner[0] # => "bar"
|
1742
|
+
* scanner[1] # => nil
|
1743
|
+
* ```
|
1744
|
+
*
|
1745
|
+
* For a failed match, even `[0]` returns `nil`:
|
1746
|
+
*
|
1747
|
+
* ```rb
|
1748
|
+
* scanner.scan(/nope/) # => nil
|
1749
|
+
* scanner[0] # => nil
|
1750
|
+
* scanner[1] # => nil
|
1751
|
+
* ```
|
1752
|
+
*
|
1184
1753
|
*/
|
1185
1754
|
static VALUE
|
1186
1755
|
strscan_aref(VALUE self, VALUE idx)
|
@@ -1197,7 +1766,6 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1197
1766
|
idx = rb_sym2str(idx);
|
1198
1767
|
/* fall through */
|
1199
1768
|
case T_STRING:
|
1200
|
-
if (!RTEST(p->regex)) return Qnil;
|
1201
1769
|
RSTRING_GETMEM(idx, name, i);
|
1202
1770
|
i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
|
1203
1771
|
break;
|
@@ -1217,14 +1785,28 @@ strscan_aref(VALUE self, VALUE idx)
|
|
1217
1785
|
}
|
1218
1786
|
|
1219
1787
|
/*
|
1220
|
-
*
|
1788
|
+
* :markup: markdown
|
1789
|
+
* :include: strscan/link_refs.txt
|
1221
1790
|
*
|
1222
|
-
*
|
1223
|
-
*
|
1791
|
+
* call-seq:
|
1792
|
+
* size -> captures_count
|
1793
|
+
*
|
1794
|
+
* Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise;
|
1795
|
+
* see [Captured Match Values][13]:
|
1796
|
+
*
|
1797
|
+
* ```rb
|
1798
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1799
|
+
* scanner.size # => nil
|
1800
|
+
*
|
1801
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
1802
|
+
* scanner.match?(pattern)
|
1803
|
+
* scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
|
1804
|
+
* scanner.size # => 4
|
1805
|
+
*
|
1806
|
+
* scanner.match?(/nope/) # => nil
|
1807
|
+
* scanner.size # => nil
|
1808
|
+
* ```
|
1224
1809
|
*
|
1225
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1226
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1227
|
-
* s.size # -> 4
|
1228
1810
|
*/
|
1229
1811
|
static VALUE
|
1230
1812
|
strscan_size(VALUE self)
|
@@ -1237,16 +1819,30 @@ strscan_size(VALUE self)
|
|
1237
1819
|
}
|
1238
1820
|
|
1239
1821
|
/*
|
1240
|
-
*
|
1822
|
+
* :markup: markdown
|
1823
|
+
* :include: strscan/link_refs.txt
|
1824
|
+
*
|
1825
|
+
* call-seq:
|
1826
|
+
* captures -> substring_array or nil
|
1827
|
+
*
|
1828
|
+
* Returns the array of [captured match values][13] at indexes `(1..)`
|
1829
|
+
* if the most recent match attempt succeeded, or `nil` otherwise:
|
1830
|
+
*
|
1831
|
+
* ```rb
|
1832
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1833
|
+
* scanner.captures # => nil
|
1834
|
+
*
|
1835
|
+
* scanner.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)
|
1836
|
+
* scanner.captures # => ["Fri", "Dec", "12"]
|
1837
|
+
* scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil]
|
1838
|
+
*
|
1839
|
+
* scanner.exist?(/Fri/)
|
1840
|
+
* scanner.captures # => []
|
1241
1841
|
*
|
1242
|
-
*
|
1243
|
-
*
|
1842
|
+
* scanner.scan(/nope/)
|
1843
|
+
* scanner.captures # => nil
|
1844
|
+
* ```
|
1244
1845
|
*
|
1245
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1246
|
-
* s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 "
|
1247
|
-
* s.captures # -> ["Fri", "Dec", "12", nil]
|
1248
|
-
* s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil
|
1249
|
-
* s.captures # -> nil
|
1250
1846
|
*/
|
1251
1847
|
static VALUE
|
1252
1848
|
strscan_captures(VALUE self)
|
@@ -1276,17 +1872,25 @@ strscan_captures(VALUE self)
|
|
1276
1872
|
}
|
1277
1873
|
|
1278
1874
|
/*
|
1279
|
-
*
|
1280
|
-
*
|
1875
|
+
* :markup: markdown
|
1876
|
+
* :include: strscan/link_refs.txt
|
1281
1877
|
*
|
1282
|
-
*
|
1283
|
-
*
|
1878
|
+
* call-seq:
|
1879
|
+
* values_at(*specifiers) -> array_of_captures or nil
|
1880
|
+
*
|
1881
|
+
* Returns an array of captured substrings, or `nil` if none.
|
1882
|
+
*
|
1883
|
+
* For each `specifier`, the returned substring is `[specifier]`;
|
1884
|
+
* see #[].
|
1885
|
+
*
|
1886
|
+
* ```rb
|
1887
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
1888
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
1889
|
+
* scanner.match?(pattern)
|
1890
|
+
* scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"]
|
1891
|
+
* scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"]
|
1892
|
+
* ```
|
1284
1893
|
*
|
1285
|
-
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1286
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1287
|
-
* s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
|
1288
|
-
* s.scan(/(\w+) (\w+) (\d+) /) # -> nil
|
1289
|
-
* s.values_at 0, -1, 5, 2 # -> nil
|
1290
1894
|
*/
|
1291
1895
|
|
1292
1896
|
static VALUE
|
@@ -1308,13 +1912,29 @@ strscan_values_at(int argc, VALUE *argv, VALUE self)
|
|
1308
1912
|
}
|
1309
1913
|
|
1310
1914
|
/*
|
1311
|
-
*
|
1915
|
+
* :markup: markdown
|
1916
|
+
* :include: strscan/link_refs.txt
|
1917
|
+
*
|
1918
|
+
* call-seq:
|
1919
|
+
* pre_match -> substring
|
1920
|
+
*
|
1921
|
+
* Returns the substring that precedes the matched substring
|
1922
|
+
* from the most recent match attempt if it was successful,
|
1923
|
+
* or `nil` otherwise;
|
1924
|
+
* see [Basic Match Values][18]:
|
1925
|
+
*
|
1926
|
+
* ```rb
|
1927
|
+
* scanner = StringScanner.new('foobarbaz')
|
1928
|
+
* scanner.pre_match # => nil
|
1929
|
+
*
|
1930
|
+
* scanner.pos = 3
|
1931
|
+
* scanner.exist?(/baz/) # => 6
|
1932
|
+
* scanner.pre_match # => "foobar" # Substring of entire string, not just target string.
|
1933
|
+
*
|
1934
|
+
* scanner.exist?(/nope/) # => nil
|
1935
|
+
* scanner.pre_match # => nil
|
1936
|
+
* ```
|
1312
1937
|
*
|
1313
|
-
* s = StringScanner.new('test string')
|
1314
|
-
* s.scan(/\w+/) # -> "test"
|
1315
|
-
* s.scan(/\s+/) # -> " "
|
1316
|
-
* s.pre_match # -> "test"
|
1317
|
-
* s.post_match # -> "string"
|
1318
1938
|
*/
|
1319
1939
|
static VALUE
|
1320
1940
|
strscan_pre_match(VALUE self)
|
@@ -1329,13 +1949,29 @@ strscan_pre_match(VALUE self)
|
|
1329
1949
|
}
|
1330
1950
|
|
1331
1951
|
/*
|
1332
|
-
*
|
1952
|
+
* :markup: markdown
|
1953
|
+
* :include: strscan/link_refs.txt
|
1954
|
+
*
|
1955
|
+
* call-seq:
|
1956
|
+
* post_match -> substring
|
1957
|
+
*
|
1958
|
+
* Returns the substring that follows the matched substring
|
1959
|
+
* from the most recent match attempt if it was successful,
|
1960
|
+
* or `nil` otherwise;
|
1961
|
+
* see [Basic Match Values][18]:
|
1962
|
+
*
|
1963
|
+
* ```rb
|
1964
|
+
* scanner = StringScanner.new('foobarbaz')
|
1965
|
+
* scanner.post_match # => nil
|
1966
|
+
*
|
1967
|
+
* scanner.pos = 3
|
1968
|
+
* scanner.match?(/bar/) # => 3
|
1969
|
+
* scanner.post_match # => "baz"
|
1970
|
+
*
|
1971
|
+
* scanner.match?(/nope/) # => nil
|
1972
|
+
* scanner.post_match # => nil
|
1973
|
+
* ```
|
1333
1974
|
*
|
1334
|
-
* s = StringScanner.new('test string')
|
1335
|
-
* s.scan(/\w+/) # -> "test"
|
1336
|
-
* s.scan(/\s+/) # -> " "
|
1337
|
-
* s.pre_match # -> "test"
|
1338
|
-
* s.post_match # -> "string"
|
1339
1975
|
*/
|
1340
1976
|
static VALUE
|
1341
1977
|
strscan_post_match(VALUE self)
|
@@ -1350,8 +1986,24 @@ strscan_post_match(VALUE self)
|
|
1350
1986
|
}
|
1351
1987
|
|
1352
1988
|
/*
|
1353
|
-
*
|
1354
|
-
*
|
1989
|
+
* :markup: markdown
|
1990
|
+
* :include: strscan/link_refs.txt
|
1991
|
+
*
|
1992
|
+
* call-seq:
|
1993
|
+
* rest -> target_substring
|
1994
|
+
*
|
1995
|
+
* Returns the 'rest' of the [stored string][1] (all after the current [position][2]),
|
1996
|
+
* which is the [target substring][3]:
|
1997
|
+
*
|
1998
|
+
* ```rb
|
1999
|
+
* scanner = StringScanner.new('foobarbaz')
|
2000
|
+
* scanner.rest # => "foobarbaz"
|
2001
|
+
* scanner.pos = 3
|
2002
|
+
* scanner.rest # => "barbaz"
|
2003
|
+
* scanner.terminate
|
2004
|
+
* scanner.rest # => ""
|
2005
|
+
* ```
|
2006
|
+
*
|
1355
2007
|
*/
|
1356
2008
|
static VALUE
|
1357
2009
|
strscan_rest(VALUE self)
|
@@ -1366,7 +2018,26 @@ strscan_rest(VALUE self)
|
|
1366
2018
|
}
|
1367
2019
|
|
1368
2020
|
/*
|
1369
|
-
*
|
2021
|
+
* :markup: markdown
|
2022
|
+
* :include: strscan/link_refs.txt
|
2023
|
+
*
|
2024
|
+
* call-seq:
|
2025
|
+
* rest_size -> integer
|
2026
|
+
*
|
2027
|
+
* Returns the size (in bytes) of the #rest of the [stored string][1]:
|
2028
|
+
*
|
2029
|
+
* ```rb
|
2030
|
+
* scanner = StringScanner.new('foobarbaz')
|
2031
|
+
* scanner.rest # => "foobarbaz"
|
2032
|
+
* scanner.rest_size # => 9
|
2033
|
+
* scanner.pos = 3
|
2034
|
+
* scanner.rest # => "barbaz"
|
2035
|
+
* scanner.rest_size # => 6
|
2036
|
+
* scanner.terminate
|
2037
|
+
* scanner.rest # => ""
|
2038
|
+
* scanner.rest_size # => 0
|
2039
|
+
* ```
|
2040
|
+
*
|
1370
2041
|
*/
|
1371
2042
|
static VALUE
|
1372
2043
|
strscan_rest_size(VALUE self)
|
@@ -1383,9 +2054,14 @@ strscan_rest_size(VALUE self)
|
|
1383
2054
|
}
|
1384
2055
|
|
1385
2056
|
/*
|
2057
|
+
* call-seq:
|
2058
|
+
* restsize
|
2059
|
+
*
|
1386
2060
|
* <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
|
1387
2061
|
* This method is obsolete; use #rest_size instead.
|
1388
2062
|
*/
|
2063
|
+
|
2064
|
+
/* :nodoc: */
|
1389
2065
|
static VALUE
|
1390
2066
|
strscan_restsize(VALUE self)
|
1391
2067
|
{
|
@@ -1396,15 +2072,39 @@ strscan_restsize(VALUE self)
|
|
1396
2072
|
#define INSPECT_LENGTH 5
|
1397
2073
|
|
1398
2074
|
/*
|
1399
|
-
*
|
1400
|
-
*
|
1401
|
-
*
|
1402
|
-
* -
|
1403
|
-
*
|
1404
|
-
*
|
1405
|
-
*
|
1406
|
-
*
|
1407
|
-
*
|
2075
|
+
* :markup: markdown
|
2076
|
+
* :include: strscan/link_refs.txt
|
2077
|
+
*
|
2078
|
+
* call-seq:
|
2079
|
+
* inspect -> string
|
2080
|
+
*
|
2081
|
+
* Returns a string representation of `self` that may show:
|
2082
|
+
*
|
2083
|
+
* 1. The current [position][2].
|
2084
|
+
* 2. The size (in bytes) of the [stored string][1].
|
2085
|
+
* 3. The substring preceding the current position.
|
2086
|
+
* 4. The substring following the current position (which is also the [target substring][3]).
|
2087
|
+
*
|
2088
|
+
* ```rb
|
2089
|
+
* scanner = StringScanner.new("Fri Dec 12 1975 14:39")
|
2090
|
+
* scanner.pos = 11
|
2091
|
+
* scanner.inspect # => "#<StringScanner 11/21 \"...c 12 \" @ \"1975 ...\">"
|
2092
|
+
* ```
|
2093
|
+
*
|
2094
|
+
* If at beginning-of-string, item 3 above (preceding substring) is omitted:
|
2095
|
+
*
|
2096
|
+
* ```rb
|
2097
|
+
* scanner.reset
|
2098
|
+
* scanner.inspect # => "#<StringScanner 0/21 @ \"Fri D...\">"
|
2099
|
+
* ```
|
2100
|
+
*
|
2101
|
+
* If at end-of-string, all items above are omitted:
|
2102
|
+
*
|
2103
|
+
* ```rb
|
2104
|
+
* scanner.terminate
|
2105
|
+
* scanner.inspect # => "#<StringScanner fin>"
|
2106
|
+
* ```
|
2107
|
+
*
|
1408
2108
|
*/
|
1409
2109
|
static VALUE
|
1410
2110
|
strscan_inspect(VALUE self)
|
@@ -1476,13 +2176,13 @@ inspect2(struct strscanner *p)
|
|
1476
2176
|
}
|
1477
2177
|
|
1478
2178
|
/*
|
1479
|
-
*
|
1480
|
-
*
|
2179
|
+
* :markup: markdown
|
2180
|
+
* :include: strscan/link_refs.txt
|
1481
2181
|
*
|
1482
|
-
*
|
2182
|
+
* call-seq:
|
2183
|
+
* fixed_anchor? -> true or false
|
1483
2184
|
*
|
1484
|
-
*
|
1485
|
-
* the string. Otherwise, +\A+ always matches the current position.
|
2185
|
+
* Returns whether the [fixed-anchor property][10] is set.
|
1486
2186
|
*/
|
1487
2187
|
static VALUE
|
1488
2188
|
strscan_fixed_anchor_p(VALUE self)
|
@@ -1511,28 +2211,49 @@ named_captures_iter(const OnigUChar *name,
|
|
1511
2211
|
VALUE value = RUBY_Qnil;
|
1512
2212
|
int i;
|
1513
2213
|
for (i = 0; i < back_num; i++) {
|
1514
|
-
|
2214
|
+
VALUE v = strscan_aref(data->self, INT2NUM(back_refs[i]));
|
2215
|
+
if (!RB_NIL_P(v)) {
|
2216
|
+
value = v;
|
2217
|
+
}
|
1515
2218
|
}
|
1516
2219
|
rb_hash_aset(data->captures, key, value);
|
1517
2220
|
return 0;
|
1518
2221
|
}
|
1519
2222
|
|
1520
2223
|
/*
|
2224
|
+
* :markup: markdown
|
2225
|
+
* :include: strscan/link_refs.txt
|
2226
|
+
*
|
1521
2227
|
* call-seq:
|
1522
|
-
*
|
2228
|
+
* named_captures -> hash
|
1523
2229
|
*
|
1524
|
-
* Returns
|
2230
|
+
* Returns a hash mapping each capture name of the most recently used pattern
|
2231
|
+
* to its captured substring (or `nil`), or an empty hash if there are no named captures;
|
2232
|
+
* see [Captured Match Values][13]:
|
2233
|
+
*
|
2234
|
+
* ```rb
|
2235
|
+
* scanner = StringScanner.new('Fri Dec 12 1975 14:39')
|
2236
|
+
* scanner.named_captures # => {}
|
2237
|
+
*
|
2238
|
+
* pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) /
|
2239
|
+
* scanner.match?(pattern)
|
2240
|
+
* scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"}
|
2241
|
+
*
|
2242
|
+
* scanner.string = 'nope'
|
2243
|
+
* scanner.match?(pattern)
|
2244
|
+
* scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil}
|
2245
|
+
*
|
2246
|
+
* scanner.match?(/nosuch/)
|
2247
|
+
* scanner.named_captures # => {}
|
2248
|
+
* ```
|
1525
2249
|
*
|
1526
|
-
* scan = StringScanner.new('foobarbaz')
|
1527
|
-
* scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/)
|
1528
|
-
* scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"}
|
1529
2250
|
*/
|
1530
2251
|
static VALUE
|
1531
2252
|
strscan_named_captures(VALUE self)
|
1532
2253
|
{
|
1533
2254
|
struct strscanner *p;
|
1534
|
-
GET_SCANNER(self, p);
|
1535
2255
|
named_captures_data data;
|
2256
|
+
GET_SCANNER(self, p);
|
1536
2257
|
data.self = self;
|
1537
2258
|
data.captures = rb_hash_new();
|
1538
2259
|
if (!RB_NIL_P(p->regex)) {
|
@@ -1549,107 +2270,11 @@ strscan_named_captures(VALUE self)
|
|
1549
2270
|
/*
|
1550
2271
|
* Document-class: StringScanner
|
1551
2272
|
*
|
1552
|
-
*
|
1553
|
-
* an example of its usage:
|
1554
|
-
*
|
1555
|
-
* require 'strscan'
|
1556
|
-
*
|
1557
|
-
* s = StringScanner.new('This is an example string')
|
1558
|
-
* s.eos? # -> false
|
1559
|
-
*
|
1560
|
-
* p s.scan(/\w+/) # -> "This"
|
1561
|
-
* p s.scan(/\w+/) # -> nil
|
1562
|
-
* p s.scan(/\s+/) # -> " "
|
1563
|
-
* p s.scan(/\s+/) # -> nil
|
1564
|
-
* p s.scan(/\w+/) # -> "is"
|
1565
|
-
* s.eos? # -> false
|
1566
|
-
*
|
1567
|
-
* p s.scan(/\s+/) # -> " "
|
1568
|
-
* p s.scan(/\w+/) # -> "an"
|
1569
|
-
* p s.scan(/\s+/) # -> " "
|
1570
|
-
* p s.scan(/\w+/) # -> "example"
|
1571
|
-
* p s.scan(/\s+/) # -> " "
|
1572
|
-
* p s.scan(/\w+/) # -> "string"
|
1573
|
-
* s.eos? # -> true
|
2273
|
+
* :markup: markdown
|
1574
2274
|
*
|
1575
|
-
*
|
1576
|
-
*
|
2275
|
+
* :include: strscan/link_refs.txt
|
2276
|
+
* :include: strscan/strscan.md
|
1577
2277
|
*
|
1578
|
-
* Scanning a string means remembering the position of a <i>scan pointer</i>,
|
1579
|
-
* which is just an index. The point of scanning is to move forward a bit at
|
1580
|
-
* a time, so matches are sought after the scan pointer; usually immediately
|
1581
|
-
* after it.
|
1582
|
-
*
|
1583
|
-
* Given the string "test string", here are the pertinent scan pointer
|
1584
|
-
* positions:
|
1585
|
-
*
|
1586
|
-
* t e s t s t r i n g
|
1587
|
-
* 0 1 2 ... 1
|
1588
|
-
* 0
|
1589
|
-
*
|
1590
|
-
* When you #scan for a pattern (a regular expression), the match must occur
|
1591
|
-
* at the character after the scan pointer. If you use #scan_until, then the
|
1592
|
-
* match can occur anywhere after the scan pointer. In both cases, the scan
|
1593
|
-
* pointer moves <i>just beyond</i> the last character of the match, ready to
|
1594
|
-
* scan again from the next character onwards. This is demonstrated by the
|
1595
|
-
* example above.
|
1596
|
-
*
|
1597
|
-
* == Method Categories
|
1598
|
-
*
|
1599
|
-
* There are other methods besides the plain scanners. You can look ahead in
|
1600
|
-
* the string without actually scanning. You can access the most recent match.
|
1601
|
-
* You can modify the string being scanned, reset or terminate the scanner,
|
1602
|
-
* find out or change the position of the scan pointer, skip ahead, and so on.
|
1603
|
-
*
|
1604
|
-
* === Advancing the Scan Pointer
|
1605
|
-
*
|
1606
|
-
* - #getch
|
1607
|
-
* - #get_byte
|
1608
|
-
* - #scan
|
1609
|
-
* - #scan_until
|
1610
|
-
* - #skip
|
1611
|
-
* - #skip_until
|
1612
|
-
*
|
1613
|
-
* === Looking Ahead
|
1614
|
-
*
|
1615
|
-
* - #check
|
1616
|
-
* - #check_until
|
1617
|
-
* - #exist?
|
1618
|
-
* - #match?
|
1619
|
-
* - #peek
|
1620
|
-
*
|
1621
|
-
* === Finding Where we Are
|
1622
|
-
*
|
1623
|
-
* - #beginning_of_line? (<tt>#bol?</tt>)
|
1624
|
-
* - #eos?
|
1625
|
-
* - #rest?
|
1626
|
-
* - #rest_size
|
1627
|
-
* - #pos
|
1628
|
-
*
|
1629
|
-
* === Setting Where we Are
|
1630
|
-
*
|
1631
|
-
* - #reset
|
1632
|
-
* - #terminate
|
1633
|
-
* - #pos=
|
1634
|
-
*
|
1635
|
-
* === Match Data
|
1636
|
-
*
|
1637
|
-
* - #matched
|
1638
|
-
* - #matched?
|
1639
|
-
* - #matched_size
|
1640
|
-
* - <tt>#[]</tt>
|
1641
|
-
* - #pre_match
|
1642
|
-
* - #post_match
|
1643
|
-
*
|
1644
|
-
* === Miscellaneous
|
1645
|
-
*
|
1646
|
-
* - <tt><<</tt>
|
1647
|
-
* - #concat
|
1648
|
-
* - #string
|
1649
|
-
* - #string=
|
1650
|
-
* - #unscan
|
1651
|
-
*
|
1652
|
-
* There are aliases to several of the methods.
|
1653
2278
|
*/
|
1654
2279
|
void
|
1655
2280
|
Init_strscan(void)
|
@@ -1664,6 +2289,10 @@ Init_strscan(void)
|
|
1664
2289
|
|
1665
2290
|
id_byteslice = rb_intern("byteslice");
|
1666
2291
|
|
2292
|
+
usascii_encindex = rb_usascii_encindex();
|
2293
|
+
utf8_encindex = rb_utf8_encindex();
|
2294
|
+
binary_encindex = rb_ascii8bit_encindex();
|
2295
|
+
|
1667
2296
|
StringScanner = rb_define_class("StringScanner", rb_cObject);
|
1668
2297
|
ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
|
1669
2298
|
if (!rb_const_defined(rb_cObject, id_scanerr)) {
|
@@ -1708,9 +2337,14 @@ Init_strscan(void)
|
|
1708
2337
|
rb_define_method(StringScanner, "getch", strscan_getch, 0);
|
1709
2338
|
rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
|
1710
2339
|
rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
|
2340
|
+
rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
|
1711
2341
|
rb_define_method(StringScanner, "peek", strscan_peek, 1);
|
2342
|
+
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
|
1712
2343
|
rb_define_method(StringScanner, "peep", strscan_peep, 1);
|
1713
2344
|
|
2345
|
+
rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
|
2346
|
+
rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
|
2347
|
+
|
1714
2348
|
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
|
1715
2349
|
|
1716
2350
|
rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
|
@@ -1738,4 +2372,6 @@ Init_strscan(void)
|
|
1738
2372
|
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
|
1739
2373
|
|
1740
2374
|
rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
|
2375
|
+
|
2376
|
+
rb_require("strscan/strscan");
|
1741
2377
|
}
|