strscan 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ext/strscan/extconf.rb +4 -0
- data/ext/strscan/regenc.h +254 -0
- data/ext/strscan/regint.h +938 -0
- data/ext/strscan/strscan.c +1490 -0
- metadata +63 -0
@@ -0,0 +1,1490 @@
|
|
1
|
+
/*
|
2
|
+
$Id$
|
3
|
+
|
4
|
+
Copyright (c) 1999-2006 Minero Aoki
|
5
|
+
|
6
|
+
This program is free software.
|
7
|
+
You can distribute/modify this program under the terms of
|
8
|
+
the Ruby License. For details, see the file COPYING.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include "ruby/ruby.h"
|
12
|
+
#include "ruby/re.h"
|
13
|
+
#include "ruby/encoding.h"
|
14
|
+
#include "regint.h"
|
15
|
+
|
16
|
+
#define STRSCAN_VERSION "0.7.0"
|
17
|
+
|
18
|
+
/* =======================================================================
|
19
|
+
Data Type Definitions
|
20
|
+
======================================================================= */
|
21
|
+
|
22
|
+
static VALUE StringScanner;
|
23
|
+
static VALUE ScanError;
|
24
|
+
static ID id_byteslice;
|
25
|
+
|
26
|
+
struct strscanner
|
27
|
+
{
|
28
|
+
/* multi-purpose flags */
|
29
|
+
unsigned long flags;
|
30
|
+
#define FLAG_MATCHED (1 << 0)
|
31
|
+
|
32
|
+
/* the string to scan */
|
33
|
+
VALUE str;
|
34
|
+
|
35
|
+
/* scan pointers */
|
36
|
+
long prev; /* legal only when MATCHED_P(s) */
|
37
|
+
long curr; /* always legal */
|
38
|
+
|
39
|
+
/* the regexp register; legal only when MATCHED_P(s) */
|
40
|
+
struct re_registers regs;
|
41
|
+
|
42
|
+
/* regexp used for last scan */
|
43
|
+
VALUE regex;
|
44
|
+
};
|
45
|
+
|
46
|
+
/* Test, set and clear the FLAG_MATCHED bit of scanner s. */
#define MATCHED_P(s)          ((s)->flags & FLAG_MATCHED)
#define MATCHED(s)            ((s)->flags |= FLAG_MATCHED)
#define CLEAR_MATCH_STATUS(s) ((s)->flags &= ~FLAG_MATCHED)

/* Byte-level views of the scanned string of scanner s. */
#define S_PBEG(s)    (RSTRING_PTR((s)->str))      /* first byte */
#define S_LEN(s)     (RSTRING_LEN((s)->str))      /* length in bytes */
#define S_PEND(s)    (S_PBEG(s) + S_LEN(s))       /* one past the last byte */
#define CURPTR(s)    (S_PBEG(s) + (s)->curr)      /* byte at the scan pointer */
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)       /* bytes left after the scan pointer */

/*
 * True when the scan pointer is at or past the end of the string.
 * Uses the macro argument throughout, so it no longer depends on the
 * caller's scanner variable being named `p`.
 */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))

/*
 * Fetch the scanner struct of obj into var and raise ArgumentError if the
 * object has no string yet (str is still nil).
 */
#define GET_SCANNER(obj,var) do {\
    (var) = check_strscan(obj);\
    if (NIL_P((var)->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\
} while (0)
|
62
|
+
|
63
|
+
/* =======================================================================
|
64
|
+
Function Prototypes
|
65
|
+
======================================================================= */
|
66
|
+
|
67
|
+
static inline long minl _((const long n, const long x));
|
68
|
+
static VALUE infect _((VALUE str, struct strscanner *p));
|
69
|
+
static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
|
70
|
+
static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
|
71
|
+
|
72
|
+
static struct strscanner *check_strscan _((VALUE obj));
|
73
|
+
static void strscan_mark _((void *p));
|
74
|
+
static void strscan_free _((void *p));
|
75
|
+
static size_t strscan_memsize _((const void *p));
|
76
|
+
static VALUE strscan_s_allocate _((VALUE klass));
|
77
|
+
static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
|
78
|
+
static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));
|
79
|
+
|
80
|
+
static VALUE strscan_s_mustc _((VALUE self));
|
81
|
+
static VALUE strscan_terminate _((VALUE self));
|
82
|
+
static VALUE strscan_clear _((VALUE self));
|
83
|
+
static VALUE strscan_get_string _((VALUE self));
|
84
|
+
static VALUE strscan_set_string _((VALUE self, VALUE str));
|
85
|
+
static VALUE strscan_concat _((VALUE self, VALUE str));
|
86
|
+
static VALUE strscan_get_pos _((VALUE self));
|
87
|
+
static VALUE strscan_set_pos _((VALUE self, VALUE pos));
|
88
|
+
static VALUE strscan_do_scan _((VALUE self, VALUE regex,
|
89
|
+
int succptr, int getstr, int headonly));
|
90
|
+
static VALUE strscan_scan _((VALUE self, VALUE re));
|
91
|
+
static VALUE strscan_match_p _((VALUE self, VALUE re));
|
92
|
+
static VALUE strscan_skip _((VALUE self, VALUE re));
|
93
|
+
static VALUE strscan_check _((VALUE self, VALUE re));
|
94
|
+
static VALUE strscan_scan_full _((VALUE self, VALUE re,
|
95
|
+
VALUE succp, VALUE getp));
|
96
|
+
static VALUE strscan_scan_until _((VALUE self, VALUE re));
|
97
|
+
static VALUE strscan_skip_until _((VALUE self, VALUE re));
|
98
|
+
static VALUE strscan_check_until _((VALUE self, VALUE re));
|
99
|
+
static VALUE strscan_search_full _((VALUE self, VALUE re,
|
100
|
+
VALUE succp, VALUE getp));
|
101
|
+
static void adjust_registers_to_matched _((struct strscanner *p));
|
102
|
+
static VALUE strscan_getch _((VALUE self));
|
103
|
+
static VALUE strscan_get_byte _((VALUE self));
|
104
|
+
static VALUE strscan_getbyte _((VALUE self));
|
105
|
+
static VALUE strscan_peek _((VALUE self, VALUE len));
|
106
|
+
static VALUE strscan_peep _((VALUE self, VALUE len));
|
107
|
+
static VALUE strscan_unscan _((VALUE self));
|
108
|
+
static VALUE strscan_bol_p _((VALUE self));
|
109
|
+
static VALUE strscan_eos_p _((VALUE self));
|
110
|
+
static VALUE strscan_empty_p _((VALUE self));
|
111
|
+
static VALUE strscan_rest_p _((VALUE self));
|
112
|
+
static VALUE strscan_matched_p _((VALUE self));
|
113
|
+
static VALUE strscan_matched _((VALUE self));
|
114
|
+
static VALUE strscan_matched_size _((VALUE self));
|
115
|
+
static VALUE strscan_aref _((VALUE self, VALUE idx));
|
116
|
+
static VALUE strscan_pre_match _((VALUE self));
|
117
|
+
static VALUE strscan_post_match _((VALUE self));
|
118
|
+
static VALUE strscan_rest _((VALUE self));
|
119
|
+
static VALUE strscan_rest_size _((VALUE self));
|
120
|
+
|
121
|
+
static VALUE strscan_inspect _((VALUE self));
|
122
|
+
static VALUE inspect1 _((struct strscanner *p));
|
123
|
+
static VALUE inspect2 _((struct strscanner *p));
|
124
|
+
|
125
|
+
/* =======================================================================
|
126
|
+
Utils
|
127
|
+
======================================================================= */
|
128
|
+
|
129
|
+
static VALUE
|
130
|
+
infect(VALUE str, struct strscanner *p)
|
131
|
+
{
|
132
|
+
OBJ_INFECT(str, p->str);
|
133
|
+
return str;
|
134
|
+
}
|
135
|
+
|
136
|
+
static VALUE
|
137
|
+
str_new(struct strscanner *p, const char *ptr, long len)
|
138
|
+
{
|
139
|
+
VALUE str = rb_str_new(ptr, len);
|
140
|
+
rb_enc_copy(str, p->str);
|
141
|
+
return str;
|
142
|
+
}
|
143
|
+
|
144
|
+
/* Returns the smaller of two longs (y when they are equal). */
static inline long
minl(const long x, const long y)
{
    if (x < y) {
        return x;
    }
    return y;
}
|
149
|
+
|
150
|
+
static VALUE
|
151
|
+
extract_range(struct strscanner *p, long beg_i, long end_i)
|
152
|
+
{
|
153
|
+
if (beg_i > S_LEN(p)) return Qnil;
|
154
|
+
end_i = minl(end_i, S_LEN(p));
|
155
|
+
return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p);
|
156
|
+
}
|
157
|
+
|
158
|
+
static VALUE
|
159
|
+
extract_beg_len(struct strscanner *p, long beg_i, long len)
|
160
|
+
{
|
161
|
+
if (beg_i > S_LEN(p)) return Qnil;
|
162
|
+
len = minl(len, S_LEN(p) - beg_i);
|
163
|
+
return infect(str_new(p, S_PBEG(p) + beg_i, len), p);
|
164
|
+
}
|
165
|
+
|
166
|
+
/* =======================================================================
|
167
|
+
Constructor
|
168
|
+
======================================================================= */
|
169
|
+
|
170
|
+
static void
|
171
|
+
strscan_mark(void *ptr)
|
172
|
+
{
|
173
|
+
struct strscanner *p = ptr;
|
174
|
+
rb_gc_mark(p->str);
|
175
|
+
}
|
176
|
+
|
177
|
+
static void
|
178
|
+
strscan_free(void *ptr)
|
179
|
+
{
|
180
|
+
struct strscanner *p = ptr;
|
181
|
+
onig_region_free(&(p->regs), 0);
|
182
|
+
ruby_xfree(p);
|
183
|
+
}
|
184
|
+
|
185
|
+
static size_t
|
186
|
+
strscan_memsize(const void *ptr)
|
187
|
+
{
|
188
|
+
const struct strscanner *p = ptr;
|
189
|
+
return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
|
190
|
+
}
|
191
|
+
|
192
|
+
static const rb_data_type_t strscanner_type = {
|
193
|
+
"StringScanner",
|
194
|
+
{strscan_mark, strscan_free, strscan_memsize},
|
195
|
+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
|
196
|
+
};
|
197
|
+
|
198
|
+
static VALUE
|
199
|
+
strscan_s_allocate(VALUE klass)
|
200
|
+
{
|
201
|
+
struct strscanner *p;
|
202
|
+
VALUE obj = TypedData_Make_Struct(klass, struct strscanner, &strscanner_type, p);
|
203
|
+
|
204
|
+
CLEAR_MATCH_STATUS(p);
|
205
|
+
onig_region_init(&(p->regs));
|
206
|
+
p->str = Qnil;
|
207
|
+
return obj;
|
208
|
+
}
|
209
|
+
|
210
|
+
/*
|
211
|
+
* call-seq: StringScanner.new(string, dup = false)
|
212
|
+
*
|
213
|
+
* Creates a new StringScanner object to scan over the given +string+.
|
214
|
+
* +dup+ argument is obsolete and not used now.
|
215
|
+
*/
|
216
|
+
static VALUE
|
217
|
+
strscan_initialize(int argc, VALUE *argv, VALUE self)
|
218
|
+
{
|
219
|
+
struct strscanner *p;
|
220
|
+
VALUE str, need_dup;
|
221
|
+
|
222
|
+
p = check_strscan(self);
|
223
|
+
rb_scan_args(argc, argv, "11", &str, &need_dup);
|
224
|
+
StringValue(str);
|
225
|
+
p->str = str;
|
226
|
+
|
227
|
+
return self;
|
228
|
+
}
|
229
|
+
|
230
|
+
static struct strscanner *
|
231
|
+
check_strscan(VALUE obj)
|
232
|
+
{
|
233
|
+
return rb_check_typeddata(obj, &strscanner_type);
|
234
|
+
}
|
235
|
+
|
236
|
+
/*
|
237
|
+
* call-seq:
|
238
|
+
* dup
|
239
|
+
* clone
|
240
|
+
*
|
241
|
+
* Duplicates a StringScanner object.
|
242
|
+
*/
|
243
|
+
static VALUE
|
244
|
+
strscan_init_copy(VALUE vself, VALUE vorig)
|
245
|
+
{
|
246
|
+
struct strscanner *self, *orig;
|
247
|
+
|
248
|
+
self = check_strscan(vself);
|
249
|
+
orig = check_strscan(vorig);
|
250
|
+
if (self != orig) {
|
251
|
+
self->flags = orig->flags;
|
252
|
+
self->str = orig->str;
|
253
|
+
self->prev = orig->prev;
|
254
|
+
self->curr = orig->curr;
|
255
|
+
if (rb_reg_region_copy(&self->regs, &orig->regs))
|
256
|
+
rb_memerror();
|
257
|
+
RB_GC_GUARD(vorig);
|
258
|
+
}
|
259
|
+
|
260
|
+
return vself;
|
261
|
+
}
|
262
|
+
|
263
|
+
/* =======================================================================
|
264
|
+
Instance Methods
|
265
|
+
======================================================================= */
|
266
|
+
|
267
|
+
/*
|
268
|
+
* call-seq: StringScanner.must_C_version
|
269
|
+
*
|
270
|
+
* This method is defined for backward compatibility.
|
271
|
+
*/
|
272
|
+
static VALUE
|
273
|
+
strscan_s_mustc(VALUE self)
|
274
|
+
{
|
275
|
+
return self;
|
276
|
+
}
|
277
|
+
|
278
|
+
/*
|
279
|
+
* Resets the scan pointer to index 0 and clears matching data.
|
280
|
+
*/
|
281
|
+
static VALUE
|
282
|
+
strscan_reset(VALUE self)
|
283
|
+
{
|
284
|
+
struct strscanner *p;
|
285
|
+
|
286
|
+
GET_SCANNER(self, p);
|
287
|
+
p->curr = 0;
|
288
|
+
CLEAR_MATCH_STATUS(p);
|
289
|
+
return self;
|
290
|
+
}
|
291
|
+
|
292
|
+
/*
|
293
|
+
* call-seq:
|
294
|
+
* terminate
|
295
|
+
* clear
|
296
|
+
*
|
297
|
+
* Set the scan pointer to the end of the string and clear matching data.
|
298
|
+
*/
|
299
|
+
static VALUE
|
300
|
+
strscan_terminate(VALUE self)
|
301
|
+
{
|
302
|
+
struct strscanner *p;
|
303
|
+
|
304
|
+
GET_SCANNER(self, p);
|
305
|
+
p->curr = S_LEN(p);
|
306
|
+
CLEAR_MATCH_STATUS(p);
|
307
|
+
return self;
|
308
|
+
}
|
309
|
+
|
310
|
+
/*
|
311
|
+
* Equivalent to #terminate.
|
312
|
+
* This method is obsolete; use #terminate instead.
|
313
|
+
*/
|
314
|
+
static VALUE
|
315
|
+
strscan_clear(VALUE self)
|
316
|
+
{
|
317
|
+
rb_warning("StringScanner#clear is obsolete; use #terminate instead");
|
318
|
+
return strscan_terminate(self);
|
319
|
+
}
|
320
|
+
|
321
|
+
/*
|
322
|
+
* Returns the string being scanned.
|
323
|
+
*/
|
324
|
+
static VALUE
|
325
|
+
strscan_get_string(VALUE self)
|
326
|
+
{
|
327
|
+
struct strscanner *p;
|
328
|
+
|
329
|
+
GET_SCANNER(self, p);
|
330
|
+
return p->str;
|
331
|
+
}
|
332
|
+
|
333
|
+
/*
|
334
|
+
* call-seq: string=(str)
|
335
|
+
*
|
336
|
+
* Changes the string being scanned to +str+ and resets the scanner.
|
337
|
+
* Returns +str+.
|
338
|
+
*/
|
339
|
+
static VALUE
|
340
|
+
strscan_set_string(VALUE self, VALUE str)
|
341
|
+
{
|
342
|
+
struct strscanner *p = check_strscan(self);
|
343
|
+
|
344
|
+
StringValue(str);
|
345
|
+
p->str = str;
|
346
|
+
p->curr = 0;
|
347
|
+
CLEAR_MATCH_STATUS(p);
|
348
|
+
return str;
|
349
|
+
}
|
350
|
+
|
351
|
+
/*
|
352
|
+
* call-seq:
|
353
|
+
* concat(str)
|
354
|
+
* <<(str)
|
355
|
+
*
|
356
|
+
* Appends +str+ to the string being scanned.
|
357
|
+
* This method does not affect scan pointer.
|
358
|
+
*
|
359
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
360
|
+
* s.scan(/Fri /)
|
361
|
+
* s << " +1000 GMT"
|
362
|
+
* s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
|
363
|
+
* s.scan(/Dec/) # -> "Dec"
|
364
|
+
*/
|
365
|
+
static VALUE
|
366
|
+
strscan_concat(VALUE self, VALUE str)
|
367
|
+
{
|
368
|
+
struct strscanner *p;
|
369
|
+
|
370
|
+
GET_SCANNER(self, p);
|
371
|
+
StringValue(str);
|
372
|
+
rb_str_append(p->str, str);
|
373
|
+
return self;
|
374
|
+
}
|
375
|
+
|
376
|
+
/*
|
377
|
+
* Returns the byte position of the scan pointer. In the 'reset' position, this
|
378
|
+
* value is zero. In the 'terminated' position (i.e. the string is exhausted),
|
379
|
+
* this value is the bytesize of the string.
|
380
|
+
*
|
381
|
+
* In short, it's a 0-based index into bytes of the string.
|
382
|
+
*
|
383
|
+
* s = StringScanner.new('test string')
|
384
|
+
* s.pos # -> 0
|
385
|
+
* s.scan_until /str/ # -> "test str"
|
386
|
+
* s.pos # -> 8
|
387
|
+
* s.terminate # -> #<StringScanner fin>
|
388
|
+
* s.pos # -> 11
|
389
|
+
*/
|
390
|
+
static VALUE
|
391
|
+
strscan_get_pos(VALUE self)
|
392
|
+
{
|
393
|
+
struct strscanner *p;
|
394
|
+
|
395
|
+
GET_SCANNER(self, p);
|
396
|
+
return INT2FIX(p->curr);
|
397
|
+
}
|
398
|
+
|
399
|
+
/*
|
400
|
+
* Returns the character position of the scan pointer. In the 'reset' position, this
|
401
|
+
* value is zero. In the 'terminated' position (i.e. the string is exhausted),
|
402
|
+
* this value is the size of the string.
|
403
|
+
*
|
404
|
+
* In short, it's a 0-based index into the string.
|
405
|
+
*
|
406
|
+
* s = StringScanner.new("abcädeföghi")
|
407
|
+
* s.charpos # -> 0
|
408
|
+
* s.scan_until(/ä/) # -> "abcä"
|
409
|
+
* s.pos # -> 5
|
410
|
+
* s.charpos # -> 4
|
411
|
+
*/
|
412
|
+
static VALUE
|
413
|
+
strscan_get_charpos(VALUE self)
|
414
|
+
{
|
415
|
+
struct strscanner *p;
|
416
|
+
VALUE substr;
|
417
|
+
|
418
|
+
GET_SCANNER(self, p);
|
419
|
+
|
420
|
+
substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), INT2NUM(p->curr));
|
421
|
+
|
422
|
+
return rb_str_length(substr);
|
423
|
+
}
|
424
|
+
|
425
|
+
/*
|
426
|
+
* call-seq: pos=(n)
|
427
|
+
*
|
428
|
+
* Set the byte position of the scan pointer.
|
429
|
+
*
|
430
|
+
* s = StringScanner.new('test string')
|
431
|
+
* s.pos = 7 # -> 7
|
432
|
+
* s.rest # -> "ring"
|
433
|
+
*/
|
434
|
+
static VALUE
|
435
|
+
strscan_set_pos(VALUE self, VALUE v)
|
436
|
+
{
|
437
|
+
struct strscanner *p;
|
438
|
+
long i;
|
439
|
+
|
440
|
+
GET_SCANNER(self, p);
|
441
|
+
i = NUM2INT(v);
|
442
|
+
if (i < 0) i += S_LEN(p);
|
443
|
+
if (i < 0) rb_raise(rb_eRangeError, "index out of range");
|
444
|
+
if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
|
445
|
+
p->curr = i;
|
446
|
+
return INT2NUM(i);
|
447
|
+
}
|
448
|
+
|
449
|
+
static VALUE
|
450
|
+
strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
|
451
|
+
{
|
452
|
+
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
453
|
+
struct strscanner *p;
|
454
|
+
regex_t *re;
|
455
|
+
long ret;
|
456
|
+
int tmpreg;
|
457
|
+
|
458
|
+
Check_Type(regex, T_REGEXP);
|
459
|
+
GET_SCANNER(self, p);
|
460
|
+
|
461
|
+
CLEAR_MATCH_STATUS(p);
|
462
|
+
if (S_RESTLEN(p) < 0) {
|
463
|
+
return Qnil;
|
464
|
+
}
|
465
|
+
|
466
|
+
p->regex = regex;
|
467
|
+
re = rb_reg_prepare_re(regex, p->str);
|
468
|
+
tmpreg = re != RREGEXP_PTR(regex);
|
469
|
+
if (!tmpreg) RREGEXP(regex)->usecnt++;
|
470
|
+
|
471
|
+
if (headonly) {
|
472
|
+
ret = onig_match(re, (UChar* )CURPTR(p),
|
473
|
+
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
474
|
+
(UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
|
475
|
+
}
|
476
|
+
else {
|
477
|
+
ret = onig_search(re,
|
478
|
+
(UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
479
|
+
(UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
|
480
|
+
&(p->regs), ONIG_OPTION_NONE);
|
481
|
+
}
|
482
|
+
if (!tmpreg) RREGEXP(regex)->usecnt--;
|
483
|
+
if (tmpreg) {
|
484
|
+
if (RREGEXP(regex)->usecnt) {
|
485
|
+
onig_free(re);
|
486
|
+
}
|
487
|
+
else {
|
488
|
+
onig_free(RREGEXP_PTR(regex));
|
489
|
+
RREGEXP_PTR(regex) = re;
|
490
|
+
}
|
491
|
+
}
|
492
|
+
|
493
|
+
if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
|
494
|
+
if (ret < 0) {
|
495
|
+
/* not matched */
|
496
|
+
return Qnil;
|
497
|
+
}
|
498
|
+
|
499
|
+
MATCHED(p);
|
500
|
+
p->prev = p->curr;
|
501
|
+
if (succptr) {
|
502
|
+
p->curr += p->regs.end[0];
|
503
|
+
}
|
504
|
+
if (getstr) {
|
505
|
+
return extract_beg_len(p, p->prev, p->regs.end[0]);
|
506
|
+
}
|
507
|
+
else {
|
508
|
+
return INT2FIX(p->regs.end[0]);
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
/*
|
513
|
+
* call-seq: scan(pattern) => String
|
514
|
+
*
|
515
|
+
* Tries to match with +pattern+ at the current position. If there's a match,
|
516
|
+
* the scanner advances the "scan pointer" and returns the matched string.
|
517
|
+
* Otherwise, the scanner returns +nil+.
|
518
|
+
*
|
519
|
+
* s = StringScanner.new('test string')
|
520
|
+
* p s.scan(/\w+/) # -> "test"
|
521
|
+
* p s.scan(/\w+/) # -> nil
|
522
|
+
* p s.scan(/\s+/) # -> " "
|
523
|
+
* p s.scan(/\w+/) # -> "string"
|
524
|
+
* p s.scan(/./) # -> nil
|
525
|
+
*
|
526
|
+
*/
|
527
|
+
static VALUE
|
528
|
+
strscan_scan(VALUE self, VALUE re)
|
529
|
+
{
|
530
|
+
return strscan_do_scan(self, re, 1, 1, 1);
|
531
|
+
}
|
532
|
+
|
533
|
+
/*
|
534
|
+
* call-seq: match?(pattern)
|
535
|
+
*
|
536
|
+
* Tests whether the given +pattern+ is matched from the current scan pointer.
|
537
|
+
* Returns the length of the match, or +nil+. The scan pointer is not advanced.
|
538
|
+
*
|
539
|
+
* s = StringScanner.new('test string')
|
540
|
+
* p s.match?(/\w+/) # -> 4
|
541
|
+
* p s.match?(/\w+/) # -> 4
|
542
|
+
* p s.match?(/\s+/) # -> nil
|
543
|
+
*/
|
544
|
+
static VALUE
|
545
|
+
strscan_match_p(VALUE self, VALUE re)
|
546
|
+
{
|
547
|
+
return strscan_do_scan(self, re, 0, 0, 1);
|
548
|
+
}
|
549
|
+
|
550
|
+
/*
|
551
|
+
* call-seq: skip(pattern)
|
552
|
+
*
|
553
|
+
* Attempts to skip over the given +pattern+ beginning with the scan pointer.
|
554
|
+
* If it matches, the scan pointer is advanced to the end of the match, and the
|
555
|
+
* length of the match is returned. Otherwise, +nil+ is returned.
|
556
|
+
*
|
557
|
+
* It's similar to #scan, but without returning the matched string.
|
558
|
+
*
|
559
|
+
* s = StringScanner.new('test string')
|
560
|
+
* p s.skip(/\w+/) # -> 4
|
561
|
+
* p s.skip(/\w+/) # -> nil
|
562
|
+
* p s.skip(/\s+/) # -> 1
|
563
|
+
* p s.skip(/\w+/) # -> 6
|
564
|
+
* p s.skip(/./) # -> nil
|
565
|
+
*
|
566
|
+
*/
|
567
|
+
static VALUE
|
568
|
+
strscan_skip(VALUE self, VALUE re)
|
569
|
+
{
|
570
|
+
return strscan_do_scan(self, re, 1, 0, 1);
|
571
|
+
}
|
572
|
+
|
573
|
+
/*
|
574
|
+
* call-seq: check(pattern)
|
575
|
+
*
|
576
|
+
* This returns the value that #scan would return, without advancing the scan
|
577
|
+
* pointer. The match register is affected, though.
|
578
|
+
*
|
579
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
580
|
+
* s.check /Fri/ # -> "Fri"
|
581
|
+
* s.pos # -> 0
|
582
|
+
* s.matched # -> "Fri"
|
583
|
+
* s.check /12/ # -> nil
|
584
|
+
* s.matched # -> nil
|
585
|
+
*
|
586
|
+
* Mnemonic: it "checks" to see whether a #scan will return a value.
|
587
|
+
*/
|
588
|
+
static VALUE
|
589
|
+
strscan_check(VALUE self, VALUE re)
|
590
|
+
{
|
591
|
+
return strscan_do_scan(self, re, 0, 1, 1);
|
592
|
+
}
|
593
|
+
|
594
|
+
/*
|
595
|
+
* call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
|
596
|
+
*
|
597
|
+
* Tests whether the given +pattern+ is matched from the current scan pointer.
|
598
|
+
* Advances the scan pointer if +advance_pointer_p+ is true.
|
599
|
+
* Returns the matched string if +return_string_p+ is true.
|
600
|
+
* The match register is affected.
|
601
|
+
*
|
602
|
+
* "full" means "#scan with full parameters".
|
603
|
+
*/
|
604
|
+
static VALUE
|
605
|
+
strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
606
|
+
{
|
607
|
+
return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
|
608
|
+
}
|
609
|
+
|
610
|
+
/*
|
611
|
+
* call-seq: scan_until(pattern)
|
612
|
+
*
|
613
|
+
* Scans the string _until_ the +pattern+ is matched. Returns the substring up
|
614
|
+
* to and including the end of the match, advancing the scan pointer to that
|
615
|
+
* location. If there is no match, +nil+ is returned.
|
616
|
+
*
|
617
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
618
|
+
* s.scan_until(/1/) # -> "Fri Dec 1"
|
619
|
+
* s.pre_match # -> "Fri Dec "
|
620
|
+
* s.scan_until(/XYZ/) # -> nil
|
621
|
+
*/
|
622
|
+
static VALUE
|
623
|
+
strscan_scan_until(VALUE self, VALUE re)
|
624
|
+
{
|
625
|
+
return strscan_do_scan(self, re, 1, 1, 0);
|
626
|
+
}
|
627
|
+
|
628
|
+
/*
|
629
|
+
* call-seq: exist?(pattern)
|
630
|
+
*
|
631
|
+
* Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
|
632
|
+
* without advancing the scan pointer. This predicates whether a #scan_until
|
633
|
+
* will return a value.
|
634
|
+
*
|
635
|
+
* s = StringScanner.new('test string')
|
636
|
+
* s.exist? /s/ # -> 3
|
637
|
+
* s.scan /test/ # -> "test"
|
638
|
+
* s.exist? /s/ # -> 2
|
639
|
+
* s.exist? /e/ # -> nil
|
640
|
+
*/
|
641
|
+
static VALUE
|
642
|
+
strscan_exist_p(VALUE self, VALUE re)
|
643
|
+
{
|
644
|
+
return strscan_do_scan(self, re, 0, 0, 0);
|
645
|
+
}
|
646
|
+
|
647
|
+
/*
|
648
|
+
* call-seq: skip_until(pattern)
|
649
|
+
*
|
650
|
+
* Advances the scan pointer until +pattern+ is matched and consumed. Returns
|
651
|
+
* the number of bytes advanced, or +nil+ if no match was found.
|
652
|
+
*
|
653
|
+
* Look ahead to match +pattern+, and advance the scan pointer to the _end_
|
654
|
+
* of the match. Return the number of bytes advanced, or +nil+ if the
|
655
|
+
* match was unsuccessful.
|
656
|
+
*
|
657
|
+
* It's similar to #scan_until, but without returning the intervening string.
|
658
|
+
*
|
659
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
660
|
+
* s.skip_until /12/ # -> 10
|
661
|
+
* s.pos # -> 10
|
662
|
+
*/
|
663
|
+
static VALUE
|
664
|
+
strscan_skip_until(VALUE self, VALUE re)
|
665
|
+
{
|
666
|
+
return strscan_do_scan(self, re, 1, 0, 0);
|
667
|
+
}
|
668
|
+
|
669
|
+
/*
|
670
|
+
* call-seq: check_until(pattern)
|
671
|
+
*
|
672
|
+
* This returns the value that #scan_until would return, without advancing the
|
673
|
+
* scan pointer. The match register is affected, though.
|
674
|
+
*
|
675
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
676
|
+
* s.check_until /12/ # -> "Fri Dec 12"
|
677
|
+
* s.pos # -> 0
|
678
|
+
* s.matched # -> "12"
|
679
|
+
*
|
680
|
+
* Mnemonic: it "checks" to see whether a #scan_until will return a value.
|
681
|
+
*/
|
682
|
+
static VALUE
|
683
|
+
strscan_check_until(VALUE self, VALUE re)
|
684
|
+
{
|
685
|
+
return strscan_do_scan(self, re, 0, 1, 0);
|
686
|
+
}
|
687
|
+
|
688
|
+
/*
|
689
|
+
* call-seq: search_full(pattern, advance_pointer_p, return_string_p)
|
690
|
+
*
|
691
|
+
* Scans the string _until_ the +pattern+ is matched.
|
692
|
+
* Advances the scan pointer if +advance_pointer_p+, otherwise not.
|
693
|
+
* Returns the matched string if +return_string_p+ is true, otherwise
|
694
|
+
* returns the number of bytes advanced.
|
695
|
+
* This method does affect the match register.
|
696
|
+
*/
|
697
|
+
static VALUE
|
698
|
+
strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
|
699
|
+
{
|
700
|
+
return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
|
701
|
+
}
|
702
|
+
|
703
|
+
static void
|
704
|
+
adjust_registers_to_matched(struct strscanner *p)
|
705
|
+
{
|
706
|
+
onig_region_clear(&(p->regs));
|
707
|
+
onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
|
708
|
+
}
|
709
|
+
|
710
|
+
/*
|
711
|
+
* Scans one character and returns it.
|
712
|
+
* This method is multibyte character sensitive.
|
713
|
+
*
|
714
|
+
* s = StringScanner.new("ab")
|
715
|
+
* s.getch # => "a"
|
716
|
+
* s.getch # => "b"
|
717
|
+
* s.getch # => nil
|
718
|
+
*
|
719
|
+
* $KCODE = 'EUC'
|
720
|
+
* s = StringScanner.new("\244\242")
|
721
|
+
* s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP
|
722
|
+
* s.getch # => nil
|
723
|
+
*/
|
724
|
+
static VALUE
|
725
|
+
strscan_getch(VALUE self)
|
726
|
+
{
|
727
|
+
struct strscanner *p;
|
728
|
+
long len;
|
729
|
+
|
730
|
+
GET_SCANNER(self, p);
|
731
|
+
CLEAR_MATCH_STATUS(p);
|
732
|
+
if (EOS_P(p))
|
733
|
+
return Qnil;
|
734
|
+
|
735
|
+
len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str));
|
736
|
+
len = minl(len, S_RESTLEN(p));
|
737
|
+
p->prev = p->curr;
|
738
|
+
p->curr += len;
|
739
|
+
MATCHED(p);
|
740
|
+
adjust_registers_to_matched(p);
|
741
|
+
return extract_range(p, p->prev + p->regs.beg[0],
|
742
|
+
p->prev + p->regs.end[0]);
|
743
|
+
}
|
744
|
+
|
745
|
+
/*
|
746
|
+
* Scans one byte and returns it.
|
747
|
+
* This method is not multibyte character sensitive.
|
748
|
+
* See also: #getch.
|
749
|
+
*
|
750
|
+
* s = StringScanner.new('ab')
|
751
|
+
* s.get_byte # => "a"
|
752
|
+
* s.get_byte # => "b"
|
753
|
+
* s.get_byte # => nil
|
754
|
+
*
|
755
|
+
* $KCODE = 'EUC'
|
756
|
+
* s = StringScanner.new("\244\242")
|
757
|
+
* s.get_byte # => "\244"
|
758
|
+
* s.get_byte # => "\242"
|
759
|
+
* s.get_byte # => nil
|
760
|
+
*/
|
761
|
+
static VALUE
|
762
|
+
strscan_get_byte(VALUE self)
|
763
|
+
{
|
764
|
+
struct strscanner *p;
|
765
|
+
|
766
|
+
GET_SCANNER(self, p);
|
767
|
+
CLEAR_MATCH_STATUS(p);
|
768
|
+
if (EOS_P(p))
|
769
|
+
return Qnil;
|
770
|
+
|
771
|
+
p->prev = p->curr;
|
772
|
+
p->curr++;
|
773
|
+
MATCHED(p);
|
774
|
+
adjust_registers_to_matched(p);
|
775
|
+
return extract_range(p, p->prev + p->regs.beg[0],
|
776
|
+
p->prev + p->regs.end[0]);
|
777
|
+
}
|
778
|
+
|
779
|
+
/*
|
780
|
+
* Equivalent to #get_byte.
|
781
|
+
* This method is obsolete; use #get_byte instead.
|
782
|
+
*/
|
783
|
+
static VALUE
|
784
|
+
strscan_getbyte(VALUE self)
|
785
|
+
{
|
786
|
+
rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
|
787
|
+
return strscan_get_byte(self);
|
788
|
+
}
|
789
|
+
|
790
|
+
/*
|
791
|
+
* call-seq: peek(len)
|
792
|
+
*
|
793
|
+
* Extracts a string corresponding to <tt>string[pos,len]</tt>, without
|
794
|
+
* advancing the scan pointer.
|
795
|
+
*
|
796
|
+
* s = StringScanner.new('test string')
|
797
|
+
* s.peek(7) # => "test st"
|
798
|
+
* s.peek(7) # => "test st"
|
799
|
+
*
|
800
|
+
*/
|
801
|
+
static VALUE
|
802
|
+
strscan_peek(VALUE self, VALUE vlen)
|
803
|
+
{
|
804
|
+
struct strscanner *p;
|
805
|
+
long len;
|
806
|
+
|
807
|
+
GET_SCANNER(self, p);
|
808
|
+
|
809
|
+
len = NUM2LONG(vlen);
|
810
|
+
if (EOS_P(p))
|
811
|
+
return infect(str_new(p, "", 0), p);
|
812
|
+
|
813
|
+
len = minl(len, S_RESTLEN(p));
|
814
|
+
return extract_beg_len(p, p->curr, len);
|
815
|
+
}
|
816
|
+
|
817
|
+
/*
|
818
|
+
* Equivalent to #peek.
|
819
|
+
* This method is obsolete; use #peek instead.
|
820
|
+
*/
|
821
|
+
static VALUE
|
822
|
+
strscan_peep(VALUE self, VALUE vlen)
|
823
|
+
{
|
824
|
+
rb_warning("StringScanner#peep is obsolete; use #peek instead");
|
825
|
+
return strscan_peek(self, vlen);
|
826
|
+
}
|
827
|
+
|
828
|
+
/*
|
829
|
+
* Set the scan pointer to the previous position. Only one previous position is
|
830
|
+
* remembered, and it changes with each scanning operation.
|
831
|
+
*
|
832
|
+
* s = StringScanner.new('test string')
|
833
|
+
* s.scan(/\w+/) # => "test"
|
834
|
+
* s.unscan
|
835
|
+
* s.scan(/../) # => "te"
|
836
|
+
* s.scan(/\d/) # => nil
|
837
|
+
* s.unscan # ScanError: unscan failed: previous match record not exist
|
838
|
+
*/
|
839
|
+
static VALUE
|
840
|
+
strscan_unscan(VALUE self)
|
841
|
+
{
|
842
|
+
struct strscanner *p;
|
843
|
+
|
844
|
+
GET_SCANNER(self, p);
|
845
|
+
if (! MATCHED_P(p))
|
846
|
+
rb_raise(ScanError, "unscan failed: previous match record not exist");
|
847
|
+
p->curr = p->prev;
|
848
|
+
CLEAR_MATCH_STATUS(p);
|
849
|
+
return self;
|
850
|
+
}
|
851
|
+
|
852
|
+
/*
|
853
|
+
* Returns +true+ iff the scan pointer is at the beginning of the line.
|
854
|
+
*
|
855
|
+
* s = StringScanner.new("test\ntest\n")
|
856
|
+
* s.bol? # => true
|
857
|
+
* s.scan(/te/)
|
858
|
+
* s.bol? # => false
|
859
|
+
* s.scan(/st\n/)
|
860
|
+
* s.bol? # => true
|
861
|
+
* s.terminate
|
862
|
+
* s.bol? # => true
|
863
|
+
*/
|
864
|
+
static VALUE
|
865
|
+
strscan_bol_p(VALUE self)
|
866
|
+
{
|
867
|
+
struct strscanner *p;
|
868
|
+
|
869
|
+
GET_SCANNER(self, p);
|
870
|
+
if (CURPTR(p) > S_PEND(p)) return Qnil;
|
871
|
+
if (p->curr == 0) return Qtrue;
|
872
|
+
return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
|
873
|
+
}
|
874
|
+
|
875
|
+
/*
|
876
|
+
* Returns +true+ if the scan pointer is at the end of the string.
|
877
|
+
*
|
878
|
+
* s = StringScanner.new('test string')
|
879
|
+
* p s.eos? # => false
|
880
|
+
* s.scan(/test/)
|
881
|
+
* p s.eos? # => false
|
882
|
+
* s.terminate
|
883
|
+
* p s.eos? # => true
|
884
|
+
*/
|
885
|
+
static VALUE
|
886
|
+
strscan_eos_p(VALUE self)
|
887
|
+
{
|
888
|
+
struct strscanner *p;
|
889
|
+
|
890
|
+
GET_SCANNER(self, p);
|
891
|
+
return EOS_P(p) ? Qtrue : Qfalse;
|
892
|
+
}
|
893
|
+
|
894
|
+
/*
|
895
|
+
* Equivalent to #eos?.
|
896
|
+
* This method is obsolete, use #eos? instead.
|
897
|
+
*/
|
898
|
+
static VALUE
|
899
|
+
strscan_empty_p(VALUE self)
|
900
|
+
{
|
901
|
+
rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
|
902
|
+
return strscan_eos_p(self);
|
903
|
+
}
|
904
|
+
|
905
|
+
/*
|
906
|
+
* Returns true iff there is more data in the string. See #eos?.
|
907
|
+
* This method is obsolete; use #eos? instead.
|
908
|
+
*
|
909
|
+
* s = StringScanner.new('test string')
|
910
|
+
* s.eos? # These two
|
911
|
+
* s.rest? # are opposites.
|
912
|
+
*/
|
913
|
+
static VALUE
|
914
|
+
strscan_rest_p(VALUE self)
|
915
|
+
{
|
916
|
+
struct strscanner *p;
|
917
|
+
|
918
|
+
GET_SCANNER(self, p);
|
919
|
+
return EOS_P(p) ? Qfalse : Qtrue;
|
920
|
+
}
|
921
|
+
|
922
|
+
/*
|
923
|
+
* Returns +true+ iff the last match was successful.
|
924
|
+
*
|
925
|
+
* s = StringScanner.new('test string')
|
926
|
+
* s.match?(/\w+/) # => 4
|
927
|
+
* s.matched? # => true
|
928
|
+
* s.match?(/\d+/) # => nil
|
929
|
+
* s.matched? # => false
|
930
|
+
*/
|
931
|
+
static VALUE
|
932
|
+
strscan_matched_p(VALUE self)
|
933
|
+
{
|
934
|
+
struct strscanner *p;
|
935
|
+
|
936
|
+
GET_SCANNER(self, p);
|
937
|
+
return MATCHED_P(p) ? Qtrue : Qfalse;
|
938
|
+
}
|
939
|
+
|
940
|
+
/*
|
941
|
+
* Returns the last matched string.
|
942
|
+
*
|
943
|
+
* s = StringScanner.new('test string')
|
944
|
+
* s.match?(/\w+/) # -> 4
|
945
|
+
* s.matched # -> "test"
|
946
|
+
*/
|
947
|
+
static VALUE
|
948
|
+
strscan_matched(VALUE self)
|
949
|
+
{
|
950
|
+
struct strscanner *p;
|
951
|
+
|
952
|
+
GET_SCANNER(self, p);
|
953
|
+
if (! MATCHED_P(p)) return Qnil;
|
954
|
+
return extract_range(p, p->prev + p->regs.beg[0],
|
955
|
+
p->prev + p->regs.end[0]);
|
956
|
+
}
|
957
|
+
|
958
|
+
/*
|
959
|
+
* Returns the size of the most recent match (see #matched), or +nil+ if there
|
960
|
+
* was no recent match.
|
961
|
+
*
|
962
|
+
* s = StringScanner.new('test string')
|
963
|
+
* s.check /\w+/ # -> "test"
|
964
|
+
* s.matched_size # -> 4
|
965
|
+
* s.check /\d+/ # -> nil
|
966
|
+
* s.matched_size # -> nil
|
967
|
+
*/
|
968
|
+
static VALUE
|
969
|
+
strscan_matched_size(VALUE self)
|
970
|
+
{
|
971
|
+
struct strscanner *p;
|
972
|
+
|
973
|
+
GET_SCANNER(self, p);
|
974
|
+
if (! MATCHED_P(p)) return Qnil;
|
975
|
+
return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
|
976
|
+
}
|
977
|
+
|
978
|
+
static int
|
979
|
+
name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc)
|
980
|
+
{
|
981
|
+
int num;
|
982
|
+
|
983
|
+
num = onig_name_to_backref_number(RREGEXP_PTR(regexp),
|
984
|
+
(const unsigned char* )name, (const unsigned char* )name_end, regs);
|
985
|
+
if (num >= 1) {
|
986
|
+
return num;
|
987
|
+
}
|
988
|
+
else {
|
989
|
+
rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
|
990
|
+
rb_long2int(name_end - name), name);
|
991
|
+
}
|
992
|
+
|
993
|
+
UNREACHABLE;
|
994
|
+
}
|
995
|
+
|
996
|
+
/*
|
997
|
+
* call-seq: [](n)
|
998
|
+
*
|
999
|
+
* Returns the n-th subgroup in the most recent match.
|
1000
|
+
*
|
1001
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1002
|
+
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1003
|
+
* s[0] # -> "Fri Dec 12 "
|
1004
|
+
* s[1] # -> "Fri"
|
1005
|
+
* s[2] # -> "Dec"
|
1006
|
+
* s[3] # -> "12"
|
1007
|
+
* s.post_match # -> "1975 14:39"
|
1008
|
+
* s.pre_match # -> ""
|
1009
|
+
*
|
1010
|
+
* s.reset
|
1011
|
+
* s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) # -> "Fri Dec 12 "
|
1012
|
+
* s[0] # -> "Fri Dec 12 "
|
1013
|
+
* s[1] # -> "Fri"
|
1014
|
+
* s[2] # -> "Dec"
|
1015
|
+
* s[3] # -> "12"
|
1016
|
+
* s[:wday] # -> "Fri"
|
1017
|
+
* s[:month] # -> "Dec"
|
1018
|
+
* s[:day] # -> "12"
|
1019
|
+
* s.post_match # -> "1975 14:39"
|
1020
|
+
* s.pre_match # -> ""
|
1021
|
+
*/
|
1022
|
+
static VALUE
|
1023
|
+
strscan_aref(VALUE self, VALUE idx)
|
1024
|
+
{
|
1025
|
+
const char *name;
|
1026
|
+
struct strscanner *p;
|
1027
|
+
long i;
|
1028
|
+
|
1029
|
+
GET_SCANNER(self, p);
|
1030
|
+
if (! MATCHED_P(p)) return Qnil;
|
1031
|
+
|
1032
|
+
switch (TYPE(idx)) {
|
1033
|
+
case T_SYMBOL:
|
1034
|
+
idx = rb_sym2str(idx);
|
1035
|
+
/* fall through */
|
1036
|
+
case T_STRING:
|
1037
|
+
if (!p->regex) return Qnil;
|
1038
|
+
RSTRING_GETMEM(idx, name, i);
|
1039
|
+
i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
|
1040
|
+
break;
|
1041
|
+
default:
|
1042
|
+
i = NUM2LONG(idx);
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
if (i < 0)
|
1046
|
+
i += p->regs.num_regs;
|
1047
|
+
if (i < 0) return Qnil;
|
1048
|
+
if (i >= p->regs.num_regs) return Qnil;
|
1049
|
+
if (p->regs.beg[i] == -1) return Qnil;
|
1050
|
+
|
1051
|
+
return extract_range(p, p->prev + p->regs.beg[i],
|
1052
|
+
p->prev + p->regs.end[i]);
|
1053
|
+
}
|
1054
|
+
|
1055
|
+
/*
|
1056
|
+
* call-seq: size
|
1057
|
+
*
|
1058
|
+
* Returns the number of subgroups in the most recent match.
|
1059
|
+
* The full match counts as a subgroup.
|
1060
|
+
*
|
1061
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1062
|
+
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1063
|
+
* s.size # -> 4
|
1064
|
+
*/
|
1065
|
+
static VALUE
|
1066
|
+
strscan_size(VALUE self)
|
1067
|
+
{
|
1068
|
+
struct strscanner *p;
|
1069
|
+
|
1070
|
+
GET_SCANNER(self, p);
|
1071
|
+
if (! MATCHED_P(p)) return Qnil;
|
1072
|
+
return INT2FIX(p->regs.num_regs);
|
1073
|
+
}
|
1074
|
+
|
1075
|
+
/*
|
1076
|
+
* call-seq: captures
|
1077
|
+
*
|
1078
|
+
* Returns the subgroups in the most recent match (not including the full match).
|
1079
|
+
* If nothing was previously matched, it returns nil.
|
1080
|
+
*
|
1081
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1082
|
+
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1083
|
+
* s.captures # -> ["Fri", "Dec", "12"]
|
1084
|
+
* s.scan(/(\w+) (\w+) (\d+) /) # -> nil
|
1085
|
+
* s.captures # -> nil
|
1086
|
+
*/
|
1087
|
+
static VALUE
|
1088
|
+
strscan_captures(VALUE self)
|
1089
|
+
{
|
1090
|
+
struct strscanner *p;
|
1091
|
+
int i, num_regs;
|
1092
|
+
VALUE new_ary;
|
1093
|
+
|
1094
|
+
GET_SCANNER(self, p);
|
1095
|
+
if (! MATCHED_P(p)) return Qnil;
|
1096
|
+
|
1097
|
+
num_regs = p->regs.num_regs;
|
1098
|
+
new_ary = rb_ary_new2(num_regs);
|
1099
|
+
|
1100
|
+
for (i = 1; i < num_regs; i++) {
|
1101
|
+
VALUE str = extract_range(p, p->prev + p->regs.beg[i],
|
1102
|
+
p->prev + p->regs.end[i]);
|
1103
|
+
rb_ary_push(new_ary, str);
|
1104
|
+
}
|
1105
|
+
|
1106
|
+
return new_ary;
|
1107
|
+
}
|
1108
|
+
|
1109
|
+
/*
|
1110
|
+
* call-seq:
|
1111
|
+
* scanner.values_at( i1, i2, ... iN ) -> an_array
|
1112
|
+
*
|
1113
|
+
* Returns the subgroups in the most recent match at the given indices.
|
1114
|
+
* If nothing was previously matched, it returns nil.
|
1115
|
+
*
|
1116
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1117
|
+
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
1118
|
+
* s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
|
1119
|
+
* s.scan(/(\w+) (\w+) (\d+) /) # -> nil
|
1120
|
+
* s.values_at 0, -1, 5, 2 # -> nil
|
1121
|
+
*/
|
1122
|
+
|
1123
|
+
static VALUE
|
1124
|
+
strscan_values_at(int argc, VALUE *argv, VALUE self)
|
1125
|
+
{
|
1126
|
+
struct strscanner *p;
|
1127
|
+
long i;
|
1128
|
+
VALUE new_ary;
|
1129
|
+
|
1130
|
+
GET_SCANNER(self, p);
|
1131
|
+
if (! MATCHED_P(p)) return Qnil;
|
1132
|
+
|
1133
|
+
new_ary = rb_ary_new2(argc);
|
1134
|
+
for (i = 0; i<argc; i++) {
|
1135
|
+
rb_ary_push(new_ary, strscan_aref(self, argv[i]));
|
1136
|
+
}
|
1137
|
+
|
1138
|
+
return new_ary;
|
1139
|
+
}
|
1140
|
+
|
1141
|
+
/*
|
1142
|
+
* Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
|
1143
|
+
*
|
1144
|
+
* s = StringScanner.new('test string')
|
1145
|
+
* s.scan(/\w+/) # -> "test"
|
1146
|
+
* s.scan(/\s+/) # -> " "
|
1147
|
+
* s.pre_match # -> "test"
|
1148
|
+
* s.post_match # -> "string"
|
1149
|
+
*/
|
1150
|
+
static VALUE
|
1151
|
+
strscan_pre_match(VALUE self)
|
1152
|
+
{
|
1153
|
+
struct strscanner *p;
|
1154
|
+
|
1155
|
+
GET_SCANNER(self, p);
|
1156
|
+
if (! MATCHED_P(p)) return Qnil;
|
1157
|
+
return extract_range(p, 0, p->prev + p->regs.beg[0]);
|
1158
|
+
}
|
1159
|
+
|
1160
|
+
/*
|
1161
|
+
* Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
|
1162
|
+
*
|
1163
|
+
* s = StringScanner.new('test string')
|
1164
|
+
* s.scan(/\w+/) # -> "test"
|
1165
|
+
* s.scan(/\s+/) # -> " "
|
1166
|
+
* s.pre_match # -> "test"
|
1167
|
+
* s.post_match # -> "string"
|
1168
|
+
*/
|
1169
|
+
static VALUE
|
1170
|
+
strscan_post_match(VALUE self)
|
1171
|
+
{
|
1172
|
+
struct strscanner *p;
|
1173
|
+
|
1174
|
+
GET_SCANNER(self, p);
|
1175
|
+
if (! MATCHED_P(p)) return Qnil;
|
1176
|
+
return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
|
1177
|
+
}
|
1178
|
+
|
1179
|
+
/*
|
1180
|
+
* Returns the "rest" of the string (i.e. everything after the scan pointer).
|
1181
|
+
* If there is no more data (eos? = true), it returns <tt>""</tt>.
|
1182
|
+
*/
|
1183
|
+
static VALUE
|
1184
|
+
strscan_rest(VALUE self)
|
1185
|
+
{
|
1186
|
+
struct strscanner *p;
|
1187
|
+
|
1188
|
+
GET_SCANNER(self, p);
|
1189
|
+
if (EOS_P(p)) {
|
1190
|
+
return infect(str_new(p, "", 0), p);
|
1191
|
+
}
|
1192
|
+
return extract_range(p, p->curr, S_LEN(p));
|
1193
|
+
}
|
1194
|
+
|
1195
|
+
/*
|
1196
|
+
* <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
|
1197
|
+
*/
|
1198
|
+
static VALUE
|
1199
|
+
strscan_rest_size(VALUE self)
|
1200
|
+
{
|
1201
|
+
struct strscanner *p;
|
1202
|
+
long i;
|
1203
|
+
|
1204
|
+
GET_SCANNER(self, p);
|
1205
|
+
if (EOS_P(p)) {
|
1206
|
+
return INT2FIX(0);
|
1207
|
+
}
|
1208
|
+
i = S_RESTLEN(p);
|
1209
|
+
return INT2FIX(i);
|
1210
|
+
}
|
1211
|
+
|
1212
|
+
/*
|
1213
|
+
* <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
|
1214
|
+
* This method is obsolete; use #rest_size instead.
|
1215
|
+
*/
|
1216
|
+
static VALUE
|
1217
|
+
strscan_restsize(VALUE self)
|
1218
|
+
{
|
1219
|
+
rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
|
1220
|
+
return strscan_rest_size(self);
|
1221
|
+
}
|
1222
|
+
|
1223
|
+
/* Maximum number of bytes that inspect1() and inspect2() each copy from the
   scanned string before marking the excerpt with "...". */
#define INSPECT_LENGTH 5
|
1224
|
+
|
1225
|
+
/*
|
1226
|
+
* Returns a string that represents the StringScanner object, showing:
|
1227
|
+
* - the current position
|
1228
|
+
* - the size of the string
|
1229
|
+
* - the characters surrounding the scan pointer
|
1230
|
+
*
|
1231
|
+
* s = StringScanner.new("Fri Dec 12 1975 14:39")
|
1232
|
+
* s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
|
1233
|
+
* s.scan_until /12/ # -> "Fri Dec 12"
|
1234
|
+
* s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
|
1235
|
+
*/
|
1236
|
+
static VALUE
|
1237
|
+
strscan_inspect(VALUE self)
|
1238
|
+
{
|
1239
|
+
struct strscanner *p;
|
1240
|
+
VALUE a, b;
|
1241
|
+
|
1242
|
+
p = check_strscan(self);
|
1243
|
+
if (NIL_P(p->str)) {
|
1244
|
+
a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
|
1245
|
+
return infect(a, p);
|
1246
|
+
}
|
1247
|
+
if (EOS_P(p)) {
|
1248
|
+
a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
|
1249
|
+
return infect(a, p);
|
1250
|
+
}
|
1251
|
+
if (p->curr == 0) {
|
1252
|
+
b = inspect2(p);
|
1253
|
+
a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
|
1254
|
+
rb_obj_class(self),
|
1255
|
+
p->curr, S_LEN(p),
|
1256
|
+
b);
|
1257
|
+
return infect(a, p);
|
1258
|
+
}
|
1259
|
+
a = inspect1(p);
|
1260
|
+
b = inspect2(p);
|
1261
|
+
a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
|
1262
|
+
rb_obj_class(self),
|
1263
|
+
p->curr, S_LEN(p),
|
1264
|
+
a, b);
|
1265
|
+
return infect(a, p);
|
1266
|
+
}
|
1267
|
+
|
1268
|
+
static VALUE
|
1269
|
+
inspect1(struct strscanner *p)
|
1270
|
+
{
|
1271
|
+
VALUE str;
|
1272
|
+
long len;
|
1273
|
+
|
1274
|
+
if (p->curr == 0) return rb_str_new2("");
|
1275
|
+
if (p->curr > INSPECT_LENGTH) {
|
1276
|
+
str = rb_str_new_cstr("...");
|
1277
|
+
len = INSPECT_LENGTH;
|
1278
|
+
}
|
1279
|
+
else {
|
1280
|
+
str = rb_str_new(0, 0);
|
1281
|
+
len = p->curr;
|
1282
|
+
}
|
1283
|
+
rb_str_cat(str, CURPTR(p) - len, len);
|
1284
|
+
return rb_str_dump(str);
|
1285
|
+
}
|
1286
|
+
|
1287
|
+
static VALUE
|
1288
|
+
inspect2(struct strscanner *p)
|
1289
|
+
{
|
1290
|
+
VALUE str;
|
1291
|
+
long len;
|
1292
|
+
|
1293
|
+
if (EOS_P(p)) return rb_str_new2("");
|
1294
|
+
len = S_RESTLEN(p);
|
1295
|
+
if (len > INSPECT_LENGTH) {
|
1296
|
+
str = rb_str_new(CURPTR(p), INSPECT_LENGTH);
|
1297
|
+
rb_str_cat2(str, "...");
|
1298
|
+
}
|
1299
|
+
else {
|
1300
|
+
str = rb_str_new(CURPTR(p), len);
|
1301
|
+
}
|
1302
|
+
return rb_str_dump(str);
|
1303
|
+
}
|
1304
|
+
|
1305
|
+
/* =======================================================================
|
1306
|
+
Ruby Interface
|
1307
|
+
======================================================================= */
|
1308
|
+
|
1309
|
+
/*
|
1310
|
+
* Document-class: StringScanner
|
1311
|
+
*
|
1312
|
+
* StringScanner provides for lexical scanning operations on a String. Here is
|
1313
|
+
* an example of its usage:
|
1314
|
+
*
|
1315
|
+
* s = StringScanner.new('This is an example string')
|
1316
|
+
* s.eos? # -> false
|
1317
|
+
*
|
1318
|
+
* p s.scan(/\w+/) # -> "This"
|
1319
|
+
* p s.scan(/\w+/) # -> nil
|
1320
|
+
* p s.scan(/\s+/) # -> " "
|
1321
|
+
* p s.scan(/\s+/) # -> nil
|
1322
|
+
* p s.scan(/\w+/) # -> "is"
|
1323
|
+
* s.eos? # -> false
|
1324
|
+
*
|
1325
|
+
* p s.scan(/\s+/) # -> " "
|
1326
|
+
* p s.scan(/\w+/) # -> "an"
|
1327
|
+
* p s.scan(/\s+/) # -> " "
|
1328
|
+
* p s.scan(/\w+/) # -> "example"
|
1329
|
+
* p s.scan(/\s+/) # -> " "
|
1330
|
+
* p s.scan(/\w+/) # -> "string"
|
1331
|
+
* s.eos? # -> true
|
1332
|
+
*
|
1333
|
+
* p s.scan(/\s+/) # -> nil
|
1334
|
+
* p s.scan(/\w+/) # -> nil
|
1335
|
+
*
|
1336
|
+
* Scanning a string means remembering the position of a <i>scan pointer</i>,
|
1337
|
+
* which is just an index. The point of scanning is to move forward a bit at
|
1338
|
+
* a time, so matches are sought after the scan pointer; usually immediately
|
1339
|
+
* after it.
|
1340
|
+
*
|
1341
|
+
* Given the string "test string", here are the pertinent scan pointer
|
1342
|
+
* positions:
|
1343
|
+
*
|
1344
|
+
* t e s t s t r i n g
|
1345
|
+
* 0 1 2 ... 1
|
1346
|
+
* 0
|
1347
|
+
*
|
1348
|
+
* When you #scan for a pattern (a regular expression), the match must occur
|
1349
|
+
* at the character after the scan pointer. If you use #scan_until, then the
|
1350
|
+
* match can occur anywhere after the scan pointer. In both cases, the scan
|
1351
|
+
* pointer moves <i>just beyond</i> the last character of the match, ready to
|
1352
|
+
* scan again from the next character onwards. This is demonstrated by the
|
1353
|
+
* example above.
|
1354
|
+
*
|
1355
|
+
* == Method Categories
|
1356
|
+
*
|
1357
|
+
* There are other methods besides the plain scanners. You can look ahead in
|
1358
|
+
* the string without actually scanning. You can access the most recent match.
|
1359
|
+
* You can modify the string being scanned, reset or terminate the scanner,
|
1360
|
+
* find out or change the position of the scan pointer, skip ahead, and so on.
|
1361
|
+
*
|
1362
|
+
* === Advancing the Scan Pointer
|
1363
|
+
*
|
1364
|
+
* - #getch
|
1365
|
+
* - #get_byte
|
1366
|
+
* - #scan
|
1367
|
+
* - #scan_until
|
1368
|
+
* - #skip
|
1369
|
+
* - #skip_until
|
1370
|
+
*
|
1371
|
+
* === Looking Ahead
|
1372
|
+
*
|
1373
|
+
* - #check
|
1374
|
+
* - #check_until
|
1375
|
+
* - #exist?
|
1376
|
+
* - #match?
|
1377
|
+
* - #peek
|
1378
|
+
*
|
1379
|
+
* === Finding Where we Are
|
1380
|
+
*
|
1381
|
+
* - #beginning_of_line? (#bol?)
|
1382
|
+
* - #eos?
|
1383
|
+
* - #rest?
|
1384
|
+
* - #rest_size
|
1385
|
+
* - #pos
|
1386
|
+
*
|
1387
|
+
* === Setting Where we Are
|
1388
|
+
*
|
1389
|
+
* - #reset
|
1390
|
+
* - #terminate
|
1391
|
+
* - #pos=
|
1392
|
+
*
|
1393
|
+
* === Match Data
|
1394
|
+
*
|
1395
|
+
* - #matched
|
1396
|
+
* - #matched?
|
1397
|
+
* - #matched_size
|
1398
|
+
* - []
|
1399
|
+
* - #pre_match
|
1400
|
+
* - #post_match
|
1401
|
+
*
|
1402
|
+
* === Miscellaneous
|
1403
|
+
*
|
1404
|
+
* - <<
|
1405
|
+
* - #concat
|
1406
|
+
* - #string
|
1407
|
+
* - #string=
|
1408
|
+
* - #unscan
|
1409
|
+
*
|
1410
|
+
* There are aliases to several of the methods.
|
1411
|
+
*/
|
1412
|
+
void
|
1413
|
+
Init_strscan(void)
|
1414
|
+
{
|
1415
|
+
ID id_scanerr = rb_intern("ScanError");
|
1416
|
+
VALUE tmp;
|
1417
|
+
|
1418
|
+
id_byteslice = rb_intern("byteslice");
|
1419
|
+
|
1420
|
+
StringScanner = rb_define_class("StringScanner", rb_cObject);
|
1421
|
+
ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
|
1422
|
+
if (!rb_const_defined(rb_cObject, id_scanerr)) {
|
1423
|
+
rb_const_set(rb_cObject, id_scanerr, ScanError);
|
1424
|
+
}
|
1425
|
+
tmp = rb_str_new2(STRSCAN_VERSION);
|
1426
|
+
rb_obj_freeze(tmp);
|
1427
|
+
rb_const_set(StringScanner, rb_intern("Version"), tmp);
|
1428
|
+
tmp = rb_str_new2("$Id$");
|
1429
|
+
rb_obj_freeze(tmp);
|
1430
|
+
rb_const_set(StringScanner, rb_intern("Id"), tmp);
|
1431
|
+
|
1432
|
+
rb_define_alloc_func(StringScanner, strscan_s_allocate);
|
1433
|
+
rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
|
1434
|
+
rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
|
1435
|
+
rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
|
1436
|
+
rb_define_method(StringScanner, "reset", strscan_reset, 0);
|
1437
|
+
rb_define_method(StringScanner, "terminate", strscan_terminate, 0);
|
1438
|
+
rb_define_method(StringScanner, "clear", strscan_clear, 0);
|
1439
|
+
rb_define_method(StringScanner, "string", strscan_get_string, 0);
|
1440
|
+
rb_define_method(StringScanner, "string=", strscan_set_string, 1);
|
1441
|
+
rb_define_method(StringScanner, "concat", strscan_concat, 1);
|
1442
|
+
rb_define_method(StringScanner, "<<", strscan_concat, 1);
|
1443
|
+
rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
|
1444
|
+
rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
|
1445
|
+
rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0);
|
1446
|
+
rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
|
1447
|
+
rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);
|
1448
|
+
|
1449
|
+
rb_define_method(StringScanner, "scan", strscan_scan, 1);
|
1450
|
+
rb_define_method(StringScanner, "skip", strscan_skip, 1);
|
1451
|
+
rb_define_method(StringScanner, "match?", strscan_match_p, 1);
|
1452
|
+
rb_define_method(StringScanner, "check", strscan_check, 1);
|
1453
|
+
rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3);
|
1454
|
+
|
1455
|
+
rb_define_method(StringScanner, "scan_until", strscan_scan_until, 1);
|
1456
|
+
rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1);
|
1457
|
+
rb_define_method(StringScanner, "exist?", strscan_exist_p, 1);
|
1458
|
+
rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
|
1459
|
+
rb_define_method(StringScanner, "search_full", strscan_search_full, 3);
|
1460
|
+
|
1461
|
+
rb_define_method(StringScanner, "getch", strscan_getch, 0);
|
1462
|
+
rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
|
1463
|
+
rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
|
1464
|
+
rb_define_method(StringScanner, "peek", strscan_peek, 1);
|
1465
|
+
rb_define_method(StringScanner, "peep", strscan_peep, 1);
|
1466
|
+
|
1467
|
+
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
|
1468
|
+
|
1469
|
+
rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
|
1470
|
+
rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
|
1471
|
+
rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
|
1472
|
+
rb_define_method(StringScanner, "empty?", strscan_empty_p, 0);
|
1473
|
+
rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
|
1474
|
+
|
1475
|
+
rb_define_method(StringScanner, "matched?", strscan_matched_p, 0);
|
1476
|
+
rb_define_method(StringScanner, "matched", strscan_matched, 0);
|
1477
|
+
rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
|
1478
|
+
rb_define_method(StringScanner, "[]", strscan_aref, 1);
|
1479
|
+
rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
|
1480
|
+
rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
|
1481
|
+
rb_define_method(StringScanner, "size", strscan_size, 0);
|
1482
|
+
rb_define_method(StringScanner, "captures", strscan_captures, 0);
|
1483
|
+
rb_define_method(StringScanner, "values_at", strscan_values_at, -1);
|
1484
|
+
|
1485
|
+
rb_define_method(StringScanner, "rest", strscan_rest, 0);
|
1486
|
+
rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
|
1487
|
+
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
|
1488
|
+
|
1489
|
+
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
|
1490
|
+
}
|