re2 2.0.0-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
data/ext/re2/re2.cc ADDED
@@ -0,0 +1,1889 @@
1
+ /*
2
+ * re2 (http://github.com/mudge/re2)
3
+ * Ruby bindings to re2, an "efficient, principled regular expression library"
4
+ *
5
+ * Copyright (c) 2010-2014, Paul Mucur (http://mudge.name)
6
+ * Released under the BSD Licence, please see LICENSE.txt
7
+ */
8
+
9
+ #include <ruby.h>
10
+ #include <re2/re2.h>
11
+ #include <re2/set.h>
12
+ #include <stdint.h>
13
+ #include <string>
14
+ #include <sstream>
15
+ #include <vector>
16
+ using std::string;
17
+ using std::ostringstream;
18
+ using std::nothrow;
19
+ using std::map;
20
+ using std::vector;
21
+
22
+ #define BOOL2RUBY(v) (v ? Qtrue : Qfalse) /* maps a C/C++ truth value to Ruby's Qtrue/Qfalse; v is not parenthesised, so pass simple expressions */
23
+ #define UNUSED(x) ((void)x) /* evaluates x as a void expression so an intentionally unused parameter does not warn */
24
+
25
+ #ifndef RSTRING_LEN /* fallback for Ruby headers that do not provide RSTRING_LEN */
26
+ #define RSTRING_LEN(x) (RSTRING(x)->len)
27
+ #endif /* RSTRING_LEN */
28
+
29
+ #ifndef RSTRING_PTR /* fallback for Ruby headers that do not provide RSTRING_PTR */
30
+ #define RSTRING_PTR(x) (RSTRING(x)->ptr)
31
+ #endif /* RSTRING_PTR */
32
+
33
+ #ifdef HAVE_RUBY_ENCODING_H /* encoding-aware Ruby: ENCODED_STR_NEW builds a string from (str, length) and associates the encoding looked up by name; ENCODED_STR_NEW2 copies the encoding index of str2 instead. Both are GNU statement expressions yielding the new VALUE. */
34
+ #include <ruby/encoding.h>
35
+ #define ENCODED_STR_NEW(str, length, encoding) \
36
+ ({ \
37
+ VALUE _string = rb_str_new(str, length); \
38
+ int _enc = rb_enc_find_index(encoding); \
39
+ rb_enc_associate_index(_string, _enc); \
40
+ _string; \
41
+ })
42
+ #define ENCODED_STR_NEW2(str, length, str2) \
43
+ ({ \
44
+ VALUE _string = rb_str_new(str, length); \
45
+ int _enc = rb_enc_get_index(str2); \
46
+ rb_enc_associate_index(_string, _enc); \
47
+ _string; \
48
+ })
49
+ #else /* no ruby/encoding.h: the third argument is ignored and a plain rb_str_new string is returned */
50
+ #define ENCODED_STR_NEW(str, length, encoding) \
51
+ rb_str_new((const char *)str, (long)length)
52
+ #define ENCODED_STR_NEW2(str, length, str2) \
53
+ rb_str_new((const char *)str, (long)length)
54
+ #endif /* HAVE_RUBY_ENCODING_H */
55
+
56
+ #ifdef HAVE_RB_STR_SUBLEN /* preferred: let rb_str_sublen convert the offset (presumably bytes -> characters; confirm against the Ruby API); the encoding argument is unused here */
57
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
58
+ LONG2NUM(rb_str_sublen(str, offset))
59
+ #else /* no rb_str_sublen */
60
+ #ifdef HAVE_RUBY_ENCODING_H /* build a temporary string from the first `offset` bytes of str in the given encoding and return its rb_str_length */
61
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
62
+ ({ \
63
+ VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
64
+ rb_str_length(_string); \
65
+ })
66
+ #else /* no encoding support: the offset is returned unchanged as a Ruby Integer */
67
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
68
+ LONG2NUM(offset)
69
+ #endif /* HAVE_RUBY_ENCODING_H */
70
+ #endif /* HAVE_RB_STR_SUBLEN */
71
+
72
+ #ifdef HAVE_ENDPOS_ARGUMENT /* pattern->Match accepts an endpos argument: forward all seven arguments */
73
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
74
+ (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
75
+ #else /* no endpos parameter available: endpos is accepted by the macro but dropped from the call */
76
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
77
+ (pattern->Match(text, startpos, anchor, match, nmatch))
78
+ #endif /* HAVE_ENDPOS_ARGUMENT */
79
+
80
+ typedef struct {
81
+ RE2 *pattern;
82
+ } re2_pattern;
83
+
84
+ typedef struct {
85
+ re2::StringPiece *matches;
86
+ int number_of_matches;
87
+ VALUE regexp, text;
88
+ } re2_matchdata;
89
+
90
+ typedef struct {
91
+ re2::StringPiece *input;
92
+ int number_of_capturing_groups;
93
+ bool eof;
94
+ VALUE regexp, text;
95
+ } re2_scanner;
96
+
97
+ typedef struct {
98
+ RE2::Set *set;
99
+ } re2_set;
100
+
101
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
102
+ re2_eSetMatchError, re2_eSetUnsupportedError;
103
+
104
+ /* Symbols used in RE2 options. */
105
+ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
106
+ id_max_mem, id_literal, id_never_nl, id_case_sensitive,
107
+ id_perl_classes, id_word_boundary, id_one_line,
108
+ id_unanchored, id_anchor_start, id_anchor_both, id_exception;
109
+
110
+ void parse_re2_options(RE2::Options& re2_options, VALUE options) {
111
+ if (TYPE(options) != T_HASH) {
112
+ rb_raise(rb_eArgError, "options should be a hash");
113
+ }
114
+ VALUE utf8, posix_syntax, longest_match, log_errors,
115
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
116
+ word_boundary, one_line;
117
+
118
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
119
+ if (!NIL_P(utf8)) {
120
+ re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
121
+ }
122
+
123
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
124
+ if (!NIL_P(posix_syntax)) {
125
+ re2_options.set_posix_syntax(RTEST(posix_syntax));
126
+ }
127
+
128
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
129
+ if (!NIL_P(longest_match)) {
130
+ re2_options.set_longest_match(RTEST(longest_match));
131
+ }
132
+
133
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
134
+ if (!NIL_P(log_errors)) {
135
+ re2_options.set_log_errors(RTEST(log_errors));
136
+ }
137
+
138
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
139
+ if (!NIL_P(max_mem)) {
140
+ re2_options.set_max_mem(NUM2INT(max_mem));
141
+ }
142
+
143
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
144
+ if (!NIL_P(literal)) {
145
+ re2_options.set_literal(RTEST(literal));
146
+ }
147
+
148
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
149
+ if (!NIL_P(never_nl)) {
150
+ re2_options.set_never_nl(RTEST(never_nl));
151
+ }
152
+
153
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
154
+ if (!NIL_P(case_sensitive)) {
155
+ re2_options.set_case_sensitive(RTEST(case_sensitive));
156
+ }
157
+
158
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
159
+ if (!NIL_P(perl_classes)) {
160
+ re2_options.set_perl_classes(RTEST(perl_classes));
161
+ }
162
+
163
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
164
+ if (!NIL_P(word_boundary)) {
165
+ re2_options.set_word_boundary(RTEST(word_boundary));
166
+ }
167
+
168
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
169
+ if (!NIL_P(one_line)) {
170
+ re2_options.set_one_line(RTEST(one_line));
171
+ }
172
+ }
173
+
174
+ void re2_matchdata_mark(re2_matchdata* self) {
175
+ rb_gc_mark(self->regexp);
176
+ rb_gc_mark(self->text);
177
+ }
178
+
179
+ void re2_matchdata_free(re2_matchdata* self) {
180
+ if (self->matches) {
181
+ delete[] self->matches;
182
+ }
183
+ free(self);
184
+ }
185
+
186
+ void re2_scanner_mark(re2_scanner* self) {
187
+ rb_gc_mark(self->regexp);
188
+ rb_gc_mark(self->text);
189
+ }
190
+
191
+ void re2_scanner_free(re2_scanner* self) {
192
+ if (self->input) {
193
+ delete self->input;
194
+ }
195
+ free(self);
196
+ }
197
+
198
+ void re2_regexp_free(re2_pattern* self) {
199
+ if (self->pattern) {
200
+ delete self->pattern;
201
+ }
202
+ free(self);
203
+ }
204
+
205
+ static VALUE re2_matchdata_allocate(VALUE klass) {
206
+ re2_matchdata *m;
207
+ return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
208
+ re2_matchdata_free, m);
209
+ }
210
+
211
+ static VALUE re2_scanner_allocate(VALUE klass) {
212
+ re2_scanner *c;
213
+ return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
214
+ re2_scanner_free, c);
215
+ }
216
+
217
+ /*
218
+ * Returns a frozen copy of the string passed into +match+.
219
+ *
220
+ * @return [String] a frozen copy of the passed string.
221
+ * @example
222
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
223
+ * m.string #=> "bob 123"
224
+ */
225
+ static VALUE re2_matchdata_string(VALUE self) {
226
+ re2_matchdata *m;
227
+ Data_Get_Struct(self, re2_matchdata, m);
228
+
229
+ return m->text;
230
+ }
231
+
232
+ /*
233
+ * Returns the string passed into the scanner.
234
+ *
235
+ * @return [String] the original string.
236
+ * @example
237
+ * c = RE2::Regexp.new('(\d+)').scan("foo")
238
+ * c.string #=> "foo"
239
+ */
240
+ static VALUE re2_scanner_string(VALUE self) {
241
+ re2_scanner *c;
242
+ Data_Get_Struct(self, re2_scanner, c);
243
+
244
+ return c->text;
245
+ }
246
+
247
+ /*
248
+ * Returns whether the scanner has consumed all input or not.
249
+ *
250
+ * @return [Boolean] whether the scanner has consumed all input or not
251
+ * @example
252
+ * c = RE2::Regexp.new('(\d+)').scan("foo")
253
+ * c.eof? #=> true
254
+ */
255
+ static VALUE re2_scanner_eof(VALUE self) {
256
+ re2_scanner *c;
257
+ Data_Get_Struct(self, re2_scanner, c);
258
+
259
+ return BOOL2RUBY(c->eof);
260
+ }
261
+
262
+ /*
263
+ * Rewind the scanner to the start of the string.
264
+ *
265
+ * @example
266
+ * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
267
+ * e = s.to_enum
268
+ * e.scan #=> ["1"]
269
+ * e.scan #=> ["2"]
270
+ * s.rewind
271
+ * e.scan #=> ["1"]
272
+ */
273
+ static VALUE re2_scanner_rewind(VALUE self) {
274
+ re2_scanner *c;
275
+ Data_Get_Struct(self, re2_scanner, c);
276
+
277
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
278
+ c->eof = false;
279
+
280
+ return self;
281
+ }
282
+
283
+ /*
284
+ * Scan the given text incrementally for matches, returning an array of
285
+ * matches on each subsequent call. Returns nil if no matches are found.
286
+ *
287
+ * @return [Array<String>] the matches.
288
+ * @example
289
+ * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
290
+ * s.scan #=> ["Foo"]
291
+ * s.scan #=> ["bar"]
292
+ */
293
+ static VALUE re2_scanner_scan(VALUE self) {
294
+ int i;
295
+ size_t original_input_size, new_input_size;
296
+ bool input_advanced;
297
+ re2_pattern *p;
298
+ re2_scanner *c;
299
+ VALUE result;
300
+
301
+ Data_Get_Struct(self, re2_scanner, c);
302
+ Data_Get_Struct(c->regexp, re2_pattern, p);
303
+
304
+ vector<RE2::Arg> argv(c->number_of_capturing_groups);
305
+ vector<RE2::Arg*> args(c->number_of_capturing_groups);
306
+ vector<string> matches(c->number_of_capturing_groups);
307
+
308
+ if (c->eof) {
309
+ return Qnil;
310
+ }
311
+
312
+ original_input_size = c->input->size();
313
+
314
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
315
+ matches[i] = "";
316
+ argv[i] = &matches[i];
317
+ args[i] = &argv[i];
318
+ }
319
+
320
+ if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
321
+ c->number_of_capturing_groups)) {
322
+ result = rb_ary_new2(c->number_of_capturing_groups);
323
+ new_input_size = c->input->size();
324
+ input_advanced = new_input_size < original_input_size;
325
+
326
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
327
+ if (matches[i].empty()) {
328
+ rb_ary_push(result, Qnil);
329
+ } else {
330
+ rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
331
+ matches[i].size(),
332
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
333
+ }
334
+ }
335
+
336
+ /* Check whether we've exhausted the input yet. */
337
+ c->eof = new_input_size == 0;
338
+
339
+ /* If the match didn't advance the input, we need to do this ourselves. */
340
+ if (!input_advanced && new_input_size > 0) {
341
+ c->input->remove_prefix(1);
342
+ }
343
+ } else {
344
+ result = Qnil;
345
+ }
346
+
347
+ return result;
348
+ }
349
+
350
+ /*
351
+ * Retrieve a matchdata by index or name.
352
+ */
353
+ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
354
+ int id;
355
+ re2_matchdata *m;
356
+ re2_pattern *p;
357
+ map<string, int> groups;
358
+ string name;
359
+ re2::StringPiece *match;
360
+
361
+ Data_Get_Struct(self, re2_matchdata, m);
362
+ Data_Get_Struct(m->regexp, re2_pattern, p);
363
+
364
+ if (FIXNUM_P(idx)) {
365
+ id = FIX2INT(idx);
366
+ } else {
367
+ if (SYMBOL_P(idx)) {
368
+ name = rb_id2name(SYM2ID(idx));
369
+ } else {
370
+ name = StringValuePtr(idx);
371
+ }
372
+
373
+ groups = p->pattern->NamedCapturingGroups();
374
+
375
+ if (groups.count(name) == 1) {
376
+ id = groups[name];
377
+ } else {
378
+ return NULL;
379
+ }
380
+ }
381
+
382
+ if (id >= 0 && id < m->number_of_matches) {
383
+ match = &m->matches[id];
384
+
385
+ if (!match->empty()) {
386
+ return match;
387
+ }
388
+ }
389
+
390
+ return NULL;
391
+ }
392
+
393
+ /*
394
+ * Returns the number of elements in the match array (including nils).
395
+ *
396
+ * @return [Integer] the number of elements
397
+ * @example
398
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
399
+ * m.size #=> 2
400
+ * m.length #=> 2
401
+ */
402
+ static VALUE re2_matchdata_size(VALUE self) {
403
+ re2_matchdata *m;
404
+ Data_Get_Struct(self, re2_matchdata, m);
405
+
406
+ return INT2FIX(m->number_of_matches);
407
+ }
408
+
409
+ /*
410
+ * Returns the offset of the start of the nth element of the matchdata.
411
+ *
412
+ * @param [Integer, String, Symbol] n the name or number of the match
413
+ * @return [Integer] the offset of the start of the match
414
+ * @example
415
+ * m = RE2::Regexp.new('ob (\d+)').match("bob 123")
416
+ * m.begin(0) #=> 1
417
+ * m.begin(1) #=> 4
418
+ */
419
+ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
420
+ re2_matchdata *m;
421
+ re2_pattern *p;
422
+ re2::StringPiece *match;
423
+ long offset;
424
+
425
+ Data_Get_Struct(self, re2_matchdata, m);
426
+ Data_Get_Struct(m->regexp, re2_pattern, p);
427
+
428
+ match = re2_matchdata_find_match(n, self);
429
+ if (match == NULL) {
430
+ return Qnil;
431
+ } else {
432
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
433
+
434
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
435
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
436
+ }
437
+ }
438
+
439
+ /*
440
+ * Returns the offset of the character following the end of the nth element of the matchdata.
441
+ *
442
+ * @param [Integer, String, Symbol] n the name or number of the match
443
+ * @return [Integer] the offset of the character following the end of the match
444
+ * @example
445
+ * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
446
+ * m.end(0) #=> 9
447
+ * m.end(1) #=> 7
448
+ */
449
+ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
450
+ re2_matchdata *m;
451
+ re2_pattern *p;
452
+ re2::StringPiece *match;
453
+ long offset;
454
+
455
+ Data_Get_Struct(self, re2_matchdata, m);
456
+ Data_Get_Struct(m->regexp, re2_pattern, p);
457
+
458
+ match = re2_matchdata_find_match(n, self);
459
+
460
+ if (match == NULL) {
461
+ return Qnil;
462
+ } else {
463
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
464
+
465
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
466
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
467
+ }
468
+ }
469
+
470
+ /*
471
+ * Returns the {RE2::Regexp} used in the match.
472
+ *
473
+ * @return [RE2::Regexp] the regexp used in the match
474
+ * @example
475
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
476
+ * m.regexp #=> #<RE2::Regexp /(\d+)/>
477
+ */
478
+ static VALUE re2_matchdata_regexp(VALUE self) {
479
+ re2_matchdata *m;
480
+ Data_Get_Struct(self, re2_matchdata, m);
481
+ return m->regexp;
482
+ }
483
+
484
+ /*
485
+ * Returns the {RE2::Regexp} used in the scanner.
486
+ *
487
+ * @return [RE2::Regexp] the regexp used in the scanner
488
+ * @example
489
+ * c = RE2::Regexp.new('(\d+)').scan("bob 123")
490
+ * c.regexp #=> #<RE2::Regexp /(\d+)/>
491
+ */
492
+ static VALUE re2_scanner_regexp(VALUE self) {
493
+ re2_scanner *c;
494
+ Data_Get_Struct(self, re2_scanner, c);
495
+
496
+ return c->regexp;
497
+ }
498
+
499
+ static VALUE re2_regexp_allocate(VALUE klass) {
500
+ re2_pattern *p;
501
+ return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
502
+ }
503
+
504
+ /*
505
+ * Returns the array of matches.
506
+ *
507
+ * @return [Array<String, nil>] the array of matches
508
+ * @example
509
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
510
+ * m.to_a #=> ["123", "123"]
511
+ */
512
+ static VALUE re2_matchdata_to_a(VALUE self) {
513
+ int i;
514
+ re2_matchdata *m;
515
+ re2_pattern *p;
516
+ re2::StringPiece *match;
517
+ VALUE array;
518
+
519
+ Data_Get_Struct(self, re2_matchdata, m);
520
+ Data_Get_Struct(m->regexp, re2_pattern, p);
521
+
522
+ array = rb_ary_new2(m->number_of_matches);
523
+ for (i = 0; i < m->number_of_matches; i++) {
524
+ match = &m->matches[i];
525
+
526
+ if (match->empty()) {
527
+ rb_ary_push(array, Qnil);
528
+ } else {
529
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
530
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
531
+ }
532
+ }
533
+
534
+ return array;
535
+ }
536
+
537
+ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
538
+ re2_matchdata *m;
539
+ re2_pattern *p;
540
+ re2::StringPiece *match;
541
+
542
+ Data_Get_Struct(self, re2_matchdata, m);
543
+ Data_Get_Struct(m->regexp, re2_pattern, p);
544
+
545
+ if (nth < 0 || nth >= m->number_of_matches) {
546
+ return Qnil;
547
+ } else {
548
+ match = &m->matches[nth];
549
+
550
+ if (match->empty()) {
551
+ return Qnil;
552
+ } else {
553
+ return ENCODED_STR_NEW(match->data(), match->size(),
554
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
555
+ }
556
+ }
557
+ }
558
+
559
+ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
560
+ int idx;
561
+ re2_matchdata *m;
562
+ re2_pattern *p;
563
+ map<string, int> groups;
564
+ string name_as_string(name);
565
+
566
+ Data_Get_Struct(self, re2_matchdata, m);
567
+ Data_Get_Struct(m->regexp, re2_pattern, p);
568
+
569
+ groups = p->pattern->NamedCapturingGroups();
570
+
571
+ if (groups.count(name_as_string) == 1) {
572
+ idx = groups[name_as_string];
573
+ return re2_matchdata_nth_match(idx, self);
574
+ } else {
575
+ return Qnil;
576
+ }
577
+ }
578
+
579
+ /*
580
+ * Retrieve zero, one or more matches by index or name.
581
+ *
582
+ * @return [Array<String, nil>, String, Boolean]
583
+ *
584
+ * @overload [](index)
585
+ * Access a particular match by index.
586
+ *
587
+ * @param [Integer] index the index of the match to fetch
588
+ * @return [String, nil] the specified match
589
+ * @example
590
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
591
+ * m[0] #=> "123"
592
+ *
593
+ * @overload [](start, length)
594
+ * Access a range of matches by starting index and length.
595
+ *
596
+ * @param [Integer] start the index from which to start
597
+ * @param [Integer] length the number of elements to fetch
598
+ * @return [Array<String, nil>] the specified matches
599
+ * @example
600
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
601
+ * m[0, 1] #=> ["123"]
602
+ *
603
+ * @overload [](range)
604
+ * Access a range of matches by index.
605
+ *
606
+ * @param [Range] range the range of match indexes to fetch
607
+ * @return [Array<String, nil>] the specified matches
608
+ * @example
609
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
610
+ * m[0..1] #=> ["123", "123"]
611
+ *
612
+ * @overload [](name)
613
+ * Access a particular match by name.
614
+ *
615
+ * @param [String, Symbol] name the name of the match to fetch
616
+ * @return [String, nil] the specific match
617
+ * @example
618
+ * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
619
+ * m["number"] #=> "123"
620
+ * m[:number] #=> "123"
621
+ */
622
+ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
623
+ VALUE idx, rest;
624
+ rb_scan_args(argc, argv, "11", &idx, &rest);
625
+
626
+ if (TYPE(idx) == T_STRING) {
627
+ return re2_matchdata_named_match(StringValuePtr(idx), self);
628
+ } else if (SYMBOL_P(idx)) {
629
+ return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
630
+ } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
631
+ return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
632
+ } else {
633
+ return re2_matchdata_nth_match(FIX2INT(idx), self);
634
+ }
635
+ }
636
+
637
+ /*
638
+ * Returns the entire matched string.
639
+ *
640
+ * @return [String] the entire matched string
641
+ */
642
+ static VALUE re2_matchdata_to_s(VALUE self) {
643
+ return re2_matchdata_nth_match(0, self);
644
+ }
645
+
646
+ /*
647
+ * Returns a printable version of the match.
648
+ *
649
+ * @return [String] a printable version of the match
650
+ * @example
651
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
652
+ * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
653
+ */
654
+ static VALUE re2_matchdata_inspect(VALUE self) {
655
+ int i;
656
+ re2_matchdata *m;
657
+ re2_pattern *p;
658
+ VALUE match, result;
659
+ ostringstream output;
660
+
661
+ Data_Get_Struct(self, re2_matchdata, m);
662
+ Data_Get_Struct(m->regexp, re2_pattern, p);
663
+
664
+ output << "#<RE2::MatchData";
665
+
666
+ for (i = 0; i < m->number_of_matches; i++) {
667
+ output << " ";
668
+
669
+ if (i > 0) {
670
+ output << i << ":";
671
+ }
672
+
673
+ match = re2_matchdata_nth_match(i, self);
674
+
675
+ if (match == Qnil) {
676
+ output << "nil";
677
+ } else {
678
+ output << "\"" << StringValuePtr(match) << "\"";
679
+ }
680
+ }
681
+
682
+ output << ">";
683
+
684
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
685
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
686
+
687
+ return result;
688
+ }
689
+
690
+ /*
691
+ * Returns the array of submatches for pattern matching.
692
+ *
693
+ * @return [Array<String, nil>] the array of submatches
694
+ * @example
695
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
696
+ * m.deconstruct #=> ["123"]
697
+ *
698
+ * @example pattern matching
699
+ * case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
700
+ * in x, y
701
+ * puts "Matched #{x} #{y}"
702
+ * else
703
+ * puts "Unrecognised match"
704
+ * end
705
+ */
706
+ static VALUE re2_matchdata_deconstruct(VALUE self) {
707
+ int i;
708
+ re2_matchdata *m;
709
+ re2_pattern *p;
710
+ re2::StringPiece *match;
711
+ VALUE array;
712
+
713
+ Data_Get_Struct(self, re2_matchdata, m);
714
+ Data_Get_Struct(m->regexp, re2_pattern, p);
715
+
716
+ array = rb_ary_new2(m->number_of_matches - 1);
717
+ for (i = 1; i < m->number_of_matches; i++) {
718
+ match = &m->matches[i];
719
+
720
+ if (match->empty()) {
721
+ rb_ary_push(array, Qnil);
722
+ } else {
723
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
724
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
725
+ }
726
+ }
727
+
728
+ return array;
729
+ }
730
+
731
+ /*
732
+ * Returns a hash of capturing group names to submatches for pattern matching.
733
+ *
734
+ * As this is used by Ruby's pattern matching, it will return an empty hash if given
735
+ * more keys than there are capturing groups. Given keys will populate the hash in
736
+ * order but an invalid name will cause the hash to be immediately returned.
737
+ *
738
+ * @return [Hash] a hash of capturing group names to submatches
739
+ * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
740
+ * @example
741
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
742
+ * m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
743
+ * m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
744
+ * m.deconstruct_keys([:fruit]) #=> {}
745
+ * m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
746
+ *
747
+ * @example pattern matching
748
+ * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
749
+ * in numbers:, letters:
750
+ * puts "Numbers: #{numbers}, letters: #{letters}"
751
+ * else
752
+ * puts "Unrecognised match"
753
+ * end
754
+ */
755
+ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
756
+ int i;
757
+ VALUE capturing_groups, key;
758
+ re2_matchdata *m;
759
+ re2_pattern *p;
760
+ map<string, int> groups;
761
+ map<string, int>::iterator iterator;
762
+
763
+ Data_Get_Struct(self, re2_matchdata, m);
764
+ Data_Get_Struct(m->regexp, re2_pattern, p);
765
+
766
+ groups = p->pattern->NamedCapturingGroups();
767
+ capturing_groups = rb_hash_new();
768
+
769
+ if (NIL_P(keys)) {
770
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
771
+ rb_hash_aset(capturing_groups,
772
+ ID2SYM(rb_intern(iterator->first.data())),
773
+ re2_matchdata_nth_match(iterator->second, self));
774
+ }
775
+ } else {
776
+ Check_Type(keys, T_ARRAY);
777
+
778
+ if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
779
+ for (i = 0; i < RARRAY_LEN(keys); i++) {
780
+ key = rb_ary_entry(keys, i);
781
+ Check_Type(key, T_SYMBOL);
782
+ string name(rb_id2name(SYM2ID(key)));
783
+
784
+ if (groups.count(name) == 0) {
785
+ break;
786
+ }
787
+
788
+ rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
789
+ }
790
+ }
791
+ }
792
+
793
+ return capturing_groups;
794
+ }
795
+
796
+ /*
797
+ * Returns a new RE2 object with a compiled version of
798
+ * +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
799
+ *
800
+ * @see RE2::Regexp#initialize
801
+ *
802
+ */
803
+ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
804
+ UNUSED(self);
805
+ return rb_class_new_instance(argc, argv, re2_cRegexp);
806
+ }
807
+
808
+ /*
809
+ * Returns a new {RE2::Regexp} object with a compiled version of
810
+ * +pattern+ stored inside.
811
+ *
812
+ * @return [RE2::Regexp]
813
+ *
814
+ * @overload initialize(pattern)
815
+ * Returns a new {RE2::Regexp} object with a compiled version of
816
+ * +pattern+ stored inside with the default options.
817
+ *
818
+ * @param [String] pattern the pattern to compile
819
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
820
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled
821
+ * pattern
822
+ *
823
+ * @overload initialize(pattern, options)
824
+ * Returns a new {RE2::Regexp} object with a compiled version of
825
+ * +pattern+ stored inside with the specified options.
826
+ *
827
+ * @param [String] pattern the pattern to compile
828
+ * @param [Hash] options the options with which to compile the pattern
829
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
830
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
831
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
832
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
833
+ * @option options [Integer] :max_mem approx. max memory footprint of RE2
834
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
835
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
836
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
837
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
838
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
839
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
840
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
841
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
842
+ */
843
+ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
844
+ VALUE pattern, options;
845
+ re2_pattern *p;
846
+
847
+ rb_scan_args(argc, argv, "11", &pattern, &options);
848
+ Data_Get_Struct(self, re2_pattern, p);
849
+
850
+ if (RTEST(options)) {
851
+ RE2::Options re2_options;
852
+ parse_re2_options(re2_options, options);
853
+
854
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
855
+ } else {
856
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
857
+ }
858
+
859
+ if (p->pattern == 0) {
860
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
861
+ }
862
+
863
+ return self;
864
+ }
865
+
866
+ /*
867
+ * Returns a printable version of the regular expression +re2+.
868
+ *
869
+ * @return [String] a printable version of the regular expression
870
+ * @example
871
+ * re2 = RE2::Regexp.new("woo?")
872
+ * re2.inspect #=> "#<RE2::Regexp /woo?/>"
873
+ */
874
+ static VALUE re2_regexp_inspect(VALUE self) {
875
+ re2_pattern *p;
876
+ VALUE result;
877
+ ostringstream output;
878
+
879
+ Data_Get_Struct(self, re2_pattern, p);
880
+
881
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
882
+
883
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
884
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
885
+
886
+ return result;
887
+ }
888
+
889
+ /*
890
+ * Returns a string version of the regular expression +re2+.
891
+ *
892
+ * @return [String] a string version of the regular expression
893
+ * @example
894
+ * re2 = RE2::Regexp.new("woo?")
895
+ * re2.to_s #=> "woo?"
896
+ */
897
+ static VALUE re2_regexp_to_s(VALUE self) {
898
+ re2_pattern *p;
899
+ Data_Get_Struct(self, re2_pattern, p);
900
+ return ENCODED_STR_NEW(p->pattern->pattern().data(),
901
+ p->pattern->pattern().size(),
902
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
903
+ }
904
+
905
+ /*
906
+ * Returns whether or not the regular expression +re2+
907
+ * was compiled successfully or not.
908
+ *
909
+ * @return [Boolean] whether or not compilation was successful
910
+ * @example
911
+ * re2 = RE2::Regexp.new("woo?")
912
+ * re2.ok? #=> true
913
+ */
914
+ static VALUE re2_regexp_ok(VALUE self) {
915
+ re2_pattern *p;
916
+ Data_Get_Struct(self, re2_pattern, p);
917
+ return BOOL2RUBY(p->pattern->ok());
918
+ }
919
+
920
+ /*
921
+ * Returns whether or not the regular expression +re2+
922
+ * was compiled with the utf8 option set to true.
923
+ *
924
+ * @return [Boolean] the utf8 option
925
+ * @example
926
+ * re2 = RE2::Regexp.new("woo?", :utf8 => true)
927
+ * re2.utf8? #=> true
928
+ */
929
+ static VALUE re2_regexp_utf8(VALUE self) {
930
+ re2_pattern *p;
931
+ Data_Get_Struct(self, re2_pattern, p);
932
+ return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
933
+ }
934
+
935
+ /*
936
+ * Returns whether or not the regular expression +re2+
937
+ * was compiled with the posix_syntax option set to true.
938
+ *
939
+ * @return [Boolean] the posix_syntax option
940
+ * @example
941
+ * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
942
+ * re2.posix_syntax? #=> true
943
+ */
944
+ static VALUE re2_regexp_posix_syntax(VALUE self) {
945
+ re2_pattern *p;
946
+ Data_Get_Struct(self, re2_pattern, p);
947
+ return BOOL2RUBY(p->pattern->options().posix_syntax());
948
+ }
949
+
950
+ /*
951
+ * Returns whether or not the regular expression +re2+
952
+ * was compiled with the longest_match option set to true.
953
+ *
954
+ * @return [Boolean] the longest_match option
955
+ * @example
956
+ * re2 = RE2::Regexp.new("woo?", :longest_match => true)
957
+ * re2.longest_match? #=> true
958
+ */
959
+ static VALUE re2_regexp_longest_match(VALUE self) {
960
+ re2_pattern *p;
961
+ Data_Get_Struct(self, re2_pattern, p);
962
+ return BOOL2RUBY(p->pattern->options().longest_match());
963
+ }
964
+
965
+ /*
966
+ * Returns whether or not the regular expression +re2+
967
+ * was compiled with the log_errors option set to true.
968
+ *
969
+ * @return [Boolean] the log_errors option
970
+ * @example
971
+ * re2 = RE2::Regexp.new("woo?", :log_errors => true)
972
+ * re2.log_errors? #=> true
973
+ */
974
+ static VALUE re2_regexp_log_errors(VALUE self) {
975
+ re2_pattern *p;
976
+ Data_Get_Struct(self, re2_pattern, p);
977
+ return BOOL2RUBY(p->pattern->options().log_errors());
978
+ }
979
+
980
+ /*
981
+ * Returns the max_mem setting for the regular expression
982
+ * +re2+.
983
+ *
984
+ * @return [Integer] the max_mem option
985
+ * @example
986
+ * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
987
+ * re2.max_mem #=> 1024
988
+ */
989
+ static VALUE re2_regexp_max_mem(VALUE self) {
990
+ re2_pattern *p;
991
+ Data_Get_Struct(self, re2_pattern, p);
992
+ return INT2FIX(p->pattern->options().max_mem());
993
+ }
994
+
995
+ /*
996
+ * Returns whether or not the regular expression +re2+
997
+ * was compiled with the literal option set to true.
998
+ *
999
+ * @return [Boolean] the literal option
1000
+ * @example
1001
+ * re2 = RE2::Regexp.new("woo?", :literal => true)
1002
+ * re2.literal? #=> true
1003
+ */
1004
+ static VALUE re2_regexp_literal(VALUE self) {
1005
+ re2_pattern *p;
1006
+ Data_Get_Struct(self, re2_pattern, p);
1007
+ return BOOL2RUBY(p->pattern->options().literal());
1008
+ }
1009
+
1010
+ /*
1011
+ * Returns whether or not the regular expression +re2+
1012
+ * was compiled with the never_nl option set to true.
1013
+ *
1014
+ * @return [Boolean] the never_nl option
1015
+ * @example
1016
+ * re2 = RE2::Regexp.new("woo?", :never_nl => true)
1017
+ * re2.never_nl? #=> true
1018
+ */
1019
+ static VALUE re2_regexp_never_nl(VALUE self) {
1020
+ re2_pattern *p;
1021
+ Data_Get_Struct(self, re2_pattern, p);
1022
+ return BOOL2RUBY(p->pattern->options().never_nl());
1023
+ }
1024
+
1025
+ /*
1026
+ * Returns whether or not the regular expression +re2+
1027
+ * was compiled with the case_sensitive option set to true.
1028
+ *
1029
+ * @return [Boolean] the case_sensitive option
1030
+ * @example
1031
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1032
+ * re2.case_sensitive? #=> true
1033
+ */
1034
+ static VALUE re2_regexp_case_sensitive(VALUE self) {
1035
+ re2_pattern *p;
1036
+ Data_Get_Struct(self, re2_pattern, p);
1037
+ return BOOL2RUBY(p->pattern->options().case_sensitive());
1038
+ }
1039
+
1040
+ /*
1041
+ * Returns whether or not the regular expression +re2+
1042
+ * was compiled with the case_sensitive option set to false.
1043
+ *
1044
+ * @return [Boolean] the inverse of the case_sensitive option
1045
+ * @example
1046
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1047
+ * re2.case_insensitive? #=> false
1048
+ * re2.casefold? #=> false
1049
+ */
1050
+ static VALUE re2_regexp_case_insensitive(VALUE self) {
1051
+ return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
1052
+ }
1053
+
1054
+ /*
1055
+ * Returns whether or not the regular expression +re2+
1056
+ * was compiled with the perl_classes option set to true.
1057
+ *
1058
+ * @return [Boolean] the perl_classes option
1059
+ * @example
1060
+ * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
1061
+ * re2.perl_classes? #=> true
1062
+ */
1063
+ static VALUE re2_regexp_perl_classes(VALUE self) {
1064
+ re2_pattern *p;
1065
+ Data_Get_Struct(self, re2_pattern, p);
1066
+ return BOOL2RUBY(p->pattern->options().perl_classes());
1067
+ }
1068
+
1069
+ /*
1070
+ * Returns whether or not the regular expression +re2+
1071
+ * was compiled with the word_boundary option set to true.
1072
+ *
1073
+ * @return [Boolean] the word_boundary option
1074
+ * @example
1075
+ * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
1076
+ * re2.word_boundary? #=> true
1077
+ */
1078
+ static VALUE re2_regexp_word_boundary(VALUE self) {
1079
+ re2_pattern *p;
1080
+ Data_Get_Struct(self, re2_pattern, p);
1081
+ return BOOL2RUBY(p->pattern->options().word_boundary());
1082
+ }
1083
+
1084
+ /*
1085
+ * Returns whether or not the regular expression +re2+
1086
+ * was compiled with the one_line option set to true.
1087
+ *
1088
+ * @return [Boolean] the one_line option
1089
+ * @example
1090
+ * re2 = RE2::Regexp.new("woo?", :one_line => true)
1091
+ * re2.one_line? #=> true
1092
+ */
1093
+ static VALUE re2_regexp_one_line(VALUE self) {
1094
+ re2_pattern *p;
1095
+ Data_Get_Struct(self, re2_pattern, p);
1096
+ return BOOL2RUBY(p->pattern->options().one_line());
1097
+ }
1098
+
1099
+ /*
1100
+ * If the RE2 could not be created properly, returns an
1101
+ * error string otherwise returns nil.
1102
+ *
1103
+ * @return [String, nil] the error string or nil
1104
+ */
1105
+ static VALUE re2_regexp_error(VALUE self) {
1106
+ re2_pattern *p;
1107
+ Data_Get_Struct(self, re2_pattern, p);
1108
+ if (p->pattern->ok()) {
1109
+ return Qnil;
1110
+ } else {
1111
+ return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
1112
+ }
1113
+ }
1114
+
1115
+ /*
1116
+ * If the RE2 could not be created properly, returns
1117
+ * the offending portion of the regexp otherwise returns nil.
1118
+ *
1119
+ * @return [String, nil] the offending portion of the regexp or nil
1120
+ */
1121
+ static VALUE re2_regexp_error_arg(VALUE self) {
1122
+ re2_pattern *p;
1123
+ Data_Get_Struct(self, re2_pattern, p);
1124
+ if (p->pattern->ok()) {
1125
+ return Qnil;
1126
+ } else {
1127
+ return ENCODED_STR_NEW(p->pattern->error_arg().data(),
1128
+ p->pattern->error_arg().size(),
1129
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1130
+ }
1131
+ }
1132
+
1133
+ /*
1134
+ * Returns the program size, a very approximate measure
1135
+ * of a regexp's "cost". Larger numbers are more expensive
1136
+ * than smaller numbers.
1137
+ *
1138
+ * @return [Integer] the regexp "cost"
1139
+ */
1140
+ static VALUE re2_regexp_program_size(VALUE self) {
1141
+ re2_pattern *p;
1142
+ Data_Get_Struct(self, re2_pattern, p);
1143
+ return INT2FIX(p->pattern->ProgramSize());
1144
+ }
1145
+
1146
+ /*
1147
+ * Returns a hash of the options currently set for
1148
+ * +re2+.
1149
+ *
1150
+ * @return [Hash] the options
1151
+ */
1152
+ static VALUE re2_regexp_options(VALUE self) {
1153
+ VALUE options;
1154
+ re2_pattern *p;
1155
+
1156
+ Data_Get_Struct(self, re2_pattern, p);
1157
+ options = rb_hash_new();
1158
+
1159
+ rb_hash_aset(options, ID2SYM(id_utf8),
1160
+ BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
1161
+
1162
+ rb_hash_aset(options, ID2SYM(id_posix_syntax),
1163
+ BOOL2RUBY(p->pattern->options().posix_syntax()));
1164
+
1165
+ rb_hash_aset(options, ID2SYM(id_longest_match),
1166
+ BOOL2RUBY(p->pattern->options().longest_match()));
1167
+
1168
+ rb_hash_aset(options, ID2SYM(id_log_errors),
1169
+ BOOL2RUBY(p->pattern->options().log_errors()));
1170
+
1171
+ rb_hash_aset(options, ID2SYM(id_max_mem),
1172
+ INT2FIX(p->pattern->options().max_mem()));
1173
+
1174
+ rb_hash_aset(options, ID2SYM(id_literal),
1175
+ BOOL2RUBY(p->pattern->options().literal()));
1176
+
1177
+ rb_hash_aset(options, ID2SYM(id_never_nl),
1178
+ BOOL2RUBY(p->pattern->options().never_nl()));
1179
+
1180
+ rb_hash_aset(options, ID2SYM(id_case_sensitive),
1181
+ BOOL2RUBY(p->pattern->options().case_sensitive()));
1182
+
1183
+ rb_hash_aset(options, ID2SYM(id_perl_classes),
1184
+ BOOL2RUBY(p->pattern->options().perl_classes()));
1185
+
1186
+ rb_hash_aset(options, ID2SYM(id_word_boundary),
1187
+ BOOL2RUBY(p->pattern->options().word_boundary()));
1188
+
1189
+ rb_hash_aset(options, ID2SYM(id_one_line),
1190
+ BOOL2RUBY(p->pattern->options().one_line()));
1191
+
1192
+ /* This is a read-only hash after all... */
1193
+ rb_obj_freeze(options);
1194
+
1195
+ return options;
1196
+ }
1197
+
1198
+ /*
1199
+ * Returns the number of capturing subpatterns, or -1 if the regexp
1200
+ * wasn't valid on construction. The overall match ($0) does not
1201
+ * count: if the regexp is "(a)(b)", returns 2.
1202
+ *
1203
+ * @return [Integer] the number of capturing subpatterns
1204
+ */
1205
+ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1206
+ re2_pattern *p;
1207
+
1208
+ Data_Get_Struct(self, re2_pattern, p);
1209
+ return INT2FIX(p->pattern->NumberOfCapturingGroups());
1210
+ }
1211
+
1212
+ /*
1213
+ * Returns a hash of names to capturing indices of groups.
1214
+ *
1215
+ * @return [Hash] a hash of names to capturing indices
1216
+ */
1217
+ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1218
+ VALUE capturing_groups;
1219
+ re2_pattern *p;
1220
+ map<string, int> groups;
1221
+ map<string, int>::iterator iterator;
1222
+
1223
+ Data_Get_Struct(self, re2_pattern, p);
1224
+ groups = p->pattern->NamedCapturingGroups();
1225
+ capturing_groups = rb_hash_new();
1226
+
1227
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
1228
+ rb_hash_aset(capturing_groups,
1229
+ ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
1230
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"),
1231
+ INT2FIX(iterator->second));
1232
+ }
1233
+
1234
+ return capturing_groups;
1235
+ }
1236
+
1237
+ /*
1238
+ * Match the pattern against the given +text+ and return either
1239
+ * a boolean (if no submatches are required) or a {RE2::MatchData}
1240
+ * instance.
1241
+ *
1242
+ * @return [Boolean, RE2::MatchData]
1243
+ *
1244
+ * @overload match(text)
1245
+ * Returns an {RE2::MatchData} containing the matching
1246
+ * pattern and all subpatterns resulting from looking for
1247
+ * the regexp in +text+.
1248
+ *
1249
+ * @param [String] text the text to search
1250
+ * @return [RE2::MatchData] the matches
1251
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1252
+ * @example
1253
+ * r = RE2::Regexp.new('w(o)(o)')
1254
+ * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1255
+ *
1256
+ * @overload match(text, 0)
1257
+ * Returns either true or false indicating whether a
1258
+ * successful match was made.
1259
+ *
1260
+ * @param [String] text the text to search
1261
+ * @return [Boolean] whether the match was successful
1262
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1263
+ * @example
1264
+ * r = RE2::Regexp.new('w(o)(o)')
1265
+ * r.match('woo', 0) #=> true
1266
+ * r.match('bob', 0) #=> false
1267
+ *
1268
+ * @overload match(text, number_of_matches)
1269
+ * See +match(text)+ but with a specific number of
1270
+ * matches returned (padded with nils if necessary).
1271
+ *
1272
+ * @param [String] text the text to search
1273
+ * @param [Integer] number_of_matches the number of matches to return
1274
+ * @return [RE2::MatchData] the matches
1275
+ * @raise [ArgumentError] if given a negative number of matches
1276
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1277
+ * @example
1278
+ * r = RE2::Regexp.new('w(o)(o)')
1279
+ * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1280
+ * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1281
+ */
1282
+ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1283
+ int n;
1284
+ bool matched;
1285
+ re2_pattern *p;
1286
+ re2_matchdata *m;
1287
+ VALUE text, number_of_matches, matchdata;
1288
+
1289
+ rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1290
+
1291
+ /* Ensure text is a string. */
1292
+ text = StringValue(text);
1293
+
1294
+ Data_Get_Struct(self, re2_pattern, p);
1295
+
1296
+ if (RTEST(number_of_matches)) {
1297
+ n = NUM2INT(number_of_matches);
1298
+
1299
+ if (n < 0) {
1300
+ rb_raise(rb_eArgError, "number of matches should be >= 0");
1301
+ }
1302
+ } else {
1303
+ if (!p->pattern->ok()) {
1304
+ return Qnil;
1305
+ }
1306
+
1307
+ n = p->pattern->NumberOfCapturingGroups();
1308
+ }
1309
+
1310
+ if (n == 0) {
1311
+ matched = match(p->pattern, StringValuePtr(text), 0,
1312
+ static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1313
+ return BOOL2RUBY(matched);
1314
+ } else {
1315
+
1316
+ /* Because match returns the whole match as well. */
1317
+ n += 1;
1318
+
1319
+ matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1320
+ Data_Get_Struct(matchdata, re2_matchdata, m);
1321
+ m->matches = new(nothrow) re2::StringPiece[n];
1322
+ m->regexp = self;
1323
+ m->text = rb_str_dup(text);
1324
+ rb_str_freeze(m->text);
1325
+
1326
+ if (m->matches == 0) {
1327
+ rb_raise(rb_eNoMemError,
1328
+ "not enough memory to allocate StringPieces for matches");
1329
+ }
1330
+
1331
+ m->number_of_matches = n;
1332
+
1333
+ matched = match(p->pattern, StringValuePtr(m->text), 0,
1334
+ static_cast<int>(RSTRING_LEN(m->text)),
1335
+ RE2::UNANCHORED, m->matches, n);
1336
+
1337
+ if (matched) {
1338
+ return matchdata;
1339
+ } else {
1340
+ return Qnil;
1341
+ }
1342
+ }
1343
+ }
1344
+
1345
+ /*
1346
+ * Returns true or false to indicate a successful match.
1347
+ * Equivalent to +re2.match(text, 0)+.
1348
+ *
1349
+ * @return [Boolean] whether the match was successful
1350
+ */
1351
+ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1352
+ VALUE argv[2];
1353
+ argv[0] = text;
1354
+ argv[1] = INT2FIX(0);
1355
+
1356
+ return re2_regexp_match(2, argv, self);
1357
+ }
1358
+
1359
+ /*
1360
+ * Returns a {RE2::Scanner} for scanning the given text incrementally.
1361
+ *
1362
+ * @example
1363
+ * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1364
+ */
1365
+ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1366
+ re2_pattern *p;
1367
+ re2_scanner *c;
1368
+ VALUE scanner;
1369
+
1370
+ Data_Get_Struct(self, re2_pattern, p);
1371
+ scanner = rb_class_new_instance(0, 0, re2_cScanner);
1372
+ Data_Get_Struct(scanner, re2_scanner, c);
1373
+
1374
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1375
+ c->regexp = self;
1376
+ c->text = text;
1377
+
1378
+ if (p->pattern->ok()) {
1379
+ c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1380
+ } else {
1381
+ c->number_of_capturing_groups = 0;
1382
+ }
1383
+
1384
+ c->eof = false;
1385
+
1386
+ return scanner;
1387
+ }
1388
+
1389
+ /*
1390
+ * Returns a copy of +str+ with the first occurrence +pattern+
1391
+ * replaced with +rewrite+.
1392
+ *
1393
+ * @param [String] str the string to modify
1394
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1395
+ * @param [String] rewrite the string to replace with
1396
+ * @return [String] the resulting string
1397
+ * @example
1398
+ * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1399
+ * re2 = RE2::Regexp.new("hel+o")
1400
+ * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1401
+ */
1402
+ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1403
+ VALUE rewrite) {
1404
+ UNUSED(self);
1405
+ re2_pattern *p;
1406
+
1407
+ /* Convert all the inputs to be pumped into RE2::Replace. */
1408
+ string str_as_string(StringValuePtr(str));
1409
+
1410
+ /* Do the replacement. */
1411
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1412
+ Data_Get_Struct(pattern, re2_pattern, p);
1413
+ RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1414
+
1415
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1416
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1417
+ } else {
1418
+ RE2::Replace(&str_as_string, StringValuePtr(pattern),
1419
+ StringValuePtr(rewrite));
1420
+
1421
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1422
+ pattern);
1423
+ }
1424
+
1425
+ }
1426
+
1427
+ /*
1428
+ * Return a copy of +str+ with +pattern+ replaced by +rewrite+.
1429
+ *
1430
+ * @param [String] str the string to modify
1431
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1432
+ * @param [String] rewrite the string to replace with
1433
+ * @return [String] the resulting string
1434
+ * @example
1435
+ * re2 = RE2::Regexp.new("oo?")
1436
+ * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1437
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1438
+ */
1439
+ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1440
+ VALUE rewrite) {
1441
+ UNUSED(self);
1442
+
1443
+ /* Convert all the inputs to be pumped into RE2::GlobalReplace. */
1444
+ re2_pattern *p;
1445
+ string str_as_string(StringValuePtr(str));
1446
+
1447
+ /* Do the replacement. */
1448
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1449
+ Data_Get_Struct(pattern, re2_pattern, p);
1450
+ RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1451
+
1452
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1453
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1454
+ } else {
1455
+ RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1456
+ StringValuePtr(rewrite));
1457
+
1458
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1459
+ pattern);
1460
+ }
1461
+ }
1462
+
1463
+ /*
1464
+ * Returns a version of str with all potentially meaningful regexp
1465
+ * characters escaped. The returned string, used as a regular
1466
+ * expression, will exactly match the original string.
1467
+ *
1468
+ * @param [String] unquoted the unquoted string
1469
+ * @return [String] the escaped string
1470
+ * @example
1471
+ * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1472
+ */
1473
+ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1474
+ UNUSED(self);
1475
+ string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1476
+ return rb_str_new(quoted_string.data(), quoted_string.size());
1477
+ }
1478
+
1479
+ void re2_set_free(re2_set *self) {
1480
+ if (self->set) {
1481
+ delete self->set;
1482
+ }
1483
+ free(self);
1484
+ }
1485
+
1486
+ static VALUE re2_set_allocate(VALUE klass) {
1487
+ re2_set *s;
1488
+ VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1489
+ return result;
1490
+ }
1491
+
1492
+ /*
1493
+ * Returns a new {RE2::Set} object, a collection of patterns that can be
1494
+ * searched for simultaneously.
1495
+ *
1496
+ * @return [RE2::Set]
1497
+ *
1498
+ * @overload initialize
1499
+ * Returns a new {RE2::Set} object for unanchored patterns with the default
1500
+ * options.
1501
+ *
1502
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1503
+ * @return [RE2::Set]
1504
+ *
1505
+ * @overload initialize(anchor)
1506
+ * Returns a new {RE2::Set} object for the specified anchor with the default
1507
+ * options.
1508
+ *
1509
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1510
+ * @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
1511
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1512
+ *
1513
+ * @overload initialize(anchor, options)
1514
+ * Returns a new {RE2::Set} object with the specified options.
1515
+ *
1516
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1517
+ * @param [Hash] options the options with which to compile the pattern
1518
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
1519
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
1520
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
1521
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
1522
+ * @option options [Integer] :max_mem approx. max memory footprint of RE2
1523
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
1524
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
1525
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
1526
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
1527
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
1528
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
1529
+ * @return [RE2::Set] an RE2::Set with the specified anchor and options
1530
+ * @raise [ArgumentError] if anchor is not one of the accepted choices
1531
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1532
+ */
1533
+ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1534
+ VALUE anchor, options;
1535
+ re2_set *s;
1536
+ RE2::Anchor re2_anchor;
1537
+ RE2::Options re2_options;
1538
+
1539
+ rb_scan_args(argc, argv, "02", &anchor, &options);
1540
+ Data_Get_Struct(self, re2_set, s);
1541
+
1542
+ if (RTEST(options)) {
1543
+ parse_re2_options(re2_options, options);
1544
+ }
1545
+ if (NIL_P(anchor)) {
1546
+ re2_anchor = RE2::UNANCHORED;
1547
+ } else {
1548
+ Check_Type(anchor, T_SYMBOL);
1549
+ ID id_anchor = SYM2ID(anchor);
1550
+ if (id_anchor == id_unanchored) {
1551
+ re2_anchor = RE2::UNANCHORED;
1552
+ } else if (id_anchor == id_anchor_start) {
1553
+ re2_anchor = RE2::ANCHOR_START;
1554
+ } else if (id_anchor == id_anchor_both) {
1555
+ re2_anchor = RE2::ANCHOR_BOTH;
1556
+ } else {
1557
+ rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
1558
+ }
1559
+ }
1560
+
1561
+ s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1562
+ if (s->set == 0) {
1563
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1564
+ }
1565
+
1566
+ return self;
1567
+ }
1568
+
1569
+ /*
1570
+ * Adds a pattern to the set. Returns the index that will identify the pattern
1571
+ * in the output of #match. Cannot be called after #compile has been called.
1572
+ *
1573
+ * @param [String] pattern the regex pattern
1574
+ * @return [Integer] the index of the pattern in the set
1575
+ * @raise [ArgumentError] if called after compile or the pattern is rejected
1576
+ * @example
1577
+ * set = RE2::Set.new
1578
+ * set.add("abc") #=> 0
1579
+ * set.add("def") #=> 1
1580
+ */
1581
+ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1582
+ Check_Type(pattern, T_STRING);
1583
+ re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
1584
+ std::string err;
1585
+ re2_set *s;
1586
+ Data_Get_Struct(self, re2_set, s);
1587
+ int index = s->set->Add(regex, &err);
1588
+ if (index < 0) {
1589
+ rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
1590
+ }
1591
+
1592
+ return INT2FIX(index);
1593
+ }
1594
+
1595
+ /*
1596
+ * Compiles a Set so it can be used to match against. Must be called after #add
1597
+ * and before #match.
1598
+ *
1599
+ * @return [Bool] whether compilation was a success
1600
+ * @example
1601
+ * set = RE2::Set.new
1602
+ * set.add("abc")
1603
+ * set.compile # => true
1604
+ */
1605
+ static VALUE re2_set_compile(VALUE self) {
1606
+ re2_set *s;
1607
+ Data_Get_Struct(self, re2_set, s);
1608
+
1609
+ return BOOL2RUBY(s->set->Compile());
1610
+ }
1611
+
1612
+ /*
1613
+ * Returns whether the underlying re2 version outputs error information from
1614
+ * RE2::Set::Match. If not, #match will raise an error if attempting to set its
1615
+ * :exception option to true.
1616
+ *
1617
+ * @return [Bool] whether the underlying re2 outputs error information from Set matches
1618
+ */
1619
+ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1620
+ UNUSED(self);
1621
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1622
+ return Qtrue;
1623
+ #else
1624
+ return Qfalse;
1625
+ #endif
1626
+ }
1627
+
1628
+ /*
1629
+ * Matches the given text against patterns in the set, returning an array of
1630
+ * integer indices of the matching patterns if matched or an empty array if
1631
+ * there are no matches.
1632
+ *
1633
+ * @return [Array<Integer>]
1634
+ *
1635
+ * @overload match(str)
1636
+ * Returns an array of integer indices of patterns matching the given string
1637
+ * (if any). Raises exceptions if there are any errors while matching.
1638
+ *
1639
+ * @param [String] str the text to match against
1640
+ * @return [Array<Integer>] the indices of matching regexps
1641
+ * @raise [MatchError] if an error occurs while matching
1642
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1643
+ * @example
1644
+ * set = RE2::Set.new
1645
+ * set.add("abc")
1646
+ * set.add("def")
1647
+ * set.compile
1648
+ * set.match("abcdef") # => [0, 1]
1649
+ *
1650
+ * @overload match(str, options)
1651
+ * Returns an array of integer indices of patterns matching the given string
1652
+ * (if any). Raises exceptions if there are any errors while matching and the
1653
+ * :exception option is set to true.
1654
+ *
1655
+ * @param [String] str the text to match against
1656
+ * @param [Hash] options the options with which to match
1657
+ * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1658
+ * @return [Array<Integer>] the indices of matching regexps
1659
+ * @raise [MatchError] if an error occurs while matching
1660
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1661
+ * @example
1662
+ * set = RE2::Set.new
1663
+ * set.add("abc")
1664
+ * set.add("def")
1665
+ * set.compile
1666
+ * set.match("abcdef", :exception => true) # => [0, 1]
1667
+ */
1668
+ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1669
+ VALUE str, options, exception_option;
1670
+ bool raise_exception = true;
1671
+ rb_scan_args(argc, argv, "11", &str, &options);
1672
+ Check_Type(str, T_STRING);
1673
+ re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1674
+ std::vector<int> v;
1675
+ re2_set *s;
1676
+ Data_Get_Struct(self, re2_set, s);
1677
+
1678
+ if (RTEST(options)) {
1679
+ Check_Type(options, T_HASH);
1680
+
1681
+ exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1682
+ if (!NIL_P(exception_option)) {
1683
+ raise_exception = RTEST(exception_option);
1684
+ }
1685
+ }
1686
+
1687
+ if (raise_exception) {
1688
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1689
+ RE2::Set::ErrorInfo e;
1690
+ bool match_failed = !s->set->Match(data, &v, &e);
1691
+ VALUE result = rb_ary_new2(v.size());
1692
+
1693
+ if (match_failed) {
1694
+ switch (e.kind) {
1695
+ case RE2::Set::kNoError:
1696
+ break;
1697
+ case RE2::Set::kNotCompiled:
1698
+ rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
1699
+ case RE2::Set::kOutOfMemory:
1700
+ rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
1701
+ case RE2::Set::kInconsistent:
1702
+ rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
1703
+ default: // Just in case a future version of libre2 adds new ErrorKinds
1704
+ rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1705
+ }
1706
+ } else {
1707
+ for (size_t i = 0; i < v.size(); i++) {
1708
+ rb_ary_push(result, INT2FIX(v[i]));
1709
+ }
1710
+ }
1711
+
1712
+ return result;
1713
+ #else
1714
+ rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
1715
+ #endif
1716
+ } else {
1717
+ bool matched = s->set->Match(data, &v);
1718
+ VALUE result = rb_ary_new2(v.size());
1719
+
1720
+ if (matched) {
1721
+ for (size_t i = 0; i < v.size(); i++) {
1722
+ rb_ary_push(result, INT2FIX(v[i]));
1723
+ }
1724
+ }
1725
+
1726
+ return result;
1727
+ }
1728
+ }
1729
+
1730
+ /* Forward declare Init_re2 to be called by C code but define it separately so
1731
+ * that YARD can parse it.
1732
+ */
1733
+ extern "C" void Init_re2(void);
1734
+
1735
+ void Init_re2(void) {
1736
+ re2_mRE2 = rb_define_module("RE2");
1737
+ re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1738
+ re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1739
+ re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1740
+ re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
1741
+ re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
1742
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1743
+ re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
1744
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1745
+
1746
+ rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1747
+ rb_define_alloc_func(re2_cMatchData,
1748
+ (VALUE (*)(VALUE))re2_matchdata_allocate);
1749
+ rb_define_alloc_func(re2_cScanner,
1750
+ (VALUE (*)(VALUE))re2_scanner_allocate);
1751
+ rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
1752
+
1753
+ rb_define_method(re2_cMatchData, "string",
1754
+ RUBY_METHOD_FUNC(re2_matchdata_string), 0);
1755
+ rb_define_method(re2_cMatchData, "regexp",
1756
+ RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
1757
+ rb_define_method(re2_cMatchData, "to_a",
1758
+ RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
1759
+ rb_define_method(re2_cMatchData, "size",
1760
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1761
+ rb_define_method(re2_cMatchData, "length",
1762
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1763
+ rb_define_method(re2_cMatchData, "begin",
1764
+ RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
1765
+ rb_define_method(re2_cMatchData, "end",
1766
+ RUBY_METHOD_FUNC(re2_matchdata_end), 1);
1767
+ rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1768
+ -1);
1769
+ rb_define_method(re2_cMatchData, "to_s",
1770
+ RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1771
+ rb_define_method(re2_cMatchData, "inspect",
1772
+ RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1773
+ rb_define_method(re2_cMatchData, "deconstruct",
1774
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
1775
+ rb_define_method(re2_cMatchData, "deconstruct_keys",
1776
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
1777
+
1778
+ rb_define_method(re2_cScanner, "string",
1779
+ RUBY_METHOD_FUNC(re2_scanner_string), 0);
1780
+ rb_define_method(re2_cScanner, "eof?",
1781
+ RUBY_METHOD_FUNC(re2_scanner_eof), 0);
1782
+ rb_define_method(re2_cScanner, "regexp",
1783
+ RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
1784
+ rb_define_method(re2_cScanner, "scan",
1785
+ RUBY_METHOD_FUNC(re2_scanner_scan), 0);
1786
+ rb_define_method(re2_cScanner, "rewind",
1787
+ RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1788
+
1789
+ rb_define_method(re2_cRegexp, "initialize",
1790
+ RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1791
+ rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
1792
+ rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
1793
+ 0);
1794
+ rb_define_method(re2_cRegexp, "error_arg",
1795
+ RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
1796
+ rb_define_method(re2_cRegexp, "program_size",
1797
+ RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
1798
+ rb_define_method(re2_cRegexp, "options",
1799
+ RUBY_METHOD_FUNC(re2_regexp_options), 0);
1800
+ rb_define_method(re2_cRegexp, "number_of_capturing_groups",
1801
+ RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
1802
+ rb_define_method(re2_cRegexp, "named_capturing_groups",
1803
+ RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1804
+ rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1805
+ -1);
1806
+ rb_define_method(re2_cRegexp, "match?",
1807
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1808
+ rb_define_method(re2_cRegexp, "=~",
1809
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1810
+ rb_define_method(re2_cRegexp, "===",
1811
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1812
+ rb_define_method(re2_cRegexp, "scan",
1813
+ RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1814
+ rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
1815
+ rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
1816
+ 0);
1817
+ rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
1818
+ 0);
1819
+ rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
1820
+ 0);
1821
+ rb_define_method(re2_cRegexp, "inspect",
1822
+ RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
1823
+ rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
1824
+ 0);
1825
+ rb_define_method(re2_cRegexp, "posix_syntax?",
1826
+ RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
1827
+ rb_define_method(re2_cRegexp, "longest_match?",
1828
+ RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
1829
+ rb_define_method(re2_cRegexp, "log_errors?",
1830
+ RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
1831
+ rb_define_method(re2_cRegexp, "max_mem",
1832
+ RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
1833
+ rb_define_method(re2_cRegexp, "literal?",
1834
+ RUBY_METHOD_FUNC(re2_regexp_literal), 0);
1835
+ rb_define_method(re2_cRegexp, "never_nl?",
1836
+ RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
1837
+ rb_define_method(re2_cRegexp, "case_sensitive?",
1838
+ RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
1839
+ rb_define_method(re2_cRegexp, "case_insensitive?",
1840
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1841
+ rb_define_method(re2_cRegexp, "casefold?",
1842
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1843
+ rb_define_method(re2_cRegexp, "perl_classes?",
1844
+ RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
1845
+ rb_define_method(re2_cRegexp, "word_boundary?",
1846
+ RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
1847
+ rb_define_method(re2_cRegexp, "one_line?",
1848
+ RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1849
+
1850
+ rb_define_singleton_method(re2_cSet, "match_raises_errors?",
1851
+ RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
1852
+ rb_define_method(re2_cSet, "initialize",
1853
+ RUBY_METHOD_FUNC(re2_set_initialize), -1);
1854
+ rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
1855
+ rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
1856
+ rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
1857
+
1858
+ rb_define_module_function(re2_mRE2, "Replace",
1859
+ RUBY_METHOD_FUNC(re2_Replace), 3);
1860
+ rb_define_module_function(re2_mRE2, "GlobalReplace",
1861
+ RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
1862
+ rb_define_module_function(re2_mRE2, "QuoteMeta",
1863
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1864
+ rb_define_singleton_method(re2_cRegexp, "escape",
1865
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1866
+ rb_define_singleton_method(re2_cRegexp, "quote",
1867
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1868
+ rb_define_singleton_method(re2_cRegexp, "compile",
1869
+ RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1870
+
1871
+ rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1872
+
1873
+ /* Create the symbols used in options. */
1874
+ id_utf8 = rb_intern("utf8");
1875
+ id_posix_syntax = rb_intern("posix_syntax");
1876
+ id_longest_match = rb_intern("longest_match");
1877
+ id_log_errors = rb_intern("log_errors");
1878
+ id_max_mem = rb_intern("max_mem");
1879
+ id_literal = rb_intern("literal");
1880
+ id_never_nl = rb_intern("never_nl");
1881
+ id_case_sensitive = rb_intern("case_sensitive");
1882
+ id_perl_classes = rb_intern("perl_classes");
1883
+ id_word_boundary = rb_intern("word_boundary");
1884
+ id_one_line = rb_intern("one_line");
1885
+ id_unanchored = rb_intern("unanchored");
1886
+ id_anchor_start = rb_intern("anchor_start");
1887
+ id_anchor_both = rb_intern("anchor_both");
1888
+ id_exception = rb_intern("exception");
1889
+ }